
Re: [Xen-devel] [PATCH v3 8/9] viridian: add implementation of synthetic timers



>>> On 31.01.19 at 11:47, <paul.durrant@xxxxxxxxxx> wrote:
> --- a/xen/arch/x86/hvm/viridian/synic.c
> +++ b/xen/arch/x86/hvm/viridian/synic.c
> @@ -329,7 +329,53 @@ void viridian_synic_domain_deinit(struct domain *d)
>  
>  void viridian_synic_poll_messages(struct vcpu *v)
>  {
> -    /* There are currently no message sources */
> +    viridian_time_poll_timers(v);
> +}
> +
> +bool viridian_synic_deliver_timer_msg(struct vcpu *v, unsigned int sintx,
> +                                      unsigned int index,
> +                                      int64_t expiration, int64_t delivery)
> +{
> +    const union viridian_sint_msr *vs = &v->arch.hvm.viridian->sint[sintx];
> +    HV_MESSAGE *msg = v->arch.hvm.viridian->simp.ptr;
> +    struct {
> +        uint32_t TimerIndex;
> +        uint32_t Reserved;
> +        uint64_t ExpirationTime;
> +        uint64_t DeliveryTime;
> +    } payload = {
> +        .TimerIndex = index,
> +        .ExpirationTime = expiration,
> +        .DeliveryTime = delivery,
> +    };
> +
> +    if ( test_bit(sintx, &v->arch.hvm.viridian->msg_pending) )
> +        return false;
> +
> +    BUILD_BUG_ON(sizeof(*msg) != HV_MESSAGE_SIZE);
> +    msg += sintx;
> +
> +    /*
> +     * To avoid using an atomic test-and-set this function must be called
> +     * in context of the vcpu receiving the message.
> +     */
> +    ASSERT(v == current);
> +    if ( msg->Header.MessageType != HvMessageTypeNone )
> +    {
> +        msg->Header.MessageFlags.MessagePending = 1;
> +        set_bit(sintx, &v->arch.hvm.viridian->msg_pending);

As per the comment above, this is always in the context of the subject
vCPU. It looks to me as if this was also the case for the two
clear_bit() calls on the field in the prior patch. If so, all three
could use the non-atomic variants instead.
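I.e. (just a sketch):

    __set_bit(sintx, &v->arch.hvm.viridian->msg_pending);

here, and correspondingly __clear_bit() for the two uses in the prior
patch, since all accesses would then be confined to the subject vCPU.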

> +        return false;
> +    }
> +
> +    msg->Header.MessageType = HvMessageTimerExpired;
> +    msg->Header.MessageFlags.MessagePending = 0;
> +    msg->Header.PayloadSize = sizeof(payload);
> +    memcpy(msg->Payload, &payload, sizeof(payload));
> +
> +    if ( !vs->fields.mask )
> +        vlapic_set_irq(vcpu_vlapic(v), vs->fields.vector, 0);

If this wasn't with v == current, I think you'd also need a barrier
here. Could you extend the comment above to also mention this
aspect?
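Perhaps something along these lines (wording merely a suggestion):

    /*
     * To avoid using an atomic test-and-set, and to avoid needing a
     * barrier between the message being written and the interrupt
     * being raised, this function must be called in context of the
     * vcpu receiving the message.
     */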

> @@ -118,14 +119,237 @@ static int64_t time_ref_count(struct domain *d)
>      return raw_trc_val(d) + trc->off;
>  }
>  
> +static int64_t time_now(struct domain *d)

Why would this return a signed value? And can't the function
parameter be const?
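I.e. presumably along the lines of:

    static uint64_t time_now(const struct domain *d)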

> +{
> +    const struct viridian_page *rt = &d->arch.hvm.viridian->reference_tsc;
> +    HV_REFERENCE_TSC_PAGE *p = rt->ptr;
> +    uint32_t start, end;
> +    __int128_t tsc;
> +    __int128_t scale;

I don't think you need both of them to be 128 bits wide. I also don't
see why either would want to be of a signed type.
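I.e. (sketch; only the intermediate product needs the extra width):

    uint64_t tsc, scale;
    int64_t offset;
    ...
    return (((__uint128_t)tsc * scale) >> 64) + offset;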

> +    int64_t offset;
> +
> +    /*
> +     * If the reference TSC page is not enabled, or has been invalidated,
> +     * fall back to the partition reference counter.
> +     */
> +    if ( !p || !p->TscSequence )
> +        return time_ref_count(d);
> +
> +    /*
> +     * The following sampling algorithm for tsc, scale and offset is
> +     * documented in the specification.
> +     */
> +    start = p->TscSequence;
> +
> +    do {
> +        tsc = rdtsc();
> +        scale = p->TscScale;
> +        offset = p->TscOffset;
> +
> +        smp_mb();
> +        end = p->TscSequence;

Why is this a full barrier, rather than just a read one? And don't you need
to add a counterpart in update_reference_tsc()?

> +    } while (end != start);

update_reference_tsc() increments TscSequence. If end doesn't match
start at this point, you'll enter a near-infinite loop here, since
start is never updated inside the loop. I also think that there's a
second read barrier needed between this initial reading of the sequence
number and the reading of the actual values.
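I.e. something along these lines (just a sketch, to illustrate start
being re-read and the barrier placement; the writer side in
update_reference_tsc() would then want an smp_wmb() between updating
the values and bumping TscSequence):

    do {
        start = p->TscSequence;
        smp_rmb();                 /* sequence number before data */

        tsc = rdtsc();
        scale = p->TscScale;
        offset = p->TscOffset;

        smp_rmb();                 /* data before sequence re-read */
        end = p->TscSequence;
    } while ( end != start );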

> +    /*
> +     * The specification says: "The partition reference time is computed
> +     * by the following formula:
> +     *
> +     * ReferenceTime = ((VirtualTsc * TscScale) >> 64) + TscOffset
> +     *
> +     * The multiplication is a 64 bit multiplication, which results in a
> +     * 128 bit number which is then shifted 64 times to the right to obtain
> +     * the high 64 bits."
> +     */
> +    return ((tsc * scale) >> 64) + offset;
> +}
> +
> +static void stop_stimer(struct viridian_stimer *vs)
> +{
> +    struct vcpu *v = vs->v;

const?

> +    unsigned int stimerx = vs - &v->arch.hvm.viridian->stimer[0];
> +
> +    if ( !vs->started )
> +        return;
> +
> +    stop_timer(&vs->timer);
> +    clear_bit(stimerx, &v->arch.hvm.viridian->stimer_pending);
> +    vs->started = false;
> +}
> +
> +static void stimer_expire(void *data)
> +{
> +    struct viridian_stimer *vs = data;

const?

> +    struct vcpu *v = vs->v;
> +    unsigned int stimerx = vs - &v->arch.hvm.viridian->stimer[0];
> +
> +    if ( !vs->config.fields.enabled )
> +        return;
> +
> +    set_bit(stimerx, &v->arch.hvm.viridian->stimer_pending);
> +    vcpu_kick(v);
> +}
> +
> +static void start_stimer(struct viridian_stimer *vs)
> +{
> +    struct vcpu *v = vs->v;
> +    unsigned int stimerx = vs - &v->arch.hvm.viridian->stimer[0];
> +    int64_t now = time_now(v->domain);
> +    s_time_t timeout;
> +
> +    if ( !test_and_set_bit(stimerx, &v->arch.hvm.viridian->stimer_enabled) )
> +        printk(XENLOG_G_INFO "%pv: VIRIDIAN STIMER%u: enabled\n", v,
> +               stimerx);
> +
> +    if ( vs->config.fields.periodic )
> +    {
> +        unsigned int missed = 0;
> +        int64_t next;
> +
> +        /*
> +         * The specification says that if the timer is lazy then we
> +         * skip over any missed expirations so we can treat this case
> +         * as the same as if the timer is currently stopped, i.e. we
> +         * just schedule expiration to be 'count' ticks from now.
> +         */
> +        if ( !vs->started || vs->config.fields.lazy )
> +        {
> +            next = now + vs->count;
> +        }

Unnecessary braces.

> @@ -149,6 +373,57 @@ int viridian_time_wrmsr(struct vcpu *v, uint32_t idx, uint64_t val)
>          }
>          break;
>  
> +    case HV_X64_MSR_TIME_REF_COUNT:
> +        return X86EMUL_EXCEPTION;

Isn't this an unrelated change?

> +    case HV_X64_MSR_STIMER0_CONFIG:
> +    case HV_X64_MSR_STIMER1_CONFIG:
> +    case HV_X64_MSR_STIMER2_CONFIG:
> +    case HV_X64_MSR_STIMER3_CONFIG:
> +    {
> +        unsigned int stimerx = (idx - HV_X64_MSR_STIMER0_CONFIG) / 2;
> +        struct viridian_stimer *vs = &v->arch.hvm.viridian->stimer[stimerx];
> +
> +        if ( !(viridian_feature_mask(d) & HVMPV_stimer) )
> +            return X86EMUL_EXCEPTION;
> +
> +        stop_stimer(vs);
> +
> +        vs->config.raw = val;
> +
> +        if ( !vs->config.fields.sintx )
> +            vs->config.fields.enabled = 0;
> +
> +        if ( vs->config.fields.enabled )
> +            start_stimer(vs);
> +
> +        break;
> +    }
> +    case HV_X64_MSR_STIMER0_COUNT:

Missing blank line again (and also further down here as well as in the
rdmsr code).

> +    case HV_X64_MSR_STIMER1_COUNT:
> +    case HV_X64_MSR_STIMER2_COUNT:
> +    case HV_X64_MSR_STIMER3_COUNT:
> +    {
> +        unsigned int stimerx = (idx - HV_X64_MSR_STIMER0_COUNT) / 2;
> +        struct viridian_stimer *vs = &v->arch.hvm.viridian->stimer[stimerx];
> +
> +        if ( !(viridian_feature_mask(d) & HVMPV_stimer) )
> +            return X86EMUL_EXCEPTION;
> +
> +        stop_stimer(vs);
> +
> +        vs->count = val;
> +
> +        if ( !vs->count  )

Any reason you don't use val here (which the compiler likely will do
anyway)?

> @@ -201,6 +476,32 @@ int viridian_time_rdmsr(const struct vcpu *v, uint32_t idx, uint64_t *val)
>          break;
>      }
>  
> +    case HV_X64_MSR_STIMER0_CONFIG:
> +    case HV_X64_MSR_STIMER1_CONFIG:
> +    case HV_X64_MSR_STIMER2_CONFIG:
> +    case HV_X64_MSR_STIMER3_CONFIG:
> +    {
> +        unsigned int stimerx = (idx - HV_X64_MSR_STIMER0_CONFIG) / 2;
> +
> +        if ( !(viridian_feature_mask(d) & HVMPV_stimer) )
> +            return X86EMUL_EXCEPTION;
> +
> +        *val = v->arch.hvm.viridian->stimer[stimerx].config.raw;

While more noticeable here and ...

> +        break;
> +    }
> +    case HV_X64_MSR_STIMER0_COUNT:
> +    case HV_X64_MSR_STIMER1_COUNT:
> +    case HV_X64_MSR_STIMER2_COUNT:
> +    case HV_X64_MSR_STIMER3_COUNT:
> +    {
> +        unsigned int stimerx = (idx - HV_X64_MSR_STIMER0_COUNT) / 2;
> +
> +        if ( !(viridian_feature_mask(d) & HVMPV_stimer) )
> +            return X86EMUL_EXCEPTION;
> +
> +        *val = v->arch.hvm.viridian->stimer[stimerx].count;

... here, array_access_nospec() is probably needed not just here,
but also in the wrmsr logic.
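I.e. (sketch):

    *val = array_access_nospec(v->arch.hvm.viridian->stimer,
                               stimerx).count;

and similarly in the wrmsr logic when taking the address:

    struct viridian_stimer *vs =
        &array_access_nospec(v->arch.hvm.viridian->stimer, stimerx);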

> --- a/xen/include/asm-x86/hvm/viridian.h
> +++ b/xen/include/asm-x86/hvm/viridian.h
> @@ -40,6 +40,33 @@ union viridian_sint_msr
>      } fields;
>  };
>  
> +union viridian_stimer_config_msr
> +{
> +    uint64_t raw;
> +    struct
> +    {
> +        uint64_t enabled:1;
> +        uint64_t periodic:1;
> +        uint64_t lazy:1;
> +        uint64_t auto_enable:1;
> +        uint64_t vector:8;
> +        uint64_t direct_mode:1;
> +        uint64_t reserved_zero1:3;
> +        uint64_t sintx:4;
> +        uint64_t reserved_zero2:44;
> +    } fields;
> +};
> +
> +struct viridian_stimer {
> +    struct vcpu *v;

Isn't a full 8-byte pointer a little too much overhead here? You could
instead store the timer index ...

> +    struct timer timer;
> +    union viridian_stimer_config_msr config;
> +    uint64_t count;
> +    int64_t expiration;
> +    s_time_t timeout;
> +    bool started;

... in a field using the 7-byte padding here, and use container_of()
to get at the outer structure.
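E.g. (a sketch only; field and type names are illustrative, assuming
the containing per-vCPU structure is struct viridian_vcpu):

    struct viridian_stimer {
        struct timer timer;
        union viridian_stimer_config_msr config;
        uint64_t count;
        int64_t expiration;
        s_time_t timeout;
        bool started;
        uint8_t index;             /* fits in the existing padding */
    };

    static struct viridian_vcpu *stimer_to_viridian(
        struct viridian_stimer *vs)
    {
        /* Step back to stimer[0], then up to the container. */
        return container_of(vs - vs->index, struct viridian_vcpu,
                            stimer[0]);
    }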

Jan

