
Re: [Xen-devel] [PATCH] viridian: fix the HvFlushVirtualAddress/List hypercall implementation



Apologies if you get multiple copies of this, but I seem to be having
problems sending to the list.

  Paul

> -----Original Message-----
> From: Paul Durrant [mailto:paul.durrant@xxxxxxxxxx]
> Sent: 13 February 2019 16:03
> To: Paul Durrant <Paul.Durrant@xxxxxxxxxx>
> Cc: Jan Beulich <jbeulich@xxxxxxxx>; Andrew Cooper
> <Andrew.Cooper3@xxxxxxxxxx>; Wei Liu <wei.liu2@xxxxxxxxxx>; Roger Pau
> Monne <roger.pau@xxxxxxxxxx>
> Subject: [PATCH] viridian: fix the HvFlushVirtualAddress/List hypercall
> implementation
> 
> The current code uses hvm_asid_flush_vcpu(), but this is insufficient for
> a guest running in shadow mode; it results in guest crashes early in
> boot if the 'hcall_remote_tlb_flush' viridian feature is enabled.
> 
> This patch, rather than open-coding a new flush algorithm, adapts the
> one already used by the HVMOP_flush_tlbs Xen hypercall. The
> implementation is modified to allow TLB flushing of a subset of a
> domain's vCPUs, with a callback function determining whether or not a
> given vCPU requires flushing. This mechanism was chosen because, while
> the currently implemented viridian hypercalls specify a vCPU mask, newer
> variants specify a sparse HV_VP_SET instead; a callback avoids exposing
> the details of either representation outside the viridian subsystem if
> and when those newer variants are implemented.
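
For illustration (not part of the patch): a minimal, self-contained
sketch of how two different set representations can hide behind the same
callback type. Here "struct vcpu" is a stand-in holding only the field
the predicates use, and the sparse layout is invented for the sketch
rather than being the real HV_VP_SET format from the Hyper-V TLFS.

  /* Compilable stand-alone; illustrative only. */
  #include <stdbool.h>
  #include <stdint.h>

  struct vcpu { unsigned int vcpu_id; };

  /* Flat-mask predicate, the same shape as need_flush() in the patch
   * below. Note the id < 64 guard: a flat uint64_t mask can only name
   * the first 64 vCPUs, and shifting by >= 64 is undefined behaviour. */
  static bool need_flush_mask(void *ctxt, struct vcpu *v)
  {
      uint64_t vcpu_mask = *(uint64_t *)ctxt;

      return v->vcpu_id < 64 && (vcpu_mask & (1ull << v->vcpu_id));
  }

  /* Hypothetical sparse-set predicate: banks of 64 vCPU ids each.
   * Both functions match bool (*)(void *, struct vcpu *), so the
   * common flusher needs no knowledge of either representation. */
  struct sparse_vp_set {
      uint64_t valid_bank_mask;
      uint64_t bank[64];
  };

  static bool need_flush_sparse(void *ctxt, struct vcpu *v)
  {
      const struct sparse_vp_set *set = ctxt;
      unsigned int bank = v->vcpu_id / 64, bit = v->vcpu_id % 64;

      return bank < 64 &&
             (set->valid_bank_mask & (1ull << bank)) &&
             (set->bank[bank] & (1ull << bit));
  }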
> 
> NOTE: Use of the common flush function requires that the hypercalls be
>       restartable, so, with this patch applied, viridian_hypercall()
>       can now return HVM_HCALL_preempted. This is safe as no
>       modification to struct cpu_user_regs is made before the return.
> 
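
As a schematic of that restart contract (illustrative only; the types
and the insn_len parameter below are stand-ins, not Xen's real
exit-handler interface): on preemption the guest's instruction pointer
is deliberately left pointing at the hypercall instruction, so the guest
re-executes it and the call restarts from scratch.

  /* Schematic only; not Xen's actual handler code. */
  enum hvm_hcall { HVM_HCALL_completed, HVM_HCALL_preempted };

  struct cpu_user_regs { unsigned long rip; };

  static void complete_hypercall(struct cpu_user_regs *regs,
                                 enum hvm_hcall rc,
                                 unsigned int insn_len)
  {
      /*
       * Leave rip untouched on preemption: the guest re-issues the
       * VMCALL/VMMCALL and the hypercall runs again from the start.
       * That is only correct because the preempted path made no
       * guest-visible change (in particular none to *regs).
       */
      if ( rc != HVM_HCALL_preempted )
          regs->rip += insn_len;
  }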
> Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx>
> ---
> Cc: Jan Beulich <jbeulich@xxxxxxxx>
> Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
> Cc: Wei Liu <wei.liu2@xxxxxxxxxx>
> Cc: "Roger Pau Monné" <roger.pau@xxxxxxxxxx>
> ---
>  xen/arch/x86/hvm/hvm.c               | 55 +++++++++++++++++++++-------
>  xen/arch/x86/hvm/viridian/viridian.c | 41 +++++----------------
>  xen/include/asm-x86/hvm/hvm.h        |  2 +
>  3 files changed, 53 insertions(+), 45 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 410623d437..5de2c2aec7 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -3964,45 +3964,72 @@ static void hvm_s3_resume(struct domain *d)
>      }
>  }
> 
> -static int hvmop_flush_tlb_all(void)
> +static DEFINE_PER_CPU(cpumask_t, flush_cpumask);
> +
> +bool hvm_flush_vcpu_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
> +                        void *ctxt)
>  {
> +    cpumask_t *mask = &this_cpu(flush_cpumask);
>      struct domain *d = current->domain;
>      struct vcpu *v;
> 
> -    if ( !is_hvm_domain(d) )
> -        return -EINVAL;
> -
>      /* Avoid deadlock if more than one vcpu tries this at the same time. */
>      if ( !spin_trylock(&d->hypercall_deadlock_mutex) )
> -        return -ERESTART;
> +        return false;
> 
>      /* Pause all other vcpus. */
>      for_each_vcpu ( d, v )
> -        if ( v != current )
> +        if ( v != current && flush_vcpu(ctxt, v) )
>              vcpu_pause_nosync(v);
> 
> +    cpumask_clear(mask);
> +
>      /* Now that all VCPUs are signalled to deschedule, we wait... */
>      for_each_vcpu ( d, v )
> -        if ( v != current )
> -            while ( !vcpu_runnable(v) && v->is_running )
> -                cpu_relax();
> +    {
> +        unsigned int cpu;
> +
> +        if ( v == current || !flush_vcpu(ctxt, v) )
> +            continue;
> +
> +        while ( !vcpu_runnable(v) && v->is_running )
> +            cpu_relax();
> +
> +        cpu = read_atomic(&v->dirty_cpu);
> +        if ( is_vcpu_dirty_cpu(cpu) )
> +            __cpumask_set_cpu(cpu, mask);
> +    }
> 
>      /* All other vcpus are paused, safe to unlock now. */
>      spin_unlock(&d->hypercall_deadlock_mutex);
> 
>      /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */
>      for_each_vcpu ( d, v )
> -        paging_update_cr3(v, false);
> +        if ( flush_vcpu(ctxt, v) )
> +            paging_update_cr3(v, false);
> 
> -    /* Flush all dirty TLBs. */
> -    flush_tlb_mask(d->dirty_cpumask);
> +    /* Flush TLBs on all CPUs with dirty vcpu state. */
> +    flush_tlb_mask(mask);
> 
>      /* Done. */
>      for_each_vcpu ( d, v )
> -        if ( v != current )
> +        if ( v != current && flush_vcpu(ctxt, v) )
>              vcpu_unpause(v);
> 
> -    return 0;
> +    return true;
> +}
> +
> +static bool always_flush(void *ctxt, struct vcpu *v)
> +{
> +    return true;
> +}
> +
> +static int hvmop_flush_tlb_all(void)
> +{
> +    if ( !is_hvm_domain(current->domain) )
> +        return -EINVAL;
> +
> +    return hvm_flush_vcpu_tlb(always_flush, NULL) ? 0 : -ERESTART;
>  }
> 
>  static int hvmop_set_evtchn_upcall_vector(
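
Caller-side, the contract is the one hvmop_flush_tlb_all() above
demonstrates: a false return means the deadlock-avoidance trylock
failed, so the caller should arrange a restart rather than spin. A
hypothetical additional caller (assuming a mask predicate like
viridian's need_flush() below) would follow the same pattern:

  /* Hypothetical caller, mirroring hvmop_flush_tlb_all(): turn a
   * false return into -ERESTART so the hypercall machinery creates
   * a continuation instead of busy-waiting on the lock. */
  static int flush_masked_vcpus(uint64_t mask)
  {
      if ( !is_hvm_domain(current->domain) )
          return -EINVAL;

      return hvm_flush_vcpu_tlb(need_flush, &mask) ? 0 : -ERESTART;
  }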
> diff --git a/xen/arch/x86/hvm/viridian/viridian.c b/xen/arch/x86/hvm/viridian/viridian.c
> index c78b2918d9..6bb702f27e 100644
> --- a/xen/arch/x86/hvm/viridian/viridian.c
> +++ b/xen/arch/x86/hvm/viridian/viridian.c
> @@ -430,7 +430,12 @@ void viridian_domain_deinit(struct domain *d)
>          viridian_vcpu_deinit(v);
>  }
> 
> -static DEFINE_PER_CPU(cpumask_t, ipi_cpumask);
> +static bool need_flush(void *ctxt, struct vcpu *v)
> +{
> +    uint64_t vcpu_mask = *(uint64_t *)ctxt;
> +
> +    return vcpu_mask & (1ul << v->vcpu_id);
> +}
> 
>  int viridian_hypercall(struct cpu_user_regs *regs)
>  {
> @@ -494,8 +499,6 @@ int viridian_hypercall(struct cpu_user_regs *regs)
>      case HvFlushVirtualAddressSpace:
>      case HvFlushVirtualAddressList:
>      {
> -        cpumask_t *pcpu_mask;
> -        struct vcpu *v;
>          struct {
>              uint64_t address_space;
>              uint64_t flags;
> @@ -521,36 +524,12 @@ int viridian_hypercall(struct cpu_user_regs *regs)
>          if ( input_params.flags & HV_FLUSH_ALL_PROCESSORS )
>              input_params.vcpu_mask = ~0ul;
> 
> -        pcpu_mask = &this_cpu(ipi_cpumask);
> -        cpumask_clear(pcpu_mask);
> -
> -        /*
> -         * For each specified virtual CPU flush all ASIDs to invalidate
> -         * TLB entries the next time it is scheduled and then, if it
> -         * is currently running, add its physical CPU to a mask of
> -         * those which need to be interrupted to force a flush.
> -         */
> -        for_each_vcpu ( currd, v )
> -        {
> -            if ( v->vcpu_id >= (sizeof(input_params.vcpu_mask) * 8) )
> -                break;
> -
> -            if ( !(input_params.vcpu_mask & (1ul << v->vcpu_id)) )
> -                continue;
> -
> -            hvm_asid_flush_vcpu(v);
> -            if ( v != curr && v->is_running )
> -                __cpumask_set_cpu(v->processor, pcpu_mask);
> -        }
> -
>          /*
> -         * Since ASIDs have now been flushed it just remains to
> -         * force any CPUs currently running target vCPUs out of non-
> -         * root mode. It's possible that re-scheduling has taken place
> -         * so we may unnecessarily IPI some CPUs.
> +         * A false return means that another vcpu is currently trying
> +         * a similar operation, so back off.
>           */
> -        if ( !cpumask_empty(pcpu_mask) )
> -            smp_send_event_check_mask(pcpu_mask);
> +        if ( !hvm_flush_vcpu_tlb(need_flush, &input_params.vcpu_mask) )
> +            return HVM_HCALL_preempted;
> 
>          output.rep_complete = input.rep_count;
> 
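
To make the widening of the mask concrete, here is a small stand-alone
illustration (assuming HV_FLUSH_ALL_PROCESSORS is bit 0 of the flags
field, matching the definition the existing viridian code uses): with
the flag set, the mask becomes all-ones and every vCPU id below 64 is
selected.

  #include <stdint.h>
  #include <stdio.h>

  #define HV_FLUSH_ALL_PROCESSORS 1   /* assumed: bit 0 of flags */

  int main(void)
  {
      uint64_t flags = HV_FLUSH_ALL_PROCESSORS;
      uint64_t vcpu_mask = 0x5;       /* guest named vCPUs 0 and 2 */

      if ( flags & HV_FLUSH_ALL_PROCESSORS )
          vcpu_mask = ~0ul;           /* widen to every vCPU id < 64 */

      for ( unsigned int id = 0; id < 4; id++ )
          printf("vcpu %u: %s\n", id,
                 (vcpu_mask & (1ull << id)) ? "flush" : "skip");

      return 0;
  }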
> diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
> index 0a10b51554..53ffebb2c5 100644
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -338,6 +338,8 @@ const char *hvm_efer_valid(const struct vcpu *v, uint64_t value,
>                             signed int cr0_pg);
>  unsigned long hvm_cr4_guest_valid_bits(const struct domain *d, bool restore);
> 
> +bool hvm_flush_vcpu_tlb(bool (*flush_vcpu)(void *ctxt, struct vcpu *v),
> +                        void *ctxt);
> 
>  #ifdef CONFIG_HVM
> 
> --
> 2.20.1
