
Re: [Xen-devel] [PATCH v4 26/31] xen/x86: allow HVM guests to use hypercalls to bring up vCPUs



On 07/08/15 11:18, Roger Pau Monne wrote:
> Allow the usage of the VCPUOP_initialise, VCPUOP_up, VCPUOP_down and
> VCPUOP_is_up hypercalls from HVM guests.
>
> This patch introduces a new structure (vcpu_hvm_context) that should be used
> in conjunction with the VCPUOP_initialise hypercall in order to initialize
> vCPUs for HVM guests.
>
> Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>
> Cc: Jan Beulich <jbeulich@xxxxxxxx>
> Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
> Cc: Ian Campbell <ian.campbell@xxxxxxxxxx>
> Cc: Stefano Stabellini <stefano.stabellini@xxxxxxxxxx>
> ---
>  xen/arch/arm/domain.c             |  24 ++++++
>  xen/arch/x86/domain.c             | 156 +++++++++++++++++++++++++++++++++++
>  xen/arch/x86/hvm/hvm.c            |   8 ++
>  xen/common/domain.c               |  16 +---
>  xen/include/public/hvm/hvm_vcpu.h | 168 ++++++++++++++++++++++++++++++++++++++
>  xen/include/xen/domain.h          |   2 +
>  6 files changed, 359 insertions(+), 15 deletions(-)
>  create mode 100644 xen/include/public/hvm/hvm_vcpu.h
>
> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
> index b2bfc7d..b20035d 100644
> --- a/xen/arch/arm/domain.c
> +++ b/xen/arch/arm/domain.c
> @@ -752,6 +752,30 @@ int arch_set_info_guest(
>      return 0;
>  }
>  
> +int arch_initialize_vcpu(struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg)
> +{
> +    struct vcpu_guest_context *ctxt;
> +    struct domain *d = current->domain;
> +    int rc;
> +
> +    if ( (ctxt = alloc_vcpu_guest_context()) == NULL )

I have a patch to drop this {alloc,free}_vcpu_guest_context()
infrastructure in 4.7.

It existed to avoid issuing order-1 domheap allocations, which could
spuriously fail because of memory fragmentation, but vmalloc() is now a
strictly better option.
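
Something along these lines should then suffice (untested sketch; the
surrounding logic stays exactly as in your patch):

    struct vcpu_guest_context *ctxt = vmalloc(sizeof(*ctxt));

    if ( ctxt == NULL )
        return -ENOMEM;

    /* ... copy_from_guest() / arch_set_info_guest() etc. as before ... */

    vfree(ctxt);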

> +        return -ENOMEM;
> +
> +    if ( copy_from_guest(ctxt, arg, 1) )
> +    {
> +        free_vcpu_guest_context(ctxt);
> +        return -EFAULT;
> +    }
> +
> +    domain_lock(d);
> +    rc = v->is_initialised ? -EEXIST : arch_set_info_guest(v, ctxt);
> +    domain_unlock(d);
> +
> +    free_vcpu_guest_context(ctxt);
> +
> +    return rc;
> +}
> +
>  int arch_vcpu_reset(struct vcpu *v)
>  {
>      vcpu_end_shutdown_deferral(v);
> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
> index 432fe43..4a7f8d9 100644
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -37,6 +37,7 @@
>  #include <xen/wait.h>
>  #include <xen/guest_access.h>
>  #include <public/sysctl.h>
> +#include <public/hvm/hvm_vcpu.h>
>  #include <asm/regs.h>
>  #include <asm/mc146818rtc.h>
>  #include <asm/system.h>
> @@ -1135,6 +1136,161 @@ int arch_set_info_guest(
>  #undef c
>  }
>  
> +/* Called by VCPUOP_initialise for HVM guests. */
> +static int arch_set_info_hvm_guest(struct vcpu *v, vcpu_hvm_context_t *ctx)
> +{
> +    struct segment_register seg;
> +
> +#define get_context_seg(ctx, seg, f)                                        \
> +    (ctx)->mode == VCPU_HVM_MODE_16B ? (ctx)->cpu_regs.x86_16.seg##_##f :   \
> +    (ctx)->mode == VCPU_HVM_MODE_32B ? (ctx)->cpu_regs.x86_32.seg##_##f :   \
> +    (ctx)->cpu_regs.x86_64.seg##_##f

I would be tempted to drop _context from the names, capitalise them so
they stand out as macros, and turn them into statement-expression style
macros (({ })) to avoid any risk of multiple evaluation of their
arguments.

Also, you need a sanity check on mode, rather than assuming 64bit.
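
Untested sketch of the shape I have in mind (GET_SEG and c_ are just
placeholder names):

    #define GET_SEG(c, s, f)                                              \
        ({                                                                \
            /* Statement expression: (c) gets evaluated exactly once. */  \
            const struct vcpu_hvm_context *c_ = (c);                      \
            c_->mode == VCPU_HVM_MODE_16B ? c_->cpu_regs.x86_16.s##_##f : \
            c_->mode == VCPU_HVM_MODE_32B ? c_->cpu_regs.x86_32.s##_##f : \
                                            c_->cpu_regs.x86_64.s##_##f;  \
        })

with the mode check done once on entry to the function, rather than in
every macro invocation:

    switch ( ctx->mode )
    {
    case VCPU_HVM_MODE_16B:
    case VCPU_HVM_MODE_32B:
    case VCPU_HVM_MODE_64B:
        break;

    default:
        return -EINVAL;
    }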

> +
> +#define get_context_gpr(ctx, gpr)                                           \
> +    (ctx)->mode == VCPU_HVM_MODE_16B ? (ctx)->cpu_regs.x86_16.gpr :         \
> +    (ctx)->mode == VCPU_HVM_MODE_32B ? (ctx)->cpu_regs.x86_32.e##gpr :      \
> +    (ctx)->cpu_regs.x86_64.r##gpr
> +
> +#define get_context_field(ctx, field)                                       \
> +    (ctx)->mode == VCPU_HVM_MODE_16B ? (ctx)->cpu_regs.x86_16.field :       \
> +    (ctx)->mode == VCPU_HVM_MODE_32B ? (ctx)->cpu_regs.x86_32.field :       \
> +    (ctx)->cpu_regs.x86_64.field
> +
> +    memset(&seg, 0, sizeof(seg));
> +
> +    if ( !paging_mode_hap(v->domain) )
> +        v->arch.guest_table = pagetable_null();
> +
> +    v->arch.user_regs.rax = get_context_gpr(ctx, ax);
> +    v->arch.user_regs.rcx = get_context_gpr(ctx, cx);
> +    v->arch.user_regs.rdx = get_context_gpr(ctx, dx);
> +    v->arch.user_regs.rbx = get_context_gpr(ctx, bx);
> +    v->arch.user_regs.rsp = get_context_gpr(ctx, sp);
> +    v->arch.user_regs.rbp = get_context_gpr(ctx, bp);
> +    v->arch.user_regs.rsi = get_context_gpr(ctx, si);
> +    v->arch.user_regs.rdi = get_context_gpr(ctx, di);
> +    v->arch.user_regs.rip = get_context_gpr(ctx, ip);
> +    v->arch.user_regs.rflags = get_context_gpr(ctx, flags);
> +
> +    v->arch.hvm_vcpu.guest_cr[0] = get_context_field(ctx, cr0) | X86_CR0_ET;

I am not sure whether ET is worth doing here.  It is stuck high and
ignores writes on any processor Xen currently functions on.

> +    hvm_update_guest_cr(v, 0);
> +    v->arch.hvm_vcpu.guest_cr[4] = get_context_field(ctx, cr4);
> +    hvm_update_guest_cr(v, 4);
> +
> +    switch ( ctx->mode )
> +    {
> +    case VCPU_HVM_MODE_32B:
> +        v->arch.hvm_vcpu.guest_efer = ctx->cpu_regs.x86_32.efer;
> +        hvm_update_guest_efer(v);
> +        v->arch.hvm_vcpu.guest_cr[3] = ctx->cpu_regs.x86_32.cr3;
> +        hvm_update_guest_cr(v, 3);
> +        break;

Newline here please.

> +    case VCPU_HVM_MODE_64B:
> +        v->arch.user_regs.r8 = ctx->cpu_regs.x86_64.r8;
> +        v->arch.user_regs.r9 = ctx->cpu_regs.x86_64.r9;
> +        v->arch.user_regs.r10 = ctx->cpu_regs.x86_64.r10;
> +        v->arch.user_regs.r11 = ctx->cpu_regs.x86_64.r11;
> +        v->arch.user_regs.r12 = ctx->cpu_regs.x86_64.r12;
> +        v->arch.user_regs.r13 = ctx->cpu_regs.x86_64.r13;
> +        v->arch.user_regs.r14 = ctx->cpu_regs.x86_64.r14;
> +        v->arch.user_regs.r15 = ctx->cpu_regs.x86_64.r15;
> +        v->arch.hvm_vcpu.guest_efer = ctx->cpu_regs.x86_64.efer;
> +        hvm_update_guest_efer(v);
> +        v->arch.hvm_vcpu.guest_cr[3] = ctx->cpu_regs.x86_64.cr3;
> +        hvm_update_guest_cr(v, 3);
> +        break;
> +    default:
> +        break;

Drop the default case if it is going to look like this.
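
With mode validated once at the top of the function, the default case
can go entirely (a switch without a matching case is simply a no-op).
Failing that, make it reject bad input explicitly; untested sketch:

    default:
        /* Unknown mode: fail rather than silently doing nothing. */
        return -EINVAL;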

> +    }
> +
> +    if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) )
> +    {
> +        /* Shadow-mode CR3 change. Check PDBR and update refcounts. */
> +        struct page_info *page = get_page_from_gfn(v->domain,
> +                                 v->arch.hvm_vcpu.guest_cr[3] >> PAGE_SHIFT,
> +                                 NULL, P2M_ALLOC);
> +        if ( !page )
> +        {
> +            gdprintk(XENLOG_ERR, "Invalid CR3\n");
> +            domain_crash(v->domain);
> +            return -EINVAL;
> +        }
> +
> +        v->arch.guest_table = pagetable_from_page(page);
> +    }
> +
> +    seg.base = get_context_seg(ctx, cs, base);
> +    seg.limit = get_context_seg(ctx, cs, limit);
> +    seg.attr.bytes = get_context_seg(ctx, cs, ar);
> +    hvm_set_segment_register(v, x86_seg_cs, &seg);
> +    seg.base = get_context_seg(ctx, ds, base);
> +    seg.limit = get_context_seg(ctx, ds, limit);
> +    seg.attr.bytes = get_context_seg(ctx, ds, ar);
> +    hvm_set_segment_register(v, x86_seg_ds, &seg);
> +    seg.base = get_context_seg(ctx, ss, base);
> +    seg.limit = get_context_seg(ctx, ss, limit);
> +    seg.attr.bytes = get_context_seg(ctx, ss, ar);
> +    hvm_set_segment_register(v, x86_seg_ss, &seg);
> +    seg.base = get_context_seg(ctx, tr, base);
> +    seg.limit = get_context_seg(ctx, tr, limit);
> +    seg.attr.bytes = get_context_seg(ctx, tr, ar);
> +    hvm_set_segment_register(v, x86_seg_tr, &seg);

This block would be easier to read as four blocks, one per segment, and
with the " = " in the middle lined up.
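
i.e. something along the lines of (first two segments shown; ss and tr
follow the same pattern):

    seg.base       = get_context_seg(ctx, cs, base);
    seg.limit      = get_context_seg(ctx, cs, limit);
    seg.attr.bytes = get_context_seg(ctx, cs, ar);
    hvm_set_segment_register(v, x86_seg_cs, &seg);

    seg.base       = get_context_seg(ctx, ds, base);
    seg.limit      = get_context_seg(ctx, ds, limit);
    seg.attr.bytes = get_context_seg(ctx, ds, ar);
    hvm_set_segment_register(v, x86_seg_ds, &seg);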

> +
> +    /* Sync AP's TSC with BSP's. */
> +    v->arch.hvm_vcpu.cache_tsc_offset =
> +        v->domain->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset;
> +    hvm_funcs.set_tsc_offset(v, v->arch.hvm_vcpu.cache_tsc_offset,
> +                             v->domain->arch.hvm_domain.sync_tsc);
> +
> +    v->arch.hvm_vcpu.msr_tsc_adjust = 0;
> +
> +    paging_update_paging_modes(v);
> +
> +    v->arch.flags |= TF_kernel_mode;

TF_kernel_mode only applies to PV guests.  HVM guests determine kernel
vs user mode based on CPL.

> +    v->is_initialised = 1;
> +    set_bit(_VPF_down, &v->pause_flags);
> +
> +    return 0;
> +#undef get_context_field
> +#undef get_context_gpr
> +#undef get_context_seg
> +}
> +
> +int arch_initialize_vcpu(struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg)
> +{
> +    struct vcpu_guest_context *ctxt;
> +    struct vcpu_hvm_context hvm_ctx;
> +    struct domain *d = current->domain;
> +    int rc;
> +
> +    if ( is_hvm_vcpu(v) )
> +    {
> +        if ( copy_from_guest(&hvm_ctx, arg, 1) )
> +            return -EFAULT;
> +
> +        domain_lock(d);
> +        rc = v->is_initialised ? -EEXIST : arch_set_info_hvm_guest(v, &hvm_ctx);
> +        domain_unlock(d);
> +    } else {

Style.
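
For reference, CODING_STYLE wants the closing brace and the else on
separate lines:

    }
    else
    {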

Otherwise, this looks rather neat and surprisingly non-invasive.

~Andrew
