[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 12/18 V2]:PVH xen: support invalid op, return PVH features etc...



On Fri, Mar 15, 2013 at 05:48:44PM -0700, Mukesh Rathor wrote:
>  The biggest change in this patch is in traps.c to allow forced invalid
>  op for PVH guest. Also, enable hypercall page init for PVH guest also.
>  Finally, set guest type to PVH if PV with HAP is created.
> 
> Changes in V2:
>   - Fix emulate_forced_invalid_op() to use proper copy function, and inject PF
>     in case it fails.
>   - remove extraneous PVH check in STI/CLI ops en emulate_privileged_op().
>   - Make assert a debug ASSERT in show_registers().
>   - debug.c: keep get_gfn() locked and move put_gfn closer to it.
> 
> Signed-off-by: Mukesh Rathor <mukesh.rathor@xxxxxxxxxx>
> ---
>  xen/arch/x86/debug.c        |    9 ++++-----
>  xen/arch/x86/traps.c        |   43 
> +++++++++++++++++++++++++++++++++++++------
>  xen/arch/x86/x86_64/traps.c |    5 +++--
>  xen/common/domain.c         |    9 +++++++++
>  xen/common/domctl.c         |    4 ++++
>  xen/common/kernel.c         |    6 +++++-
>  6 files changed, 62 insertions(+), 14 deletions(-)
> 
> diff --git a/xen/arch/x86/debug.c b/xen/arch/x86/debug.c
> index 502edbc..abe538f 100644
> --- a/xen/arch/x86/debug.c
> +++ b/xen/arch/x86/debug.c
> @@ -59,7 +59,9 @@ dbg_hvm_va2mfn(dbgva_t vaddr, struct domain *dp, int toaddr,
>          return INVALID_MFN;
>      }
>  
> -    mfn = mfn_x(get_gfn(dp, *gfn, &gfntype)); 
> +    mfn = mfn_x(get_gfn_query(dp, *gfn, &gfntype)); 
> +    put_gfn(dp, *gfn);
> +
>      if ( p2m_is_readonly(gfntype) && toaddr )
>      {
>          DBGP2("kdb:p2m_is_readonly: gfntype:%x\n", gfntype);
> @@ -158,7 +160,7 @@ dbg_rw_guest_mem(dbgva_t addr, dbgbyte_t *buf, int len, 
> struct domain *dp,
>  
>          pagecnt = min_t(long, PAGE_SIZE - (addr & ~PAGE_MASK), len);
>  
> -        mfn = (is_hvm_domain(dp)
> +        mfn = (is_hvm_or_pvh_domain(dp)
>                 ? dbg_hvm_va2mfn(addr, dp, toaddr, &gfn)
>                 : dbg_pv_va2mfn(addr, dp, pgd3));
>          if ( mfn == INVALID_MFN ) 
> @@ -178,9 +180,6 @@ dbg_rw_guest_mem(dbgva_t addr, dbgbyte_t *buf, int len, 
> struct domain *dp,
>          }
>  
>          unmap_domain_page(va);
> -        if ( gfn != INVALID_GFN )
> -            put_gfn(dp, gfn);
> -
>          addr += pagecnt;
>          buf += pagecnt;
>          len -= pagecnt;
> diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
> index ab54f82..14656c1 100644
> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -459,6 +459,10 @@ static void instruction_done(
>      struct cpu_user_regs *regs, unsigned long eip, unsigned int bpmatch)
>  {
>      regs->eip = eip;
> +
> +    if ( is_pvh_vcpu(current) )
> +        return;

Can it be above the 'regs->eip = eip' ?

> +
>      regs->eflags &= ~X86_EFLAGS_RF;
>      if ( bpmatch || (regs->eflags & X86_EFLAGS_TF) )
>      {
> @@ -475,6 +479,10 @@ static unsigned int check_guest_io_breakpoint(struct 
> vcpu *v,
>      unsigned int width, i, match = 0;
>      unsigned long start;
>  
> +    if ( is_pvh_vcpu(v) ) {
> +        /* for pvh, ctrlreg field is not implemented/used unless we need to 
> */
> +        return 0;
> +    }
>      if ( !(v->arch.debugreg[5]) ||
>           !(v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_DE) )
>          return 0;
> @@ -908,14 +916,18 @@ static int emulate_invalid_rdtscp(struct cpu_user_regs 
> *regs)
>  unsigned long emulate_forced_invalid_op(struct cpu_user_regs *regs)
>  {
>      char sig[5], instr[2];
> -    unsigned long eip, rc;
> +    unsigned long eip, rc, addr;
>  
>      eip = regs->eip;
>  
>      /* Check for forced emulation signature: ud2 ; .ascii "xen". */
> -    if ( (rc = copy_from_user(sig, (char *)eip, sizeof(sig))) != 0 )
> +    if ( (rc = raw_copy_from_guest(sig, (char *)eip, sizeof(sig))) != 0 )
>      {
> -        propagate_page_fault(eip + sizeof(sig) - rc, 0);
> +        addr = eip + sizeof(sig) - rc;
> +        if ( is_pvh_vcpu(current) )
> +            return addr;
> +
> +        propagate_page_fault(addr, 0);
>          return EXCRET_fault_fixed;
>      }
>      if ( memcmp(sig, "\xf\xbxen", sizeof(sig)) )
> @@ -923,9 +935,13 @@ unsigned long emulate_forced_invalid_op(struct 
> cpu_user_regs *regs)
>      eip += sizeof(sig);
>  
>      /* We only emulate CPUID. */
> -    if ( ( rc = copy_from_user(instr, (char *)eip, sizeof(instr))) != 0 )
> +    if ( ( rc = raw_copy_from_guest(instr, (char *)eip, sizeof(instr))) != 0 
> )
>      {
> -        propagate_page_fault(eip + sizeof(instr) - rc, 0);
> +        addr = eip + sizeof(instr) - rc;
> +        if ( is_pvh_vcpu(current) )
> +            return addr;
> +
> +        propagate_page_fault(addr, 0);
>          return EXCRET_fault_fixed;
>      }
>      if ( memcmp(instr, "\xf\xa2", sizeof(instr)) )
> @@ -1068,6 +1084,10 @@ void propagate_page_fault(unsigned long addr, u16 
> error_code)
>      struct vcpu *v = current;
>      struct trap_bounce *tb = &v->arch.pv_vcpu.trap_bounce;
>  
> +    /* PVH should not get here. ctrlreg is not implemented amongst other 
> +     * things */
> +    ASSERT( !is_pvh_vcpu(v) );
> +
>      v->arch.pv_vcpu.ctrlreg[2] = addr;
>      arch_set_cr2(v, addr);
>  
> @@ -1453,6 +1473,9 @@ static int read_descriptor(unsigned int sel,
>  {
>      struct desc_struct desc;
>  
> +    if ( is_pvh_vcpu(v) )
> +        return hvm_pvh_read_descriptor(sel, v, regs, base, limit, ar);

Ah, that is why you are using such weird arguments. And it looks like
emulate_privileged_op really needs it as single variables.

Perhaps then you can just add a comment in hvm_pvh_read_descriptor saying
that the reason you are shifting is b/c the caller (emulate_privileged_op)
expects it to be in this format.

> +
>      if ( !vm86_mode(regs) )
>      {
>          if ( sel < 4)
> @@ -1571,6 +1594,11 @@ static int guest_io_okay(
>      int user_mode = !(v->arch.flags & TF_kernel_mode);
>  #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
>  
> +    /* for PVH we check this in vmexit for EXIT_REASON_IO_INSTRUCTION 
> +     * and so don't need to check again here. */
> +    if (is_pvh_vcpu(v))

Odd syntax.

> +        return 1;
> +
>      if ( !vm86_mode(regs) &&
>           (v->arch.pv_vcpu.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3)) )
>          return 1;
> @@ -1816,7 +1844,7 @@ static inline uint64_t guest_misc_enable(uint64_t val)
>          _ptr = (unsigned int)_ptr;                                          \
>      if ( (limit) < sizeof(_x) - 1 || (eip) > (limit) - (sizeof(_x) - 1) )   \
>          goto fail;                                                          \
> -    if ( (_rc = copy_from_user(&_x, (type *)_ptr, sizeof(_x))) != 0 )       \
> +    if ( (_rc = raw_copy_from_guest(&_x, (type *)_ptr, sizeof(_x))) != 0 )  \
>      {                                                                       \
>          propagate_page_fault(_ptr + sizeof(_x) - _rc, 0);                   \
>          goto skip;                                                          \
> @@ -3245,6 +3273,9 @@ void do_device_not_available(struct cpu_user_regs *regs)
>  
>      BUG_ON(!guest_mode(regs));
>  
> +    /* PVH should not get here. ctrlreg is not implemented */
> +    ASSERT( !is_pvh_vcpu(curr) );
> +
>      vcpu_restore_fpu_lazy(curr);
>  
>      if ( curr->arch.pv_vcpu.ctrlreg[0] & X86_CR0_TS )
> diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c
> index d2f7209..47ec2ff 100644
> --- a/xen/arch/x86/x86_64/traps.c
> +++ b/xen/arch/x86/x86_64/traps.c
> @@ -147,7 +147,7 @@ void vcpu_show_registers(const struct vcpu *v)
>      unsigned long crs[8];
>  
>      /* No need to handle HVM for now. */

.. and PVH ..
> -    if ( is_hvm_vcpu(v) )
> +    if ( is_hvm_or_pvh_vcpu(v) )

>          return;
>  
>      crs[0] = v->arch.pv_vcpu.ctrlreg[0];
> @@ -440,6 +440,7 @@ static long register_guest_callback(struct 
> callback_register *reg)
>      long ret = 0;
>      struct vcpu *v = current;
>  
> +    ASSERT( !is_pvh_vcpu(v) );
>      if ( !is_canonical_address(reg->address) )
>          return -EINVAL;
>  
> @@ -620,7 +621,7 @@ static void hypercall_page_initialise_ring3_kernel(void 
> *hypercall_page)
>  void hypercall_page_initialise(struct domain *d, void *hypercall_page)
>  {
>      memset(hypercall_page, 0xCC, PAGE_SIZE);
> -    if ( is_hvm_domain(d) )
> +    if ( is_hvm_or_pvh_domain(d) )
>          hvm_hypercall_page_initialise(d, hypercall_page);
>      else if ( !is_pv_32bit_domain(d) )
>          hypercall_page_initialise_ring3_kernel(hypercall_page);
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index b6f10b7..aac6699 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -232,6 +232,15 @@ struct domain *domain_create(
>  
>      if ( domcr_flags & DOMCRF_hvm )
>          d->guest_type = hvm_guest;
> +    else if ( domcr_flags & DOMCRF_pvh ) {
> +        d->guest_type = pvh_guest;
> +        if ( !(domcr_flags & DOMCRF_hap) ) {
> +            printk("PVH guest must have HAP on\n");

Ahem. XENLOG_ERR
> +            goto fail;
> +        } else
> +            printk("PVH guest. Please note it is experimental. domid:%d\n",
> +                   domid);

XENLOG_INFO or XENLOG_DEBUG

> +    }
>  
>      if ( domid == 0 )
>      {
> diff --git a/xen/common/domctl.c b/xen/common/domctl.c
> index c98e99c..ab615f1 100644
> --- a/xen/common/domctl.c
> +++ b/xen/common/domctl.c
> @@ -149,6 +149,8 @@ void getdomaininfo(struct domain *d, struct 
> xen_domctl_getdomaininfo *info)
>  
>      if ( is_hvm_domain(d) )
>          info->flags |= XEN_DOMINF_hvm_guest;
> +    else if ( is_pvh_domain(d) )
> +        info->flags |= XEN_DOMINF_pvh_guest;
>  
>      xsm_security_domaininfo(d, info);
>  
> @@ -400,6 +402,8 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) 
> u_domctl)
>          domcr_flags = 0;
>          if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_hvm_guest )
>              domcr_flags |= DOMCRF_hvm;
> +        else if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_hap )
> +            domcr_flags |= DOMCRF_pvh;     /* PV with HAP is a PVH guest */

<scratches his head>

So if the user sets: 'hap' in their guest config we automatically
set domcr_flags to DOMCRF_hvm | DOMCRF_pvh right?

Then in domain_create we do this check:

        if (domcr_flags & DOMCRF_pvh ) {
              d->guest_type = pvh_guest;

and we force an HVM guest with 'hap=1' in the guest config to be
a pvh_guest ?




>          if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_hap )
>              domcr_flags |= DOMCRF_hap;
>          if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_s3_integrity )
> diff --git a/xen/common/kernel.c b/xen/common/kernel.c
> index 72fb905..3bba758 100644
> --- a/xen/common/kernel.c
> +++ b/xen/common/kernel.c
> @@ -289,7 +289,11 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDLE_PARAM(void) 
> arg)
>              if ( current->domain == dom0 )
>                  fi.submap |= 1U << XENFEAT_dom0;
>  #ifdef CONFIG_X86
> -            if ( !is_hvm_vcpu(current) )
> +            if ( is_pvh_vcpu(current) )
> +                fi.submap |= (1U << XENFEAT_hvm_safe_pvclock) |
> +                             (1U << XENFEAT_supervisor_mode_kernel) |
> +                             (1U << XENFEAT_hvm_callback_vector);
> +            else if ( !is_hvm_vcpu(current) )
>                  fi.submap |= (1U << XENFEAT_mmu_pt_update_preserve_ad) |
>                               (1U << XENFEAT_highmem_assist) |
>                               (1U << XENFEAT_gnttab_map_avail_bits);
> -- 
> 1.7.2.3
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel
> 

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.