[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH RFC 2/4] x86/mem_access: mem_access and mem_event changes to support PV domains



> mem_access changes
> ------------------
> Two new memory hypercalls, XENMEM_access_op_create_ring_page and
> XENMEM_access_op_get_ring_mfn have been added. The mem_access listener
> makes these calls during setup. The ring page is created from the
> xenheap and shared with the guest. It is freed when mem_access is
> disabled or when the domain is shutdown or destroyed.
> XENMEM_access_op_[sg]et_access hypercalls are modified to accommodate
> calls for a PV domain. When setting the access permissions for a mfn,
> all shadows for that mfn are dropped. They get recreated with the new
> permissions on the next page fault for that mfn. To get the permissions
> for a mfn, value is returned from the p2m_access lookup table.
> 
> mem_event changes
> -----------------
> The XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE/DISABLE hypercalls are
> modified to allow mem_access to work with PV domains. When the access
> listener goes to enable mem_access for a PV domain, shadow PG_mem_access
> mode is turned on and the p2m structures are initialized. When
> disabling, this mode is turned off but domain continues to run in shadow
> mode. The p2m structures are also freed on disabling.
> 
> Signed-off-by: Aravindh Puthiyaparambil <aravindp@xxxxxxxxx>
> Cc: Jan Beulich <jbeulich@xxxxxxxx>
> Cc: Keir Fraser <keir@xxxxxxx>
> Cc: Tim Deegan <tim@xxxxxxx>
> ---
> xen/arch/x86/domain.c            |  12 ++++
> xen/arch/x86/mm/mem_access.c     | 146 ++++++++++++++++++++++++++++++++++++---
> xen/arch/x86/mm/mem_event.c      |  77 +++++++++++++++++----
> xen/include/asm-x86/domain.h     |   3 +
> xen/include/asm-x86/mem_access.h |   4 ++
> xen/include/public/memory.h      |   2 +
> 6 files changed, 222 insertions(+), 22 deletions(-)
> 
> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
> index 2a9c6fc..0ea03b4 100644
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -57,6 +57,7 @@
> #include <asm/nmi.h>
> #include <asm/mce.h>
> #include <asm/amd.h>
> +#include <asm/mem_access.h>
> #include <xen/numa.h>
> #include <xen/iommu.h>
> #include <compat/vcpu.h>
> @@ -593,8 +594,11 @@ int arch_domain_create(struct domain *d, unsigned int 
> domcr_flags)
>         }
>     }
>     else
> +    {
>         /* 64-bit PV guest by default. */
>         d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
> +        d->arch.pv_domain.access_ring_mfn = _mfn(INVALID_MFN);
> +    }
> 
>     /* initialize default tsc behavior in case tools don't */
>     tsc_set_info(d, TSC_MODE_DEFAULT, 0UL, 0, 0);
> @@ -632,8 +636,16 @@ void arch_domain_destroy(struct domain *d)
> 
>     free_perdomain_mappings(d);
>     if ( is_pv_domain(d) )
> +    {
>         free_xenheap_page(d->arch.pv_domain.gdt_ldt_l1tab);
> 
> +        /*
> +         * Free the PV mem_access ring xenheap page in the case where a
> +         * mem_access listener is present while the domain is being 
> destroyed.
> +         */
> +        mem_access_free_pv_ring(d);
> +    }
> +
>     free_xenheap_page(d->shared_info);
>     cleanup_domain_irq_mapping(d);
> }
> diff --git a/xen/arch/x86/mm/mem_access.c b/xen/arch/x86/mm/mem_access.c
> index 462c318..2340a46 100644
> --- a/xen/arch/x86/mm/mem_access.c
> +++ b/xen/arch/x86/mm/mem_access.c
> @@ -26,8 +26,36 @@
> #include <xen/hypercall.h>
> #include <asm/p2m.h>
> #include <asm/mem_event.h>
> +#include <asm/shadow.h>
> #include <xsm/xsm.h>
> 
> +/* Override macros from asm/page.h to make them work with mfn_t */
> +#undef mfn_valid
> +#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
> +#undef mfn_to_page
> +#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
> +
> +static inline bool_t domain_valid_for_mem_access(struct domain *d)
> +{
> +    if ( is_hvm_domain(d) )
> +    {
> +        /* Only HAP is supported */
> +        if ( !hap_enabled(d) )
> +            return 0;
> +
> +        /* Currently only EPT is supported */
> +        if ( !cpu_has_vmx )
> +            return 0;
> +    }
> +    /*
> +     * Only PV guests using shadow mem_access mode and running on CPUs with
> +     * the NX bit are supported
> +     */
> +    else if ( !shadow_mode_mem_access(d) || !cpu_has_nx )
> +        return 0;
> +
> +    return 1;
> +}
> 
> int mem_access_memop(unsigned long cmd,
>                      XEN_GUEST_HANDLE_PARAM(xen_mem_access_op_t) arg)
> @@ -43,16 +71,14 @@ int mem_access_memop(unsigned long cmd,
>     if ( rc )
>         return rc;
> 
> -    rc = -EINVAL;
> -    if ( !is_hvm_domain(d) )
> -        goto out;
> -
>     rc = xsm_mem_event_op(XSM_TARGET, d, XENMEM_access_op);
>     if ( rc )
>         goto out;
> 
>     rc = -ENODEV;
> -    if ( unlikely(!d->mem_event->access.ring_page) )
> +    if ( unlikely(!d->mem_event->access.ring_page &&
> +                  mao.op != XENMEM_access_op_create_ring_page &&
> +                  mao.op != XENMEM_access_op_get_ring_mfn) )
>         goto out;
> 
>     switch ( mao.op )
> @@ -65,12 +91,27 @@ int mem_access_memop(unsigned long cmd,
>     case XENMEM_access_op_set_access:
>     {
>         unsigned long start_iter = cmd & ~MEMOP_CMD_MASK;
> +        unsigned long pfn = mao.pfn;
> 
>         rc = -EINVAL;
> -        if ( (mao.pfn != ~0ull) &&
> +        if ( !domain_valid_for_mem_access(d) )
> +            break;
> +
> +        if ( unlikely(is_pv_domain(d)  && pfn != ~0ull) )
> +            pfn = get_gpfn_from_mfn(mao.pfn);
> +
> +        /*
> +         * max_pfn for PV domains is obtained from the shared_info structures
Another one of my potentially inaccurate attempts to help:

I don't know that you need to care about pfn here. This is a PV domain, so a 
page may be "hung" from it without requiring a specific translation to a slot 
in the physmap. There are precedents for this, e.g. the shared info page, the 
grant table pages.

In other words, you could make the mem_event ring page a xen page, share it 
with the guest so it becomes mappable (share_xen_page_with_guest), and then 
xc_map in dom0 libxc would be happily able to map it. With no need to worry 
ever about finding a pfn in the guest domain for this page.

Of course you'll need to keep track of this page and properly dispose of it 
regardless.

Andres
> +         * that the guest maintains. It is up to the guest to maintain this 
> and
> +         * is not filled in during early boot. So we do not check if we are
> +         * crossing max_pfn here and will depend on the checks in
> +         * p2m_mem_access_set_entry().
> +         */
> +        if ( (pfn != ~0ull) &&
>              (mao.nr < start_iter ||
> -              ((mao.pfn + mao.nr - 1) < mao.pfn) ||
> -              ((mao.pfn + mao.nr - 1) > domain_get_maximum_gpfn(d))) )
> +              ((pfn + mao.nr - 1) < pfn) ||
> +              ((pfn + mao.nr - 1) > domain_get_maximum_gpfn(d) &&
> +                !is_pv_domain(d))) )
>             break;
> 
>         rc = p2m_set_mem_access(d, mao.pfn, mao.nr, start_iter,
> @@ -89,7 +130,18 @@ int mem_access_memop(unsigned long cmd,
>         xenmem_access_t access;
> 
>         rc = -EINVAL;
> -        if ( (mao.pfn > domain_get_maximum_gpfn(d)) && mao.pfn != ~0ull )
> +        if ( !domain_valid_for_mem_access(d) )
> +            break;
> +
> +        /*
> +         * max_pfn for PV domains is obtained from the shared_info structures
> +         * that the guest maintains. It is up to the guest to maintain this 
> and
> +         * is not filled in during early boot. So we do not check if we are
> +         * crossing max_pfn here and will depend on the checks in
> +         * p2m_mem_access_get_entry().
> +         */
> +        if ( (mao.pfn > domain_get_maximum_gpfn(d) && !is_pv_domain(d)) &&
> +             mao.pfn != ~0ull )
>             break;
> 
>         rc = p2m_get_mem_access(d, mao.pfn, &access);
> @@ -102,6 +154,67 @@ int mem_access_memop(unsigned long cmd,
>         break;
>     }
> 
> +    case XENMEM_access_op_create_ring_page:
> +    {
> +        void *access_ring_va;
> +
> +        /*
> +         * mem_access listeners for HVM domains need not call
> +         * xc_mem_access_set_ring_pfn() as the special ring page would have 
> been
> +         * setup during domain creation.
> +         */
> +        rc = -ENOSYS;
> +        if ( is_hvm_domain(d) )
> +            break;
> +
> +        /*
> +         * The ring page was created by a mem_access listener but was not
> +         * freed. Do not allow another xenheap page to be allocated.
> +         */
> +        if ( mfn_valid(d->arch.pv_domain.access_ring_mfn) )
> +        {
> +            rc = -EPERM;
> +            break;
> +        }
> +
> +        access_ring_va = alloc_xenheap_page();
> +        if ( access_ring_va == NULL )
> +        {
> +            rc = -ENOMEM;
> +            break;
> +        }
> +
> +        clear_page(access_ring_va);
> +        share_xen_page_with_guest(virt_to_page(access_ring_va), d,
> +                                  XENSHARE_writable);
> +
> +        d->arch.pv_domain.access_ring_mfn = 
> _mfn(virt_to_mfn(access_ring_va));
> +
> +        rc = 0;
> +        break;
> +    }
> +
> +    case XENMEM_access_op_get_ring_mfn:
> +        /*
> +         * mem_access listeners for HVM domains should call 
> xc_get_hvm_param()
> +         * instead of xc_mem_access_get_ring_pfn().
> +         */
> +        rc = -ENOSYS;
> +        if ( is_hvm_domain(d) )
> +            break;
> +
> +        if ( !mfn_valid(d->arch.pv_domain.access_ring_mfn) )
> +        {
> +            rc = -ENODEV;
> +            break;
> +        }
> +
> +        mao.pfn = mfn_x(d->arch.pv_domain.access_ring_mfn);
> +        rc = __copy_field_to_guest(arg, &mao, pfn) ? -EFAULT : 0;
> +
> +        rc = 0;
> +        break;
> +
>     default:
>         rc = -ENOSYS;
>         break;
> @@ -123,6 +236,21 @@ int mem_access_send_req(struct domain *d, 
> mem_event_request_t *req)
>     return 0;
> } 
> 
> +/* Free the xenheap page used for the PV access ring */
> +void mem_access_free_pv_ring(struct domain *d)
> +{
> +    struct page_info *pg = mfn_to_page(d->arch.pv_domain.access_ring_mfn);
> +
> +    if ( !mfn_valid(d->arch.pv_domain.access_ring_mfn) )
> +        return;
> +
> +    BUG_ON(page_get_owner(pg) != d);
> +    if ( test_and_clear_bit(_PGC_allocated, &pg->count_info) )
> +        put_page(pg);
> +    free_xenheap_page(mfn_to_virt(mfn_x(d->arch.pv_domain.access_ring_mfn)));
> +    d->arch.pv_domain.access_ring_mfn = _mfn(INVALID_MFN);
> +}
> +
> /*
>  * Local variables:
>  * mode: C
> diff --git a/xen/arch/x86/mm/mem_event.c b/xen/arch/x86/mm/mem_event.c
> index 36b9dba..9b45504 100644
> --- a/xen/arch/x86/mm/mem_event.c
> +++ b/xen/arch/x86/mm/mem_event.c
> @@ -25,6 +25,7 @@
> #include <xen/event.h>
> #include <xen/wait.h>
> #include <asm/p2m.h>
> +#include <asm/shadow.h>
> #include <asm/mem_event.h>
> #include <asm/mem_paging.h>
> #include <asm/mem_access.h>
> @@ -49,7 +50,12 @@ static int mem_event_enable(
>     xen_event_channel_notification_t notification_fn)
> {
>     int rc;
> -    unsigned long ring_gfn = d->arch.hvm_domain.params[param];
> +    unsigned long ring_gfn;
> +
> +    if ( is_pv_domain(d) && param == HVM_PARAM_ACCESS_RING_PFN )
> +        ring_gfn = mfn_x(d->arch.pv_domain.access_ring_mfn);
> +    else
> +        ring_gfn = d->arch.hvm_domain.params[param];
> 
>     /* Only one helper at a time. If the helper crashed,
>      * the ring is in an undefined state and so is the guest.
> @@ -581,28 +587,73 @@ int mem_event_domctl(struct domain *d, 
> xen_domctl_mem_event_op_t *mec,
>         switch( mec->op )
>         {
>         case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE:
> -        {
>             rc = -ENODEV;
> -            /* Only HAP is supported */
> -            if ( !hap_enabled(d) )
> -                break;
> +            if ( !is_pv_domain(d) )
> +            {
> +                /* Only HAP is supported */
> +                if ( !hap_enabled(d) )
> +                    break;
> 
> -            /* Currently only EPT is supported */
> -            if ( !cpu_has_vmx )
> -                break;
> +                /* Currently only EPT is supported */
> +                if ( !cpu_has_vmx )
> +                    break;
> +            }
> +            /* PV guests use shadow mem_access mode */
> +            else
> +            {
> +                domain_pause(d);
> +                if ( !shadow_mode_mem_access(d) )
> +                {
> +                    rc = shadow_enable_mem_access(d);
> +                    if ( rc != 0 )
> +                        goto pv_out;
> +                }
> +
> +                rc = p2m_mem_access_init(p2m_get_hostp2m(d));
> +                if ( rc != 0 )
> +                {
> +                    shadow_disable_mem_access(d);
> +                    goto pv_out;
> +                }
> +            }
> 
>             rc = mem_event_enable(d, mec, med, _VPF_mem_access, 
>                                     HVM_PARAM_ACCESS_RING_PFN,
>                                     mem_access_notification);
> -        }
> -        break;
> +
> +            if ( !is_pv_domain(d) )
> +                break;
> +
> + pv_out:
> +            if ( rc != 0 )
> +            {
> +                shadow_disable_mem_access(d);
> +                p2m_mem_access_teardown(p2m_get_hostp2m(d));
> +                mem_access_free_pv_ring(d);
> +            }
> +            domain_unpause(d);
> +
> +            break;
> 
>         case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE:
> -        {
>             if ( med->ring_page )
>                 rc = mem_event_disable(d, med);
> -        }
> -        break;
> +
> +            /*
> +             * Even if mem_access was not enabled, some resources could have
> +             * been created in the PV case.
> +             */
> +            if ( is_pv_domain(d) )
> +            {
> +                domain_pause(d);
> +                if ( shadow_mode_mem_access(d) )
> +                    shadow_disable_mem_access(d);
> +
> +                p2m_mem_access_teardown(p2m_get_hostp2m(d));
> +                mem_access_free_pv_ring(d);
> +                domain_unpause(d);
> +            }
> +            break;
> 
>         default:
>             rc = -ENOSYS;
> diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
> index c5c266f..884b799 100644
> --- a/xen/include/asm-x86/domain.h
> +++ b/xen/include/asm-x86/domain.h
> @@ -227,6 +227,9 @@ struct pv_domain
> 
>     /* map_domain_page() mapping cache. */
>     struct mapcache_domain mapcache;
> +
> +    /* mfn of the mem_access ring page for PV domains */
> +    mfn_t access_ring_mfn;
> };
> 
> struct arch_domain
> diff --git a/xen/include/asm-x86/mem_access.h 
> b/xen/include/asm-x86/mem_access.h
> index 5c7c5fd..c7be52c 100644
> --- a/xen/include/asm-x86/mem_access.h
> +++ b/xen/include/asm-x86/mem_access.h
> @@ -27,6 +27,10 @@ int mem_access_memop(unsigned long cmd,
>                      XEN_GUEST_HANDLE_PARAM(xen_mem_access_op_t) arg);
> int mem_access_send_req(struct domain *d, mem_event_request_t *req);
> 
> +
> +/* Free the xenheap page used for the access ring */
> +void mem_access_free_pv_ring(struct domain *d);
> +
> #endif /* _XEN_ASM_MEM_ACCESS_H */
> 
> /*
> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
> index 5bcd475..1070636 100644
> --- a/xen/include/public/memory.h
> +++ b/xen/include/public/memory.h
> @@ -380,6 +380,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_event_op_t);
> #define XENMEM_access_op_resume             0
> #define XENMEM_access_op_set_access         1
> #define XENMEM_access_op_get_access         2
> +#define XENMEM_access_op_create_ring_page   3
> +#define XENMEM_access_op_get_ring_mfn       4
> 
> typedef enum {
>     XENMEM_access_n,
> -- 
> 1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.