|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH RFC 2/4] x86/mem_access: mem_access and mem_event changes to support PV domains
> mem_access changes
> ------------------
> Two new memory hypercalls, XENMEM_access_op_create_ring_page and
> XENMEM_access_op_get_ring_mfn have been added. The mem_access listener
> makes these calls during setup. The ring page is created from the
> xenheap and shared with the guest. It is freed when mem_access is
> disabled or when the domain is shutdown or destroyed.
> XENMEM_access_op_[sg]et_access hypercalls are modified to accommodate
> calls for a PV domain. When setting the access permissions for a mfn,
> all shadows for that mfn are dropped. They get recreated with the new
> permissions on the next page fault for that mfn. To get the permissions
> for a mfn, value is returned from the p2m_access lookup table.
>
> mem_event changes
> -----------------
> The XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE/DISABLE hypercalls are
> modified to allow mem_access to work with PV domains. When the access
> listener goes to enable mem_access for a PV domain, shadow PG_mem_access
> mode is turned on and the p2m structures are initialized. When
> disabling, this mode is turned off but domain continues to run in shadow
> mode. The p2m structures are also freed on disabling.
>
> Signed-off-by: Aravindh Puthiyaparambil <aravindp@xxxxxxxxx>
> Cc: Jan Beulich <jbeulich@xxxxxxxx>
> Cc: Keir Fraser <keir@xxxxxxx>
> Cc: Tim Deegan <tim@xxxxxxx>
> ---
> xen/arch/x86/domain.c | 12 ++++
> xen/arch/x86/mm/mem_access.c | 146 ++++++++++++++++++++++++++++++++++++---
> xen/arch/x86/mm/mem_event.c | 77 +++++++++++++++++----
> xen/include/asm-x86/domain.h | 3 +
> xen/include/asm-x86/mem_access.h | 4 ++
> xen/include/public/memory.h | 2 +
> 6 files changed, 222 insertions(+), 22 deletions(-)
>
> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
> index 2a9c6fc..0ea03b4 100644
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -57,6 +57,7 @@
> #include <asm/nmi.h>
> #include <asm/mce.h>
> #include <asm/amd.h>
> +#include <asm/mem_access.h>
> #include <xen/numa.h>
> #include <xen/iommu.h>
> #include <compat/vcpu.h>
> @@ -593,8 +594,11 @@ int arch_domain_create(struct domain *d, unsigned int
> domcr_flags)
> }
> }
> else
> + {
> /* 64-bit PV guest by default. */
> d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
> + d->arch.pv_domain.access_ring_mfn = _mfn(INVALID_MFN);
> + }
>
> /* initialize default tsc behavior in case tools don't */
> tsc_set_info(d, TSC_MODE_DEFAULT, 0UL, 0, 0);
> @@ -632,8 +636,16 @@ void arch_domain_destroy(struct domain *d)
>
> free_perdomain_mappings(d);
> if ( is_pv_domain(d) )
> + {
> free_xenheap_page(d->arch.pv_domain.gdt_ldt_l1tab);
>
> + /*
> + * Free the PV mem_access ring xenheap page in the case where a
> + * mem_access listener is present while the domain is being
> destroyed.
> + */
> + mem_access_free_pv_ring(d);
> + }
> +
> free_xenheap_page(d->shared_info);
> cleanup_domain_irq_mapping(d);
> }
> diff --git a/xen/arch/x86/mm/mem_access.c b/xen/arch/x86/mm/mem_access.c
> index 462c318..2340a46 100644
> --- a/xen/arch/x86/mm/mem_access.c
> +++ b/xen/arch/x86/mm/mem_access.c
> @@ -26,8 +26,36 @@
> #include <xen/hypercall.h>
> #include <asm/p2m.h>
> #include <asm/mem_event.h>
> +#include <asm/shadow.h>
> #include <xsm/xsm.h>
>
> +/* Override macros from asm/page.h to make them work with mfn_t */
> +#undef mfn_valid
> +#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
> +#undef mfn_to_page
> +#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
> +
> +static inline bool_t domain_valid_for_mem_access(struct domain *d)
> +{
> + if ( is_hvm_domain(d) )
> + {
> + /* Only HAP is supported */
> + if ( !hap_enabled(d) )
> + return 0;
> +
> + /* Currently only EPT is supported */
> + if ( !cpu_has_vmx )
> + return 0;
> + }
> + /*
> + * Only PV guests using shadow mem_access mode and running on CPUs with
> + * the NX bit are supported
> + */
> + else if ( !shadow_mode_mem_access(d) || !cpu_has_nx )
> + return 0;
> +
> + return 1;
> +}
>
> int mem_access_memop(unsigned long cmd,
> XEN_GUEST_HANDLE_PARAM(xen_mem_access_op_t) arg)
> @@ -43,16 +71,14 @@ int mem_access_memop(unsigned long cmd,
> if ( rc )
> return rc;
>
> - rc = -EINVAL;
> - if ( !is_hvm_domain(d) )
> - goto out;
> -
> rc = xsm_mem_event_op(XSM_TARGET, d, XENMEM_access_op);
> if ( rc )
> goto out;
>
> rc = -ENODEV;
> - if ( unlikely(!d->mem_event->access.ring_page) )
> + if ( unlikely(!d->mem_event->access.ring_page &&
> + mao.op != XENMEM_access_op_create_ring_page &&
> + mao.op != XENMEM_access_op_get_ring_mfn) )
> goto out;
>
> switch ( mao.op )
> @@ -65,12 +91,27 @@ int mem_access_memop(unsigned long cmd,
> case XENMEM_access_op_set_access:
> {
> unsigned long start_iter = cmd & ~MEMOP_CMD_MASK;
> + unsigned long pfn = mao.pfn;
>
> rc = -EINVAL;
> - if ( (mao.pfn != ~0ull) &&
> + if ( !domain_valid_for_mem_access(d) )
> + break;
> +
> + if ( unlikely(is_pv_domain(d) && pfn != ~0ull) )
> + pfn = get_gpfn_from_mfn(mao.pfn);
> +
> + /*
> + * max_pfn for PV domains is obtained from the shared_info structures
Another one of my potentially inaccurate attempts to help:
I don't know that you need to care about pfn here. This is a PV domain, so a
page may be "hung" from it without requiring a specific translation to a slot
in the physmap. There are precedents for this, e.g. the shared info page, the
grant table pages.
In other words, you could make the mem event ring page a xen page, share it
with the guest so it becomes mappable (share_xen_page_with_guest), and then
xc_map in dom0 libxc would happily be able to map it. With no need to ever
worry about finding a pfn in the guest domain for this page.
Of course you'll need to keep track of this page and properly dispose of it
regardless.
Andres
> + * that the guest maintains. It is up to the guest to maintain this
> and
> + * is not filled in during early boot. So we do not check if we are
> + * crossing max_pfn here and will depend on the checks in
> + * p2m_mem_access_set_entry().
> + */
> + if ( (pfn != ~0ull) &&
> (mao.nr < start_iter ||
> - ((mao.pfn + mao.nr - 1) < mao.pfn) ||
> - ((mao.pfn + mao.nr - 1) > domain_get_maximum_gpfn(d))) )
> + ((pfn + mao.nr - 1) < pfn) ||
> + ((pfn + mao.nr - 1) > domain_get_maximum_gpfn(d) &&
> + !is_pv_domain(d))) )
> break;
>
> rc = p2m_set_mem_access(d, mao.pfn, mao.nr, start_iter,
> @@ -89,7 +130,18 @@ int mem_access_memop(unsigned long cmd,
> xenmem_access_t access;
>
> rc = -EINVAL;
> - if ( (mao.pfn > domain_get_maximum_gpfn(d)) && mao.pfn != ~0ull )
> + if ( !domain_valid_for_mem_access(d) )
> + break;
> +
> + /*
> + * max_pfn for PV domains is obtained from the shared_info structures
> + * that the guest maintains. It is up to the guest to maintain this
> and
> + * is not filled in during early boot. So we do not check if we are
> + * crossing max_pfn here and will depend on the checks in
> + * p2m_mem_access_get_entry().
> + */
> + if ( (mao.pfn > domain_get_maximum_gpfn(d) && !is_pv_domain(d)) &&
> + mao.pfn != ~0ull )
> break;
>
> rc = p2m_get_mem_access(d, mao.pfn, &access);
> @@ -102,6 +154,67 @@ int mem_access_memop(unsigned long cmd,
> break;
> }
>
> + case XENMEM_access_op_create_ring_page:
> + {
> + void *access_ring_va;
> +
> + /*
> + * mem_access listeners for HVM domains need not call
> + * xc_mem_access_set_ring_pfn() as the special ring page would have
> been
> + * setup during domain creation.
> + */
> + rc = -ENOSYS;
> + if ( is_hvm_domain(d) )
> + break;
> +
> + /*
> + * The ring page was created by a mem_access listener but was not
> + * freed. Do not allow another xenheap page to be allocated.
> + */
> + if ( mfn_valid(d->arch.pv_domain.access_ring_mfn) )
> + {
> + rc = -EPERM;
> + break;
> + }
> +
> + access_ring_va = alloc_xenheap_page();
> + if ( access_ring_va == NULL )
> + {
> + rc = -ENOMEM;
> + break;
> + }
> +
> + clear_page(access_ring_va);
> + share_xen_page_with_guest(virt_to_page(access_ring_va), d,
> + XENSHARE_writable);
> +
> + d->arch.pv_domain.access_ring_mfn =
> _mfn(virt_to_mfn(access_ring_va));
> +
> + rc = 0;
> + break;
> + }
> +
> + case XENMEM_access_op_get_ring_mfn:
> + /*
> + * mem_access listeners for HVM domains should call
> xc_get_hvm_param()
> + * instead of xc_mem_access_get_ring_pfn().
> + */
> + rc = -ENOSYS;
> + if ( is_hvm_domain(d) )
> + break;
> +
> + if ( !mfn_valid(d->arch.pv_domain.access_ring_mfn) )
> + {
> + rc = -ENODEV;
> + break;
> + }
> +
> + mao.pfn = mfn_x(d->arch.pv_domain.access_ring_mfn);
> + rc = __copy_field_to_guest(arg, &mao, pfn) ? -EFAULT : 0;
> +
> + rc = 0;
> + break;
> +
> default:
> rc = -ENOSYS;
> break;
> @@ -123,6 +236,21 @@ int mem_access_send_req(struct domain *d,
> mem_event_request_t *req)
> return 0;
> }
>
> +/* Free the xenheap page used for the PV access ring */
> +void mem_access_free_pv_ring(struct domain *d)
> +{
> + struct page_info *pg = mfn_to_page(d->arch.pv_domain.access_ring_mfn);
> +
> + if ( !mfn_valid(d->arch.pv_domain.access_ring_mfn) )
> + return;
> +
> + BUG_ON(page_get_owner(pg) != d);
> + if ( test_and_clear_bit(_PGC_allocated, &pg->count_info) )
> + put_page(pg);
> + free_xenheap_page(mfn_to_virt(mfn_x(d->arch.pv_domain.access_ring_mfn)));
> + d->arch.pv_domain.access_ring_mfn = _mfn(INVALID_MFN);
> +}
> +
> /*
> * Local variables:
> * mode: C
> diff --git a/xen/arch/x86/mm/mem_event.c b/xen/arch/x86/mm/mem_event.c
> index 36b9dba..9b45504 100644
> --- a/xen/arch/x86/mm/mem_event.c
> +++ b/xen/arch/x86/mm/mem_event.c
> @@ -25,6 +25,7 @@
> #include <xen/event.h>
> #include <xen/wait.h>
> #include <asm/p2m.h>
> +#include <asm/shadow.h>
> #include <asm/mem_event.h>
> #include <asm/mem_paging.h>
> #include <asm/mem_access.h>
> @@ -49,7 +50,12 @@ static int mem_event_enable(
> xen_event_channel_notification_t notification_fn)
> {
> int rc;
> - unsigned long ring_gfn = d->arch.hvm_domain.params[param];
> + unsigned long ring_gfn;
> +
> + if ( is_pv_domain(d) && param == HVM_PARAM_ACCESS_RING_PFN )
> + ring_gfn = mfn_x(d->arch.pv_domain.access_ring_mfn);
> + else
> + ring_gfn = d->arch.hvm_domain.params[param];
>
> /* Only one helper at a time. If the helper crashed,
> * the ring is in an undefined state and so is the guest.
> @@ -581,28 +587,73 @@ int mem_event_domctl(struct domain *d,
> xen_domctl_mem_event_op_t *mec,
> switch( mec->op )
> {
> case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE:
> - {
> rc = -ENODEV;
> - /* Only HAP is supported */
> - if ( !hap_enabled(d) )
> - break;
> + if ( !is_pv_domain(d) )
> + {
> + /* Only HAP is supported */
> + if ( !hap_enabled(d) )
> + break;
>
> - /* Currently only EPT is supported */
> - if ( !cpu_has_vmx )
> - break;
> + /* Currently only EPT is supported */
> + if ( !cpu_has_vmx )
> + break;
> + }
> + /* PV guests use shadow mem_access mode */
> + else
> + {
> + domain_pause(d);
> + if ( !shadow_mode_mem_access(d) )
> + {
> + rc = shadow_enable_mem_access(d);
> + if ( rc != 0 )
> + goto pv_out;
> + }
> +
> + rc = p2m_mem_access_init(p2m_get_hostp2m(d));
> + if ( rc != 0 )
> + {
> + shadow_disable_mem_access(d);
> + goto pv_out;
> + }
> + }
>
> rc = mem_event_enable(d, mec, med, _VPF_mem_access,
> HVM_PARAM_ACCESS_RING_PFN,
> mem_access_notification);
> - }
> - break;
> +
> + if ( !is_pv_domain(d) )
> + break;
> +
> + pv_out:
> + if ( rc != 0 )
> + {
> + shadow_disable_mem_access(d);
> + p2m_mem_access_teardown(p2m_get_hostp2m(d));
> + mem_access_free_pv_ring(d);
> + }
> + domain_unpause(d);
> +
> + break;
>
> case XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE:
> - {
> if ( med->ring_page )
> rc = mem_event_disable(d, med);
> - }
> - break;
> +
> + /*
> + * Even if mem_access was not enabled, some resources could have
> + * been created in the PV case.
> + */
> + if ( is_pv_domain(d) )
> + {
> + domain_pause(d);
> + if ( shadow_mode_mem_access(d) )
> + shadow_disable_mem_access(d);
> +
> + p2m_mem_access_teardown(p2m_get_hostp2m(d));
> + mem_access_free_pv_ring(d);
> + domain_unpause(d);
> + }
> + break;
>
> default:
> rc = -ENOSYS;
> diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
> index c5c266f..884b799 100644
> --- a/xen/include/asm-x86/domain.h
> +++ b/xen/include/asm-x86/domain.h
> @@ -227,6 +227,9 @@ struct pv_domain
>
> /* map_domain_page() mapping cache. */
> struct mapcache_domain mapcache;
> +
> + /* mfn of the mem_access ring page for PV domains */
> + mfn_t access_ring_mfn;
> };
>
> struct arch_domain
> diff --git a/xen/include/asm-x86/mem_access.h
> b/xen/include/asm-x86/mem_access.h
> index 5c7c5fd..c7be52c 100644
> --- a/xen/include/asm-x86/mem_access.h
> +++ b/xen/include/asm-x86/mem_access.h
> @@ -27,6 +27,10 @@ int mem_access_memop(unsigned long cmd,
> XEN_GUEST_HANDLE_PARAM(xen_mem_access_op_t) arg);
> int mem_access_send_req(struct domain *d, mem_event_request_t *req);
>
> +
> +/* Free the xenheap page used for the access ring */
> +void mem_access_free_pv_ring(struct domain *d);
> +
> #endif /* _XEN_ASM_MEM_ACCESS_H */
>
> /*
> diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
> index 5bcd475..1070636 100644
> --- a/xen/include/public/memory.h
> +++ b/xen/include/public/memory.h
> @@ -380,6 +380,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_event_op_t);
> #define XENMEM_access_op_resume 0
> #define XENMEM_access_op_set_access 1
> #define XENMEM_access_op_get_access 2
> +#define XENMEM_access_op_create_ring_page 3
> +#define XENMEM_access_op_get_ring_mfn 4
>
> typedef enum {
> XENMEM_access_n,
> --
> 1.9.1
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |