
[Xen-devel] [PATCH for-4.5 v7 07/21] xen: Relocate p2m_mem_access_check into common and refactor it.



The p2m_mem_access_check function was previously used only to automatically
promote the special-case mem_access rights and to allocate the mem_access
message. In this patch we abstract the violation check into a common
mem_access_check function and change its return type to int.

Return with rc < 0 means the fault wasn't triggered by mem_access permissions.
Return with rc == 0 means it was, but rights were not automatically promoted.
Return with rc > 0 means it was and mem_access rights were automatically promoted.
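
The intended caller pattern (a sketch only, mirroring the hvm.c hunk further
below) is roughly:

    rc = mem_access_check(gpa, gla, npfec);
    if ( rc < 0 )
    {
        /* Not a mem_access fault: continue down the normal fault path. */
    }
    else if ( rc > 0 )
    {
        /* Rights were promoted automatically: let the access be retried. */
        fall_through = 1;
    }
    else
    {
        /* Event sent to the listener, vcpu paused: work here is done. */
        rc = 1;
        goto out_put_gfn;
    }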

As part of this patch, we relocate the x86/mm/mm-locks.h header into asm-x86
so as to be able to create generic inlined p2m_gpfn_lock/unlock wrappers
that each arch can define based on its own locking scheme.
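
On x86 the new wrappers simply forward to the existing gfn_lock/gfn_unlock
macros, as added in the asm-x86/p2m.h hunk below; other architectures are
expected to supply equivalents on top of their own p2m locking (sketch):

    /* x86 variant: takes the recursive p2m (write) lock via gfn_lock. */
    static inline void p2m_gpfn_lock(struct p2m_domain *p2m, unsigned long gpfn)
    {
        gfn_lock(p2m, gpfn, 0);
    }

    static inline void p2m_gpfn_unlock(struct p2m_domain *p2m, unsigned long gpfn)
    {
        gfn_unlock(p2m, gpfn, 0);
    }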

Signed-off-by: Tamas K Lengyel <tklengyel@xxxxxxxxxxxxx>
---
 xen/arch/x86/hvm/hvm.c         |  59 ++------
 xen/arch/x86/mm/mem_sharing.c  |   2 +-
 xen/arch/x86/mm/mm-locks.h     | 299 -----------------------------------------
 xen/arch/x86/mm/p2m-ept.c      |   9 +-
 xen/arch/x86/mm/p2m-pod.c      |   2 +-
 xen/arch/x86/mm/p2m-pt.c       |   2 +-
 xen/arch/x86/mm/p2m.c          | 102 +-------------
 xen/arch/x86/mm/paging.c       |   4 +-
 xen/common/mem_access.c        | 143 +++++++++++++++++++-
 xen/include/asm-x86/mm-locks.h | 299 +++++++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/p2m.h      |  12 ++
 xen/include/asm-x86/page.h     |   4 -
 xen/include/xen/mem_access.h   |  17 ++-
 xen/include/xen/mm.h           |   4 +
 14 files changed, 492 insertions(+), 466 deletions(-)
 delete mode 100644 xen/arch/x86/mm/mm-locks.h
 create mode 100644 xen/include/asm-x86/mm-locks.h

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 7649d36..aab397a 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2677,7 +2677,6 @@ int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla,
     struct p2m_domain *p2m;
     int rc, fall_through = 0, paged = 0;
     int sharing_enomem = 0;
-    mem_event_request_t *req_ptr = NULL;
 
     /* On Nested Virtualization, walk the guest page table.
      * If this succeeds, all is fine.
@@ -2745,50 +2744,21 @@ int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla,
     /* Check access permissions first, then handle faults */
     if ( mfn_x(mfn) != INVALID_MFN )
     {
-        bool_t violation;
-
-        /* If the access is against the permissions, then send to mem_event */
-        switch (p2ma)
+        rc = mem_access_check(gpa, gla, npfec);
+        if ( rc < 0 )
         {
-        case p2m_access_n:
-        case p2m_access_n2rwx:
-        default:
-            violation = npfec.read_access || npfec.write_access || npfec.insn_fetch;
-            break;
-        case p2m_access_r:
-            violation = npfec.write_access || npfec.insn_fetch;
-            break;
-        case p2m_access_w:
-            violation = npfec.read_access || npfec.insn_fetch;
-            break;
-        case p2m_access_x:
-            violation = npfec.read_access || npfec.write_access;
-            break;
-        case p2m_access_rx:
-        case p2m_access_rx2rw:
-            violation = npfec.write_access;
-            break;
-        case p2m_access_wx:
-            violation = npfec.read_access;
-            break;
-        case p2m_access_rw:
-            violation = npfec.insn_fetch;
-            break;
-        case p2m_access_rwx:
-            violation = 0;
-            break;
+            /* Fault wasn't triggered by mem_access. */
         }
-
-        if ( violation )
+        else if ( rc > 0 )
         {
-            if ( p2m_mem_access_check(gpa, gla, npfec, &req_ptr) )
-            {
-                fall_through = 1;
-            } else {
-                /* Rights not promoted, vcpu paused, work here is done */
-                rc = 1;
-                goto out_put_gfn;
-            }
+            /* Fault was triggered by mem_access and settings got promoted. */
+            fall_through = 1;
+        }
+        else
+        {
+            /* Rights not promoted, vcpu paused, work here is done */
+            rc = 1;
+            goto out_put_gfn;
         }
     }
 
@@ -2877,11 +2847,6 @@ out:
             rc = 0;
         }
     }
-    if ( req_ptr )
-    {
-        mem_access_send_req(v->domain, req_ptr);
-        xfree(req_ptr);
-    }
     return rc;
 }
 
diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
index 7c0fc7d..bcfcf6a 100644
--- a/xen/arch/x86/mm/mem_sharing.c
+++ b/xen/arch/x86/mm/mem_sharing.c
@@ -34,9 +34,9 @@
 #include <asm/p2m.h>
 #include <asm/atomic.h>
 #include <asm/event.h>
+#include <asm/mm-locks.h>
 #include <xsm/xsm.h>
 
-#include "mm-locks.h"
 
 static shr_handle_t next_handle = 1;
 
diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h
deleted file mode 100644
index 769f7bc..0000000
--- a/xen/arch/x86/mm/mm-locks.h
+++ /dev/null
@@ -1,299 +0,0 @@
-/******************************************************************************
- * arch/x86/mm/mm-locks.h
- *
- * Spinlocks used by the code in arch/x86/mm.
- *
- * Copyright (c) 2011 Citrix Systems, inc. 
- * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
- * Copyright (c) 2006-2007 XenSource Inc.
- * Copyright (c) 2006 Michael A Fetterman
- * 
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#ifndef _MM_LOCKS_H
-#define _MM_LOCKS_H
-
-#include <asm/mem_sharing.h>
-
-/* Per-CPU variable for enforcing the lock ordering */
-DECLARE_PER_CPU(int, mm_lock_level);
-#define __get_lock_level()  (this_cpu(mm_lock_level))
-
-static inline void mm_lock_init(mm_lock_t *l)
-{
-    spin_lock_init(&l->lock);
-    l->locker = -1;
-    l->locker_function = "nobody";
-    l->unlock_level = 0;
-}
-
-static inline int mm_locked_by_me(mm_lock_t *l) 
-{
-    return (l->lock.recurse_cpu == current->processor);
-}
-
-/* If you see this crash, the numbers printed are lines in this file 
- * where the offending locks are declared. */
-#define __check_lock_level(l)                           \
-do {                                                    \
-    if ( unlikely(__get_lock_level() > (l)) )           \
-    {                                                   \
-        printk("mm locking order violation: %i > %i\n", \
-               __get_lock_level(), (l));                \
-        BUG();                                          \
-    }                                                   \
-} while(0)
-
-#define __set_lock_level(l)         \
-do {                                \
-    __get_lock_level() = (l);       \
-} while(0)
-
-static inline void _mm_lock(mm_lock_t *l, const char *func, int level, int rec)
-{
-    if ( !((mm_locked_by_me(l)) && rec) ) 
-        __check_lock_level(level);
-    spin_lock_recursive(&l->lock);
-    if ( l->lock.recurse_cnt == 1 )
-    {
-        l->locker_function = func;
-        l->unlock_level = __get_lock_level();
-    }
-    else if ( (unlikely(!rec)) )
-        panic("mm lock already held by %s", l->locker_function);
-    __set_lock_level(level);
-}
-
-static inline void _mm_enforce_order_lock_pre(int level)
-{
-    __check_lock_level(level);
-}
-
-static inline void _mm_enforce_order_lock_post(int level, int *unlock_level,
-                                                unsigned short *recurse_count)
-{
-    if ( recurse_count )
-    {
-        if ( (*recurse_count)++ == 0 )
-        {
-            *unlock_level = __get_lock_level();
-        }
-    } else {
-        *unlock_level = __get_lock_level();
-    }
-    __set_lock_level(level);
-}
-
-
-static inline void mm_rwlock_init(mm_rwlock_t *l)
-{
-    rwlock_init(&l->lock);
-    l->locker = -1;
-    l->locker_function = "nobody";
-    l->unlock_level = 0;
-}
-
-static inline int mm_write_locked_by_me(mm_rwlock_t *l)
-{
-    return (l->locker == get_processor_id());
-}
-
-static inline void _mm_write_lock(mm_rwlock_t *l, const char *func, int level)
-{
-    if ( !mm_write_locked_by_me(l) )
-    {
-        __check_lock_level(level);
-        write_lock(&l->lock);
-        l->locker = get_processor_id();
-        l->locker_function = func;
-        l->unlock_level = __get_lock_level();
-        __set_lock_level(level);
-    }
-    l->recurse_count++;
-}
-
-static inline void mm_write_unlock(mm_rwlock_t *l)
-{
-    if ( --(l->recurse_count) != 0 )
-        return;
-    l->locker = -1;
-    l->locker_function = "nobody";
-    __set_lock_level(l->unlock_level);
-    write_unlock(&l->lock);
-}
-
-static inline void _mm_read_lock(mm_rwlock_t *l, int level)
-{
-    __check_lock_level(level);
-    read_lock(&l->lock);
-    /* There's nowhere to store the per-CPU unlock level so we can't
-     * set the lock level. */
-}
-
-static inline void mm_read_unlock(mm_rwlock_t *l)
-{
-    read_unlock(&l->lock);
-}
-
-/* This wrapper uses the line number to express the locking order below */
-#define declare_mm_lock(name)                                                 \
-    static inline void mm_lock_##name(mm_lock_t *l, const char *func, int rec)\
-    { _mm_lock(l, func, __LINE__, rec); }
-#define declare_mm_rwlock(name)                                               \
-    static inline void mm_write_lock_##name(mm_rwlock_t *l, const char *func) \
-    { _mm_write_lock(l, func, __LINE__); }                                    \
-    static inline void mm_read_lock_##name(mm_rwlock_t *l)                    \
-    { _mm_read_lock(l, __LINE__); }
-/* These capture the name of the calling function */
-#define mm_lock(name, l) mm_lock_##name(l, __func__, 0)
-#define mm_lock_recursive(name, l) mm_lock_##name(l, __func__, 1)
-#define mm_write_lock(name, l) mm_write_lock_##name(l, __func__)
-#define mm_read_lock(name, l) mm_read_lock_##name(l)
-
-/* This wrapper is intended for "external" locks which do not use
- * the mm_lock_t types. Such locks inside the mm code are also subject
- * to ordering constraints. */
-#define declare_mm_order_constraint(name)                                   \
-    static inline void mm_enforce_order_lock_pre_##name(void)               \
-    { _mm_enforce_order_lock_pre(__LINE__); }                               \
-    static inline void mm_enforce_order_lock_post_##name(                   \
-                        int *unlock_level, unsigned short *recurse_count)   \
-    { _mm_enforce_order_lock_post(__LINE__, unlock_level, recurse_count); } \
-
-static inline void mm_unlock(mm_lock_t *l)
-{
-    if ( l->lock.recurse_cnt == 1 )
-    {
-        l->locker_function = "nobody";
-        __set_lock_level(l->unlock_level);
-    }
-    spin_unlock_recursive(&l->lock);
-}
-
-static inline void mm_enforce_order_unlock(int unlock_level, 
-                                            unsigned short *recurse_count)
-{
-    if ( recurse_count )
-    {
-        BUG_ON(*recurse_count == 0);
-        if ( (*recurse_count)-- == 1 )
-        {
-            __set_lock_level(unlock_level);
-        }
-    } else {
-        __set_lock_level(unlock_level);
-    }
-}
-
-/************************************************************************
- *                                                                      *
- * To avoid deadlocks, these locks _MUST_ be taken in the order they're *
- * declared in this file.  The locking functions will enforce this.     *
- *                                                                      *
- ************************************************************************/
-
-declare_mm_lock(nestedp2m)
-#define nestedp2m_lock(d)   mm_lock(nestedp2m, &(d)->arch.nested_p2m_lock)
-#define nestedp2m_unlock(d) mm_unlock(&(d)->arch.nested_p2m_lock)
-
-/* P2M lock (per-p2m-table)
- *
- * This protects all queries and updates to the p2m table.
- * Queries may be made under the read lock but all modifications
- * need the main (write) lock.
- *
- * The write lock is recursive as it is common for a code path to look
- * up a gfn and later mutate it.
- */
-
-declare_mm_rwlock(p2m);
-#define p2m_lock(p)           mm_write_lock(p2m, &(p)->lock);
-#define p2m_unlock(p)         mm_write_unlock(&(p)->lock);
-#define gfn_lock(p,g,o)       p2m_lock(p)
-#define gfn_unlock(p,g,o)     p2m_unlock(p)
-#define p2m_read_lock(p)      mm_read_lock(p2m, &(p)->lock)
-#define p2m_read_unlock(p)    mm_read_unlock(&(p)->lock)
-#define p2m_locked_by_me(p)   mm_write_locked_by_me(&(p)->lock)
-#define gfn_locked_by_me(p,g) p2m_locked_by_me(p)
-
-/* Sharing per page lock
- *
- * This is an external lock, not represented by an mm_lock_t. The memory
- * sharing lock uses it to protect addition and removal of (gfn,domain)
- * tuples to a shared page. We enforce order here against the p2m lock,
- * which is taken after the page_lock to change the gfn's p2m entry.
- *
- * The lock is recursive because during share we lock two pages. */
-
-declare_mm_order_constraint(per_page_sharing)
-#define page_sharing_mm_pre_lock()   mm_enforce_order_lock_pre_per_page_sharing()
-#define page_sharing_mm_post_lock(l, r) \
-        mm_enforce_order_lock_post_per_page_sharing((l), (r))
-#define page_sharing_mm_unlock(l, r) mm_enforce_order_unlock((l), (r))
-
-/* Nested P2M lock (per-domain)
- *
- * A per-domain lock that protects the mapping from nested-CR3 to 
- * nested-p2m.  In particular it covers:
- * - the array of nested-p2m tables, and all LRU activity therein; and
- * - setting the "cr3" field of any p2m table to a non-P2M_BASE_EAADR value.
- *   (i.e. assigning a p2m table to be the shadow of that cr3 */
-
-/* PoD lock (per-p2m-table)
- * 
- * Protects private PoD data structs: entry and cache
- * counts, page lists, sweep parameters. */
-
-declare_mm_lock(pod)
-#define pod_lock(p)           mm_lock(pod, &(p)->pod.lock)
-#define pod_unlock(p)         mm_unlock(&(p)->pod.lock)
-#define pod_locked_by_me(p)   mm_locked_by_me(&(p)->pod.lock)
-
-/* Page alloc lock (per-domain)
- *
- * This is an external lock, not represented by an mm_lock_t. However, 
- * pod code uses it in conjunction with the p2m lock, and expecting
- * the ordering which we enforce here.
- * The lock is not recursive. */
-
-declare_mm_order_constraint(page_alloc)
-#define page_alloc_mm_pre_lock()   mm_enforce_order_lock_pre_page_alloc()
-#define page_alloc_mm_post_lock(l) mm_enforce_order_lock_post_page_alloc(&(l), NULL)
-#define page_alloc_mm_unlock(l)    mm_enforce_order_unlock((l), NULL)
-
-/* Paging lock (per-domain)
- *
- * For shadow pagetables, this lock protects
- *   - all changes to shadow page table pages
- *   - the shadow hash table
- *   - the shadow page allocator 
- *   - all changes to guest page table pages
- *   - all changes to the page_info->tlbflush_timestamp
- *   - the page_info->count fields on shadow pages 
- * 
- * For HAP, it protects the NPT/EPT tables and mode changes. 
- * 
- * It also protects the log-dirty bitmap from concurrent accesses (and
- * teardowns, etc). */
-
-declare_mm_lock(paging)
-#define paging_lock(d)         mm_lock(paging, &(d)->arch.paging.lock)
-#define paging_lock_recursive(d) \
-                    mm_lock_recursive(paging, &(d)->arch.paging.lock)
-#define paging_unlock(d)       mm_unlock(&(d)->arch.paging.lock)
-#define paging_locked_by_me(d) mm_locked_by_me(&(d)->arch.paging.lock)
-
-#endif /* _MM_LOCKS_H */
diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c
index 15c6e83..1d11956 100644
--- a/xen/arch/x86/mm/p2m-ept.c
+++ b/xen/arch/x86/mm/p2m-ept.c
@@ -19,6 +19,9 @@
 #include <xen/config.h>
 #include <xen/domain_page.h>
 #include <xen/sched.h>
+#include <xen/iommu.h>
+#include <xen/keyhandler.h>
+#include <xen/softirq.h>
 #include <asm/current.h>
 #include <asm/paging.h>
 #include <asm/types.h>
@@ -26,13 +29,9 @@
 #include <asm/p2m.h>
 #include <asm/hvm/vmx/vmx.h>
 #include <asm/hvm/vmx/vmcs.h>
-#include <xen/iommu.h>
 #include <asm/mtrr.h>
 #include <asm/hvm/cacheattr.h>
-#include <xen/keyhandler.h>
-#include <xen/softirq.h>
-
-#include "mm-locks.h"
+#include <asm/mm-locks.h>
 
 #define atomic_read_ept_entry(__pepte)                              \
     ( (ept_entry_t) { .epte = read_atomic(&(__pepte)->epte) } )
diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c
index 43f507c..1484aa3 100644
--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -32,8 +32,8 @@
 #include <asm/mem_sharing.h>
 #include <asm/hvm/nestedhvm.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
+#include <asm/mm-locks.h>
 
-#include "mm-locks.h"
 
 /* Override macros from asm/page.h to make them work with mfn_t */
 #undef mfn_to_page
diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c
index e48b63a..a6bbb46 100644
--- a/xen/arch/x86/mm/p2m-pt.c
+++ b/xen/arch/x86/mm/p2m-pt.c
@@ -37,8 +37,8 @@
 #include <asm/mem_sharing.h>
 #include <asm/hvm/nestedhvm.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
+#include <asm/mm-locks.h>
 
-#include "mm-locks.h"
 
 /* Override macros from asm/page.h to make them work with mfn_t */
 #undef mfn_to_page
diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c
index bf8e537..d192d6c 100644
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -36,9 +36,9 @@
 #include <asm/mem_sharing.h>
 #include <asm/hvm/nestedhvm.h>
 #include <asm/hvm/svm/amd-iommu-proto.h>
+#include <asm/mm-locks.h>
 #include <xsm/xsm.h>
 
-#include "mm-locks.h"
 
 /* turn on/off 1GB host page table support for hap, default on */
 bool_t __read_mostly opt_hap_1gb = 1;
@@ -1327,106 +1327,6 @@ void p2m_mem_paging_resume(struct domain *d)
     }
 }
 
-bool_t p2m_mem_access_check(paddr_t gpa, unsigned long gla,
-                            struct npfec npfec,
-                            mem_event_request_t **req_ptr)
-{
-    struct vcpu *v = current;
-    unsigned long gfn = gpa >> PAGE_SHIFT;
-    struct domain *d = v->domain;    
-    struct p2m_domain* p2m = p2m_get_hostp2m(d);
-    mfn_t mfn;
-    p2m_type_t p2mt;
-    p2m_access_t p2ma;
-    mem_event_request_t *req;
-    int rc;
-
-    /* First, handle rx2rw conversion automatically.
-     * These calls to p2m->set_entry() must succeed: we have the gfn
-     * locked and just did a successful get_entry(). */
-    gfn_lock(p2m, gfn, 0);
-    mfn = p2m->get_entry(p2m, gfn, &p2mt, &p2ma, 0, NULL);
-
-    if ( npfec.write_access && p2ma == p2m_access_rx2rw ) 
-    {
-        rc = p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2mt, p2m_access_rw);
-        ASSERT(rc == 0);
-        gfn_unlock(p2m, gfn, 0);
-        return 1;
-    }
-    else if ( p2ma == p2m_access_n2rwx )
-    {
-        ASSERT(npfec.write_access || npfec.read_access || npfec.insn_fetch);
-        rc = p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
-                            p2mt, p2m_access_rwx);
-        ASSERT(rc == 0);
-    }
-    gfn_unlock(p2m, gfn, 0);
-
-    /* Otherwise, check if there is a memory event listener, and send the message along */
-    if ( !mem_event_check_ring(&d->mem_event->access) || !req_ptr ) 
-    {
-        /* No listener */
-        if ( p2m->access_required ) 
-        {
-            gdprintk(XENLOG_INFO, "Memory access permissions failure, "
-                                  "no mem_event listener VCPU %d, dom %d\n",
-                                  v->vcpu_id, d->domain_id);
-            domain_crash(v->domain);
-            return 0;
-        }
-        else
-        {
-            gfn_lock(p2m, gfn, 0);
-            mfn = p2m->get_entry(p2m, gfn, &p2mt, &p2ma, 0, NULL);
-            if ( p2ma != p2m_access_n2rwx )
-            {
-                /* A listener is not required, so clear the access
-                 * restrictions.  This set must succeed: we have the
-                 * gfn locked and just did a successful get_entry(). */
-                rc = p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
-                                    p2mt, p2m_access_rwx);
-                ASSERT(rc == 0);
-            }
-            gfn_unlock(p2m, gfn, 0);
-            return 1;
-        }
-    }
-
-    *req_ptr = NULL;
-    req = xzalloc(mem_event_request_t);
-    if ( req )
-    {
-        *req_ptr = req;
-        req->reason = MEM_EVENT_REASON_VIOLATION;
-
-        /* Pause the current VCPU */
-        if ( p2ma != p2m_access_n2rwx )
-            req->flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
-
-        /* Send request to mem event */
-        req->gfn = gfn;
-        req->offset = gpa & ((1 << PAGE_SHIFT) - 1);
-        req->gla_valid = npfec.gla_valid;
-        req->gla = gla;
-        if ( npfec.kind == npfec_kind_with_gla )
-            req->fault_with_gla = 1;
-        else if ( npfec.kind == npfec_kind_in_gpt )
-            req->fault_in_gpt = 1;
-        req->access_r = npfec.read_access;
-        req->access_w = npfec.write_access;
-        req->access_x = npfec.insn_fetch;
-        req->vcpu_id = v->vcpu_id;
-    }
-
-    /* Pause the current VCPU */
-    if ( p2ma != p2m_access_n2rwx )
-        mem_event_vcpu_pause(v);
-
-    /* VCPU may be paused, return whether we promoted automatically */
-    return (p2ma == p2m_access_n2rwx);
-}
-
 /* Set access type for a region of pfns.
  * If start_pfn == -1ul, sets the default access type */
 long p2m_set_mem_access(struct domain *d, unsigned long pfn, uint32_t nr,
diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
index 455000d..eb7751e 100644
--- a/xen/arch/x86/mm/paging.c
+++ b/xen/arch/x86/mm/paging.c
@@ -22,15 +22,15 @@
 
 #include <xen/init.h>
 #include <xen/guest_access.h>
+#include <xen/numa.h>
 #include <asm/paging.h>
 #include <asm/shadow.h>
 #include <asm/p2m.h>
 #include <asm/hap.h>
 #include <asm/hvm/nestedhvm.h>
-#include <xen/numa.h>
+#include <asm/mm-locks.h>
 #include <xsm/xsm.h>
 
-#include "mm-locks.h"
 
 /* Printouts */
 #define PAGING_PRINTK(_f, _a...)                                     \
diff --git a/xen/common/mem_access.c b/xen/common/mem_access.c
index 1674d7a..8fc2942 100644
--- a/xen/common/mem_access.c
+++ b/xen/common/mem_access.c
@@ -136,7 +136,7 @@ int mem_access_memop(unsigned long cmd,
     return rc;
 }
 
-int mem_access_send_req(struct domain *d, mem_event_request_t *req)
+static int mem_access_send_req(struct domain *d, mem_event_request_t *req)
 {
     int rc = mem_event_claim_slot(d, &d->mem_event->access);
     if ( rc < 0 )
@@ -147,6 +147,147 @@ int mem_access_send_req(struct domain *d, mem_event_request_t *req)
     return 0;
 }
 
+int mem_access_check(paddr_t gpa, unsigned long gla, struct npfec npfec)
+{
+    bool_t violation;
+    mfn_t mfn;
+    p2m_access_t p2ma;
+    p2m_type_t p2mt;
+    mem_event_request_t *req;
+    int rc;
+    struct vcpu *v = current;
+    unsigned long gfn = gpa >> PAGE_SHIFT;
+    struct domain *d = v->domain;
+    struct p2m_domain* p2m = p2m_get_hostp2m(d);
+
+    /* First, handle rx2rw conversion automatically.
+     * These calls to p2m->set_entry() must succeed: we have the gfn
+     * locked and just did a successful get_entry(). */
+    p2m_gpfn_lock(p2m, gfn);
+    mfn = p2m->get_entry(p2m, gfn, &p2mt, &p2ma, 0, NULL);
+
+    /* Check if the access is against the permissions. */
+    switch ( p2ma )
+    {
+    case p2m_access_rwx:
+        violation = 0;
+        break;
+    case p2m_access_rw:
+        violation = npfec.insn_fetch;
+        break;
+    case p2m_access_wx:
+        violation = npfec.read_access;
+        break;
+    case p2m_access_rx:
+    case p2m_access_rx2rw:
+        violation = npfec.write_access;
+        break;
+    case p2m_access_x:
+        violation = npfec.read_access || npfec.write_access;
+        break;
+    case p2m_access_w:
+        violation = npfec.read_access || npfec.insn_fetch;
+        break;
+    case p2m_access_r:
+        violation = npfec.write_access || npfec.insn_fetch;
+        break;
+    case p2m_access_n:
+    case p2m_access_n2rwx:
+    default:
+        violation = npfec.read_access || npfec.write_access || npfec.insn_fetch;
+        break;
+    }
+
+    /* If no violation is found here, it needs to be reinjected. */
+    if ( !violation )
+    {
+        p2m_gpfn_unlock(p2m, gfn);
+        return -EFAULT;
+    }
+
+    /* Check for automatic setting promotion. */
+    if ( npfec.write_access && p2ma == p2m_access_rx2rw )
+    {
+        rc = p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2mt, p2m_access_rw);
+        ASSERT(rc == 0);
+        p2m_gpfn_unlock(p2m, gfn);
+        return 1;
+    }
+    else if ( p2ma == p2m_access_n2rwx )
+    {
+        ASSERT(npfec.write_access || npfec.read_access || npfec.insn_fetch);
+        rc = p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
+                            p2mt, p2m_access_rwx);
+        ASSERT(rc == 0);
+    }
+    p2m_gpfn_unlock(p2m, gfn);
+
+    /* Otherwise, check if there is a memory event listener, and send the message along */
+    if ( !mem_event_check_ring(&d->mem_event->access) )
+    {
+        /* No listener */
+        if ( p2m->access_required )
+        {
+            gdprintk(XENLOG_INFO, "Memory access permissions failure, "
+                                  "no mem_event listener VCPU %d, dom %d\n",
+                                  v->vcpu_id, d->domain_id);
+            domain_crash(v->domain);
+            return -EFAULT;
+        }
+        else
+        {
+            p2m_gpfn_lock(p2m, gfn);
+            mfn = p2m->get_entry(p2m, gfn, &p2mt, &p2ma, 0, NULL);
+            if ( p2ma != p2m_access_n2rwx )
+
+            {
+                /* A listener is not required, so clear the access
+                 * restrictions.  This set must succeed: we have the
+                 * gfn locked and just did a successful get_entry(). */
+                rc = p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
+                                    p2mt, p2m_access_rwx);
+                ASSERT(rc == 0);
+            }
+            p2m_gpfn_unlock(p2m, gfn);
+            return 1;
+        }
+    }
+
+    req = xzalloc(mem_event_request_t);
+    if ( req )
+    {
+        req->reason = MEM_EVENT_REASON_VIOLATION;
+
+        /* Pause the current VCPU */
+        if ( p2ma != p2m_access_n2rwx )
+            req->flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
+
+        /* Send request to mem event */
+        req->gfn = gfn;
+        req->offset = gpa & ((1 << PAGE_SHIFT) - 1);
+        req->gla_valid = npfec.gla_valid;
+        req->gla = gla;
+        if ( npfec.kind == npfec_kind_with_gla )
+            req->fault_with_gla = 1;
+        else if ( npfec.kind == npfec_kind_in_gpt )
+            req->fault_in_gpt = 1;
+        req->access_r = npfec.read_access;
+        req->access_w = npfec.write_access;
+        req->access_x = npfec.insn_fetch;
+        req->vcpu_id = v->vcpu_id;
+
+        mem_access_send_req(v->domain, req);
+        xfree(req);
+    }
+
+    /* Pause the current VCPU */
+    if ( p2ma != p2m_access_n2rwx )
+        mem_event_vcpu_pause(v);
+
+    /* VCPU may be paused, return whether we promoted automatically */
+    return (p2ma == p2m_access_n2rwx);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/include/asm-x86/mm-locks.h b/xen/include/asm-x86/mm-locks.h
new file mode 100644
index 0000000..7056a30
--- /dev/null
+++ b/xen/include/asm-x86/mm-locks.h
@@ -0,0 +1,299 @@
+/******************************************************************************
+ * mm-locks.h
+ *
+ * Spinlocks used by the code in arch/x86/mm.
+ *
+ * Copyright (c) 2011 Citrix Systems, inc. 
+ * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
+ * Copyright (c) 2006-2007 XenSource Inc.
+ * Copyright (c) 2006 Michael A Fetterman
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _MM_LOCKS_H
+#define _MM_LOCKS_H
+
+#include <asm/mem_sharing.h>
+
+/* Per-CPU variable for enforcing the lock ordering */
+DECLARE_PER_CPU(int, mm_lock_level);
+#define __get_lock_level()  (this_cpu(mm_lock_level))
+
+static inline void mm_lock_init(mm_lock_t *l)
+{
+    spin_lock_init(&l->lock);
+    l->locker = -1;
+    l->locker_function = "nobody";
+    l->unlock_level = 0;
+}
+
+static inline int mm_locked_by_me(mm_lock_t *l) 
+{
+    return (l->lock.recurse_cpu == current->processor);
+}
+
+/* If you see this crash, the numbers printed are lines in this file 
+ * where the offending locks are declared. */
+#define __check_lock_level(l)                           \
+do {                                                    \
+    if ( unlikely(__get_lock_level() > (l)) )           \
+    {                                                   \
+        printk("mm locking order violation: %i > %i\n", \
+               __get_lock_level(), (l));                \
+        BUG();                                          \
+    }                                                   \
+} while(0)
+
+#define __set_lock_level(l)         \
+do {                                \
+    __get_lock_level() = (l);       \
+} while(0)
+
+static inline void _mm_lock(mm_lock_t *l, const char *func, int level, int rec)
+{
+    if ( !((mm_locked_by_me(l)) && rec) ) 
+        __check_lock_level(level);
+    spin_lock_recursive(&l->lock);
+    if ( l->lock.recurse_cnt == 1 )
+    {
+        l->locker_function = func;
+        l->unlock_level = __get_lock_level();
+    }
+    else if ( (unlikely(!rec)) )
+        panic("mm lock already held by %s", l->locker_function);
+    __set_lock_level(level);
+}
+
+static inline void _mm_enforce_order_lock_pre(int level)
+{
+    __check_lock_level(level);
+}
+
+static inline void _mm_enforce_order_lock_post(int level, int *unlock_level,
+                                                unsigned short *recurse_count)
+{
+    if ( recurse_count )
+    {
+        if ( (*recurse_count)++ == 0 )
+        {
+            *unlock_level = __get_lock_level();
+        }
+    } else {
+        *unlock_level = __get_lock_level();
+    }
+    __set_lock_level(level);
+}
+
+
+static inline void mm_rwlock_init(mm_rwlock_t *l)
+{
+    rwlock_init(&l->lock);
+    l->locker = -1;
+    l->locker_function = "nobody";
+    l->unlock_level = 0;
+}
+
+static inline int mm_write_locked_by_me(mm_rwlock_t *l)
+{
+    return (l->locker == get_processor_id());
+}
+
+static inline void _mm_write_lock(mm_rwlock_t *l, const char *func, int level)
+{
+    if ( !mm_write_locked_by_me(l) )
+    {
+        __check_lock_level(level);
+        write_lock(&l->lock);
+        l->locker = get_processor_id();
+        l->locker_function = func;
+        l->unlock_level = __get_lock_level();
+        __set_lock_level(level);
+    }
+    l->recurse_count++;
+}
+
+static inline void mm_write_unlock(mm_rwlock_t *l)
+{
+    if ( --(l->recurse_count) != 0 )
+        return;
+    l->locker = -1;
+    l->locker_function = "nobody";
+    __set_lock_level(l->unlock_level);
+    write_unlock(&l->lock);
+}
+
+static inline void _mm_read_lock(mm_rwlock_t *l, int level)
+{
+    __check_lock_level(level);
+    read_lock(&l->lock);
+    /* There's nowhere to store the per-CPU unlock level so we can't
+     * set the lock level. */
+}
+
+static inline void mm_read_unlock(mm_rwlock_t *l)
+{
+    read_unlock(&l->lock);
+}
+
+/* This wrapper uses the line number to express the locking order below */
+#define declare_mm_lock(name)                                                 \
+    static inline void mm_lock_##name(mm_lock_t *l, const char *func, int rec)\
+    { _mm_lock(l, func, __LINE__, rec); }
+#define declare_mm_rwlock(name)                                               \
+    static inline void mm_write_lock_##name(mm_rwlock_t *l, const char *func) \
+    { _mm_write_lock(l, func, __LINE__); }                                    \
+    static inline void mm_read_lock_##name(mm_rwlock_t *l)                    \
+    { _mm_read_lock(l, __LINE__); }
+/* These capture the name of the calling function */
+#define mm_lock(name, l) mm_lock_##name(l, __func__, 0)
+#define mm_lock_recursive(name, l) mm_lock_##name(l, __func__, 1)
+#define mm_write_lock(name, l) mm_write_lock_##name(l, __func__)
+#define mm_read_lock(name, l) mm_read_lock_##name(l)
+
+/* This wrapper is intended for "external" locks which do not use
+ * the mm_lock_t types. Such locks inside the mm code are also subject
+ * to ordering constraints. */
+#define declare_mm_order_constraint(name)                                   \
+    static inline void mm_enforce_order_lock_pre_##name(void)               \
+    { _mm_enforce_order_lock_pre(__LINE__); }                               \
+    static inline void mm_enforce_order_lock_post_##name(                   \
+                        int *unlock_level, unsigned short *recurse_count)   \
+    { _mm_enforce_order_lock_post(__LINE__, unlock_level, recurse_count); } \
+
+static inline void mm_unlock(mm_lock_t *l)
+{
+    if ( l->lock.recurse_cnt == 1 )
+    {
+        l->locker_function = "nobody";
+        __set_lock_level(l->unlock_level);
+    }
+    spin_unlock_recursive(&l->lock);
+}
+
+static inline void mm_enforce_order_unlock(int unlock_level, 
+                                            unsigned short *recurse_count)
+{
+    if ( recurse_count )
+    {
+        BUG_ON(*recurse_count == 0);
+        if ( (*recurse_count)-- == 1 )
+        {
+            __set_lock_level(unlock_level);
+        }
+    } else {
+        __set_lock_level(unlock_level);
+    }
+}
+
+/************************************************************************
+ *                                                                      *
+ * To avoid deadlocks, these locks _MUST_ be taken in the order they're *
+ * declared in this file.  The locking functions will enforce this.     *
+ *                                                                      *
+ ************************************************************************/
+
+declare_mm_lock(nestedp2m)
+#define nestedp2m_lock(d)   mm_lock(nestedp2m, &(d)->arch.nested_p2m_lock)
+#define nestedp2m_unlock(d) mm_unlock(&(d)->arch.nested_p2m_lock)
+
+/* P2M lock (per-p2m-table)
+ *
+ * This protects all queries and updates to the p2m table.
+ * Queries may be made under the read lock but all modifications
+ * need the main (write) lock.
+ *
+ * The write lock is recursive as it is common for a code path to look
+ * up a gfn and later mutate it.
+ */
+
+declare_mm_rwlock(p2m);
+#define p2m_lock(p)           mm_write_lock(p2m, &(p)->lock);
+#define p2m_unlock(p)         mm_write_unlock(&(p)->lock);
+#define gfn_lock(p,g,o)       p2m_lock(p)
+#define gfn_unlock(p,g,o)     p2m_unlock(p)
+#define p2m_read_lock(p)      mm_read_lock(p2m, &(p)->lock)
+#define p2m_read_unlock(p)    mm_read_unlock(&(p)->lock)
+#define p2m_locked_by_me(p)   mm_write_locked_by_me(&(p)->lock)
+#define gfn_locked_by_me(p,g) p2m_locked_by_me(p)
+
+/* Sharing per page lock
+ *
+ * This is an external lock, not represented by an mm_lock_t. The memory
+ * sharing lock uses it to protect addition and removal of (gfn,domain)
+ * tuples to a shared page. We enforce order here against the p2m lock,
+ * which is taken after the page_lock to change the gfn's p2m entry.
+ *
+ * The lock is recursive because during share we lock two pages. */
+
+declare_mm_order_constraint(per_page_sharing)
+#define page_sharing_mm_pre_lock()   mm_enforce_order_lock_pre_per_page_sharing()
+#define page_sharing_mm_post_lock(l, r) \
+        mm_enforce_order_lock_post_per_page_sharing((l), (r))
+#define page_sharing_mm_unlock(l, r) mm_enforce_order_unlock((l), (r))
+
+/* Nested P2M lock (per-domain)
+ *
+ * A per-domain lock that protects the mapping from nested-CR3 to 
+ * nested-p2m.  In particular it covers:
+ * - the array of nested-p2m tables, and all LRU activity therein; and
+ * - setting the "cr3" field of any p2m table to a non-P2M_BASE_EAADR value.
+ *   (i.e. assigning a p2m table to be the shadow of that cr3 */
+
+/* PoD lock (per-p2m-table)
+ * 
+ * Protects private PoD data structs: entry and cache
+ * counts, page lists, sweep parameters. */
+
+declare_mm_lock(pod)
+#define pod_lock(p)           mm_lock(pod, &(p)->pod.lock)
+#define pod_unlock(p)         mm_unlock(&(p)->pod.lock)
+#define pod_locked_by_me(p)   mm_locked_by_me(&(p)->pod.lock)
+
+/* Page alloc lock (per-domain)
+ *
+ * This is an external lock, not represented by an mm_lock_t. However, 
+ * pod code uses it in conjunction with the p2m lock, and expecting
+ * the ordering which we enforce here.
+ * The lock is not recursive. */
+
+declare_mm_order_constraint(page_alloc)
+#define page_alloc_mm_pre_lock()   mm_enforce_order_lock_pre_page_alloc()
+#define page_alloc_mm_post_lock(l) mm_enforce_order_lock_post_page_alloc(&(l), NULL)
+#define page_alloc_mm_unlock(l)    mm_enforce_order_unlock((l), NULL)
+
+/* Paging lock (per-domain)
+ *
+ * For shadow pagetables, this lock protects
+ *   - all changes to shadow page table pages
+ *   - the shadow hash table
+ *   - the shadow page allocator 
+ *   - all changes to guest page table pages
+ *   - all changes to the page_info->tlbflush_timestamp
+ *   - the page_info->count fields on shadow pages 
+ * 
+ * For HAP, it protects the NPT/EPT tables and mode changes. 
+ * 
+ * It also protects the log-dirty bitmap from concurrent accesses (and
+ * teardowns, etc). */
+
+declare_mm_lock(paging)
+#define paging_lock(d)         mm_lock(paging, &(d)->arch.paging.lock)
+#define paging_lock_recursive(d) \
+                    mm_lock_recursive(paging, &(d)->arch.paging.lock)
+#define paging_unlock(d)       mm_unlock(&(d)->arch.paging.lock)
+#define paging_locked_by_me(d) mm_locked_by_me(&(d)->arch.paging.lock)
+
+#endif /* _MM_LOCKS_H */
diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
index ff1ec97..f4c185d 100644
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -32,6 +32,7 @@
 #include <xen/p2m-common.h>
 #include <asm/mem_sharing.h>
 #include <asm/page.h>    /* for pagetable_t */
+#include <asm/mm-locks.h> /* for gfn_lock */
 
 extern bool_t opt_hap_1gb, opt_hap_2mb;
 
@@ -584,6 +585,17 @@ long p2m_set_mem_access(struct domain *d, unsigned long start_pfn, uint32_t nr,
 int p2m_get_mem_access(struct domain *d, unsigned long pfn,
                        xenmem_access_t *access);
 
+/* Used by mem_access_check in mem_access common. */
+static inline void p2m_gpfn_lock(struct p2m_domain *p2m, unsigned long gpfn)
+{
+    gfn_lock(p2m, gpfn, 0);
+}
+
+static inline void p2m_gpfn_unlock(struct p2m_domain *p2m, unsigned long gpfn)
+{
+    gfn_unlock(p2m, gpfn, 0);
+}
+
 /* 
  * Internal functions, only called by other p2m code
  */
diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h
index ccc268d..66762c1 100644
--- a/xen/include/asm-x86/page.h
+++ b/xen/include/asm-x86/page.h
@@ -11,10 +11,6 @@
 #define PAGE_MASK           (~(PAGE_SIZE-1))
 #define PAGE_FLAG_MASK      (~0)
 
-#define PAGE_ORDER_4K       0
-#define PAGE_ORDER_2M       9
-#define PAGE_ORDER_1G       18
-
 #ifndef __ASSEMBLY__
 # include <asm/types.h>
 # include <xen/lib.h>
diff --git a/xen/include/xen/mem_access.h b/xen/include/xen/mem_access.h
index 6ceb2a4..7d921a6 100644
--- a/xen/include/xen/mem_access.h
+++ b/xen/include/xen/mem_access.h
@@ -29,11 +29,20 @@
 
 int mem_access_memop(unsigned long cmd,
                      XEN_GUEST_HANDLE_PARAM(xen_mem_access_op_t) arg);
-int mem_access_send_req(struct domain *d, mem_event_request_t *req);
 
 /* Resumes the running of the VCPU, restarting the last instruction */
 void mem_access_resume(struct domain *d);
 
+/*
+ * Return with rc < 0 means the fault wasn't triggered by mem_access.
+ * Return with rc == 0 means it was but rights were not automatically promoted.
+ * Return with rc > 0 means it was and rights were automatically promoted.
+ *
+ * If the fault was triggered by mem_access, this function automatically
+ * forwards the event to the listener.
+ */
+int mem_access_check(paddr_t gpa, unsigned long gla, struct npfec npfec);
+
 #else
 
 static inline
@@ -43,14 +52,14 @@ int mem_access_memop(unsigned long cmd,
     return -ENOSYS;
 }
 
+static inline void mem_access_resume(struct domain *d) {}
+
 static inline
-int mem_access_send_req(struct domain *d, mem_event_request_t *req)
+int mem_access_check(paddr_t gpa, unsigned long gla, struct npfec npfec)
 {
     return -ENOSYS;
 }
 
-static inline void mem_access_resume(struct domain *d) {}
-
 #endif /* HAS_MEM_ACCESS */
 
 #endif /* _XEN_ASM_MEM_ACCESS_H */
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index 74a65a6..bffd8e6 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -131,6 +131,10 @@ struct npfec {
 #define MAX_ORDER 20 /* 2^20 contiguous pages */
 #endif
 
+#define PAGE_ORDER_4K       0
+#define PAGE_ORDER_2M       9
+#define PAGE_ORDER_1G       18
+
 #define page_list_entry list_head
 
 #include <asm/mm.h>
-- 
2.1.0

