[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] [XEN][X86_64] Use GLOBAL bit to build user mappings.
# HG changeset patch # User kfraser@xxxxxxxxxxxxxxxxxxxxx # Node ID 6374af16a8a3c27d107fe9145f28bf08020fda28 # Parent 9061e1246906e8d1b7f6519c5252e6182f73214d [XEN][X86_64] USe GLOBAL bit to build user mappings. Avoids need to flush user mappings when switching between user and kernel contexts. Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx> Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx> --- xen/arch/x86/domain_build.c | 9 +-- xen/arch/x86/flushtlb.c | 101 ++++++++++++++++++++++++++------------ xen/arch/x86/mm.c | 26 +++++++++ xen/arch/x86/x86_64/traps.c | 6 ++ xen/include/asm-x86/flushtlb.h | 7 -- xen/include/asm-x86/x86_64/page.h | 15 +++++ 6 files changed, 122 insertions(+), 42 deletions(-) diff -r 9061e1246906 -r 6374af16a8a3 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Tue Sep 19 09:40:26 2006 +0100 +++ b/xen/arch/x86/domain_build.c Tue Sep 19 10:50:10 2006 +0100 @@ -74,10 +74,11 @@ string_param("dom0_ioports_disable", opt #define L3_PROT (_PAGE_PRESENT) #elif defined(__x86_64__) /* Allow ring-3 access in long mode as guest cannot use ring 1. 
*/ -#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER) -#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) -#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) -#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) +#define BASE_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER) +#define L1_PROT (BASE_PROT|_PAGE_GUEST_KERNEL) +#define L2_PROT (BASE_PROT|_PAGE_DIRTY) +#define L3_PROT (BASE_PROT|_PAGE_DIRTY) +#define L4_PROT (BASE_PROT|_PAGE_DIRTY) #endif #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) diff -r 9061e1246906 -r 6374af16a8a3 xen/arch/x86/flushtlb.c --- a/xen/arch/x86/flushtlb.c Tue Sep 19 09:40:26 2006 +0100 +++ b/xen/arch/x86/flushtlb.c Tue Sep 19 10:50:10 2006 +0100 @@ -4,13 +4,14 @@ * TLB flushes are timestamped using a global virtual 'clock' which ticks * on any TLB flush on any processor. * - * Copyright (c) 2003-2004, K A Fraser + * Copyright (c) 2003-2006, K A Fraser */ #include <xen/config.h> #include <xen/sched.h> #include <xen/softirq.h> #include <asm/flushtlb.h> +#include <asm/page.h> /* Debug builds: Wrap frequently to stress-test the wrap logic. */ #ifdef NDEBUG @@ -22,21 +23,17 @@ u32 tlbflush_clock = 1U; u32 tlbflush_clock = 1U; DEFINE_PER_CPU(u32, tlbflush_time); -void write_cr3(unsigned long cr3) +/* + * pre_flush(): Increment the virtual TLB-flush clock. Returns new clock value. + * + * This must happen *before* we flush the TLB. If we do it after, we race other + * CPUs invalidating PTEs. For example, a page invalidated after the flush + * might get the old timestamp, but this CPU can speculatively fetch the + * mapping into its TLB after the flush but before inc'ing the clock. + */ +static u32 pre_flush(void) { u32 t, t1, t2; - unsigned long flags; - - /* This non-reentrant function is sometimes called in interrupt context. */ - local_irq_save(flags); - - /* - * STEP 1. Increment the virtual clock *before* flushing the TLB. 
- * If we do it after, we race other CPUs invalidating PTEs. - * (e.g., a page invalidated after the flush might get the old - * timestamp, but this CPU can speculatively fetch the mapping - * into its TLB after the flush but before inc'ing the clock). - */ t = tlbflush_clock; do { @@ -52,26 +49,68 @@ void write_cr3(unsigned long cr3) if ( unlikely(t2 == 0) ) raise_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ); - /* - * STEP 2. Update %CR3, thereby flushing the TLB. - */ + skip_clocktick: + return t2; +} - skip_clocktick: +/* + * post_flush(): Update this CPU's timestamp with specified clock value. + * + * Note that this happens *after* flushing the TLB, as otherwise we can race a + * NEED_FLUSH() test on another CPU. (e.g., other CPU sees the updated CPU + * stamp and so does not force a synchronous TLB flush, but the flush in this + * function hasn't yet occurred and so the TLB might be stale). The ordering + * would only actually matter if this function were interruptible, and + * something that abuses the stale mapping could exist in an interrupt + * handler. In fact neither of these is the case, so really we are being ultra + * paranoid. + */ +static void post_flush(u32 t) +{ + this_cpu(tlbflush_time) = t; +} + +void write_cr3(unsigned long cr3) +{ + unsigned long flags; + u32 t; + + /* This non-reentrant function is sometimes called in interrupt context. */ + local_irq_save(flags); + + t = pre_flush(); + +#ifdef USER_MAPPINGS_ARE_GLOBAL + __pge_off(); __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (cr3) : "memory" ); + __pge_on(); +#else + __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (cr3) : "memory" ); +#endif - /* - * STEP 3. Update this CPU's timestamp. Note that this happens *after* - * flushing the TLB, as otherwise we can race a NEED_FLUSH() test - * on another CPU. (e.g., other CPU sees the updated CPU stamp and - * so does not force a synchronous TLB flush, but the flush in this - * function hasn't yet occurred and so the TLB might be stale). 
- * The ordering would only actually matter if this function were - * interruptible, and something that abuses the stale mapping could - * exist in an interrupt handler. In fact neither of these is the - * case, so really we are being ultra paranoid. - */ - - this_cpu(tlbflush_time) = t2; + post_flush(t); local_irq_restore(flags); } + +void local_flush_tlb(void) +{ + unsigned long flags; + u32 t; + + /* This non-reentrant function is sometimes called in interrupt context. */ + local_irq_save(flags); + + t = pre_flush(); + +#ifdef USER_MAPPINGS_ARE_GLOBAL + __pge_off(); + __pge_on(); +#else + __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" ); +#endif + + post_flush(t); + + local_irq_restore(flags); +} diff -r 9061e1246906 -r 6374af16a8a3 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Tue Sep 19 09:40:26 2006 +0100 +++ b/xen/arch/x86/mm.c Tue Sep 19 10:50:10 2006 +0100 @@ -694,11 +694,30 @@ get_page_from_l4e( #endif /* 4 level */ #ifdef __x86_64__ + +#ifdef USER_MAPPINGS_ARE_GLOBAL +#define adjust_guest_l1e(pl1e) \ + do { \ + if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) ) \ + { \ + /* _PAGE_GUEST_KERNEL page cannot have the Global bit set. 
*/ \ + if ( (l1e_get_flags((pl1e)) & (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL)) \ + == (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL) ) \ + MEM_LOG("Global bit is set to kernel page %lx", \ + l1e_get_pfn((pl1e))); \ + if ( !(l1e_get_flags((pl1e)) & _PAGE_USER) ) \ + l1e_add_flags((pl1e), (_PAGE_GUEST_KERNEL|_PAGE_USER)); \ + if ( !(l1e_get_flags((pl1e)) & _PAGE_GUEST_KERNEL) ) \ + l1e_add_flags((pl1e), (_PAGE_GLOBAL|_PAGE_USER)); \ + } \ + } while ( 0 ) +#else #define adjust_guest_l1e(pl1e) \ - do { \ + do { \ if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) ) \ l1e_add_flags((pl1e), _PAGE_USER); \ } while ( 0 ) +#endif #define adjust_guest_l2e(pl2e) \ do { \ @@ -717,10 +736,13 @@ get_page_from_l4e( if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) ) \ l4e_add_flags((pl4e), _PAGE_USER); \ } while ( 0 ) -#else + +#else /* !defined(__x86_64__) */ + #define adjust_guest_l1e(_p) ((void)0) #define adjust_guest_l2e(_p) ((void)0) #define adjust_guest_l3e(_p) ((void)0) + #endif void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d) diff -r 9061e1246906 -r 6374af16a8a3 xen/arch/x86/x86_64/traps.c --- a/xen/arch/x86/x86_64/traps.c Tue Sep 19 09:40:26 2006 +0100 +++ b/xen/arch/x86/x86_64/traps.c Tue Sep 19 10:50:10 2006 +0100 @@ -15,6 +15,7 @@ #include <asm/current.h> #include <asm/flushtlb.h> #include <asm/msr.h> +#include <asm/page.h> #include <asm/shadow.h> #include <asm/hvm/hvm.h> #include <asm/hvm/support.h> @@ -188,7 +189,12 @@ void toggle_guest_mode(struct vcpu *v) v->arch.flags ^= TF_kernel_mode; __asm__ __volatile__ ( "swapgs" ); update_cr3(v); +#ifdef USER_MAPPINGS_ARE_GLOBAL + /* Don't flush user global mappings from the TLB. Don't tick TLB clock. 
*/ + __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" ); +#else write_ptbase(v); +#endif } unsigned long do_iret(void) diff -r 9061e1246906 -r 6374af16a8a3 xen/include/asm-x86/flushtlb.h --- a/xen/include/asm-x86/flushtlb.h Tue Sep 19 09:40:26 2006 +0100 +++ b/xen/include/asm-x86/flushtlb.h Tue Sep 19 10:50:10 2006 +0100 @@ -71,11 +71,8 @@ static inline unsigned long read_cr3(voi /* Write pagetable base and implicitly tick the tlbflush clock. */ extern void write_cr3(unsigned long cr3); -#define local_flush_tlb() \ - do { \ - unsigned long cr3 = read_cr3(); \ - write_cr3(cr3); \ - } while ( 0 ) +/* Flush guest mappings from the TLB and implicitly tick the tlbflush clock. */ +extern void local_flush_tlb(void); #define local_flush_tlb_pge() \ do { \ diff -r 9061e1246906 -r 6374af16a8a3 xen/include/asm-x86/x86_64/page.h --- a/xen/include/asm-x86/x86_64/page.h Tue Sep 19 09:40:26 2006 +0100 +++ b/xen/include/asm-x86/x86_64/page.h Tue Sep 19 10:50:10 2006 +0100 @@ -93,6 +93,21 @@ typedef l4_pgentry_t root_pgentry_t; #define GRANT_PTE_FLAGS \ (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_GNTTAB|_PAGE_USER) +#define USER_MAPPINGS_ARE_GLOBAL +#ifdef USER_MAPPINGS_ARE_GLOBAL +/* + * Bit 12 of a 24-bit flag mask. This corresponds to bit 52 of a pte. + * This is needed to distinguish between user and kernel PTEs since _PAGE_USER + * is asserted for both. + */ +#define _PAGE_GUEST_KERNEL (1U<<12) +/* Global bit is allowed to be set on L1 PTEs. Intended for user mappings. */ +#undef L1_DISALLOW_MASK +#define L1_DISALLOW_MASK ((BASE_DISALLOW_MASK | _PAGE_GNTTAB) & ~_PAGE_GLOBAL) +#else +#define _PAGE_GUEST_KERNEL 0 +#endif + #endif /* __X86_64_PAGE_H__ */ /* _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |