
[Xen-changelog] [xen-unstable] [XEN][X86_64] Use GLOBAL bit to build user mappings.



# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Node ID 6374af16a8a3c27d107fe9145f28bf08020fda28
# Parent  9061e1246906e8d1b7f6519c5252e6182f73214d
[XEN][X86_64] Use GLOBAL bit to build user mappings.
Avoids the need to flush user mappings when switching between
user and kernel contexts.
Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx>
Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
 xen/arch/x86/domain_build.c       |    9 +--
 xen/arch/x86/flushtlb.c           |  101 ++++++++++++++++++++++++++------------
 xen/arch/x86/mm.c                 |   26 +++++++++
 xen/arch/x86/x86_64/traps.c       |    6 ++
 xen/include/asm-x86/flushtlb.h    |    7 --
 xen/include/asm-x86/x86_64/page.h |   15 +++++
 6 files changed, 122 insertions(+), 42 deletions(-)
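
The mechanism in brief: on x86_64 a PV guest kernel runs in ring 3 (see
the comment in the domain_build.c hunk below), so user and kernel PTEs
both carry _PAGE_USER and can no longer be told apart by that bit. The
patch tags kernel PTEs with a software bit, _PAGE_GUEST_KERNEL, and makes
only user PTEs global. A minimal standalone sketch of the resulting
protection-flag values (the _PAGE_* constants are the architectural x86
PTE bits; only _PAGE_GUEST_KERNEL comes from this patch):

    /* sketch.c: illustration only, not Xen source */
    #include <stdio.h>

    #define _PAGE_PRESENT      0x001U
    #define _PAGE_RW           0x002U
    #define _PAGE_USER         0x004U
    #define _PAGE_ACCESSED     0x020U
    #define _PAGE_DIRTY        0x040U
    #define _PAGE_GLOBAL       0x100U
    #define _PAGE_GUEST_KERNEL (1U<<12)  /* software bit added by this patch */

    #define BASE_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
    #define L1_PROT   (BASE_PROT|_PAGE_GUEST_KERNEL)
    #define L2_PROT   (BASE_PROT|_PAGE_DIRTY)

    int main(void)
    {
        /* Kernel L1 entries are tagged _PAGE_GUEST_KERNEL but are *not*
         * global, so a plain CR3 write still flushes them; user entries
         * get _PAGE_GLOBAL instead (see the mm.c hunk) and survive. */
        printf("L1_PROT = %#x\n", L1_PROT);  /* 0x1027 */
        printf("L2_PROT = %#x\n", L2_PROT);  /* 0x67 */
        return 0;
    }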

diff -r 9061e1246906 -r 6374af16a8a3 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c       Tue Sep 19 09:40:26 2006 +0100
+++ b/xen/arch/x86/domain_build.c       Tue Sep 19 10:50:10 2006 +0100
@@ -74,10 +74,11 @@ string_param("dom0_ioports_disable", opt
 #define L3_PROT (_PAGE_PRESENT)
 #elif defined(__x86_64__)
 /* Allow ring-3 access in long mode as guest cannot use ring 1. */
-#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
-#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
-#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define BASE_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
+#define L1_PROT (BASE_PROT|_PAGE_GUEST_KERNEL)
+#define L2_PROT (BASE_PROT|_PAGE_DIRTY)
+#define L3_PROT (BASE_PROT|_PAGE_DIRTY)
+#define L4_PROT (BASE_PROT|_PAGE_DIRTY)
 #endif
 
 #define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
diff -r 9061e1246906 -r 6374af16a8a3 xen/arch/x86/flushtlb.c
--- a/xen/arch/x86/flushtlb.c   Tue Sep 19 09:40:26 2006 +0100
+++ b/xen/arch/x86/flushtlb.c   Tue Sep 19 10:50:10 2006 +0100
@@ -4,13 +4,14 @@
  * TLB flushes are timestamped using a global virtual 'clock' which ticks
  * on any TLB flush on any processor.
  * 
- * Copyright (c) 2003-2004, K A Fraser
+ * Copyright (c) 2003-2006, K A Fraser
  */
 
 #include <xen/config.h>
 #include <xen/sched.h>
 #include <xen/softirq.h>
 #include <asm/flushtlb.h>
+#include <asm/page.h>
 
 /* Debug builds: Wrap frequently to stress-test the wrap logic. */
 #ifdef NDEBUG
@@ -22,21 +23,17 @@ u32 tlbflush_clock = 1U;
 u32 tlbflush_clock = 1U;
 DEFINE_PER_CPU(u32, tlbflush_time);
 
-void write_cr3(unsigned long cr3)
+/*
+ * pre_flush(): Increment the virtual TLB-flush clock. Returns new clock value.
+ * 
+ * This must happen *before* we flush the TLB. If we do it after, we race other
+ * CPUs invalidating PTEs. For example, a page invalidated after the flush
+ * might get the old timestamp, but this CPU can speculatively fetch the
+ * mapping into its TLB after the flush but before inc'ing the clock.
+ */
+static u32 pre_flush(void)
 {
     u32 t, t1, t2;
-    unsigned long flags;
-
-    /* This non-reentrant function is sometimes called in interrupt context. */
-    local_irq_save(flags);
-
-    /*
-     * STEP 1. Increment the virtual clock *before* flushing the TLB.
-     *         If we do it after, we race other CPUs invalidating PTEs.
-     *         (e.g., a page invalidated after the flush might get the old 
-     *          timestamp, but this CPU can speculatively fetch the mapping
-     *          into its TLB after the flush but before inc'ing the clock).
-     */
 
     t = tlbflush_clock;
     do {
@@ -52,26 +49,68 @@ void write_cr3(unsigned long cr3)
     if ( unlikely(t2 == 0) )
         raise_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ);
 
-    /*
-     * STEP 2. Update %CR3, thereby flushing the TLB.
-     */
+ skip_clocktick:
+    return t2;
+}
 
- skip_clocktick:
+/*
+ * post_flush(): Update this CPU's timestamp with specified clock value.
+ * 
+ * Note that this happens *after* flushing the TLB, as otherwise we can race a 
+ * NEED_FLUSH() test on another CPU. (e.g., other CPU sees the updated CPU 
+ * stamp and so does not force a synchronous TLB flush, but the flush in this
+ * function hasn't yet occurred and so the TLB might be stale). The ordering 
+ * would only actually matter if this function were interruptible, and 
+ * something that abuses the stale mapping could exist in an interrupt 
+ * handler. In fact neither of these is the case, so really we are being ultra 
+ * paranoid.
+ */
+static void post_flush(u32 t)
+{
+    this_cpu(tlbflush_time) = t;
+}
+
+void write_cr3(unsigned long cr3)
+{
+    unsigned long flags;
+    u32 t;
+
+    /* This non-reentrant function is sometimes called in interrupt context. */
+    local_irq_save(flags);
+
+    t = pre_flush();
+
+#ifdef USER_MAPPINGS_ARE_GLOBAL
+    __pge_off();
     __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
+    __pge_on();
+#else
+    __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
+#endif
 
-    /*
-     * STEP 3. Update this CPU's timestamp. Note that this happens *after*
-     *         flushing the TLB, as otherwise we can race a NEED_FLUSH() test
-     *         on another CPU. (e.g., other CPU sees the updated CPU stamp and
-     *         so does not force a synchronous TLB flush, but the flush in this
-     *         function hasn't yet occurred and so the TLB might be stale).
-     *         The ordering would only actually matter if this function were
-     *         interruptible, and something that abuses the stale mapping could
-     *         exist in an interrupt handler. In fact neither of these is the
-     *         case, so really we are being ultra paranoid.
-     */
-
-    this_cpu(tlbflush_time) = t2;
+    post_flush(t);
 
     local_irq_restore(flags);
 }
+
+void local_flush_tlb(void)
+{
+    unsigned long flags;
+    u32 t;
+
+    /* This non-reentrant function is sometimes called in interrupt context. */
+    local_irq_save(flags);
+
+    t = pre_flush();
+
+#ifdef USER_MAPPINGS_ARE_GLOBAL
+    __pge_off();
+    __pge_on();
+#else
+    __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" );
+#endif
+
+    post_flush(t);
+
+    local_irq_restore(flags);
+}
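
__pge_off() and __pge_on(), used above, are defined elsewhere in the
tree, not in this patch. A plausible restatement of their semantics (an
assumption based on the architectural behaviour of CR4.PGE, not the
literal Xen definitions): clearing CR4.PGE invalidates all TLB entries,
global ones included, which is how local_flush_tlb() gets a full flush
even though global user mappings would survive a CR3 write.

    /* Hypothetical sketch; ring 0 only. */
    #define X86_CR4_PGE 0x0080UL  /* architectural CR4 bit: global pages */

    static inline unsigned long read_cr4(void)
    {
        unsigned long cr4;
        __asm__ __volatile__ ( "mov %%cr4, %0" : "=r" (cr4) );
        return cr4;
    }

    static inline void write_cr4(unsigned long val)
    {
        __asm__ __volatile__ ( "mov %0, %%cr4" : : "r" (val) );
    }

    /* Clearing CR4.PGE flushes the whole TLB, global entries included;
     * setting it again re-enables global pages with a clean TLB. */
    static inline void __pge_off(void) { write_cr4(read_cr4() & ~X86_CR4_PGE); }
    static inline void __pge_on(void)  { write_cr4(read_cr4() |  X86_CR4_PGE); }
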
diff -r 9061e1246906 -r 6374af16a8a3 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Tue Sep 19 09:40:26 2006 +0100
+++ b/xen/arch/x86/mm.c Tue Sep 19 10:50:10 2006 +0100
@@ -694,11 +694,30 @@ get_page_from_l4e(
 #endif /* 4 level */
 
 #ifdef __x86_64__
+
+#ifdef USER_MAPPINGS_ARE_GLOBAL
+#define adjust_guest_l1e(pl1e)                                               \
+    do {                                                                     \
+        if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) )                 \
+        {                                                                    \
+            /* _PAGE_GUEST_KERNEL page cannot have the Global bit set. */    \
+            if ( (l1e_get_flags((pl1e)) & (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL)) \
+                 == (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL) )                      \
+                MEM_LOG("Global bit is set to kernel page %lx",              \
+                        l1e_get_pfn((pl1e)));                                \
+            if ( !(l1e_get_flags((pl1e)) & _PAGE_USER) )                     \
+                l1e_add_flags((pl1e), (_PAGE_GUEST_KERNEL|_PAGE_USER));      \
+            if ( !(l1e_get_flags((pl1e)) & _PAGE_GUEST_KERNEL) )             \
+                l1e_add_flags((pl1e), (_PAGE_GLOBAL|_PAGE_USER));            \
+        }                                                                    \
+    } while ( 0 )
+#else
 #define adjust_guest_l1e(pl1e)                                  \
-    do  {                                                       \
+    do {                                                        \
         if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) )    \
             l1e_add_flags((pl1e), _PAGE_USER);                  \
     } while ( 0 )
+#endif
 
 #define adjust_guest_l2e(pl2e)                                  \
     do {                                                        \
@@ -717,10 +736,13 @@ get_page_from_l4e(
         if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) )    \
             l4e_add_flags((pl4e), _PAGE_USER);                  \
     } while ( 0 )
-#else
+
+#else /* !defined(__x86_64__) */
+
 #define adjust_guest_l1e(_p) ((void)0)
 #define adjust_guest_l2e(_p) ((void)0)
 #define adjust_guest_l3e(_p) ((void)0)
+
 #endif
 
 void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
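
The USER_MAPPINGS_ARE_GLOBAL branch of adjust_guest_l1e() is easier to
follow as a plain function. A standalone restatement (illustration only:
it operates on a bare flags word rather than an l1_pgentry_t, and can be
compiled and run in user space):

    #include <stdio.h>

    #define _PAGE_PRESENT      0x001U
    #define _PAGE_USER         0x004U
    #define _PAGE_GLOBAL       0x100U
    #define _PAGE_GUEST_KERNEL (1U<<12)

    static unsigned int adjust_guest_flags(unsigned int f)
    {
        if ( !(f & _PAGE_PRESENT) )
            return f;
        /* Guest says kernel-only: tag it _PAGE_GUEST_KERNEL and force
         * _PAGE_USER (ring-3 guest kernels need user access), but leave
         * it non-global so CR3 writes flush it. */
        if ( !(f & _PAGE_USER) )
            f |= _PAGE_GUEST_KERNEL | _PAGE_USER;
        /* Guest says user-accessible: make it global so it survives the
         * CR3 reload in toggle_guest_mode(). */
        if ( !(f & _PAGE_GUEST_KERNEL) )
            f |= _PAGE_GLOBAL | _PAGE_USER;
        return f;
    }

    int main(void)
    {
        printf("kernel pte: %#x\n", adjust_guest_flags(_PAGE_PRESENT));
        printf("user pte:   %#x\n",
               adjust_guest_flags(_PAGE_PRESENT | _PAGE_USER));
        return 0;  /* prints 0x1005 and 0x105 */
    }
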
diff -r 9061e1246906 -r 6374af16a8a3 xen/arch/x86/x86_64/traps.c
--- a/xen/arch/x86/x86_64/traps.c       Tue Sep 19 09:40:26 2006 +0100
+++ b/xen/arch/x86/x86_64/traps.c       Tue Sep 19 10:50:10 2006 +0100
@@ -15,6 +15,7 @@
 #include <asm/current.h>
 #include <asm/flushtlb.h>
 #include <asm/msr.h>
+#include <asm/page.h>
 #include <asm/shadow.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/support.h>
@@ -188,7 +189,12 @@ void toggle_guest_mode(struct vcpu *v)
     v->arch.flags ^= TF_kernel_mode;
     __asm__ __volatile__ ( "swapgs" );
     update_cr3(v);
+#ifdef USER_MAPPINGS_ARE_GLOBAL
+    /* Don't flush user global mappings from the TLB. Don't tick TLB clock. */
+    __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" );
+#else
     write_ptbase(v);
+#endif
 }
 
 unsigned long do_iret(void)
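
toggle_guest_mode() can take the raw CR3 write above because guest user
space is mapped identically in the user and kernel pagetables, so the
global user TLB entries stay valid across the switch. A sketch of why the
fast path skips both the PGE cycling and the clock tick (struct vcpu is
reduced to the one field used; an illustration, not Xen's layout):

    struct vcpu { unsigned long cr3; };  /* illustrative stand-in */

    static void switch_ptbase_fast(struct vcpu *v)
    {
        /* A plain CR3 write flushes only non-global (guest-kernel)
         * entries; global user entries survive. The tlbflush clock is
         * deliberately not ticked: a partial flush must never be counted
         * as a full one by a remote NEED_FLUSH() check, and leaving the
         * CPU's stamp untouched is the conservative way to ensure that. */
        __asm__ __volatile__ ( "mov %0, %%cr3" : : "r" (v->cr3) : "memory" );
    }
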
diff -r 9061e1246906 -r 6374af16a8a3 xen/include/asm-x86/flushtlb.h
--- a/xen/include/asm-x86/flushtlb.h    Tue Sep 19 09:40:26 2006 +0100
+++ b/xen/include/asm-x86/flushtlb.h    Tue Sep 19 10:50:10 2006 +0100
@@ -71,11 +71,8 @@ static inline unsigned long read_cr3(voi
 /* Write pagetable base and implicitly tick the tlbflush clock. */
 extern void write_cr3(unsigned long cr3);
 
-#define local_flush_tlb()                                         \
-    do {                                                          \
-        unsigned long cr3 = read_cr3();                           \
-        write_cr3(cr3);                                           \
-    } while ( 0 )
+/* Flush guest mappings from the TLB and implicitly tick the tlbflush clock. */
+extern void local_flush_tlb(void);
 
 #define local_flush_tlb_pge()                                     \
     do {                                                          \
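
For context on what "tick the tlbflush clock" buys: a remote CPU may skip
a synchronous flush IPI if the target CPU's recorded flush time shows it
has done a full flush since the stale mapping was last usable. A
simplified sketch of that test (the real NEED_FLUSH() in this header also
handles clock wrap, omitted here):

    #include <stdint.h>

    /* cpu_stamp: value post_flush() stored at that CPU's last full flush.
     * lastuse_stamp: clock reading when the old mapping could last have
     * entered that CPU's TLB. */
    static int need_flush(uint32_t cpu_stamp, uint32_t lastuse_stamp)
    {
        /* Flush needed unless the CPU fully flushed after last use. */
        return cpu_stamp <= lastuse_stamp;
    }
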
diff -r 9061e1246906 -r 6374af16a8a3 xen/include/asm-x86/x86_64/page.h
--- a/xen/include/asm-x86/x86_64/page.h Tue Sep 19 09:40:26 2006 +0100
+++ b/xen/include/asm-x86/x86_64/page.h Tue Sep 19 10:50:10 2006 +0100
@@ -93,6 +93,21 @@ typedef l4_pgentry_t root_pgentry_t;
 #define GRANT_PTE_FLAGS \
     (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_GNTTAB|_PAGE_USER)
 
+#define USER_MAPPINGS_ARE_GLOBAL
+#ifdef USER_MAPPINGS_ARE_GLOBAL
+/*
+ * Bit 12 of a 24-bit flag mask. This corresponds to bit 52 of a pte.
+ * This is needed to distinguish between user and kernel PTEs since _PAGE_USER
+ * is asserted for both.
+ */
+#define _PAGE_GUEST_KERNEL (1U<<12)
+/* Global bit is allowed to be set on L1 PTEs. Intended for user mappings. */
+#undef L1_DISALLOW_MASK
+#define L1_DISALLOW_MASK ((BASE_DISALLOW_MASK | _PAGE_GNTTAB) & ~_PAGE_GLOBAL)
+#else
+#define _PAGE_GUEST_KERNEL 0
+#endif
+
 #endif /* __X86_64_PAGE_H__ */
 
 /*
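
How flag bit 12 reaches pte bit 52: x86_64 Xen packs the PTE's low 12
bits and high 12 bits into one 24-bit flags word. A runnable sketch of
that packing, modelled on this header's get_pte_flags()/put_pte_flags()
(their definitions are not shown in this patch, so treat the macros below
as an assumption):

    #include <stdio.h>
    #include <stdint.h>

    typedef uint64_t intpte_t;

    /* Flag bits 0-11 map to pte bits 0-11; flag bits 12-23 map to pte
     * bits 52-63, so flag bit 12 (_PAGE_GUEST_KERNEL) lands in pte bit
     * 52, an available-to-software bit. */
    #define get_pte_flags(x) (((unsigned int)((x) >> 40) & ~0xFFFU) | \
                              ((unsigned int)(x) & 0xFFFU))
    #define put_pte_flags(x) ((((intpte_t)(x) & ~0xFFFU) << 40) | \
                              ((intpte_t)(x) & 0xFFFU))

    int main(void)
    {
        unsigned int flags = 1U << 12;  /* _PAGE_GUEST_KERNEL */
        intpte_t pte = put_pte_flags(flags);
        printf("pte bit 52 set: %d\n", (int)((pte >> 52) & 1));  /* 1 */
        printf("round trip: %#x\n", get_pte_flags(pte));  /* 0x1000 */
        return 0;
    }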
