[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 1/2] x86/mm: remove arch-specific ptep_get_and_clear() function



From: David Vrabel <david.vrabel@xxxxxxxxxx>

x86 provides a ptep_get_and_clear() function instead of using the
generic implementation so it can atomically get and clear the PTE.

It is not clear why x86 has this requirement.  PTE updates are done
while the appropriate PTE lock are held, so presumably, it is an
attempt to ensure that the accessed and dirty bits of the PTE are
saved even if they have been updated by the hardware due to a
concurrent access on another CPU.

However, the atomic exchange is not sufficient for saving the dirty
bit as if there is a TLB entry for this page on another CPU then the
dirty bit may be written by that processor after the PTE is cleared
but before the TLB entry is flushed.  It is also not clear from the
Intel SDM[1] if the processor's read of the PTE and the write to set
the accessed bit are atomic or not.

With this patch the get/clear becomes a read of the PTE and call to
pte_clear() which allows the clears to be batched by some
paravirtualized guests (such as Xen guests).  This can lead to
significant performance improvements in munmap().  e.g., for Xen, a
trap-and-emulate[2] for every page becomes a hypercall for every 31
pages.

There should be no change in the performance on native or for KVM
guests.  There may be a performance regression with lguest guests as
an optimization for avoiding calling pte_update() when doing a full
teardown of an mm is removed.

As a consequence there is a slight increase of the window where a set
(by the processor) of the accessed or dirty bit may be lost.  This
shouldn't change the behaviour of user space in any meaningful way.
Any user space application accessing a mmap()'d region that is being
munmap()'d or mremap()'d is already going to have unpredicatable
behaviour -- the access may succeed, it may fault, or the write to the
mapped file may be lost.

[1] Intel Software Developer's Manual Vol 3A section 4.8 says nothing
on this.

[2] For 32-bit guests, two traps are required for every page to update
both halves of the PTE.

Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
---
 arch/x86/include/asm/pgtable-2level.h |    9 --------
 arch/x86/include/asm/pgtable-3level.h |   16 --------------
 arch/x86/include/asm/pgtable.h        |   37 ---------------------------------
 arch/x86/include/asm/pgtable_64.h     |   13 -----------
 4 files changed, 0 insertions(+), 75 deletions(-)

diff --git a/arch/x86/include/asm/pgtable-2level.h 
b/arch/x86/include/asm/pgtable-2level.h
index 98391db..be7c20e 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -38,15 +38,6 @@ static inline void native_pte_clear(struct mm_struct *mm,
 }
 
 #ifdef CONFIG_SMP
-static inline pte_t native_ptep_get_and_clear(pte_t *xp)
-{
-       return __pte(xchg(&xp->pte_low, 0));
-}
-#else
-#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
-#endif
-
-#ifdef CONFIG_SMP
 static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
 {
        return __pmd(xchg((pmdval_t *)xp, 0));
diff --git a/arch/x86/include/asm/pgtable-3level.h 
b/arch/x86/include/asm/pgtable-3level.h
index 43876f1..42101e9 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -134,22 +134,6 @@ static inline void pud_clear(pud_t *pudp)
 }
 
 #ifdef CONFIG_SMP
-static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
-{
-       pte_t res;
-
-       /* xchg acts as a barrier before the setting of the high bits */
-       res.pte_low = xchg(&ptep->pte_low, 0);
-       res.pte_high = ptep->pte_high;
-       ptep->pte_high = 0;
-
-       return res;
-}
-#else
-#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
-#endif
-
-#ifdef CONFIG_SMP
 union split_pmd {
        struct {
                u32 pmd_low;
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 49afb3f..4413bed 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -598,16 +598,6 @@ static inline int pgd_none(pgd_t pgd)
 
 extern int direct_gbpages;
 
-/* local pte updates need not use xchg for locking */
-static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
-{
-       pte_t res = *ptep;
-
-       /* Pure native function needs no input for mm, addr */
-       native_pte_clear(NULL, 0, ptep);
-       return res;
-}
-
 static inline pmd_t native_local_pmdp_get_and_clear(pmd_t *pmdp)
 {
        pmd_t res = *pmdp;
@@ -668,33 +658,6 @@ extern int ptep_test_and_clear_young(struct vm_area_struct 
*vma,
 extern int ptep_clear_flush_young(struct vm_area_struct *vma,
                                  unsigned long address, pte_t *ptep);
 
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long 
addr,
-                                      pte_t *ptep)
-{
-       pte_t pte = native_ptep_get_and_clear(ptep);
-       pte_update(mm, addr, ptep);
-       return pte;
-}
-
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
-static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
-                                           unsigned long addr, pte_t *ptep,
-                                           int full)
-{
-       pte_t pte;
-       if (full) {
-               /*
-                * Full address destruction in progress; paravirt does not
-                * care about updates and native needs no locking
-                */
-               pte = native_local_ptep_get_and_clear(ptep);
-       } else {
-               pte = ptep_get_and_clear(mm, addr, ptep);
-       }
-       return pte;
-}
-
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm,
                                      unsigned long addr, pte_t *ptep)
diff --git a/arch/x86/include/asm/pgtable_64.h 
b/arch/x86/include/asm/pgtable_64.h
index 975f709..cc12d27 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -69,19 +69,6 @@ static inline void native_pmd_clear(pmd_t *pmd)
        native_set_pmd(pmd, native_make_pmd(0));
 }
 
-static inline pte_t native_ptep_get_and_clear(pte_t *xp)
-{
-#ifdef CONFIG_SMP
-       return native_make_pte(xchg(&xp->pte, 0));
-#else
-       /* native_local_ptep_get_and_clear,
-          but duplicated because of cyclic dependency */
-       pte_t ret = *xp;
-       native_pte_clear(NULL, 0, xp);
-       return ret;
-#endif
-}
-
 static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
 {
 #ifdef CONFIG_SMP
-- 
1.7.2.5


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.