[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] minios: support COW for a zero page



minios: support COW for a zero page
Permits to support sparse data.

Signed-off-by: Samuel Thibault <samuel.thibault@xxxxxxxxxxxxx>

diff -r 27ad7ed41be2 extras/mini-os/arch/x86/mm.c
--- a/extras/mini-os/arch/x86/mm.c      Fri Jan 18 15:55:13 2008 +0000
+++ b/extras/mini-os/arch/x86/mm.c      Fri Jan 18 16:02:32 2008 +0000
@@ -50,6 +50,7 @@
 #endif
 
 unsigned long *phys_to_machine_mapping;
+unsigned long mfn_zero;
 extern char stack[];
 extern void page_walk(unsigned long virt_addr);
 
@@ -492,10 +493,13 @@ static void clear_bootstrap(void)
 static void clear_bootstrap(void)
 {
     struct xen_memory_reservation reservation;
-    xen_pfn_t mfns[] = { virt_to_mfn(0), virt_to_mfn(&shared_info) };
+    xen_pfn_t mfns[] = { virt_to_mfn(&shared_info) };
     int n = sizeof(mfns)/sizeof(*mfns);
     pte_t nullpte = { };
 
+    /* Use page 0 as the CoW zero page */
+    memset(NULL, 0, PAGE_SIZE);
+    mfn_zero = pfn_to_mfn(0);
     if (HYPERVISOR_update_va_mapping(0, nullpte, UVMF_INVLPG))
        printk("Unable to unmap page 0\n");
 
diff -r 27ad7ed41be2 extras/mini-os/arch/x86/traps.c
--- a/extras/mini-os/arch/x86/traps.c   Fri Jan 18 15:55:13 2008 +0000
+++ b/extras/mini-os/arch/x86/traps.c   Fri Jan 18 16:02:32 2008 +0000
@@ -118,6 +118,46 @@ void page_walk(unsigned long virt_addres
 
 }
 
+static int handle_cow(unsigned long addr) {
+        pgentry_t *tab = (pgentry_t *)start_info.pt_base, page;
+       unsigned long new_page;
+       int rc;
+
+#if defined(__x86_64__)
+        page = tab[l4_table_offset(addr)];
+       if (!(page & _PAGE_PRESENT))
+           return 0;
+        tab = pte_to_virt(page);
+#endif
+#if defined(__x86_64__) || defined(CONFIG_X86_PAE)
+        page = tab[l3_table_offset(addr)];
+       if (!(page & _PAGE_PRESENT))
+           return 0;
+        tab = pte_to_virt(page);
+#endif
+        page = tab[l2_table_offset(addr)];
+       if (!(page & _PAGE_PRESENT))
+           return 0;
+        tab = pte_to_virt(page);
+        
+        page = tab[l1_table_offset(addr)];
+       if (!(page & _PAGE_PRESENT))
+           return 0;
+       /* Only support CoW for the zero page.  */
+       if (PHYS_PFN(page) != mfn_zero)
+           return 0;
+
+       new_page = alloc_pages(0);
+       memset((void*) new_page, 0, PAGE_SIZE);
+
+       rc = HYPERVISOR_update_va_mapping(addr & PAGE_MASK, 
__pte(virt_to_mach(new_page) | L1_PROT), UVMF_INVLPG);
+       if (!rc)
+               return 1;
+
+       printk("Map zero page to %lx failed: %d.\n", addr, rc);
+       return 0;
+}
+
 #define read_cr2() \
         (HYPERVISOR_shared_info->vcpu_info[smp_processor_id()].arch.cr2)
 
@@ -126,6 +166,10 @@ void do_page_fault(struct pt_regs *regs,
 void do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
     unsigned long addr = read_cr2();
+
+    if ((error_code & TRAP_PF_WRITE) && handle_cow(addr))
+       return;
+
     /* If we are already handling a page fault, and got another one
        that means we faulted in pagetable walk. Continuing here would cause
        a recursive fault */       
diff -r 27ad7ed41be2 extras/mini-os/include/ia64/arch_mm.h
--- a/extras/mini-os/include/ia64/arch_mm.h     Fri Jan 18 15:55:13 2008 +0000
+++ b/extras/mini-os/include/ia64/arch_mm.h     Fri Jan 18 16:02:32 2008 +0000
@@ -37,5 +37,7 @@
 #define STACK_SIZE              (PAGE_SIZE * (1 << STACK_SIZE_PAGE_ORDER))
 
 #define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, 0, 0)
+/* TODO */
+#define map_zero(n, a) map_frames_ex(NULL, n, 0, 0, a, DOMID_SELF, 0, 0)
 
 #endif /* __ARCH_MM_H__ */
diff -r 27ad7ed41be2 extras/mini-os/include/types.h
--- a/extras/mini-os/include/types.h    Fri Jan 18 15:55:13 2008 +0000
+++ b/extras/mini-os/include/types.h    Fri Jan 18 16:02:32 2008 +0000
@@ -57,6 +57,13 @@ typedef struct { unsigned long pte; } pt
 typedef struct { unsigned long pte; } pte_t;
 #endif /* __i386__ || __x86_64__ */
 
+#if !defined(CONFIG_X86_PAE)
+#define __pte(x) ((pte_t) { (x) } )
+#else
+#define __pte(x) ({ unsigned long long _x = (x);        \
+    ((pte_t) {(unsigned long)(_x), (unsigned long)(_x>>32)}); })
+#endif
+
 typedef  u8 uint8_t;
 typedef  s8 int8_t;
 typedef u16 uint16_t;
diff -r 27ad7ed41be2 extras/mini-os/include/x86/arch_mm.h
--- a/extras/mini-os/include/x86/arch_mm.h      Fri Jan 18 15:55:13 2008 +0000
+++ b/extras/mini-os/include/x86/arch_mm.h      Fri Jan 18 16:02:32 2008 +0000
@@ -144,12 +144,14 @@ typedef unsigned long pgentry_t;
 
 #if defined(__i386__)
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L1_PROT_RO (_PAGE_PRESENT|_PAGE_ACCESSED)
 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY |_PAGE_USER)
 #if defined(CONFIG_X86_PAE)
 #define L3_PROT (_PAGE_PRESENT)
 #endif /* CONFIG_X86_PAE */
 #elif defined(__x86_64__)
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
+#define L1_PROT_RO (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_USER)
 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
 #define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
 #define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
@@ -190,6 +192,7 @@ typedef unsigned long maddr_t;
 
 extern unsigned long *phys_to_machine_mapping;
 extern char _text, _etext, _erodata, _edata, _end;
+extern unsigned long mfn_zero;
 #define pfn_to_mfn(_pfn) (phys_to_machine_mapping[(_pfn)])
 static __inline__ maddr_t phys_to_machine(paddr_t phys)
 {
@@ -224,5 +227,6 @@ static __inline__ paddr_t machine_to_phy
 #define pte_to_virt(_pte)          to_virt(mfn_to_pfn(pte_to_mfn(_pte)) << 
PAGE_SHIFT)
 
 #define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, 0, L1_PROT)
+#define map_zero(n, a) map_frames_ex(&mfn_zero, n, 0, 0, a, DOMID_SELF, 0, 
L1_PROT_RO)
 
 #endif /* _ARCH_MM_H_ */
diff -r 27ad7ed41be2 extras/mini-os/include/x86/traps.h
--- a/extras/mini-os/include/x86/traps.h        Fri Jan 18 15:55:13 2008 +0000
+++ b/extras/mini-os/include/x86/traps.h        Fri Jan 18 16:02:32 2008 +0000
@@ -70,4 +70,8 @@ struct pt_regs {
 
 void dump_regs(struct pt_regs *regs);
 
+#define TRAP_PF_PROT   0x1
+#define TRAP_PF_WRITE  0x2
+#define TRAP_PF_USER   0x4
+
 #endif /* _TRAPS_H_ */

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.