[Xen-changelog] Attached patch allows PAE xenlinux to run in the shadow mode using
# HG changeset patch # User smh22@xxxxxxxxxxxxxxxxxxxx # Node ID 995e94c4802e5c0376b0483f3b2473a8f7d7808e # Parent e023e37b3c7aca1d5702715eed4b586e267a75ee Attached patch allows PAE xenlinux to run in the shadow mode using log-dirty guest-refcount, which is required to support save/restore/relocate. We can turn on/off the mode every 5-sec interval while doing kernel build (make -j4), for example, and it survives for hours. We are still restoring log-dirty mode for x86_64 xenlinux, which is not very stable right now, but I believe it should be done very soon. We also checked that it did not break 64-bit VMX domains, which uses different sub-mode of the shadow mode. Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx> Signed-off-by: Xiaohui Xin <xiaohui.xin@xxxxxxxxx> diff -r e023e37b3c7a -r 995e94c4802e xen/arch/x86/shadow.c --- a/xen/arch/x86/shadow.c Fri Nov 11 18:02:49 2005 +++ b/xen/arch/x86/shadow.c Fri Nov 11 18:11:13 2005 @@ -22,7 +22,7 @@ * Jun Nakajima <jun.nakajima@xxxxxxxxx> * Chengyuan Li <chengyuan.li@xxxxxxxxx> * - * Extended to support 64-bit guests. + * Extended to support 32-bit PAE and 64-bit guests. */ #include <xen/config.h> @@ -34,6 +34,7 @@ #include <xen/event.h> #include <xen/sched.h> #include <xen/trace.h> +#include <asm/shadow_64.h> extern void free_shadow_pages(struct domain *d); @@ -44,13 +45,13 @@ #endif #if CONFIG_PAGING_LEVELS == 3 -#include <asm/shadow_64.h> static unsigned long shadow_l3_table( struct domain *d, unsigned long gpfn, unsigned long gmfn); +static inline void validate_bl2e_change( struct domain *d, + guest_root_pgentry_t *new_gle_p, pgentry_64_t *shadow_l3, int index); #endif #if CONFIG_PAGING_LEVELS == 4 -#include <asm/shadow_64.h> static unsigned long shadow_l4_table( struct domain *d, unsigned long gpfn, unsigned long gmfn); static void shadow_map_into_current(struct vcpu *v, @@ -222,7 +223,7 @@ { if (d->arch.ops->guest_paging_levels == PAGING_L2) { -#if CONFIG_PAGING_LEVELS >= 4 +#if CONFIG_PAGING_LEVELS >= 3 /* For 32-bit VMX guest, 2 shadow L1s to simulate 1 guest L1 * So need allocate 2 continues shadow L1 each time. 
*/ @@ -313,6 +314,8 @@ goto fail; perfc_incr(shadow_l3_pages); d->arch.shadow_page_count++; + if ( PGT_l3_page_table == PGT_root_page_table ) + pin = 1; break; case PGT_l4_shadow: @@ -375,7 +378,7 @@ { if (d->arch.ops->guest_paging_levels == PAGING_L2) { -#if CONFIG_PAGING_LEVELS >=4 +#if CONFIG_PAGING_LEVELS >=3 free_domheap_pages(page, SL1_ORDER); #else free_domheap_page(page); @@ -427,14 +430,10 @@ hl2 = map_domain_page(hl2mfn); -#ifdef __i386__ if ( shadow_mode_external(d) ) limit = L2_PAGETABLE_ENTRIES; else limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE; -#else - limit = 0; /* XXX x86/64 XXX */ -#endif memset(hl2, 0, limit * sizeof(l1_pgentry_t)); @@ -540,7 +539,7 @@ SH_VLOG("shadow_l2_table(%lx -> %lx)", gmfn, smfn); return smfn; } -#endif +#endif /* CONFIG_PAGING_LEVELS == 2 */ static void shadow_map_l1_into_current_l2(unsigned long va) { @@ -549,7 +548,7 @@ l1_pgentry_t *spl1e; l2_pgentry_t sl2e; guest_l1_pgentry_t *gpl1e; - guest_l2_pgentry_t gl2e; + guest_l2_pgentry_t gl2e = {0}; unsigned long gl1pfn, gl1mfn, sl1mfn; int i, init_table = 0; @@ -593,14 +592,14 @@ ASSERT( !(l2e_get_flags(old_sl2e) & _PAGE_PRESENT) ); #endif -#if CONFIG_PAGING_LEVELS >=4 +#if CONFIG_PAGING_LEVELS >=3 if (d->arch.ops->guest_paging_levels == PAGING_L2) { - /* for 32-bit VMX guest on 64-bit host, + /* for 32-bit VMX guest on 64-bit or PAE host, * need update two L2 entries each time */ if ( !get_shadow_ref(sl1mfn)) - BUG(); + BUG(); l2pde_general(d, &gl2e, &sl2e, sl1mfn); __guest_set_l2e(v, va, &gl2e); __shadow_set_l2e(v, va & ~((1<<L2_PAGETABLE_SHIFT_32) - 1), &sl2e); @@ -625,19 +624,17 @@ int index = guest_l1_table_offset(va); int min = 1, max = 0; - unsigned long entries, pt_va; - l1_pgentry_t tmp_sl1e; - guest_l1_pgentry_t tmp_gl1e;//Prepare for double compile - - - entries = PAGE_SIZE / sizeof(guest_l1_pgentry_t); - pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(entries - 1)) << L1_PAGETABLE_SHIFT; - gpl1e = (guest_l1_pgentry_t *) __guest_get_l1e(v, pt_va, &tmp_gl1e); + unsigned long tmp_gmfn; + l2_pgentry_t tmp_sl2e = {0}; + guest_l2_pgentry_t tmp_gl2e = {0}; + + __guest_get_l2e(v, va, &tmp_gl2e); + tmp_gmfn = __gpfn_to_mfn(d, l2e_get_pfn(tmp_gl2e)); + gpl1e = (guest_l1_pgentry_t *) map_domain_page(tmp_gmfn); /* If the PGT_l1_shadow has two continual pages */ - entries = PAGE_SIZE / sizeof(guest_l1_pgentry_t); //1024 entry!!! 
- pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(entries - 1)) << L1_PAGETABLE_SHIFT; - spl1e = (l1_pgentry_t *) __shadow_get_l1e(v, pt_va, &tmp_sl1e); + __shadow_get_l2e(v, va, &tmp_sl2e); + spl1e = (l1_pgentry_t *) map_domain_page(l2e_get_pfn(tmp_sl2e)); for ( i = 0; i < GUEST_L1_PAGETABLE_ENTRIES; i++ ) { @@ -662,10 +659,13 @@ if ( likely(i > max) ) max = i; set_guest_back_ptr(d, sl1e, sl1mfn, i); - } + } frame_table[sl1mfn].tlbflush_timestamp = SHADOW_ENCODE_MIN_MAX(min, max); + + unmap_domain_page(gpl1e); + unmap_domain_page(spl1e); } } @@ -674,7 +674,7 @@ { struct vcpu *v = current; struct domain *d = v->domain; - l2_pgentry_t sl2e; + l2_pgentry_t sl2e = {0}; __shadow_get_l2e(v, va, &sl2e); if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) ) @@ -690,11 +690,23 @@ } else /* check to see if it exists; if so, link it in */ { - l2_pgentry_t gpde = linear_l2_table(v)[l2_table_offset(va)]; - unsigned long gl1pfn = l2e_get_pfn(gpde); - unsigned long sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow); - - ASSERT( l2e_get_flags(gpde) & _PAGE_PRESENT ); + l2_pgentry_t gpde = {0}; + unsigned long gl1pfn; + unsigned long sl1mfn; + + __guest_get_l2e(v, va, &gpde); + + if ( l2e_get_flags(gpde) & _PAGE_PRESENT ) + { + gl1pfn = l2e_get_pfn(gpde); + sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow); + } + else + { + // no shadow exists, so there's nothing to do. + perfc_incrc(shadow_set_l1e_fail); + return; + } if ( sl1mfn ) { @@ -738,7 +750,7 @@ shadow_update_min_max(l2e_get_pfn(sl2e), l1_table_offset(va)); } -#if CONFIG_PAGING_LEVELS <= 3 +#if CONFIG_PAGING_LEVELS == 2 static void shadow_invlpg_32(struct vcpu *v, unsigned long va) { struct domain *d = v->domain; @@ -767,7 +779,7 @@ shadow_unlock(d); } -#endif +#endif /* CONFIG_PAGING_LEVELS == 2 */ static struct out_of_sync_entry * shadow_alloc_oos_entry(struct domain *d) @@ -996,7 +1008,10 @@ if (__copy_from_user(&gpte, &guest_pt[index], sizeof(gpte))) + { + unmap_domain_page(snapshot); return 0; + } // This could probably be smarter, but this is sufficent for // our current needs. @@ -1021,7 +1036,7 @@ static int is_out_of_sync(struct vcpu *v, unsigned long va) /* __shadow_out_of_sync */ { struct domain *d = v->domain; -#if defined (__x86_64__) +#if CONFIG_PAGING_LEVELS == 4 unsigned long l2mfn = ((v->arch.flags & TF_kernel_mode)? 
pagetable_get_pfn(v->arch.guest_table) : pagetable_get_pfn(v->arch.guest_table_user)); @@ -1032,16 +1047,21 @@ guest_l2_pgentry_t l2e; unsigned long l1pfn, l1mfn; guest_l1_pgentry_t *guest_pt; - guest_l1_pgentry_t tmp_gle; - unsigned long pt_va; ASSERT(shadow_lock_is_acquired(d)); ASSERT(VALID_M2P(l2pfn)); perfc_incrc(shadow_out_of_sync_calls); -#if CONFIG_PAGING_LEVELS >= 4 - if (d->arch.ops->guest_paging_levels == PAGING_L4) { /* Mode F */ +#if CONFIG_PAGING_LEVELS >= 3 + +#define unmap_and_return(x) \ + if ( guest_pt != (guest_l1_pgentry_t *) v->arch.guest_vtable ) \ + unmap_domain_page(guest_pt); \ + return (x); + + if (d->arch.ops->guest_paging_levels >= PAGING_L3) + { pgentry_64_t le; unsigned long gmfn; unsigned long gpfn; @@ -1051,37 +1071,57 @@ gpfn = l2pfn; guest_pt = (guest_l1_pgentry_t *)v->arch.guest_vtable; - for (i = PAGING_L4; i >= PAGING_L3; i--) { + for ( i = PAGING_L4; i >= PAGING_L3; i-- ) + { + if (d->arch.ops->guest_paging_levels == PAGING_L3 + && i == PAGING_L4) + continue; /* skip the top-level for 3-level */ + if ( page_out_of_sync(&frame_table[gmfn]) && - !snapshot_entry_matches( - d, guest_pt, gpfn, table_offset_64(va, i)) ) - return 1; - + !snapshot_entry_matches( + d, guest_pt, gpfn, table_offset_64(va, i)) ) + { + unmap_and_return (1); + } + + le = entry_empty(); __rw_entry(v, va, &le, GUEST_ENTRY | GET_ENTRY | i); + if ( !(entry_get_flags(le) & _PAGE_PRESENT) ) - return 0; + { + unmap_and_return (0); + } gpfn = entry_get_pfn(le); gmfn = __gpfn_to_mfn(d, gpfn); if ( !VALID_MFN(gmfn) ) - return 0; - /* Todo: check!*/ + { + unmap_and_return (0); + } + if ( guest_pt != (guest_l1_pgentry_t *)v->arch.guest_vtable ) + unmap_domain_page(guest_pt); guest_pt = (guest_l1_pgentry_t *)map_domain_page(gmfn); - } /* L2 */ if ( page_out_of_sync(&frame_table[gmfn]) && !snapshot_entry_matches(d, guest_pt, gpfn, l2_table_offset(va)) ) + { + unmap_and_return (1); + } + + if ( guest_pt != (guest_l1_pgentry_t *)v->arch.guest_vtable ) + unmap_domain_page(guest_pt); + + } + else +#undef unmap_and_return +#endif /* CONFIG_PAGING_LEVELS >= 3 */ + { + if ( page_out_of_sync(&frame_table[l2mfn]) && + !snapshot_entry_matches(d, (guest_l1_pgentry_t *)v->arch.guest_vtable, + l2pfn, guest_l2_table_offset(va)) ) return 1; - - - } else -#endif - - if ( page_out_of_sync(&frame_table[l2mfn]) && - !snapshot_entry_matches(d, (guest_l1_pgentry_t *)v->arch.guest_vtable, - l2pfn, guest_l2_table_offset(va)) ) - return 1; + } __guest_get_l2e(v, va, &l2e); if ( !(guest_l2e_get_flags(l2e) & _PAGE_PRESENT) || @@ -1095,15 +1135,17 @@ if ( !VALID_MFN(l1mfn) ) return 0; - pt_va = ((va >> L1_PAGETABLE_SHIFT) & ~(GUEST_L1_PAGETABLE_ENTRIES - 1)) - << L1_PAGETABLE_SHIFT; - guest_pt = (guest_l1_pgentry_t *) __guest_get_l1e(v, pt_va, &tmp_gle); + guest_pt = (guest_l1_pgentry_t *) map_domain_page(l1mfn); if ( page_out_of_sync(&frame_table[l1mfn]) && !snapshot_entry_matches( - d, guest_pt, l1pfn, guest_l1_table_offset(va)) ) + d, guest_pt, l1pfn, guest_l1_table_offset(va)) ) + { + unmap_domain_page(guest_pt); return 1; - + } + + unmap_domain_page(guest_pt); return 0; } @@ -1257,7 +1299,7 @@ } if ( shadow_mode_external(d) ) { - if (write_refs-- == 0) + if (--write_refs == 0) return 0; // Use the back pointer to locate the shadow page that can contain @@ -1314,6 +1356,8 @@ for ( entry = d->arch.out_of_sync; entry; entry = entry->next) { + int max = -1; + if ( entry->snapshot_mfn == SHADOW_SNAPSHOT_ELSEWHERE ) continue; @@ -1335,7 +1379,7 @@ continue; } - FSH_LOG("resyncing t=%08x gpfn=%lx gmfn=%lx smfn=%lx 
snapshot_mfn=%lx", + FSH_LOG("resyncing t=%08x gpfn=%lx gmfn=%lx smfn=%lx snapshot_mfn=%lx", stype, entry->gpfn, entry->gmfn, smfn, entry->snapshot_mfn); // Compare guest's new contents to its snapshot, validating @@ -1373,10 +1417,8 @@ if ( !shadow_mode_refcounts(d) ) revalidate_l1(d, (l1_pgentry_t *)guest1, (l1_pgentry_t *)snapshot1); - if ( !smfn ) break; - changed = 0; @@ -1405,12 +1447,13 @@ perfc_incrc(resync_l1); perfc_incr_histo(wpt_updates, changed, PT_UPDATES); perfc_incr_histo(l1_entries_checked, max_shadow - min_shadow + 1, PT_UPDATES); - if ( d->arch.ops->guest_paging_levels == PAGING_L4 && + if ( d->arch.ops->guest_paging_levels >= PAGING_L3 && unshadow_l1 ) { - pgentry_64_t l2e; + pgentry_64_t l2e = {0}; __shadow_get_l2e(entry->v, entry->va, &l2e); - if (entry_get_flags(l2e) & _PAGE_PRESENT) { + + if ( entry_get_flags(l2e) & _PAGE_PRESENT ) { entry_remove_flags(l2e, _PAGE_PRESENT); __shadow_set_l2e(entry->v, entry->va, &l2e); @@ -1421,11 +1464,9 @@ break; } -#if defined (__i386__) +#if CONFIG_PAGING_LEVELS == 2 case PGT_l2_shadow: { - int max = -1; - l2_pgentry_t *guest2 = guest; l2_pgentry_t *shadow2 = shadow; l2_pgentry_t *snapshot2 = snapshot; @@ -1436,9 +1477,6 @@ changed = 0; for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) { -#if CONFIG_X86_PAE - BUG(); /* FIXME: need type_info */ -#endif if ( !is_guest_l2_slot(0,i) && !external ) continue; @@ -1482,9 +1520,6 @@ changed = 0; for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) { -#if CONFIG_X86_PAE - BUG(); /* FIXME: need type_info */ -#endif if ( !is_guest_l2_slot(0, i) && !external ) continue; @@ -1505,7 +1540,7 @@ perfc_incr_histo(shm_hl2_updates, changed, PT_UPDATES); break; } -#else +#elif CONFIG_PAGING_LEVELS >= 3 case PGT_l2_shadow: case PGT_l3_shadow: { @@ -1521,19 +1556,35 @@ guest_pt[i], snapshot_pt[i], PAGE_FLAG_MASK) ) { need_flush |= validate_entry_change( - d, &guest_pt[i], &shadow_pt[i], - shadow_type_to_level(stype)); + d, &guest_pt[i], &shadow_pt[i], + shadow_type_to_level(stype)); changed++; } +#if CONFIG_PAGING_LEVELS == 3 + if ( stype == PGT_l3_shadow ) + { + if ( entry_get_value(guest_pt[i]) != 0 ) + max = i; + + if ( !(entry_get_flags(guest_pt[i]) & _PAGE_PRESENT) && + unlikely(entry_get_value(guest_pt[i]) != 0) && + !unshadow && + (frame_table[smfn].u.inuse.type_info & PGT_pinned) ) + unshadow = 1; + } +#endif } + + if ( d->arch.ops->guest_paging_levels == PAGING_L3 + && max == -1 && stype == PGT_l3_shadow ) + unshadow = 1; + + perfc_incrc(resync_l3); + perfc_incr_histo(shm_l3_updates, changed, PT_UPDATES); break; - - } case PGT_l4_shadow: { - int max = -1; - guest_root_pgentry_t *guest_root = guest; l4_pgentry_t *shadow4 = shadow; guest_root_pgentry_t *snapshot_root = snapshot; @@ -1547,7 +1598,8 @@ if ( root_entry_has_changed( new_root_e, snapshot_root[i], PAGE_FLAG_MASK)) { - if (d->arch.ops->guest_paging_levels == PAGING_L4) { + if ( d->arch.ops->guest_paging_levels == PAGING_L4 ) + { need_flush |= validate_entry_change( d, (pgentry_64_t *)&new_root_e, (pgentry_64_t *)&shadow4[i], shadow_type_to_level(stype)); @@ -1563,9 +1615,9 @@ // Need a better solution in the long term. 
if ( !(guest_root_get_flags(new_root_e) & _PAGE_PRESENT) && - unlikely(guest_root_get_intpte(new_root_e) != 0) && - !unshadow && - (frame_table[smfn].u.inuse.type_info & PGT_pinned) ) + unlikely(guest_root_get_intpte(new_root_e) != 0) && + !unshadow && + (frame_table[smfn].u.inuse.type_info & PGT_pinned) ) unshadow = 1; } if ( max == -1 ) @@ -1575,7 +1627,7 @@ break; } -#endif +#endif /* CONFIG_PAGING_LEVELS >= 3 */ default: BUG(); } @@ -1589,7 +1641,7 @@ { perfc_incrc(unshadow_l2_count); shadow_unpin(smfn); -#if defined (__i386__) +#if CONFIG_PAGING_LEVELS == 2 if ( unlikely(shadow_mode_external(d)) ) { unsigned long hl2mfn; @@ -1660,19 +1712,24 @@ // Second, resync all L1 pages, then L2 pages, etc... // need_flush |= resync_all(d, PGT_l1_shadow); -#if defined (__i386__) - if ( shadow_mode_translate(d) ) + +#if CONFIG_PAGING_LEVELS == 2 + if ( d->arch.ops->guest_paging_levels == PAGING_L2 && + shadow_mode_translate(d) ) + { need_flush |= resync_all(d, PGT_hl2_shadow); -#endif - - /* - * Fixme: for i386 host - */ - if (d->arch.ops->guest_paging_levels == PAGING_L4) { - need_flush |= resync_all(d, PGT_l2_shadow); + } +#endif + + need_flush |= resync_all(d, PGT_l2_shadow); + +#if CONFIG_PAGING_LEVELS >= 3 + if (d->arch.ops->guest_paging_levels >= PAGING_L3) + { need_flush |= resync_all(d, PGT_l3_shadow); - } - need_flush |= resync_all(d, PGT_l4_shadow); + need_flush |= resync_all(d, PGT_l4_shadow); + } +#endif if ( need_flush && !unlikely(shadow_mode_external(d)) ) local_flush_tlb(); @@ -1749,7 +1806,7 @@ return 1; } -#if CONFIG_PAGING_LEVELS <= 3 +#if CONFIG_PAGING_LEVELS == 2 static int shadow_fault_32(unsigned long va, struct cpu_user_regs *regs) { l1_pgentry_t gpte, spte, orig_gpte; @@ -1888,7 +1945,20 @@ shadow_unlock(d); return 0; } -#endif +#endif /* CONFIG_PAGING_LEVELS == 2 */ + +static inline unsigned long va_to_l1mfn(struct vcpu *v, unsigned long va) +{ + struct domain *d = v->domain; + guest_l2_pgentry_t gl2e = {0}; + + __guest_get_l2e(v, va, &gl2e); + + if ( unlikely(!(guest_l2e_get_flags(gl2e) & _PAGE_PRESENT)) ) + return INVALID_MFN; + + return __gpfn_to_mfn(d, l2e_get_pfn(gl2e)); +} static int do_update_va_mapping(unsigned long va, l1_pgentry_t val, @@ -1899,8 +1969,6 @@ int rc = 0; shadow_lock(d); - - //printk("%s(va=%p, val=%p)\n", __func__, (void *)va, (void *)l1e_get_intpte(val)); // This is actually overkill - we don't need to sync the L1 itself, // just everything involved in getting to this L1 (i.e. we need @@ -1919,7 +1987,6 @@ if ( shadow_mode_log_dirty(d) ) __mark_dirty(d, va_to_l1mfn(v, va)); -// out: shadow_unlock(d); return rc; @@ -1955,7 +2022,7 @@ static void shadow_update_pagetables(struct vcpu *v) { struct domain *d = v->domain; -#if defined (__x86_64__) +#if CONFIG_PAGING_LEVELS == 4 unsigned long gmfn = ((v->arch.flags & TF_kernel_mode)? pagetable_get_pfn(v->arch.guest_table) : pagetable_get_pfn(v->arch.guest_table_user)); @@ -1991,7 +2058,8 @@ /* * arch.shadow_table */ - if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) ) { + if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) ) + { #if CONFIG_PAGING_LEVELS == 2 smfn = shadow_l2_table(d, gpfn, gmfn); #elif CONFIG_PAGING_LEVELS == 3 @@ -2013,7 +2081,7 @@ * arch.shadow_vtable */ if ( max_mode == SHM_external -#if CONFIG_PAGING_LEVELS >=4 +#if CONFIG_PAGING_LEVELS >=3 || max_mode & SHM_enable #endif ) @@ -2068,7 +2136,7 @@ // XXX - maybe this can be optimized somewhat?? 
local_flush_tlb(); } -#endif +#endif /* CONFIG_PAGING_LEVELS == 2 */ #if CONFIG_PAGING_LEVELS == 3 /* FIXME: PAE code to be written */ @@ -2373,7 +2441,7 @@ l2e_get_intpte(match)); } -#ifdef __i386__ +#if CONFIG_PAGING_LEVELS == 2 if ( shadow_mode_external(d) ) limit = L2_PAGETABLE_ENTRIES; else @@ -2405,7 +2473,7 @@ int _check_pagetable(struct vcpu *v, char *s) { struct domain *d = v->domain; -#if defined (__x86_64__) +#if CONFIG_PAGING_LEVELS == 4 pagetable_t pt = ((v->arch.flags & TF_kernel_mode)? v->arch.guest_table : v->arch.guest_table_user); #else @@ -2447,7 +2515,7 @@ spl2e = (l2_pgentry_t *) map_domain_page(smfn); /* Go back and recurse. */ -#ifdef __i386__ +#if CONFIG_PAGING_LEVELS == 2 if ( shadow_mode_external(d) ) limit = L2_PAGETABLE_ENTRIES; else @@ -2551,59 +2619,108 @@ #if CONFIG_PAGING_LEVELS == 3 static unsigned long shadow_l3_table( - struct domain *d, unsigned long gpfn, unsigned long gmfn) -{ - BUG(); /* not implemenated yet */ - return 42; -} + struct domain *d, unsigned long gpfn, unsigned long gmfn) +{ + unsigned long smfn; + l3_pgentry_t *spl3e; + + perfc_incrc(shadow_l3_table_count); + + if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l3_shadow))) ) + { + printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn); + BUG(); /* XXX Deal gracefully with failure. */ + } + + spl3e = (l3_pgentry_t *)map_domain_page(smfn); + + /* Make the self entry */ + spl3e[PAE_SHADOW_SELF_ENTRY] = l3e_from_pfn(smfn, __PAGE_HYPERVISOR); + + if ( (PGT_base_page_table == PGT_l3_page_table) && + !shadow_mode_external(d) ) { + int i; + unsigned long g2mfn, s2mfn; + l2_pgentry_t *spl2e; + l3_pgentry_t *gpl3e; + + /* Get the top entry */ + gpl3e = (l3_pgentry_t *)map_domain_page(gmfn); + + if ( !(l3e_get_flags(gpl3e[L3_PAGETABLE_ENTRIES - 1]) & _PAGE_PRESENT) ) + { + BUG(); + } + + g2mfn = l3e_get_pfn(gpl3e[L3_PAGETABLE_ENTRIES - 1]); + + /* NB. g2mfn should be same as g2pfn */ + if (!(s2mfn = __shadow_status(d, g2mfn, PGT_l2_shadow))) { + if ( unlikely(!(s2mfn = + alloc_shadow_page(d, g2mfn, g2mfn, PGT_l2_shadow))) ) { + printk("Couldn't alloc an L2 shadow for pfn=%lx mfn=%lx\n", + g2mfn, g2mfn); + BUG(); /* XXX Deal gracefully with failure. */ + } + + if (!get_shadow_ref(s2mfn)) + BUG(); + } + + /* Map shadow L2 into shadow L3 */ + spl3e[L3_PAGETABLE_ENTRIES - 1] = l3e_from_pfn(s2mfn, _PAGE_PRESENT); + shadow_update_min_max(smfn, L3_PAGETABLE_ENTRIES -1); + + /* + * Xen private mappings. Do the similar things as + * create_pae_xen_mappings(). + */ + spl2e = (l2_pgentry_t *)map_domain_page(s2mfn); + + /* + * When we free L2 pages, we need to tell if the page contains + * Xen private mappings. Use the va_mask part. + */ + frame_table[s2mfn].u.inuse.type_info |= + (unsigned long) 3 << PGT_score_shift; + + memset(spl2e, 0, + (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)) * sizeof(l2_pgentry_t)); + + memcpy(&spl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)], + &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT], + L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t)); + + for ( i = 0; i < (PERDOMAIN_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ ) + spl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] = + l2e_from_page( + virt_to_page(page_get_owner(&frame_table[gmfn])->arch.mm_perdomain_pt) + i, + __PAGE_HYPERVISOR); + for ( i = 0; i < (LINEARPT_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ ) + spl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] = + (l3e_get_flags(gpl3e[i]) & _PAGE_PRESENT) ? 
+ l2e_from_pfn(l3e_get_pfn(gpl3e[i]), __PAGE_HYPERVISOR) : + l2e_empty(); + + unmap_domain_page(spl2e); + unmap_domain_page(gpl3e); + } + unmap_domain_page(spl3e); + + return smfn; +} + static unsigned long gva_to_gpa_pae(unsigned long gva) { BUG(); return 43; } -#endif - -#if CONFIG_PAGING_LEVELS >= 4 +#endif /* CONFIG_PAGING_LEVELS == 3 */ + +#if CONFIG_PAGING_LEVELS == 4 /****************************************************************************/ /* 64-bit shadow-mode code testing */ /****************************************************************************/ -/* - * validate_bl2e_change() - * The code is for 32-bit VMX gues on 64-bit host. - * To sync guest L2. - */ -static inline void -validate_bl2e_change( - struct domain *d, - guest_root_pgentry_t *new_gle_p, - pgentry_64_t *shadow_l3, - int index) -{ - int sl3_idx, sl2_idx; - unsigned long sl2mfn, sl1mfn; - pgentry_64_t *sl2_p; - - /* Using guest l2 pte index to get shadow l3&l2 index - * index: 0 ~ 1023, PAGETABLE_ENTRIES: 512 - */ - sl3_idx = index / (PAGETABLE_ENTRIES / 2); - sl2_idx = (index % (PAGETABLE_ENTRIES / 2)) * 2; - - sl2mfn = entry_get_pfn(shadow_l3[sl3_idx]); - sl2_p = (pgentry_64_t *)map_domain_page(sl2mfn); - - validate_pde_change( - d, *(guest_l2_pgentry_t *)new_gle_p, (l2_pgentry_t *)&sl2_p[sl2_idx]); - - /* Mapping the second l1 shadow page */ - if (entry_get_flags(sl2_p[sl2_idx]) & _PAGE_PRESENT) { - sl1mfn = entry_get_pfn(sl2_p[sl2_idx]); - sl2_p[sl2_idx + 1] = - entry_from_pfn(sl1mfn + 1, entry_get_flags(sl2_p[sl2_idx])); - } - unmap_domain_page(sl2_p); -} - /* * init_bl2() is for 32-bit VMX guest on 64-bit host * Using 1 shadow L4(l3) and 4 shadow L2s to simulate guest L2 @@ -2699,6 +2816,47 @@ ESH_LOG("shadow_l4_table(%lx -> %lx)", gmfn, smfn); return smfn; } +#endif /* CONFIG_PAGING_LEVELS == 4 */ + +#if CONFIG_PAGING_LEVELS >= 3 +/* + * validate_bl2e_change() + * The code is for 32-bit VMX gues on 64-bit host. + * To sync guest L2. 
+ */ + +static inline void +validate_bl2e_change( + struct domain *d, + guest_root_pgentry_t *new_gle_p, + pgentry_64_t *shadow_l3, + int index) +{ + int sl3_idx, sl2_idx; + unsigned long sl2mfn, sl1mfn; + pgentry_64_t *sl2_p; + + /* Using guest l2 pte index to get shadow l3&l2 index + * index: 0 ~ 1023, PAGETABLE_ENTRIES: 512 + */ + sl3_idx = index / (PAGETABLE_ENTRIES / 2); + sl2_idx = (index % (PAGETABLE_ENTRIES / 2)) * 2; + + sl2mfn = entry_get_pfn(shadow_l3[sl3_idx]); + sl2_p = (pgentry_64_t *)map_domain_page(sl2mfn); + + validate_pde_change( + d, *(guest_l2_pgentry_t *)new_gle_p, (l2_pgentry_t *)&sl2_p[sl2_idx]); + + /* Mapping the second l1 shadow page */ + if (entry_get_flags(sl2_p[sl2_idx]) & _PAGE_PRESENT) { + sl1mfn = entry_get_pfn(sl2_p[sl2_idx]); + sl2_p[sl2_idx + 1] = + entry_from_pfn(sl1mfn + 1, entry_get_flags(sl2_p[sl2_idx])); + } + unmap_domain_page(sl2_p); + +} /* * This shadow_mark_va_out_of_sync() is for 2M page shadow @@ -2714,7 +2872,6 @@ if ( !get_shadow_ref(writable_pl1e >> L1_PAGETABLE_SHIFT) ) BUG(); } - static int get_shadow_mfn(struct domain *d, unsigned long gpfn, unsigned long *spmfn, u32 flag) { @@ -2764,7 +2921,7 @@ static void shadow_map_into_current(struct vcpu *v, unsigned long va, unsigned int from, unsigned int to) { - pgentry_64_t gle, sle; + pgentry_64_t gle = {0}, sle; unsigned long gpfn, smfn; if (from == PAGING_L1 && to == PAGING_L2) { @@ -2836,8 +2993,9 @@ } -static void shadow_set_l1e_64(unsigned long va, pgentry_64_t *sl1e_p, - int create_l1_shadow) +static void shadow_set_l1e_64( + unsigned long va, pgentry_64_t *sl1e_p, + int create_l1_shadow) { struct vcpu *v = current; struct domain *d = v->domain; @@ -2848,19 +3006,21 @@ int i; unsigned long orig_va = 0; - if (d->arch.ops->guest_paging_levels == PAGING_L2) { + if ( d->arch.ops->guest_paging_levels == PAGING_L2 ) + { /* This is for 32-bit VMX guest on 64-bit host */ orig_va = va; va = va & (~((1<<L2_PAGETABLE_SHIFT_32)-1)); } - for (i = PAGING_L4; i >= PAGING_L2; i--) { + for (i = PAGING_L4; i >= PAGING_L2; i--) + { if (!__rw_entry(v, va, &sle, SHADOW_ENTRY | GET_ENTRY | i)) { printk("<%s> i = %d\n", __func__, i); BUG(); } - if (!(entry_get_flags(sle) & _PAGE_PRESENT)) { - if (create_l1_shadow) { + if ( !(entry_get_flags(sle) & _PAGE_PRESENT) ) { + if ( create_l1_shadow ) { perfc_incrc(shadow_set_l3e_force_map); shadow_map_into_current(v, va, i-1, i); __rw_entry(v, va, &sle, SHADOW_ENTRY | GET_ENTRY | i); @@ -2870,12 +3030,12 @@ #endif } } - if(i < PAGING_L4) + if( i < PAGING_L4 ) shadow_update_min_max(entry_get_pfn(sle_up), table_offset_64(va, i)); sle_up = sle; } - if (d->arch.ops->guest_paging_levels == PAGING_L2) { + if ( d->arch.ops->guest_paging_levels == PAGING_L2 ) { va = orig_va; } @@ -2914,7 +3074,7 @@ l1_pgentry_t sl1e; l1_pgentry_t old_sl1e; l2_pgentry_t sl2e; - unsigned long nx = 0; + u64 nx = 0; int put_ref_check = 0; /* Check if gpfn is 2M aligned */ @@ -2929,7 +3089,7 @@ l2e_remove_flags(tmp_l2e, _PAGE_PSE); if (l2e_get_flags(gl2e) & _PAGE_NX) { l2e_remove_flags(tmp_l2e, _PAGE_NX); - nx = 1UL << 63; + nx = 1ULL << 63; } @@ -3037,114 +3197,161 @@ * else return 0. 
*/ #if defined( GUEST_PGENTRY_32 ) -static inline int guest_page_fault(struct vcpu *v, - unsigned long va, unsigned int error_code, - guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e) +static inline int guest_page_fault( + struct vcpu *v, + unsigned long va, unsigned int error_code, + guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e) { /* The following check for 32-bit guest on 64-bit host */ __guest_get_l2e(v, va, gpl2e); /* Check the guest L2 page-table entry first*/ - if (unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_PRESENT))) + if ( unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_PRESENT)) ) return 1; - if (error_code & ERROR_W) { - if (unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_RW))) + if ( error_code & ERROR_W ) + { + if ( unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_RW)) ) return 1; } - if (error_code & ERROR_U) { - if (unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_USER))) + + if ( error_code & ERROR_U ) + { + if ( unlikely(!(guest_l2e_get_flags(*gpl2e) & _PAGE_USER)) ) return 1; } - if (guest_l2e_get_flags(*gpl2e) & _PAGE_PSE) + if ( guest_l2e_get_flags(*gpl2e) & _PAGE_PSE ) return 0; __guest_get_l1e(v, va, gpl1e); /* Then check the guest L1 page-table entry */ - if (unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_PRESENT))) + if ( unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_PRESENT)) ) return 1; - if (error_code & ERROR_W) { - if (unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_RW))) + if ( error_code & ERROR_W ) + { + if ( unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_RW)) ) return 1; } - if (error_code & ERROR_U) { - if (unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_USER))) + + if ( error_code & ERROR_U ) + { + if ( unlikely(!(guest_l1e_get_flags(*gpl1e) & _PAGE_USER)) ) return 1; } return 0; } #else -static inline int guest_page_fault(struct vcpu *v, - unsigned long va, unsigned int error_code, - guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e) +static inline int guest_page_fault( + struct vcpu *v, + unsigned long va, unsigned int error_code, + guest_l2_pgentry_t *gpl2e, guest_l1_pgentry_t *gpl1e) { struct domain *d = v->domain; - pgentry_64_t gle, *lva; - unsigned long mfn; + pgentry_64_t gle; + unsigned long gpfn = 0, mfn; int i; - __rw_entry(v, va, &gle, GUEST_ENTRY | GET_ENTRY | PAGING_L4); - if (unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT))) - return 1; - - if (error_code & ERROR_W) { - if (unlikely(!(entry_get_flags(gle) & _PAGE_RW))) + ASSERT( d->arch.ops->guest_paging_levels >= PAGING_L3 ); + +#if CONFIG_PAGING_LEVELS == 4 + if ( d->arch.ops->guest_paging_levels == PAGING_L4 ) + { + __rw_entry(v, va, &gle, GUEST_ENTRY | GET_ENTRY | PAGING_L4); + if ( unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT)) ) return 1; - } - if (error_code & ERROR_U) { - if (unlikely(!(entry_get_flags(gle) & _PAGE_USER))) - return 1; - } - for (i = PAGING_L3; i >= PAGING_L1; i--) { + + if ( error_code & ERROR_W ) + { + if ( unlikely(!(entry_get_flags(gle) & _PAGE_RW)) ) + return 1; + } + + if ( error_code & ERROR_U ) + { + if ( unlikely(!(entry_get_flags(gle) & _PAGE_USER)) ) + return 1; + } + gpfn = entry_get_pfn(gle); + } +#endif + +#if CONFIG_PAGING_LEVELS >= 3 + if ( d->arch.ops->guest_paging_levels == PAGING_L3 ) + { + gpfn = pagetable_get_pfn(v->arch.guest_table); + } +#endif + + for ( i = PAGING_L3; i >= PAGING_L1; i-- ) + { + pgentry_64_t *lva; /* * If it's not external mode, then mfn should be machine physical. 
*/ - mfn = __gpfn_to_mfn(d, (entry_get_value(gle) >> PAGE_SHIFT)); - - lva = (pgentry_64_t *) phys_to_virt( - mfn << PAGE_SHIFT); + mfn = __gpfn_to_mfn(d, gpfn); + + lva = (pgentry_64_t *) map_domain_page(mfn); gle = lva[table_offset_64(va, i)]; - - if (unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT))) + unmap_domain_page(lva); + + gpfn = entry_get_pfn(gle); + + if ( unlikely(!(entry_get_flags(gle) & _PAGE_PRESENT)) ) return 1; - if (error_code & ERROR_W) { - if (unlikely(!(entry_get_flags(gle) & _PAGE_RW))) - return 1; - } - if (error_code & ERROR_U) { - if (unlikely(!(entry_get_flags(gle) & _PAGE_USER))) - return 1; - } - - if (i == PAGING_L2) { - if (gpl2e) + if ( i < PAGING_L3 ) + { + if ( error_code & ERROR_W ) + { + if ( unlikely(!(entry_get_flags(gle) & _PAGE_RW)) ) + { + if ( i == PAGING_L1 ) + if ( gpl1e ) + gpl1e->l1 = gle.lo; + return 1; + } + } + if ( error_code & ERROR_U ) + { + if ( unlikely(!(entry_get_flags(gle) & _PAGE_USER)) ) + return 1; + } + } + + if ( i == PAGING_L2 ) + { + if ( gpl2e ) gpl2e->l2 = gle.lo; - - if (likely(entry_get_flags(gle) & _PAGE_PSE)) + if ( likely(entry_get_flags(gle) & _PAGE_PSE) ) return 0; - - } - - if (i == PAGING_L1) - if (gpl1e) + } + + if ( i == PAGING_L1 ) + if ( gpl1e ) gpl1e->l1 = gle.lo; } + return 0; -} -#endif + +} +#endif + static int shadow_fault_64(unsigned long va, struct cpu_user_regs *regs) { struct vcpu *v = current; struct domain *d = v->domain; guest_l2_pgentry_t gl2e; - guest_l1_pgentry_t gl1e; + guest_l1_pgentry_t gl1e, orig_gl1e; l1_pgentry_t sl1e; + + gl1e = guest_l1e_empty(); gl2e = guest_l2e_empty(); + + sl1e = l1e_empty(); perfc_incrc(shadow_fault_calls); @@ -3156,7 +3363,7 @@ */ shadow_lock(d); - /* XXX - FIX THIS COMMENT!!! + /* * STEP 1. Check to see if this fault might have been caused by an * out-of-sync table page entry, or if we should pass this * fault onto the guest. @@ -3166,66 +3373,121 @@ /* * STEP 2. Check if the fault belongs to guest */ - if ( guest_page_fault( - v, va, regs->error_code, &gl2e, &gl1e) ) { + if ( guest_page_fault(v, va, regs->error_code, &gl2e, &gl1e) ) + { + if ( unlikely(shadow_mode_log_dirty(d)) && l1e_get_intpte(gl1e) != 0 ) + goto check_writeable; + goto fail; } - if ( unlikely(!(guest_l2e_get_flags(gl2e) & _PAGE_PSE)) ) { - /* - * Handle 4K pages here - */ - - /* Write fault? */ - if ( regs->error_code & 2 ) { - if ( !l1pte_write_fault(v, &gl1e, &sl1e, va) ) { + if ( unlikely((guest_l2e_get_flags(gl2e) & _PAGE_PSE)) ) + goto pse; + + /* + * Handle 4K pages here + */ +check_writeable: + orig_gl1e = gl1e; + + /* Write fault? */ + if ( regs->error_code & 2 ) + { + int allow_writes = 0; + + if ( unlikely(!(guest_l1e_get_flags(gl1e) & _PAGE_RW)) ) + { + if ( shadow_mode_page_writable(va, regs, l1e_get_pfn(gl1e)) ) + { + allow_writes = 1; + l1e_add_flags(gl1e, _PAGE_RW); + } + else + { + /* Write fault on a read-only mapping. */ + SH_VVLOG("shadow_fault - EXIT: wr fault on RO page (%" PRIpte ")", + l1e_get_intpte(gl1e)); + perfc_incrc(shadow_fault_bail_ro_mapping); goto fail; } - } else { - l1pte_read_fault(d, &gl1e, &sl1e); - } - /* - * STEP 3. 
Write guest/shadow l2e back - */ - if (unlikely(!__guest_set_l1e(v, va, &gl1e))) { + } + + if ( !l1pte_write_fault(v, &gl1e, &sl1e, va) ) + { + SH_VVLOG("shadow_fault - EXIT: l1pte_write_fault failed"); + perfc_incrc(write_fault_bail); + shadow_unlock(d); + return 0; + } + + if (allow_writes) + l1e_remove_flags(gl1e, _PAGE_RW); + } + else + { + if ( !l1pte_read_fault(d, &gl1e, &sl1e) ) + { + SH_VVLOG("shadow_fault - EXIT: l1pte_read_fault failed"); + perfc_incrc(read_fault_bail); + shadow_unlock(d); + return 0; + } + } + + /* + * STEP 3. Write the modified shadow PTE and guest PTE back to the tables + */ + if ( l1e_has_changed(orig_gl1e, gl1e, PAGE_FLAG_MASK) ) + { + if (unlikely(!__guest_set_l1e(v, va, &gl1e))) domain_crash_synchronous(); - } - - ESH_LOG("gl1e: %lx, sl1e: %lx\n", l1e_get_intpte(gl1e), l1e_get_intpte(sl1e)); - shadow_set_l1e_64(va, (pgentry_64_t *)&sl1e, 1); - /* - * if necessary, record the page table page as dirty - */ - if ( unlikely(shadow_mode_log_dirty(d)) ) + + // if necessary, record the page table page as dirty + if ( unlikely(shadow_mode_log_dirty(d)) ) __mark_dirty(d, __gpfn_to_mfn(d, l2e_get_pfn(gl2e))); - - } else { - /* - * Handle 2M pages here - */ - /* Write fault? */ - if ( regs->error_code & 2 ) { - if ( !l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, WRITE_FAULT) ) { - goto fail; - } - } else { - l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, READ_FAULT); - } - - /* - * STEP 3. Write guest/shadow l2e back - */ - - if ( unlikely(!__guest_set_l2e(v, va, &gl2e)) ) { - domain_crash_synchronous(); - } - - /* - * Todo: if necessary, record the page table page as dirty - */ - - - } + } + + shadow_set_l1e_64(va, (pgentry_64_t *)&sl1e, 1); + + perfc_incrc(shadow_fault_fixed); + d->arch.shadow_fault_count++; + + shadow_unlock(d); + + return EXCRET_fault_fixed; + +pse: + /* + * Handle 2M pages here + */ + if ( unlikely(!shadow_mode_external(d)) ) + BUG(); + + /* Write fault? */ + if ( regs->error_code & 2 ) + { + if ( !l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, WRITE_FAULT) ) + { + goto fail; + } + } + else + { + l2e_rw_fault(v, (l2_pgentry_t *)&gl2e, va, READ_FAULT); + } + + /* + * STEP 3. 
Write guest/shadow l2e back + */ + + if ( unlikely(!__guest_set_l2e(v, va, &gl2e)) ) + { + domain_crash_synchronous(); + } + + /* + * Todo: if necessary, record the page table page as dirty + */ perfc_incrc(shadow_fault_fixed); d->arch.shadow_fault_count++; @@ -3257,6 +3519,7 @@ shadow_unlock(d); } +#if CONFIG_PAGING_LEVELS == 4 static unsigned long gva_to_gpa_64(unsigned long gva) { struct vcpu *v = current; @@ -3273,13 +3536,11 @@ gpa = guest_l1e_get_paddr(gl1e) + (gva & ~PAGE_MASK); return gpa; - } #ifndef GUEST_PGENTRY_32 - struct shadow_ops MODE_F_HANDLER = { - .guest_paging_levels = 4, + .guest_paging_levels = 4, .invlpg = shadow_invlpg_64, .fault = shadow_fault_64, .update_pagetables = shadow_update_pagetables, @@ -3290,9 +3551,11 @@ .is_out_of_sync = is_out_of_sync, .gva_to_gpa = gva_to_gpa_64, }; -#endif - -#endif +#endif /* GUEST_PGENTRY_32 */ +#endif /* CONFIG_PAGING_LEVELS == 4 */ + +#endif /* CONFIG_PAGING_LEVELS >= 3 */ + #if CONFIG_PAGING_LEVELS == 2 struct shadow_ops MODE_A_HANDLER = { @@ -3309,10 +3572,11 @@ }; #elif CONFIG_PAGING_LEVELS == 3 + struct shadow_ops MODE_B_HANDLER = { - .guest_paging_levels = 3, - .invlpg = shadow_invlpg_32, - .fault = shadow_fault_32, + .guest_paging_levels = 3, + .invlpg = shadow_invlpg_64, + .fault = shadow_fault_64, .update_pagetables = shadow_update_pagetables, .sync_all = sync_all, .remove_all_write_access = remove_all_write_access, diff -r e023e37b3c7a -r 995e94c4802e xen/arch/x86/shadow32.c --- a/xen/arch/x86/shadow32.c Fri Nov 11 18:02:49 2005 +++ b/xen/arch/x86/shadow32.c Fri Nov 11 18:11:13 2005 @@ -31,6 +31,8 @@ #include <xen/trace.h> #define MFN_PINNED(_x) (frame_table[_x].u.inuse.type_info & PGT_pinned) +#define va_to_l1mfn(_ed, _va) \ + (l2e_get_pfn(linear_l2_table(_ed)[_va>>L2_PAGETABLE_SHIFT])) static void shadow_free_snapshot(struct domain *d, struct out_of_sync_entry *entry); diff -r e023e37b3c7a -r 995e94c4802e xen/arch/x86/shadow_public.c --- a/xen/arch/x86/shadow_public.c Fri Nov 11 18:02:49 2005 +++ b/xen/arch/x86/shadow_public.c Fri Nov 11 18:11:13 2005 @@ -64,6 +64,9 @@ #if CONFIG_PAGING_LEVELS == 2 if ( d->arch.ops != &MODE_A_HANDLER ) d->arch.ops = &MODE_A_HANDLER; +#elif CONFIG_PAGING_LEVELS == 3 + if ( d->arch.ops != &MODE_B_HANDLER ) + d->arch.ops = &MODE_B_HANDLER; #elif CONFIG_PAGING_LEVELS == 4 if ( d->arch.ops != &MODE_D_HANDLER ) d->arch.ops = &MODE_D_HANDLER; @@ -138,7 +141,92 @@ } /****************************************************************************/ /****************************************************************************/ -#if CONFIG_PAGING_LEVELS >= 4 +#if CONFIG_PAGING_LEVELS >= 3 + +static void inline +free_shadow_fl1_table(struct domain *d, unsigned long smfn) +{ + l1_pgentry_t *pl1e = map_domain_page(smfn); + int i; + + for (i = 0; i < L1_PAGETABLE_ENTRIES; i++) + put_page_from_l1e(pl1e[i], d); +} + +/* + * Free l2, l3, l4 shadow tables + */ + +void free_fake_shadow_l2(struct domain *d,unsigned long smfn); + +static void inline +free_shadow_tables(struct domain *d, unsigned long smfn, u32 level) +{ + pgentry_64_t *ple = map_domain_page(smfn); + int i, external = shadow_mode_external(d); + +#if CONFIG_PAGING_LEVELS >=3 + if ( d->arch.ops->guest_paging_levels == PAGING_L2 ) + { + struct pfn_info *page = &frame_table[smfn]; + for ( i = 0; i < PDP_ENTRIES; i++ ) + { + if ( entry_get_flags(ple[i]) & _PAGE_PRESENT ) + free_fake_shadow_l2(d,entry_get_pfn(ple[i])); + } + + page = &frame_table[entry_get_pfn(ple[0])]; + free_domheap_pages(page, SL2_ORDER); + unmap_domain_page(ple); + } + else 
+#endif + { + /* + * No Xen mappings in external pages + */ + if ( external ) + { + for ( i = 0; i < PAGETABLE_ENTRIES; i++ ) + if ( entry_get_flags(ple[i]) & _PAGE_PRESENT ) + put_shadow_ref(entry_get_pfn(ple[i])); + } + else + { + for ( i = 0; i < PAGETABLE_ENTRIES; i++ ) + { + /* + * List the skip/break conditions to avoid freeing + * Xen private mappings. + */ +#if CONFIG_PAGING_LEVELS == 2 + if ( level == PAGING_L2 && !is_guest_l2_slot(0, i) ) + continue; +#endif +#if CONFIG_PAGING_LEVELS == 3 + if ( level == PAGING_L3 && i == L3_PAGETABLE_ENTRIES ) + break; + if ( level == PAGING_L2 ) + { + struct pfn_info *page = &frame_table[smfn]; + if ( is_xen_l2_slot(page->u.inuse.type_info, i) ) + continue; + } +#endif +#if CONFIG_PAGING_LEVELS == 4 + if ( level == PAGING_L4 && !is_guest_l4_slot(i)) + continue; +#endif + if ( entry_get_flags(ple[i]) & _PAGE_PRESENT ) + put_shadow_ref(entry_get_pfn(ple[i])); + } + } + unmap_domain_page(ple); + } +} +#endif + +#if CONFIG_PAGING_LEVELS == 4 /* * Convert PAE 3-level page-table to 4-level page-table */ @@ -203,55 +291,6 @@ v->arch.monitor_vtable = (l2_pgentry_t *) mpl4e; } -static void inline -free_shadow_fl1_table(struct domain *d, unsigned long smfn) -{ - l1_pgentry_t *pl1e = map_domain_page(smfn); - int i; - - for (i = 0; i < L1_PAGETABLE_ENTRIES; i++) - put_page_from_l1e(pl1e[i], d); -} - -/* - * Free l2, l3, l4 shadow tables - */ - -void free_fake_shadow_l2(struct domain *d,unsigned long smfn); - -static void inline -free_shadow_tables(struct domain *d, unsigned long smfn, u32 level) -{ - pgentry_64_t *ple = map_domain_page(smfn); - int i, external = shadow_mode_external(d); - struct pfn_info *page = &frame_table[smfn]; - - if (d->arch.ops->guest_paging_levels == PAGING_L2) - { -#if CONFIG_PAGING_LEVELS >=4 - for ( i = 0; i < PDP_ENTRIES; i++ ) - { - if (entry_get_flags(ple[i]) & _PAGE_PRESENT ) - free_fake_shadow_l2(d,entry_get_pfn(ple[i])); - } - - page = &frame_table[entry_get_pfn(ple[0])]; - free_domheap_pages(page, SL2_ORDER); - unmap_domain_page(ple); -#endif - } - else - { - for ( i = 0; i < PAGETABLE_ENTRIES; i++ ) - if ( external || is_guest_l4_slot(i) ) - if ( entry_get_flags(ple[i]) & _PAGE_PRESENT ) - put_shadow_ref(entry_get_pfn(ple[i])); - - unmap_domain_page(ple); - } -} - - void free_monitor_pagetable(struct vcpu *v) { unsigned long mfn; @@ -299,11 +338,9 @@ mpl2e = (l2_pgentry_t *)map_domain_page(mmfn); memset(mpl2e, 0, PAGE_SIZE); -#ifdef __i386__ /* XXX screws x86/64 build */ memcpy(&mpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE], HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t)); -#endif mpl2e[l2_table_offset(PERDOMAIN_VIRT_START)] = l2e_from_paddr(__pa(d->arch.mm_perdomain_pt), @@ -333,7 +370,7 @@ unsigned long mfn; ASSERT( pagetable_get_paddr(v->arch.monitor_table) ); - + mpl2e = v->arch.monitor_vtable; /* @@ -517,13 +554,11 @@ SH_VVLOG("%s: smfn=%lx freed", __func__, smfn); -#ifdef __i386__ +#if CONFIG_PAGING_LEVELS == 2 if ( shadow_mode_external(d) ) limit = L2_PAGETABLE_ENTRIES; else limit = DOMAIN_ENTRIES_PER_L2_PAGETABLE; -#else - limit = 0; /* XXX x86/64 XXX */ #endif for ( i = 0; i < limit; i++ ) @@ -584,10 +619,11 @@ ASSERT( ! 
IS_INVALID_M2P_ENTRY(gpfn) ); #if CONFIG_PAGING_LEVELS >=4 - if (type == PGT_fl1_shadow) { + if ( type == PGT_fl1_shadow ) + { unsigned long mfn; mfn = __shadow_status(d, gpfn, PGT_fl1_shadow); - if (!mfn) + if ( !mfn ) gpfn |= (1UL << 63); } #endif @@ -602,7 +638,7 @@ free_shadow_l1_table(d, smfn); d->arch.shadow_page_count--; break; -#if defined (__i386__) +#if CONFIG_PAGING_LEVELS == 2 case PGT_l2_shadow: perfc_decr(shadow_l2_pages); shadow_demote(d, gpfn, gmfn); @@ -616,7 +652,8 @@ free_shadow_hl2_table(d, smfn); d->arch.hl2_page_count--; break; -#else +#endif +#if CONFIG_PAGING_LEVELS >= 3 case PGT_l2_shadow: case PGT_l3_shadow: case PGT_l4_shadow: @@ -630,7 +667,6 @@ d->arch.shadow_page_count--; break; #endif - case PGT_snapshot: perfc_decr(apshot_pages); break; @@ -782,7 +818,7 @@ } } -#if defined (__i386__) +#if CONFIG_PAGING_LEVELS == 2 // For external shadows, remove the monitor table's refs // if ( shadow_mode_external(d) ) @@ -928,7 +964,7 @@ ASSERT(!(d->arch.shadow_mode & ~mode)); #if defined(CONFIG_PAGING_LEVELS) - if(!shadow_set_guest_paging_levels(d, + if(!shadow_set_guest_paging_levels(d, CONFIG_PAGING_LEVELS)) { printk("Unsupported guest paging levels\n"); domain_crash_synchronous(); /* need to take a clean path */ @@ -968,7 +1004,7 @@ else v->arch.shadow_vtable = NULL; -#if defined (__i386__) +#if CONFIG_PAGING_LEVELS == 2 /* * arch.hl2_vtable */ @@ -1408,7 +1444,7 @@ sl1mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l1_shadow); if ( sl1mfn ) { - SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpte=%" PRIpte, + SH_VVLOG("shadow_l1_normal_pt_update pa=%p, gpde=%" PRIpte, (void *)pa, l1e_get_intpte(gpte)); l1pte_propagate_from_guest(current->domain, gpte, &spte); @@ -1447,7 +1483,7 @@ #if CONFIG_PAGING_LEVELS >= 3 void shadow_l3_normal_pt_update( struct domain *d, - unsigned long pa, l3_pgentry_t gpde, + unsigned long pa, l3_pgentry_t l3e, struct domain_mmap_cache *cache) { unsigned long sl3mfn; @@ -1458,11 +1494,10 @@ sl3mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l3_shadow); if ( sl3mfn ) { - SH_VVLOG("shadow_l3_normal_pt_update pa=%p, gpde=%" PRIpte, - (void *)pa, l3e_get_intpte(gpde)); - + SH_VVLOG("shadow_l3_normal_pt_update pa=%p, l3e=%" PRIpte, + (void *)pa, l3e_get_intpte(l3e)); spl3e = (pgentry_64_t *) map_domain_page_with_cache(sl3mfn, cache); - validate_entry_change(d, (pgentry_64_t *) &gpde, + validate_entry_change(d, (pgentry_64_t *) &l3e, &spl3e[(pa & ~PAGE_MASK) / sizeof(l3_pgentry_t)], shadow_type_to_level(PGT_l3_shadow)); unmap_domain_page_with_cache(spl3e, cache); @@ -1475,7 +1510,7 @@ #if CONFIG_PAGING_LEVELS >= 4 void shadow_l4_normal_pt_update( struct domain *d, - unsigned long pa, l4_pgentry_t gpde, + unsigned long pa, l4_pgentry_t l4e, struct domain_mmap_cache *cache) { unsigned long sl4mfn; @@ -1486,11 +1521,10 @@ sl4mfn = __shadow_status(current->domain, pa >> PAGE_SHIFT, PGT_l4_shadow); if ( sl4mfn ) { - SH_VVLOG("shadow_l4_normal_pt_update pa=%p, gpde=%" PRIpte, - (void *)pa, l4e_get_intpte(gpde)); - + SH_VVLOG("shadow_l4_normal_pt_update pa=%p, l4e=%" PRIpte, + (void *)pa, l4e_get_intpte(l4e)); spl4e = (pgentry_64_t *)map_domain_page_with_cache(sl4mfn, cache); - validate_entry_change(d, (pgentry_64_t *)&gpde, + validate_entry_change(d, (pgentry_64_t *)&l4e, &spl4e[(pa & ~PAGE_MASK) / sizeof(l4_pgentry_t)], shadow_type_to_level(PGT_l4_shadow)); unmap_domain_page_with_cache(spl4e, cache); @@ -1554,8 +1588,6 @@ remove_shadow(struct domain *d, unsigned long gpfn, u32 stype) { unsigned long smfn; - - //printk("%s(gpfn=%lx, 
type=%x)\n", __func__, gpfn, stype); shadow_lock(d); diff -r e023e37b3c7a -r 995e94c4802e xen/include/asm-x86/page.h --- a/xen/include/asm-x86/page.h Fri Nov 11 18:02:49 2005 +++ b/xen/include/asm-x86/page.h Fri Nov 11 18:11:13 2005 @@ -232,9 +232,6 @@ #define linear_l3_table(_ed) ((_ed)->arch.guest_vl3table) #define linear_l4_table(_ed) ((_ed)->arch.guest_vl4table) -#define va_to_l1mfn(_ed, _va) \ - (l2e_get_pfn(linear_l2_table(_ed)[_va>>L2_PAGETABLE_SHIFT])) - #ifndef __ASSEMBLY__ #if CONFIG_PAGING_LEVELS == 3 extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES]; diff -r e023e37b3c7a -r 995e94c4802e xen/include/asm-x86/shadow.h --- a/xen/include/asm-x86/shadow.h Fri Nov 11 18:02:49 2005 +++ b/xen/include/asm-x86/shadow.h Fri Nov 11 18:11:13 2005 @@ -138,6 +138,14 @@ struct domain_mmap_cache *cache); #if CONFIG_PAGING_LEVELS >= 3 #include <asm/page-guest32.h> +/* + * va_mask cannot be used because it's used by the shadow hash. + * Use the score area for for now. + */ +#define is_xen_l2_slot(t,s) \ + ( ((((t) & PGT_score_mask) >> PGT_score_shift) == 3) && \ + ((s) >= (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES - 1))) ) + extern unsigned long gva_to_gpa(unsigned long gva); extern void shadow_l3_normal_pt_update(struct domain *d, unsigned long pa, l3_pgentry_t l3e, @@ -458,7 +466,7 @@ /************************************************************************/ -static inline int __mark_dirty(struct domain *d, unsigned int mfn) +static inline int __mark_dirty(struct domain *d, unsigned long mfn) { unsigned long pfn; int rc = 0; @@ -906,7 +914,7 @@ guest_l2e_add_flags(gpde, _PAGE_ACCESSED); *gpde_p = gpde; - } + } if ( l2e_get_intpte(spde) || l2e_get_intpte(gpde) ) SH_VVLOG("%s: gpde=%" PRIpte ", new spde=%" PRIpte, __func__, @@ -1355,7 +1363,7 @@ } -static inline void delete_shadow_status( +static inline void delete_shadow_status( struct domain *d, unsigned long gpfn, unsigned long gmfn, unsigned int stype) { struct shadow_status *p, *x, *n, *head; @@ -1454,7 +1462,7 @@ ASSERT(stype && !(stype & ~PGT_type_mask)); x = head = hash_bucket(d, gpfn); - + SH_VLOG("set gpfn=%lx smfn=%lx t=%lx bucket=%p(%p)", gpfn, smfn, stype, x, x->next); shadow_audit(d, 0); @@ -1584,7 +1592,7 @@ { struct vcpu *v = current; struct domain *d = v->domain; - l2_pgentry_t sl2e; + l2_pgentry_t sl2e = {0}; __shadow_get_l2e(v, va, &sl2e); if ( !(l2e_get_flags(sl2e) & _PAGE_PRESENT) ) @@ -1731,7 +1739,7 @@ #ifdef CONFIG_VMX if ( VMX_DOMAIN(v) ) paging_enabled = vmx_paging_enabled(v); - + else #endif // HACK ALERT: there's currently no easy way to figure out if a domU @@ -1757,7 +1765,7 @@ if ( shadow_mode_enabled(d) ) v->arch.monitor_table = v->arch.shadow_table; else -#ifdef __x86_64__ +#if CONFIG_PAGING_LEVELS == 4 if ( !(v->arch.flags & TF_kernel_mode) ) v->arch.monitor_table = v->arch.guest_table_user; else diff -r e023e37b3c7a -r 995e94c4802e xen/include/asm-x86/shadow_64.h --- a/xen/include/asm-x86/shadow_64.h Fri Nov 11 18:02:49 2005 +++ b/xen/include/asm-x86/shadow_64.h Fri Nov 11 18:11:13 2005 @@ -29,6 +29,15 @@ #include <asm/shadow.h> #include <asm/shadow_ops.h> +extern struct shadow_ops MODE_B_HANDLER; + +#if CONFIG_PAGING_LEVELS == 3 +#define L4_PAGETABLE_SHIFT 39 +#define L4_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER) +typedef struct { intpte_t l4; } l4_pgentry_t; +#define is_guest_l4_slot(_s) (1) +#endif + #define READ_FAULT 0 #define WRITE_FAULT 1 @@ -94,6 +103,11 @@ return (((va) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1)); case 3: return (((va) >> L3_PAGETABLE_SHIFT) & 
(L3_PAGETABLE_ENTRIES - 1)); +#if CONFIG_PAGING_LEVELS == 3 + case 4: + return PAE_SHADOW_SELF_ENTRY; +#endif + #if CONFIG_PAGING_LEVELS >= 4 #ifndef GUEST_PGENTRY_32 case 4: @@ -127,57 +141,73 @@ } } -static inline pgentry_64_t *__entry( - struct vcpu *v, u64 va, u32 flag) +static inline int __entry( + struct vcpu *v, u64 va, pgentry_64_t *e_p, u32 flag) { int i; pgentry_64_t *le_e; - pgentry_64_t *le_p; + pgentry_64_t *le_p = NULL; unsigned long mfn; int index; u32 level = flag & L_MASK; struct domain *d = v->domain; - - index = table_offset_64(va, ROOT_LEVEL_64); - if (flag & SHADOW_ENTRY) + int root_level; + + if ( flag & SHADOW_ENTRY ) + { + root_level = ROOT_LEVEL_64; + index = table_offset_64(va, root_level); le_e = (pgentry_64_t *)&v->arch.shadow_vtable[index]; - else + } + else /* guest entry */ + { + root_level = v->domain->arch.ops->guest_paging_levels; + index = table_offset_64(va, root_level); le_e = (pgentry_64_t *)&v->arch.guest_vtable[index]; - + } /* * If it's not external mode, then mfn should be machine physical. */ - for (i = ROOT_LEVEL_64 - level; i > 0; i--) { - if (unlikely(!(entry_get_flags(*le_e) & _PAGE_PRESENT))) - return NULL; - mfn = entry_get_value(*le_e) >> PAGE_SHIFT; - if ((flag & GUEST_ENTRY) && shadow_mode_translate(d)) + for (i = root_level - level; i > 0; i--) { + if ( unlikely(!(entry_get_flags(*le_e) & _PAGE_PRESENT)) ) { + if ( le_p ) + unmap_domain_page(le_p); + return 0; + } + mfn = entry_get_pfn(*le_e); + if ( (flag & GUEST_ENTRY) && shadow_mode_translate(d) ) mfn = get_mfn_from_pfn(mfn); - le_p = (pgentry_64_t *)phys_to_virt(mfn << PAGE_SHIFT); + if ( le_p ) + unmap_domain_page(le_p); + le_p = (pgentry_64_t *)map_domain_page(mfn); index = table_offset_64(va, (level + i - 1)); le_e = &le_p[index]; - - } - return le_e; - -} - -static inline pgentry_64_t *__rw_entry( - struct vcpu *ed, u64 va, void *e_p, u32 flag) -{ - pgentry_64_t *le_e = __entry(ed, va, flag); + } + + if ( flag & SET_ENTRY ) + *le_e = *e_p; + else + *e_p = *le_e; + + if ( le_p ) + unmap_domain_page(le_p); + + return 1; + +} + +static inline int __rw_entry( + struct vcpu *v, u64 va, void *e_p, u32 flag) +{ pgentry_64_t *e = (pgentry_64_t *)e_p; - if (le_e == NULL) - return NULL; if (e) { - if (flag & SET_ENTRY) - *le_e = *e; - else - *e = *le_e; - } - return le_e; -} + return __entry(v, va, e, flag); + } + + return 0; +} + #define __shadow_set_l4e(v, va, value) \ __rw_entry(v, va, value, SHADOW_ENTRY | SET_ENTRY | PAGING_L4) #define __shadow_get_l4e(v, va, sl4e) \ @@ -204,7 +234,7 @@ #define __guest_get_l3e(v, va, sl3e) \ __rw_entry(v, va, gl3e, GUEST_ENTRY | GET_ENTRY | PAGING_L3) -static inline void * __guest_set_l2e( +static inline int __guest_set_l2e( struct vcpu *v, u64 va, void *value, int size) { switch(size) { @@ -216,21 +246,21 @@ l2va = (l2_pgentry_32_t *)v->arch.guest_vtable; if (value) l2va[l2_table_offset_32(va)] = *(l2_pgentry_32_t *)value; - return &l2va[l2_table_offset_32(va)]; + return 1; } case 8: return __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | PAGING_L2); default: BUG(); - return NULL; - } - return NULL; + return 0; + } + return 0; } #define __guest_set_l2e(v, va, value) \ - ( __typeof__(value) )__guest_set_l2e(v, (u64)va, value, sizeof(*value)) - -static inline void * __guest_get_l2e( + __guest_set_l2e(v, (u64)va, value, sizeof(*value)) + +static inline int __guest_get_l2e( struct vcpu *v, u64 va, void *gl2e, int size) { switch(size) { @@ -241,21 +271,21 @@ l2va = (l2_pgentry_32_t *)v->arch.guest_vtable; if (gl2e) *(l2_pgentry_32_t *)gl2e = 
l2va[l2_table_offset_32(va)]; - return &l2va[l2_table_offset_32(va)]; + return 1; } case 8: return __rw_entry(v, va, gl2e, GUEST_ENTRY | GET_ENTRY | PAGING_L2); default: BUG(); - return NULL; - } - return NULL; + return 0; + } + return 0; } #define __guest_get_l2e(v, va, gl2e) \ - (__typeof__ (gl2e))__guest_get_l2e(v, (u64)va, gl2e, sizeof(*gl2e)) - -static inline void * __guest_set_l1e( + __guest_get_l2e(v, (u64)va, gl2e, sizeof(*gl2e)) + +static inline int __guest_set_l1e( struct vcpu *v, u64 va, void *value, int size) { switch(size) { @@ -267,34 +297,34 @@ unsigned long l1mfn; if (!__guest_get_l2e(v, va, &gl2e)) - return NULL; + return 0; if (unlikely(!(l2e_get_flags_32(gl2e) & _PAGE_PRESENT))) - return NULL; + return 0; l1mfn = get_mfn_from_pfn( l2e_get_pfn(gl2e)); - l1va = (l1_pgentry_32_t *) - phys_to_virt(l1mfn << L1_PAGETABLE_SHIFT); + l1va = (l1_pgentry_32_t *)map_domain_page(l1mfn); if (value) l1va[l1_table_offset_32(va)] = *(l1_pgentry_32_t *)value; - - return &l1va[l1_table_offset_32(va)]; + unmap_domain_page(l1va); + + return 1; } case 8: return __rw_entry(v, va, value, GUEST_ENTRY | SET_ENTRY | PAGING_L1); default: BUG(); - return NULL; - } - return NULL; + return 0; + } + return 0; } #define __guest_set_l1e(v, va, value) \ - ( __typeof__(value) )__guest_set_l1e(v, (u64)va, value, sizeof(*value)) - -static inline void * __guest_get_l1e( + __guest_set_l1e(v, (u64)va, value, sizeof(*value)) + +static inline int __guest_get_l1e( struct vcpu *v, u64 va, void *gl1e, int size) { switch(size) { @@ -306,34 +336,33 @@ unsigned long l1mfn; if (!(__guest_get_l2e(v, va, &gl2e))) - return NULL; + return 0; if (unlikely(!(l2e_get_flags_32(gl2e) & _PAGE_PRESENT))) - return NULL; + return 0; l1mfn = get_mfn_from_pfn( l2e_get_pfn(gl2e)); - l1va = (l1_pgentry_32_t *) phys_to_virt( - l1mfn << L1_PAGETABLE_SHIFT); + l1va = (l1_pgentry_32_t *) map_domain_page(l1mfn); if (gl1e) *(l1_pgentry_32_t *)gl1e = l1va[l1_table_offset_32(va)]; - - return &l1va[l1_table_offset_32(va)]; + unmap_domain_page(l1va); + return 1; } case 8: // 64-bit guest return __rw_entry(v, va, gl1e, GUEST_ENTRY | GET_ENTRY | PAGING_L1); default: BUG(); - return NULL; - } - return NULL; + return 0; + } + return 0; } #define __guest_get_l1e(v, va, gl1e) \ - ( __typeof__(gl1e) )__guest_get_l1e(v, (u64)va, gl1e, sizeof(*gl1e)) + __guest_get_l1e(v, (u64)va, gl1e, sizeof(*gl1e)) static inline void entry_general( struct domain *d, @@ -365,10 +394,16 @@ unmap_domain_page(l1_p); } } else { - sle = entry_from_pfn( - smfn, - (entry_get_flags(gle) | _PAGE_RW | _PAGE_ACCESSED) & ~_PAGE_AVAIL); - entry_add_flags(gle, _PAGE_ACCESSED); + if (d->arch.ops->guest_paging_levels <= PAGING_L3 + && level == PAGING_L3) { + sle = entry_from_pfn(smfn, entry_get_flags(gle)); + } else { + + sle = entry_from_pfn( + smfn, + (entry_get_flags(gle) | _PAGE_RW | _PAGE_ACCESSED) & ~_PAGE_AVAIL); + entry_add_flags(gle, _PAGE_ACCESSED); + } } // XXX mafetter: Hmm... // Shouldn't the dirty log be checked/updated here? @@ -392,7 +427,7 @@ if ( entry_get_flags(gle) & _PAGE_PRESENT ) { if ((entry_get_flags(gle) & _PAGE_PSE) && level == PAGING_L2) { - smfn = __shadow_status(d, entry_get_value(gle) >> PAGE_SHIFT, PGT_fl1_shadow); + smfn = __shadow_status(d, entry_get_pfn(gle), PGT_fl1_shadow); } else { smfn = __shadow_status(d, entry_get_pfn(gle), shadow_level_to_type((level -1 ))); _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
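
Editor's note (not part of the patch): several of the 32-bit-VMX-guest-on-PAE/64-bit-host paths above, in particular validate_bl2e_change(), rely on mapping a 32-bit guest L2 index (1024 four-byte entries per page) onto a shadow built from 512-entry, eight-byte page tables, where each guest L1 is shadowed by two consecutive shadow L1 pages. The standalone sketch below only demonstrates that index arithmetic; it is plain C, independent of Xen, and all names in it are illustrative.

/* Standalone illustration only -- not Xen code.  A 32-bit guest L2 page
 * holds 1024 4-byte entries; the PAE/64-bit shadow uses 512 8-byte
 * entries per page, and each guest L1 is shadowed by two consecutive
 * shadow L1 pages.  validate_bl2e_change() therefore maps a guest L2
 * index to one shadow "L3" slot plus an even/odd pair of shadow L2 slots.
 */
#include <stdio.h>

#define PAGETABLE_ENTRIES 512           /* entries in an 8-byte-PTE page */

int main(void)
{
    int index;

    for (index = 0; index < 1024; index += 128) {
        int sl3_idx = index / (PAGETABLE_ENTRIES / 2);        /* which shadow L2 page   */
        int sl2_idx = (index % (PAGETABLE_ENTRIES / 2)) * 2;  /* first slot of the pair */
        printf("guest l2 index %4d -> sl3 slot %d, sl2 slots %d/%d\n",
               index, sl3_idx, sl2_idx, sl2_idx + 1);
    }
    return 0;
}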