[Xen-changelog] [xen-unstable] [HVM] Add type information to the p2m map.
# HG changeset patch
# User Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
# Date 1189431750 -3600
# Node ID 4633e9604da9c51f077285465d63db1820e6f574
# Parent  1474db8058b20753eb465273f7dbf5e10662bf0f
[HVM] Add type information to the p2m map.

This is a base for memory tricks like page sharing, copy-on-write,
lazy allocation, etc.  It should also make pass-through MMIO easier to
implement in the p2m.

Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxxxxx>
---
 xen/arch/x86/hvm/hvm.c           |   33 ++++---
 xen/arch/x86/hvm/io.c            |    9 -
 xen/arch/x86/hvm/svm/svm.c       |   32 ++++--
 xen/arch/x86/hvm/vmx/vmx.c       |   18 ++-
 xen/arch/x86/mm/hap/guest_walk.c |   10 +-
 xen/arch/x86/mm/hap/hap.c        |   10 +-
 xen/arch/x86/mm/p2m.c            |  122 +++++++++++++++++---
 xen/arch/x86/mm/shadow/common.c  |   24 +++--
 xen/arch/x86/mm/shadow/multi.c   |  138 +++++++++++++++++++-----
 xen/arch/x86/mm/shadow/types.h   |    2
 xen/include/asm-x86/mm.h         |    2
 xen/include/asm-x86/p2m.h        |  179 ++++++++++++++++++++++++-----
 12 files changed, 379 insertions(+), 200 deletions(-)
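[Editor's note: the diff below converts every caller from the old untyped
lookups (gmfn_to_mfn, get_mfn_from_gpfn) to the new typed interface.  A
minimal sketch of the idiom, using only names defined in this patch -- the
wrapping function itself is invented for illustration and is not part of
the changeset:

    /* Hypothetical caller, for illustration only. */
    static int example_gfn_lookup(struct domain *d, unsigned long gfn)
    {
        p2m_type_t p2mt;
        mfn_t mfn;

        /* gfn_to_mfn() now reports the entry's p2m type through an
         * out-parameter as well as returning the mfn */
        mfn = gfn_to_mfn(d, gfn, &p2mt);

        /* Callers test the type, not the raw mfn, to classify the frame */
        if ( !p2m_is_ram(p2mt) )
            return -EINVAL;      /* emulated MMIO, or nothing mapped here */

        ASSERT(mfn_valid(mfn));  /* RAM types always map a real frame */
        /* ... safe to map the mfn or take a page reference here ... */
        return 0;
    }
]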
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/hvm/hvm.c    Mon Sep 10 14:42:30 2007 +0100
@@ -161,12 +161,14 @@ static int hvm_set_ioreq_page(
     struct domain *d, struct hvm_ioreq_page *iorp, unsigned long gmfn)
 {
     struct page_info *page;
+    p2m_type_t p2mt;
     unsigned long mfn;
     void *va;
 
-    mfn = gmfn_to_mfn(d, gmfn);
-    if ( !mfn_valid(mfn) )
+    mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
+    if ( !p2m_is_ram(p2mt) )
         return -EINVAL;
+    ASSERT(mfn_valid(mfn));
 
     page = mfn_to_page(mfn);
     if ( !get_page_and_type(page, d, PGT_writable_page) )
@@ -517,7 +519,8 @@ int hvm_set_cr0(unsigned long value)
 int hvm_set_cr0(unsigned long value)
 {
     struct vcpu *v = current;
-    unsigned long mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
+    p2m_type_t p2mt;
+    unsigned long gfn, mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
 
     HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
 
@@ -559,8 +562,10 @@ int hvm_set_cr0(unsigned long value)
         if ( !paging_mode_hap(v->domain) )
         {
             /* The guest CR3 must be pointing to the guest physical. */
-            mfn = get_mfn_from_gpfn(v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT);
-            if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain))
+            gfn = v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT;
+            mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
+            if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
+                 !get_page(mfn_to_page(mfn), v->domain))
             {
                 gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n",
                          v->arch.hvm_vcpu.guest_cr[3], mfn);
@@ -603,16 +608,18 @@ int hvm_set_cr3(unsigned long value)
 int hvm_set_cr3(unsigned long value)
 {
     unsigned long mfn;
+    p2m_type_t p2mt;
     struct vcpu *v = current;
 
     if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
         (value != v->arch.hvm_vcpu.guest_cr[3]) )
    {
-        /* Shadow-mode CR3 change. Check PDBR and then make a new shadow. */
+        /* Shadow-mode CR3 change. Check PDBR and update refcounts. */
        HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
-        mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
-        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
-            goto bad_cr3;
+        mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
+        if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
+             !get_page(mfn_to_page(mfn), v->domain) )
+            goto bad_cr3;
 
        put_page(pagetable_get_page(v->arch.guest_table));
        v->arch.guest_table = pagetable_from_pfn(mfn);
@@ -677,6 +684,7 @@ static int __hvm_copy(void *buf, paddr_t
 static int __hvm_copy(void *buf, paddr_t addr, int size, int dir, int virt)
 {
     unsigned long gfn, mfn;
+    p2m_type_t p2mt;
     char *p;
     int count, todo;
 
@@ -690,10 +698,11 @@ static int __hvm_copy(void *buf, paddr_t
         else
             gfn = addr >> PAGE_SHIFT;
 
-        mfn = get_mfn_from_gpfn(gfn);
-
-        if ( mfn == INVALID_MFN )
+        mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
+
+        if ( !p2m_is_ram(p2mt) )
             return todo;
+        ASSERT(mfn_valid(mfn));
 
         p = (char *)map_domain_page(mfn) + (addr & ~PAGE_MASK);
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/hvm/io.c
--- a/xen/arch/x86/hvm/io.c     Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/hvm/io.c     Mon Sep 10 14:42:30 2007 +0100
@@ -826,9 +826,7 @@ void hvm_io_assist(void)
     ioreq_t *p;
     struct cpu_user_regs *regs;
     struct hvm_io_op *io_opp;
-    unsigned long gmfn;
     struct vcpu *v = current;
-    struct domain *d = v->domain;
 
     io_opp = &v->arch.hvm_vcpu.io_op;
     regs   = &io_opp->io_context;
@@ -861,13 +859,6 @@ void hvm_io_assist(void)
     regs->eflags &= ~X86_EFLAGS_RF;
     hvm_load_cpu_guest_regs(v, regs);
     memcpy(guest_cpu_user_regs(), regs, HVM_CONTEXT_STACK_BYTES);
-
-    /* Has memory been dirtied? */
-    if ( (p->dir == IOREQ_READ) && p->data_is_ptr )
-    {
-        gmfn = get_mfn_from_gpfn(paging_gva_to_gfn(v, p->data));
-        paging_mark_dirty(d, gmfn);
-    }
 
  out:
     vcpu_end_shutdown_deferral(v);
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c        Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/hvm/svm/svm.c        Mon Sep 10 14:42:30 2007 +0100
@@ -338,6 +338,7 @@ int svm_vmcb_restore(struct vcpu *v, str
 int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
 {
     unsigned long mfn = 0;
+    p2m_type_t p2mt;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
 
     if ( c->pending_valid &&
@@ -353,8 +354,8 @@ int svm_vmcb_restore(struct vcpu *v, str
     {
         if ( c->cr0 & X86_CR0_PG )
         {
-            mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
-            if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+            mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
+            if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
             {
                 gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n",
                          c->cr3);
@@ -1004,15 +1005,23 @@ int start_svm(struct cpuinfo_x86 *c)
     return 1;
 }
 
-static int svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
-{
-    if (mmio_space(gpa)) {
+static void svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
+{
+    p2m_type_t p2mt;
+    mfn_t mfn;
+    unsigned long gfn = gpa >> PAGE_SHIFT;
+
+    /* If this GFN is emulated MMIO, pass the fault to the mmio handler */
+    mfn = gfn_to_mfn_current(gfn, &p2mt);
+    if ( p2mt == p2m_mmio_dm )
+    {
         handle_mmio(gpa);
-        return 1;
-    }
-
-    paging_mark_dirty(current->domain, get_mfn_from_gpfn(gpa >> PAGE_SHIFT));
-    return p2m_set_flags(current->domain, gpa, __PAGE_HYPERVISOR|_PAGE_USER);
+        return;
+    }
+
+    /* Log-dirty: mark the page dirty and let the guest write it again */
+    paging_mark_dirty(current->domain, mfn_x(mfn));
+    p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
 }
 
 static void
 svm_do_no_device_fault(struct vmcb_struct *vmcb)
@@ -2341,8 +2350,7 @@ asmlinkage void svm_vmexit_handler(struc
 
     case VMEXIT_NPF:
         regs->error_code = vmcb->exitinfo1;
-        if ( !svm_do_nested_pgfault(vmcb->exitinfo2, regs) )
-            domain_crash(v->domain);
+        svm_do_nested_pgfault(vmcb->exitinfo2, regs);
         break;
 
     default:
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c        Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c        Mon Sep 10 14:42:30 2007 +0100
@@ -566,6 +566,7 @@ int vmx_vmcs_restore(struct vcpu *v, str
 int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
 {
     unsigned long mfn = 0;
+    p2m_type_t p2mt;
 
     if ( c->pending_valid &&
          ((c->pending_type == 1) || (c->pending_type > 6) ||
@@ -578,8 +579,8 @@ int vmx_vmcs_restore(struct vcpu *v, str
 
     if ( c->cr0 & X86_CR0_PG )
     {
-        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
-        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+        mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
+        if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
         {
             gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n", c->cr3);
             return -EINVAL;
@@ -1292,19 +1293,23 @@ static void vmx_do_cpuid(struct cpu_user
          * Note that this leaf lives at <max-hypervisor-leaf> + 1.
          */
         u64 value = ((u64)regs->edx << 32) | (u32)regs->ecx;
-        unsigned long mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
+        p2m_type_t p2mt;
+        unsigned long mfn;
         struct vcpu *v = current;
         char *p;
 
+        mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
+
         gdprintk(XENLOG_INFO, "Input address is 0x%"PRIx64".\n", value);
 
         /* 8-byte aligned valid pseudophys address from vmxassist, please. */
-        if ( (value & 7) || (mfn == INVALID_MFN) ||
+        if ( (value & 7) || !p2m_is_ram(p2mt) ||
              !v->arch.hvm_vmx.vmxassist_enabled )
         {
             domain_crash(v->domain);
             return;
         }
+        ASSERT(mfn_valid(mfn));
 
         p = map_domain_page(mfn);
         value = *((uint64_t *)(p + (value & (PAGE_SIZE - 1))));
@@ -1905,11 +1910,12 @@ static int vmx_world_restore(struct vcpu
 static int vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
 {
     unsigned long mfn = 0;
+    p2m_type_t p2mt;
 
     if ( c->cr0 & X86_CR0_PG )
     {
-        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
-        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+        mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
+        if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
         {
             gdprintk(XENLOG_ERR, "Invalid CR3 value=%x", c->cr3);
             return -EINVAL;
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/hap/guest_walk.c
--- a/xen/arch/x86/mm/hap/guest_walk.c  Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/hap/guest_walk.c  Mon Sep 10 14:42:30 2007 +0100
@@ -28,7 +28,8 @@
 #include <xen/sched.h>
 #include <asm/hvm/svm/vmcb.h>
 #include <asm/domain.h>
-#include <asm/shadow.h>
+#include <asm/paging.h>
+#include <asm/p2m.h>
 #include <asm/hap.h>
 
 #include "private.h"
@@ -67,6 +68,7 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
     int lev, index;
     paddr_t gpa = 0;
     unsigned long gpfn, mfn;
+    p2m_type_t p2mt;
     int success = 1;
 
     l1_pgentry_t *l1e;
@@ -81,14 +83,16 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
     gpfn = (gcr3 >> PAGE_SHIFT);
     for ( lev = mode; lev >= 1; lev-- )
     {
-        mfn = get_mfn_from_gpfn(gpfn);
-        if ( mfn == INVALID_MFN )
+        mfn = mfn_x(gfn_to_mfn_current(gpfn, &p2mt));
+        if ( !p2m_is_ram(p2mt) )
         {
             HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
                        lev);
             success = 0;
             break;
         }
+        ASSERT(mfn_valid(mfn));
+
         index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
 
 #if GUEST_PAGING_LEVELS >= 4
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/hap/hap.c
--- a/xen/arch/x86/mm/hap/hap.c Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/hap/hap.c Mon Sep 10 14:42:30 2007 +0100
@@ -60,8 +60,8 @@ int hap_enable_log_dirty(struct domain *
     d->arch.paging.mode |= PG_log_dirty;
     hap_unlock(d);
 
-    /* set l1e entries of P2M table to NOT_WRITABLE. */
-    p2m_set_flags_global(d, (_PAGE_PRESENT|_PAGE_USER));
+    /* set l1e entries of P2M table to be read-only. */
+    p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
     flush_tlb_mask(d->domain_dirty_cpumask);
     return 0;
 }
@@ -73,14 +73,14 @@ int hap_disable_log_dirty(struct domain
     hap_unlock(d);
 
     /* set l1e entries of P2M table with normal mode */
-    p2m_set_flags_global(d, __PAGE_HYPERVISOR|_PAGE_USER);
+    p2m_change_type_global(d, p2m_ram_logdirty, p2m_ram_rw);
     return 0;
 }
 
 void hap_clean_dirty_bitmap(struct domain *d)
 {
-    /* mark physical memory as NOT_WRITEABLE and flush the TLB */
-    p2m_set_flags_global(d, (_PAGE_PRESENT|_PAGE_USER));
+    /* set l1e entries of P2M table to be read-only. */
+    p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
     flush_tlb_mask(d->domain_dirty_cpumask);
 }
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c     Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/p2m.c     Mon Sep 10 14:42:30 2007 +0100
@@ -4,7 +4,7 @@
  * physical-to-machine mappings for automatically-translated domains.
  *
  * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
- * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
  * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
  * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
  *
@@ -93,6 +93,31 @@
 #define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
 
 
+/* PTE flags for the various types of p2m entry */
+#define P2M_BASE_FLAGS \
+        (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
+
+static unsigned long p2m_type_to_flags(p2m_type_t t)
+{
+    unsigned long flags = (t & 0x7UL) << 9;
+    switch(t)
+    {
+    case p2m_invalid:
+    default:
+        return flags;
+    case p2m_ram_rw:
+        return flags | P2M_BASE_FLAGS | _PAGE_RW;
+    case p2m_ram_logdirty:
+        return flags | P2M_BASE_FLAGS;
+    case p2m_ram_ro:
+        return flags | P2M_BASE_FLAGS;
+    case p2m_mmio_dm:
+        return flags;
+    case p2m_mmio_direct:
+        return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_PCD;
+    }
+}
+
+
 // Find the next level's P2M entry, checking for out-of-range gfn's...
 // Returns NULL on error.
 //
@@ -358,19 +383,25 @@ void p2m_teardown(struct domain *d)
 }
 
 mfn_t
-gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
+gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t)
 /* Read another domain's p2m entries */
 {
     mfn_t mfn;
-    paddr_t addr = ((paddr_t)gpfn) << PAGE_SHIFT;
+    paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
     l2_pgentry_t *l2e;
     l1_pgentry_t *l1e;
 
     ASSERT(paging_mode_translate(d));
+
+    /* XXX This is for compatibility with the old model, where anything not
+     * XXX marked as RAM was considered to be emulated MMIO space.
+     * XXX Once we start explicitly registering MMIO regions in the p2m
+     * XXX we will return p2m_invalid for unmapped gfns */
+    *t = p2m_mmio_dm;
+
     mfn = pagetable_get_mfn(d->arch.phys_table);
-
-    if ( gpfn > d->arch.p2m.max_mapped_pfn )
+    if ( gfn > d->arch.p2m.max_mapped_pfn )
         /* This pfn is higher than the highest the p2m map currently holds */
         return _mfn(INVALID_MFN);
 
@@ -428,9 +459,11 @@ gfn_to_mfn_foreign(struct domain *d, uns
         return _mfn(INVALID_MFN);
     }
     mfn = _mfn(l1e_get_pfn(*l1e));
+    *t = p2m_flags_to_type(l1e_get_flags(*l1e));
     unmap_domain_page(l1e);
 
-    return mfn;
+    ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
+    return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
 }
 
 #if P2M_AUDIT
@@ -630,10 +663,7 @@ p2m_remove_page(struct domain *d, unsign
         return;
     P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
 
-    ASSERT(mfn_x(gfn_to_mfn(d, gfn)) == mfn);
-    //ASSERT(mfn_to_gfn(d, mfn) == gfn);
-
-    set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
+    set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
     set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
 }
 
@@ -653,6 +683,7 @@ guest_physmap_add_page(struct domain *d,
                        unsigned long mfn)
 {
     unsigned long ogfn;
+    p2m_type_t ot;
     mfn_t omfn;
 
     if ( !paging_mode_translate(d) )
@@ -663,10 +694,10 @@ guest_physmap_add_page(struct domain *d,
 
     P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
 
-    omfn = gfn_to_mfn(d, gfn);
-    if ( mfn_valid(omfn) )
-    {
-        set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
+    omfn = gfn_to_mfn(d, gfn, &ot);
+    if ( p2m_is_ram(ot) )
+    {
+        ASSERT(mfn_valid(omfn));
         set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
     }
 
@@ -683,8 +714,10 @@ guest_physmap_add_page(struct domain *d,
         /* This machine frame is already mapped at another physical address */
         P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
                   mfn, ogfn, gfn);
-        if ( mfn_valid(omfn = gfn_to_mfn(d, ogfn)) )
-        {
+        omfn = gfn_to_mfn(d, ogfn, &ot);
+        if ( p2m_is_ram(ot) )
+        {
+            ASSERT(mfn_valid(omfn));
             P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
                       ogfn , mfn_x(omfn));
             if ( mfn_x(omfn) == mfn )
@@ -692,21 +725,29 @@ guest_physmap_add_page(struct domain *d,
         }
     }
 
-    set_p2m_entry(d, gfn, _mfn(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
-    set_gpfn_from_mfn(mfn, gfn);
+    if ( mfn_valid(_mfn(mfn)) )
+    {
+        set_p2m_entry(d, gfn, _mfn(mfn),
+                      p2m_type_to_flags(p2m_ram_rw)|__PAGE_HYPERVISOR|_PAGE_USER);
+        set_gpfn_from_mfn(mfn, gfn);
+    }
+    else
+    {
+        gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
+                 gfn, mfn);
+        set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
+    }
 
     audit_p2m(d);
     p2m_unlock(d);
 }
 
-/* This function goes through P2M table and modify l1e flags of all pages. Note
- * that physical base address of l1e is intact. This function can be used for
- * special purpose, such as marking physical memory as NOT WRITABLE for
- * tracking dirty pages during live migration.
- */
-void p2m_set_flags_global(struct domain *d, u32 l1e_flags)
-{
-    unsigned long mfn, gfn;
+/* Walk the whole p2m table, changing any entries of the old type
+ * to the new type.  This is used in hardware-assisted paging to
+ * quickly enable or disable log-dirty tracking */
+void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt)
+{
+    unsigned long mfn, gfn, flags;
     l1_pgentry_t l1e_content;
     l1_pgentry_t *l1e;
     l2_pgentry_t *l2e;
@@ -769,12 +810,14 @@ void p2m_set_flags_global(struct domain
 
             for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
             {
-                if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
+                flags = l1e_get_flags(l1e[i1]);
+                if ( p2m_flags_to_type(flags) != ot )
                     continue;
                 mfn = l1e_get_pfn(l1e[i1]);
                 gfn = get_gpfn_from_mfn(mfn);
-                /* create a new 1le entry using l1e_flags */
-                l1e_content = l1e_from_pfn(mfn, l1e_flags);
+                /* create a new l1e entry with the new type */
+                flags = p2m_type_to_flags(nt);
+                l1e_content = l1e_from_pfn(mfn, flags);
                 paging_write_p2m_entry(d, gfn, &l1e[i1], l1mfn, l1e_content, 1);
             }
 
@@ -800,24 +843,23 @@ void p2m_set_flags_global(struct domain
     p2m_unlock(d);
 }
 
-/* This function traces through P2M table and modifies l1e flags of a specific
- * gpa.
- */
-int p2m_set_flags(struct domain *d, paddr_t gpa, u32 l1e_flags)
-{
-    unsigned long gfn;
+/* Modify the p2m type of a single gfn from ot to nt, returning the
+ * entry's previous type */
+p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
+                           p2m_type_t ot, p2m_type_t nt)
+{
+    p2m_type_t pt;
     mfn_t mfn;
 
     p2m_lock(d);
 
-    gfn = gpa >> PAGE_SHIFT;
-    mfn = gfn_to_mfn(d, gfn);
-    if ( mfn_valid(mfn) )
-        set_p2m_entry(d, gfn, mfn, l1e_flags);
+    mfn = gfn_to_mfn(d, gfn, &pt);
+    if ( pt == ot )
+        set_p2m_entry(d, gfn, mfn, p2m_type_to_flags(nt));
 
     p2m_unlock(d);
 
-    return 1;
+    return pt;
 }
 
 /*
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/shadow/common.c   Mon Sep 10 14:42:30 2007 +0100
@@ -2764,19 +2764,23 @@ shadow_write_p2m_entry(struct vcpu *v, u
                        l1_pgentry_t new, unsigned int level)
 {
     struct domain *d = v->domain;
-    mfn_t mfn;
 
     shadow_lock(d);
 
-    /* handle physmap_add and physmap_remove */
-    mfn = gfn_to_mfn(d, gfn);
-    if ( v != NULL && level == 1 && mfn_valid(mfn) ) {
-        sh_remove_all_shadows_and_parents(v, mfn);
-        if ( sh_remove_all_mappings(v, mfn) )
-            flush_tlb_mask(d->domain_dirty_cpumask);
-    }
-
-    /* update the entry with new content */
+    /* If we're removing an MFN from the p2m, remove it from the shadows too */
+    if ( level == 1 )
+    {
+        mfn_t mfn = _mfn(l1e_get_pfn(*p));
+        p2m_type_t p2mt = p2m_flags_to_type(l1e_get_flags(*p));
+        if ( p2m_is_valid(p2mt) && mfn_valid(mfn) )
+        {
+            sh_remove_all_shadows_and_parents(v, mfn);
+            if ( sh_remove_all_mappings(v, mfn) )
+                flush_tlb_mask(d->domain_dirty_cpumask);
+        }
+    }
+
+    /* Update the entry with new content */
     safe_write_pte(p, new);
 
     /* install P2M in monitors for PAE Xen */
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c    Mon Sep 10 14:42:30 2007 +0100
@@ -209,6 +209,7 @@ guest_walk_tables(struct vcpu *v, unsign
 guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
 {
     struct domain *d = v->domain;
+    p2m_type_t p2mt;
     ASSERT(!guest_op || shadow_locked_by_me(d));
 
     perfc_incr(shadow_guest_walk);
@@ -223,8 +224,9 @@ guest_walk_tables(struct vcpu *v, unsign
         + guest_l4_table_offset(va);
     /* Walk down to the l3e */
     if ( !(guest_l4e_get_flags(*gw->l4e) & _PAGE_PRESENT) ) return 0;
-    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e));
-    if ( !mfn_valid(gw->l3mfn) ) return 1;
+    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e), &p2mt);
+    if ( !p2m_is_ram(p2mt) ) return 1;
+    ASSERT(mfn_valid(gw->l3mfn));
     /* This mfn is a pagetable: make sure the guest can't write to it. */
     if ( guest_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
         flush_tlb_mask(d->domain_dirty_cpumask);
@@ -236,8 +238,9 @@ guest_walk_tables(struct vcpu *v, unsign
 #endif /* PAE or 64... */
     /* Walk down to the l2e */
     if ( !(guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT) ) return 0;
-    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e));
-    if ( !mfn_valid(gw->l2mfn) ) return 1;
+    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e), &p2mt);
+    if ( !p2m_is_ram(p2mt) ) return 1;
+    ASSERT(mfn_valid(gw->l2mfn));
     /* This mfn is a pagetable: make sure the guest can't write to it. */
     if ( guest_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
         flush_tlb_mask(d->domain_dirty_cpumask);
@@ -278,8 +281,9 @@ guest_walk_tables(struct vcpu *v, unsign
     else
     {
         /* Not a superpage: carry on and find the l1e. */
-        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e));
-        if ( !mfn_valid(gw->l1mfn) ) return 1;
+        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e), &p2mt);
+        if ( !p2m_is_ram(p2mt) ) return 1;
+        ASSERT(mfn_valid(gw->l1mfn));
         /* This mfn is a pagetable: make sure the guest can't write to it. */
         if ( guest_op
              && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
@@ -626,7 +630,7 @@ _sh_propagate(struct vcpu *v,
               void *shadow_entry_ptr,
               int level,
               fetch_type_t ft,
-              int mmio)
+              p2m_type_t p2mt)
 {
     guest_l1e_t *gp = guest_entry_ptr;
     shadow_l1e_t *sp = shadow_entry_ptr;
@@ -636,6 +640,13 @@ _sh_propagate(struct vcpu *v,
 
     /* We don't shadow PAE l3s */
     ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
+
+    /* Check there's something for the shadows to map to */
+    if ( !p2m_is_valid(p2mt) )
+    {
+        *sp = shadow_l1e_empty();
+        goto done;
+    }
 
     if ( mfn_valid(guest_table_mfn) )
         /* Handle A and D bit propagation into the guest */
@@ -658,19 +669,22 @@ _sh_propagate(struct vcpu *v,
         goto done;
     }
 
-    if ( level == 1 && mmio )
-    {
-        /* Guest l1e maps MMIO space */
+    if ( level == 1 && p2mt == p2m_mmio_dm )
+    {
+        /* Guest l1e maps emulated MMIO space */
         *sp = sh_l1e_mmio(guest_l1e_get_gfn(*gp), gflags);
         if ( !d->arch.paging.shadow.has_fast_mmio_entries )
             d->arch.paging.shadow.has_fast_mmio_entries = 1;
         goto done;
     }
 
-    // Must have a valid target_mfn, unless this is a prefetch.  In the
+    // Must have a valid target_mfn unless this is a prefetch.  In the
     // case of a prefetch, an invalid mfn means that we can not usefully
     // shadow anything, and so we return early.
     //
+    /* N.B. For pass-through MMIO, either this test needs to be relaxed,
+     * and shadow_set_l1e() trained to handle non-valid MFNs (ugh), or the
+     * MMIO areas need to be added to the frame-table to make them "valid". */
     if ( !mfn_valid(target_mfn) )
     {
         ASSERT((ft == ft_prefetch));
@@ -718,6 +732,8 @@ _sh_propagate(struct vcpu *v,
     // Only allow the guest write access to a page a) on a demand fault,
     // or b) if the page is already marked as dirty.
     //
+    // (We handle log-dirty entirely inside the shadow code, without using the
+    // p2m_ram_logdirty p2m type: only HAP uses that.)
     if ( unlikely((level == 1) && shadow_mode_log_dirty(d)) )
     {
         if ( ft & FETCH_TYPE_WRITE )
@@ -725,6 +741,10 @@ _sh_propagate(struct vcpu *v,
         else if ( !sh_mfn_is_dirty(d, target_mfn) )
             sflags &= ~_PAGE_RW;
     }
+
+    /* Read-only memory */
+    if ( p2mt == p2m_ram_ro )
+        sflags &= ~_PAGE_RW;
 
     // protect guest page tables
     //
@@ -754,7 +774,12 @@ _sh_propagate(struct vcpu *v,
             sflags |= _PAGE_USER;
     }
 
+    /* MMIO addresses should never be cached */
+    if ( p2m_is_mmio(p2mt) )
+        sflags |= _PAGE_PCD;
+
     *sp = shadow_l1e_from_mfn(target_mfn, sflags);
+
  done:
     SHADOW_DEBUG(PROPAGATE,
                  "%s level %u guest %" SH_PRI_gpte " shadow %" SH_PRI_pte "\n",
@@ -775,7 +800,7 @@ l4e_propagate_from_guest(struct vcpu *v,
                          shadow_l4e_t *sl4e,
                          fetch_type_t ft)
 {
-    _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, 0);
+    _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, p2m_ram_rw);
 }
 
 static void
@@ -786,7 +811,7 @@ l3e_propagate_from_guest(struct vcpu *v,
                          shadow_l3e_t *sl3e,
                          fetch_type_t ft)
 {
-    _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, 0);
+    _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, p2m_ram_rw);
 }
 #endif // GUEST_PAGING_LEVELS >= 4
 
@@ -798,7 +823,7 @@ l2e_propagate_from_guest(struct vcpu *v,
                          shadow_l2e_t *sl2e,
                          fetch_type_t ft)
 {
-    _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, 0);
+    _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, p2m_ram_rw);
 }
 
 static void
@@ -808,9 +833,9 @@ l1e_propagate_from_guest(struct vcpu *v,
                          mfn_t gmfn,
                          shadow_l1e_t *sl1e,
                          fetch_type_t ft,
-                         int mmio)
-{
-    _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, mmio);
+                         p2m_type_t p2mt)
+{
+    _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, p2mt);
 }
 
 
@@ -2196,6 +2221,7 @@ static int validate_gl4e(struct vcpu *v,
     shadow_l4e_t *sl4p = se;
     mfn_t sl3mfn = _mfn(INVALID_MFN);
     struct domain *d = v->domain;
+    p2m_type_t p2mt;
     int result = 0;
 
     perfc_incr(shadow_validate_gl4e_calls);
@@ -2203,8 +2229,8 @@ static int validate_gl4e(struct vcpu *v,
     if ( guest_l4e_get_flags(*new_gl4e) & _PAGE_PRESENT )
     {
         gfn_t gl3gfn = guest_l4e_get_gfn(*new_gl4e);
-        mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn);
-        if ( mfn_valid(gl3mfn) )
+        mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn, &p2mt);
+        if ( p2m_is_ram(p2mt) )
             sl3mfn = get_shadow_status(v, gl3mfn, SH_type_l3_shadow);
         else
             result |= SHADOW_SET_ERROR;
@@ -2248,6 +2274,7 @@ static int validate_gl3e(struct vcpu *v,
     guest_l3e_t *new_gl3e = new_ge;
     shadow_l3e_t *sl3p = se;
     mfn_t sl2mfn = _mfn(INVALID_MFN);
+    p2m_type_t p2mt;
     int result = 0;
 
     perfc_incr(shadow_validate_gl3e_calls);
@@ -2255,8 +2282,8 @@ static int validate_gl3e(struct vcpu *v,
     if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
     {
         gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
-        mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn);
-        if ( mfn_valid(gl2mfn) )
+        mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn, &p2mt);
+        if ( p2m_is_ram(p2mt) )
             sl2mfn = get_shadow_status(v, gl2mfn, SH_type_l2_shadow);
         else
             result |= SHADOW_SET_ERROR;
@@ -2275,6 +2302,7 @@ static int validate_gl2e(struct vcpu *v,
     guest_l2e_t *new_gl2e = new_ge;
     shadow_l2e_t *sl2p = se;
     mfn_t sl1mfn = _mfn(INVALID_MFN);
+    p2m_type_t p2mt;
     int result = 0;
 
     perfc_incr(shadow_validate_gl2e_calls);
@@ -2299,8 +2327,8 @@ static int validate_gl2e(struct vcpu *v,
         }
         else
         {
-            mfn_t gl1mfn = gfn_to_mfn(v->domain, gl1gfn);
-            if ( mfn_valid(gl1mfn) )
+            mfn_t gl1mfn = gfn_to_mfn(v->domain, gl1gfn, &p2mt);
+            if ( p2m_is_ram(p2mt) )
                 sl1mfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
             else
                 result |= SHADOW_SET_ERROR;
@@ -2361,16 +2389,16 @@ static int validate_gl1e(struct vcpu *v,
     shadow_l1e_t *sl1p = se;
     gfn_t gfn;
     mfn_t gmfn;
-    int result = 0, mmio;
+    p2m_type_t p2mt;
+    int result = 0;
 
     perfc_incr(shadow_validate_gl1e_calls);
 
     gfn = guest_l1e_get_gfn(*new_gl1e);
-    gmfn = gfn_to_mfn(v->domain, gfn);
-
-    mmio = (is_hvm_vcpu(v) && mmio_space(gfn_to_paddr(gfn)));
+    gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
+
     l1e_propagate_from_guest(v, new_gl1e, _mfn(INVALID_MFN), gmfn, &new_sl1e,
-                             ft_prefetch, mmio);
+                             ft_prefetch, p2mt);
 
     result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
     return result;
@@ -2554,12 +2582,13 @@ static void sh_prefetch(struct vcpu *v,
 static void sh_prefetch(struct vcpu *v, walk_t *gw,
                         shadow_l1e_t *ptr_sl1e, mfn_t sl1mfn)
 {
-    int i, dist, mmio;
+    int i, dist;
     gfn_t gfn;
     mfn_t gmfn;
     guest_l1e_t gl1e;
     shadow_l1e_t sl1e;
     u32 gflags;
+    p2m_type_t p2mt;
 
     /* Prefetch no further than the end of the _shadow_ l1 MFN */
     dist = (PAGE_SIZE - ((unsigned long)ptr_sl1e & ~PAGE_MASK)) / sizeof sl1e;
@@ -2597,14 +2626,13 @@ static void sh_prefetch(struct vcpu *v,
 
         /* Look at the gfn that the l1e is pointing at */
         gfn = guest_l1e_get_gfn(gl1e);
-        gmfn = gfn_to_mfn(v->domain, gfn);
-        mmio = ( is_hvm_vcpu(v) && mmio_space(gfn_to_paddr(gfn)) );
+        gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
 
         /* Propagate the entry.  Safe to use a pointer to our local
          * gl1e, since this is not a demand-fetch so there will be no
          * write-back to the guest. */
         l1e_propagate_from_guest(v, &gl1e, _mfn(INVALID_MFN),
-                                 gmfn, &sl1e, ft_prefetch, mmio);
+                                 gmfn, &sl1e, ft_prefetch, p2mt);
         (void) shadow_set_l1e(v, ptr_sl1e + i, sl1e, sl1mfn);
     }
 }
@@ -2633,8 +2661,9 @@ static int sh_page_fault(struct vcpu *v,
     paddr_t gpa;
     struct sh_emulate_ctxt emul_ctxt;
     struct x86_emulate_ops *emul_ops;
-    int r, mmio;
+    int r;
     fetch_type_t ft = 0;
+    p2m_type_t p2mt;
 
     SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n",
                   v->domain->domain_id, v->vcpu_id, va, regs->error_code);
@@ -2787,10 +2816,9 @@ static int sh_page_fault(struct vcpu *v,
 
     /* What mfn is the guest trying to access? */
     gfn = guest_l1e_get_gfn(gw.eff_l1e);
-    gmfn = gfn_to_mfn(d, gfn);
-    mmio = (is_hvm_domain(d) && mmio_space(gfn_to_paddr(gfn)));
-
-    if ( !mmio && !mfn_valid(gmfn) )
+    gmfn = gfn_to_mfn(d, gfn, &p2mt);
+
+    if ( !p2m_is_valid(p2mt) || (!p2m_is_mmio(p2mt) && !mfn_valid(gmfn)) )
     {
         perfc_incr(shadow_fault_bail_bad_gfn);
         SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n",
@@ -2821,7 +2849,7 @@ static int sh_page_fault(struct vcpu *v,
 
     /* Calculate the shadow entry and write it */
     l1e_propagate_from_guest(v, (gw.l1e) ? gw.l1e : &gw.eff_l1e, gw.l1mfn,
-                             gmfn, &sl1e, ft, mmio);
+                             gmfn, &sl1e, ft, p2mt);
     r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
 
 #if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
@@ -2844,7 +2872,10 @@ static int sh_page_fault(struct vcpu *v,
         }
     }
 
-    if ( mmio )
+    /* Need to hand off device-model MMIO and writes to read-only
+     * memory to the device model */
+    if ( p2mt == p2m_mmio_dm
+         || (p2mt == p2m_ram_ro && ft == ft_demand_write) )
     {
         gpa = guest_walk_to_gpa(&gw);
         goto mmio;
@@ -3598,6 +3629,7 @@ sh_update_cr3(struct vcpu *v, int do_loc
             int flush = 0;
             gfn_t gl2gfn;
             mfn_t gl2mfn;
+            p2m_type_t p2mt;
             guest_l3e_t *gl3e = (guest_l3e_t*)&v->arch.paging.shadow.gl3e;
             /* First, make all four entries read-only. */
             for ( i = 0; i < 4; i++ )
@@ -3605,8 +3637,9 @@ sh_update_cr3(struct vcpu *v, int do_loc
                 if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
                 {
                     gl2gfn = guest_l3e_get_gfn(gl3e[i]);
-                    gl2mfn = gfn_to_mfn(d, gl2gfn);
-                    flush |= sh_remove_write_access(v, gl2mfn, 2, 0);
+                    gl2mfn = gfn_to_mfn(d, gl2gfn, &p2mt);
+                    if ( p2m_is_ram(p2mt) )
+                        flush |= sh_remove_write_access(v, gl2mfn, 2, 0);
                 }
             }
             if ( flush )
@@ -3617,13 +3650,15 @@ sh_update_cr3(struct vcpu *v, int do_loc
                 if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
                 {
                     gl2gfn = guest_l3e_get_gfn(gl3e[i]);
-                    gl2mfn = gfn_to_mfn(d, gl2gfn);
-                    sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3)
-                                           ? SH_type_l2h_shadow
-                                           : SH_type_l2_shadow);
+                    gl2mfn = gfn_to_mfn(d, gl2gfn, &p2mt);
+                    if ( p2m_is_ram(p2mt) )
+                        sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3)
+                                               ? SH_type_l2h_shadow
+                                               : SH_type_l2_shadow);
+                    else
+                        sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0);
                 }
                 else
-                    /* The guest is not present: clear out the shadow. */
                     sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0);
             }
         }
@@ -3932,6 +3967,7 @@ static inline void * emulate_map_dest(st
     u32 flags, errcode;
     gfn_t gfn;
     mfn_t mfn;
+    p2m_type_t p2mt;
 
     /* We don't emulate user-mode writes to page tables */
     if ( ring_3(sh_ctxt->ctxt.regs) )
@@ -3971,7 +4007,6 @@ static inline void * emulate_map_dest(st
         }
     }
 #endif
-    mfn = gfn_to_mfn(v->domain, gfn);
 
     errcode = PFEC_write_access;
     if ( !(flags & _PAGE_PRESENT) )
@@ -3981,8 +4016,10 @@ static inline void * emulate_map_dest(st
     if ( !(flags & _PAGE_RW) )
         goto page_fault;
 
-    if ( mfn_valid(mfn) )
-    {
+    mfn = gfn_to_mfn(v->domain, gfn, &p2mt);
+    if ( p2m_is_ram(p2mt) )
+    {
+        ASSERT(mfn_valid(mfn));
         *mfnp = mfn;
         v->arch.paging.last_write_was_pt = !!sh_mfn_is_a_page_table(mfn);
         return sh_map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
@@ -4231,6 +4268,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g
 /* Convert this gfn to an mfn in the manner appropriate for the
  * guest pagetable it's used in (gmfn) */
 {
+    p2m_type_t p2mt;
     if ( !shadow_mode_translate(v->domain) )
         return _mfn(gfn_x(gfn));
 
@@ -4238,7 +4276,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g
          != PGT_writable_page )
         return _mfn(gfn_x(gfn)); /* This is a paging-disabled shadow */
     else
-        return gfn_to_mfn(v->domain, gfn);
+        return gfn_to_mfn(v->domain, gfn, &p2mt);
 }
diff -r 1474db8058b2 -r 4633e9604da9 xen/arch/x86/mm/shadow/types.h
--- a/xen/arch/x86/mm/shadow/types.h    Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/arch/x86/mm/shadow/types.h    Mon Sep 10 14:42:30 2007 +0100
@@ -414,7 +414,7 @@ gfn_to_paddr(gfn_t gfn)
 
 /* Override gfn_to_mfn to work with gfn_t */
 #undef gfn_to_mfn
-#define gfn_to_mfn(d, g) _gfn_to_mfn((d), gfn_x(g))
+#define gfn_to_mfn(d, g, t) _gfn_to_mfn((d), gfn_x(g), (t))
 
 
 /* Type used for recording a walk through guest pagetables.  It is
diff -r 1474db8058b2 -r 4633e9604da9 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h  Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/include/asm-x86/mm.h  Mon Sep 10 14:42:30 2007 +0100
@@ -328,8 +328,6 @@ TYPE_SAFE(unsigned long,mfn);
       ? get_gpfn_from_mfn(mfn)                     \
       : (mfn) )
 
-#define gmfn_to_mfn(_d, gpfn)  mfn_x(gfn_to_mfn(_d, gpfn))
-
 #define INVALID_MFN             (~0UL)
 
 #ifdef CONFIG_COMPAT
diff -r 1474db8058b2 -r 4633e9604da9 xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h Mon Sep 10 13:59:46 2007 +0100
+++ b/xen/include/asm-x86/p2m.h Mon Sep 10 14:42:30 2007 +0100
@@ -4,7 +4,7 @@
  * physical-to-machine mappings for automatically-translated domains.
  *
  * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
- * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
  * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
  * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
  *
@@ -27,49 +27,141 @@
 #define _XEN_P2M_H
 
 
-/* The phys_to_machine_mapping is the reversed mapping of MPT for full
- * virtualization.  It is only used by shadow_mode_translate()==true
- * guests, so we steal the address space that would have normally
- * been used by the read-only MPT map.
+/*
+ * The phys_to_machine_mapping maps guest physical frame numbers
+ * to machine frame numbers.  It only exists for paging_mode_translate
+ * guests. It is organised in page-table format, which:
+ *
+ * (1) allows us to use it directly as the second pagetable in hardware-
+ *     assisted paging and (hopefully) iommu support; and
+ * (2) lets us map it directly into the guest vcpus' virtual address space
+ *     as a linear pagetable, so we can read and write it easily.
+ *
+ * For (2) we steal the address space that would have normally been used
+ * by the read-only MPT map in a non-translated guest.  (For
+ * paging_mode_external() guests this mapping is in the monitor table.)
  */
 #define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START)
 
-
-/* Read the current domain's P2M table. */
-static inline mfn_t gfn_to_mfn_current(unsigned long gfn)
-{
-    l1_pgentry_t l1e = l1e_empty();
-    int ret;
-
-    if ( gfn > current->domain->arch.p2m.max_mapped_pfn )
-        return _mfn(INVALID_MFN);
-
-    /* Don't read off the end of the p2m table */
-    ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t));
-
-    ret = __copy_from_user(&l1e,
-                           &phys_to_machine_mapping[gfn],
-                           sizeof(l1e));
-
-    if ( (ret == 0) && (l1e_get_flags(l1e) & _PAGE_PRESENT) )
-        return _mfn(l1e_get_pfn(l1e));
-
-    return _mfn(INVALID_MFN);
+/*
+ * The upper levels of the p2m pagetable always contain full rights; all
+ * variation in the access control bits is made in the level-1 PTEs.
+ *
+ * In addition to the phys-to-machine translation, each p2m PTE contains
+ * *type* information about the gfn it translates, helping Xen to decide
+ * on the correct course of action when handling a page-fault to that
+ * guest frame.  We store the type in the "available" bits of the PTEs
+ * in the table, which gives us 8 possible types on 32-bit systems.
+ * Further expansions of the type system will only be supported on
+ * 64-bit Xen.
+ */
+typedef enum {
+    p2m_invalid = 0,            /* Nothing mapped here */
+    p2m_ram_rw = 1,             /* Normal read/write guest RAM */
+    p2m_ram_logdirty = 2,       /* Temporarily read-only for log-dirty */
+    p2m_ram_ro = 3,             /* Read-only; writes go to the device model */
+    p2m_mmio_dm = 4,            /* Reads and writes go to the device model */
+    p2m_mmio_direct = 5,        /* Read/write mapping of genuine MMIO area */
+} p2m_type_t;
+
+/* We use bitmaps and masks to handle groups of types */
+#define p2m_to_mask(_t) (1UL << (_t))
+
+/* RAM types, which map to real machine frames */
+#define P2M_RAM_TYPES (p2m_to_mask(p2m_ram_rw)          \
+                       | p2m_to_mask(p2m_ram_logdirty)  \
+                       | p2m_to_mask(p2m_ram_ro))
+
+/* MMIO types, which don't have to map to anything in the frametable */
+#define P2M_MMIO_TYPES (p2m_to_mask(p2m_mmio_dm)        \
+                        | p2m_to_mask(p2m_mmio_direct))
+
+/* Read-only types, which must have the _PAGE_RW bit clear in their PTEs */
+#define P2M_RO_TYPES (p2m_to_mask(p2m_ram_logdirty)     \
+                      | p2m_to_mask(p2m_ram_ro))
+
+/* Useful predicates */
+#define p2m_is_ram(_t) (p2m_to_mask(_t) & P2M_RAM_TYPES)
+#define p2m_is_mmio(_t) (p2m_to_mask(_t) & P2M_MMIO_TYPES)
+#define p2m_is_readonly(_t) (p2m_to_mask(_t) & P2M_RO_TYPES)
+#define p2m_is_valid(_t) (p2m_to_mask(_t) & (P2M_RAM_TYPES | P2M_MMIO_TYPES))
+
+/* Extract the type from the PTE flags that store it */
+static inline p2m_type_t p2m_flags_to_type(unsigned long flags)
+{
+    /* Type is stored in the "available" bits, 9, 10 and 11 */
+    return (flags >> 9) & 0x7;
+}
+
+/* Read the current domain's p2m table (through the linear mapping). */
+static inline mfn_t gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t)
+{
+    mfn_t mfn = _mfn(INVALID_MFN);
+    p2m_type_t p2mt = p2m_mmio_dm;
+    /* XXX This is for compatibility with the old model, where anything not
+     * XXX marked as RAM was considered to be emulated MMIO space.
+     * XXX Once we start explicitly registering MMIO regions in the p2m
+     * XXX we will return p2m_invalid for unmapped gfns */
+
+    if ( gfn <= current->domain->arch.p2m.max_mapped_pfn )
+    {
+        l1_pgentry_t l1e = l1e_empty();
+        int ret;
+
+        ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START)
+               / sizeof(l1_pgentry_t));
+
+        /* Need to __copy_from_user because the p2m is sparse and this
+         * part might not exist */
+        ret = __copy_from_user(&l1e,
+                               &phys_to_machine_mapping[gfn],
+                               sizeof(l1e));
+
+        if ( ret == 0 ) {
+            p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
+            ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
+            if ( p2m_is_valid(p2mt) )
+                mfn = _mfn(l1e_get_pfn(l1e));
+            else
+                /* XXX see above */
+                p2mt = p2m_mmio_dm;
+        }
+    }
+
+    *t = p2mt;
+    return mfn;
 }
 
 /* Read another domain's P2M table, mapping pages as we go */
-mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
+mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t);
 
 /* General conversion function from gfn to mfn */
-#define gfn_to_mfn(d, g) _gfn_to_mfn((d), (g))
-static inline mfn_t _gfn_to_mfn(struct domain *d, unsigned long gfn)
+#define gfn_to_mfn(d, g, t) _gfn_to_mfn((d), (g), (t))
+static inline mfn_t _gfn_to_mfn(struct domain *d,
+                                unsigned long gfn, p2m_type_t *t)
 {
     if ( !paging_mode_translate(d) )
+    {
+        /* Not necessarily true, but for non-translated guests, we claim
+         * it's the most generic kind of memory */
+        *t = p2m_ram_rw;
         return _mfn(gfn);
+    }
     if ( likely(current->domain == d) )
-        return gfn_to_mfn_current(gfn);
+        return gfn_to_mfn_current(gfn, t);
     else
-        return gfn_to_mfn_foreign(d, gfn);
+        return gfn_to_mfn_foreign(d, gfn, t);
+}
+
+/* Compatibility function exporting the old untyped interface */
+static inline unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
+{
+    mfn_t mfn;
+    p2m_type_t t;
+    mfn = gfn_to_mfn(d, gpfn, &t);
+    if ( p2m_is_valid(t) )
+        return mfn_x(mfn);
+    return INVALID_MFN;
 }
 
 /* General conversion function from mfn to gfn */
@@ -81,19 +173,6 @@ static inline unsigned long mfn_to_gfn(s
     return mfn_x(mfn);
 }
 
-/* Compatibility function for HVM code */
-static inline unsigned long get_mfn_from_gpfn(unsigned long pfn)
-{
-    return mfn_x(gfn_to_mfn_current(pfn));
-}
-
-/* Is this guest address an mmio one? (i.e. not defined in p2m map) */
-static inline int mmio_space(paddr_t gpa)
-{
-    unsigned long gfn = gpa >> PAGE_SHIFT;
-    return !mfn_valid(mfn_x(gfn_to_mfn_current(gfn)));
-}
-
 /* Translate the frame number held in an l1e from guest to machine */
 static inline l1_pgentry_t
 gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e)
@@ -105,7 +184,6 @@ gl1e_to_ml1e(struct domain *d, l1_pgentr
 }
 
 
-
 /* Init the datastructures for later use by the p2m code */
 void p2m_init(struct domain *d);
 
@@ -130,11 +208,12 @@ void guest_physmap_remove_page(struct do
 void guest_physmap_remove_page(struct domain *d, unsigned long gfn,
                                unsigned long mfn);
 
-/* set P2M table l1e flags */
-void p2m_set_flags_global(struct domain *d, u32 l1e_flags);
-
-/* set P2M table l1e flags for a gpa */
-int p2m_set_flags(struct domain *d, paddr_t gpa, u32 l1e_flags);
+/* Change types across all p2m entries in a domain */
+void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt);
+
+/* Compare-exchange the type of a single p2m entry */
+p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
+                           p2m_type_t ot, p2m_type_t nt);
 
 #endif /* _XEN_P2M_H */
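[Editor's note: condensing the hap.c and svm.c hunks above, the new HAP
log-dirty flow reads as follows.  This is a digest of code already in the
patch, not new code; d, gfn and mfn stand for the obvious locals:

    /* Enabling log-dirty: demote every read/write RAM entry in the p2m */
    p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
    flush_tlb_mask(d->domain_dirty_cpumask);

    /* A guest write to a demoted page now takes a nested page fault
     * (svm_do_nested_pgfault): record the dirty frame, then promote just
     * that gfn back to read/write so the guest can retry the write */
    paging_mark_dirty(current->domain, mfn_x(mfn));
    p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
]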
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog