[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-changelog] [xen-unstable] merge
# HG changeset patch # User Stefano Stabellini <sstabellini@xxxxxxxxxxxxx> # Date 1281538561 -3600 # Node ID 3b839375d5bcb3a4b484170a890e340493b4738a # Parent ca51dba3a7b1d9bd15215d89036eb98dfae8754d # Parent 47021067a0d5a498cdebca8892384eeb2cd94e48 merge --- tools/debugger/gdbsx/Makefile | 6 xen/arch/x86/debug.c | 2 xen/arch/x86/domain.c | 2 xen/arch/x86/domctl.c | 4 xen/arch/x86/hvm/emulate.c | 10 xen/arch/x86/hvm/hvm.c | 110 ++-- xen/arch/x86/hvm/mtrr.c | 2 xen/arch/x86/hvm/stdvga.c | 5 xen/arch/x86/hvm/svm/svm.c | 14 xen/arch/x86/hvm/vmsi.c | 38 + xen/arch/x86/hvm/vmx/vmx.c | 23 - xen/arch/x86/mm.c | 57 +- xen/arch/x86/mm/guest_walk.c | 15 xen/arch/x86/mm/hap/guest_walk.c | 12 xen/arch/x86/mm/hap/hap.c | 38 - xen/arch/x86/mm/hap/p2m-ept.c | 150 +++--- xen/arch/x86/mm/mem_event.c | 4 xen/arch/x86/mm/mem_paging.c | 9 xen/arch/x86/mm/mem_sharing.c | 31 - xen/arch/x86/mm/p2m.c | 736 ++++++++++++++++---------------- xen/arch/x86/mm/shadow/common.c | 38 - xen/arch/x86/mm/shadow/multi.c | 49 +- xen/arch/x86/oprofile/nmi_int.c | 2 xen/arch/x86/oprofile/op_counter.h | 12 xen/arch/x86/oprofile/op_model_athlon.c | 356 +++++++++++++++ xen/arch/x86/oprofile/xenoprof.c | 17 xen/arch/x86/setup.c | 58 ++ xen/arch/x86/x86_32/entry.S | 2 xen/arch/x86/x86_64/compat/entry.S | 2 xen/arch/x86/x86_64/entry.S | 2 xen/common/domain.c | 25 + xen/common/grant_table.c | 12 xen/common/memory.c | 13 xen/common/page_alloc.c | 72 ++- xen/common/sched_credit.c | 44 + xen/common/schedule.c | 21 xen/common/tmem_xen.c | 2 xen/common/xenoprof.c | 14 xen/drivers/passthrough/vtd/intremap.c | 11 xen/include/asm-ia64/xenoprof.h | 7 xen/include/asm-x86/guest_pt.h | 4 xen/include/asm-x86/mem_sharing.h | 6 xen/include/asm-x86/p2m.h | 134 +++-- xen/include/asm-x86/xenoprof.h | 5 xen/include/public/xenoprof.h | 16 xen/include/xen/sched-if.h | 1 xen/include/xen/sched.h | 9 47 files changed, 1449 insertions(+), 753 deletions(-) diff -r ca51dba3a7b1 -r 3b839375d5bc tools/debugger/gdbsx/Makefile --- a/tools/debugger/gdbsx/Makefile Wed Aug 11 15:51:04 2010 +0100 +++ b/tools/debugger/gdbsx/Makefile Wed Aug 11 15:56:01 2010 +0100 @@ -1,7 +1,5 @@ XEN_ROOT = ../../.. XEN_ROOT = ../../.. include ./Rules.mk - -sbindir=/usr/sbin .PHONY: all all: @@ -16,8 +14,8 @@ clean: .PHONY: install install: all - [ -d $(DESTDIR)$(sbindir) ] || $(INSTALL_DIR) $(DESTDIR)$(sbindir) - $(INSTALL_PROG) gdbsx $(DESTDIR)$(sbindir)/gdbsx + [ -d $(DESTDIR)$(SBINDIR) ] || $(INSTALL_DIR) $(DESTDIR)$(SBINDIR) + $(INSTALL_PROG) gdbsx $(DESTDIR)$(SBINDIR)/gdbsx gdbsx: gx/gx_all.a xg/xg_all.a $(CC) -o $@ $^ diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/debug.c --- a/xen/arch/x86/debug.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/debug.c Wed Aug 11 15:56:01 2010 +0100 @@ -61,7 +61,7 @@ dbg_hvm_va2mfn(dbgva_t vaddr, struct dom return INVALID_MFN; } - mfn = mfn_x(gfn_to_mfn(dp, gfn, &gfntype)); + mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(dp), gfn, &gfntype)); if ( p2m_is_readonly(gfntype) && toaddr ) { DBGP2("kdb:p2m_is_readonly: gfntype:%x\n", gfntype); diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/domain.c Wed Aug 11 15:56:01 2010 +0100 @@ -151,7 +151,7 @@ void dump_pageframe_info(struct domain * if ( is_hvm_domain(d) ) { - p2m_pod_dump_data(d); + p2m_pod_dump_data(p2m_get_hostp2m(d)); } spin_lock(&d->page_alloc_lock); diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/domctl.c --- a/xen/arch/x86/domctl.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/domctl.c Wed Aug 11 15:56:01 2010 +0100 @@ -982,7 +982,7 @@ long arch_do_domctl( ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1); for ( i = 0; i < nr_mfns; i++ ) - set_mmio_p2m_entry(d, gfn+i, _mfn(mfn+i)); + set_mmio_p2m_entry(p2m_get_hostp2m(d), gfn+i, _mfn(mfn+i)); } else { @@ -991,7 +991,7 @@ long arch_do_domctl( gfn, mfn, nr_mfns); for ( i = 0; i < nr_mfns; i++ ) - clear_mmio_p2m_entry(d, gfn+i); + clear_mmio_p2m_entry(p2m_get_hostp2m(d), gfn+i); ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1); } diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/hvm/emulate.c --- a/xen/arch/x86/hvm/emulate.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/hvm/emulate.c Wed Aug 11 15:56:01 2010 +0100 @@ -55,6 +55,7 @@ int hvmemul_do_io( paddr_t value = ram_gpa; int value_is_ptr = (p_data == NULL); struct vcpu *curr = current; + struct p2m_domain *p2m = p2m_get_hostp2m(curr->domain); ioreq_t *p = get_ioreq(curr); unsigned long ram_gfn = paddr_to_pfn(ram_gpa); p2m_type_t p2mt; @@ -62,10 +63,10 @@ int hvmemul_do_io( int rc; /* Check for paged out page */ - ram_mfn = gfn_to_mfn_unshare(current->domain, ram_gfn, &p2mt, 0); + ram_mfn = gfn_to_mfn_unshare(p2m, ram_gfn, &p2mt, 0); if ( p2m_is_paging(p2mt) ) { - p2m_mem_paging_populate(curr->domain, ram_gfn); + p2m_mem_paging_populate(p2m, ram_gfn); return X86EMUL_RETRY; } if ( p2m_is_shared(p2mt) ) @@ -638,6 +639,7 @@ static int hvmemul_rep_movs( unsigned long saddr, daddr, bytes; paddr_t sgpa, dgpa; uint32_t pfec = PFEC_page_present; + struct p2m_domain *p2m = p2m_get_hostp2m(current->domain); p2m_type_t p2mt; int rc, df = !!(ctxt->regs->eflags & X86_EFLAGS_DF); char *buf; @@ -668,12 +670,12 @@ static int hvmemul_rep_movs( if ( rc != X86EMUL_OKAY ) return rc; - (void)gfn_to_mfn_current(sgpa >> PAGE_SHIFT, &p2mt); + (void)gfn_to_mfn(p2m, sgpa >> PAGE_SHIFT, &p2mt); if ( !p2m_is_ram(p2mt) && !p2m_is_grant(p2mt) ) return hvmemul_do_mmio( sgpa, reps, bytes_per_rep, dgpa, IOREQ_READ, df, NULL); - (void)gfn_to_mfn_current(dgpa >> PAGE_SHIFT, &p2mt); + (void)gfn_to_mfn(p2m, dgpa >> PAGE_SHIFT, &p2mt); if ( !p2m_is_ram(p2mt) && !p2m_is_grant(p2mt) ) return hvmemul_do_mmio( dgpa, reps, bytes_per_rep, sgpa, IOREQ_WRITE, df, NULL); diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/hvm/hvm.c --- a/xen/arch/x86/hvm/hvm.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/hvm/hvm.c Wed Aug 11 15:56:01 2010 +0100 @@ -335,16 +335,17 @@ static int hvm_set_ioreq_page( struct domain *d, struct hvm_ioreq_page *iorp, unsigned long gmfn) { struct page_info *page; + struct p2m_domain *p2m = p2m_get_hostp2m(d); p2m_type_t p2mt; unsigned long mfn; void *va; - mfn = mfn_x(gfn_to_mfn_unshare(d, gmfn, &p2mt, 0)); + mfn = mfn_x(gfn_to_mfn_unshare(p2m, gmfn, &p2mt, 0)); if ( !p2m_is_ram(p2mt) ) return -EINVAL; if ( p2m_is_paging(p2mt) ) { - p2m_mem_paging_populate(d, gmfn); + p2m_mem_paging_populate(p2m, gmfn); return -ENOENT; } if ( p2m_is_shared(p2mt) ) @@ -968,8 +969,10 @@ bool_t hvm_hap_nested_page_fault(unsigne { p2m_type_t p2mt; mfn_t mfn; - - mfn = gfn_to_mfn_type_current(gfn, &p2mt, p2m_guest); + struct vcpu *v = current; + struct p2m_domain *p2m = p2m_get_hostp2m(v->domain); + + mfn = gfn_to_mfn_guest(p2m, gfn, &p2mt); /* * If this GFN is emulated MMIO or marked as read-only, pass the fault @@ -985,12 +988,12 @@ bool_t hvm_hap_nested_page_fault(unsigne #ifdef __x86_64__ /* Check if the page has been paged out */ if ( p2m_is_paged(p2mt) || (p2mt == p2m_ram_paging_out) ) - p2m_mem_paging_populate(current->domain, gfn); + p2m_mem_paging_populate(p2m, gfn); /* Mem sharing: unshare the page and try again */ if ( p2mt == p2m_ram_shared ) { - mem_sharing_unshare_page(current->domain, gfn, 0); + mem_sharing_unshare_page(p2m, gfn, 0); return 1; } #endif @@ -1003,8 +1006,8 @@ bool_t hvm_hap_nested_page_fault(unsigne * a large page, we do not change other pages type within that large * page. */ - paging_mark_dirty(current->domain, mfn_x(mfn)); - p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw); + paging_mark_dirty(v->domain, mfn_x(mfn)); + p2m_change_type(p2m, gfn, p2m_ram_logdirty, p2m_ram_rw); return 1; } @@ -1088,6 +1091,7 @@ int hvm_set_cr0(unsigned long value) { struct vcpu *v = current; p2m_type_t p2mt; + struct p2m_domain *p2m = p2m_get_hostp2m(v->domain); unsigned long gfn, mfn, old_value = v->arch.hvm_vcpu.guest_cr[0]; HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value); @@ -1126,7 +1130,7 @@ int hvm_set_cr0(unsigned long value) { /* The guest CR3 must be pointing to the guest physical. */ gfn = v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT; - mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt)); + mfn = mfn_x(gfn_to_mfn(p2m, gfn, &p2mt)); if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) { @@ -1213,7 +1217,8 @@ int hvm_set_cr3(unsigned long value) { /* Shadow-mode CR3 change. Check PDBR and update refcounts. */ HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value); - mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt)); + mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(v->domain), + value >> PAGE_SHIFT, &p2mt)); if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) ) goto bad_cr3; @@ -1356,6 +1361,8 @@ static void *hvm_map_entry(unsigned long unsigned long gfn, mfn; p2m_type_t p2mt; uint32_t pfec; + struct vcpu *v = current; + struct p2m_domain *p2m = p2m_get_hostp2m(v->domain); if ( ((va & ~PAGE_MASK) + 8) > PAGE_SIZE ) { @@ -1372,10 +1379,10 @@ static void *hvm_map_entry(unsigned long gfn = paging_gva_to_gfn(current, va, &pfec); if ( pfec == PFEC_page_paged || pfec == PFEC_page_shared ) return NULL; - mfn = mfn_x(gfn_to_mfn_unshare(current->domain, gfn, &p2mt, 0)); + mfn = mfn_x(gfn_to_mfn_unshare(p2m, gfn, &p2mt, 0)); if ( p2m_is_paging(p2mt) ) { - p2m_mem_paging_populate(current->domain, gfn); + p2m_mem_paging_populate(p2m, gfn); return NULL; } if ( p2m_is_shared(p2mt) ) @@ -1742,6 +1749,7 @@ static enum hvm_copy_result __hvm_copy( void *buf, paddr_t addr, int size, unsigned int flags, uint32_t pfec) { struct vcpu *curr = current; + struct p2m_domain *p2m = p2m_get_hostp2m(curr->domain); unsigned long gfn, mfn; p2m_type_t p2mt; char *p; @@ -1770,11 +1778,11 @@ static enum hvm_copy_result __hvm_copy( gfn = addr >> PAGE_SHIFT; } - mfn = mfn_x(gfn_to_mfn_unshare(current->domain, gfn, &p2mt, 0)); + mfn = mfn_x(gfn_to_mfn_unshare(p2m, gfn, &p2mt, 0)); if ( p2m_is_paging(p2mt) ) { - p2m_mem_paging_populate(curr->domain, gfn); + p2m_mem_paging_populate(p2m, gfn); return HVMCOPY_gfn_paged_out; } if ( p2m_is_shared(p2mt) ) @@ -2021,7 +2029,7 @@ int hvm_msr_read_intercept(unsigned int uint64_t *var_range_base, *fixed_range_base; int index, mtrr; uint32_t cpuid[4]; - int ret; + int ret = X86EMUL_OKAY; var_range_base = (uint64_t *)v->arch.hvm_vcpu.mtrr.var_ranges; fixed_range_base = (uint64_t *)v->arch.hvm_vcpu.mtrr.fixed_ranges; @@ -2094,24 +2102,25 @@ int hvm_msr_read_intercept(unsigned int break; default: - ret = vmce_rdmsr(msr, msr_content); - if ( ret < 0 ) + if ( (ret = vmce_rdmsr(msr, msr_content)) < 0 ) goto gp_fault; - else if ( ret ) - break; - /* ret == 0, This is not an MCE MSR, see other MSRs */ - else if (!ret) { - return hvm_funcs.msr_read_intercept(msr, msr_content); - } - } - - HVMTRACE_3D(MSR_READ, (uint32_t)*msr_content, (uint32_t)(*msr_content >> 32), msr); - - return X86EMUL_OKAY; - -gp_fault: + /* If ret == 0 then this is not an MCE MSR, see other MSRs. */ + ret = ((ret == 0) + ? hvm_funcs.msr_read_intercept(msr, msr_content) + : X86EMUL_OKAY); + break; + } + + out: + HVMTRACE_3D(MSR_READ, msr, + (uint32_t)*msr_content, (uint32_t)(*msr_content >> 32)); + return ret; + + gp_fault: hvm_inject_exception(TRAP_gp_fault, 0, 0); - return X86EMUL_EXCEPTION; + ret = X86EMUL_EXCEPTION; + *msr_content = -1ull; + goto out; } int hvm_msr_write_intercept(unsigned int msr, uint64_t msr_content) @@ -2119,9 +2128,10 @@ int hvm_msr_write_intercept(unsigned int struct vcpu *v = current; int index, mtrr; uint32_t cpuid[4]; - int ret; - - HVMTRACE_3D(MSR_WRITE, (uint32_t)msr_content, (uint32_t)(msr_content >> 32), msr); + int ret = X86EMUL_OKAY; + + HVMTRACE_3D(MSR_WRITE, msr, + (uint32_t)msr_content, (uint32_t)(msr_content >> 32)); hvm_cpuid(1, &cpuid[0], &cpuid[1], &cpuid[2], &cpuid[3]); mtrr = !!(cpuid[3] & bitmaskof(X86_FEATURE_MTRR)); @@ -2194,16 +2204,16 @@ int hvm_msr_write_intercept(unsigned int break; default: - ret = vmce_wrmsr(msr, msr_content); - if ( ret < 0 ) + if ( (ret = vmce_wrmsr(msr, msr_content)) < 0 ) goto gp_fault; - else if ( ret ) - break; - else if (!ret) - return hvm_funcs.msr_write_intercept(msr, msr_content); - } - - return X86EMUL_OKAY; + /* If ret == 0 then this is not an MCE MSR, see other MSRs. */ + ret = ((ret == 0) + ? hvm_funcs.msr_write_intercept(msr, msr_content) + : X86EMUL_OKAY); + break; + } + + return ret; gp_fault: hvm_inject_exception(TRAP_gp_fault, 0, 0); @@ -3029,6 +3039,7 @@ long do_hvm_op(unsigned long op, XEN_GUE { struct xen_hvm_modified_memory a; struct domain *d; + struct p2m_domain *p2m; unsigned long pfn; if ( copy_from_guest(&a, arg, 1) ) @@ -3056,13 +3067,14 @@ long do_hvm_op(unsigned long op, XEN_GUE if ( !paging_mode_log_dirty(d) ) goto param_fail3; + p2m = p2m_get_hostp2m(d); for ( pfn = a.first_pfn; pfn < a.first_pfn + a.nr; pfn++ ) { p2m_type_t t; - mfn_t mfn = gfn_to_mfn(d, pfn, &t); + mfn_t mfn = gfn_to_mfn(p2m, pfn, &t); if ( p2m_is_paging(t) ) { - p2m_mem_paging_populate(d, pfn); + p2m_mem_paging_populate(p2m, pfn); rc = -EINVAL; goto param_fail3; @@ -3089,6 +3101,7 @@ long do_hvm_op(unsigned long op, XEN_GUE { struct xen_hvm_set_mem_type a; struct domain *d; + struct p2m_domain *p2m; unsigned long pfn; /* Interface types to internal p2m types */ @@ -3118,15 +3131,16 @@ long do_hvm_op(unsigned long op, XEN_GUE if ( a.hvmmem_type >= ARRAY_SIZE(memtype) ) goto param_fail4; + p2m = p2m_get_hostp2m(d); for ( pfn = a.first_pfn; pfn < a.first_pfn + a.nr; pfn++ ) { p2m_type_t t; p2m_type_t nt; mfn_t mfn; - mfn = gfn_to_mfn_unshare(d, pfn, &t, 0); + mfn = gfn_to_mfn_unshare(p2m, pfn, &t, 0); if ( p2m_is_paging(t) ) { - p2m_mem_paging_populate(d, pfn); + p2m_mem_paging_populate(p2m, pfn); rc = -EINVAL; goto param_fail4; @@ -3145,7 +3159,7 @@ long do_hvm_op(unsigned long op, XEN_GUE } else { - nt = p2m_change_type(d, pfn, t, memtype[a.hvmmem_type]); + nt = p2m_change_type(p2m, pfn, t, memtype[a.hvmmem_type]); if ( nt != t ) { gdprintk(XENLOG_WARNING, diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/hvm/mtrr.c --- a/xen/arch/x86/hvm/mtrr.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/hvm/mtrr.c Wed Aug 11 15:56:01 2010 +0100 @@ -399,7 +399,7 @@ uint32_t get_pat_flags(struct vcpu *v, { struct domain *d = v->domain; p2m_type_t p2mt; - gfn_to_mfn(d, paddr_to_pfn(gpaddr), &p2mt); + gfn_to_mfn(p2m_get_hostp2m(d), paddr_to_pfn(gpaddr), &p2mt); if (p2m_is_ram(p2mt)) gdprintk(XENLOG_WARNING, "Conflict occurs for a given guest l1e flags:%x " diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/hvm/stdvga.c --- a/xen/arch/x86/hvm/stdvga.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/hvm/stdvga.c Wed Aug 11 15:56:01 2010 +0100 @@ -469,6 +469,7 @@ static int mmio_move(struct hvm_hw_stdvg int i; int sign = p->df ? -1 : 1; p2m_type_t p2mt; + struct p2m_domain *p2m = p2m_get_hostp2m(current->domain); if ( p->data_is_ptr ) { @@ -481,7 +482,7 @@ static int mmio_move(struct hvm_hw_stdvg if ( hvm_copy_to_guest_phys(data, &tmp, p->size) != HVMCOPY_okay ) { - (void)gfn_to_mfn_current(data >> PAGE_SHIFT, &p2mt); + (void)gfn_to_mfn(p2m, data >> PAGE_SHIFT, &p2mt); /* * The only case we handle is vga_mem <-> vga_mem. * Anything else disables caching and leaves it to qemu-dm. @@ -503,7 +504,7 @@ static int mmio_move(struct hvm_hw_stdvg if ( hvm_copy_from_guest_phys(&tmp, data, p->size) != HVMCOPY_okay ) { - (void)gfn_to_mfn_current(data >> PAGE_SHIFT, &p2mt); + (void)gfn_to_mfn(p2m, data >> PAGE_SHIFT, &p2mt); if ( (p2mt != p2m_mmio_dm) || (data < VGA_MEM_BASE) || ((data + p->size) > (VGA_MEM_BASE + VGA_MEM_SIZE)) ) return 0; diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/hvm/svm/svm.c --- a/xen/arch/x86/hvm/svm/svm.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/hvm/svm/svm.c Wed Aug 11 15:56:01 2010 +0100 @@ -232,7 +232,7 @@ static int svm_vmcb_restore(struct vcpu { if ( c->cr0 & X86_CR0_PG ) { - mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt)); + mfn = mfn_x(gfn_to_mfn(p2m, c->cr3 >> PAGE_SHIFT, &p2mt)); if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) ) { gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n", @@ -946,6 +946,9 @@ static void svm_do_nested_pgfault(paddr_ unsigned long gfn = gpa >> PAGE_SHIFT; mfn_t mfn; p2m_type_t p2mt; + struct p2m_domain *p2m; + + p2m = p2m_get_hostp2m(current->domain); if ( tb_init_done ) { @@ -958,7 +961,7 @@ static void svm_do_nested_pgfault(paddr_ _d.gpa = gpa; _d.qualification = 0; - _d.mfn = mfn_x(gfn_to_mfn_query(current->domain, gfn, &_d.p2mt)); + _d.mfn = mfn_x(gfn_to_mfn_query(p2m, gfn, &_d.p2mt)); __trace_var(TRC_HVM_NPF, 0, sizeof(_d), (unsigned char *)&_d); } @@ -967,7 +970,7 @@ static void svm_do_nested_pgfault(paddr_ return; /* Everything else is an error. */ - mfn = gfn_to_mfn_type_current(gfn, &p2mt, p2m_guest); + mfn = gfn_to_mfn_guest(p2m, gfn, &p2mt); gdprintk(XENLOG_ERR, "SVM violation gpa %#"PRIpaddr", mfn %#lx, type %i\n", gpa, mfn_x(mfn), p2mt); domain_crash(current->domain); @@ -1117,8 +1120,6 @@ static int svm_msr_read_intercept(unsign goto gpf; } - HVMTRACE_3D (MSR_READ, msr, - (uint32_t)*msr_content, (uint32_t)(*msr_content>>32)); HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, msr_value=%"PRIx64, msr, *msr_content); return X86EMUL_OKAY; @@ -1132,9 +1133,6 @@ static int svm_msr_write_intercept(unsig { struct vcpu *v = current; struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; - - HVMTRACE_3D(MSR_WRITE, msr, - (uint32_t)msr_content, (uint32_t)(msr_content >> 32)); switch ( msr ) { diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/hvm/vmsi.c --- a/xen/arch/x86/hvm/vmsi.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/hvm/vmsi.c Wed Aug 11 15:56:01 2010 +0100 @@ -159,7 +159,10 @@ struct msixtbl_entry unsigned long gtable; /* gpa of msix table */ unsigned long table_len; unsigned long table_flags[MAX_MSIX_TABLE_ENTRIES / BITS_PER_LONG + 1]; - +#define MAX_MSIX_ACC_ENTRIES 3 + struct { + uint32_t msi_ad[3]; /* Shadow of address low, high and data */ + } gentries[MAX_MSIX_ACC_ENTRIES]; struct rcu_head rcu; }; @@ -205,18 +208,15 @@ static int msixtbl_read( struct vcpu *v, unsigned long address, unsigned long len, unsigned long *pval) { - unsigned long offset; + unsigned long offset, val; struct msixtbl_entry *entry; void *virt; + int nr_entry, index; int r = X86EMUL_UNHANDLEABLE; rcu_read_lock(&msixtbl_rcu_lock); if ( len != 4 ) - goto out; - - offset = address & (PCI_MSIX_ENTRY_SIZE - 1); - if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) goto out; entry = msixtbl_find_entry(v, address); @@ -224,9 +224,24 @@ static int msixtbl_read( if ( !virt ) goto out; - *pval = readl(virt); + nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE; + offset = address & (PCI_MSIX_ENTRY_SIZE - 1); + if ( nr_entry >= MAX_MSIX_ACC_ENTRIES && + offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET ) + goto out; + + val = readl(virt); + if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET ) + { + index = offset / sizeof(uint32_t); + *pval = entry->gentries[nr_entry].msi_ad[index]; + } + else + { + *pval = val; + } + r = X86EMUL_OKAY; - out: rcu_read_unlock(&msixtbl_rcu_lock); return r; @@ -238,7 +253,7 @@ static int msixtbl_write(struct vcpu *v, unsigned long offset; struct msixtbl_entry *entry; void *virt; - int nr_entry; + int nr_entry, index; int r = X86EMUL_UNHANDLEABLE; rcu_read_lock(&msixtbl_rcu_lock); @@ -252,6 +267,11 @@ static int msixtbl_write(struct vcpu *v, offset = address & (PCI_MSIX_ENTRY_SIZE - 1); if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) { + if ( nr_entry < MAX_MSIX_ACC_ENTRIES ) + { + index = offset / sizeof(uint32_t); + entry->gentries[nr_entry].msi_ad[index] = val; + } set_bit(nr_entry, &entry->table_flags); goto out; } diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/hvm/vmx/vmx.c Wed Aug 11 15:56:01 2010 +0100 @@ -486,7 +486,8 @@ static int vmx_restore_cr0_cr3( { if ( cr0 & X86_CR0_PG ) { - mfn = mfn_x(gfn_to_mfn(v->domain, cr3 >> PAGE_SHIFT, &p2mt)); + mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(v->domain), + cr3 >> PAGE_SHIFT, &p2mt)); if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) ) { gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%lx\n", cr3); @@ -1002,7 +1003,8 @@ static void vmx_load_pdptrs(struct vcpu if ( cr3 & 0x1fUL ) goto crash; - mfn = mfn_x(gfn_to_mfn(v->domain, cr3 >> PAGE_SHIFT, &p2mt)); + mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(v->domain), + cr3 >> PAGE_SHIFT, &p2mt)); if ( !p2m_is_ram(p2mt) ) goto crash; @@ -1221,7 +1223,7 @@ void ept_sync_domain(struct domain *d) return; ASSERT(local_irq_is_enabled()); - ASSERT(p2m_locked_by_me(d->arch.p2m)); + ASSERT(p2m_locked_by_me(p2m_get_hostp2m(d))); /* * Flush active cpus synchronously. Flush others the next time this domain @@ -1340,7 +1342,7 @@ static void vmx_set_uc_mode(struct vcpu { if ( paging_mode_hap(v->domain) ) ept_change_entry_emt_with_range( - v->domain, 0, v->domain->arch.p2m->max_mapped_pfn); + v->domain, 0, p2m_get_hostp2m(v->domain)->max_mapped_pfn); hvm_asid_flush_vcpu(v); } @@ -1872,8 +1874,6 @@ static int vmx_msr_read_intercept(unsign } done: - HVMTRACE_3D(MSR_READ, msr, - (uint32_t)*msr_content, (uint32_t)(*msr_content >> 32)); HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, msr_value=0x%"PRIx64, msr, *msr_content); return X86EMUL_OKAY; @@ -1895,7 +1895,8 @@ static int vmx_alloc_vlapic_mapping(stru return -ENOMEM; share_xen_page_with_guest(virt_to_page(apic_va), d, XENSHARE_writable); set_mmio_p2m_entry( - d, paddr_to_pfn(APIC_DEFAULT_PHYS_BASE), _mfn(virt_to_mfn(apic_va))); + p2m_get_hostp2m(d), paddr_to_pfn(APIC_DEFAULT_PHYS_BASE), + _mfn(virt_to_mfn(apic_va))); d->arch.hvm_domain.vmx.apic_access_mfn = virt_to_mfn(apic_va); return 0; @@ -1949,9 +1950,6 @@ static int vmx_msr_write_intercept(unsig HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, msr_value=0x%"PRIx64, msr, msr_content); - - HVMTRACE_3D(MSR_WRITE, msr, - (uint32_t)msr_content, (uint32_t)(msr_content >> 32)); switch ( msr ) { @@ -2103,6 +2101,7 @@ static void ept_handle_violation(unsigne unsigned long gla, gfn = gpa >> PAGE_SHIFT; mfn_t mfn; p2m_type_t p2mt; + struct p2m_domain *p2m = p2m_get_hostp2m(current->domain); if ( tb_init_done ) { @@ -2115,7 +2114,7 @@ static void ept_handle_violation(unsigne _d.gpa = gpa; _d.qualification = qualification; - _d.mfn = mfn_x(gfn_to_mfn_query(current->domain, gfn, &_d.p2mt)); + _d.mfn = mfn_x(gfn_to_mfn_query(p2m, gfn, &_d.p2mt)); __trace_var(TRC_HVM_NPF, 0, sizeof(_d), (unsigned char *)&_d); } @@ -2125,7 +2124,7 @@ static void ept_handle_violation(unsigne return; /* Everything else is an error. */ - mfn = gfn_to_mfn_type_current(gfn, &p2mt, p2m_guest); + mfn = gfn_to_mfn_guest(p2m, gfn, &p2mt); gdprintk(XENLOG_ERR, "EPT violation %#lx (%c%c%c/%c%c%c), " "gpa %#"PRIpaddr", mfn %#lx, type %i.\n", qualification, diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/mm.c Wed Aug 11 15:56:01 2010 +0100 @@ -398,7 +398,7 @@ unsigned long domain_get_maximum_gpfn(st unsigned long domain_get_maximum_gpfn(struct domain *d) { if ( is_hvm_domain(d) ) - return d->arch.p2m->max_mapped_pfn; + return p2m_get_hostp2m(d)->max_mapped_pfn; /* NB. PV guests specify nr_pfns rather than max_pfn so we adjust here. */ return arch_get_max_pfn(d) - 1; } @@ -1741,7 +1741,8 @@ static int mod_l1_entry(l1_pgentry_t *pl if ( l1e_get_flags(nl1e) & _PAGE_PRESENT ) { /* Translate foreign guest addresses. */ - mfn = mfn_x(gfn_to_mfn(pg_dom, l1e_get_pfn(nl1e), &p2mt)); + mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(pg_dom), + l1e_get_pfn(nl1e), &p2mt)); if ( !p2m_is_ram(p2mt) || unlikely(mfn == INVALID_MFN) ) return 0; ASSERT((mfn & ~(PADDR_MASK >> PAGE_SHIFT)) == 0); @@ -3318,8 +3319,8 @@ int do_mmu_update( struct page_info *page; int rc = 0, okay = 1, i = 0; unsigned int cmd, done = 0, pt_dom; - struct domain *d = current->domain, *pt_owner = d, *pg_owner; struct vcpu *v = current; + struct domain *d = v->domain, *pt_owner = d, *pg_owner; struct domain_mmap_cache mapcache; if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) @@ -3403,13 +3404,13 @@ int do_mmu_update( req.ptr -= cmd; gmfn = req.ptr >> PAGE_SHIFT; - mfn = mfn_x(gfn_to_mfn(pt_owner, gmfn, &p2mt)); + mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(pt_owner), gmfn, &p2mt)); if ( !p2m_is_valid(p2mt) ) mfn = INVALID_MFN; if ( p2m_is_paged(p2mt) ) { - p2m_mem_paging_populate(pg_owner, gmfn); + p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner), gmfn); rc = -ENOENT; break; @@ -3434,12 +3435,13 @@ int do_mmu_update( { l1_pgentry_t l1e = l1e_from_intpte(req.val); p2m_type_t l1e_p2mt; - gfn_to_mfn(pg_owner, l1e_get_pfn(l1e), &l1e_p2mt); + gfn_to_mfn(p2m_get_hostp2m(pg_owner), + l1e_get_pfn(l1e), &l1e_p2mt); if ( p2m_is_paged(l1e_p2mt) ) { - p2m_mem_paging_populate(pg_owner, l1e_get_pfn(l1e)); - + p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner), + l1e_get_pfn(l1e)); rc = -ENOENT; break; } @@ -3457,7 +3459,7 @@ int do_mmu_update( /* Unshare the page for RW foreign mappings */ if ( l1e_get_flags(l1e) & _PAGE_RW ) { - rc = mem_sharing_unshare_page(pg_owner, + rc = mem_sharing_unshare_page(p2m_get_hostp2m(pg_owner), l1e_get_pfn(l1e), 0); if ( rc ) @@ -3475,12 +3477,12 @@ int do_mmu_update( { l2_pgentry_t l2e = l2e_from_intpte(req.val); p2m_type_t l2e_p2mt; - gfn_to_mfn(pg_owner, l2e_get_pfn(l2e), &l2e_p2mt); + gfn_to_mfn(p2m_get_hostp2m(pg_owner), l2e_get_pfn(l2e), &l2e_p2mt); if ( p2m_is_paged(l2e_p2mt) ) { - p2m_mem_paging_populate(pg_owner, l2e_get_pfn(l2e)); - + p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner), + l2e_get_pfn(l2e)); rc = -ENOENT; break; } @@ -3505,12 +3507,12 @@ int do_mmu_update( { l3_pgentry_t l3e = l3e_from_intpte(req.val); p2m_type_t l3e_p2mt; - gfn_to_mfn(pg_owner, l3e_get_pfn(l3e), &l3e_p2mt); + gfn_to_mfn(p2m_get_hostp2m(pg_owner), l3e_get_pfn(l3e), &l3e_p2mt); if ( p2m_is_paged(l3e_p2mt) ) { - p2m_mem_paging_populate(pg_owner, l3e_get_pfn(l3e)); - + p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner), + l3e_get_pfn(l3e)); rc = -ENOENT; break; } @@ -3536,12 +3538,13 @@ int do_mmu_update( { l4_pgentry_t l4e = l4e_from_intpte(req.val); p2m_type_t l4e_p2mt; - gfn_to_mfn(pg_owner, l4e_get_pfn(l4e), &l4e_p2mt); + gfn_to_mfn(p2m_get_hostp2m(pg_owner), + l4e_get_pfn(l4e), &l4e_p2mt); if ( p2m_is_paged(l4e_p2mt) ) { - p2m_mem_paging_populate(pg_owner, l4e_get_pfn(l4e)); - + p2m_mem_paging_populate(p2m_get_hostp2m(pg_owner), + l4e_get_pfn(l4e)); rc = -ENOENT; break; } @@ -3923,8 +3926,8 @@ static int create_grant_p2m_mapping(uint p2mt = p2m_grant_map_ro; else p2mt = p2m_grant_map_rw; - rc = guest_physmap_add_entry(current->domain, addr >> PAGE_SHIFT, - frame, 0, p2mt); + rc = guest_physmap_add_entry(p2m_get_hostp2m(current->domain), + addr >> PAGE_SHIFT, frame, 0, p2mt); if ( rc ) return GNTST_general_error; else @@ -3962,11 +3965,12 @@ static int replace_grant_p2m_mapping( unsigned long gfn = (unsigned long)(addr >> PAGE_SHIFT); p2m_type_t type; mfn_t old_mfn; + struct domain *d = current->domain; if ( new_addr != 0 || (flags & GNTMAP_contains_pte) ) return GNTST_general_error; - old_mfn = gfn_to_mfn_current(gfn, &type); + old_mfn = gfn_to_mfn(p2m_get_hostp2m(d), gfn, &type); if ( !p2m_is_grant(type) || mfn_x(old_mfn) != frame ) { gdprintk(XENLOG_WARNING, @@ -3974,7 +3978,7 @@ static int replace_grant_p2m_mapping( type, mfn_x(old_mfn), frame); return GNTST_general_error; } - guest_physmap_remove_page(current->domain, gfn, frame, 0); + guest_physmap_remove_page(d, gfn, frame, 0); return GNTST_okay; } @@ -4581,7 +4585,8 @@ long arch_memory_op(int op, XEN_GUEST_HA { p2m_type_t p2mt; - xatp.idx = mfn_x(gfn_to_mfn_unshare(d, xatp.idx, &p2mt, 0)); + xatp.idx = mfn_x(gfn_to_mfn_unshare(p2m_get_hostp2m(d), + xatp.idx, &p2mt, 0)); /* If the page is still shared, exit early */ if ( p2m_is_shared(p2mt) ) { @@ -4771,6 +4776,7 @@ long arch_memory_op(int op, XEN_GUEST_HA { xen_pod_target_t target; struct domain *d; + struct p2m_domain *p2m; /* Support DOMID_SELF? */ if ( !IS_PRIV(current->domain) ) @@ -4794,9 +4800,10 @@ long arch_memory_op(int op, XEN_GUEST_HA rc = p2m_pod_set_mem_target(d, target.target_pages); } + p2m = p2m_get_hostp2m(d); target.tot_pages = d->tot_pages; - target.pod_cache_pages = d->arch.p2m->pod.count; - target.pod_entries = d->arch.p2m->pod.entry_count; + target.pod_cache_pages = p2m->pod.count; + target.pod_entries = p2m->pod.entry_count; if ( copy_to_guest(arg, &target, 1) ) { diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/mm/guest_walk.c --- a/xen/arch/x86/mm/guest_walk.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/mm/guest_walk.c Wed Aug 11 15:56:01 2010 +0100 @@ -86,17 +86,17 @@ static uint32_t set_ad_bits(void *guest_ return 0; } -static inline void *map_domain_gfn(struct domain *d, +static inline void *map_domain_gfn(struct p2m_domain *p2m, gfn_t gfn, mfn_t *mfn, p2m_type_t *p2mt, uint32_t *rc) { /* Translate the gfn, unsharing if shared */ - *mfn = gfn_to_mfn_unshare(d, gfn_x(gfn), p2mt, 0); + *mfn = gfn_to_mfn_unshare(p2m, gfn_x(gfn), p2mt, 0); if ( p2m_is_paging(*p2mt) ) { - p2m_mem_paging_populate(d, gfn_x(gfn)); + p2m_mem_paging_populate(p2m, gfn_x(gfn)); *rc = _PAGE_PAGED; return NULL; @@ -119,7 +119,8 @@ static inline void *map_domain_gfn(struc /* Walk the guest pagetables, after the manner of a hardware walker. */ uint32_t -guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, +guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m, + unsigned long va, walk_t *gw, uint32_t pfec, mfn_t top_mfn, void *top_map) { struct domain *d = v->domain; @@ -154,7 +155,7 @@ guest_walk_tables(struct vcpu *v, unsign if ( rc & _PAGE_PRESENT ) goto out; /* Map the l3 table */ - l3p = map_domain_gfn(d, + l3p = map_domain_gfn(p2m, guest_l4e_get_gfn(gw->l4e), &gw->l3mfn, &p2mt, @@ -181,7 +182,7 @@ guest_walk_tables(struct vcpu *v, unsign #endif /* PAE or 64... */ /* Map the l2 table */ - l2p = map_domain_gfn(d, + l2p = map_domain_gfn(p2m, guest_l3e_get_gfn(gw->l3e), &gw->l2mfn, &p2mt, @@ -237,7 +238,7 @@ guest_walk_tables(struct vcpu *v, unsign else { /* Not a superpage: carry on and find the l1e. */ - l1p = map_domain_gfn(d, + l1p = map_domain_gfn(p2m, guest_l2e_get_gfn(gw->l2e), &gw->l1mfn, &p2mt, diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/mm/hap/guest_walk.c --- a/xen/arch/x86/mm/hap/guest_walk.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/mm/hap/guest_walk.c Wed Aug 11 15:56:01 2010 +0100 @@ -43,13 +43,14 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN void *top_map; p2m_type_t p2mt; walk_t gw; + struct p2m_domain *p2m = p2m_get_hostp2m(v->domain); /* Get the top-level table's MFN */ cr3 = v->arch.hvm_vcpu.guest_cr[3]; - top_mfn = gfn_to_mfn_unshare(v->domain, cr3 >> PAGE_SHIFT, &p2mt, 0); + top_mfn = gfn_to_mfn_unshare(p2m, cr3 >> PAGE_SHIFT, &p2mt, 0); if ( p2m_is_paging(p2mt) ) { - p2m_mem_paging_populate(v->domain, cr3 >> PAGE_SHIFT); + p2m_mem_paging_populate(p2m, cr3 >> PAGE_SHIFT); pfec[0] = PFEC_page_paged; return INVALID_GFN; @@ -71,17 +72,17 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN #if GUEST_PAGING_LEVELS == 3 top_map += (cr3 & ~(PAGE_MASK | 31)); #endif - missing = guest_walk_tables(v, gva, &gw, pfec[0], top_mfn, top_map); + missing = guest_walk_tables(v, p2m, gva, &gw, pfec[0], top_mfn, top_map); unmap_domain_page(top_map); /* Interpret the answer */ if ( missing == 0 ) { gfn_t gfn = guest_l1e_get_gfn(gw.l1e); - gfn_to_mfn_unshare(v->domain, gfn_x(gfn), &p2mt, 0); + gfn_to_mfn_unshare(p2m, gfn_x(gfn), &p2mt, 0); if ( p2m_is_paging(p2mt) ) { - p2m_mem_paging_populate(v->domain, gfn_x(gfn)); + p2m_mem_paging_populate(p2m, gfn_x(gfn)); pfec[0] = PFEC_page_paged; return INVALID_GFN; @@ -130,4 +131,3 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN * indent-tabs-mode: nil * End: */ - diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/mm/hap/hap.c --- a/xen/arch/x86/mm/hap/hap.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/mm/hap/hap.c Wed Aug 11 15:56:01 2010 +0100 @@ -70,7 +70,7 @@ static int hap_enable_vram_tracking(stru /* set l1e entries of P2M table to be read-only. */ for (i = dirty_vram->begin_pfn; i < dirty_vram->end_pfn; i++) - p2m_change_type(d, i, p2m_ram_rw, p2m_ram_logdirty); + p2m_change_type(p2m_get_hostp2m(d), i, p2m_ram_rw, p2m_ram_logdirty); flush_tlb_mask(&d->domain_dirty_cpumask); return 0; @@ -90,7 +90,7 @@ static int hap_disable_vram_tracking(str /* set l1e entries of P2M table with normal mode */ for (i = dirty_vram->begin_pfn; i < dirty_vram->end_pfn; i++) - p2m_change_type(d, i, p2m_ram_logdirty, p2m_ram_rw); + p2m_change_type(p2m_get_hostp2m(d), i, p2m_ram_logdirty, p2m_ram_rw); flush_tlb_mask(&d->domain_dirty_cpumask); return 0; @@ -106,7 +106,7 @@ static void hap_clean_vram_tracking(stru /* set l1e entries of P2M table to be read-only. */ for (i = dirty_vram->begin_pfn; i < dirty_vram->end_pfn; i++) - p2m_change_type(d, i, p2m_ram_rw, p2m_ram_logdirty); + p2m_change_type(p2m_get_hostp2m(d), i, p2m_ram_rw, p2m_ram_logdirty); flush_tlb_mask(&d->domain_dirty_cpumask); } @@ -200,7 +200,8 @@ static int hap_enable_log_dirty(struct d hap_unlock(d); /* set l1e entries of P2M table to be read-only. */ - p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty); + p2m_change_entry_type_global(p2m_get_hostp2m(d), + p2m_ram_rw, p2m_ram_logdirty); flush_tlb_mask(&d->domain_dirty_cpumask); return 0; } @@ -212,14 +213,16 @@ static int hap_disable_log_dirty(struct hap_unlock(d); /* set l1e entries of P2M table with normal mode */ - p2m_change_entry_type_global(d, p2m_ram_logdirty, p2m_ram_rw); + p2m_change_entry_type_global(p2m_get_hostp2m(d), + p2m_ram_logdirty, p2m_ram_rw); return 0; } static void hap_clean_dirty_bitmap(struct domain *d) { /* set l1e entries of P2M table to be read-only. */ - p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_logdirty); + p2m_change_entry_type_global(p2m_get_hostp2m(d), + p2m_ram_rw, p2m_ram_logdirty); flush_tlb_mask(&d->domain_dirty_cpumask); } @@ -273,8 +276,9 @@ static void hap_free(struct domain *d, m page_list_add_tail(pg, &d->arch.paging.hap.freelist); } -static struct page_info *hap_alloc_p2m_page(struct domain *d) -{ +static struct page_info *hap_alloc_p2m_page(struct p2m_domain *p2m) +{ + struct domain *d = p2m->domain; struct page_info *pg; hap_lock(d); @@ -312,8 +316,9 @@ static struct page_info *hap_alloc_p2m_p return pg; } -static void hap_free_p2m_page(struct domain *d, struct page_info *pg) -{ +static void hap_free_p2m_page(struct p2m_domain *p2m, struct page_info *pg) +{ + struct domain *d = p2m->domain; hap_lock(d); ASSERT(page_get_owner(pg) == d); /* Should have just the one ref we gave it in alloc_p2m_page() */ @@ -594,7 +599,8 @@ int hap_enable(struct domain *d, u32 mod /* allocate P2m table */ if ( mode & PG_translate ) { - rv = p2m_alloc_table(d, hap_alloc_p2m_page, hap_free_p2m_page); + rv = p2m_alloc_table(p2m_get_hostp2m(d), + hap_alloc_p2m_page, hap_free_p2m_page); if ( rv != 0 ) goto out; } @@ -611,7 +617,7 @@ void hap_final_teardown(struct domain *d if ( d->arch.paging.hap.total_pages != 0 ) hap_teardown(d); - p2m_teardown(d); + p2m_teardown(p2m_get_hostp2m(d)); ASSERT(d->arch.paging.hap.p2m_pages == 0); } @@ -711,9 +717,11 @@ static int hap_page_fault(struct vcpu *v static int hap_page_fault(struct vcpu *v, unsigned long va, struct cpu_user_regs *regs) { + struct domain *d = v->domain; + HAP_ERROR("Intercepted a guest #PF (%u:%u) with HAP enabled.\n", - v->domain->domain_id, v->vcpu_id); - domain_crash(v->domain); + d->domain_id, v->vcpu_id); + domain_crash(d); return 0; } @@ -882,5 +890,3 @@ static const struct paging_mode hap_pagi * indent-tabs-mode: nil * End: */ - - diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/mm/hap/p2m-ept.c --- a/xen/arch/x86/mm/hap/p2m-ept.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/mm/hap/p2m-ept.c Wed Aug 11 15:56:01 2010 +0100 @@ -36,23 +36,23 @@ #define is_epte_superpage(ept_entry) ((ept_entry)->sp) /* Non-ept "lock-and-check" wrapper */ -static int ept_pod_check_and_populate(struct domain *d, unsigned long gfn, +static int ept_pod_check_and_populate(struct p2m_domain *p2m, unsigned long gfn, ept_entry_t *entry, int order, p2m_query_t q) { int r; - p2m_lock(d->arch.p2m); + p2m_lock(p2m); /* Check to make sure this is still PoD */ if ( entry->avail1 != p2m_populate_on_demand ) { - p2m_unlock(d->arch.p2m); + p2m_unlock(p2m); return 0; } - r = p2m_pod_demand_populate(d, gfn, order, q); - - p2m_unlock(d->arch.p2m); + r = p2m_pod_demand_populate(p2m, gfn, order, q); + + p2m_unlock(p2m); return r; } @@ -98,11 +98,11 @@ static void ept_p2m_type_to_flags(ept_en #define GUEST_TABLE_POD_PAGE 3 /* Fill in middle levels of ept table */ -static int ept_set_middle_entry(struct domain *d, ept_entry_t *ept_entry) +static int ept_set_middle_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry) { struct page_info *pg; - pg = p2m_alloc_ptp(d, 0); + pg = p2m_alloc_ptp(p2m, 0); if ( pg == NULL ) return 0; @@ -119,7 +119,7 @@ static int ept_set_middle_entry(struct d } /* free ept sub tree behind an entry */ -void ept_free_entry(struct domain *d, ept_entry_t *ept_entry, int level) +void ept_free_entry(struct p2m_domain *p2m, ept_entry_t *ept_entry, int level) { /* End if the entry is a leaf entry. */ if ( level == 0 || !is_epte_present(ept_entry) || @@ -130,14 +130,14 @@ void ept_free_entry(struct domain *d, ep { ept_entry_t *epte = map_domain_page(ept_entry->mfn); for ( int i = 0; i < EPT_PAGETABLE_ENTRIES; i++ ) - ept_free_entry(d, epte + i, level - 1); + ept_free_entry(p2m, epte + i, level - 1); unmap_domain_page(epte); } - d->arch.p2m->free_page(d, mfn_to_page(ept_entry->mfn)); -} - -static int ept_split_super_page(struct domain *d, ept_entry_t *ept_entry, + p2m->free_page(p2m, mfn_to_page(ept_entry->mfn)); +} + +static int ept_split_super_page(struct p2m_domain *p2m, ept_entry_t *ept_entry, int level, int target) { ept_entry_t new_ept, *table; @@ -150,7 +150,7 @@ static int ept_split_super_page(struct d ASSERT(is_epte_superpage(ept_entry)); - if ( !ept_set_middle_entry(d, &new_ept) ) + if ( !ept_set_middle_entry(p2m, &new_ept) ) return 0; table = map_domain_page(new_ept.mfn); @@ -174,7 +174,7 @@ static int ept_split_super_page(struct d ASSERT(is_epte_superpage(epte)); - if ( !(rv = ept_split_super_page(d, epte, level - 1, target)) ) + if ( !(rv = ept_split_super_page(p2m, epte, level - 1, target)) ) break; } @@ -200,7 +200,7 @@ static int ept_split_super_page(struct d * GUEST_TABLE_POD: * The next entry is marked populate-on-demand. */ -static int ept_next_level(struct domain *d, bool_t read_only, +static int ept_next_level(struct p2m_domain *p2m, bool_t read_only, ept_entry_t **table, unsigned long *gfn_remainder, int next_level) { @@ -225,7 +225,7 @@ static int ept_next_level(struct domain if ( read_only ) return GUEST_TABLE_MAP_FAILED; - if ( !ept_set_middle_entry(d, ept_entry) ) + if ( !ept_set_middle_entry(p2m, ept_entry) ) return GUEST_TABLE_MAP_FAILED; } @@ -245,7 +245,7 @@ static int ept_next_level(struct domain * by observing whether any gfn->mfn translations are modified. */ static int -ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, +ept_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, unsigned int order, p2m_type_t p2mt) { ept_entry_t *table, *ept_entry; @@ -259,6 +259,7 @@ ept_set_entry(struct domain *d, unsigned uint8_t ipat = 0; int need_modify_vtd_table = 1; int needs_sync = 1; + struct domain *d = p2m->domain; /* * the caller must make sure: @@ -267,7 +268,7 @@ ept_set_entry(struct domain *d, unsigned * 3. passing a valid order. */ if ( ((gfn | mfn_x(mfn)) & ((1UL << order) - 1)) || - (gfn >> ((ept_get_wl(d) + 1) * EPT_TABLE_ORDER)) || + ((u64)gfn >> ((ept_get_wl(d) + 1) * EPT_TABLE_ORDER)) || (order % EPT_TABLE_ORDER) ) return 0; @@ -281,7 +282,7 @@ ept_set_entry(struct domain *d, unsigned for ( i = ept_get_wl(d); i > target; i-- ) { - ret = ept_next_level(d, 0, &table, &gfn_remainder, i); + ret = ept_next_level(p2m, 0, &table, &gfn_remainder, i); if ( !ret ) goto out; else if ( ret != GUEST_TABLE_NORMAL_PAGE ) @@ -311,7 +312,7 @@ ept_set_entry(struct domain *d, unsigned if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) || (p2mt == p2m_ram_paging_in_start) ) { - ept_entry->emt = epte_get_entry_emt(d, gfn, mfn, &ipat, + ept_entry->emt = epte_get_entry_emt(p2m->domain, gfn, mfn, &ipat, direct_mmio); ept_entry->ipat = ipat; ept_entry->sp = order ? 1 : 0; @@ -337,9 +338,9 @@ ept_set_entry(struct domain *d, unsigned split_ept_entry = *ept_entry; - if ( !ept_split_super_page(d, &split_ept_entry, i, target) ) - { - ept_free_entry(d, &split_ept_entry, i); + if ( !ept_split_super_page(p2m, &split_ept_entry, i, target) ) + { + ept_free_entry(p2m, &split_ept_entry, i); goto out; } @@ -349,7 +350,7 @@ ept_set_entry(struct domain *d, unsigned /* then move to the level we want to make real changes */ for ( ; i > target; i-- ) - ept_next_level(d, 0, &table, &gfn_remainder, i); + ept_next_level(p2m, 0, &table, &gfn_remainder, i); ASSERT(i == target); @@ -374,8 +375,8 @@ ept_set_entry(struct domain *d, unsigned /* Track the highest gfn for which we have ever had a valid mapping */ if ( mfn_valid(mfn_x(mfn)) && - (gfn + (1UL << order) - 1 > d->arch.p2m->max_mapped_pfn) ) - d->arch.p2m->max_mapped_pfn = gfn + (1UL << order) - 1; + (gfn + (1UL << order) - 1 > p2m->max_mapped_pfn) ) + p2m->max_mapped_pfn = gfn + (1UL << order) - 1; /* Success */ rv = 1; @@ -384,10 +385,10 @@ out: unmap_domain_page(table); if ( needs_sync ) - ept_sync_domain(d); + ept_sync_domain(p2m->domain); /* Now the p2m table is not shared with vt-d page table */ - if ( rv && iommu_enabled && need_iommu(d) && need_modify_vtd_table ) + if ( rv && iommu_enabled && need_iommu(p2m->domain) && need_modify_vtd_table ) { if ( p2mt == p2m_ram_rw ) { @@ -395,22 +396,22 @@ out: { for ( i = 0; i < (1 << order); i++ ) iommu_map_page( - d, gfn - offset + i, mfn_x(mfn) - offset + i, + p2m->domain, gfn - offset + i, mfn_x(mfn) - offset + i, IOMMUF_readable | IOMMUF_writable); } else if ( !order ) iommu_map_page( - d, gfn, mfn_x(mfn), IOMMUF_readable | IOMMUF_writable); + p2m->domain, gfn, mfn_x(mfn), IOMMUF_readable | IOMMUF_writable); } else { if ( order == EPT_TABLE_ORDER ) { for ( i = 0; i < (1 << order); i++ ) - iommu_unmap_page(d, gfn - offset + i); + iommu_unmap_page(p2m->domain, gfn - offset + i); } else if ( !order ) - iommu_unmap_page(d, gfn); + iommu_unmap_page(p2m->domain, gfn); } } @@ -418,9 +419,11 @@ out: } /* Read ept p2m entries */ -static mfn_t ept_get_entry(struct domain *d, unsigned long gfn, p2m_type_t *t, +static mfn_t ept_get_entry(struct p2m_domain *p2m, + unsigned long gfn, p2m_type_t *t, p2m_query_t q) { + struct domain *d = p2m->domain; ept_entry_t *table = map_domain_page(ept_get_asr(d)); unsigned long gfn_remainder = gfn; ept_entry_t *ept_entry; @@ -432,7 +435,7 @@ static mfn_t ept_get_entry(struct domain *t = p2m_mmio_dm; /* This pfn is higher than the highest the p2m map currently holds */ - if ( gfn > d->arch.p2m->max_mapped_pfn ) + if ( gfn > p2m->max_mapped_pfn ) goto out; /* Should check if gfn obeys GAW here. */ @@ -440,7 +443,7 @@ static mfn_t ept_get_entry(struct domain for ( i = ept_get_wl(d); i > 0; i-- ) { retry: - ret = ept_next_level(d, 1, &table, &gfn_remainder, i); + ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i); if ( !ret ) goto out; else if ( ret == GUEST_TABLE_POD_PAGE ) @@ -457,7 +460,7 @@ static mfn_t ept_get_entry(struct domain index = gfn_remainder >> ( i * EPT_TABLE_ORDER); ept_entry = table + index; - if ( !ept_pod_check_and_populate(d, gfn, + if ( !ept_pod_check_and_populate(p2m, gfn, ept_entry, 9, q) ) goto retry; else @@ -480,7 +483,7 @@ static mfn_t ept_get_entry(struct domain ASSERT(i == 0); - if ( ept_pod_check_and_populate(d, gfn, + if ( ept_pod_check_and_populate(p2m, gfn, ept_entry, 0, q) ) goto out; } @@ -511,9 +514,10 @@ out: /* WARNING: Only caller doesn't care about PoD pages. So this function will * always return 0 for PoD pages, not populate them. If that becomes necessary, * pass a p2m_query_t type along to distinguish. */ -static ept_entry_t ept_get_entry_content(struct domain *d, unsigned long gfn, int *level) -{ - ept_entry_t *table = map_domain_page(ept_get_asr(d)); +static ept_entry_t ept_get_entry_content(struct p2m_domain *p2m, + unsigned long gfn, int *level) +{ + ept_entry_t *table = map_domain_page(ept_get_asr(p2m->domain)); unsigned long gfn_remainder = gfn; ept_entry_t *ept_entry; ept_entry_t content = { .epte = 0 }; @@ -522,12 +526,12 @@ static ept_entry_t ept_get_entry_content int ret=0; /* This pfn is higher than the highest the p2m map currently holds */ - if ( gfn > d->arch.p2m->max_mapped_pfn ) + if ( gfn > p2m->max_mapped_pfn ) goto out; - for ( i = ept_get_wl(d); i > 0; i-- ) - { - ret = ept_next_level(d, 1, &table, &gfn_remainder, i); + for ( i = ept_get_wl(p2m->domain); i > 0; i-- ) + { + ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i); if ( !ret || ret == GUEST_TABLE_POD_PAGE ) goto out; else if ( ret == GUEST_TABLE_SUPER_PAGE ) @@ -546,6 +550,7 @@ static ept_entry_t ept_get_entry_content void ept_walk_table(struct domain *d, unsigned long gfn) { + struct p2m_domain *p2m = p2m_get_hostp2m(d); ept_entry_t *table = map_domain_page(ept_get_asr(d)); unsigned long gfn_remainder = gfn; @@ -555,10 +560,10 @@ void ept_walk_table(struct domain *d, un d->domain_id, gfn); /* This pfn is higher than the highest the p2m map currently holds */ - if ( gfn > d->arch.p2m->max_mapped_pfn ) + if ( gfn > p2m->max_mapped_pfn ) { gdprintk(XENLOG_ERR, " gfn exceeds max_mapped_pfn %lx\n", - d->arch.p2m->max_mapped_pfn); + p2m->max_mapped_pfn); goto out; } @@ -593,17 +598,18 @@ out: return; } -static mfn_t ept_get_entry_current(unsigned long gfn, p2m_type_t *t, +static mfn_t ept_get_entry_current(struct p2m_domain *p2m, + unsigned long gfn, p2m_type_t *t, p2m_query_t q) { - return ept_get_entry(current->domain, gfn, t, q); + return ept_get_entry(p2m, gfn, t, q); } /* * To test if the new emt type is the same with old, * return 1 to not to reset ept entry. */ -static int need_modify_ept_entry(struct domain *d, unsigned long gfn, +static int need_modify_ept_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, uint8_t o_ipat, uint8_t o_emt, p2m_type_t p2mt) { @@ -611,7 +617,7 @@ static int need_modify_ept_entry(struct uint8_t emt; bool_t direct_mmio = (p2mt == p2m_mmio_direct); - emt = epte_get_entry_emt(d, gfn, mfn, &ipat, direct_mmio); + emt = epte_get_entry_emt(p2m->domain, gfn, mfn, &ipat, direct_mmio); if ( (emt == o_emt) && (ipat == o_ipat) ) return 0; @@ -619,21 +625,23 @@ static int need_modify_ept_entry(struct return 1; } -void ept_change_entry_emt_with_range(struct domain *d, unsigned long start_gfn, +void ept_change_entry_emt_with_range(struct domain *d, + unsigned long start_gfn, unsigned long end_gfn) { unsigned long gfn; ept_entry_t e; mfn_t mfn; int order = 0; - - p2m_lock(d->arch.p2m); + struct p2m_domain *p2m = p2m_get_hostp2m(d); + + p2m_lock(p2m); for ( gfn = start_gfn; gfn <= end_gfn; gfn++ ) { int level = 0; uint64_t trunk = 0; - e = ept_get_entry_content(d, gfn, &level); + e = ept_get_entry_content(p2m, gfn, &level); if ( !p2m_has_emt(e.avail1) ) continue; @@ -652,9 +660,9 @@ void ept_change_entry_emt_with_range(str * Set emt for super page. */ order = level * EPT_TABLE_ORDER; - if ( need_modify_ept_entry(d, gfn, mfn, + if ( need_modify_ept_entry(p2m, gfn, mfn, e.ipat, e.emt, e.avail1) ) - ept_set_entry(d, gfn, mfn, order, e.avail1); + ept_set_entry(p2m, gfn, mfn, order, e.avail1); gfn += trunk; break; } @@ -663,11 +671,11 @@ void ept_change_entry_emt_with_range(str } else /* gfn assigned with 4k */ { - if ( need_modify_ept_entry(d, gfn, mfn, e.ipat, e.emt, e.avail1) ) - ept_set_entry(d, gfn, mfn, order, e.avail1); - } - } - p2m_unlock(d->arch.p2m); + if ( need_modify_ept_entry(p2m, gfn, mfn, e.ipat, e.emt, e.avail1) ) + ept_set_entry(p2m, gfn, mfn, order, e.avail1); + } + } + p2m_unlock(p2m); } /* @@ -701,9 +709,10 @@ static void ept_change_entry_type_page(m unmap_domain_page(epte); } -static void ept_change_entry_type_global(struct domain *d, +static void ept_change_entry_type_global(struct p2m_domain *p2m, p2m_type_t ot, p2m_type_t nt) { + struct domain *d = p2m->domain; if ( ept_get_asr(d) == 0 ) return; @@ -714,10 +723,11 @@ static void ept_change_entry_type_global void ept_p2m_init(struct domain *d) { - d->arch.p2m->set_entry = ept_set_entry; - d->arch.p2m->get_entry = ept_get_entry; - d->arch.p2m->get_entry_current = ept_get_entry_current; - d->arch.p2m->change_entry_type_global = ept_change_entry_type_global; + struct p2m_domain *p2m = p2m_get_hostp2m(d); + p2m->set_entry = ept_set_entry; + p2m->get_entry = ept_get_entry; + p2m->get_entry_current = ept_get_entry_current; + p2m->change_entry_type_global = ept_change_entry_type_global; } static void ept_dump_p2m_table(unsigned char key) @@ -742,7 +752,7 @@ static void ept_dump_p2m_table(unsigned p2m = p2m_get_hostp2m(d); printk("\ndomain%d EPT p2m table: \n", d->domain_id); - for ( gfn = 0; gfn <= d->arch.p2m->max_mapped_pfn; gfn += (1 << order) ) + for ( gfn = 0; gfn <= p2m->max_mapped_pfn; gfn += (1 << order) ) { gfn_remainder = gfn; mfn = _mfn(INVALID_MFN); @@ -750,7 +760,7 @@ static void ept_dump_p2m_table(unsigned for ( i = ept_get_wl(d); i > 0; i-- ) { - ret = ept_next_level(d, 1, &table, &gfn_remainder, i); + ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i); if ( ret != GUEST_TABLE_NORMAL_PAGE ) break; } diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/mm/mem_event.c --- a/xen/arch/x86/mm/mem_event.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/mm/mem_event.c Wed Aug 11 15:56:01 2010 +0100 @@ -235,7 +235,7 @@ int mem_event_domctl(struct domain *d, x /* Get MFN of ring page */ guest_get_eff_l1e(v, ring_addr, &l1e); gfn = l1e_get_pfn(l1e); - ring_mfn = gfn_to_mfn(dom_mem_event, gfn, &p2mt); + ring_mfn = gfn_to_mfn(p2m_get_hostp2m(dom_mem_event), gfn, &p2mt); rc = -EINVAL; if ( unlikely(!mfn_valid(mfn_x(ring_mfn))) ) @@ -244,7 +244,7 @@ int mem_event_domctl(struct domain *d, x /* Get MFN of shared page */ guest_get_eff_l1e(v, shared_addr, &l1e); gfn = l1e_get_pfn(l1e); - shared_mfn = gfn_to_mfn(dom_mem_event, gfn, &p2mt); + shared_mfn = gfn_to_mfn(p2m_get_hostp2m(dom_mem_event), gfn, &p2mt); rc = -EINVAL; if ( unlikely(!mfn_valid(mfn_x(shared_mfn))) ) diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/mm/mem_paging.c --- a/xen/arch/x86/mm/mem_paging.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/mm/mem_paging.c Wed Aug 11 15:56:01 2010 +0100 @@ -29,33 +29,34 @@ int mem_paging_domctl(struct domain *d, XEN_GUEST_HANDLE(void) u_domctl) { int rc; + struct p2m_domain *p2m = p2m_get_hostp2m(d); switch( mec->op ) { case XEN_DOMCTL_MEM_EVENT_OP_PAGING_NOMINATE: { unsigned long gfn = mec->gfn; - rc = p2m_mem_paging_nominate(d, gfn); + rc = p2m_mem_paging_nominate(p2m, gfn); } break; case XEN_DOMCTL_MEM_EVENT_OP_PAGING_EVICT: { unsigned long gfn = mec->gfn; - rc = p2m_mem_paging_evict(d, gfn); + rc = p2m_mem_paging_evict(p2m, gfn); } break; case XEN_DOMCTL_MEM_EVENT_OP_PAGING_PREP: { unsigned long gfn = mec->gfn; - rc = p2m_mem_paging_prep(d, gfn); + rc = p2m_mem_paging_prep(p2m, gfn); } break; case XEN_DOMCTL_MEM_EVENT_OP_PAGING_RESUME: { - p2m_mem_paging_resume(d); + p2m_mem_paging_resume(p2m); rc = 0; } break; diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/mm/mem_sharing.c --- a/xen/arch/x86/mm/mem_sharing.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/mm/mem_sharing.c Wed Aug 11 15:56:01 2010 +0100 @@ -251,6 +251,7 @@ static void mem_sharing_audit(void) list_for_each(le, &e->gfns) { struct domain *d; + struct p2m_domain *p2m; p2m_type_t t; mfn_t mfn; @@ -262,7 +263,8 @@ static void mem_sharing_audit(void) g->domain, g->gfn, mfn_x(e->mfn)); continue; } - mfn = gfn_to_mfn(d, g->gfn, &t); + p2m = p2m_get_hostp2m(d); + mfn = gfn_to_mfn(p2m, g->gfn, &t); if(mfn_x(mfn) != mfn_x(e->mfn)) MEM_SHARING_DEBUG("Incorrect P2M for d=%d, PFN=%lx." "Expecting MFN=%ld, got %ld\n", @@ -377,7 +379,7 @@ int mem_sharing_debug_gfn(struct domain mfn_t mfn; struct page_info *page; - mfn = gfn_to_mfn(d, gfn, &p2mt); + mfn = gfn_to_mfn(p2m_get_hostp2m(d), gfn, &p2mt); page = mfn_to_page(mfn); printk("Debug for domain=%d, gfn=%lx, ", @@ -487,7 +489,7 @@ int mem_sharing_debug_gref(struct domain return mem_sharing_debug_gfn(d, gfn); } -int mem_sharing_nominate_page(struct domain *d, +int mem_sharing_nominate_page(struct p2m_domain *p2m, unsigned long gfn, int expected_refcnt, shr_handle_t *phandle) @@ -499,10 +501,11 @@ int mem_sharing_nominate_page(struct dom shr_handle_t handle; shr_hash_entry_t *hash_entry; struct gfn_info *gfn_info; + struct domain *d = p2m->domain; *phandle = 0UL; - mfn = gfn_to_mfn(d, gfn, &p2mt); + mfn = gfn_to_mfn(p2m, gfn, &p2mt); /* Check if mfn is valid */ ret = -EINVAL; @@ -536,7 +539,7 @@ int mem_sharing_nominate_page(struct dom } /* Change the p2m type */ - if(p2m_change_type(d, gfn, p2mt, p2m_ram_shared) != p2mt) + if(p2m_change_type(p2m, gfn, p2mt, p2m_ram_shared) != p2mt) { /* This is unlikely, as the type must have changed since we've checked * it a few lines above. @@ -599,7 +602,7 @@ int mem_sharing_share_pages(shr_handle_t list_del(&gfn->list); d = get_domain_by_id(gfn->domain); BUG_ON(!d); - BUG_ON(set_shared_p2m_entry(d, gfn->gfn, se->mfn) == 0); + BUG_ON(set_shared_p2m_entry(p2m_get_hostp2m(d), gfn->gfn, se->mfn) == 0); put_domain(d); list_add(&gfn->list, &se->gfns); put_page_and_type(cpage); @@ -618,7 +621,7 @@ err_out: return ret; } -int mem_sharing_unshare_page(struct domain *d, +int mem_sharing_unshare_page(struct p2m_domain *p2m, unsigned long gfn, uint16_t flags) { @@ -631,8 +634,9 @@ int mem_sharing_unshare_page(struct doma struct gfn_info *gfn_info = NULL; shr_handle_t handle; struct list_head *le; - - mfn = gfn_to_mfn(d, gfn, &p2mt); + struct domain *d = p2m->domain; + + mfn = gfn_to_mfn(p2m, gfn, &p2mt); page = mfn_to_page(mfn); handle = page->shr_handle; @@ -696,7 +700,7 @@ gfn_found: unmap_domain_page(s); unmap_domain_page(t); - ASSERT(set_shared_p2m_entry(d, gfn, page_to_mfn(page)) != 0); + ASSERT(set_shared_p2m_entry(p2m, gfn, page_to_mfn(page)) != 0); put_page_and_type(old_page); private_page_found: @@ -708,7 +712,7 @@ private_page_found: atomic_dec(&nr_saved_mfns); shr_unlock(); - if(p2m_change_type(d, gfn, p2m_ram_shared, p2m_ram_rw) != + if(p2m_change_type(p2m, gfn, p2m_ram_shared, p2m_ram_rw) != p2m_ram_shared) { printk("Could not change p2m type.\n"); @@ -740,7 +744,7 @@ int mem_sharing_domctl(struct domain *d, shr_handle_t handle; if(!mem_sharing_enabled(d)) return -EINVAL; - rc = mem_sharing_nominate_page(d, gfn, 0, &handle); + rc = mem_sharing_nominate_page(p2m_get_hostp2m(d), gfn, 0, &handle); mec->u.nominate.handle = handle; mem_sharing_audit(); } @@ -756,7 +760,8 @@ int mem_sharing_domctl(struct domain *d, return -EINVAL; if(mem_sharing_gref_to_gfn(d, gref, &gfn) < 0) return -EINVAL; - rc = mem_sharing_nominate_page(d, gfn, 3, &handle); + rc = mem_sharing_nominate_page(p2m_get_hostp2m(d), + gfn, 3, &handle); mec->u.nominate.handle = handle; mem_sharing_audit(); } diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/mm/p2m.c --- a/xen/arch/x86/mm/p2m.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/mm/p2m.c Wed Aug 11 15:56:01 2010 +0100 @@ -108,9 +108,9 @@ static unsigned long p2m_type_to_flags(p } #if P2M_AUDIT -static void audit_p2m(struct domain *d); +static void audit_p2m(struct p2m_domain *p2m); #else -# define audit_p2m(_d) do { (void)(_d); } while(0) +# define audit_p2m(_p2m) do { (void)(_p2m); } while(0) #endif /* P2M_AUDIT */ // Find the next level's P2M entry, checking for out-of-range gfn's... @@ -135,15 +135,17 @@ p2m_find_entry(void *table, unsigned lon } struct page_info * -p2m_alloc_ptp(struct domain *d, unsigned long type) +p2m_alloc_ptp(struct p2m_domain *p2m, unsigned long type) { struct page_info *pg; - pg = d->arch.p2m->alloc_page(d); + ASSERT(p2m); + ASSERT(p2m->alloc_page); + pg = p2m->alloc_page(p2m); if (pg == NULL) return NULL; - page_list_add_tail(pg, &d->arch.p2m->pages); + page_list_add_tail(pg, &p2m->pages); pg->u.inuse.type_info = type | 1 | PGT_validated; pg->count_info |= 1; @@ -154,7 +156,7 @@ p2m_alloc_ptp(struct domain *d, unsigned // Returns 0 on error. // static int -p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table, +p2m_next_level(struct p2m_domain *p2m, mfn_t *table_mfn, void **table, unsigned long *gfn_remainder, unsigned long gfn, u32 shift, u32 max, unsigned long type) { @@ -163,7 +165,7 @@ p2m_next_level(struct domain *d, mfn_t * l1_pgentry_t new_entry; void *next; int i; - ASSERT(d->arch.p2m->alloc_page); + ASSERT(p2m->alloc_page); if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn, shift, max)) ) @@ -174,7 +176,7 @@ p2m_next_level(struct domain *d, mfn_t * { struct page_info *pg; - pg = p2m_alloc_ptp(d, type); + pg = p2m_alloc_ptp(p2m, type); if ( pg == NULL ) return 0; @@ -183,7 +185,7 @@ p2m_next_level(struct domain *d, mfn_t * switch ( type ) { case PGT_l3_page_table: - paging_write_p2m_entry(d, gfn, + paging_write_p2m_entry(p2m->domain, gfn, p2m_entry, *table_mfn, new_entry, 4); break; case PGT_l2_page_table: @@ -191,11 +193,11 @@ p2m_next_level(struct domain *d, mfn_t * /* for PAE mode, PDPE only has PCD/PWT/P bits available */ new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)), _PAGE_PRESENT); #endif - paging_write_p2m_entry(d, gfn, + paging_write_p2m_entry(p2m->domain, gfn, p2m_entry, *table_mfn, new_entry, 3); break; case PGT_l1_page_table: - paging_write_p2m_entry(d, gfn, + paging_write_p2m_entry(p2m->domain, gfn, p2m_entry, *table_mfn, new_entry, 2); break; default: @@ -212,7 +214,7 @@ p2m_next_level(struct domain *d, mfn_t * unsigned long flags, pfn; struct page_info *pg; - pg = p2m_alloc_ptp(d, PGT_l2_page_table); + pg = p2m_alloc_ptp(p2m, PGT_l2_page_table); if ( pg == NULL ) return 0; @@ -223,13 +225,13 @@ p2m_next_level(struct domain *d, mfn_t * for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) { new_entry = l1e_from_pfn(pfn + (i * L1_PAGETABLE_ENTRIES), flags); - paging_write_p2m_entry(d, gfn, l1_entry+i, *table_mfn, new_entry, - 2); + paging_write_p2m_entry(p2m->domain, gfn, + l1_entry+i, *table_mfn, new_entry, 2); } unmap_domain_page(l1_entry); new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)), __PAGE_HYPERVISOR|_PAGE_USER); //disable PSE - paging_write_p2m_entry(d, gfn, + paging_write_p2m_entry(p2m->domain, gfn, p2m_entry, *table_mfn, new_entry, 3); } @@ -240,7 +242,7 @@ p2m_next_level(struct domain *d, mfn_t * unsigned long flags, pfn; struct page_info *pg; - pg = p2m_alloc_ptp(d, PGT_l1_page_table); + pg = p2m_alloc_ptp(p2m, PGT_l1_page_table); if ( pg == NULL ) return 0; @@ -257,14 +259,14 @@ p2m_next_level(struct domain *d, mfn_t * for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) { new_entry = l1e_from_pfn(pfn + i, flags); - paging_write_p2m_entry(d, gfn, + paging_write_p2m_entry(p2m->domain, gfn, l1_entry+i, *table_mfn, new_entry, 1); } unmap_domain_page(l1_entry); new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)), __PAGE_HYPERVISOR|_PAGE_USER); - paging_write_p2m_entry(d, gfn, + paging_write_p2m_entry(p2m->domain, gfn, p2m_entry, *table_mfn, new_entry, 2); } @@ -280,17 +282,17 @@ p2m_next_level(struct domain *d, mfn_t * * Populate-on-demand functionality */ static -int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, +int set_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, unsigned int page_order, p2m_type_t p2mt); static int -p2m_pod_cache_add(struct domain *d, +p2m_pod_cache_add(struct p2m_domain *p2m, struct page_info *page, unsigned long order) { int i; struct page_info *p; - struct p2m_domain *p2md = d->arch.p2m; + struct domain *d = p2m->domain; #ifndef NDEBUG mfn_t mfn; @@ -320,7 +322,7 @@ p2m_pod_cache_add(struct domain *d, } #endif - ASSERT(p2m_locked_by_me(p2md)); + ASSERT(p2m_locked_by_me(p2m)); /* * Pages from domain_alloc and returned by the balloon driver aren't @@ -347,12 +349,12 @@ p2m_pod_cache_add(struct domain *d, switch(order) { case 9: - page_list_add_tail(page, &p2md->pod.super); /* lock: page_alloc */ - p2md->pod.count += 1 << order; + page_list_add_tail(page, &p2m->pod.super); /* lock: page_alloc */ + p2m->pod.count += 1 << order; break; case 0: - page_list_add_tail(page, &p2md->pod.single); /* lock: page_alloc */ - p2md->pod.count += 1 ; + page_list_add_tail(page, &p2m->pod.single); /* lock: page_alloc */ + p2m->pod.count += 1; break; default: BUG(); @@ -371,57 +373,56 @@ p2m_pod_cache_add(struct domain *d, * down 2-meg pages into singleton pages automatically. Returns null if * a superpage is requested and no superpages are available. Must be called * with the d->page_lock held. */ -static struct page_info * p2m_pod_cache_get(struct domain *d, +static struct page_info * p2m_pod_cache_get(struct p2m_domain *p2m, unsigned long order) { - struct p2m_domain *p2md = d->arch.p2m; struct page_info *p = NULL; int i; - if ( order == 9 && page_list_empty(&p2md->pod.super) ) + if ( order == 9 && page_list_empty(&p2m->pod.super) ) { return NULL; } - else if ( order == 0 && page_list_empty(&p2md->pod.single) ) + else if ( order == 0 && page_list_empty(&p2m->pod.single) ) { unsigned long mfn; struct page_info *q; - BUG_ON( page_list_empty(&p2md->pod.super) ); + BUG_ON( page_list_empty(&p2m->pod.super) ); /* Break up a superpage to make single pages. NB count doesn't * need to be adjusted. */ - p = page_list_remove_head(&p2md->pod.super); + p = page_list_remove_head(&p2m->pod.super); mfn = mfn_x(page_to_mfn(p)); for ( i=0; i<SUPERPAGE_PAGES; i++ ) { q = mfn_to_page(_mfn(mfn+i)); - page_list_add_tail(q, &p2md->pod.single); + page_list_add_tail(q, &p2m->pod.single); } } switch ( order ) { case 9: - BUG_ON( page_list_empty(&p2md->pod.super) ); - p = page_list_remove_head(&p2md->pod.super); - p2md->pod.count -= 1 << order; /* Lock: page_alloc */ + BUG_ON( page_list_empty(&p2m->pod.super) ); + p = page_list_remove_head(&p2m->pod.super); + p2m->pod.count -= 1 << order; /* Lock: page_alloc */ break; case 0: - BUG_ON( page_list_empty(&p2md->pod.single) ); - p = page_list_remove_head(&p2md->pod.single); - p2md->pod.count -= 1; + BUG_ON( page_list_empty(&p2m->pod.single) ); + p = page_list_remove_head(&p2m->pod.single); + p2m->pod.count -= 1; break; default: BUG(); } /* Put the pages back on the domain page_list */ - for ( i = 0 ; i < (1 << order) ; i++ ) - { - BUG_ON(page_get_owner(p + i) != d); - page_list_add_tail(p + i, &d->page_list); + for ( i = 0 ; i < (1 << order); i++ ) + { + BUG_ON(page_get_owner(p + i) != p2m->domain); + page_list_add_tail(p + i, &p2m->domain->page_list); } return p; @@ -429,18 +430,18 @@ static struct page_info * p2m_pod_cache_ /* Set the size of the cache, allocating or freeing as necessary. */ static int -p2m_pod_set_cache_target(struct domain *d, unsigned long pod_target) -{ - struct p2m_domain *p2md = d->arch.p2m; +p2m_pod_set_cache_target(struct p2m_domain *p2m, unsigned long pod_target) +{ + struct domain *d = p2m->domain; int ret = 0; /* Increasing the target */ - while ( pod_target > p2md->pod.count ) + while ( pod_target > p2m->pod.count ) { struct page_info * page; int order; - if ( (pod_target - p2md->pod.count) >= SUPERPAGE_PAGES ) + if ( (pod_target - p2m->pod.count) >= SUPERPAGE_PAGES ) order = 9; else order = 0; @@ -456,18 +457,18 @@ p2m_pod_set_cache_target(struct domain * } printk("%s: Unable to allocate domheap page for pod cache. target %lu cachesize %d\n", - __func__, pod_target, p2md->pod.count); + __func__, pod_target, p2m->pod.count); ret = -ENOMEM; goto out; } - p2m_pod_cache_add(d, page, order); + p2m_pod_cache_add(p2m, page, order); } /* Decreasing the target */ /* We hold the p2m lock here, so we don't need to worry about * cache disappearing under our feet. */ - while ( pod_target < p2md->pod.count ) + while ( pod_target < p2m->pod.count ) { struct page_info * page; int order, i; @@ -476,13 +477,13 @@ p2m_pod_set_cache_target(struct domain * * entries may disappear before we grab the lock. */ spin_lock(&d->page_alloc_lock); - if ( (p2md->pod.count - pod_target) > SUPERPAGE_PAGES - && !page_list_empty(&p2md->pod.super) ) + if ( (p2m->pod.count - pod_target) > SUPERPAGE_PAGES + && !page_list_empty(&p2m->pod.super) ) order = 9; else order = 0; - page = p2m_pod_cache_get(d, order); + page = p2m_pod_cache_get(p2m, order); ASSERT(page != NULL); @@ -553,14 +554,14 @@ p2m_pod_set_mem_target(struct domain *d, p2m_pod_set_mem_target(struct domain *d, unsigned long target) { unsigned pod_target; - struct p2m_domain *p2md = d->arch.p2m; + struct p2m_domain *p2m = p2m_get_hostp2m(d); int ret = 0; unsigned long populated; - p2m_lock(p2md); + p2m_lock(p2m); /* P == B: Nothing to do. */ - if ( p2md->pod.entry_count == 0 ) + if ( p2m->pod.entry_count == 0 ) goto out; /* Don't do anything if the domain is being torn down */ @@ -572,21 +573,21 @@ p2m_pod_set_mem_target(struct domain *d, if ( target < d->tot_pages ) goto out; - populated = d->tot_pages - p2md->pod.count; + populated = d->tot_pages - p2m->pod.count; pod_target = target - populated; /* B < T': Set the cache size equal to # of outstanding entries, * let the balloon driver fill in the rest. */ - if ( pod_target > p2md->pod.entry_count ) - pod_target = p2md->pod.entry_count; - - ASSERT( pod_target >= p2md->pod.count ); - - ret = p2m_pod_set_cache_target(d, pod_target); + if ( pod_target > p2m->pod.entry_count ) + pod_target = p2m->pod.entry_count; + + ASSERT( pod_target >= p2m->pod.count ); + + ret = p2m_pod_set_cache_target(p2m, pod_target); out: - p2m_unlock(p2md); + p2m_unlock(p2m); return ret; } @@ -594,16 +595,16 @@ void void p2m_pod_empty_cache(struct domain *d) { - struct p2m_domain *p2md = d->arch.p2m; + struct p2m_domain *p2m = p2m_get_hostp2m(d); struct page_info *page; /* After this barrier no new PoD activities can happen. */ BUG_ON(!d->is_dying); - spin_barrier(&p2md->lock); + spin_barrier(&p2m->lock); spin_lock(&d->page_alloc_lock); - while ( (page = page_list_remove_head(&p2md->pod.super)) ) + while ( (page = page_list_remove_head(&p2m->pod.super)) ) { int i; @@ -613,18 +614,18 @@ p2m_pod_empty_cache(struct domain *d) page_list_add_tail(page + i, &d->page_list); } - p2md->pod.count -= SUPERPAGE_PAGES; - } - - while ( (page = page_list_remove_head(&p2md->pod.single)) ) + p2m->pod.count -= SUPERPAGE_PAGES; + } + + while ( (page = page_list_remove_head(&p2m->pod.single)) ) { BUG_ON(page_get_owner(page) != d); page_list_add_tail(page, &d->page_list); - p2md->pod.count -= 1; - } - - BUG_ON(p2md->pod.count != 0); + p2m->pod.count -= 1; + } + + BUG_ON(p2m->pod.count != 0); spin_unlock(&d->page_alloc_lock); } @@ -642,9 +643,9 @@ p2m_pod_decrease_reservation(struct doma xen_pfn_t gpfn, unsigned int order) { - struct p2m_domain *p2md = d->arch.p2m; int ret=0; int i; + struct p2m_domain *p2m = p2m_get_hostp2m(d); int steal_for_cache = 0; int pod = 0, nonpod = 0, ram = 0; @@ -652,14 +653,14 @@ p2m_pod_decrease_reservation(struct doma /* If we don't have any outstanding PoD entries, let things take their * course */ - if ( p2md->pod.entry_count == 0 ) + if ( p2m->pod.entry_count == 0 ) goto out; /* Figure out if we need to steal some freed memory for our cache */ - steal_for_cache = ( p2md->pod.entry_count > p2md->pod.count ); - - p2m_lock(p2md); - audit_p2m(d); + steal_for_cache = ( p2m->pod.entry_count > p2m->pod.count ); + + p2m_lock(p2m); + audit_p2m(p2m); if ( unlikely(d->is_dying) ) goto out_unlock; @@ -670,7 +671,7 @@ p2m_pod_decrease_reservation(struct doma { p2m_type_t t; - gfn_to_mfn_query(d, gpfn + i, &t); + gfn_to_mfn_query(p2m, gpfn + i, &t); if ( t == p2m_populate_on_demand ) pod++; @@ -690,9 +691,9 @@ p2m_pod_decrease_reservation(struct doma { /* All PoD: Mark the whole region invalid and tell caller * we're done. */ - set_p2m_entry(d, gpfn, _mfn(INVALID_MFN), order, p2m_invalid); - p2md->pod.entry_count-=(1<<order); /* Lock: p2m */ - BUG_ON(p2md->pod.entry_count < 0); + set_p2m_entry(p2m, gpfn, _mfn(INVALID_MFN), order, p2m_invalid); + p2m->pod.entry_count-=(1<<order); /* Lock: p2m */ + BUG_ON(p2m->pod.entry_count < 0); ret = 1; goto out_entry_check; } @@ -710,12 +711,12 @@ p2m_pod_decrease_reservation(struct doma mfn_t mfn; p2m_type_t t; - mfn = gfn_to_mfn_query(d, gpfn + i, &t); + mfn = gfn_to_mfn_query(p2m, gpfn + i, &t); if ( t == p2m_populate_on_demand ) { - set_p2m_entry(d, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid); - p2md->pod.entry_count--; /* Lock: p2m */ - BUG_ON(p2md->pod.entry_count < 0); + set_p2m_entry(p2m, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid); + p2m->pod.entry_count--; /* Lock: p2m */ + BUG_ON(p2m->pod.entry_count < 0); pod--; } else if ( steal_for_cache && p2m_is_ram(t) ) @@ -726,12 +727,12 @@ p2m_pod_decrease_reservation(struct doma page = mfn_to_page(mfn); - set_p2m_entry(d, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid); + set_p2m_entry(p2m, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid); set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY); - p2m_pod_cache_add(d, page, 0); - - steal_for_cache = ( p2md->pod.entry_count > p2md->pod.count ); + p2m_pod_cache_add(p2m, page, 0); + + steal_for_cache = ( p2m->pod.entry_count > p2m->pod.count ); nonpod--; ram--; @@ -745,33 +746,31 @@ p2m_pod_decrease_reservation(struct doma out_entry_check: /* If we've reduced our "liabilities" beyond our "assets", free some */ - if ( p2md->pod.entry_count < p2md->pod.count ) - { - p2m_pod_set_cache_target(d, p2md->pod.entry_count); + if ( p2m->pod.entry_count < p2m->pod.count ) + { + p2m_pod_set_cache_target(p2m, p2m->pod.entry_count); } out_unlock: - audit_p2m(d); - p2m_unlock(p2md); + audit_p2m(p2m); + p2m_unlock(p2m); out: return ret; } void -p2m_pod_dump_data(struct domain *d) -{ - struct p2m_domain *p2md = d->arch.p2m; - +p2m_pod_dump_data(struct p2m_domain *p2m) +{ printk(" PoD entries=%d cachesize=%d\n", - p2md->pod.entry_count, p2md->pod.count); + p2m->pod.entry_count, p2m->pod.count); } /* Search for all-zero superpages to be reclaimed as superpages for the * PoD cache. Must be called w/ p2m lock held, page_alloc lock not held. */ static int -p2m_pod_zero_check_superpage(struct domain *d, unsigned long gfn) +p2m_pod_zero_check_superpage(struct p2m_domain *p2m, unsigned long gfn) { mfn_t mfn, mfn0 = _mfn(INVALID_MFN); p2m_type_t type, type0 = 0; @@ -779,6 +778,7 @@ p2m_pod_zero_check_superpage(struct doma int ret=0, reset = 0; int i, j; int max_ref = 1; + struct domain *d = p2m->domain; if ( !superpage_aligned(gfn) ) goto out; @@ -792,7 +792,7 @@ p2m_pod_zero_check_superpage(struct doma for ( i=0; i<SUPERPAGE_PAGES; i++ ) { - mfn = gfn_to_mfn_query(d, gfn + i, &type); + mfn = gfn_to_mfn_query(p2m, gfn + i, &type); if ( i == 0 ) { @@ -840,7 +840,7 @@ p2m_pod_zero_check_superpage(struct doma } /* Try to remove the page, restoring old mapping if it fails. */ - set_p2m_entry(d, gfn, + set_p2m_entry(p2m, gfn, _mfn(POPULATE_ON_DEMAND_MFN), 9, p2m_populate_on_demand); @@ -892,23 +892,24 @@ p2m_pod_zero_check_superpage(struct doma /* Finally! We've passed all the checks, and can add the mfn superpage * back on the PoD cache, and account for the new p2m PoD entries */ - p2m_pod_cache_add(d, mfn_to_page(mfn0), 9); - d->arch.p2m->pod.entry_count += SUPERPAGE_PAGES; + p2m_pod_cache_add(p2m, mfn_to_page(mfn0), 9); + p2m->pod.entry_count += SUPERPAGE_PAGES; out_reset: if ( reset ) - set_p2m_entry(d, gfn, mfn0, 9, type0); + set_p2m_entry(p2m, gfn, mfn0, 9, type0); out: return ret; } static void -p2m_pod_zero_check(struct domain *d, unsigned long *gfns, int count) +p2m_pod_zero_check(struct p2m_domain *p2m, unsigned long *gfns, int count) { mfn_t mfns[count]; p2m_type_t types[count]; unsigned long * map[count]; + struct domain *d = p2m->domain; int i, j; int max_ref = 1; @@ -920,7 +921,7 @@ p2m_pod_zero_check(struct domain *d, uns /* First, get the gfn list, translate to mfns, and map the pages. */ for ( i=0; i<count; i++ ) { - mfns[i] = gfn_to_mfn_query(d, gfns[i], types + i); + mfns[i] = gfn_to_mfn_query(p2m, gfns[i], types + i); /* If this is ram, and not a pagetable or from the xen heap, and probably not mapped elsewhere, map it; otherwise, skip. */ if ( p2m_is_ram(types[i]) @@ -952,7 +953,7 @@ p2m_pod_zero_check(struct domain *d, uns } /* Try to remove the page, restoring old mapping if it fails. */ - set_p2m_entry(d, gfns[i], + set_p2m_entry(p2m, gfns[i], _mfn(POPULATE_ON_DEMAND_MFN), 0, p2m_populate_on_demand); @@ -963,7 +964,7 @@ p2m_pod_zero_check(struct domain *d, uns unmap_domain_page(map[i]); map[i] = NULL; - set_p2m_entry(d, gfns[i], mfns[i], 0, types[i]); + set_p2m_entry(p2m, gfns[i], mfns[i], 0, types[i]); continue; } @@ -985,7 +986,7 @@ p2m_pod_zero_check(struct domain *d, uns * check timing. */ if ( j < PAGE_SIZE/sizeof(*map[i]) ) { - set_p2m_entry(d, gfns[i], mfns[i], 0, types[i]); + set_p2m_entry(p2m, gfns[i], mfns[i], 0, types[i]); } else { @@ -1005,8 +1006,8 @@ p2m_pod_zero_check(struct domain *d, uns } /* Add to cache, and account for the new p2m PoD entry */ - p2m_pod_cache_add(d, mfn_to_page(mfns[i]), 0); - d->arch.p2m->pod.entry_count++; + p2m_pod_cache_add(p2m, mfn_to_page(mfns[i]), 0); + p2m->pod.entry_count++; } } @@ -1014,56 +1015,53 @@ p2m_pod_zero_check(struct domain *d, uns #define POD_SWEEP_LIMIT 1024 static void -p2m_pod_emergency_sweep_super(struct domain *d) -{ - struct p2m_domain *p2md = d->arch.p2m; +p2m_pod_emergency_sweep_super(struct p2m_domain *p2m) +{ unsigned long i, start, limit; - if ( p2md->pod.reclaim_super == 0 ) - { - p2md->pod.reclaim_super = (p2md->pod.max_guest>>9)<<9; - p2md->pod.reclaim_super -= SUPERPAGE_PAGES; + if ( p2m->pod.reclaim_super == 0 ) + { + p2m->pod.reclaim_super = (p2m->pod.max_guest>>9)<<9; + p2m->pod.reclaim_super -= SUPERPAGE_PAGES; } - start = p2md->pod.reclaim_super; + start = p2m->pod.reclaim_super; limit = (start > POD_SWEEP_LIMIT) ? (start - POD_SWEEP_LIMIT) : 0; - for ( i=p2md->pod.reclaim_super ; i > 0 ; i-=SUPERPAGE_PAGES ) - { - p2m_pod_zero_check_superpage(d, i); + for ( i=p2m->pod.reclaim_super ; i > 0 ; i -= SUPERPAGE_PAGES ) + { + p2m_pod_zero_check_superpage(p2m, i); /* Stop if we're past our limit and we have found *something*. * * NB that this is a zero-sum game; we're increasing our cache size * by increasing our 'debt'. Since we hold the p2m lock, * (entry_count - count) must remain the same. */ - if ( !page_list_empty(&p2md->pod.super) && i < limit ) + if ( !page_list_empty(&p2m->pod.super) && i < limit ) break; } - p2md->pod.reclaim_super = i ? i - SUPERPAGE_PAGES : 0; - + p2m->pod.reclaim_super = i ? i - SUPERPAGE_PAGES : 0; } #define POD_SWEEP_STRIDE 16 static void -p2m_pod_emergency_sweep(struct domain *d) -{ - struct p2m_domain *p2md = d->arch.p2m; +p2m_pod_emergency_sweep(struct p2m_domain *p2m) +{ unsigned long gfns[POD_SWEEP_STRIDE]; unsigned long i, j=0, start, limit; p2m_type_t t; - if ( p2md->pod.reclaim_single == 0 ) - p2md->pod.reclaim_single = p2md->pod.max_guest; - - start = p2md->pod.reclaim_single; + if ( p2m->pod.reclaim_single == 0 ) + p2m->pod.reclaim_single = p2m->pod.max_guest; + + start = p2m->pod.reclaim_single; limit = (start > POD_SWEEP_LIMIT) ? (start - POD_SWEEP_LIMIT) : 0; /* FIXME: Figure out how to avoid superpages */ - for ( i=p2md->pod.reclaim_single ; i > 0 ; i-- ) - { - gfn_to_mfn_query(d, i, &t ); + for ( i=p2m->pod.reclaim_single; i > 0 ; i-- ) + { + gfn_to_mfn_query(p2m, i, &t ); if ( p2m_is_ram(t) ) { gfns[j] = i; @@ -1071,7 +1069,7 @@ p2m_pod_emergency_sweep(struct domain *d BUG_ON(j > POD_SWEEP_STRIDE); if ( j == POD_SWEEP_STRIDE ) { - p2m_pod_zero_check(d, gfns, j); + p2m_pod_zero_check(p2m, gfns, j); j = 0; } } @@ -1080,29 +1078,29 @@ p2m_pod_emergency_sweep(struct domain *d * NB that this is a zero-sum game; we're increasing our cache size * by re-increasing our 'debt'. Since we hold the p2m lock, * (entry_count - count) must remain the same. */ - if ( p2md->pod.count > 0 && i < limit ) + if ( p2m->pod.count > 0 && i < limit ) break; } if ( j ) - p2m_pod_zero_check(d, gfns, j); - - p2md->pod.reclaim_single = i ? i - 1 : i; + p2m_pod_zero_check(p2m, gfns, j); + + p2m->pod.reclaim_single = i ? i - 1 : i; } int -p2m_pod_demand_populate(struct domain *d, unsigned long gfn, +p2m_pod_demand_populate(struct p2m_domain *p2m, unsigned long gfn, unsigned int order, p2m_query_t q) { + struct domain *d = p2m->domain; struct page_info *p = NULL; /* Compiler warnings */ unsigned long gfn_aligned; mfn_t mfn; - struct p2m_domain *p2md = d->arch.p2m; int i; - ASSERT(p2m_locked_by_me(d->arch.p2m)); + ASSERT(p2m_locked_by_me(p2m)); /* This check is done with the p2m lock held. This will make sure that * even if d->is_dying changes under our feet, p2m_pod_empty_cache() @@ -1120,34 +1118,34 @@ p2m_pod_demand_populate(struct domain *d * set_p2m_entry() should automatically shatter the 1GB page into * 512 2MB pages. The rest of 511 calls are unnecessary. */ - set_p2m_entry(d, gfn_aligned, _mfn(POPULATE_ON_DEMAND_MFN), 9, + set_p2m_entry(p2m, gfn_aligned, _mfn(POPULATE_ON_DEMAND_MFN), 9, p2m_populate_on_demand); - audit_p2m(d); - p2m_unlock(p2md); + audit_p2m(p2m); + p2m_unlock(p2m); return 0; } /* If we're low, start a sweep */ - if ( order == 9 && page_list_empty(&p2md->pod.super) ) - p2m_pod_emergency_sweep_super(d); - - if ( page_list_empty(&p2md->pod.single) && + if ( order == 9 && page_list_empty(&p2m->pod.super) ) + p2m_pod_emergency_sweep_super(p2m); + + if ( page_list_empty(&p2m->pod.single) && ( ( order == 0 ) - || (order == 9 && page_list_empty(&p2md->pod.super) ) ) ) - p2m_pod_emergency_sweep(d); + || (order == 9 && page_list_empty(&p2m->pod.super) ) ) ) + p2m_pod_emergency_sweep(p2m); /* Keep track of the highest gfn demand-populated by a guest fault */ - if ( q == p2m_guest && gfn > p2md->pod.max_guest ) - p2md->pod.max_guest = gfn; + if ( q == p2m_guest && gfn > p2m->pod.max_guest ) + p2m->pod.max_guest = gfn; spin_lock(&d->page_alloc_lock); - if ( p2md->pod.count == 0 ) + if ( p2m->pod.count == 0 ) goto out_of_memory; /* Get a page f/ the cache. A NULL return value indicates that the * 2-meg range should be marked singleton PoD, and retried */ - if ( (p = p2m_pod_cache_get(d, order)) == NULL ) + if ( (p = p2m_pod_cache_get(p2m, order)) == NULL ) goto remap_and_retry; mfn = page_to_mfn(p); @@ -1158,13 +1156,13 @@ p2m_pod_demand_populate(struct domain *d gfn_aligned = (gfn >> order) << order; - set_p2m_entry(d, gfn_aligned, mfn, order, p2m_ram_rw); - - for( i = 0 ; i < (1UL << order) ; i++ ) + set_p2m_entry(p2m, gfn_aligned, mfn, order, p2m_ram_rw); + + for( i = 0; i < (1UL << order); i++ ) set_gpfn_from_mfn(mfn_x(mfn) + i, gfn_aligned + i); - p2md->pod.entry_count -= (1 << order); /* Lock: p2m */ - BUG_ON(p2md->pod.entry_count < 0); + p2m->pod.entry_count -= (1 << order); /* Lock: p2m */ + BUG_ON(p2m->pod.entry_count < 0); if ( tb_init_done ) { @@ -1186,7 +1184,7 @@ out_of_memory: spin_unlock(&d->page_alloc_lock); printk("%s: Out of populate-on-demand memory! tot_pages %" PRIu32 " pod_entries %" PRIi32 "\n", - __func__, d->tot_pages, p2md->pod.entry_count); + __func__, d->tot_pages, p2m->pod.entry_count); domain_crash(d); out_fail: return -1; @@ -1197,7 +1195,7 @@ remap_and_retry: /* Remap this 2-meg region in singleton chunks */ gfn_aligned = (gfn>>order)<<order; for(i=0; i<(1<<order); i++) - set_p2m_entry(d, gfn_aligned+i, _mfn(POPULATE_ON_DEMAND_MFN), 0, + set_p2m_entry(p2m, gfn_aligned+i, _mfn(POPULATE_ON_DEMAND_MFN), 0, p2m_populate_on_demand); if ( tb_init_done ) { @@ -1216,44 +1214,44 @@ remap_and_retry: } /* Non-ept "lock-and-check" wrapper */ -static int p2m_pod_check_and_populate(struct domain *d, unsigned long gfn, +static int p2m_pod_check_and_populate(struct p2m_domain *p2m, unsigned long gfn, l1_pgentry_t *p2m_entry, int order, p2m_query_t q) { /* Only take the lock if we don't already have it. Otherwise it * wouldn't be safe to do p2m lookups with the p2m lock held */ - int do_locking = !p2m_locked_by_me(d->arch.p2m); + int do_locking = !p2m_locked_by_me(p2m); int r; if ( do_locking ) - p2m_lock(d->arch.p2m); - - audit_p2m(d); + p2m_lock(p2m); + + audit_p2m(p2m); /* Check to make sure this is still PoD */ if ( p2m_flags_to_type(l1e_get_flags(*p2m_entry)) != p2m_populate_on_demand ) { if ( do_locking ) - p2m_unlock(d->arch.p2m); + p2m_unlock(p2m); return 0; } - r = p2m_pod_demand_populate(d, gfn, order, q); - - audit_p2m(d); + r = p2m_pod_demand_populate(p2m, gfn, order, q); + + audit_p2m(p2m); if ( do_locking ) - p2m_unlock(d->arch.p2m); + p2m_unlock(p2m); return r; } // Returns 0 on error (out of memory) static int -p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, +p2m_set_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, unsigned int page_order, p2m_type_t p2mt) { // XXX -- this might be able to be faster iff current->domain == d - mfn_t table_mfn = pagetable_get_mfn(p2m_get_pagetable(p2m_get_hostp2m(d))); + mfn_t table_mfn = pagetable_get_mfn(p2m_get_pagetable(p2m)); void *table =map_domain_page(mfn_x(table_mfn)); unsigned long i, gfn_remainder = gfn; l1_pgentry_t *p2m_entry; @@ -1273,14 +1271,14 @@ p2m_set_entry(struct domain *d, unsigned t.gfn = gfn; t.mfn = mfn_x(mfn); t.p2mt = p2mt; - t.d = d->domain_id; + t.d = p2m->domain->domain_id; t.order = page_order; __trace_var(TRC_MEM_SET_P2M_ENTRY, 0, sizeof(t), (unsigned char *)&t); } #if CONFIG_PAGING_LEVELS >= 4 - if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, + if ( !p2m_next_level(p2m, &table_mfn, &table, &gfn_remainder, gfn, L4_PAGETABLE_SHIFT - PAGE_SHIFT, L4_PAGETABLE_ENTRIES, PGT_l3_page_table) ) goto out; @@ -1298,14 +1296,15 @@ p2m_set_entry(struct domain *d, unsigned !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) ) { P2M_ERROR("configure P2M table L3 entry with large page\n"); - domain_crash(d); + domain_crash(p2m->domain); goto out; } l3e_content = mfn_valid(mfn) ? l3e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt) | _PAGE_PSE) : l3e_empty(); entry_content.l1 = l3e_content.l3; - paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 3); + paging_write_p2m_entry(p2m->domain, gfn, p2m_entry, + table_mfn, entry_content, 3); } /* @@ -1315,17 +1314,17 @@ p2m_set_entry(struct domain *d, unsigned * in Xen's address space for translated PV guests. * When using AMD's NPT on PAE Xen, we are restricted to 4GB. */ - else if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, + else if ( !p2m_next_level(p2m, &table_mfn, &table, &gfn_remainder, gfn, L3_PAGETABLE_SHIFT - PAGE_SHIFT, ((CONFIG_PAGING_LEVELS == 3) - ? (paging_mode_hap(d) ? 4 : 8) + ? (paging_mode_hap(p2m->domain) ? 4 : 8) : L3_PAGETABLE_ENTRIES), PGT_l2_page_table) ) goto out; if ( page_order == 0 ) { - if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, + if ( !p2m_next_level(p2m, &table_mfn, &table, &gfn_remainder, gfn, L2_PAGETABLE_SHIFT - PAGE_SHIFT, L2_PAGETABLE_ENTRIES, PGT_l1_page_table) ) goto out; @@ -1340,7 +1339,8 @@ p2m_set_entry(struct domain *d, unsigned entry_content = l1e_empty(); /* level 1 entry */ - paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1); + paging_write_p2m_entry(p2m->domain, gfn, p2m_entry, + table_mfn, entry_content, 1); } else if ( page_order == 9 ) { @@ -1354,7 +1354,7 @@ p2m_set_entry(struct domain *d, unsigned !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) ) { P2M_ERROR("configure P2M table 4KB L2 entry with large page\n"); - domain_crash(d); + domain_crash(p2m->domain); goto out; } @@ -1365,23 +1365,24 @@ p2m_set_entry(struct domain *d, unsigned l2e_content = l2e_empty(); entry_content.l1 = l2e_content.l2; - paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 2); + paging_write_p2m_entry(p2m->domain, gfn, p2m_entry, + table_mfn, entry_content, 2); } /* Track the highest gfn for which we have ever had a valid mapping */ if ( mfn_valid(mfn) - && (gfn + (1UL << page_order) - 1 > d->arch.p2m->max_mapped_pfn) ) - d->arch.p2m->max_mapped_pfn = gfn + (1UL << page_order) - 1; - - if ( iommu_enabled && need_iommu(d) ) + && (gfn + (1UL << page_order) - 1 > p2m->max_mapped_pfn) ) + p2m->max_mapped_pfn = gfn + (1UL << page_order) - 1; + + if ( iommu_enabled && need_iommu(p2m->domain) ) { if ( p2mt == p2m_ram_rw ) for ( i = 0; i < (1UL << page_order); i++ ) - iommu_map_page(d, gfn+i, mfn_x(mfn)+i, + iommu_map_page(p2m->domain, gfn+i, mfn_x(mfn)+i, IOMMUF_readable|IOMMUF_writable); else for ( int i = 0; i < (1UL << page_order); i++ ) - iommu_unmap_page(d, gfn+i); + iommu_unmap_page(p2m->domain, gfn+i); } /* Success */ @@ -1393,7 +1394,7 @@ out: } static mfn_t -p2m_gfn_to_mfn(struct domain *d, unsigned long gfn, p2m_type_t *t, +p2m_gfn_to_mfn(struct p2m_domain *p2m, unsigned long gfn, p2m_type_t *t, p2m_query_t q) { mfn_t mfn; @@ -1401,7 +1402,7 @@ p2m_gfn_to_mfn(struct domain *d, unsigne l2_pgentry_t *l2e; l1_pgentry_t *l1e; - ASSERT(paging_mode_translate(d)); + ASSERT(paging_mode_translate(p2m->domain)); /* XXX This is for compatibility with the old model, where anything not * XXX marked as RAM was considered to be emulated MMIO space. @@ -1409,9 +1410,9 @@ p2m_gfn_to_mfn(struct domain *d, unsigne * XXX we will return p2m_invalid for unmapped gfns */ *t = p2m_mmio_dm; - mfn = pagetable_get_mfn(p2m_get_pagetable(p2m_get_hostp2m(d))); - - if ( gfn > d->arch.p2m->max_mapped_pfn ) + mfn = pagetable_get_mfn(p2m_get_pagetable(p2m)); + + if ( gfn > p2m->max_mapped_pfn ) /* This pfn is higher than the highest the p2m map currently holds */ return _mfn(INVALID_MFN); @@ -1447,7 +1448,7 @@ pod_retry_l3: { if ( q != p2m_query ) { - if ( !p2m_pod_demand_populate(d, gfn, 18, q) ) + if ( !p2m_pod_demand_populate(p2m, gfn, 18, q) ) goto pod_retry_l3; } else @@ -1482,8 +1483,8 @@ pod_retry_l2: if ( p2m_flags_to_type(l2e_get_flags(*l2e)) == p2m_populate_on_demand ) { if ( q != p2m_query ) { - if ( !p2m_pod_check_and_populate(d, gfn, - (l1_pgentry_t *)l2e, 9, q) ) + if ( !p2m_pod_check_and_populate(p2m, gfn, + (l1_pgentry_t *)l2e, 9, q) ) goto pod_retry_l2; } else *t = p2m_populate_on_demand; @@ -1514,8 +1515,8 @@ pod_retry_l1: if ( p2m_flags_to_type(l1e_get_flags(*l1e)) == p2m_populate_on_demand ) { if ( q != p2m_query ) { - if ( !p2m_pod_check_and_populate(d, gfn, - (l1_pgentry_t *)l1e, 0, q) ) + if ( !p2m_pod_check_and_populate(p2m, gfn, + (l1_pgentry_t *)l1e, 0, q) ) goto pod_retry_l1; } else *t = p2m_populate_on_demand; @@ -1533,7 +1534,8 @@ pod_retry_l1: } /* Read the current domain's p2m table (through the linear mapping). */ -static mfn_t p2m_gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t, +static mfn_t p2m_gfn_to_mfn_current(struct p2m_domain *p2m, + unsigned long gfn, p2m_type_t *t, p2m_query_t q) { mfn_t mfn = _mfn(INVALID_MFN); @@ -1544,7 +1546,7 @@ static mfn_t p2m_gfn_to_mfn_current(unsi * XXX Once we start explicitly registering MMIO regions in the p2m * XXX we will return p2m_invalid for unmapped gfns */ - if ( gfn <= current->domain->arch.p2m->max_mapped_pfn ) + if ( gfn <= p2m->max_mapped_pfn ) { l1_pgentry_t l1e = l1e_empty(), *p2m_entry; l2_pgentry_t l2e = l2e_empty(); @@ -1574,7 +1576,7 @@ static mfn_t p2m_gfn_to_mfn_current(unsi /* The read has succeeded, so we know that mapping exists */ if ( q != p2m_query ) { - if ( !p2m_pod_demand_populate(current->domain, gfn, 18, q) ) + if ( !p2m_pod_demand_populate(p2m, gfn, 18, q) ) goto pod_retry_l3; p2mt = p2m_invalid; printk("%s: Allocate 1GB failed!\n", __func__); @@ -1624,8 +1626,8 @@ static mfn_t p2m_gfn_to_mfn_current(unsi * exits at this point. */ if ( q != p2m_query ) { - if ( !p2m_pod_check_and_populate(current->domain, gfn, - p2m_entry, 9, q) ) + if ( !p2m_pod_check_and_populate(p2m, gfn, + p2m_entry, 9, q) ) goto pod_retry_l2; /* Allocate failed. */ @@ -1680,8 +1682,8 @@ static mfn_t p2m_gfn_to_mfn_current(unsi * exits at this point. */ if ( q != p2m_query ) { - if ( !p2m_pod_check_and_populate(current->domain, gfn, - (l1_pgentry_t *)p2m_entry, 0, q) ) + if ( !p2m_pod_check_and_populate(p2m, gfn, + (l1_pgentry_t *)p2m_entry, 0, q) ) goto pod_retry_l1; /* Allocate failed. */ @@ -1708,22 +1710,15 @@ out: } /* Init the datastructures for later use by the p2m code */ -int p2m_init(struct domain *d) -{ - struct p2m_domain *p2m; - - p2m = xmalloc(struct p2m_domain); - if ( p2m == NULL ) - return -ENOMEM; - - d->arch.p2m = p2m; - +static void p2m_initialise(struct domain *d, struct p2m_domain *p2m) +{ memset(p2m, 0, sizeof(*p2m)); p2m_lock_init(p2m); INIT_PAGE_LIST_HEAD(&p2m->pages); INIT_PAGE_LIST_HEAD(&p2m->pod.super); INIT_PAGE_LIST_HEAD(&p2m->pod.single); + p2m->domain = d; p2m->set_entry = p2m_set_entry; p2m->get_entry = p2m_gfn_to_mfn; p2m->get_entry_current = p2m_gfn_to_mfn_current; @@ -1732,23 +1727,34 @@ int p2m_init(struct domain *d) if ( hap_enabled(d) && (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ) ept_p2m_init(d); + return; +} + +int p2m_init(struct domain *d) +{ + struct p2m_domain *p2m; + + p2m_get_hostp2m(d) = p2m = xmalloc(struct p2m_domain); + if ( p2m == NULL ) + return -ENOMEM; + p2m_initialise(d, p2m); + return 0; } -void p2m_change_entry_type_global(struct domain *d, +void p2m_change_entry_type_global(struct p2m_domain *p2m, p2m_type_t ot, p2m_type_t nt) { - struct p2m_domain *p2m = d->arch.p2m; - p2m_lock(p2m); - p2m->change_entry_type_global(d, ot, nt); + p2m->change_entry_type_global(p2m, ot, nt); p2m_unlock(p2m); } static -int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, +int set_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn, unsigned int page_order, p2m_type_t p2mt) { + struct domain *d = p2m->domain; unsigned long todo = 1ul << page_order; unsigned int order; int rc = 1; @@ -1763,7 +1769,7 @@ int set_p2m_entry(struct domain *d, unsi else order = 0; - if ( !d->arch.p2m->set_entry(d, gfn, mfn, order, p2mt) ) + if ( !p2m->set_entry(p2m, gfn, mfn, order, p2mt) ) rc = 0; gfn += 1ul << order; if ( mfn_x(mfn) != INVALID_MFN ) @@ -1784,16 +1790,14 @@ int set_p2m_entry(struct domain *d, unsi // // Returns 0 for success or -errno. // -int p2m_alloc_table(struct domain *d, - struct page_info * (*alloc_page)(struct domain *d), - void (*free_page)(struct domain *d, struct page_info *pg)) - +int p2m_alloc_table(struct p2m_domain *p2m, + struct page_info * (*alloc_page)(struct p2m_domain *p2m), + void (*free_page)(struct p2m_domain *p2m, struct page_info *pg)) { mfn_t mfn = _mfn(INVALID_MFN); struct page_info *page, *p2m_top; unsigned int page_count = 0; unsigned long gfn = -1UL; - struct p2m_domain *p2m = p2m_get_hostp2m(d); p2m_lock(p2m); @@ -1809,7 +1813,7 @@ int p2m_alloc_table(struct domain *d, p2m->alloc_page = alloc_page; p2m->free_page = free_page; - p2m_top = p2m_alloc_ptp(d, + p2m_top = p2m_alloc_ptp(p2m, #if CONFIG_PAGING_LEVELS == 4 PGT_l4_page_table #else @@ -1828,13 +1832,13 @@ int p2m_alloc_table(struct domain *d, P2M_PRINTK("populating p2m table\n"); /* Initialise physmap tables for slot zero. Other code assumes this. */ - if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), 0, + if ( !set_p2m_entry(p2m, 0, _mfn(INVALID_MFN), 0, p2m_invalid) ) goto error; /* Copy all existing mappings from the page list and m2p */ - spin_lock(&d->page_alloc_lock); - page_list_for_each(page, &d->page_list) + spin_lock(&p2m->domain->page_alloc_lock); + page_list_for_each(page, &p2m->domain->page_list) { mfn = page_to_mfn(page); gfn = get_gpfn_from_mfn(mfn_x(mfn)); @@ -1848,17 +1852,17 @@ int p2m_alloc_table(struct domain *d, (gfn != 0x55555555L) #endif && gfn != INVALID_M2P_ENTRY - && !set_p2m_entry(d, gfn, mfn, 0, p2m_ram_rw) ) + && !set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_rw) ) goto error_unlock; } - spin_unlock(&d->page_alloc_lock); + spin_unlock(&p2m->domain->page_alloc_lock); P2M_PRINTK("p2m table initialised (%u pages)\n", page_count); p2m_unlock(p2m); return 0; error_unlock: - spin_unlock(&d->page_alloc_lock); + spin_unlock(&p2m->domain->page_alloc_lock); error: P2M_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%" PRI_mfn "\n", gfn, mfn_x(mfn)); @@ -1866,12 +1870,11 @@ error_unlock: return -ENOMEM; } -void p2m_teardown(struct domain *d) +void p2m_teardown(struct p2m_domain *p2m) /* Return all the p2m pages to Xen. * We know we don't have any extra mappings to these pages */ { struct page_info *pg; - struct p2m_domain *p2m = p2m_get_hostp2m(d); #ifdef __x86_64__ unsigned long gfn; p2m_type_t t; @@ -1883,27 +1886,28 @@ void p2m_teardown(struct domain *d) #ifdef __x86_64__ for ( gfn=0; gfn < p2m->max_mapped_pfn; gfn++ ) { - mfn = p2m->get_entry(d, gfn, &t, p2m_query); + mfn = p2m->get_entry(p2m, gfn, &t, p2m_query); if ( mfn_valid(mfn) && (t == p2m_ram_shared) ) - BUG_ON(mem_sharing_unshare_page(d, gfn, MEM_SHARING_DESTROY_GFN)); + BUG_ON(mem_sharing_unshare_page(p2m, gfn, MEM_SHARING_DESTROY_GFN)); } #endif p2m->phys_table = pagetable_null(); while ( (pg = page_list_remove_head(&p2m->pages)) ) - p2m->free_page(d, pg); + p2m->free_page(p2m, pg); p2m_unlock(p2m); } void p2m_final_teardown(struct domain *d) { + /* Iterate over all p2m tables per domain */ xfree(d->arch.p2m); d->arch.p2m = NULL; } #if P2M_AUDIT -static void audit_p2m(struct domain *d) +static void audit_p2m(struct p2m_domain *p2m) { struct page_info *page; struct domain *od; @@ -1913,6 +1917,7 @@ static void audit_p2m(struct domain *d) unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0; int test_linear; p2m_type_t type; + struct domain *d = p2m->domain; if ( !paging_mode_translate(d) ) return; @@ -1967,7 +1972,7 @@ static void audit_p2m(struct domain *d) continue; } - p2mfn = gfn_to_mfn_type_foreign(d, gfn, &type, p2m_query); + p2mfn = gfn_to_mfn_type_p2m(p2m, gfn, &type, p2m_query); if ( mfn_x(p2mfn) != mfn ) { mpbad++; @@ -1983,9 +1988,9 @@ static void audit_p2m(struct domain *d) set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); } - if ( test_linear && (gfn <= d->arch.p2m->max_mapped_pfn) ) - { - lp2mfn = mfn_x(gfn_to_mfn_query(d, gfn, &type)); + if ( test_linear && (gfn <= p2m->max_mapped_pfn) ) + { + lp2mfn = mfn_x(gfn_to_mfn_query(p2m, gfn, &type)); if ( lp2mfn != mfn_x(p2mfn) ) { P2M_PRINTK("linear mismatch gfn %#lx -> mfn %#lx " @@ -2000,7 +2005,7 @@ static void audit_p2m(struct domain *d) spin_unlock(&d->page_alloc_lock); /* Audit part two: walk the domain's p2m table, checking the entries. */ - if ( pagetable_get_pfn(p2m_get_pagetable(p2m_get_hostp2m(d)) != 0 ) + if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 ) { l2_pgentry_t *l2e; l1_pgentry_t *l1e; @@ -2009,12 +2014,12 @@ static void audit_p2m(struct domain *d) #if CONFIG_PAGING_LEVELS == 4 l4_pgentry_t *l4e; l3_pgentry_t *l3e; - int i3, i4; - l4e = map_domain_page(mfn_x(pagetable_get_mfn(p2m_get_pagetable(p2m_get_hostp2m(d))))); + int i4, i3; + l4e = map_domain_page(mfn_x(pagetable_get_mfn(p2m_get_pagetable(p2m)))); #else /* CONFIG_PAGING_LEVELS == 3 */ l3_pgentry_t *l3e; int i3; - l3e = map_domain_page(mfn_x(pagetable_get_mfn(p2m_get_pagetable(p2m_get_hostp2m(d))))); + l3e = map_domain_page(mfn_x(pagetable_get_mfn(p2m_get_pagetable(p2m)))); #endif gfn = 0; @@ -2144,11 +2149,11 @@ static void audit_p2m(struct domain *d) } - if ( entry_count != d->arch.p2m->pod.entry_count ) + if ( entry_count != p2m->pod.entry_count ) { printk("%s: refcounted entry count %d, audit count %d!\n", __func__, - d->arch.p2m->pod.entry_count, + p2m->pod.entry_count, entry_count); BUG(); } @@ -2166,18 +2171,18 @@ static void audit_p2m(struct domain *d) static void -p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn, +p2m_remove_page(struct p2m_domain *p2m, unsigned long gfn, unsigned long mfn, unsigned int page_order) { unsigned long i; mfn_t mfn_return; p2m_type_t t; - if ( !paging_mode_translate(d) ) - { - if ( need_iommu(d) ) + if ( !paging_mode_translate(p2m->domain) ) + { + if ( need_iommu(p2m->domain) ) for ( i = 0; i < (1 << page_order); i++ ) - iommu_unmap_page(d, mfn + i); + iommu_unmap_page(p2m->domain, mfn + i); return; } @@ -2185,23 +2190,23 @@ p2m_remove_page(struct domain *d, unsign for ( i = 0; i < (1UL << page_order); i++ ) { - mfn_return = d->arch.p2m->get_entry(d, gfn + i, &t, p2m_query); + mfn_return = p2m->get_entry(p2m, gfn + i, &t, p2m_query); if ( !p2m_is_grant(t) ) set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY); ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) ); } - set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid); + set_p2m_entry(p2m, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid); } void -guest_physmap_remove_page(struct domain *d, unsigned long gfn, +guest_physmap_remove_entry(struct p2m_domain *p2m, unsigned long gfn, unsigned long mfn, unsigned int page_order) { - p2m_lock(d->arch.p2m); - audit_p2m(d); - p2m_remove_page(d, gfn, mfn, page_order); - audit_p2m(d); - p2m_unlock(d->arch.p2m); + p2m_lock(p2m); + audit_p2m(p2m); + p2m_remove_page(p2m, gfn, mfn, page_order); + audit_p2m(p2m); + p2m_unlock(p2m); } #if CONFIG_PAGING_LEVELS == 3 @@ -2232,7 +2237,7 @@ guest_physmap_mark_populate_on_demand(st guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn, unsigned int order) { - struct p2m_domain *p2md = d->arch.p2m; + struct p2m_domain *p2m = p2m_get_hostp2m(d); unsigned long i; p2m_type_t ot; mfn_t omfn; @@ -2245,15 +2250,15 @@ guest_physmap_mark_populate_on_demand(st if ( rc != 0 ) return rc; - p2m_lock(p2md); - audit_p2m(d); + p2m_lock(p2m); + audit_p2m(p2m); P2M_DEBUG("mark pod gfn=%#lx\n", gfn); /* Make sure all gpfns are unused */ for ( i = 0; i < (1UL << order); i++ ) { - omfn = gfn_to_mfn_query(d, gfn + i, &ot); + omfn = gfn_to_mfn_query(p2m, gfn + i, &ot); if ( p2m_is_ram(ot) ) { printk("%s: gfn_to_mfn returned type %d!\n", @@ -2269,29 +2274,29 @@ guest_physmap_mark_populate_on_demand(st } /* Now, actually do the two-way mapping */ - if ( !set_p2m_entry(d, gfn, _mfn(POPULATE_ON_DEMAND_MFN), order, + if ( !set_p2m_entry(p2m, gfn, _mfn(POPULATE_ON_DEMAND_MFN), order, p2m_populate_on_demand) ) rc = -EINVAL; else { - p2md->pod.entry_count += 1 << order; /* Lock: p2m */ - p2md->pod.entry_count -= pod_count; - BUG_ON(p2md->pod.entry_count < 0); - } - - audit_p2m(d); - p2m_unlock(p2md); + p2m->pod.entry_count += 1 << order; /* Lock: p2m */ + p2m->pod.entry_count -= pod_count; + BUG_ON(p2m->pod.entry_count < 0); + } + + audit_p2m(p2m); + p2m_unlock(p2m); out: return rc; - } int -guest_physmap_add_entry(struct domain *d, unsigned long gfn, +guest_physmap_add_entry(struct p2m_domain *p2m, unsigned long gfn, unsigned long mfn, unsigned int page_order, p2m_type_t t) { + struct domain *d = p2m->domain; unsigned long i, ogfn; p2m_type_t ot; mfn_t omfn; @@ -2321,20 +2326,20 @@ guest_physmap_add_entry(struct domain *d if ( rc != 0 ) return rc; - p2m_lock(d->arch.p2m); - audit_p2m(d); + p2m_lock(p2m); + audit_p2m(p2m); P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn); /* First, remove m->p mappings for existing p->m mappings */ for ( i = 0; i < (1UL << page_order); i++ ) { - omfn = gfn_to_mfn_query(d, gfn + i, &ot); + omfn = gfn_to_mfn_query(p2m, gfn + i, &ot); if ( p2m_is_grant(ot) ) { /* Really shouldn't be unmapping grant maps this way */ domain_crash(d); - p2m_unlock(d->arch.p2m); + p2m_unlock(p2m); return -EINVAL; } else if ( p2m_is_ram(ot) ) @@ -2368,7 +2373,7 @@ guest_physmap_add_entry(struct domain *d * address */ P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n", mfn + i, ogfn, gfn + i); - omfn = gfn_to_mfn_query(d, ogfn, &ot); + omfn = gfn_to_mfn_query(p2m, ogfn, &ot); /* If we get here, we know the local domain owns the page, so it can't have been grant mapped in. */ BUG_ON( p2m_is_grant(ot) ); @@ -2378,7 +2383,7 @@ guest_physmap_add_entry(struct domain *d P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n", ogfn , mfn_x(omfn)); if ( mfn_x(omfn) == (mfn + i) ) - p2m_remove_page(d, ogfn, mfn + i, 0); + p2m_remove_page(p2m, ogfn, mfn + i, 0); } } } @@ -2386,7 +2391,7 @@ guest_physmap_add_entry(struct domain *d /* Now, actually do the two-way mapping */ if ( mfn_valid(_mfn(mfn)) ) { - if ( !set_p2m_entry(d, gfn, _mfn(mfn), page_order, t) ) + if ( !set_p2m_entry(p2m, gfn, _mfn(mfn), page_order, t) ) rc = -EINVAL; if ( !p2m_is_grant(t) ) { @@ -2398,18 +2403,18 @@ guest_physmap_add_entry(struct domain *d { gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n", gfn, mfn); - if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, + if ( !set_p2m_entry(p2m, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid) ) rc = -EINVAL; else { - d->arch.p2m->pod.entry_count -= pod_count; /* Lock: p2m */ - BUG_ON(d->arch.p2m->pod.entry_count < 0); - } - } - - audit_p2m(d); - p2m_unlock(d->arch.p2m); + p2m->pod.entry_count -= pod_count; /* Lock: p2m */ + BUG_ON(p2m->pod.entry_count < 0); + } + } + + audit_p2m(p2m); + p2m_unlock(p2m); return rc; } @@ -2417,7 +2422,7 @@ guest_physmap_add_entry(struct domain *d /* Walk the whole p2m table, changing any entries of the old type * to the new type. This is used in hardware-assisted paging to * quickly enable or diable log-dirty tracking */ -void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt) +void p2m_change_type_global(struct p2m_domain *p2m, p2m_type_t ot, p2m_type_t nt) { unsigned long mfn, gfn, flags; l1_pgentry_t l1e_content; @@ -2430,17 +2435,16 @@ void p2m_change_type_global(struct domai l4_pgentry_t *l4e; unsigned long i4; #endif /* CONFIG_PAGING_LEVELS == 4 */ - struct p2m_domain *p2m = p2m_get_hostp2m(d); BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt)); - if ( !paging_mode_translate(d) ) + if ( !paging_mode_translate(p2m->domain) ) return; if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) == 0 ) return; - ASSERT(p2m_locked_by_me(d->arch.p2m)); + ASSERT(p2m_locked_by_me(p2m)); #if CONFIG_PAGING_LEVELS == 4 l4e = map_domain_page(mfn_x(pagetable_get_mfn(p2m_get_pagetable(p2m)))); @@ -2476,7 +2480,8 @@ void p2m_change_type_global(struct domai gfn = get_gpfn_from_mfn(mfn); flags = p2m_type_to_flags(nt); l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE); - paging_write_p2m_entry(d, gfn, (l1_pgentry_t *)&l3e[i3], + paging_write_p2m_entry(p2m->domain, gfn, + (l1_pgentry_t *)&l3e[i3], l3mfn, l1e_content, 3); continue; } @@ -2506,7 +2511,8 @@ void p2m_change_type_global(struct domai * L2_PAGETABLE_ENTRIES) * L1_PAGETABLE_ENTRIES; flags = p2m_type_to_flags(nt); l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE); - paging_write_p2m_entry(d, gfn, (l1_pgentry_t *)&l2e[i2], + paging_write_p2m_entry(p2m->domain, gfn, + (l1_pgentry_t *)&l2e[i2], l2mfn, l1e_content, 2); continue; } @@ -2529,7 +2535,7 @@ void p2m_change_type_global(struct domai /* create a new 1le entry with the new type */ flags = p2m_type_to_flags(nt); l1e_content = l1e_from_pfn(mfn, flags); - paging_write_p2m_entry(d, gfn, &l1e[i1], + paging_write_p2m_entry(p2m->domain, gfn, &l1e[i1], l1mfn, l1e_content, 1); } unmap_domain_page(l1e); @@ -2551,7 +2557,7 @@ void p2m_change_type_global(struct domai /* Modify the p2m type of a single gfn from ot to nt, returning the * entry's previous type */ -p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn, +p2m_type_t p2m_change_type(struct p2m_domain *p2m, unsigned long gfn, p2m_type_t ot, p2m_type_t nt) { p2m_type_t pt; @@ -2559,31 +2565,31 @@ p2m_type_t p2m_change_type(struct domain BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt)); - p2m_lock(d->arch.p2m); - - mfn = gfn_to_mfn_query(d, gfn, &pt); + p2m_lock(p2m); + + mfn = gfn_to_mfn_query(p2m, gfn, &pt); if ( pt == ot ) - set_p2m_entry(d, gfn, mfn, 0, nt); - - p2m_unlock(d->arch.p2m); + set_p2m_entry(p2m, gfn, mfn, 0, nt); + + p2m_unlock(p2m); return pt; } int -set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) +set_mmio_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn) { int rc = 0; p2m_type_t ot; mfn_t omfn; - if ( !paging_mode_translate(d) ) + if ( !paging_mode_translate(p2m->domain) ) return 0; - omfn = gfn_to_mfn_query(d, gfn, &ot); + omfn = gfn_to_mfn_query(p2m, gfn, &ot); if ( p2m_is_grant(ot) ) { - domain_crash(d); + domain_crash(p2m->domain); return 0; } else if ( p2m_is_ram(ot) ) @@ -2593,51 +2599,51 @@ set_mmio_p2m_entry(struct domain *d, uns } P2M_DEBUG("set mmio %lx %lx\n", gfn, mfn_x(mfn)); - p2m_lock(d->arch.p2m); - rc = set_p2m_entry(d, gfn, mfn, 0, p2m_mmio_direct); - p2m_unlock(d->arch.p2m); + p2m_lock(p2m); + rc = set_p2m_entry(p2m, gfn, mfn, 0, p2m_mmio_direct); + p2m_unlock(p2m); if ( 0 == rc ) gdprintk(XENLOG_ERR, "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n", - gmfn_to_mfn(d, gfn)); + mfn_x(gfn_to_mfn(p2m, gfn, &ot))); return rc; } int -clear_mmio_p2m_entry(struct domain *d, unsigned long gfn) +clear_mmio_p2m_entry(struct p2m_domain *p2m, unsigned long gfn) { int rc = 0; - unsigned long mfn; - - if ( !paging_mode_translate(d) ) + mfn_t mfn; + p2m_type_t t; + + if ( !paging_mode_translate(p2m->domain) ) return 0; - mfn = gmfn_to_mfn(d, gfn); - if ( INVALID_MFN == mfn ) + mfn = gfn_to_mfn(p2m, gfn, &t); + if ( !mfn_valid(mfn) ) { gdprintk(XENLOG_ERR, "clear_mmio_p2m_entry: gfn_to_mfn failed! gfn=%08lx\n", gfn); return 0; } - p2m_lock(d->arch.p2m); - rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0, 0); - p2m_unlock(d->arch.p2m); + p2m_lock(p2m); + rc = set_p2m_entry(p2m, gfn, _mfn(INVALID_MFN), 0, 0); + p2m_unlock(p2m); return rc; } -#ifdef __x86_64__ int -set_shared_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) +set_shared_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn) { int rc = 0; p2m_type_t ot; mfn_t omfn; - if ( !paging_mode_translate(d) ) + if ( !paging_mode_translate(p2m->domain) ) return 0; - omfn = gfn_to_mfn_query(d, gfn, &ot); + omfn = gfn_to_mfn_query(p2m, gfn, &ot); /* At the moment we only allow p2m change if gfn has already been made * sharable first */ ASSERT(p2m_is_shared(ot)); @@ -2646,22 +2652,23 @@ set_shared_p2m_entry(struct domain *d, u set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); P2M_DEBUG("set shared %lx %lx\n", gfn, mfn_x(mfn)); - rc = set_p2m_entry(d, gfn, mfn, 0, p2m_ram_shared); + rc = set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_shared); if ( 0 == rc ) gdprintk(XENLOG_ERR, "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n", - gmfn_to_mfn(d, gfn)); + gmfn_to_mfn(p2m->domain, gfn)); return rc; } -int p2m_mem_paging_nominate(struct domain *d, unsigned long gfn) +#ifdef __x86_64__ +int p2m_mem_paging_nominate(struct p2m_domain *p2m, unsigned long gfn) { struct page_info *page; p2m_type_t p2mt; mfn_t mfn; int ret; - mfn = gfn_to_mfn(d, gfn, &p2mt); + mfn = gfn_to_mfn(p2m, gfn, &p2mt); /* Check if mfn is valid */ ret = -EINVAL; @@ -2687,9 +2694,9 @@ int p2m_mem_paging_nominate(struct domai goto out; /* Fix p2m entry */ - p2m_lock(d->arch.p2m); - set_p2m_entry(d, gfn, mfn, 0, p2m_ram_paging_out); - p2m_unlock(d->arch.p2m); + p2m_lock(p2m); + set_p2m_entry(p2m, gfn, mfn, 0, p2m_ram_paging_out); + p2m_unlock(p2m); ret = 0; @@ -2697,14 +2704,15 @@ int p2m_mem_paging_nominate(struct domai return ret; } -int p2m_mem_paging_evict(struct domain *d, unsigned long gfn) +int p2m_mem_paging_evict(struct p2m_domain *p2m, unsigned long gfn) { struct page_info *page; p2m_type_t p2mt; mfn_t mfn; + struct domain *d = p2m->domain; /* Get mfn */ - mfn = gfn_to_mfn(d, gfn, &p2mt); + mfn = gfn_to_mfn(p2m, gfn, &p2mt); if ( unlikely(!mfn_valid(mfn)) ) return -EINVAL; @@ -2722,9 +2730,9 @@ int p2m_mem_paging_evict(struct domain * put_page(page); /* Remove mapping from p2m table */ - p2m_lock(d->arch.p2m); - set_p2m_entry(d, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paged); - p2m_unlock(d->arch.p2m); + p2m_lock(p2m); + set_p2m_entry(p2m, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paged); + p2m_unlock(p2m); /* Put the page back so it gets freed */ put_page(page); @@ -2732,11 +2740,12 @@ int p2m_mem_paging_evict(struct domain * return 0; } -void p2m_mem_paging_populate(struct domain *d, unsigned long gfn) +void p2m_mem_paging_populate(struct p2m_domain *p2m, unsigned long gfn) { struct vcpu *v = current; mem_event_request_t req; p2m_type_t p2mt; + struct domain *d = p2m->domain; memset(&req, 0, sizeof(req)); @@ -2747,12 +2756,12 @@ void p2m_mem_paging_populate(struct doma /* Fix p2m mapping */ /* XXX: It seems inefficient to have this here, as it's only needed * in one case (ept guest accessing paging out page) */ - gfn_to_mfn(d, gfn, &p2mt); + gfn_to_mfn(p2m, gfn, &p2mt); if ( p2mt != p2m_ram_paging_out ) { - p2m_lock(d->arch.p2m); - set_p2m_entry(d, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paging_in_start); - p2m_unlock(d->arch.p2m); + p2m_lock(p2m); + set_p2m_entry(p2m, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paging_in_start); + p2m_unlock(p2m); } /* Pause domain */ @@ -2770,25 +2779,26 @@ void p2m_mem_paging_populate(struct doma mem_event_put_request(d, &req); } -int p2m_mem_paging_prep(struct domain *d, unsigned long gfn) +int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn) { struct page_info *page; /* Get a free page */ - page = alloc_domheap_page(d, 0); + page = alloc_domheap_page(p2m->domain, 0); if ( unlikely(page == NULL) ) return -EINVAL; /* Fix p2m mapping */ - p2m_lock(d->arch.p2m); - set_p2m_entry(d, gfn, page_to_mfn(page), 0, p2m_ram_paging_in); - p2m_unlock(d->arch.p2m); + p2m_lock(p2m); + set_p2m_entry(p2m, gfn, page_to_mfn(page), 0, p2m_ram_paging_in); + p2m_unlock(p2m); return 0; } -void p2m_mem_paging_resume(struct domain *d) -{ +void p2m_mem_paging_resume(struct p2m_domain *p2m) +{ + struct domain *d = p2m->domain; mem_event_response_t rsp; p2m_type_t p2mt; mfn_t mfn; @@ -2797,10 +2807,10 @@ void p2m_mem_paging_resume(struct domain mem_event_get_response(d, &rsp); /* Fix p2m entry */ - mfn = gfn_to_mfn(d, rsp.gfn, &p2mt); - p2m_lock(d->arch.p2m); - set_p2m_entry(d, rsp.gfn, mfn, 0, p2m_ram_rw); - p2m_unlock(d->arch.p2m); + mfn = gfn_to_mfn(p2m, rsp.gfn, &p2mt); + p2m_lock(p2m); + set_p2m_entry(p2m, rsp.gfn, mfn, 0, p2m_ram_rw); + p2m_unlock(p2m); /* Unpause domain */ if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED ) diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/mm/shadow/common.c --- a/xen/arch/x86/mm/shadow/common.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/mm/shadow/common.c Wed Aug 11 15:56:01 2010 +0100 @@ -1714,8 +1714,9 @@ sh_alloc_p2m_pages(struct domain *d) // Returns 0 if no memory is available... static struct page_info * -shadow_alloc_p2m_page(struct domain *d) -{ +shadow_alloc_p2m_page(struct p2m_domain *p2m) +{ + struct domain *d = p2m->domain; struct page_info *pg; mfn_t mfn; void *p; @@ -1741,8 +1742,9 @@ shadow_alloc_p2m_page(struct domain *d) } static void -shadow_free_p2m_page(struct domain *d, struct page_info *pg) -{ +shadow_free_p2m_page(struct p2m_domain *p2m, struct page_info *pg) +{ + struct domain *d = p2m->domain; ASSERT(page_get_owner(pg) == d); /* Should have just the one ref we gave it in alloc_p2m_page() */ if ( (pg->count_info & PGC_count_mask) != 1 ) @@ -3100,6 +3102,7 @@ int shadow_enable(struct domain *d, u32 struct page_info *pg = NULL; uint32_t *e; int i, rv = 0; + struct p2m_domain *p2m = p2m_get_hostp2m(d); mode |= PG_SH_enable; @@ -3135,7 +3138,8 @@ int shadow_enable(struct domain *d, u32 * to avoid possible deadlock. */ if ( mode & PG_translate ) { - rv = p2m_alloc_table(d, shadow_alloc_p2m_page, shadow_free_p2m_page); + rv = p2m_alloc_table(p2m, + shadow_alloc_p2m_page, shadow_free_p2m_page); if (rv != 0) goto out_unlocked; } @@ -3146,7 +3150,7 @@ int shadow_enable(struct domain *d, u32 { /* Get a single page from the shadow pool. Take it via the * P2M interface to make freeing it simpler afterwards. */ - pg = shadow_alloc_p2m_page(d); + pg = shadow_alloc_p2m_page(p2m); if ( pg == NULL ) { rv = -ENOMEM; @@ -3195,10 +3199,10 @@ int shadow_enable(struct domain *d, u32 out_locked: shadow_unlock(d); out_unlocked: - if ( rv != 0 && !pagetable_is_null(p2m_get_pagetable(p2m_get_hostp2m(d))) ) - p2m_teardown(d); + if ( rv != 0 && !pagetable_is_null(p2m_get_pagetable(p2m)) ) + p2m_teardown(p2m); if ( rv != 0 && pg != NULL ) - shadow_free_p2m_page(d, pg); + shadow_free_p2m_page(p2m, pg); domain_unpause(d); return rv; } @@ -3210,6 +3214,7 @@ void shadow_teardown(struct domain *d) struct vcpu *v; mfn_t mfn; struct page_info *pg; + struct p2m_domain *p2m = p2m_get_hostp2m(d); ASSERT(d->is_dying); ASSERT(d != current->domain); @@ -3264,7 +3269,7 @@ void shadow_teardown(struct domain *d) #endif /* (SHADOW_OPTIMIZATIONS & (SHOPT_VIRTUAL_TLB|SHOPT_OUT_OF_SYNC)) */ while ( (pg = page_list_remove_head(&d->arch.paging.shadow.p2m_freelist)) ) - shadow_free_p2m_page(d, pg); + shadow_free_p2m_page(p2m, pg); if ( d->arch.paging.shadow.total_pages != 0 ) { @@ -3298,7 +3303,7 @@ void shadow_teardown(struct domain *d) if ( !hvm_paging_enabled(v) ) v->arch.guest_table = pagetable_null(); } - shadow_free_p2m_page(d, + shadow_free_p2m_page(p2m, pagetable_get_page(d->arch.paging.shadow.unpaged_pagetable)); d->arch.paging.shadow.unpaged_pagetable = pagetable_null(); } @@ -3335,7 +3340,7 @@ void shadow_final_teardown(struct domain shadow_teardown(d); /* It is now safe to pull down the p2m map. */ - p2m_teardown(d); + p2m_teardown(p2m_get_hostp2m(d)); SHADOW_PRINTK("dom %u final teardown done." " Shadow pages total = %u, free = %u, p2m=%u\n", @@ -3657,10 +3662,11 @@ int shadow_track_dirty_vram(struct domai unsigned long i; p2m_type_t t; struct sh_dirty_vram *dirty_vram = d->arch.hvm_domain.dirty_vram; + struct p2m_domain *p2m = p2m_get_hostp2m(d); if (end_pfn < begin_pfn - || begin_pfn > d->arch.p2m->max_mapped_pfn - || end_pfn >= d->arch.p2m->max_mapped_pfn) + || begin_pfn > p2m->max_mapped_pfn + || end_pfn >= p2m->max_mapped_pfn) return -EINVAL; shadow_lock(d); @@ -3729,7 +3735,7 @@ int shadow_track_dirty_vram(struct domai /* Iterate over VRAM to track dirty bits. */ for ( i = 0; i < nr; i++ ) { - mfn_t mfn = gfn_to_mfn(d, begin_pfn + i, &t); + mfn_t mfn = gfn_to_mfn(p2m, begin_pfn + i, &t); struct page_info *page; int dirty = 0; paddr_t sl1ma = dirty_vram->sl1ma[i]; @@ -3814,7 +3820,7 @@ int shadow_track_dirty_vram(struct domai /* was clean for more than two seconds, try to disable guest * write access */ for ( i = begin_pfn; i < end_pfn; i++ ) { - mfn_t mfn = gfn_to_mfn(d, i, &t); + mfn_t mfn = gfn_to_mfn(p2m, i, &t); if (mfn_x(mfn) != INVALID_MFN) flush_tlb |= sh_remove_write_access(d->vcpu[0], mfn, 1, 0); } diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/mm/shadow/multi.c Wed Aug 11 15:56:01 2010 +0100 @@ -167,7 +167,7 @@ sh_walk_guest_tables(struct vcpu *v, uns sh_walk_guest_tables(struct vcpu *v, unsigned long va, walk_t *gw, uint32_t pfec) { - return guest_walk_tables(v, va, gw, pfec, + return guest_walk_tables(v, p2m_get_hostp2m(v->domain), va, gw, pfec, #if GUEST_PAGING_LEVELS == 3 /* PAE */ _mfn(INVALID_MFN), v->arch.paging.shadow.gl3e @@ -2240,6 +2240,7 @@ static int validate_gl4e(struct vcpu *v, shadow_l4e_t *sl4p = se; mfn_t sl3mfn = _mfn(INVALID_MFN); struct domain *d = v->domain; + struct p2m_domain *p2m = p2m_get_hostp2m(d); p2m_type_t p2mt; int result = 0; @@ -2248,7 +2249,7 @@ static int validate_gl4e(struct vcpu *v, if ( guest_l4e_get_flags(new_gl4e) & _PAGE_PRESENT ) { gfn_t gl3gfn = guest_l4e_get_gfn(new_gl4e); - mfn_t gl3mfn = gfn_to_mfn_query(d, gl3gfn, &p2mt); + mfn_t gl3mfn = gfn_to_mfn_query(p2m, gl3gfn, &p2mt); if ( p2m_is_ram(p2mt) ) sl3mfn = get_shadow_status(v, gl3mfn, SH_type_l3_shadow); else if ( p2mt != p2m_populate_on_demand ) @@ -2299,13 +2300,14 @@ static int validate_gl3e(struct vcpu *v, mfn_t sl2mfn = _mfn(INVALID_MFN); p2m_type_t p2mt; int result = 0; + struct p2m_domain *p2m = p2m_get_hostp2m(v->domain); perfc_incr(shadow_validate_gl3e_calls); if ( guest_l3e_get_flags(new_gl3e) & _PAGE_PRESENT ) { gfn_t gl2gfn = guest_l3e_get_gfn(new_gl3e); - mfn_t gl2mfn = gfn_to_mfn_query(v->domain, gl2gfn, &p2mt); + mfn_t gl2mfn = gfn_to_mfn_query(p2m, gl2gfn, &p2mt); if ( p2m_is_ram(p2mt) ) sl2mfn = get_shadow_status(v, gl2mfn, SH_type_l2_shadow); else if ( p2mt != p2m_populate_on_demand ) @@ -2329,6 +2331,7 @@ static int validate_gl2e(struct vcpu *v, guest_l2e_t new_gl2e = *(guest_l2e_t *)new_ge; shadow_l2e_t *sl2p = se; mfn_t sl1mfn = _mfn(INVALID_MFN); + struct p2m_domain *p2m = p2m_get_hostp2m(v->domain); p2m_type_t p2mt; int result = 0; @@ -2354,7 +2357,7 @@ static int validate_gl2e(struct vcpu *v, } else { - mfn_t gl1mfn = gfn_to_mfn_query(v->domain, gl1gfn, &p2mt); + mfn_t gl1mfn = gfn_to_mfn_query(p2m, gl1gfn, &p2mt); if ( p2m_is_ram(p2mt) ) sl1mfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow); else if ( p2mt != p2m_populate_on_demand ) @@ -2415,6 +2418,7 @@ static int validate_gl1e(struct vcpu *v, shadow_l1e_t *sl1p = se; gfn_t gfn; mfn_t gmfn; + struct p2m_domain *p2m = p2m_get_hostp2m(v->domain); p2m_type_t p2mt; int result = 0; #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) @@ -2424,7 +2428,7 @@ static int validate_gl1e(struct vcpu *v, perfc_incr(shadow_validate_gl1e_calls); gfn = guest_l1e_get_gfn(new_gl1e); - gmfn = gfn_to_mfn_query(v->domain, gfn, &p2mt); + gmfn = gfn_to_mfn_query(p2m, gfn, &p2mt); l1e_propagate_from_guest(v, new_gl1e, gmfn, &new_sl1e, ft_prefetch, p2mt); result |= shadow_set_l1e(v, sl1p, new_sl1e, p2mt, sl1mfn); @@ -2484,7 +2488,7 @@ void sh_resync_l1(struct vcpu *v, mfn_t shadow_l1e_t nsl1e; gfn = guest_l1e_get_gfn(gl1e); - gmfn = gfn_to_mfn_query(v->domain, gfn, &p2mt); + gmfn = gfn_to_mfn_query(p2m_get_hostp2m(v->domain), gfn, &p2mt); l1e_propagate_from_guest(v, gl1e, gmfn, &nsl1e, ft_prefetch, p2mt); rc |= shadow_set_l1e(v, sl1p, nsl1e, p2mt, sl1mfn); @@ -2810,7 +2814,7 @@ static void sh_prefetch(struct vcpu *v, /* Look at the gfn that the l1e is pointing at */ gfn = guest_l1e_get_gfn(gl1e); - gmfn = gfn_to_mfn_query(v->domain, gfn, &p2mt); + gmfn = gfn_to_mfn_query(p2m_get_hostp2m(v->domain), gfn, &p2mt); /* Propagate the entry. */ l1e_propagate_from_guest(v, gl1e, gmfn, &sl1e, ft_prefetch, p2mt); @@ -3166,7 +3170,7 @@ static int sh_page_fault(struct vcpu *v, /* What mfn is the guest trying to access? */ gfn = guest_l1e_get_gfn(gw.l1e); - gmfn = gfn_to_mfn_guest(d, gfn, &p2mt); + gmfn = gfn_to_mfn_guest(p2m_get_hostp2m(d), gfn, &p2mt); if ( shadow_mode_refcounts(d) && ((!p2m_is_valid(p2mt) && !p2m_is_grant(p2mt)) || @@ -4272,7 +4276,7 @@ sh_update_cr3(struct vcpu *v, int do_loc if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT ) { gl2gfn = guest_l3e_get_gfn(gl3e[i]); - gl2mfn = gfn_to_mfn_query(d, gl2gfn, &p2mt); + gl2mfn = gfn_to_mfn_query(p2m_get_hostp2m(d), gl2gfn, &p2mt); if ( p2m_is_ram(p2mt) ) flush |= sh_remove_write_access(v, gl2mfn, 2, 0); } @@ -4285,7 +4289,7 @@ sh_update_cr3(struct vcpu *v, int do_loc if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT ) { gl2gfn = guest_l3e_get_gfn(gl3e[i]); - gl2mfn = gfn_to_mfn_query(d, gl2gfn, &p2mt); + gl2mfn = gfn_to_mfn_query(p2m_get_hostp2m(d), gl2gfn, &p2mt); if ( p2m_is_ram(p2mt) ) sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3) ? SH_type_l2h_shadow @@ -4682,7 +4686,7 @@ static void sh_pagetable_dying(struct vc if ( gcr3 == gpa ) fast_path = 1; - gmfn = gfn_to_mfn_query(v->domain, _gfn(gpa >> PAGE_SHIFT), &p2mt); + gmfn = gfn_to_mfn_query(p2m_get_hostp2m(v->domain), _gfn(gpa >> PAGE_SHIFT), &p2mt); if ( !mfn_valid(gmfn) || !p2m_is_ram(p2mt) ) { printk(XENLOG_DEBUG "sh_pagetable_dying: gpa not valid %"PRIpaddr"\n", @@ -4702,7 +4706,7 @@ static void sh_pagetable_dying(struct vc { /* retrieving the l2s */ gl2a = guest_l3e_get_paddr(gl3e[i]); - gmfn = gfn_to_mfn_query(v->domain, _gfn(gl2a >> PAGE_SHIFT), &p2mt); + gmfn = gfn_to_mfn_query(p2m_get_hostp2m(v->domain), _gfn(gl2a >> PAGE_SHIFT), &p2mt); smfn = shadow_hash_lookup(v, mfn_x(gmfn), SH_type_l2_pae_shadow); } @@ -4737,7 +4741,7 @@ static void sh_pagetable_dying(struct vc shadow_lock(v->domain); - gmfn = gfn_to_mfn_query(v->domain, _gfn(gpa >> PAGE_SHIFT), &p2mt); + gmfn = gfn_to_mfn_query(p2m_get_hostp2m(v->domain), _gfn(gpa >> PAGE_SHIFT), &p2mt); #if GUEST_PAGING_LEVELS == 2 smfn = shadow_hash_lookup(v, mfn_x(gmfn), SH_type_l2_32_shadow); #else @@ -4777,6 +4781,7 @@ static mfn_t emulate_gva_to_mfn(struct v mfn_t mfn; p2m_type_t p2mt; uint32_t pfec = PFEC_page_present | PFEC_write_access; + struct p2m_domain *p2m = p2m_get_hostp2m(v->domain); /* Translate the VA to a GFN */ gfn = sh_gva_to_gfn(v, vaddr, &pfec); @@ -4792,9 +4797,9 @@ static mfn_t emulate_gva_to_mfn(struct v /* Translate the GFN to an MFN */ /* PoD: query only if shadow lock is held (to avoid deadlock) */ if ( shadow_locked_by_me(v->domain) ) - mfn = gfn_to_mfn_query(v->domain, _gfn(gfn), &p2mt); + mfn = gfn_to_mfn_query(p2m, _gfn(gfn), &p2mt); else - mfn = gfn_to_mfn(v->domain, _gfn(gfn), &p2mt); + mfn = gfn_to_mfn(p2m, _gfn(gfn), &p2mt); if ( p2m_is_readonly(p2mt) ) return _mfn(READONLY_GFN); @@ -5199,7 +5204,7 @@ int sh_audit_l1_table(struct vcpu *v, mf { gfn = guest_l1e_get_gfn(*gl1e); mfn = shadow_l1e_get_mfn(*sl1e); - gmfn = gfn_to_mfn_query(v->domain, gfn, &p2mt); + gmfn = gfn_to_mfn_query(p2m_get_hostp2m(v->domain), gfn, &p2mt); if ( !p2m_is_grant(p2mt) && mfn_x(gmfn) != mfn_x(mfn) ) AUDIT_FAIL(1, "bad translation: gfn %" SH_PRI_gfn " --> %" PRI_mfn " != mfn %" PRI_mfn, @@ -5243,6 +5248,7 @@ int sh_audit_l2_table(struct vcpu *v, mf shadow_l2e_t *sl2e; mfn_t mfn, gmfn, gl2mfn; gfn_t gfn; + struct p2m_domain *p2m = p2m_get_hostp2m(v->domain); p2m_type_t p2mt; char *s; int done = 0; @@ -5269,7 +5275,7 @@ int sh_audit_l2_table(struct vcpu *v, mf mfn = shadow_l2e_get_mfn(*sl2e); gmfn = (guest_l2e_get_flags(*gl2e) & _PAGE_PSE) ? get_fl1_shadow_status(v, gfn) - : get_shadow_status(v, gfn_to_mfn_query(v->domain, gfn, &p2mt), + : get_shadow_status(v, gfn_to_mfn_query(p2m, gfn, &p2mt), SH_type_l1_shadow); if ( mfn_x(gmfn) != mfn_x(mfn) ) AUDIT_FAIL(2, "bad translation: gfn %" SH_PRI_gfn @@ -5277,8 +5283,8 @@ int sh_audit_l2_table(struct vcpu *v, mf " --> %" PRI_mfn " != mfn %" PRI_mfn, gfn_x(gfn), (guest_l2e_get_flags(*gl2e) & _PAGE_PSE) ? 0 - : mfn_x(gfn_to_mfn_query(v->domain, gfn, &p2mt)), - mfn_x(gmfn), mfn_x(mfn)); + : mfn_x(gfn_to_mfn_query(p2m, + gfn, &p2mt)), mfn_x(gmfn), mfn_x(mfn)); } }); sh_unmap_domain_page(gp); @@ -5316,7 +5322,7 @@ int sh_audit_l3_table(struct vcpu *v, mf { gfn = guest_l3e_get_gfn(*gl3e); mfn = shadow_l3e_get_mfn(*sl3e); - gmfn = get_shadow_status(v, gfn_to_mfn_query(v->domain, gfn, &p2mt), + gmfn = get_shadow_status(v, gfn_to_mfn_query(p2m_get_hostp2m(v->domain), gfn, &p2mt), ((GUEST_PAGING_LEVELS == 3 || is_pv_32on64_vcpu(v)) && !shadow_mode_external(v->domain) @@ -5363,7 +5369,8 @@ int sh_audit_l4_table(struct vcpu *v, mf { gfn = guest_l4e_get_gfn(*gl4e); mfn = shadow_l4e_get_mfn(*sl4e); - gmfn = get_shadow_status(v, gfn_to_mfn_query(v->domain, gfn, &p2mt), + gmfn = get_shadow_status(v, gfn_to_mfn_query(p2m_get_hostp2m(v->domain), + gfn, &p2mt), SH_type_l3_shadow); if ( mfn_x(gmfn) != mfn_x(mfn) ) AUDIT_FAIL(4, "bad translation: gfn %" SH_PRI_gfn diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/oprofile/nmi_int.c --- a/xen/arch/x86/oprofile/nmi_int.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/oprofile/nmi_int.c Wed Aug 11 15:56:01 2010 +0100 @@ -28,6 +28,7 @@ #include "op_x86_model.h" struct op_counter_config counter_config[OP_MAX_COUNTER]; +struct op_ibs_config ibs_config; static struct op_x86_model_spec const *__read_mostly model; static struct op_msrs cpu_msrs[NR_CPUS]; @@ -430,6 +431,7 @@ static int __init nmi_init(void) case 0x10: model = &op_athlon_spec; cpu_type = "x86-64/family10"; + ibs_caps = ibs_init(); break; case 0x11: model = &op_athlon_spec; diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/oprofile/op_counter.h --- a/xen/arch/x86/oprofile/op_counter.h Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/oprofile/op_counter.h Wed Aug 11 15:56:01 2010 +0100 @@ -26,4 +26,16 @@ struct op_counter_config { extern struct op_counter_config counter_config[]; +/* AMD IBS configuration */ +struct op_ibs_config { + unsigned long op_enabled; + unsigned long fetch_enabled; + unsigned long max_cnt_fetch; + unsigned long max_cnt_op; + unsigned long rand_en; + unsigned long dispatched_ops; +}; + +extern struct op_ibs_config ibs_config; + #endif /* OP_COUNTER_H */ diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/oprofile/op_model_athlon.c --- a/xen/arch/x86/oprofile/op_model_athlon.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/oprofile/op_model_athlon.c Wed Aug 11 15:56:01 2010 +0100 @@ -19,6 +19,7 @@ #include <asm/regs.h> #include <asm/current.h> #include <asm/hvm/support.h> +#include <xen/pci_regs.h> #include "op_x86_model.h" #include "op_counter.h" @@ -47,6 +48,116 @@ static unsigned long reset_value[NUM_COU extern char svm_stgi_label[]; +u32 ibs_caps = 0; +static u64 ibs_op_ctl; + +/* IBS cpuid feature detection */ +#define IBS_CPUID_FEATURES 0x8000001b + +/* IBS MSRs */ +#define MSR_AMD64_IBSFETCHCTL 0xc0011030 +#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 +#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 +#define MSR_AMD64_IBSOPCTL 0xc0011033 +#define MSR_AMD64_IBSOPRIP 0xc0011034 +#define MSR_AMD64_IBSOPDATA 0xc0011035 +#define MSR_AMD64_IBSOPDATA2 0xc0011036 +#define MSR_AMD64_IBSOPDATA3 0xc0011037 +#define MSR_AMD64_IBSDCLINAD 0xc0011038 +#define MSR_AMD64_IBSDCPHYSAD 0xc0011039 +#define MSR_AMD64_IBSCTL 0xc001103a + +/* + * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but + * bit 0 is used to indicate the existence of IBS. + */ +#define IBS_CAPS_AVAIL (1LL<<0) +#define IBS_CAPS_RDWROPCNT (1LL<<3) +#define IBS_CAPS_OPCNT (1LL<<4) + +/* IBS randomization macros */ +#define IBS_RANDOM_BITS 12 +#define IBS_RANDOM_MASK ((1ULL << IBS_RANDOM_BITS) - 1) +#define IBS_RANDOM_MAXCNT_OFFSET (1ULL << (IBS_RANDOM_BITS - 5)) + +/* IbsFetchCtl bits/masks */ +#define IBS_FETCH_RAND_EN (1ULL<<57) +#define IBS_FETCH_VAL (1ULL<<49) +#define IBS_FETCH_ENABLE (1ULL<<48) +#define IBS_FETCH_CNT 0xFFFF0000ULL +#define IBS_FETCH_MAX_CNT 0x0000FFFFULL + +/* IbsOpCtl bits */ +#define IBS_OP_CNT_CTL (1ULL<<19) +#define IBS_OP_VAL (1ULL<<18) +#define IBS_OP_ENABLE (1ULL<<17) +#define IBS_OP_MAX_CNT 0x0000FFFFULL + +/* IBS sample identifier */ +#define IBS_FETCH_CODE 13 +#define IBS_OP_CODE 14 + +#define clamp(val, min, max) ({ \ + typeof(val) __val = (val); \ + typeof(min) __min = (min); \ + typeof(max) __max = (max); \ + (void) (&__val == &__min); \ + (void) (&__val == &__max); \ + __val = __val < __min ? __min: __val; \ + __val > __max ? __max: __val; }) + +/* + * 16-bit Linear Feedback Shift Register (LFSR) + */ +static unsigned int lfsr_random(void) +{ + static unsigned int lfsr_value = 0xF00D; + unsigned int bit; + + /* Compute next bit to shift in */ + bit = ((lfsr_value >> 0) ^ + (lfsr_value >> 2) ^ + (lfsr_value >> 3) ^ + (lfsr_value >> 5)) & 0x0001; + + /* Advance to next register value */ + lfsr_value = (lfsr_value >> 1) | (bit << 15); + + return lfsr_value; +} + +/* + * IBS software randomization + * + * The IBS periodic op counter is randomized in software. The lower 12 + * bits of the 20 bit counter are randomized. IbsOpCurCnt is + * initialized with a 12 bit random value. + */ +static inline u64 op_amd_randomize_ibs_op(u64 val) +{ + unsigned int random = lfsr_random(); + + if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) + /* + * Work around if the hw can not write to IbsOpCurCnt + * + * Randomize the lower 8 bits of the 16 bit + * IbsOpMaxCnt [15:0] value in the range of -128 to + * +127 by adding/subtracting an offset to the + * maximum count (IbsOpMaxCnt). + * + * To avoid over or underflows and protect upper bits + * starting at bit 16, the initial value for + * IbsOpMaxCnt must fit in the range from 0x0081 to + * 0xff80. + */ + val += (s8)(random >> 4); + else + val |= (u64)(random & IBS_RANDOM_MASK) << 32; + + return val; +} + static void athlon_fill_in_addresses(struct op_msrs * const msrs) { msrs->counters[0].addr = MSR_K7_PERFCTR0; @@ -101,6 +212,78 @@ static void athlon_setup_ctrs(struct op_ } } +static inline void +ibs_log_event(u64 data, struct cpu_user_regs * const regs, int mode) +{ + struct vcpu *v = current; + u32 temp = 0; + + temp = data & 0xFFFFFFFF; + xenoprof_log_event(v, regs, temp, mode, 0); + + temp = (data >> 32) & 0xFFFFFFFF; + xenoprof_log_event(v, regs, temp, mode, 0); + +} + +static inline int handle_ibs(int mode, struct cpu_user_regs * const regs) +{ + u64 val, ctl; + struct vcpu *v = current; + + if (!ibs_caps) + return 1; + + if (ibs_config.fetch_enabled) { + rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl); + if (ctl & IBS_FETCH_VAL) { + rdmsrl(MSR_AMD64_IBSFETCHLINAD, val); + xenoprof_log_event(v, regs, IBS_FETCH_CODE, mode, 0); + xenoprof_log_event(v, regs, val, mode, 0); + + ibs_log_event(val, regs, mode); + ibs_log_event(ctl, regs, mode); + + rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val); + ibs_log_event(val, regs, mode); + + /* reenable the IRQ */ + ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT); + ctl |= IBS_FETCH_ENABLE; + wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl); + } + } + + if (ibs_config.op_enabled) { + rdmsrl(MSR_AMD64_IBSOPCTL, ctl); + if (ctl & IBS_OP_VAL) { + + rdmsrl(MSR_AMD64_IBSOPRIP, val); + xenoprof_log_event(v, regs, IBS_OP_CODE, mode, 0); + xenoprof_log_event(v, regs, val, mode, 0); + + ibs_log_event(val, regs, mode); + + rdmsrl(MSR_AMD64_IBSOPDATA, val); + ibs_log_event(val, regs, mode); + rdmsrl(MSR_AMD64_IBSOPDATA2, val); + ibs_log_event(val, regs, mode); + rdmsrl(MSR_AMD64_IBSOPDATA3, val); + ibs_log_event(val, regs, mode); + rdmsrl(MSR_AMD64_IBSDCLINAD, val); + ibs_log_event(val, regs, mode); + rdmsrl(MSR_AMD64_IBSDCPHYSAD, val); + ibs_log_event(val, regs, mode); + + /* reenable the IRQ */ + ctl = op_amd_randomize_ibs_op(ibs_op_ctl); + wrmsrl(MSR_AMD64_IBSOPCTL, ctl); + } + } + + return 1; +} + static int athlon_check_ctrs(unsigned int const cpu, struct op_msrs const * const msrs, struct cpu_user_regs * const regs) @@ -134,10 +317,51 @@ static int athlon_check_ctrs(unsigned in } } + ovf = handle_ibs(mode, regs); /* See op_model_ppro.c */ return ovf; } +static inline void start_ibs(void) +{ + u64 val = 0; + + if (!ibs_caps) + return; + + if (ibs_config.fetch_enabled) { + val = (ibs_config.max_cnt_fetch >> 4) & IBS_FETCH_MAX_CNT; + val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; + val |= IBS_FETCH_ENABLE; + wrmsrl(MSR_AMD64_IBSFETCHCTL, val); + } + + if (ibs_config.op_enabled) { + ibs_op_ctl = ibs_config.max_cnt_op >> 4; + if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) { + /* + * IbsOpCurCnt not supported. See + * op_amd_randomize_ibs_op() for details. + */ + ibs_op_ctl = clamp((unsigned long long)ibs_op_ctl, + 0x0081ULL, 0xFF80ULL); + } else { + /* + * The start value is randomized with a + * positive offset, we need to compensate it + * with the half of the randomized range. Also + * avoid underflows. + */ + ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET, + IBS_OP_MAX_CNT); + } + if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops) + ibs_op_ctl |= IBS_OP_CNT_CTL; + ibs_op_ctl |= IBS_OP_ENABLE; + val = op_amd_randomize_ibs_op(ibs_op_ctl); + wrmsrl(MSR_AMD64_IBSOPCTL, val); + } +} static void athlon_start(struct op_msrs const * const msrs) { @@ -150,8 +374,22 @@ static void athlon_start(struct op_msrs CTRL_WRITE(msr_content, msrs, i); } } -} - + start_ibs(); +} + +static void stop_ibs(void) +{ + if (!ibs_caps) + return; + + if (ibs_config.fetch_enabled) + /* clear max count and enable */ + wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); + + if (ibs_config.op_enabled) + /* clear max count and enable */ + wrmsrl(MSR_AMD64_IBSOPCTL, 0); +} static void athlon_stop(struct op_msrs const * const msrs) { @@ -165,8 +403,118 @@ static void athlon_stop(struct op_msrs c CTRL_SET_INACTIVE(msr_content); CTRL_WRITE(msr_content, msrs, i); } -} - + + stop_ibs(); +} + +#define IBSCTL_LVTOFFSETVAL (1 << 8) +#define APIC_EILVT_MSG_NMI 0x4 +#define APIC_EILVT_LVTOFF_IBS 1 +#define APIC_EILVTn(n) (0x500 + 0x10 * n) +static inline void init_ibs_nmi_per_cpu(void *arg) +{ + unsigned long reg; + + reg = (APIC_EILVT_LVTOFF_IBS << 4) + APIC_EILVTn(0); + apic_write(reg, APIC_EILVT_MSG_NMI << 8); +} + +#define PCI_VENDOR_ID_AMD 0x1022 +#define PCI_DEVICE_ID_AMD_10H_NB_MISC 0x1203 +#define IBSCTL 0x1cc +static int init_ibs_nmi(void) +{ + int bus, dev, func; + u32 id, value; + u16 vendor_id, dev_id; + int nodes; + + /* per CPU setup */ + on_each_cpu(init_ibs_nmi_per_cpu, NULL, 1); + + nodes = 0; + for (bus = 0; bus < 256; bus++) { + for (dev = 0; dev < 32; dev++) { + for (func = 0; func < 8; func++) { + id = pci_conf_read32(bus, dev, func, PCI_VENDOR_ID); + + if ((id == 0xffffffff) || (id == 0x00000000) || + (id == 0x0000ffff) || (id == 0xffff0000)) + continue; + + vendor_id = id & 0xffff; + dev_id = (id >> 16) & 0xffff; + + if ((vendor_id == PCI_VENDOR_ID_AMD) && + (dev_id == PCI_DEVICE_ID_AMD_10H_NB_MISC)) { + + pci_conf_write32(bus, dev, func, IBSCTL, + IBSCTL_LVTOFFSETVAL | APIC_EILVT_LVTOFF_IBS); + + value = pci_conf_read32(bus, dev, func, IBSCTL); + + if (value != (IBSCTL_LVTOFFSETVAL | + APIC_EILVT_LVTOFF_IBS)) { + printk("Xenoprofile: Failed to setup IBS LVT offset, " + "IBSCTL = 0x%08x", value); + return 1; + } + nodes++; + } + } + } + } + + if (!nodes) { + printk("Xenoprofile: No CPU node configured for IBS"); + return 1; + } + + return 0; +} + +static u32 get_ibs_caps(void) +{ +#ifdef CONFIG_X86_32 + return 0; +#else + unsigned int max_level; + + if (!boot_cpu_has(X86_FEATURE_IBS)) + return 0; + + /* check IBS cpuid feature flags */ + max_level = cpuid_eax(0x80000000); + if (max_level < IBS_CPUID_FEATURES) + return IBS_CAPS_AVAIL; + + ibs_caps = cpuid_eax(IBS_CPUID_FEATURES); + if (!(ibs_caps & IBS_CAPS_AVAIL)) + /* cpuid flags not valid */ + return IBS_CAPS_AVAIL; + + return ibs_caps; +#endif +} + +u32 ibs_init(void) +{ + u32 ibs_caps = 0; + + ibs_caps = get_ibs_caps(); + + if ( !ibs_caps ) + return 0; + + if (init_ibs_nmi()) { + ibs_caps = 0; + return 0; + } + + printk("Xenoprofile: AMD IBS detected (0x%08x)\n", + (unsigned)ibs_caps); + return ibs_caps; +} struct op_x86_model_spec const op_athlon_spec = { .num_counters = NUM_COUNTERS, diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/oprofile/xenoprof.c --- a/xen/arch/x86/oprofile/xenoprof.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/oprofile/xenoprof.c Wed Aug 11 15:56:01 2010 +0100 @@ -34,6 +34,23 @@ int xenoprof_arch_counter(XEN_GUEST_HAND counter_config[counter.ind].kernel = counter.kernel; counter_config[counter.ind].user = counter.user; counter_config[counter.ind].unit_mask = counter.unit_mask; + + return 0; +} + +int xenoprof_arch_ibs_counter(XEN_GUEST_HANDLE(void) arg) +{ + struct xenoprof_ibs_counter ibs_counter; + + if ( copy_from_guest(&ibs_counter, arg, 1) ) + return -EFAULT; + + ibs_config.op_enabled = ibs_counter.op_enabled; + ibs_config.fetch_enabled = ibs_counter.fetch_enabled; + ibs_config.max_cnt_fetch = ibs_counter.max_cnt_fetch; + ibs_config.max_cnt_op = ibs_counter.max_cnt_op; + ibs_config.rand_en = ibs_counter.rand_en; + ibs_config.dispatched_ops = ibs_counter.dispatched_ops; return 0; } diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/setup.c Wed Aug 11 15:56:01 2010 +0100 @@ -203,6 +203,58 @@ void __devinit srat_detect_node(int cpu) if ( opt_cpu_info && acpi_numa > 0 ) printk("CPU %d APIC %d -> Node %d\n", cpu, apicid, node); +} + +/* + * Sort CPUs by <node,package,core,thread> tuple. Fortunately this hierarchy is + * reflected in the structure of modern APIC identifiers, so we sort based on + * those. This is slightly complicated by the fact that the BSP must remain + * CPU 0. Hence we do a variation on longest-prefix matching to do the best we + * can while keeping CPU 0 static. + */ +static void __init normalise_cpu_order(void) +{ + unsigned int i, j, min_cpu; + uint32_t apicid, diff, min_diff; + + for_each_present_cpu ( i ) + { + apicid = x86_cpu_to_apicid[i]; + min_diff = min_cpu = ~0u; + + /* + * Find remaining CPU with longest-prefix match on APIC ID. + * Among identical longest-prefix matches, pick the smallest APIC ID. + */ + for ( j = next_cpu(i, cpu_present_map); + j < NR_CPUS; + j = next_cpu(j, cpu_present_map) ) + { + diff = x86_cpu_to_apicid[j] ^ apicid; + while ( diff & (diff-1) ) + diff &= diff-1; + if ( (diff < min_diff) || + ((diff == min_diff) && + (x86_cpu_to_apicid[j] < x86_cpu_to_apicid[min_cpu])) ) + { + min_diff = diff; + min_cpu = j; + } + } + + /* If no match then there must be no CPUs remaining to consider. */ + if ( min_cpu >= NR_CPUS ) + { + BUG_ON(next_cpu(i, cpu_present_map) < NR_CPUS); + break; + } + + /* Switch the best-matching CPU with the next CPU in logical order. */ + j = next_cpu(i, cpu_present_map); + apicid = x86_cpu_to_apicid[min_cpu]; + x86_cpu_to_apicid[min_cpu] = x86_cpu_to_apicid[j]; + x86_cpu_to_apicid[j] = apicid; + } } /* @@ -952,8 +1004,6 @@ void __init __start_xen(unsigned long mb acpi_boot_init(); - init_cpu_to_node(); - if ( smp_found_config ) get_smp_config(); @@ -963,6 +1013,10 @@ void __init __start_xen(unsigned long mb #endif init_apic_mappings(); + + normalise_cpu_order(); + + init_cpu_to_node(); if ( x2apic_is_available() ) enable_x2apic(); diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/x86_32/entry.S --- a/xen/arch/x86/x86_32/entry.S Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/x86_32/entry.S Wed Aug 11 15:56:01 2010 +0100 @@ -264,7 +264,7 @@ process_mce: ALIGN /* %ebx: struct vcpu */ process_nmi: - cmpw $1 << VCPU_TRAP_NMI,VCPU_async_exception_mask(%ebx) + testb $1 << VCPU_TRAP_NMI,VCPU_async_exception_mask(%ebx) jnz test_guest_events sti movb $0,VCPU_nmi_pending(%ebx) diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/x86_64/compat/entry.S --- a/xen/arch/x86/x86_64/compat/entry.S Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/x86_64/compat/entry.S Wed Aug 11 15:56:01 2010 +0100 @@ -137,7 +137,7 @@ compat_process_mce: ALIGN /* %rbx: struct vcpu */ compat_process_nmi: - cmpw $1 << VCPU_TRAP_NMI,VCPU_async_exception_mask(%rbx) + testb $1 << VCPU_TRAP_NMI,VCPU_async_exception_mask(%rbx) jnz compat_test_guest_events sti movb $0,VCPU_nmi_pending(%rbx) diff -r ca51dba3a7b1 -r 3b839375d5bc xen/arch/x86/x86_64/entry.S --- a/xen/arch/x86/x86_64/entry.S Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/arch/x86/x86_64/entry.S Wed Aug 11 15:56:01 2010 +0100 @@ -239,7 +239,7 @@ process_mce: ALIGN /* %rbx: struct vcpu */ process_nmi: - cmpw $1 << VCPU_TRAP_NMI,VCPU_async_exception_mask(%rbx) + testb $1 << VCPU_TRAP_NMI,VCPU_async_exception_mask(%rbx) jnz test_guest_events sti movb $0,VCPU_nmi_pending(%rbx) diff -r ca51dba3a7b1 -r 3b839375d5bc xen/common/domain.c --- a/xen/common/domain.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/common/domain.c Wed Aug 11 15:56:01 2010 +0100 @@ -191,6 +191,8 @@ struct vcpu *alloc_vcpu( /* Must be called after making new vcpu visible to for_each_vcpu(). */ vcpu_check_shutdown(v); + domain_update_node_affinity(d); + return v; } @@ -234,6 +236,8 @@ struct domain *domain_create( spin_lock_init(&d->hypercall_deadlock_mutex); INIT_PAGE_LIST_HEAD(&d->page_list); INIT_PAGE_LIST_HEAD(&d->xenpage_list); + + spin_lock_init(&d->node_affinity_lock); spin_lock_init(&d->shutdown_lock); d->shutdown_code = -1; @@ -338,6 +342,27 @@ struct domain *domain_create( xfree(d->pirq_to_evtchn); free_domain_struct(d); return NULL; +} + + +void domain_update_node_affinity(struct domain *d) +{ + cpumask_t cpumask = CPU_MASK_NONE; + nodemask_t nodemask = NODE_MASK_NONE; + struct vcpu *v; + unsigned int node; + + spin_lock(&d->node_affinity_lock); + + for_each_vcpu ( d, v ) + cpus_or(cpumask, cpumask, v->cpu_affinity); + + for_each_online_node ( node ) + if ( cpus_intersects(node_to_cpumask(node), cpumask) ) + node_set(node, nodemask); + + d->node_affinity = nodemask; + spin_unlock(&d->node_affinity_lock); } diff -r ca51dba3a7b1 -r 3b839375d5bc xen/common/grant_table.c --- a/xen/common/grant_table.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/common/grant_table.c Wed Aug 11 15:56:01 2010 +0100 @@ -109,7 +109,7 @@ static unsigned inline int max_nr_maptra #define gfn_to_mfn_private(_d, _gfn) ({ \ p2m_type_t __p2mt; \ unsigned long __x; \ - __x = mfn_x(gfn_to_mfn_unshare(_d, _gfn, &__p2mt, 1)); \ + __x = mfn_x(gfn_to_mfn_unshare(p2m_get_hostp2m(_d), _gfn, &__p2mt, 1)); \ if ( !p2m_is_valid(__p2mt) ) \ __x = INVALID_MFN; \ __x; }) @@ -1933,12 +1933,13 @@ __gnttab_copy( { #ifdef CONFIG_X86 p2m_type_t p2mt; - s_frame = mfn_x(gfn_to_mfn(sd, op->source.u.gmfn, &p2mt)); + struct p2m_domain *p2m = p2m_get_hostp2m(sd); + s_frame = mfn_x(gfn_to_mfn(p2m, op->source.u.gmfn, &p2mt)); if ( !p2m_is_valid(p2mt) ) s_frame = INVALID_MFN; if ( p2m_is_paging(p2mt) ) { - p2m_mem_paging_populate(sd, op->source.u.gmfn); + p2m_mem_paging_populate(p2m, op->source.u.gmfn); rc = -ENOENT; goto error_out; } @@ -1979,12 +1980,13 @@ __gnttab_copy( { #ifdef CONFIG_X86 p2m_type_t p2mt; - d_frame = mfn_x(gfn_to_mfn_unshare(dd, op->dest.u.gmfn, &p2mt, 1)); + struct p2m_domain *p2m = p2m_get_hostp2m(dd); + d_frame = mfn_x(gfn_to_mfn_unshare(p2m, op->dest.u.gmfn, &p2mt, 1)); if ( !p2m_is_valid(p2mt) ) d_frame = INVALID_MFN; if ( p2m_is_paging(p2mt) ) { - p2m_mem_paging_populate(dd, op->dest.u.gmfn); + p2m_mem_paging_populate(p2m, op->dest.u.gmfn); rc = -ENOENT; goto error_out; } diff -r ca51dba3a7b1 -r 3b839375d5bc xen/common/memory.c --- a/xen/common/memory.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/common/memory.c Wed Aug 11 15:56:01 2010 +0100 @@ -161,7 +161,7 @@ int guest_remove_page(struct domain *d, unsigned long mfn; #ifdef CONFIG_X86 - mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt)); + mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(d), gmfn, &p2mt)); #else mfn = gmfn_to_mfn(d, gmfn); #endif @@ -259,7 +259,7 @@ static long memory_exchange(XEN_GUEST_HA unsigned long in_chunk_order, out_chunk_order; xen_pfn_t gpfn, gmfn, mfn; unsigned long i, j, k; - unsigned int node, memflags = 0; + unsigned int memflags = 0; long rc = 0; struct domain *d; struct page_info *page; @@ -324,10 +324,7 @@ static long memory_exchange(XEN_GUEST_HA d, XENMEMF_get_address_bits(exch.out.mem_flags) ? : (BITS_PER_LONG+PAGE_SHIFT))); - node = XENMEMF_get_node(exch.out.mem_flags); - if ( node == NUMA_NO_NODE ) - node = domain_to_node(d); - memflags |= MEMF_node(node); + memflags |= MEMF_node(XENMEMF_get_node(exch.out.mem_flags)); for ( i = (exch.nr_exchanged >> in_chunk_order); i < (exch.in.nr_extents >> in_chunk_order); @@ -359,7 +356,7 @@ static long memory_exchange(XEN_GUEST_HA p2m_type_t p2mt; /* Shared pages cannot be exchanged */ - mfn = mfn_x(gfn_to_mfn_unshare(d, gmfn + k, &p2mt, 0)); + mfn = mfn_x(gfn_to_mfn_unshare(p2m_get_hostp2m(d), gmfn + k, &p2mt, 0)); if ( p2m_is_shared(p2mt) ) { rc = -ENOMEM; @@ -545,7 +542,7 @@ long do_memory_op(unsigned long cmd, XEN } args.memflags |= MEMF_node(XENMEMF_get_node(reservation.mem_flags)); - if (reservation.mem_flags & XENMEMF_exact_node_request) + if ( reservation.mem_flags & XENMEMF_exact_node_request ) args.memflags |= MEMF_exact_node; if ( op == XENMEM_populate_physmap diff -r ca51dba3a7b1 -r 3b839375d5bc xen/common/page_alloc.c --- a/xen/common/page_alloc.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/common/page_alloc.c Wed Aug 11 15:56:01 2010 +0100 @@ -295,20 +295,29 @@ static unsigned long init_node_heap(int /* Allocate 2^@order contiguous pages. */ static struct page_info *alloc_heap_pages( unsigned int zone_lo, unsigned int zone_hi, - unsigned int node, unsigned int order, unsigned int memflags) -{ - unsigned int i, j, zone = 0; - unsigned int num_nodes = num_online_nodes(); + unsigned int order, unsigned int memflags, + struct domain *d) +{ + unsigned int first_node, i, j, zone = 0, nodemask_retry = 0; + unsigned int node = (uint8_t)((memflags >> _MEMF_node) - 1); unsigned long request = 1UL << order; - bool_t exact_node_request = !!(memflags & MEMF_exact_node); cpumask_t extra_cpus_mask, mask; struct page_info *pg; + nodemask_t nodemask = (d != NULL ) ? d->node_affinity : node_online_map; if ( node == NUMA_NO_NODE ) { - node = cpu_to_node(smp_processor_id()); - exact_node_request = 0; - } + memflags &= ~MEMF_exact_node; + if ( d != NULL ) + { + node = next_node(d->last_alloc_node, nodemask); + if ( node >= MAX_NUMNODES ) + node = first_node(nodemask); + } + if ( node >= MAX_NUMNODES ) + node = cpu_to_node(smp_processor_id()); + } + first_node = node; ASSERT(node >= 0); ASSERT(zone_lo <= zone_hi); @@ -335,7 +344,7 @@ static struct page_info *alloc_heap_page * zone before failing, only calc new node value if we fail to find memory * in target node, this avoids needless computation on fast-path. */ - for ( i = 0; i < num_nodes; i++ ) + for ( ; ; ) { zone = zone_hi; do { @@ -349,18 +358,35 @@ static struct page_info *alloc_heap_page goto found; } while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */ - if ( exact_node_request ) + if ( memflags & MEMF_exact_node ) goto not_found; - /* Pick next node, wrapping around if needed. */ - node = next_node(node, node_online_map); - if (node == MAX_NUMNODES) - node = first_node(node_online_map); + /* Pick next node. */ + if ( !node_isset(node, nodemask) ) + { + /* Very first node may be caller-specified and outside nodemask. */ + ASSERT(!nodemask_retry); + first_node = node = first_node(nodemask); + if ( node < MAX_NUMNODES ) + continue; + } + else if ( (node = next_node(node, nodemask)) >= MAX_NUMNODES ) + node = first_node(nodemask); + if ( node == first_node ) + { + /* When we have tried all in nodemask, we fall back to others. */ + if ( nodemask_retry++ ) + goto not_found; + nodes_andnot(nodemask, node_online_map, nodemask); + first_node = node = first_node(nodemask); + if ( node >= MAX_NUMNODES ) + goto not_found; + } } try_tmem: /* Try to free memory from tmem */ - if ( (pg = tmem_relinquish_pages(order,memflags)) != NULL ) + if ( (pg = tmem_relinquish_pages(order, memflags)) != NULL ) { /* reassigning an already allocated anonymous heap page */ spin_unlock(&heap_lock); @@ -385,6 +411,9 @@ static struct page_info *alloc_heap_page avail[node][zone] -= request; total_avail_pages -= request; ASSERT(total_avail_pages >= 0); + + if ( d != NULL ) + d->last_alloc_node = node; spin_unlock(&heap_lock); @@ -1010,7 +1039,7 @@ void *alloc_xenheap_pages(unsigned int o ASSERT(!in_irq()); pg = alloc_heap_pages(MEMZONE_XEN, MEMZONE_XEN, - cpu_to_node(smp_processor_id()), order, memflags); + order, memflags, NULL); if ( unlikely(pg == NULL) ) return NULL; @@ -1153,24 +1182,21 @@ struct page_info *alloc_domheap_pages( { struct page_info *pg = NULL; unsigned int bits = memflags >> _MEMF_bits, zone_hi = NR_ZONES - 1; - unsigned int node = (uint8_t)((memflags >> _MEMF_node) - 1), dma_zone; + unsigned int dma_zone; ASSERT(!in_irq()); - - if ( (node == NUMA_NO_NODE) && (d != NULL) ) - node = domain_to_node(d); bits = domain_clamp_alloc_bitsize(d, bits ? : (BITS_PER_LONG+PAGE_SHIFT)); if ( (zone_hi = min_t(unsigned int, bits_to_zone(bits), zone_hi)) == 0 ) return NULL; if ( dma_bitsize && ((dma_zone = bits_to_zone(dma_bitsize)) < zone_hi) ) - pg = alloc_heap_pages(dma_zone + 1, zone_hi, node, order, memflags); + pg = alloc_heap_pages(dma_zone + 1, zone_hi, order, memflags, d); if ( (pg == NULL) && ((memflags & MEMF_no_dma) || - ((pg = alloc_heap_pages(MEMZONE_XEN + 1, zone_hi, - node, order, memflags)) == NULL)) ) + ((pg = alloc_heap_pages(MEMZONE_XEN + 1, zone_hi, order, + memflags, d)) == NULL)) ) return NULL; if ( (d != NULL) && assign_pages(d, pg, order, memflags) ) diff -r ca51dba3a7b1 -r 3b839375d5bc xen/common/sched_credit.c --- a/xen/common/sched_credit.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/common/sched_credit.c Wed Aug 11 15:56:01 2010 +0100 @@ -64,7 +64,8 @@ /* * Flags */ -#define CSCHED_FLAG_VCPU_PARKED 0x0001 /* VCPU over capped credits */ +#define CSCHED_FLAG_VCPU_PARKED 0x0001 /* VCPU over capped credits */ +#define CSCHED_FLAG_VCPU_YIELD 0x0002 /* VCPU yielding */ /* @@ -106,6 +107,12 @@ #endif /* CSCHED_STATS */ + +/* + * Boot parameters + */ +int sched_credit_default_yield = 0; +boolean_param("sched_credit_default_yield", sched_credit_default_yield); /* * Physical CPU @@ -202,6 +209,18 @@ __runq_insert(unsigned int cpu, struct c break; } + /* If the vcpu yielded, try to put it behind one lower-priority + * runnable vcpu if we can. The next runq_sort will bring it forward + * within 30ms if the queue too long. */ + if ( svc->flags & CSCHED_FLAG_VCPU_YIELD + && __runq_elem(iter)->pri > CSCHED_PRI_IDLE ) + { + iter=iter->next; + + /* Some sanity checks */ + BUG_ON(iter == runq); + } + list_add_tail(&svc->runq_elem, iter); } @@ -748,6 +767,18 @@ csched_vcpu_wake(const struct scheduler __runq_tickle(cpu, svc); } +static void +csched_vcpu_yield(const struct scheduler *ops, struct vcpu *vc) +{ + struct csched_vcpu * const sv = CSCHED_VCPU(vc); + + if ( !sched_credit_default_yield ) + { + /* Let the scheduler know that this vcpu is trying to yield */ + sv->flags |= CSCHED_FLAG_VCPU_YIELD; + } +} + static int csched_dom_cntl( const struct scheduler *ops, @@ -1069,7 +1100,9 @@ csched_acct(void* dummy) if ( credit > CSCHED_CREDITS_PER_TSLICE ) { __csched_vcpu_acct_stop_locked(prv, svc); - credit = 0; + /* Divide credits in half, so that when it starts + * accounting again, it starts a little bit "ahead" */ + credit /= 2; atomic_set(&svc->credit, credit); } } @@ -1280,6 +1313,12 @@ csched_schedule( snext = CSCHED_VCPU(idle_vcpu[cpu]); snext->pri = CSCHED_PRI_TS_BOOST; } + + /* + * Clear YIELD flag before scheduling out + */ + if ( scurr->flags & CSCHED_FLAG_VCPU_YIELD ) + scurr->flags &= ~(CSCHED_FLAG_VCPU_YIELD); /* * SMP Load balance: @@ -1509,6 +1548,7 @@ const struct scheduler sched_credit_def .sleep = csched_vcpu_sleep, .wake = csched_vcpu_wake, + .yield = csched_vcpu_yield, .adjust = csched_dom_cntl, diff -r ca51dba3a7b1 -r 3b839375d5bc xen/common/schedule.c --- a/xen/common/schedule.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/common/schedule.c Wed Aug 11 15:56:01 2010 +0100 @@ -276,6 +276,7 @@ int sched_move_domain(struct domain *d, new_p = cycle_cpu(new_p, c->cpu_valid); } + domain_update_node_affinity(d); d->cpupool = c; SCHED_OP(DOM2OP(d), free_domdata, d->sched_priv); @@ -457,6 +458,7 @@ int cpu_disable_scheduler(unsigned int c struct vcpu *v; struct cpupool *c; int ret = 0; + bool_t affinity_broken; c = per_cpu(cpupool, cpu); if ( c == NULL ) @@ -466,6 +468,8 @@ int cpu_disable_scheduler(unsigned int c { if ( d->cpupool != c ) continue; + + affinity_broken = 0; for_each_vcpu ( d, v ) { @@ -477,6 +481,7 @@ int cpu_disable_scheduler(unsigned int c printk("Breaking vcpu affinity for domain %d vcpu %d\n", v->domain->domain_id, v->vcpu_id); cpus_setall(v->cpu_affinity); + affinity_broken = 1; } if ( v->processor == cpu ) @@ -499,7 +504,11 @@ int cpu_disable_scheduler(unsigned int c if ( v->processor == cpu ) ret = -EAGAIN; } - } + + if ( affinity_broken ) + domain_update_node_affinity(d); + } + return ret; } @@ -524,6 +533,8 @@ int vcpu_set_affinity(struct vcpu *v, cp set_bit(_VPF_migrating, &v->pause_flags); vcpu_schedule_unlock_irq(v); + + domain_update_node_affinity(v->domain); if ( test_bit(_VPF_migrating, &v->pause_flags) ) { @@ -631,6 +642,12 @@ static long do_poll(struct sched_poll *s /* Voluntarily yield the processor for this allocation. */ static long do_yield(void) { + struct vcpu * v=current; + + vcpu_schedule_lock_irq(v); + SCHED_OP(VCPU2OP(v), yield, v); + vcpu_schedule_unlock_irq(v); + TRACE_2D(TRC_SCHED_YIELD, current->domain->domain_id, current->vcpu_id); raise_softirq(SCHEDULE_SOFTIRQ); return 0; @@ -1296,7 +1313,7 @@ void schedule_cpu_switch(unsigned int cp spin_unlock_irqrestore(per_cpu(schedule_data, cpu).schedule_lock, flags); - SCHED_OP(old_ops, free_vdata, vpriv); + SCHED_OP(old_ops, free_vdata, vpriv_old); SCHED_OP(old_ops, free_pdata, ppriv_old, cpu); } diff -r ca51dba3a7b1 -r 3b839375d5bc xen/common/tmem_xen.c --- a/xen/common/tmem_xen.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/common/tmem_xen.c Wed Aug 11 15:56:01 2010 +0100 @@ -100,7 +100,7 @@ static inline void *cli_mfn_to_va(tmem_c unsigned long cli_mfn; p2m_type_t t; - cli_mfn = mfn_x(gfn_to_mfn(current->domain, cmfn, &t)); + cli_mfn = mfn_x(gfn_to_mfn(p2m_get_hostp2m(current->domain), cmfn, &t)); if (t != p2m_ram_rw || cli_mfn == INVALID_MFN) return NULL; if (pcli_mfn != NULL) diff -r ca51dba3a7b1 -r 3b839375d5bc xen/common/xenoprof.c --- a/xen/common/xenoprof.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/common/xenoprof.c Wed Aug 11 15:56:01 2010 +0100 @@ -881,6 +881,20 @@ int do_xenoprof_op(int op, XEN_GUEST_HAN ret = -EFAULT; break; + case XENOPROF_ibs_counter: + if ( (xenoprof_state != XENOPROF_COUNTERS_RESERVED) || + (adomains == 0) ) + { + ret = -EPERM; + break; + } + ret = xenoprof_arch_ibs_counter(arg); + break; + + case XENOPROF_get_ibs_caps: + ret = ibs_caps; + break; + default: ret = -ENOSYS; } diff -r ca51dba3a7b1 -r 3b839375d5bc xen/drivers/passthrough/vtd/intremap.c --- a/xen/drivers/passthrough/vtd/intremap.c Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/drivers/passthrough/vtd/intremap.c Wed Aug 11 15:56:01 2010 +0100 @@ -440,14 +440,21 @@ void io_apic_write_remap_rte( { *IO_APIC_BASE(apic) = rte_upper ? (reg + 1) : reg; *(IO_APIC_BASE(apic)+4) = value; + + /* Recover the original value of 'mask' bit */ + if ( rte_upper ) + { + *IO_APIC_BASE(apic) = reg; + *(IO_APIC_BASE(apic)+4) = *(((u32 *)&old_rte)+0); + } return; } /* write new entry to ioapic */ + *IO_APIC_BASE(apic) = reg + 1; + *(IO_APIC_BASE(apic)+4) = *(((u32 *)&old_rte)+1); *IO_APIC_BASE(apic) = reg; *(IO_APIC_BASE(apic)+4) = *(((u32 *)&old_rte)+0); - *IO_APIC_BASE(apic) = reg + 1; - *(IO_APIC_BASE(apic)+4) = *(((u32 *)&old_rte)+1); } #if defined(__i386__) || defined(__x86_64__) diff -r ca51dba3a7b1 -r 3b839375d5bc xen/include/asm-ia64/xenoprof.h --- a/xen/include/asm-ia64/xenoprof.h Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/include/asm-ia64/xenoprof.h Wed Aug 11 15:56:01 2010 +0100 @@ -33,6 +33,13 @@ void xenoprof_arch_stop(void); void xenoprof_arch_stop(void); void xenoprof_arch_disable_virq(void); void xenoprof_arch_release_counters(void); + +static inline int xenoprof_arch_ibs_counter(XEN_GUEST_HANDLE(void) arg) +{ + return -ENOSYS; /* not supported */ +} +/* AMD IBS not supported */ +#define ibs_caps 0 struct vcpu; struct cpu_user_regs; diff -r ca51dba3a7b1 -r 3b839375d5bc xen/include/asm-x86/guest_pt.h --- a/xen/include/asm-x86/guest_pt.h Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/include/asm-x86/guest_pt.h Wed Aug 11 15:56:01 2010 +0100 @@ -272,8 +272,8 @@ guest_walk_to_gpa(walk_t *gw) #define guest_walk_tables GPT_RENAME(guest_walk_tables, GUEST_PAGING_LEVELS) extern uint32_t -guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, - uint32_t pfec, mfn_t top_mfn, void *top_map); +guest_walk_tables(struct vcpu *v, struct p2m_domain *p2m, unsigned long va, + walk_t *gw, uint32_t pfec, mfn_t top_mfn, void *top_map); /* Pretty-print the contents of a guest-walk */ static inline void print_gw(walk_t *gw) diff -r ca51dba3a7b1 -r 3b839375d5bc xen/include/asm-x86/mem_sharing.h --- a/xen/include/asm-x86/mem_sharing.h Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/include/asm-x86/mem_sharing.h Wed Aug 11 15:56:01 2010 +0100 @@ -30,17 +30,17 @@ typedef uint64_t shr_handle_t; typedef uint64_t shr_handle_t; unsigned int mem_sharing_get_nr_saved_mfns(void); -int mem_sharing_nominate_page(struct domain *d, +int mem_sharing_nominate_page(struct p2m_domain *p2m, unsigned long gfn, int expected_refcnt, shr_handle_t *phandle); #define MEM_SHARING_MUST_SUCCEED (1<<0) #define MEM_SHARING_DESTROY_GFN (1<<1) -int mem_sharing_unshare_page(struct domain *d, +int mem_sharing_unshare_page(struct p2m_domain *p2m, unsigned long gfn, uint16_t flags); int mem_sharing_sharing_resume(struct domain *d); -int mem_sharing_cache_resize(struct domain *d, int new_size); +int mem_sharing_cache_resize(struct p2m_domain *p2m, int new_size); int mem_sharing_domctl(struct domain *d, xen_domctl_mem_sharing_op_t *mec); void mem_sharing_init(void); diff -r ca51dba3a7b1 -r 3b839375d5bc xen/include/asm-x86/p2m.h --- a/xen/include/asm-x86/p2m.h Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/include/asm-x86/p2m.h Wed Aug 11 15:56:01 2010 +0100 @@ -172,23 +172,28 @@ struct p2m_domain { /* Shadow translated domain: p2m mapping */ pagetable_t phys_table; + struct domain *domain; /* back pointer to domain */ + /* Pages used to construct the p2m */ struct page_list_head pages; /* Functions to call to get or free pages for the p2m */ - struct page_info * (*alloc_page )(struct domain *d); - void (*free_page )(struct domain *d, + struct page_info * (*alloc_page )(struct p2m_domain *p2m); + void (*free_page )(struct p2m_domain *p2m, struct page_info *pg); - int (*set_entry )(struct domain *d, unsigned long gfn, + int (*set_entry )(struct p2m_domain *p2m, + unsigned long gfn, mfn_t mfn, unsigned int page_order, p2m_type_t p2mt); - mfn_t (*get_entry )(struct domain *d, unsigned long gfn, + mfn_t (*get_entry )(struct p2m_domain *p2m, + unsigned long gfn, p2m_type_t *p2mt, p2m_query_t q); - mfn_t (*get_entry_current)(unsigned long gfn, + mfn_t (*get_entry_current)(struct p2m_domain *p2m, + unsigned long gfn, p2m_type_t *p2mt, p2m_query_t q); - void (*change_entry_type_global)(struct domain *d, + void (*change_entry_type_global)(struct p2m_domain *p2m, p2m_type_t ot, p2m_type_t nt); @@ -279,65 +284,64 @@ static inline p2m_type_t p2m_flags_to_ty } /* Read the current domain's p2m table. Do not populate PoD pages. */ -static inline mfn_t gfn_to_mfn_type_current(unsigned long gfn, p2m_type_t *t, +static inline mfn_t gfn_to_mfn_type_current(struct p2m_domain *p2m, + unsigned long gfn, p2m_type_t *t, p2m_query_t q) { - return current->domain->arch.p2m->get_entry_current(gfn, t, q); -} - -/* Read another domain's P2M table, mapping pages as we go. + return p2m->get_entry_current(p2m, gfn, t, q); +} + +/* Read P2M table, mapping pages as we go. * Do not populate PoD pages. */ -static inline -mfn_t gfn_to_mfn_type_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t, - p2m_query_t q) -{ - return d->arch.p2m->get_entry(d, gfn, t, q); -} +static inline mfn_t +gfn_to_mfn_type_p2m(struct p2m_domain *p2m, unsigned long gfn, + p2m_type_t *t, p2m_query_t q) +{ + return p2m->get_entry(p2m, gfn, t, q); +} + /* General conversion function from gfn to mfn */ -static inline mfn_t _gfn_to_mfn_type(struct domain *d, +static inline mfn_t _gfn_to_mfn_type(struct p2m_domain *p2m, unsigned long gfn, p2m_type_t *t, p2m_query_t q) { - if ( !paging_mode_translate(d) ) + if ( !p2m || !paging_mode_translate(p2m->domain) ) { /* Not necessarily true, but for non-translated guests, we claim * it's the most generic kind of memory */ *t = p2m_ram_rw; return _mfn(gfn); } - if ( likely(current->domain == d) ) - return gfn_to_mfn_type_current(gfn, t, q); + if ( likely(current->domain == p2m->domain) ) + return gfn_to_mfn_type_current(p2m, gfn, t, q); else - return gfn_to_mfn_type_foreign(d, gfn, t, q); -} - -#define gfn_to_mfn(d, g, t) _gfn_to_mfn_type((d), (g), (t), p2m_alloc) -#define gfn_to_mfn_query(d, g, t) _gfn_to_mfn_type((d), (g), (t), p2m_query) -#define gfn_to_mfn_guest(d, g, t) _gfn_to_mfn_type((d), (g), (t), p2m_guest) - -#define gfn_to_mfn_current(g, t) gfn_to_mfn_type_current((g), (t), p2m_alloc) -#define gfn_to_mfn_foreign(d, g, t) gfn_to_mfn_type_foreign((d), (g), (t), p2m_alloc) - -static inline mfn_t gfn_to_mfn_unshare(struct domain *d, + return gfn_to_mfn_type_p2m(p2m, gfn, t, q); +} + +#define gfn_to_mfn(p2m, g, t) _gfn_to_mfn_type((p2m), (g), (t), p2m_alloc) +#define gfn_to_mfn_query(p2m, g, t) _gfn_to_mfn_type((p2m), (g), (t), p2m_query) +#define gfn_to_mfn_guest(p2m, g, t) _gfn_to_mfn_type((p2m), (g), (t), p2m_guest) + +static inline mfn_t gfn_to_mfn_unshare(struct p2m_domain *p2m, unsigned long gfn, p2m_type_t *p2mt, int must_succeed) { mfn_t mfn; - mfn = gfn_to_mfn(d, gfn, p2mt); + mfn = gfn_to_mfn(p2m, gfn, p2mt); #ifdef __x86_64__ if ( p2m_is_shared(*p2mt) ) { - if ( mem_sharing_unshare_page(d, gfn, + if ( mem_sharing_unshare_page(p2m, gfn, must_succeed ? MEM_SHARING_MUST_SUCCEED : 0) ) { BUG_ON(must_succeed); return mfn; } - mfn = gfn_to_mfn(d, gfn, p2mt); + mfn = gfn_to_mfn(p2m, gfn, p2mt); } #endif @@ -350,7 +354,7 @@ static inline unsigned long gmfn_to_mfn( { mfn_t mfn; p2m_type_t t; - mfn = gfn_to_mfn(d, gpfn, &t); + mfn = gfn_to_mfn(d->arch.p2m, gpfn, &t); if ( p2m_is_valid(t) ) return mfn_x(mfn); return INVALID_MFN; @@ -374,16 +378,16 @@ int p2m_init(struct domain *d); * build the p2m, and to release it again at the end of day. * * Returns 0 for success or -errno. */ -int p2m_alloc_table(struct domain *d, - struct page_info * (*alloc_page)(struct domain *d), - void (*free_page)(struct domain *d, struct page_info *pg)); +int p2m_alloc_table(struct p2m_domain *p2m, + struct page_info * (*alloc_page)(struct p2m_domain *p2m), + void (*free_page)(struct p2m_domain *p2m, struct page_info *pg)); /* Return all the p2m resources to Xen. */ -void p2m_teardown(struct domain *d); +void p2m_teardown(struct p2m_domain *p2m); void p2m_final_teardown(struct domain *d); /* Dump PoD information about the domain */ -void p2m_pod_dump_data(struct domain *d); +void p2m_pod_dump_data(struct p2m_domain *p2m); /* Move all pages from the populate-on-demand cache to the domain page_list * (usually in preparation for domain destruction) */ @@ -402,14 +406,18 @@ p2m_pod_decrease_reservation(struct doma /* Called by p2m code when demand-populating a PoD page */ int -p2m_pod_demand_populate(struct domain *d, unsigned long gfn, +p2m_pod_demand_populate(struct p2m_domain *p2m, unsigned long gfn, unsigned int order, p2m_query_t q); /* Add a page to a domain's p2m table */ -int guest_physmap_add_entry(struct domain *d, unsigned long gfn, +int guest_physmap_add_entry(struct p2m_domain *p2m, unsigned long gfn, unsigned long mfn, unsigned int page_order, p2m_type_t t); + +/* Remove a page from a domain's p2m table */ +void guest_physmap_remove_entry(struct p2m_domain *p2m, unsigned long gfn, + unsigned long mfn, unsigned int page_order); /* Set a p2m range as populate-on-demand */ int guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn, @@ -419,49 +427,55 @@ int guest_physmap_mark_populate_on_deman * * Return 0 for success */ -static inline int guest_physmap_add_page(struct domain *d, unsigned long gfn, +static inline int guest_physmap_add_page(struct domain *d, + unsigned long gfn, unsigned long mfn, unsigned int page_order) { - return guest_physmap_add_entry(d, gfn, mfn, page_order, p2m_ram_rw); + return guest_physmap_add_entry(d->arch.p2m, gfn, mfn, page_order, p2m_ram_rw); } /* Remove a page from a domain's p2m table */ -void guest_physmap_remove_page(struct domain *d, unsigned long gfn, - unsigned long mfn, unsigned int page_order); +static inline void guest_physmap_remove_page(struct domain *d, + unsigned long gfn, + unsigned long mfn, unsigned int page_order) +{ + guest_physmap_remove_entry(d->arch.p2m, gfn, mfn, page_order); +} /* Change types across all p2m entries in a domain */ -void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt); -void p2m_change_entry_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt); +void p2m_change_type_global(struct p2m_domain *p2m, p2m_type_t ot, p2m_type_t nt); +void p2m_change_entry_type_global(struct p2m_domain *p2m, p2m_type_t ot, p2m_type_t nt); /* Compare-exchange the type of a single p2m entry */ -p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn, +p2m_type_t p2m_change_type(struct p2m_domain *p2m, unsigned long gfn, p2m_type_t ot, p2m_type_t nt); /* Set mmio addresses in the p2m table (for pass-through) */ -int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn); -int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn); +int set_mmio_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn); +int clear_mmio_p2m_entry(struct p2m_domain *p2m, unsigned long gfn); #ifdef __x86_64__ /* Modify p2m table for shared gfn */ -int set_shared_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn); +int set_shared_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, mfn_t mfn); + /* Check if a nominated gfn is valid to be paged out */ -int p2m_mem_paging_nominate(struct domain *d, unsigned long gfn); +int p2m_mem_paging_nominate(struct p2m_domain *p2m, unsigned long gfn); /* Evict a frame */ -int p2m_mem_paging_evict(struct domain *d, unsigned long gfn); +int p2m_mem_paging_evict(struct p2m_domain *p2m, unsigned long gfn); /* Start populating a paged out frame */ -void p2m_mem_paging_populate(struct domain *d, unsigned long gfn); +void p2m_mem_paging_populate(struct p2m_domain *p2m, unsigned long gfn); /* Prepare the p2m for paging a frame in */ -int p2m_mem_paging_prep(struct domain *d, unsigned long gfn); +int p2m_mem_paging_prep(struct p2m_domain *p2m, unsigned long gfn); /* Resume normal operation (in case a domain was paused) */ -void p2m_mem_paging_resume(struct domain *d); +void p2m_mem_paging_resume(struct p2m_domain *p2m); #else -static inline void p2m_mem_paging_populate(struct domain *d, unsigned long gfn) +static inline void p2m_mem_paging_populate(struct p2m_domain *p2m, unsigned long gfn) { } #endif -struct page_info *p2m_alloc_ptp(struct domain *d, unsigned long type); +struct page_info *p2m_alloc_ptp(struct p2m_domain *p2m, unsigned long type); #endif /* _XEN_P2M_H */ diff -r ca51dba3a7b1 -r 3b839375d5bc xen/include/asm-x86/xenoprof.h --- a/xen/include/asm-x86/xenoprof.h Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/include/asm-x86/xenoprof.h Wed Aug 11 15:56:01 2010 +0100 @@ -42,9 +42,14 @@ int xenoprof_arch_init(int *num_events, int xenoprof_arch_counter(XEN_GUEST_HANDLE(void) arg); int compat_oprof_arch_counter(XEN_GUEST_HANDLE(void) arg); +int xenoprof_arch_ibs_counter(XEN_GUEST_HANDLE(void) arg); struct vcpu; struct cpu_user_regs; + +/* AMD IBS support */ +u32 ibs_init(void); +extern u32 ibs_caps; int xenoprofile_get_mode(struct vcpu *v, struct cpu_user_regs * const regs); diff -r ca51dba3a7b1 -r 3b839375d5bc xen/include/public/xenoprof.h --- a/xen/include/public/xenoprof.h Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/include/public/xenoprof.h Wed Aug 11 15:56:01 2010 +0100 @@ -50,7 +50,11 @@ #define XENOPROF_shutdown 13 #define XENOPROF_get_buffer 14 #define XENOPROF_set_backtrace 15 -#define XENOPROF_last_op 15 + +/* AMD IBS support */ +#define XENOPROF_get_ibs_caps 16 +#define XENOPROF_ibs_counter 17 +#define XENOPROF_last_op 17 #define MAX_OPROF_EVENTS 32 #define MAX_OPROF_DOMAINS 25 @@ -124,6 +128,16 @@ typedef struct xenoprof_passive { } xenoprof_passive_t; DEFINE_XEN_GUEST_HANDLE(xenoprof_passive_t); +struct xenoprof_ibs_counter { + uint64_t op_enabled; + uint64_t fetch_enabled; + uint64_t max_cnt_fetch; + uint64_t max_cnt_op; + uint64_t rand_en; + uint64_t dispatched_ops; +}; +typedef struct xenoprof_ibs_counter xenoprof_ibs_counter_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_ibs_counter_t); #endif /* __XEN_PUBLIC_XENOPROF_H__ */ diff -r ca51dba3a7b1 -r 3b839375d5bc xen/include/xen/sched-if.h --- a/xen/include/xen/sched-if.h Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/include/xen/sched-if.h Wed Aug 11 15:56:01 2010 +0100 @@ -107,6 +107,7 @@ struct scheduler { void (*sleep) (const struct scheduler *, struct vcpu *); void (*wake) (const struct scheduler *, struct vcpu *); + void (*yield) (const struct scheduler *, struct vcpu *); void (*context_saved) (const struct scheduler *, struct vcpu *); struct task_slice (*do_schedule) (const struct scheduler *, s_time_t, diff -r ca51dba3a7b1 -r 3b839375d5bc xen/include/xen/sched.h --- a/xen/include/xen/sched.h Wed Aug 11 15:51:04 2010 +0100 +++ b/xen/include/xen/sched.h Wed Aug 11 15:56:01 2010 +0100 @@ -23,6 +23,8 @@ #include <xen/mm.h> #include <xen/tasklet.h> #include <public/mem_event.h> +#include <xen/cpumask.h> +#include <xen/nodemask.h> #ifdef CONFIG_COMPAT #include <compat/vcpu.h> @@ -326,6 +328,11 @@ struct domain /* Memory paging support */ struct mem_event_domain mem_event; + + /* Currently computed from union of all vcpu cpu-affinity masks. */ + nodemask_t node_affinity; + unsigned int last_alloc_node; + spinlock_t node_affinity_lock; }; struct domain_setup_info @@ -393,6 +400,8 @@ static inline void get_knownalive_domain ASSERT(!(atomic_read(&d->refcnt) & DOMAIN_DESTROYED)); } +void domain_update_node_affinity(struct domain *d); + struct domain *domain_create( domid_t domid, unsigned int domcr_flags, ssidref_t ssidref); /* DOMCRF_hvm: Create an HVM domain, as opposed to a PV domain. */ _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |