Re: [Xen-devel] [PATCH v3 1/5] EPT: force re-evaluation of memory type as necessary
> From: Jan Beulich
> Sent: Monday, April 07, 2014 6:08 PM
>
> The main goal here is to drop the bogus dependency of
> epte_get_entry_emt() on d->arch.hvm_domain.params[HVM_PARAM_IDENT_PT].
>
> Any change to state influencing epte_get_entry_emt()'s decision needs
> to result in re-calculation. Do this by using the EPT_MISCONFIG VM
> exit, storing an invalid memory type into EPT's emt field (leaving the
> IOMMU, which doesn't care about memory types, unaffected).
>
> This is being done in a hierarchical manner to keep execution time
> down: Initially only the top level directory gets invalidated this way.
> Upon access, the involved intermediate page table levels get cleared
> back to zero, and the leaf entry gets its field properly set. For 4k
> leaves all other entries in the same directory also get processed to
> amortize the cost of the extra VM exit (which halved the number of
> these VM exits in my testing).
>
> This restoring can result in spurious EPT_MISCONFIG VM exits (since
> two vCPUs may access addresses involving identical page table
> structures). Rather than simply returning in such cases (and risking
> that such a VM exit results from a real mis-configuration, which
> would then result in an endless loop rather than killing the VM), a
> per-vCPU flag is being introduced indicating when such a spurious VM
> exit might validly happen - if another one occurs right after VM
> re-entry, the flag would generally end up being clear, causing the VM
> to be killed as before on such VM exits.
>
> Note that putting a reserved memory type value in the EPT structures
> isn't formally sanctioned by the specification. Intel isn't willing to
> adjust the specification to make this or a similar use of the
> EPT_MISCONFIG VM exit formally possible, but they have indicated that
> our use of it is low risk wrt forward compatibility.
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> Reviewed-by: Tim Deegan <tim@xxxxxxx>
> ---
> v3: Code movement.

Is there any data on the improvement over the original code? One small
comment: would it be clearer to define a new macro to indicate the
purpose, instead of using MTRR_NUM_TYPES directly?

Acked-by: Kevin Tian <kevin.tian@xxxxxxxxx>

Thanks
Kevin
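To make the hierarchical scheme described above concrete, here is a
minimal standalone sketch. This is not Xen code: the two-level layout,
the entry count, and all names (EMT_INVALID standing in for
MTRR_NUM_TYPES, compute_emt() for epte_get_entry_emt(), and the two
handler functions) are illustrative stand-ins. A state change poisons
only the top-level directory; the first faulting access through a
poisoned directory repairs a whole directory's worth of leaves at once.

#include <stdio.h>

#define ENTRIES     4   /* entries per table level (tiny, for illustration) */
#define EMT_INVALID 7   /* sentinel playing the role of MTRR_NUM_TYPES */

struct entry {
    int emt;            /* cached effective memory type */
};

static struct entry top[ENTRIES];             /* the "directory" level */
static struct entry leaf[ENTRIES][ENTRIES];   /* the 4k "leaf" level */

/* Stand-in for epte_get_entry_emt(): any deterministic policy will do. */
static int compute_emt(unsigned int gfn)
{
    return (gfn & 1) ? 4 : 6;
}

/* The cheap global invalidation: poison only the top level, O(ENTRIES). */
static void memory_type_changed(void)
{
    unsigned int i;

    for ( i = 0; i < ENTRIES; i++ )
        top[i].emt = EMT_INVALID;
}

/*
 * The "EPT_MISCONFIG handler": the first access through a poisoned
 * directory un-poisons it and recomputes every leaf beneath it, so a
 * single fault amortizes the repair over the whole directory.
 */
static void handle_misconfig(unsigned int gfn)
{
    unsigned int dir = gfn / ENTRIES, i;

    if ( top[dir].emt != EMT_INVALID )
        return;                 /* spurious: already repaired by another vCPU */

    top[dir].emt = 0;
    for ( i = 0; i < ENTRIES; i++ )
        leaf[dir][i].emt = compute_emt(dir * ENTRIES + i);
}

int main(void)
{
    memory_type_changed();      /* e.g. a guest MTRR or PAT write */
    handle_misconfig(5);        /* first access through directory 1 "faults" */
    printf("gfn 5 -> emt %d\n", leaf[1][1].emt);
    return 0;
}

The quoted hunks below implement the same idea against the real EPT
structures, with locking, IOMMU considerations, and the per-vCPU
spurious-exit bookkeeping the description mentions.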
> --- a/xen/arch/x86/domctl.c
> +++ b/xen/arch/x86/domctl.c
> @@ -84,6 +84,8 @@ long arch_do_domctl(
>              ret = ioports_permit_access(d, fp, fp + np - 1);
>          else
>              ret = ioports_deny_access(d, fp, fp + np - 1);
> +        if ( !ret )
> +            memory_type_changed(d);
>      }
>      break;
>
> @@ -705,6 +707,8 @@ long arch_do_domctl(
>                     ret, add ? "removing" : "denying", d->domain_id,
>                     mfn, mfn + nr_mfns - 1);
>          }
> +        /* Do this unconditionally to cover errors on above failure paths. */
> +        memory_type_changed(d);
>      }
>      break;
>
> @@ -791,6 +795,8 @@ long arch_do_domctl(
>                     "ioport_map: error %ld denying dom%d access to [%x,%x]\n",
>                     ret, d->domain_id, fmp, fmp + np - 1);
>          }
> +        if ( !ret )
> +            memory_type_changed(d);
>      }
>      break;
>
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -252,6 +252,9 @@ int hvm_set_guest_pat(struct vcpu *v, u6
>
>      if ( !hvm_funcs.set_guest_pat(v, guest_pat) )
>          v->arch.hvm_vcpu.pat_cr = guest_pat;
> +
> +    memory_type_changed(v->domain);
> +
>      return 1;
>  }
>
> --- a/xen/arch/x86/hvm/mtrr.c
> +++ b/xen/arch/x86/hvm/mtrr.c
> @@ -431,8 +431,12 @@ bool_t mtrr_def_type_msr_set(struct doma
>          return 0;
>      }
>
> -    m->enabled = enabled;
> -    m->def_type = def_type;
> +    if ( m->enabled != enabled || m->def_type != def_type )
> +    {
> +        m->enabled = enabled;
> +        m->def_type = def_type;
> +        memory_type_changed(d);
> +    }
>
>      return 1;
>  }
> @@ -452,6 +456,7 @@ bool_t mtrr_fix_range_msr_set(struct dom
>              return 0;
>
>          fixed_range_base[row] = msr_content;
> +        memory_type_changed(d);
>      }
>
>      return 1;
>  }
> @@ -496,6 +501,8 @@ bool_t mtrr_var_range_msr_set(
>
>      m->overlapped = is_var_mtrr_overlapped(m);
>
> +    memory_type_changed(d);
> +
>      return 1;
>  }
> @@ -690,6 +697,12 @@ static int hvm_load_mtrr_msr(struct doma
>  HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save_mtrr_msr, hvm_load_mtrr_msr,
>                            1, HVMSR_PER_VCPU);
>
> +void memory_type_changed(struct domain *d)
> +{
> +    if ( iommu_enabled && !iommu_snoop && d->vcpu && d->vcpu[0] )
> +        p2m_memory_type_changed(d);
> +}
> +
>  uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn,
>                             uint8_t *ipat, bool_t direct_mmio)
>  {
> @@ -733,8 +746,7 @@ uint8_t epte_get_entry_emt(struct domain
>          return MTRR_TYPE_WRBACK;
>      }
>
> -    gmtrr_mtype = is_hvm_domain(d) && v &&
> -                  d->arch.hvm_domain.params[HVM_PARAM_IDENT_PT] ?
> +    gmtrr_mtype = is_hvm_domain(d) && v ?
>                    get_mtrr_type(&v->arch.hvm_vcpu.mtrr, gfn << PAGE_SHIFT) :
>                    MTRR_TYPE_WRBACK;
>
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -3016,6 +3016,16 @@ void vmx_vmexit_handler(struct cpu_user_
>          break;
>      }
>
> +    case EXIT_REASON_EPT_MISCONFIG:
> +    {
> +        paddr_t gpa;
> +
> +        __vmread(GUEST_PHYSICAL_ADDRESS, &gpa);
> +        if ( !ept_handle_misconfig(gpa) )
> +            goto exit_and_crash;
> +        break;
> +    }
> +
>      case EXIT_REASON_MONITOR_TRAP_FLAG:
>          v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
>          vmx_update_cpu_exec_control(v);
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -200,6 +200,18 @@ void p2m_change_entry_type_global(struct
>      p2m_unlock(p2m);
>  }
>
> +void p2m_memory_type_changed(struct domain *d)
> +{
> +    struct p2m_domain *p2m = p2m_get_hostp2m(d);
> +
> +    if ( p2m->memory_type_changed )
> +    {
> +        p2m_lock(p2m);
> +        p2m->memory_type_changed(p2m);
> +        p2m_unlock(p2m);
> +    }
> +}
> +
>  mfn_t __get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn,
>                              p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
>                              unsigned int *page_order, bool_t locked)
> --- a/xen/arch/x86/mm/p2m-ept.c
> +++ b/xen/arch/x86/mm/p2m-ept.c
> @@ -270,6 +270,124 @@ static int ept_next_level(struct p2m_dom
>      return GUEST_TABLE_NORMAL_PAGE;
>  }
>
> +static bool_t ept_invalidate_emt(mfn_t mfn)
> +{
> +    ept_entry_t *epte = map_domain_page(mfn_x(mfn));
> +    unsigned int i;
> +    bool_t changed = 0;
> +
> +    for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
> +    {
> +        ept_entry_t e = atomic_read_ept_entry(&epte[i]);
> +
> +        if ( !is_epte_valid(&e) || !is_epte_present(&e) ||
> +             e.emt == MTRR_NUM_TYPES )
> +            continue;
> +
> +        e.emt = MTRR_NUM_TYPES;
> +        atomic_write_ept_entry(&epte[i], e);
> +        changed = 1;
> +    }
> +
> +    unmap_domain_page(epte);
> +
> +    return changed;
> +}
> +
> +bool_t ept_handle_misconfig(uint64_t gpa)
> +{
> +    struct vcpu *curr = current;
> +    struct p2m_domain *p2m = p2m_get_hostp2m(curr->domain);
> +    struct ept_data *ept = &p2m->ept;
> +    unsigned int level = ept_get_wl(ept);
> +    unsigned long gfn = PFN_DOWN(gpa);
> +    unsigned long mfn = ept_get_asr(ept);
> +    ept_entry_t *epte;
> +    int okay;
> +
> +    if ( !mfn )
> +        return 0;
> +
> +    p2m_lock(p2m);
> +
> +    okay = -curr->arch.hvm_vmx.ept_spurious_misconfig;
> +    for ( ; ; --level )
> +    {
> +        ept_entry_t e;
> +        unsigned int i;
> +
> +        epte = map_domain_page(mfn);
> +        i = (gfn >> (level * EPT_TABLE_ORDER)) & (EPT_PAGETABLE_ENTRIES - 1);
> +        e = atomic_read_ept_entry(&epte[i]);
> +
> +        if ( level == 0 || is_epte_superpage(&e) )
> +        {
> +            uint8_t ipat = 0;
> +
> +            if ( e.emt != MTRR_NUM_TYPES )
> +                break;
> +
> +            if ( level == 0 )
> +            {
> +                for ( gfn -= i, i = 0; i < EPT_PAGETABLE_ENTRIES; ++i )
> +                {
> +                    e = atomic_read_ept_entry(&epte[i]);
> +                    if ( e.emt == MTRR_NUM_TYPES )
> +                        e.emt = 0;
> +                    if ( !is_epte_valid(&e) || !is_epte_present(&e) )
> +                        continue;
> +                    e.emt = epte_get_entry_emt(p2m->domain, gfn + i,
> +                                               _mfn(e.mfn), &ipat,
> +                                               e.sa_p2mt == p2m_mmio_direct);
> +                    e.ipat = ipat;
> +                    atomic_write_ept_entry(&epte[i], e);
> +                }
> +            }
> +            else
> +            {
> +                e.emt = epte_get_entry_emt(p2m->domain, gfn, _mfn(e.mfn),
> +                                           &ipat,
> +                                           e.sa_p2mt == p2m_mmio_direct);
> +                e.ipat = ipat;
> +                atomic_write_ept_entry(&epte[i], e);
> +            }
> +
> +            okay = 1;
> +            break;
> +        }
> +
> +        if ( e.emt == MTRR_NUM_TYPES )
> +        {
> +            ASSERT(is_epte_present(&e));
> +            ept_invalidate_emt(_mfn(e.mfn));
> +            e.emt = 0;
> +            atomic_write_ept_entry(&epte[i], e);
> +            unmap_domain_page(epte);
> +            okay = 1;
> +        }
> +        else if ( is_epte_present(&e) && !e.emt )
> +            unmap_domain_page(epte);
> +        else
> +            break;
> +
> +        mfn = e.mfn;
> +    }
> +
> +    unmap_domain_page(epte);
> +    if ( okay > 0 )
> +    {
> +        struct vcpu *v;
> +
> +        for_each_vcpu ( curr->domain, v )
> +            v->arch.hvm_vmx.ept_spurious_misconfig = 1;
> +    }
> +    curr->arch.hvm_vmx.ept_spurious_misconfig = 0;
> +    ept_sync_domain(p2m);
> +    p2m_unlock(p2m);
> +
> +    return !!okay;
> +}
> +
>  /*
>   * ept_set_entry() computes 'need_modify_vtd_table' for itself,
>   * by observing whether any gfn->mfn translations are modified.
> @@ -660,6 +778,17 @@ static void ept_change_entry_type_global
>      ept_sync_domain(p2m);
>  }
>
> +static void ept_memory_type_changed(struct p2m_domain *p2m)
> +{
> +    unsigned long mfn = ept_get_asr(&p2m->ept);
> +
> +    if ( !mfn )
> +        return;
> +
> +    if ( ept_invalidate_emt(_mfn(mfn)) )
> +        ept_sync_domain(p2m);
> +}
> +
>  static void __ept_sync_domain(void *info)
>  {
>      struct ept_data *ept = &((struct p2m_domain *)info)->ept;
> @@ -697,6 +826,7 @@ int ept_p2m_init(struct p2m_domain *p2m)
>      p2m->set_entry = ept_set_entry;
>      p2m->get_entry = ept_get_entry;
>      p2m->change_entry_type_global = ept_change_entry_type_global;
> +    p2m->memory_type_changed = ept_memory_type_changed;
>      p2m->audit_p2m = NULL;
>
>      /* Set the memory type used when accessing EPT paging structures. */
> @@ -737,6 +867,7 @@ static void ept_dump_p2m_table(unsigned
>          [MTRR_TYPE_WRTHROUGH] = "WT",
>          [MTRR_TYPE_WRPROT]    = "WP",
>          [MTRR_TYPE_WRBACK]    = "WB",
> +        [MTRR_NUM_TYPES]      = "??"
>      };
>
>      for_each_domain(d)
> @@ -750,11 +881,16 @@ static void ept_dump_p2m_table(unsigned
>
>      for ( gfn = 0; gfn <= p2m->max_mapped_pfn; gfn += 1UL << order )
>      {
> +        char c = 0;
> +
>          gfn_remainder = gfn;
>          table = map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m)));
>
>          for ( i = ept_get_wl(ept); i > 0; i-- )
>          {
> +            ept_entry = table + (gfn_remainder >> (i * EPT_TABLE_ORDER));
> +            if ( ept_entry->emt == MTRR_NUM_TYPES )
> +                c = '?';
>              ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i);
>              if ( ret != GUEST_TABLE_NORMAL_PAGE )
>                  break;
> @@ -775,7 +911,7 @@ static void ept_dump_p2m_table(unsigned
>                     memory_types[ept_entry->emt][0],
>                     memory_types[ept_entry->emt][1]
>                     ?: ept_entry->emt + '0',
> -                   ept_entry->ipat ? '!' : ' ');
> +                   c ?: ept_entry->ipat ? '!' : ' ');
>
>              if ( !(record_counter++ % 100) )
>                  process_pending_softirqs();
> --- a/xen/common/domctl.c
> +++ b/xen/common/domctl.c
> @@ -815,6 +815,10 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
>              ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1);
>          else
>              ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1);
> +#ifdef CONFIG_X86
> +        if ( !ret )
> +            memory_type_changed(d);
> +#endif
>      }
>      break;
>
> --- a/xen/include/asm-x86/hvm/vmx/vmcs.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
> @@ -124,6 +124,9 @@ struct arch_vmx_struct {
>
>      unsigned long        host_cr0;
>
> +    /* Do we need to tolerate a spurious EPT_MISCONFIG VM exit? */
> +    bool_t               ept_spurious_misconfig;
> +
>      /* Is the guest in real mode? */
>      uint8_t              vmx_realmode;
>      /* Are we emulating rather than VMENTERing? */
> --- a/xen/include/asm-x86/hvm/vmx/vmx.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmx.h
> @@ -520,6 +520,7 @@ int ept_p2m_init(struct p2m_domain *p2m)
>  void ept_p2m_uninit(struct p2m_domain *p2m);
>
>  void ept_walk_table(struct domain *d, unsigned long gfn);
> +bool_t ept_handle_misconfig(uint64_t gpa);
>  void setup_ept_dump(void);
>
>  void update_guest_eip(void);
> --- a/xen/include/asm-x86/mtrr.h
> +++ b/xen/include/asm-x86/mtrr.h
> @@ -88,6 +88,7 @@ extern bool_t mtrr_fix_range_msr_set(str
>                                       uint32_t row, uint64_t msr_content);
>  extern bool_t mtrr_def_type_msr_set(struct domain *, struct mtrr_state *,
>                                      uint64_t msr_content);
> +extern void memory_type_changed(struct domain *);
>  extern bool_t pat_msr_set(uint64_t *pat, uint64_t msr);
>
>  bool_t is_var_mtrr_overlapped(struct mtrr_state *m);
> --- a/xen/include/asm-x86/p2m.h
> +++ b/xen/include/asm-x86/p2m.h
> @@ -233,6 +233,7 @@ struct p2m_domain {
>      void               (*change_entry_type_global)(struct p2m_domain *p2m,
>                                                     p2m_type_t ot,
>                                                     p2m_type_t nt);
> +    void               (*memory_type_changed)(struct p2m_domain *p2m);
>
>      void               (*write_p2m_entry)(struct p2m_domain *p2m,
>                                            unsigned long gfn, l1_pgentry_t *p,
> @@ -506,6 +507,9 @@ void p2m_change_type_range(struct domain
>  p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
>                             p2m_type_t ot, p2m_type_t nt);
>
> +/* Report a change affecting memory types. */
> +void p2m_memory_type_changed(struct domain *d);
> +
>  /* Set mmio addresses in the p2m table (for pass-through) */
>  int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn);
>  int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn);
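The spurious-exit tolerance around ept_spurious_misconfig can be
condensed as follows. This is a simplified standalone sketch of the
protocol, not the hypervisor code itself: repair_tables() is a
hypothetical stand-in for the table walk in ept_handle_misconfig(), and
the p2m lock, the real vCPU list, and the EPT flush are all elided.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct vcpu {
    bool ept_spurious_misconfig;
};

/* Hypothetical stand-in for the repair walk: returns true if a poisoned
 * (MTRR_NUM_TYPES) entry was found and fixed for this address. */
static bool repair_tables(unsigned long gpa)
{
    (void)gpa;
    return false;               /* stub for the sketch */
}

/* Returns false if the exit must be treated as a real misconfiguration
 * (the caller would crash the domain, as vmx_vmexit_handler() does). */
static bool handle_misconfig(struct vcpu *curr,
                             struct vcpu *vcpus, size_t nr,
                             unsigned long gpa)
{
    /* -1 if a spurious exit was announced to this vCPU, else 0. */
    int okay = -(int)curr->ept_spurious_misconfig;

    if ( repair_tables(gpa) )
    {
        size_t i;

        /* Other vCPUs may fault on the path just repaired: allow each
         * of them exactly one spurious exit. */
        for ( i = 0; i < nr; i++ )
            vcpus[i].ept_spurious_misconfig = true;
        okay = 1;
    }

    /* The tolerance is consumed on this exit; a further exit that again
     * repairs nothing (okay == 0) is a genuine misconfiguration. */
    curr->ept_spurious_misconfig = false;

    return okay != 0;
}

int main(void)
{
    struct vcpu vcpus[2] = { { false }, { false } };

    /* No tolerance armed and nothing repaired: would crash the domain. */
    printf("exit tolerated: %d\n",
           handle_misconfig(&vcpus[0], vcpus, 2, 0x1000));
    return 0;
}

The asymmetry is deliberate: the flag is armed on every vCPU after a
successful repair, but disarmed only on the vCPU that just exited, so
one spurious exit per vCPU is forgiven while a repeat with nothing left
to repair still kills the VM, as the description above explains.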