Re: [Xen-devel] [PATCH 6/8] x86/EPT: force re-evaluation of memory type as necessary
> The main goal here is to drop the bogus dependency of
> epte_get_entry_emt() on d->arch.hvm_domain.params[HVM_PARAM_IDENT_PT].
>
> Any change to state influencing epte_get_entry_emt()'s decision needs
> to result in re-calculation. Do this by using the EPT_MISCONFIG VM
> exit, storing an invalid memory type into EPT's emt field (leaving the
> IOMMU, which doesn't care about memory types, unaffected).
>
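For anyone not familiar with the trick being relied on here: the EPT
memory-type (emt) field occupies bits 5:3 of a leaf entry, and only
UC (0), WC (1), WT (4), WP (5) and WB (6) are architecturally valid
there. MTRR_NUM_TYPES is 7 in Xen, i.e. a reserved value, so an entry
carrying it is misconfigured and the first access through it exits with
EXIT_REASON_EPT_MISCONFIG. A stand-alone sketch of that validity rule
(illustration only, not Xen code):

/* Not Xen code: models which values the EPT emt field (bits 5:3 of a
 * leaf entry) may architecturally hold.  Anything else, including 7
 * (== MTRR_NUM_TYPES in Xen), makes the entry misconfigured, so the
 * first access through it raises EXIT_REASON_EPT_MISCONFIG. */
#include <stdbool.h>
#include <stdio.h>

#define MTRR_TYPE_UNCACHABLE 0
#define MTRR_TYPE_WRCOMB     1
#define MTRR_TYPE_WRTHROUGH  4
#define MTRR_TYPE_WRPROT     5
#define MTRR_TYPE_WRBACK     6
#define MTRR_NUM_TYPES       7  /* first invalid value - the marker */

static bool emt_architecturally_valid(unsigned int emt)
{
    switch ( emt )
    {
    case MTRR_TYPE_UNCACHABLE:
    case MTRR_TYPE_WRCOMB:
    case MTRR_TYPE_WRTHROUGH:
    case MTRR_TYPE_WRPROT:
    case MTRR_TYPE_WRBACK:
        return true;
    default:                     /* 2, 3 and 7 are reserved */
        return false;
    }
}

int main(void)
{
    for ( unsigned int emt = 0; emt < 8; ++emt )
        printf("emt=%u -> %s\n", emt,
               emt_architecturally_valid(emt) ? "valid" : "EPT_MISCONFIG");
    return 0;
}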
> This is being done in a hierarchical manner to keep execution time
> down: Initially only the top level directory gets invalidated this way.
> Upon access, the involved intermediate page table levels get cleared
> back to zero, and the leaf entry gets its field properly set. For 4k
> leaves all other entries in the same directory also get processed to
> amortize the cost of the extra VM exit (which halved the number of
> these VM exits in my testing).
>
> This restoring can result in spurious EPT_MISCONFIG VM exits (since
> two vCPU-s may access addresses involving identical page table
> structures). Rather than simply returning in such cases (and risking
> that such a VM exit results from a real mis-configuration, which
> would then result in an endless loop rather than killing the VM), a
> per-vCPU flag is being introduced indicating when such a spurious VM
> exit might validly happen - if another one occurs right after VM re-
> entry, the flag would generally end up being clear, causing the VM
> to be killed as before on such VM exits.
>
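The flag protocol is easier to follow as a model. Below is a stand-alone
simulation (only ept_spurious_misconfig is a name from the patch; the other
names are made up for illustration): one real repair, one tolerated spurious
exit, then a third exit that gets treated as a genuine misconfiguration:

/* Not Xen code: a stand-alone model of the per-vCPU flag described
 * above.  handle_misconfig() mirrors the shape of ept_handle_misconfig():
 * the verdict starts out as the vCPU's flag, a real repair re-arms the
 * flag on every vCPU, and the current vCPU's flag is always consumed. */
#include <stdbool.h>
#include <stdio.h>

struct vcpu_model { bool ept_spurious_misconfig; };

static bool handle_misconfig(struct vcpu_model *curr, struct vcpu_model *vcpus,
                             unsigned int nr, bool entry_needed_repair)
{
    bool okay = curr->ept_spurious_misconfig;

    if ( entry_needed_repair )
    {
        /* Another vCPU may race through the same structures: allow each
         * of them (and us) one spurious EPT_MISCONFIG exit. */
        for ( unsigned int i = 0; i < nr; ++i )
            vcpus[i].ept_spurious_misconfig = true;
        okay = true;
    }

    curr->ept_spurious_misconfig = false;  /* allowance is single-use */
    return okay;                           /* false -> crash the domain */
}

int main(void)
{
    struct vcpu_model v[2] = { { false }, { false } };

    printf("repair:   %d\n", handle_misconfig(&v[0], v, 2, true));  /* 1 */
    printf("spurious: %d\n", handle_misconfig(&v[1], v, 2, false)); /* 1 */
    printf("genuine:  %d\n", handle_misconfig(&v[1], v, 2, false)); /* 0 */
    return 0;
}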
> Note that putting a reserved memory type value in the EPT structures
> isn't formally sanctioned by the specification. Intel isn't willing to
> adjust the specification to make this or a similar use of the
> EPT_MISCONFIG VM exit formally possible, but they have indicated that
> our use of this is low risk wrt forward compatibility.
>
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
>
> --- a/xen/arch/x86/domctl.c
> +++ b/xen/arch/x86/domctl.c
> @@ -84,6 +84,8 @@ long arch_do_domctl(
> ret = ioports_permit_access(d, fp, fp + np - 1);
> else
> ret = ioports_deny_access(d, fp, fp + np - 1);
> + if ( !ret )
> + memory_type_changed(d);
> }
> break;
>
> @@ -705,6 +707,8 @@ long arch_do_domctl(
> ret, add ? "removing" : "denying", d->domain_id,
> mfn, mfn + nr_mfns - 1);
> }
> + /* Do this unconditionally to cover errors on above failure paths. */
> + memory_type_changed(d);
> }
> break;
>
> @@ -791,6 +795,8 @@ long arch_do_domctl(
> "ioport_map: error %ld denying dom%d access to
> [%x,%x]\n",
> ret, d->domain_id, fmp, fmp + np - 1);
> }
> + if ( !ret )
> + memory_type_changed(d);
> }
> break;
>
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -252,6 +252,9 @@ int hvm_set_guest_pat(struct vcpu *v, u6
>
> if ( !hvm_funcs.set_guest_pat(v, guest_pat) )
> v->arch.hvm_vcpu.pat_cr = guest_pat;
> +
> + memory_type_changed(v->domain);
> +
> return 1;
> }
>
> --- a/xen/arch/x86/hvm/mtrr.c
> +++ b/xen/arch/x86/hvm/mtrr.c
> @@ -431,8 +431,12 @@ bool_t mtrr_def_type_msr_set(struct doma
> return 0;
> }
>
> - m->enabled = enabled;
> - m->def_type = def_type;
> + if ( m->enabled != enabled || m->def_type != def_type )
> + {
> + m->enabled = enabled;
> + m->def_type = def_type;
> + memory_type_changed(d);
> + }
>
> return 1;
> }
> @@ -452,6 +456,7 @@ bool_t mtrr_fix_range_msr_set(struct dom
> return 0;
>
> fixed_range_base[row] = msr_content;
> + memory_type_changed(d);
> }
>
> return 1;
> @@ -496,6 +501,8 @@ bool_t mtrr_var_range_msr_set(
>
> m->overlapped = is_var_mtrr_overlapped(m);
>
> + memory_type_changed(d);
> +
> return 1;
> }
>
> @@ -690,6 +697,12 @@ static int hvm_load_mtrr_msr(struct doma
> HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save_mtrr_msr, hvm_load_mtrr_msr,
> 1, HVMSR_PER_VCPU);
>
> +void memory_type_changed(struct domain *d)
> +{
> + if ( iommu_enabled && !iommu_snoop && d->vcpu && d->vcpu[0] )
> + p2m_memory_type_changed(d);
> +}
> +
> uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn,
> uint8_t *ipat, bool_t direct_mmio)
> {
> @@ -733,8 +746,7 @@ uint8_t epte_get_entry_emt(struct domain
> return MTRR_TYPE_WRBACK;
> }
>
> - gmtrr_mtype = is_hvm_domain(d) && v &&
> - d->arch.hvm_domain.params[HVM_PARAM_IDENT_PT] ?
> + gmtrr_mtype = is_hvm_domain(d) && v ?
> get_mtrr_type(&v->arch.hvm_vcpu.mtrr, (gfn << PAGE_SHIFT)) :
> MTRR_TYPE_WRBACK;
>
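On the iommu_enabled && !iommu_snoop check in the new memory_type_changed():
as I read it, this mirrors the short-circuit in epte_get_entry_emt() - with
no IOMMU, or a snooping one, RAM mappings get forced to WB with the
ignore-PAT (ipat) bit set, so changes to guest PAT/MTRR state can't alter
the effective type and there is nothing to re-evaluate. A simplified
stand-alone sketch of that reasoning (not the real function):

/* Simplified sketch, not the real epte_get_entry_emt(): when no IOMMU
 * is in use, or the IOMMU snoops, the effective type is pinned to
 * write-back regardless of guest PAT/MTRR state, which is why
 * memory_type_changed() may skip re-evaluation in those cases. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MTRR_TYPE_WRBACK 6

static uint8_t emt_sketch(bool iommu_enabled, bool iommu_snoop,
                          uint8_t guest_derived_type, uint8_t *ipat)
{
    if ( !iommu_enabled || iommu_snoop )
    {
        *ipat = 1;                    /* ignore guest PAT */
        return MTRR_TYPE_WRBACK;      /* constant: no recalculation needed */
    }

    *ipat = 0;
    return guest_derived_type;        /* depends on mutable guest state */
}

int main(void)
{
    uint8_t ipat, emt;

    emt = emt_sketch(true, true, 0, &ipat);   /* snooping IOMMU */
    printf("snoop:    emt=%u ipat=%u\n", emt, ipat);

    emt = emt_sketch(true, false, 0, &ipat);  /* non-snooping IOMMU */
    printf("no snoop: emt=%u ipat=%u\n", emt, ipat);
    return 0;
}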
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -3011,6 +3011,16 @@ void vmx_vmexit_handler(struct cpu_user_
> break;
> }
>
> + case EXIT_REASON_EPT_MISCONFIG:
> + {
> + paddr_t gpa;
> +
> + __vmread(GUEST_PHYSICAL_ADDRESS, &gpa);
> + if ( !ept_handle_misconfig(gpa) )
> + goto exit_and_crash;
> + break;
> + }
> +
> case EXIT_REASON_MONITOR_TRAP_FLAG:
> v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
> vmx_update_cpu_exec_control(v);
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -200,6 +200,18 @@ void p2m_change_entry_type_global(struct
> p2m_unlock(p2m);
> }
>
> +void p2m_memory_type_changed(struct domain *d)
> +{
> + struct p2m_domain *p2m = p2m_get_hostp2m(d);
> +
> + if ( p2m->memory_type_changed )
> + {
> + p2m_lock(p2m);
> + p2m->memory_type_changed(p2m);
I don't see the memory_type_changed function pointer for p2m-pt.c.
It might not be needed immediately, but it's a little land-mine waiting to be
NULL-dereferenced.
Andres
> + p2m_unlock(p2m);
> + }
> +}
> +
> mfn_t __get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn,
> p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
> unsigned int *page_order, bool_t locked)
> --- a/xen/arch/x86/mm/p2m-ept.c
> +++ b/xen/arch/x86/mm/p2m-ept.c
> @@ -660,6 +660,130 @@ static void ept_change_entry_type_global
> ept_sync_domain(p2m);
> }
>
> +static bool_t ept_invalidate_emt(mfn_t mfn)
> +{
> + ept_entry_t *epte = map_domain_page(mfn_x(mfn));
> + unsigned int i;
> + bool_t changed = 0;
> +
> + for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
> + {
> + ept_entry_t e = atomic_read_ept_entry(&epte[i]);
> +
> + if ( !is_epte_valid(&e) || !is_epte_present(&e) ||
> + e.emt == MTRR_NUM_TYPES )
> + continue;
> +
> + e.emt = MTRR_NUM_TYPES;
> + atomic_write_ept_entry(&epte[i], e);
> + changed = 1;
> + }
> +
> + unmap_domain_page(epte);
> +
> + return changed;
> +}
> +
> +static void ept_memory_type_changed(struct p2m_domain *p2m)
> +{
> + unsigned long mfn = ept_get_asr(&p2m->ept);
> +
> + if ( !mfn )
> + return;
> +
> + if ( ept_invalidate_emt(_mfn(mfn)) )
> + ept_sync_domain(p2m);
> +}
> +
> +bool_t ept_handle_misconfig(uint64_t gpa)
> +{
> + struct vcpu *curr = current;
> + struct p2m_domain *p2m = p2m_get_hostp2m(curr->domain);
> + struct ept_data *ept = &p2m->ept;
> + unsigned int level = ept_get_wl(ept);
> + unsigned long gfn = PFN_DOWN(gpa);
> + unsigned long mfn = ept_get_asr(ept);
> + ept_entry_t *epte;
> + bool_t okay;
> +
> + if ( !mfn )
> + return 0;
> +
> + p2m_lock(p2m);
> +
> + for ( okay = curr->arch.hvm_vmx.ept_spurious_misconfig; ; --level )
> + {
> + ept_entry_t e;
> + unsigned int i;
> +
> + epte = map_domain_page(mfn);
> + i = (gfn >> (level * EPT_TABLE_ORDER)) & (EPT_PAGETABLE_ENTRIES - 1);
> + e = atomic_read_ept_entry(&epte[i]);
> +
> + if ( level == 0 || is_epte_superpage(&e) )
> + {
> + uint8_t ipat = 0;
> + struct vcpu *v;
> +
> + if ( e.emt != MTRR_NUM_TYPES )
> + break;
> +
> + if ( level == 0 )
> + {
> + for ( gfn -= i, i = 0; i < EPT_PAGETABLE_ENTRIES; ++i )
> + {
> + e = atomic_read_ept_entry(&epte[i]);
> + if ( e.emt == MTRR_NUM_TYPES )
> + e.emt = 0;
> + if ( !is_epte_valid(&e) || !is_epte_present(&e) )
> + continue;
> + e.emt = epte_get_entry_emt(p2m->domain, gfn + i,
> + _mfn(e.mfn), &ipat,
> + e.sa_p2mt == p2m_mmio_direct);
> + e.ipat = ipat;
> + atomic_write_ept_entry(&epte[i], e);
> + }
> + }
> + else
> + {
> + e.emt = epte_get_entry_emt(p2m->domain, gfn, _mfn(e.mfn),
> + &ipat,
> + e.sa_p2mt == p2m_mmio_direct);
> + e.ipat = ipat;
> + atomic_write_ept_entry(&epte[i], e);
> + }
> +
> + for_each_vcpu ( curr->domain, v )
> + v->arch.hvm_vmx.ept_spurious_misconfig = 1;
> + okay = 1;
> + break;
> + }
> +
> + if ( e.emt == MTRR_NUM_TYPES )
> + {
> + ASSERT(is_epte_present(&e));
> + e.emt = 0;
> + atomic_write_ept_entry(&epte[i], e);
> + unmap_domain_page(epte);
> +
> + ept_invalidate_emt(_mfn(e.mfn));
> + okay = 1;
> + }
> + else if ( is_epte_present(&e) && !e.emt )
> + unmap_domain_page(epte);
> + else
> + break;
> +
> + mfn = e.mfn;
> + }
> +
> + unmap_domain_page(epte);
> + curr->arch.hvm_vmx.ept_spurious_misconfig = 0;
> + ept_sync_domain(p2m);
> + p2m_unlock(p2m);
> +
> + return okay;
> +}
> +
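A quick sanity check on the walk above: each iteration slices
EPT_TABLE_ORDER bits out of the gfn to pick the entry index for the
current level. Stand-alone illustration (constants as in Xen, 9 and 512;
the gfn value is arbitrary):

/* Not Xen code: shows how the index expression in ept_handle_misconfig()
 * selects an entry at each level of a 4-level EPT tree (walk length 3). */
#include <stdio.h>

#define EPT_TABLE_ORDER        9
#define EPT_PAGETABLE_ENTRIES  (1u << EPT_TABLE_ORDER)

int main(void)
{
    unsigned long gfn = 0x123456;    /* arbitrary guest frame number */

    for ( int level = 3; level >= 0; --level )
    {
        unsigned int i = (gfn >> (level * EPT_TABLE_ORDER)) &
                         (EPT_PAGETABLE_ENTRIES - 1);

        printf("level %d -> index %u\n", level, i);
    }
    return 0;
}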
> static void __ept_sync_domain(void *info)
> {
> struct ept_data *ept = &((struct p2m_domain *)info)->ept;
> @@ -697,6 +821,7 @@ int ept_p2m_init(struct p2m_domain *p2m)
> p2m->set_entry = ept_set_entry;
> p2m->get_entry = ept_get_entry;
> p2m->change_entry_type_global = ept_change_entry_type_global;
> + p2m->memory_type_changed = ept_memory_type_changed;
> p2m->audit_p2m = NULL;
>
> /* Set the memory type used when accessing EPT paging structures. */
> @@ -737,6 +862,7 @@ static void ept_dump_p2m_table(unsigned
> [MTRR_TYPE_WRTHROUGH] = "WT",
> [MTRR_TYPE_WRPROT] = "WP",
> [MTRR_TYPE_WRBACK] = "WB",
> + [MTRR_NUM_TYPES] = "??"
> };
>
> for_each_domain(d)
> @@ -750,11 +876,16 @@ static void ept_dump_p2m_table(unsigned
>
> for ( gfn = 0; gfn <= p2m->max_mapped_pfn; gfn += 1UL << order )
> {
> + char c = 0;
> +
> gfn_remainder = gfn;
> table =
> map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m)));
>
> for ( i = ept_get_wl(ept); i > 0; i-- )
> {
> + ept_entry = table + (gfn_remainder >> (i * EPT_TABLE_ORDER));
> + if ( ept_entry->emt == MTRR_NUM_TYPES )
> + c = '?';
> ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i);
> if ( ret != GUEST_TABLE_NORMAL_PAGE )
> break;
> @@ -775,7 +906,7 @@ static void ept_dump_p2m_table(unsigned
> memoryTypes[ept_entry->emt][0],
> memoryTypes[ept_entry->emt][1]
> ?: ept_entry->emt + '0',
> - ept_entry->ipat ? '!' : ' ');
> + c ?: ept_entry->ipat ? '!' : ' ');
>
> if ( !(record_counter++ % 100) )
> process_pending_softirqs();
> --- a/xen/common/domctl.c
> +++ b/xen/common/domctl.c
> @@ -815,6 +815,10 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
> ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1);
> else
> ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1);
> +#ifdef CONFIG_X86
> + if ( !ret )
> + memory_type_changed(d);
> +#endif
> }
> break;
>
> --- a/xen/include/asm-x86/hvm/vmx/vmcs.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
> @@ -124,6 +124,9 @@ struct arch_vmx_struct {
>
> unsigned long host_cr0;
>
> + /* Do we need to tolerate a spurious EPT_MISCONFIG VM exit? */
> + bool_t ept_spurious_misconfig;
> +
> /* Is the guest in real mode? */
> uint8_t vmx_realmode;
> /* Are we emulating rather than VMENTERing? */
> --- a/xen/include/asm-x86/hvm/vmx/vmx.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmx.h
> @@ -520,6 +520,7 @@ int ept_p2m_init(struct p2m_domain *p2m)
> void ept_p2m_uninit(struct p2m_domain *p2m);
>
> void ept_walk_table(struct domain *d, unsigned long gfn);
> +bool_t ept_handle_misconfig(uint64_t gpa);
> void setup_ept_dump(void);
>
> void update_guest_eip(void);
> --- a/xen/include/asm-x86/mtrr.h
> +++ b/xen/include/asm-x86/mtrr.h
> @@ -88,6 +88,7 @@ extern bool_t mtrr_fix_range_msr_set(str
> uint32_t row, uint64_t msr_content);
> extern bool_t mtrr_def_type_msr_set(struct domain *, struct mtrr_state *,
> uint64_t msr_content);
> +extern void memory_type_changed(struct domain *);
> extern bool_t pat_msr_set(uint64_t *pat, uint64_t msr);
>
> bool_t is_var_mtrr_overlapped(struct mtrr_state *m);
> --- a/xen/include/asm-x86/p2m.h
> +++ b/xen/include/asm-x86/p2m.h
> @@ -233,6 +233,7 @@ struct p2m_domain {
> void (*change_entry_type_global)(struct p2m_domain *p2m,
> p2m_type_t ot,
> p2m_type_t nt);
> + void (*memory_type_changed)(struct p2m_domain *p2m);
>
> void (*write_p2m_entry)(struct p2m_domain *p2m,
> unsigned long gfn, l1_pgentry_t *p,
> @@ -506,6 +507,9 @@ void p2m_change_type_range(struct domain
> p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
> p2m_type_t ot, p2m_type_t nt);
>
> +/* Report a change affecting memory types. */
> +void p2m_memory_type_changed(struct domain *d);
> +
> /* Set mmio addresses in the p2m table (for pass-through) */
> int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn);
> int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn);
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel