
Re: [Xen-devel] [PATCH 6/8] x86/EPT: force re-evaluation of memory type as necessary



> The main goal here is to drop the bogus dependency of
> epte_get_entry_emt() on d->arch.hvm_domain.params[HVM_PARAM_IDENT_PT].
> 
> Any change to state influencing epte_get_entry_emt()'s decision needs
> to result in re-calculation. Do this by using the EPT_MISCONFIG VM
> exit, storing an invalid memory type into EPT's emt field (leaving the
> IOMMU, which doesn't care about memory types, unaffected).
> 
> This is being done in a hierarchical manner to keep execution time
> down: Initially only the top level directory gets invalidated this way.
> Upon access, the involved intermediate page table levels get cleared
> back to zero, and the leaf entry gets its emt field properly set. For 4k
> leaves, all other entries in the same directory also get processed to
> amortize the cost of the extra VM exit (which halved the number of
> these VM exits in my testing).
> 
> This restoration can result in spurious EPT_MISCONFIG VM exits (since
> two vCPU-s may access addresses involving identical page table
> structures). Rather than simply returning in such cases (and risking
> that such a VM exit results from a real mis-configuration, which
> would then result in an endless loop rather than killing the VM), a
> per-vCPU flag is being introduced indicating when such a spurious VM
> exit might validly happen - if another one occurs right after VM re-
> entry, the flag would generally end up being clear, causing the VM
> to be killed as before on such VM exits.
> 
> Note that putting a reserved memory type value in the EPT structures
> isn't formally sanctioned by the specification. Intel isn't willing to
> adjust the specification to make this or a similar use of the
> EPT_MISCONFIG VM exit formally possible, but they have indicated that
> our use of it is low risk wrt forward compatibility.
> 
> Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
> 
> --- a/xen/arch/x86/domctl.c
> +++ b/xen/arch/x86/domctl.c
> @@ -84,6 +84,8 @@ long arch_do_domctl(
>             ret = ioports_permit_access(d, fp, fp + np - 1);
>         else
>             ret = ioports_deny_access(d, fp, fp + np - 1);
> +        if ( !ret )
> +            memory_type_changed(d);
>     }
>     break;
> 
> @@ -705,6 +707,8 @@ long arch_do_domctl(
>                        ret, add ? "removing" : "denying", d->domain_id,
>                        mfn, mfn + nr_mfns - 1);
>         }
> +        /* Do this unconditionally to cover errors on above failure paths. */
> +        memory_type_changed(d);
>     }
>     break;
> 
> @@ -791,6 +795,8 @@ long arch_do_domctl(
>                        "ioport_map: error %ld denying dom%d access to [%x,%x]\n",
>                        ret, d->domain_id, fmp, fmp + np - 1);
>         }
> +        if ( !ret )
> +            memory_type_changed(d);
>     }
>     break;
> 
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -252,6 +252,9 @@ int hvm_set_guest_pat(struct vcpu *v, u6
> 
>     if ( !hvm_funcs.set_guest_pat(v, guest_pat) )
>         v->arch.hvm_vcpu.pat_cr = guest_pat;
> +
> +    memory_type_changed(v->domain);
> +
>     return 1;
> }
> 
> --- a/xen/arch/x86/hvm/mtrr.c
> +++ b/xen/arch/x86/hvm/mtrr.c
> @@ -431,8 +431,12 @@ bool_t mtrr_def_type_msr_set(struct doma
>          return 0;
>     }
> 
> -    m->enabled = enabled;
> -    m->def_type = def_type;
> +    if ( m->enabled != enabled || m->def_type != def_type )
> +    {
> +        m->enabled = enabled;
> +        m->def_type = def_type;
> +        memory_type_changed(d);
> +    }
> 
>     return 1;
> }
> @@ -452,6 +456,7 @@ bool_t mtrr_fix_range_msr_set(struct dom
>                 return 0;
> 
>         fixed_range_base[row] = msr_content;
> +        memory_type_changed(d);
>     }
> 
>     return 1;
> @@ -496,6 +501,8 @@ bool_t mtrr_var_range_msr_set(
> 
>     m->overlapped = is_var_mtrr_overlapped(m);
> 
> +    memory_type_changed(d);
> +
>     return 1;
> }
> 
> @@ -690,6 +697,12 @@ static int hvm_load_mtrr_msr(struct doma
> HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save_mtrr_msr, hvm_load_mtrr_msr,
>                           1, HVMSR_PER_VCPU);
> 
> +void memory_type_changed(struct domain *d)
> +{
> +    if ( iommu_enabled && !iommu_snoop && d->vcpu && d->vcpu[0] )
> +        p2m_memory_type_changed(d);
> +}
> +
> uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn, mfn_t mfn,
>                            uint8_t *ipat, bool_t direct_mmio)
> {
> @@ -733,8 +746,7 @@ uint8_t epte_get_entry_emt(struct domain
>         return MTRR_TYPE_WRBACK;
>     }
> 
> -    gmtrr_mtype = is_hvm_domain(d) && v &&
> -                  d->arch.hvm_domain.params[HVM_PARAM_IDENT_PT] ?
> +    gmtrr_mtype = is_hvm_domain(d) && v ?
>                   get_mtrr_type(&v->arch.hvm_vcpu.mtrr, (gfn << PAGE_SHIFT)) :
>                   MTRR_TYPE_WRBACK;
> 
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -3011,6 +3011,16 @@ void vmx_vmexit_handler(struct cpu_user_
>         break;
>     }
> 
> +    case EXIT_REASON_EPT_MISCONFIG:
> +    {
> +        paddr_t gpa;
> +
> +        __vmread(GUEST_PHYSICAL_ADDRESS, &gpa);
> +        if ( !ept_handle_misconfig(gpa) )
> +            goto exit_and_crash;
> +        break;
> +    }
> +
>     case EXIT_REASON_MONITOR_TRAP_FLAG:
>         v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
>         vmx_update_cpu_exec_control(v);
> --- a/xen/arch/x86/mm/p2m.c
> +++ b/xen/arch/x86/mm/p2m.c
> @@ -200,6 +200,18 @@ void p2m_change_entry_type_global(struct
>     p2m_unlock(p2m);
> }
> 
> +void p2m_memory_type_changed(struct domain *d)
> +{
> +    struct p2m_domain *p2m = p2m_get_hostp2m(d);
> +
> +    if ( p2m->memory_type_changed )
> +    {
> +        p2m_lock(p2m);
> +        p2m->memory_type_changed(p2m);
I don't see the memory_type_changed function pointer for p2m-pt.c.

It might not be needed immediately, but it's a little land-mine waiting to be 
NULL-dereferenced.

Andres
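
For illustration only (hypothetical, not part of this patch): the
p2m_memory_type_changed() wrapper quoted above does test the pointer
before calling through it, so the current caller is safe; the land-mine
is a future caller invoking the hook directly. A no-op registration in
p2m-pt.c's init function along these lines would defuse it:

    /* Hypothetical sketch, not in this patch: give the PT
     * implementation an explicit hook so that a direct caller
     * cannot NULL-dereference it. */
    static void p2m_pt_memory_type_changed(struct p2m_domain *p2m)
    {
        /* PT entries don't cache an EPT-style memory type, so
         * there is nothing to invalidate here. */
    }

    /* ... and alongside the other hook assignments: */
    p2m->memory_type_changed = p2m_pt_memory_type_changed;
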
> +        p2m_unlock(p2m);
> +    }
> +}
> +
> mfn_t __get_gfn_type_access(struct p2m_domain *p2m, unsigned long gfn,
>                     p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
>                     unsigned int *page_order, bool_t locked)
> --- a/xen/arch/x86/mm/p2m-ept.c
> +++ b/xen/arch/x86/mm/p2m-ept.c
> @@ -660,6 +660,130 @@ static void ept_change_entry_type_global
>     ept_sync_domain(p2m);
> }
> 
> +static bool_t ept_invalidate_emt(mfn_t mfn)
> +{
> +    ept_entry_t *epte = map_domain_page(mfn_x(mfn));
> +    unsigned int i;
> +    bool_t changed = 0;
> +
> +    for ( i = 0; i < EPT_PAGETABLE_ENTRIES; i++ )
> +    {
> +        ept_entry_t e = atomic_read_ept_entry(&epte[i]);
> +
> +        if ( !is_epte_valid(&e) || !is_epte_present(&e) ||
> +             e.emt == MTRR_NUM_TYPES )
> +            continue;
> +
> +        e.emt = MTRR_NUM_TYPES;
> +        atomic_write_ept_entry(&epte[i], e);
> +        changed = 1;
> +    }
> +
> +    unmap_domain_page(epte);
> +
> +    return changed;
> +}
> +
> +static void ept_memory_type_changed(struct p2m_domain *p2m)
> +{
> +    unsigned long mfn = ept_get_asr(&p2m->ept);
> +
> +    if ( !mfn )
> +        return;
> +
> +    if ( ept_invalidate_emt(_mfn(mfn)) )
> +        ept_sync_domain(p2m);
> +}
> +
> +bool_t ept_handle_misconfig(uint64_t gpa)
> +{
> +    struct vcpu *curr = current;
> +    struct p2m_domain *p2m = p2m_get_hostp2m(curr->domain);
> +    struct ept_data *ept = &p2m->ept;
> +    unsigned int level = ept_get_wl(ept);
> +    unsigned long gfn = PFN_DOWN(gpa);
> +    unsigned long mfn = ept_get_asr(ept);
> +    ept_entry_t *epte;
> +    bool_t okay;
> +
> +    if ( !mfn )
> +        return 0;
> +
> +    p2m_lock(p2m);
> +
> +    for ( okay = curr->arch.hvm_vmx.ept_spurious_misconfig; ; --level) {
> +        ept_entry_t e;
> +        unsigned int i;
> +
> +        epte = map_domain_page(mfn);
> +        i = (gfn >> (level * EPT_TABLE_ORDER)) & (EPT_PAGETABLE_ENTRIES - 1);
> +        e = atomic_read_ept_entry(&epte[i]);
> +
> +        if ( level == 0 || is_epte_superpage(&e) )
> +        {
> +            uint8_t ipat = 0;
> +            struct vcpu *v;
> +
> +            if ( e.emt != MTRR_NUM_TYPES )
> +                break;
> +
> +            if ( level == 0 )
> +            {
> +                for ( gfn -= i, i = 0; i < EPT_PAGETABLE_ENTRIES; ++i )
> +                {
> +                    e = atomic_read_ept_entry(&epte[i]);
> +                    if ( e.emt == MTRR_NUM_TYPES )
> +                        e.emt = 0;
> +                    if ( !is_epte_valid(&e) || !is_epte_present(&e) )
> +                        continue;
> +                    e.emt = epte_get_entry_emt(p2m->domain, gfn + i,
> +                                               _mfn(e.mfn), &ipat,
> +                                               e.sa_p2mt == p2m_mmio_direct);
> +                    e.ipat = ipat;
> +                    atomic_write_ept_entry(&epte[i], e);
> +                }
> +            }
> +            else
> +            {
> +                e.emt = epte_get_entry_emt(p2m->domain, gfn, _mfn(e.mfn),
> +                                           &ipat,
> +                                           e.sa_p2mt == p2m_mmio_direct);
> +                e.ipat = ipat;
> +                atomic_write_ept_entry(&epte[i], e);
> +            }
> +
> +            for_each_vcpu ( curr->domain, v )
> +                v->arch.hvm_vmx.ept_spurious_misconfig = 1;
> +            okay = 1;
> +            break;
> +        }
> +
> +        if ( e.emt == MTRR_NUM_TYPES )
> +        {
> +            ASSERT(is_epte_present(&e));
> +            e.emt = 0;
> +            atomic_write_ept_entry(&epte[i], e);
> +            unmap_domain_page(epte);
> +
> +            ept_invalidate_emt(_mfn(e.mfn));
> +            okay = 1;
> +        }
> +        else if ( is_epte_present(&e) && !e.emt )
> +            unmap_domain_page(epte);
> +        else
> +            break;
> +
> +        mfn = e.mfn;
> +    }
> +
> +    unmap_domain_page(epte);
> +    curr->arch.hvm_vmx.ept_spurious_misconfig = 0;
> +    ept_sync_domain(p2m);
> +    p2m_unlock(p2m);
> +
> +    return okay;
> +}
> +
> static void __ept_sync_domain(void *info)
> {
>     struct ept_data *ept = &((struct p2m_domain *)info)->ept;
> @@ -697,6 +821,7 @@ int ept_p2m_init(struct p2m_domain *p2m)
>     p2m->set_entry = ept_set_entry;
>     p2m->get_entry = ept_get_entry;
>     p2m->change_entry_type_global = ept_change_entry_type_global;
> +    p2m->memory_type_changed = ept_memory_type_changed;
>     p2m->audit_p2m = NULL;
> 
>     /* Set the memory type used when accessing EPT paging structures. */
> @@ -737,6 +862,7 @@ static void ept_dump_p2m_table(unsigned 
>         [MTRR_TYPE_WRTHROUGH]      = "WT",
>         [MTRR_TYPE_WRPROT]         = "WP",
>         [MTRR_TYPE_WRBACK]         = "WB",
> +        [MTRR_NUM_TYPES]           = "??"
>     };
> 
>     for_each_domain(d)
> @@ -750,11 +876,16 @@ static void ept_dump_p2m_table(unsigned 
> 
>         for ( gfn = 0; gfn <= p2m->max_mapped_pfn; gfn += 1UL << order )
>         {
> +            char c = 0;
> +
>             gfn_remainder = gfn;
>             table = map_domain_page(pagetable_get_pfn(p2m_get_pagetable(p2m)));
> 
>             for ( i = ept_get_wl(ept); i > 0; i-- )
>             {
> +                ept_entry = table + (gfn_remainder >> (i * EPT_TABLE_ORDER));
> +                if ( ept_entry->emt == MTRR_NUM_TYPES )
> +                    c = '?';
>                 ret = ept_next_level(p2m, 1, &table, &gfn_remainder, i);
>                 if ( ret != GUEST_TABLE_NORMAL_PAGE )
>                     break;
> @@ -775,7 +906,7 @@ static void ept_dump_p2m_table(unsigned 
>                            memoryTypes[ept_entry->emt][0],
>                            memoryTypes[ept_entry->emt][1]
>                            ?: ept_entry->emt + '0',
> -                           ept_entry->ipat ? '!' : ' ');
> +                           c ?: ept_entry->ipat ? '!' : ' ');
> 
>                 if ( !(record_counter++ % 100) )
>                     process_pending_softirqs();
> --- a/xen/common/domctl.c
> +++ b/xen/common/domctl.c
> @@ -815,6 +815,10 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
>             ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1);
>         else
>             ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1);
> +#ifdef CONFIG_X86
> +        if ( !ret )
> +            memory_type_changed(d);
> +#endif
>     }
>     break;
> 
> --- a/xen/include/asm-x86/hvm/vmx/vmcs.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
> @@ -124,6 +124,9 @@ struct arch_vmx_struct {
> 
>     unsigned long        host_cr0;
> 
> +    /* Do we need to tolerate a spurious EPT_MISCONFIG VM exit? */
> +    bool_t               ept_spurious_misconfig;
> +
>     /* Is the guest in real mode? */
>     uint8_t              vmx_realmode;
>     /* Are we emulating rather than VMENTERing? */
> --- a/xen/include/asm-x86/hvm/vmx/vmx.h
> +++ b/xen/include/asm-x86/hvm/vmx/vmx.h
> @@ -520,6 +520,7 @@ int ept_p2m_init(struct p2m_domain *p2m)
> void ept_p2m_uninit(struct p2m_domain *p2m);
> 
> void ept_walk_table(struct domain *d, unsigned long gfn);
> +bool_t ept_handle_misconfig(uint64_t gpa);
> void setup_ept_dump(void);
> 
> void update_guest_eip(void);
> --- a/xen/include/asm-x86/mtrr.h
> +++ b/xen/include/asm-x86/mtrr.h
> @@ -88,6 +88,7 @@ extern bool_t mtrr_fix_range_msr_set(str
>                                      uint32_t row, uint64_t msr_content);
> extern bool_t mtrr_def_type_msr_set(struct domain *, struct mtrr_state *,
>                                     uint64_t msr_content);
> +extern void memory_type_changed(struct domain *);
> extern bool_t pat_msr_set(uint64_t *pat, uint64_t msr);
> 
> bool_t is_var_mtrr_overlapped(struct mtrr_state *m);
> --- a/xen/include/asm-x86/p2m.h
> +++ b/xen/include/asm-x86/p2m.h
> @@ -233,6 +233,7 @@ struct p2m_domain {
>     void               (*change_entry_type_global)(struct p2m_domain *p2m,
>                                                    p2m_type_t ot,
>                                                    p2m_type_t nt);
> +    void               (*memory_type_changed)(struct p2m_domain *p2m);
> 
>     void               (*write_p2m_entry)(struct p2m_domain *p2m,
>                                           unsigned long gfn, l1_pgentry_t *p,
> @@ -506,6 +507,9 @@ void p2m_change_type_range(struct domain
> p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
>                            p2m_type_t ot, p2m_type_t nt);
> 
> +/* Report a change affecting memory types. */
> +void p2m_memory_type_changed(struct domain *d);
> +
> /* Set mmio addresses in the p2m table (for pass-through) */
> int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn);
> int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn);
> 
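
To make the spurious-exit handling described in the commit message easier
to follow, here is a condensed sketch of the control flow implemented in
ept_handle_misconfig() above -- a paraphrase of the quoted hunk, not
additional code; "repaired" stands in for "this walk fixed an entry whose
emt field held the MTRR_NUM_TYPES marker":

    /* Per-vCPU spurious EPT_MISCONFIG handshake: tolerate exactly one
     * "someone else already repaired this" exit per vCPU. */
    okay = curr->arch.hvm_vmx.ept_spurious_misconfig;
    if ( repaired )
    {
        struct vcpu *v;

        for_each_vcpu ( curr->domain, v )  /* others may still take the exit */
            v->arch.hvm_vmx.ept_spurious_misconfig = 1;
        okay = 1;
    }
    curr->arch.hvm_vmx.ept_spurious_misconfig = 0;  /* one-shot tolerance */
    return okay;  /* 0 => vmx_vmexit_handler() crashes the guest */

A second misconfig exit taken immediately after VM re-entry therefore finds
the flag already consumed, so a genuine mis-configuration still kills the VM
instead of looping forever.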

