Re: [Xen-devel] RFC: AMD support for paging
>
>
>> -----Original Message-----
>> From: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
>> [mailto:xen-devel-bounces@xxxxxxxxxxxxxxxxxxx] On Behalf Of Andres
>> Lagar-Cavilla
>> Sent: Wednesday, February 15, 2012 3:05 AM
>> To: xen-devel@xxxxxxxxxxxxxxxxxxx
>> Cc: olaf@xxxxxxxxx; keir.xen@xxxxxxxxx; tim@xxxxxxx; JBeulich@xxxxxxxx;
>> adin@xxxxxxxxxxxxxx
>> Subject: [Xen-devel] RFC: AMD support for paging
>>
>> We started hashing out some AMD support for mem_paging and mem_access.
>> Right now my VMs boot, page out a bit, and then die on an HVM triple
>> fault.
>>
>> Most importantly, I want to get somebody from AMD to comment on and help
>> out with this. It feels like we're inches away from enabling support for
>> this very nice feature. I'm not sure who exactly at AMD monitors the
>> list for these kinds of things. It'd be great to have you on board!
>>
>> For starters, the changes to the p2m code are relatively mild, but it'd
>> be great if somebody could point out any red flags.
>>
>> Another issue: comments indicate that bits 59-62 in NPT entries are used
>> for IOMMU flags, but effectively only bits 61-62 are. Repossessing one
>> bit (59) would give us enough space to support mem_access. Right now we
>> only have 7 bits available for Xen flags, and that is not enough for
>> paging and access. Is bit 59 effectively reserved?
>>
>> Finally, the triple fault. Maybe I'm missing something obvious. Comments
>> welcome.
>>
>> Patch inline below, thanks!
>> Andres
>>
>> Enable AMD support for paging.
>>
>> Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
>> Signed-off-by: Adin Scannell <adin@xxxxxxxxxxx>
>>
>> diff -r 25ca78889ed4 -r 10ca4e4293ce xen/arch/x86/mm/mem_event.c
>> --- a/xen/arch/x86/mm/mem_event.c
>> +++ b/xen/arch/x86/mm/mem_event.c
>> @@ -537,10 +537,6 @@ int mem_event_domctl(struct domain *d, x
>>          if ( !hap_enabled(d) )
>>              break;
>>
>> -        /* Currently only EPT is supported */
>> -        if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
>> -            break;
>> -
>>          rc = -EXDEV;
>>          /* Disallow paging in a PoD guest */
>>          if ( p2m->pod.entry_count )
>> diff -r 25ca78889ed4 -r 10ca4e4293ce xen/arch/x86/mm/p2m-pt.c
>> --- a/xen/arch/x86/mm/p2m-pt.c
>> +++ b/xen/arch/x86/mm/p2m-pt.c
>> @@ -53,6 +53,20 @@
>>  #define P2M_BASE_FLAGS \
>>      (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
>>
>> +#ifdef __x86_64__
>> +/* l1e_from_pfn is not designed to have INVALID_MFN stored. The 0xff..ff
>> + * value tramples over the higher-order bits used for flags (NX, p2mt,
>> + * etc.) This happens for paging entries. Thus we do this clip/unclip
>> + * juggle for l1 entries only (no paging superpages!) */
>> +#define EFF_MFN_WIDTH    (PADDR_BITS-PAGE_SHIFT) /* 40 bits */
>> +#define clipped_mfn(mfn) ((mfn) & ((1UL << EFF_MFN_WIDTH) - 1))
>> +#define unclip_mfn(mfn)  ((clipped_mfn((mfn)) == INVALID_MFN) ? \
>> +                          INVALID_MFN : (mfn))
>> +#else
>> +#define clipped_mfn(mfn) (mfn)
>> +#define unclip_mfn(mfn)  (mfn)
>> +#endif /* __x86_64__ */
>> +
>>  static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn)
>>  {
>>      unsigned long flags;
>> @@ -77,6 +91,9 @@ static unsigned long p2m_type_to_flags(p
>>      case p2m_invalid:
>>      case p2m_mmio_dm:
>>      case p2m_populate_on_demand:
>> +    case p2m_ram_paging_out:
>> +    case p2m_ram_paged:
>> +    case p2m_ram_paging_in:
>>      default:
>>          return flags;
>>      case p2m_ram_ro:
>> @@ -168,7 +185,7 @@ p2m_next_level(struct p2m_domain *p2m, m
>>                                        shift, max)) )
>>          return 0;
>>
>> -    /* PoD: Not present doesn't imply empty. */
>> +    /* PoD/paging: Not present doesn't imply empty. */
>>      if ( !l1e_get_flags(*p2m_entry) )
>>      {
>>          struct page_info *pg;
>> @@ -384,8 +401,9 @@ p2m_set_entry(struct p2m_domain *p2m, un
>>                                     0, L1_PAGETABLE_ENTRIES);
>>          ASSERT(p2m_entry);
>>
>> -        if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
>> -            entry_content = l1e_from_pfn(mfn_x(mfn),
>> +        if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) ||
>> +             (p2mt == p2m_ram_paged) || (p2mt == p2m_ram_paging_in) )
>> +            entry_content = l1e_from_pfn(clipped_mfn(mfn_x(mfn)),
>>                                           p2m_type_to_flags(p2mt, mfn));
>>          else
>>              entry_content = l1e_empty();
>> @@ -393,7 +411,7 @@ p2m_set_entry(struct p2m_domain *p2m, un
>>          if ( entry_content.l1 != 0 )
>>          {
>>              p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags);
>> -            old_mfn = l1e_get_pfn(*p2m_entry);
>> +            old_mfn = unclip_mfn(l1e_get_pfn(*p2m_entry));
>>          }
>>          /* level 1 entry */
>>          p2m->write_p2m_entry(p2m, gfn, p2m_entry, table_mfn,
>>                               entry_content, 1);
>> @@ -615,11 +633,12 @@ pod_retry_l1:
>>                                sizeof(l1e));
>>
>>      if ( ret == 0 ) {
>> +        unsigned long l1e_mfn = unclip_mfn(l1e_get_pfn(l1e));
>>          p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
>> -        ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
>> +        ASSERT( (l1e_mfn != INVALID_MFN || !p2m_is_ram(p2mt)) ||
>> +                (l1e_mfn == INVALID_MFN && p2m_is_paging(p2mt)) );
>>
>> -        if ( p2m_flags_to_type(l1e_get_flags(l1e))
>> -             == p2m_populate_on_demand )
>> +        if ( p2mt == p2m_populate_on_demand )
>>          {
>>              /* The read has succeeded, so we know that the mapping
>>               * exits at this point. */
>> @@ -641,7 +660,7 @@
>>          }
>>
>>          if ( p2m_is_valid(p2mt) || p2m_is_grant(p2mt) )
>> -            mfn = _mfn(l1e_get_pfn(l1e));
>> +            mfn = _mfn(l1e_mfn);
>>          else
>>              /* XXX see above */
>>              p2mt = p2m_mmio_dm;
>> @@ -783,18 +802,26 @@ pod_retry_l2:
>>  pod_retry_l1:
>>      if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
>>      {
>> +        p2m_type_t l1t = p2m_flags_to_type(l1e_get_flags(*l1e));
>>          /* PoD: Try to populate */
>> -        if ( p2m_flags_to_type(l1e_get_flags(*l1e)) == p2m_populate_on_demand )
>> +        if ( l1t == p2m_populate_on_demand )
>>          {
>>              if ( q != p2m_query ) {
>>                  if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_4K, q) )
>>                      goto pod_retry_l1;
>>              } else
>>                  *t = p2m_populate_on_demand;
>> +        } else {
>> +            if ( p2m_is_paging(l1t) )
>> +            {
>> +                *t = l1t;
>> +                /* No need to unclip due to check below */
>> +                mfn = _mfn(l1e_get_pfn(*l1e));
>> +            }
>>          }
>>
>>          unmap_domain_page(l1e);
>> -        return _mfn(INVALID_MFN);
>> +        return (l1t == p2m_ram_paging_out) ? mfn : _mfn(INVALID_MFN);
>            ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> This means that when p2mt is p2m_ram_paging_in, the mfn must be
> INVALID_MFN; but in p2m_type_to_flags, _PAGE_PRESENT is not set for
> p2m_ram_paging_in or p2m_ram_paged.

Your patch below looks reasonable. I'll give it a try.

Andres
>
> My idea is something like this:
> In the p2m_gfn_to_mfn function:
>
> @@ -1370,6 +1379,7 @@
>      paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
>      l2_pgentry_t *l2e;
>      l1_pgentry_t *l1e;
> +    unsigned long flags;
>
>      ASSERT(paging_mode_translate(d));
>
> @@ -1455,7 +1465,8 @@
>      l1e = map_domain_page(mfn_x(mfn));
>      l1e += l1_table_offset(addr);
>  pod_retry_l1:
> -    if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
> +    flags = l1e_get_flags(*l1e);
> +    if ( ((flags & _PAGE_PRESENT) == 0) &&
> +         (!p2m_is_paging(p2m_flags_to_type(flags))) )
>      {
>          /* PoD: Try to populate */
>          if ( p2m_flags_to_type(l1e_get_flags(*l1e)) == p2m_populate_on_demand )
>          {
>              if ( q != p2m_query ) {
>                  if ( !p2m_pod_check_and_populate(p2m, gfn,
>                                 (l1_pgentry_t *)l1e, PAGE_ORDER_4K, q) )
>                      goto pod_retry_l1;
>              } else
>                  *t = p2m_populate_on_demand;
>          }
>
>          unmap_domain_page(l1e);
>          return _mfn(INVALID_MFN);
>      }
>      mfn = _mfn(l1e_get_pfn(*l1e));
>      *t = p2m_flags_to_type(l1e_get_flags(*l1e));
>      unmap_domain_page(l1e);
>
>
>>      }
>>      mfn = _mfn(l1e_get_pfn(*l1e));
>>      *t = p2m_flags_to_type(l1e_get_flags(*l1e));
>> @@ -914,7 +941,7 @@ static void p2m_change_type_global(struc
>>                  flags = l1e_get_flags(l1e[i1]);
>>                  if ( p2m_flags_to_type(flags) != ot )
>>                      continue;
>> -                mfn = l1e_get_pfn(l1e[i1]);
>> +                mfn = unclip_mfn(l1e_get_pfn(l1e[i1]));
>>                  gfn = i1 + (i2 + (i3
>> #if CONFIG_PAGING_LEVELS >= 4
>>                                    + (i4 * L3_PAGETABLE_ENTRIES)
>> @@ -923,7 +950,7 @@ static void p2m_change_type_global(struc
>>                                   ) * L2_PAGETABLE_ENTRIES) *
>>                       L1_PAGETABLE_ENTRIES;
>>                  /* create a new 1le entry with the new type */
>>                  flags = p2m_type_to_flags(nt, _mfn(mfn));
>> -                l1e_content = l1e_from_pfn(mfn, flags);
>> +                l1e_content = l1e_from_pfn(clipped_mfn(mfn), flags);
>>                  p2m->write_p2m_entry(p2m, gfn, &l1e[i1],
>>                                       l1mfn, l1e_content, 1);
>>              }
>> @@ -1073,7 +1100,7 @@ long p2m_pt_audit_p2m(struct p2m_domain
>>                      entry_count++;
>>                      continue;
>>                  }
>> -                mfn = l1e_get_pfn(l1e[i1]);
>> +                mfn = unclip_mfn(l1e_get_pfn(l1e[i1]));
>>                  ASSERT(mfn_valid(_mfn(mfn)));
>>                  m2pfn = get_gpfn_from_mfn(mfn);
>>                  if ( m2pfn != gfn &&
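For reference, the clip/unclip juggle used throughout the patch reduces to the following standalone sketch (PADDR_BITS and PAGE_SHIFT hard-coded to their x86-64 values of 52 and 12; an illustration of the arithmetic, not the Xen macros themselves):

#include <stdio.h>

#define PADDR_BITS    52
#define PAGE_SHIFT    12
#define INVALID_MFN   (~0UL)
#define EFF_MFN_WIDTH (PADDR_BITS - PAGE_SHIFT) /* 40 bits */

/* Keep only the low 40 bits so that, once shifted by PAGE_SHIFT to
 * build an l1 entry, the value cannot spill into bits 52-63, where NX
 * and the software-defined p2m type flags live. */
static unsigned long clip(unsigned long mfn)
{
    return mfn & ((1UL << EFF_MFN_WIDTH) - 1);
}

int main(void)
{
    /* The raw all-ones sentinel shifted into an entry covers bits
     * 12-63, trampling the flag bits... */
    printf("raw:     %#lx\n", INVALID_MFN << PAGE_SHIFT);
    /* ...while the clipped form stays confined to the address bits
     * 12-51. */
    printf("clipped: %#lx\n", clip(INVALID_MFN) << PAGE_SHIFT);
    return 0;
}

This is why the patch only ever stores the clipped form in l1 entries and maps it back to the canonical INVALID_MFN on the way out.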