[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] RFC: AMD support for paging



>
>
>> -----Original Message-----
>> From: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
>> [mailto:xen-devel-bounces@xxxxxxxxxxxxxxxxxxx] On Behalf Of Andres
>> Lagar-Cavilla
>> Sent: Wednesday, February 15, 2012 3:05 AM
>> To: xen-devel@xxxxxxxxxxxxxxxxxxx
>> Cc: olaf@xxxxxxxxx; keir.xen@xxxxxxxxx; tim@xxxxxxx; JBeulich@xxxxxxxx;
>> adin@xxxxxxxxxxxxxx
>> Subject: [Xen-devel] RFC: AMD support for paging
>>
>> We started hashing out some AMD support for mem_paging and mem_access.
>> Right now my VMs boot, page out a bit, and then die on an HVM triple
>> fault.
>>
>> Most importantly, I want to get somebody from AMD to comment/help out on
>> this. It feels like we're inches away from enabling support for this
>> very
>> nice feature. I'm not sure who exactly on AMD monitors the list for
>> these
>> kinds of things. It'd be great to have you on board!
>>
>> For starters, the changes to the p2m code are relatively mild, but it'd
>> be
>> great if somebody spots a red flag.
>>
>> Another issue: comments indicate that bits 59-62 in NPT entries are used
>> for IOMMU flags but effectively bits 61-62 are. Repossessing one bit
>> (59)
>> would give us enough space to support mem_access. Right now we only have
>> 7
>> bits available for Xen flags and that is not enough for paging and
>> access.
>> Is bit 59 effectively reserved?
>>
>> Finally, the triple fault. Maybe I'm missing something obvious. Comments
>> welcome.
>>
>> Patch inline below, thanks!
>> Andres
>>
>> Enable AMD support for paging.
>>
>> Signed-off-by: Andres Lagar-Cavilla <andres@xxxxxxxxxxxxxxxx>
>> Signed-off-by: Adin Scannell <adin@xxxxxxxxxxx>
>>
>> diff -r 25ca78889ed4 -r 10ca4e4293ce xen/arch/x86/mm/mem_event.c
>> --- a/xen/arch/x86/mm/mem_event.c
>> +++ b/xen/arch/x86/mm/mem_event.c
>> @@ -537,10 +537,6 @@ int mem_event_domctl(struct domain *d, x
>>              if ( !hap_enabled(d) )
>>                  break;
>>
>> -            /* Currently only EPT is supported */
>> -            if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
>> -                break;
>> -
>>              rc = -EXDEV;
>>              /* Disallow paging in a PoD guest */
>>              if ( p2m->pod.entry_count )
>> diff -r 25ca78889ed4 -r 10ca4e4293ce xen/arch/x86/mm/p2m-pt.c
>> --- a/xen/arch/x86/mm/p2m-pt.c
>> +++ b/xen/arch/x86/mm/p2m-pt.c
>> @@ -53,6 +53,20 @@
>>  #define P2M_BASE_FLAGS \
>>          (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
>>
>> +#ifdef __x86_64__
>> +/* l1e_from_pfn is not designed to have INVALID_MFN stored. The
>> 0xff..ff
>> + * value tramples over the higher-order bits used for flags (NX, p2mt,
>> + * etc.) This happens for paging entries. Thus we do this clip/unclip
>> + * juggle for l1 entries only (no paging superpages!) */
>> +#define EFF_MFN_WIDTH       (PADDR_BITS-PAGE_SHIFT) /* 40 bits */
>> +#define clipped_mfn(mfn)    ((mfn) & ((1UL << EFF_MFN_WIDTH) - 1))
>> +#define unclip_mfn(mfn)     ((clipped_mfn((mfn)) == INVALID_MFN) ? \
>> +                                INVALID_MFN : (mfn))
>> +#else
>> +#define clipped_mfn(mfn)    (mfn)
>> +#define unclip_mfn(mfn)     (mfn)
>> +#endif /* __x86_64__ */
>> +
>>  static unsigned long p2m_type_to_flags(p2m_type_t t, mfn_t mfn)
>>  {
>>      unsigned long flags;
>> @@ -77,6 +91,9 @@ static unsigned long p2m_type_to_flags(p
>>      case p2m_invalid:
>>      case p2m_mmio_dm:
>>      case p2m_populate_on_demand:
>> +    case p2m_ram_paging_out:
>> +    case p2m_ram_paged:
>> +    case p2m_ram_paging_in:
>>      default:
>>          return flags;
>>      case p2m_ram_ro:
>> @@ -168,7 +185,7 @@ p2m_next_level(struct p2m_domain *p2m, m
>>                                        shift, max)) )
>>          return 0;
>>
>> -    /* PoD: Not present doesn't imply empty. */
>> +    /* PoD/paging: Not present doesn't imply empty. */
>>      if ( !l1e_get_flags(*p2m_entry) )
>>      {
>>          struct page_info *pg;
>> @@ -384,8 +401,9 @@ p2m_set_entry(struct p2m_domain *p2m, un
>>                                     0, L1_PAGETABLE_ENTRIES);
>>          ASSERT(p2m_entry);
>>
>> -        if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
>> -            entry_content = l1e_from_pfn(mfn_x(mfn),
>> +        if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) ||
>> +             (p2mt == p2m_ram_paged) || (p2mt == p2m_ram_paging_in) )
>> +            entry_content = l1e_from_pfn(clipped_mfn(mfn_x(mfn)),
>>                                           p2m_type_to_flags(p2mt, mfn));
>>          else
>>              entry_content = l1e_empty();
>> @@ -393,7 +411,7 @@ p2m_set_entry(struct p2m_domain *p2m, un
>>          if ( entry_content.l1 != 0 )
>>          {
>>              p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags);
>> -            old_mfn = l1e_get_pfn(*p2m_entry);
>> +            old_mfn = unclip_mfn(l1e_get_pfn(*p2m_entry));
>>          }
>>          /* level 1 entry */
>>          p2m->write_p2m_entry(p2m, gfn, p2m_entry, table_mfn,
>> entry_content, 1);
>> @@ -615,11 +633,12 @@ pod_retry_l1:
>>                             sizeof(l1e));
>>
>>      if ( ret == 0 ) {
>> +        unsigned long l1e_mfn = unclip_mfn(l1e_get_pfn(l1e));
>>          p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
>> -        ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
>> +        ASSERT( (l1e_mfn != INVALID_MFN || !p2m_is_ram(p2mt)) ||
>> +                (l1e_mfn == INVALID_MFN && p2m_is_paging(p2mt)) );
>>
>> -        if ( p2m_flags_to_type(l1e_get_flags(l1e))
>> -             == p2m_populate_on_demand )
>> +        if ( p2mt == p2m_populate_on_demand )
>>          {
>>              /* The read has succeeded, so we know that the mapping
>>               * exits at this point.  */
>> @@ -641,7 +660,7 @@ pod_retry_l1:
>>          }
>>
>>          if ( p2m_is_valid(p2mt) || p2m_is_grant(p2mt) )
>> -            mfn = _mfn(l1e_get_pfn(l1e));
>> +            mfn = _mfn(l1e_mfn);
>>          else
>>              /* XXX see above */
>>              p2mt = p2m_mmio_dm;
>> @@ -783,18 +802,26 @@ pod_retry_l2:
>>  pod_retry_l1:
>>      if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
>>      {
>> +        p2m_type_t l1t = p2m_flags_to_type(l1e_get_flags(*l1e));
>>          /* PoD: Try to populate */
>> -        if ( p2m_flags_to_type(l1e_get_flags(*l1e)) ==
>> p2m_populate_on_demand )
>> +        if ( l1t == p2m_populate_on_demand )
>>          {
>>              if ( q != p2m_query ) {
>>                  if ( !p2m_pod_demand_populate(p2m, gfn, PAGE_ORDER_4K,
>> q) )
>>                      goto pod_retry_l1;
>>              } else
>>                  *t = p2m_populate_on_demand;
>> +        } else {
>> +            if ( p2m_is_paging(l1t) )
>> +            {
>> +                *t = l1t;
>> +                /* No need to unclip due to check below */
>> +                mfn = _mfn(l1e_get_pfn(*l1e));
>> +            }
>>          }
>>
>>          unmap_domain_page(l1e);
>> -        return _mfn(INVALID_MFN);
>> +        return (l1t == p2m_ram_paging_out) ? mfn : _mfn(INVALID_MFN);
>             ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>                    This means when p2mt is p2m_ram_paging_in, the mfn must
> be INVALID_MFN;
>                    But in p2m_type_to_flags,when p2m_ram_paging_in or
> p2m_ram_paged, the _PAGE_PRESENT is not set.

Your patch below looks reasonable. I'll give it a try.
Andres

>
> My idea shows like this:
> In p2m_gfn_to_mfn function:
> @@ -1370,6 +1379,7 @@
>      paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
>      l2_pgentry_t *l2e;
>      l1_pgentry_t *l1e;
> +    unsigned long flags;
>
>      ASSERT(paging_mode_translate(d));
>
> @@ -1455,7 +1465,8 @@
>      l1e = map_domain_page(mfn_x(mfn));
>      l1e += l1_table_offset(addr);
>  pod_retry_l1:
> -    if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
> +    flags = l1e_get_flags(*l1e);
> +    if ((( flags & _PAGE_PRESENT) == 0) &&
> (!p2m_is_paging(p2m_flags_to_type(flags))))
>      {
>          /* PoD: Try to populate */
>          if ( p2m_flags_to_type(l1e_get_flags(*l1e)) ==
> p2m_populate_on_demand )
>         {
>             if ( q != p2m_query ) {
>                 if ( !p2m_pod_check_and_populate(p2m, gfn,
>                                                  (l1_pgentry_t *)l1e,
> PAGE_ORDER_4K, q) )
>                     goto pod_retry_l1;
>             } else
>                 *t = p2m_populate_on_demand;
>         }
>
>         unmap_domain_page(l1e);
>         return _mfn(INVALID_MFN);
>     }
>     mfn = _mfn(l1e_get_pfn(*l1e));
>     *t = p2m_flags_to_type(l1e_get_flags(*l1e));
>     unmap_domain_page(l1e);
>
>
>
>
>>      }
>>      mfn = _mfn(l1e_get_pfn(*l1e));
>>      *t = p2m_flags_to_type(l1e_get_flags(*l1e));
>> @@ -914,7 +941,7 @@ static void p2m_change_type_global(struc
>>                      flags = l1e_get_flags(l1e[i1]);
>>                      if ( p2m_flags_to_type(flags) != ot )
>>                          continue;
>> -                    mfn = l1e_get_pfn(l1e[i1]);
>> +                    mfn = unclip_mfn(l1e_get_pfn(l1e[i1]));
>>                      gfn = i1 + (i2 + (i3
>>  #if CONFIG_PAGING_LEVELS >= 4
>>                                      + (i4 * L3_PAGETABLE_ENTRIES)
>> @@ -923,7 +950,7 @@ static void p2m_change_type_global(struc
>>                             * L2_PAGETABLE_ENTRIES) *
>> L1_PAGETABLE_ENTRIES;
>>                      /* create a new 1le entry with the new type */
>>                      flags = p2m_type_to_flags(nt, _mfn(mfn));
>> -                    l1e_content = l1e_from_pfn(mfn, flags);
>> +                    l1e_content = l1e_from_pfn(clipped_mfn(mfn),
>> flags);
>>                      p2m->write_p2m_entry(p2m, gfn, &l1e[i1],
>>                                           l1mfn, l1e_content, 1);
>>                  }
>> @@ -1073,7 +1100,7 @@ long p2m_pt_audit_p2m(struct p2m_domain
>>                                  entry_count++;
>>                              continue;
>>                          }
>> -                        mfn = l1e_get_pfn(l1e[i1]);
>> +                        mfn = unclip_mfn(l1e_get_pfn(l1e[i1]));
>>                          ASSERT(mfn_valid(_mfn(mfn)));
>>                          m2pfn = get_gpfn_from_mfn(mfn);
>>                          if ( m2pfn != gfn &&
>>
>>
>> _______________________________________________
>> Xen-devel mailing list
>> Xen-devel@xxxxxxxxxxxxxxxxxxx
>> http://lists.xensource.com/xen-devel
>
>



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.