
Re: [Xen-devel] [V10 PATCH 0/4] pvh dom0 patches...



On 07/05/14 13:34, Jan Beulich wrote:
>>>> On 07.05.14 at 11:48, <roger.pau@xxxxxxxxxx> wrote:
>> I've expanded my patch that added the memory removed by the holes to
>> the end of the memory map so that it also creates an e820 map for
>> Dom0 that reflects the reality of the underlying p2m. This way PVH
>> guests (either DomU or Dom0) need only use XENMEM_memory_map to get
>> the correct e820 map (no clamping needed in the Dom0 case).
> 
> Looks generally good (except that it doesn't seem to deal with the
> DomU case yet), but there are a couple of possible issues:
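
Right, the DomU case is not handled yet. For reference, the guest-side
consumer would look roughly like this (a sketch modelled on what Linux
already does for PV guests; E820MAX and the error handling here are
placeholders):

    struct xen_memory_map memmap;
    static struct e820entry map[E820MAX];
    int rc;

    memmap.nr_entries = E820MAX;            /* capacity of the buffer */
    set_xen_guest_handle(memmap.buffer, map);

    rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
    if ( rc < 0 )
        panic("XENMEM_memory_map failed: %d", rc);

    /* On success nr_entries is updated to the number of entries filled. */
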
> 
>> --- a/xen/arch/x86/domain_build.c
>> +++ b/xen/arch/x86/domain_build.c
>> @@ -327,11 +327,13 @@ static __init void pvh_add_mem_mapping(struct domain *d, unsigned long gfn,
>>   * pvh fixme: The following doesn't map MMIO ranges when they sit above the
>>   *            highest E820 covered address.
>>   */
>> -static __init void pvh_map_all_iomem(struct domain *d)
>> +static __init void pvh_map_all_iomem(struct domain *d, unsigned long nr_pages)
>>  {
>>      unsigned long start_pfn, end_pfn, end = 0, start = 0;
>>      const struct e820entry *entry;
>> -    unsigned int i, nump;
>> +    unsigned int i, j, nump, navail, nmap, nr_holes = 0;
> 
> With nr_pages above being unsigned long, I think some of these
> variables should be too.
> 
>> @@ -369,6 +374,89 @@ static __init void pvh_map_all_iomem(struct domain *d)
>>          nump = end_pfn - start_pfn;
>>          pvh_add_mem_mapping(d, start_pfn, start_pfn, nump);
>>      }
>> +
>> +    /*
>> +     * Add the memory removed by the holes at the end of the
>> +     * memory map.
>> +     */
>> +    for ( i = 0, entry = e820.map; i < e820.nr_map; i++, entry++ )
>> +    {
>> +        if ( entry->type != E820_RAM )
>> +            continue;
>> +
>> +        end_pfn = PFN_UP(entry->addr + entry->size);
>> +        if ( end_pfn <= nr_pages )
>> +            continue;
>> +
>> +        navail = end_pfn - nr_pages;
>> +        nmap = navail > nr_holes ? nr_holes : navail;
>> +        start_pfn = PFN_DOWN(entry->addr) < nr_pages ?
>> +                        nr_pages : PFN_DOWN(entry->addr);
>> +        page = alloc_domheap_pages(d, get_order_from_pages(nmap), 0);
>> +        if ( !page )
>> +            panic("Not enough RAM for domain 0");
>> +        for ( j = 0; j < nmap; j++ )
>> +        {
>> +            rc = guest_physmap_add_page(d, start_pfn + j, page_to_mfn(page), 0);
> 
> I realize that this interface isn't the most flexible one in terms of what
> page orders it allows to be passed in, but could you at least use order
> 9 here when the allocation order above is 9 or higher?

I've changed it to be:

guest_physmap_add_page(d, start_pfn, page_to_mfn(page), order);

and removed the loop.
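
Note that get_order_from_pages() rounds up to the next power of two, so
a single chunk now covers everything the loop used to map, e.g.
(illustrative values):

    get_order_from_pages(1)   == 0   /* a single page */
    get_order_from_pages(512) == 9   /* a full 2MB chunk */
    get_order_from_pages(513) == 10  /* rounds up to 1024 pages */

so whenever nmap reaches 512 pages the mapping is done with order 9 or
higher, as requested.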

> 
>> +static __init void pvh_setup_e820(struct domain *d, unsigned long nr_pages)
>> +{
>> +    struct e820entry *entry;
>> +    unsigned int i;
>> +    unsigned long pages, cur_pages = 0;
>> +
>> +    /*
>> +     * Craft the e820 memory map for Dom0 based on the hardware e820 map.
>> +     */
>> +    d->arch.e820 = xzalloc_array(struct e820entry, e820.nr_map);
>> +    if ( !d->arch.e820 )
>> +        panic("Unable to allocate memory for Dom0 e820 map");
>> +
>> +    memcpy(d->arch.e820, e820.map, sizeof(struct e820entry) * e820.nr_map);
>> +    d->arch.nr_e820 = e820.nr_map;
>> +
>> +    /* Clamp e820 memory map to match the memory assigned to Dom0 */
>> +    for ( i = 0, entry = d->arch.e820; i < d->arch.nr_e820; i++, entry++ )
>> +    {
>> +        if ( entry->type != E820_RAM )
>> +            continue;
>> +
>> +        if ( nr_pages == cur_pages )
>> +        {
>> +            /*
>> +             * We already have all the assigned memory,
>> +             * mark this region as reserved.
>> +             */
>> +            entry->type = E820_RESERVED;
> 
> That seems undesirable, as it will prevent Linux from using that space
> for MMIO purposes. Plus keeping the region here is consistent with
> simply shrinking the range below (yielding the remainder uncovered
> by any E820 entry). I think you ought to zap the entry here.

I don't think these regions can be used for MMIO; the gpfns in the guest
p2m are not set to p2m_mmio_direct.
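
The updated version zeroes the entry size instead (keeping truncation
for the partially used region), so, with made-up numbers, a Dom0 given
nr_pages = 0x40000 (1GB) on a host map of

    [0x00000000, 0x0009f000) RAM
    [0x00100000, 0x80000000) RAM
    [0x80000000, ...       ) reserved

ends up seeing:

    [0x00000000, 0x0009f000) RAM        (0x9f pages, fully used)
    [0x00100000, 0x40061000) RAM        (truncated to the remaining pages)
    [0x80000000, ...       ) reserved   (untouched)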

Here is the updated patch:
---
commit b84f2ae87de562a3df69599c4b3734262e106445
Author: Roger Pau Monne <roger.pau@xxxxxxxxxx>
Date:   Thu May 8 09:21:49 2014 +0200

    xen: fix setup of PVH Dom0 memory map
    
    This patch adds the memory removed by MMIO holes back at the end of
    the memory map for PVH Dom0, so the guest OS doesn't have to
    populate this memory manually.
    
    Also, provide a suitable e820 memory map for PVH Dom0 that matches
    the underlying p2m map. This means that PVH guests should always use
    XENMEM_memory_map in order to obtain the e820, even when running as
    Dom0.
    
    Signed-off-by: Roger Pau Monné <roger.pau@xxxxxxxxxx>

diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
index 38ed9f6..3cbd43c 100644
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -327,11 +327,14 @@ static __init void pvh_add_mem_mapping(struct domain *d, unsigned long gfn,
  * pvh fixme: The following doesn't map MMIO ranges when they sit above the
  *            highest E820 covered address.
  */
-static __init void pvh_map_all_iomem(struct domain *d)
+static __init void pvh_map_all_iomem(struct domain *d, unsigned long nr_pages)
 {
     unsigned long start_pfn, end_pfn, end = 0, start = 0;
     const struct e820entry *entry;
-    unsigned int i, nump;
+    unsigned long nump, nmap, navail, nr_holes = 0;
+    unsigned int i, order;
+    struct page_info *page;
+    int rc;
 
     for ( i = 0, entry = e820.map; i < e820.nr_map; i++, entry++ )
     {
@@ -353,6 +356,9 @@ static __init void pvh_map_all_iomem(struct domain *d)
                 nump = end_pfn - start_pfn;
                 /* Add pages to the mapping */
                 pvh_add_mem_mapping(d, start_pfn, start_pfn, nump);
+                if ( start_pfn <= nr_pages )
+                    nr_holes += (end_pfn < nr_pages) ?
+                                    nump : (nr_pages - start_pfn);
             }
             start = end;
         }
@@ -369,6 +375,84 @@ static __init void pvh_map_all_iomem(struct domain *d)
         nump = end_pfn - start_pfn;
         pvh_add_mem_mapping(d, start_pfn, start_pfn, nump);
     }
+
+    /*
+     * Add the memory removed by the holes at the end of the
+     * memory map.
+     */
+    for ( i = 0, entry = e820.map; i < e820.nr_map && nr_holes > 0;
+          i++, entry++ )
+    {
+        if ( entry->type != E820_RAM )
+            continue;
+
+        end_pfn = PFN_UP(entry->addr + entry->size);
+        if ( end_pfn <= nr_pages )
+            continue;
+
+        navail = end_pfn - nr_pages;
+        nmap = navail > nr_holes ? nr_holes : navail;
+        start_pfn = PFN_DOWN(entry->addr) < nr_pages ?
+                        nr_pages : PFN_DOWN(entry->addr);
+        order = get_order_from_pages(nmap);
+        page = alloc_domheap_pages(d, order, 0);
+        if ( !page )
+            panic("Not enough RAM for domain 0");
+        rc = guest_physmap_add_page(d, start_pfn, page_to_mfn(page), order);
+        if ( rc != 0 )
+            panic("Unable to add gpfn %#lx mfn %#lx order: %u to Dom0 physmap",
+                  start_pfn, page_to_mfn(page), order);
+        nr_holes -= nmap;
+    }
+
+    ASSERT(nr_holes == 0);
+}
+
+static __init void pvh_setup_e820(struct domain *d, unsigned long nr_pages)
+{
+    struct e820entry *entry;
+    unsigned int i;
+    unsigned long pages, cur_pages = 0;
+
+    /*
+     * Craft the e820 memory map for Dom0 based on the hardware e820 map.
+     */
+    d->arch.e820 = xzalloc_array(struct e820entry, e820.nr_map);
+    if ( !d->arch.e820 )
+        panic("Unable to allocate memory for Dom0 e820 map");
+
+    memcpy(d->arch.e820, e820.map, sizeof(struct e820entry) * e820.nr_map);
+    d->arch.nr_e820 = e820.nr_map;
+
+    /* Clamp e820 memory map to match the memory assigned to Dom0 */
+    for ( i = 0, entry = d->arch.e820; i < d->arch.nr_e820; i++, entry++ )
+    {
+        if ( entry->type != E820_RAM )
+            continue;
+
+        if ( nr_pages == cur_pages )
+        {
+            /*
+             * We already have all the assigned memory,
+             * so set this region's length to 0.
+             */
+            entry->size = 0;
+            continue;
+        }
+
+        pages = entry->size >> PAGE_SHIFT;
+        if ( (cur_pages + pages) > nr_pages )
+        {
+            /* Truncate region */
+            entry->size = (nr_pages - cur_pages) << PAGE_SHIFT;
+            cur_pages = nr_pages;
+        }
+        else
+        {
+            cur_pages += pages;
+        }
+    }
+    ASSERT(cur_pages == nr_pages);
 }
 
 static __init void dom0_update_physmap(struct domain *d, unsigned long pfn,
@@ -1391,7 +1475,8 @@ int __init construct_dom0(
         pfn = shared_info_paddr >> PAGE_SHIFT;
         dom0_update_physmap(d, pfn, mfn, 0);
 
-        pvh_map_all_iomem(d);
+        pvh_map_all_iomem(d, nr_pages);
+        pvh_setup_e820(d, nr_pages);
     }
 
     if ( d->domain_id == hardware_domid )
