[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [V10 PATCH 0/4] pvh dom0 patches...



On 07/05/14 09:50, Jan Beulich wrote:
>>>> On 07.05.14 at 03:00, <mukesh.rathor@xxxxxxxxxx> wrote:
>> As Konrad points out, there are other issues with that part of the code.
>> So, IMO, we should wait and address that altogether for both PV and PVH,
>> and for now leave this as is.
> 
> I'm fine either way, as long as you and Roger can reach agreement.
> I still tend towards considering Roger's position the right one as a
> mid/long term route.
> 
>> If you still feel we should do this in xen (as in required for this
>> patch series to go in), then we'd need to communicate last pfn to guest.
>> However, note that guest would still have all that code for PV, for pvh
>> we'd just skip it. Obvious way coming to mind is:
>>
>> unsigned long last_pfn_mapped; 
>>
>> added to struct start_info.  Alternative would be to add a 
>> new sub call to perhaps getdomaininfo hcall or something similar. 
>> Please lmk. 
> 
> We already have the max_pfn field in the shared info, which for
> PV is under the sole control of the guest kernel. That could
> certainly be set to other than zero to communicate what the
> highest mapped PFN is. The problem I'm seeing with this (and
> with everything you suggest above) is that this doesn't tell the
> guest where the holes are - while Dom0 can get at the machine
> memory map to tell (or really: guess) where they are, DomU can't
> yet will need to as soon as you want to be able to support device
> pass-through. Hence I guess we will need XENMEM_memory_map to
> reflect reality for PVH guests.

I've expanded my patch, which added the memory removed from the holes to
the end of the memory map, to also create an e820 map for Dom0 that
reflects the reality of the underlying p2m. This way PVH guests (either
DomU or Dom0) only need to use XENMEM_memory_map to get the correct e820
map (with no need for clamping in the Dom0 case).

---
diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
index 38ed9f6..03e8008 100644
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -327,11 +327,13 @@ static __init void pvh_add_mem_mapping(struct domain *d, unsigned long gfn,
  * pvh fixme: The following doesn't map MMIO ranges when they sit above the
  *            highest E820 covered address.
  */
-static __init void pvh_map_all_iomem(struct domain *d)
+static __init void pvh_map_all_iomem(struct domain *d, unsigned long nr_pages)
 {
     unsigned long start_pfn, end_pfn, end = 0, start = 0;
     const struct e820entry *entry;
-    unsigned int i, nump;
+    unsigned int i, j, nump, navail, nmap, nr_holes = 0;
+    struct page_info *page;
+    int rc;
 
     for ( i = 0, entry = e820.map; i < e820.nr_map; i++, entry++ )
     {
@@ -353,6 +355,9 @@ static __init void pvh_map_all_iomem(struct domain *d)
                 nump = end_pfn - start_pfn;
                 /* Add pages to the mapping */
                 pvh_add_mem_mapping(d, start_pfn, start_pfn, nump);
+                if ( start_pfn <= nr_pages )
+                    nr_holes += (end_pfn < nr_pages) ?
+                                    nump : (nr_pages - start_pfn);
             }
             start = end;
         }
@@ -369,6 +374,89 @@ static __init void pvh_map_all_iomem(struct domain *d)
         nump = end_pfn - start_pfn;
         pvh_add_mem_mapping(d, start_pfn, start_pfn, nump);
     }
+
+    /*
+     * Add the memory removed by the holes at the end of the
+     * memory map.
+     */
+    for ( i = 0, entry = e820.map; i < e820.nr_map; i++, entry++ )
+    {
+        if ( entry->type != E820_RAM )
+            continue;
+
+        end_pfn = PFN_UP(entry->addr + entry->size);
+        if ( end_pfn <= nr_pages )
+            continue;
+
+        navail = end_pfn - nr_pages;
+        nmap = navail > nr_holes ? nr_holes : navail;
+        start_pfn = PFN_DOWN(entry->addr) < nr_pages ?
+                        nr_pages : PFN_DOWN(entry->addr);
+        page = alloc_domheap_pages(d, get_order_from_pages(nmap), 0);
+        if ( !page )
+            panic("Not enough RAM for domain 0");
+        for ( j = 0; j < nmap; j++ )
+        {
+            rc = guest_physmap_add_page(d, start_pfn + j, page_to_mfn(page), 0);
+            if ( rc != 0 )
+                panic("Unable to add gpfn %#lx mfn %#lx to Dom0 physmap",
+                      start_pfn + j, page_to_mfn(page));
+            page++;
+
+        }
+        nr_holes -= nmap;
+        if ( nr_holes == 0 )
+            break;
+    }
+
+    ASSERT(nr_holes == 0);
+}
+
+static __init void pvh_setup_e820(struct domain *d, unsigned long nr_pages)
+{
+    struct e820entry *entry;
+    unsigned int i;
+    unsigned long pages, cur_pages = 0;
+
+    /*
+     * Craft the e820 memory map for Dom0 based on the hardware e820 map.
+     */
+    d->arch.e820 = xzalloc_array(struct e820entry, e820.nr_map);
+    if ( !d->arch.e820 )
+        panic("Unable to allocate memory for Dom0 e820 map");
+
+    memcpy(d->arch.e820, e820.map, sizeof(struct e820entry) * e820.nr_map);
+    d->arch.nr_e820 = e820.nr_map;
+
+    /* Clamp e820 memory map to match the memory assigned to Dom0 */
+    for ( i = 0, entry = d->arch.e820; i < d->arch.nr_e820; i++, entry++ )
+    {
+        if ( entry->type != E820_RAM )
+            continue;
+
+        if ( nr_pages == cur_pages )
+        {
+            /*
+             * We already have all the assigned memory,
+             * mark this region as reserved.
+             */
+            entry->type = E820_RESERVED;
+            continue;
+        }
+
+        pages = entry->size >> PAGE_SHIFT;
+        if ( (cur_pages + pages) > nr_pages )
+        {
+            /* Truncate region */
+            entry->size = (nr_pages - cur_pages) << PAGE_SHIFT;
+            cur_pages = nr_pages;
+        }
+        else
+        {
+            cur_pages += pages;
+        }
+    }
+    ASSERT(cur_pages == nr_pages);
 }
 
 static __init void dom0_update_physmap(struct domain *d, unsigned long pfn,
@@ -1391,7 +1479,8 @@ int __init construct_dom0(
         pfn = shared_info_paddr >> PAGE_SHIFT;
         dom0_update_physmap(d, pfn, mfn, 0);
 
-        pvh_map_all_iomem(d);
+        pvh_map_all_iomem(d, nr_pages);
+        pvh_setup_e820(d, nr_pages);
     }
 
     if ( d->domain_id == hardware_domid )


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.