[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [RFC PATCH v3 02/22] x86/boot: Reserve live update boot memory



From: David Woodhouse <dwmw@xxxxxxxxxxxx>

For live update to work, it will need a region of memory that can be
given to the boot allocator while it parses the state information from
the previous Xen and works out which of the other pages of memory it
can consume.

Reserve that like the crashdump region, and accept it on the command
line. Use only that region for early boot, and register the remaining
RAM (all of it for now, until the real live update happens) later.

Signed-off-by: David Woodhouse <dwmw@xxxxxxxxxxxx>
---
 docs/misc/xen-command-line.pandoc |   9 +++
 xen/arch/x86/setup.c              | 120 ++++++++++++++++++++++++++++--
 xen/include/asm-x86/config.h      |   1 +
 3 files changed, 123 insertions(+), 7 deletions(-)

diff --git a/docs/misc/xen-command-line.pandoc 
b/docs/misc/xen-command-line.pandoc
index 5eb3a07276..61524f1056 100644
--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -1402,6 +1402,15 @@ This option is intended for debugging purposes only.  
Enable MSR_DEBUGCTL.LBR
 in hypervisor context to be able to dump the Last Interrupt/Exception To/From
 record with other registers.
 
+### liveupdate
+> `= <size>[@<offset>]`
+
+Specify size and optionally placement of the boot memory reserved for
+Xen live update. The size must be a multiple of 2MiB.
+
+A trailing `@<offset>` specifies the exact address this area should be
+placed at, which must be below 4GiB.
+
 ### loglvl
 > `= <level>[/<rate-limited level>]` where level is `none | error | warning | 
 > info | debug | all`
 
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 2677f127b9..63f06d4856 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -681,6 +681,47 @@ static unsigned int __init copy_bios_e820(struct e820entry 
*map, unsigned int li
 /* How much of the directmap is prebuilt at compile time. */
 #define PREBUILT_MAP_LIMIT (1 << L2_PAGETABLE_SHIFT)
 
+unsigned long lu_bootmem_start, lu_bootmem_size;
+static unsigned long lu_breadcrumb_phys;
+
+static int __init parse_liveupdate(const char *str)
+{
+    const char *cur;
+
+    lu_bootmem_size = parse_size_and_unit(cur = str, &str);
+    if ( !lu_bootmem_size || cur == str )
+        return -EINVAL;
+
+    if ( lu_bootmem_size & ((MB(2) - 1)) )
+    {
+        printk(XENLOG_WARNING "Live update size must be a multiple of 2MiB\n");
+        return -EINVAL;
+    }
+
+    if (!*str) {
+        printk(XENLOG_INFO "Live update size 0x%lx\n", lu_bootmem_size);
+        return 0;
+    }
+
+    if (*str != '@')
+        return -EINVAL;
+
+    lu_bootmem_start = parse_size_and_unit(cur = str + 1, &str);
+    if ( !lu_bootmem_start || cur == str )
+        return -EINVAL;
+
+    printk(XENLOG_INFO "Live update area 0x%lx-0x%lx (0x%lx)\n", 
lu_bootmem_start,
+           lu_bootmem_start + lu_bootmem_size, lu_bootmem_size);
+
+    /*
+     * If present, the breadcrumb leading to the migration data stream is
+     * in the very beginning of the reserved bootmem region.
+     */
+    lu_breadcrumb_phys = lu_bootmem_start;
+    return 0;
+}
+custom_param("liveupdate", parse_liveupdate);
+
 void __init noreturn __start_xen(unsigned long mbi_p)
 {
     char *memmap_type = NULL;
@@ -690,7 +731,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
     module_t *mod;
     unsigned long nr_pages, raw_max_page, modules_headroom, module_map[1];
     int i, j, e820_warn = 0, bytes = 0;
-    bool acpi_boot_table_init_done = false, relocated = false;
+    bool acpi_boot_table_init_done = false, relocated = false, lu_reserved = 
false;
     int ret;
     struct ns16550_defaults ns16550 = {
         .data_bits = 8,
@@ -980,6 +1021,22 @@ void __init noreturn __start_xen(unsigned long mbi_p)
     set_kexec_crash_area_size((u64)nr_pages << PAGE_SHIFT);
     kexec_reserve_area(&boot_e820);
 
+    if ( lu_bootmem_start )
+    {
+        /* XX: Check it's in usable memory first */
+        reserve_e820_ram(&boot_e820, lu_bootmem_start, lu_bootmem_start + 
lu_bootmem_size);
+
+        /* Since it will already be out of the e820 map by the time the first
+         * loop over physical memory, map it manually already. */
+        set_pdx_range(lu_bootmem_start >> PAGE_SHIFT,
+                      (lu_bootmem_start + lu_bootmem_size) >> PAGE_SHIFT);
+        map_pages_to_xen((unsigned long)__va(lu_bootmem_start),
+                         maddr_to_mfn(lu_bootmem_start),
+                         PFN_DOWN(lu_bootmem_size), PAGE_HYPERVISOR);
+
+        lu_reserved = true;
+    }
+
     initial_images = mod;
     nr_initial_images = mbi->mods_count;
 
@@ -1207,6 +1264,16 @@ void __init noreturn __start_xen(unsigned long mbi_p)
             printk("New Xen image base address: %#lx\n", xen_phys_start);
         }
 
+        /* Is the region suitable for the live update bootmem region? */
+        if ( lu_bootmem_size && ! lu_bootmem_start && e < limit )
+        {
+            end = consider_modules(s, e, lu_bootmem_size, mod, mbi->mods_count 
+ relocated, -1);
+            if ( end )
+            {
+                e = lu_bootmem_start = end - lu_bootmem_size;
+            }
+        }
+
         /* Is the region suitable for relocating the multiboot modules? */
         for ( j = mbi->mods_count - 1; j >= 0; j-- )
         {
@@ -1270,6 +1337,15 @@ void __init noreturn __start_xen(unsigned long mbi_p)
     if ( !xen_phys_start )
         panic("Not enough memory to relocate Xen\n");
 
+    if ( lu_bootmem_start )
+    {
+        if ( !lu_reserved )
+            reserve_e820_ram(&boot_e820, lu_bootmem_start, lu_bootmem_start + 
lu_bootmem_size);
+        printk("LU bootmem: 0x%lx - 0x%lx\n", lu_bootmem_start, 
lu_bootmem_start + lu_bootmem_size);
+        init_boot_pages(lu_bootmem_start, lu_bootmem_start + lu_bootmem_size);
+        lu_reserved = true;
+    }
+
     /* This needs to remain in sync with xen_in_range(). */
     reserve_e820_ram(&boot_e820, __pa(_stext), __pa(__2M_rwdata_end));
 
@@ -1281,8 +1357,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
         xenheap_max_mfn(PFN_DOWN(highmem_start - 1));
 
     /*
-     * Walk every RAM region and map it in its entirety (on x86/64, at least)
-     * and notify it to the boot allocator.
+     * Walk every RAM region and map it in its entirety and (unless in
+     * live update mode) notify it to the boot allocator.
      */
     for ( i = 0; i < boot_e820.nr_map; i++ )
     {
@@ -1335,6 +1411,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
                 printk(XENLOG_WARNING "Ignoring inaccessible memory range"
                                       " %013"PRIx64"-%013"PRIx64"\n",
                        s, e);
+                reserve_e820_ram(&boot_e820, s, e);
                 continue;
             }
             map_e = e;
@@ -1342,6 +1419,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
             printk(XENLOG_WARNING "Ignoring inaccessible memory range"
                                   " %013"PRIx64"-%013"PRIx64"\n",
                    e, map_e);
+            reserve_e820_ram(&boot_e820, e, map_e);
         }
 
         set_pdx_range(s >> PAGE_SHIFT, e >> PAGE_SHIFT);
@@ -1352,7 +1430,9 @@ void __init noreturn __start_xen(unsigned long mbi_p)
                       ARRAY_SIZE(l2_directmap) << L2_PAGETABLE_SHIFT);
 
         /* Pass mapped memory to allocator /before/ creating new mappings. */
-        init_boot_pages(s, min(map_s, e));
+        if ( !lu_reserved)
+            init_boot_pages(s, min(map_s, e));
+
         s = map_s;
         if ( s < map_e )
         {
@@ -1360,7 +1440,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
 
             map_s = (s + mask) & ~mask;
             map_e &= ~mask;
-            init_boot_pages(map_s, map_e);
+            if ( !lu_reserved)
+                init_boot_pages(map_s, map_e);
         }
 
         if ( map_s > map_e )
@@ -1376,7 +1457,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
             {
                 map_pages_to_xen((unsigned long)__va(map_e), 
maddr_to_mfn(map_e),
                                  PFN_DOWN(end - map_e), PAGE_HYPERVISOR);
-                init_boot_pages(map_e, end);
+                if ( !lu_reserved)
+                    init_boot_pages(map_e, end);
                 map_e = end;
             }
         }
@@ -1391,7 +1473,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
         {
             map_pages_to_xen((unsigned long)__va(s), maddr_to_mfn(s),
                              PFN_DOWN(map_s - s), PAGE_HYPERVISOR);
-            init_boot_pages(s, map_s);
+            if ( !lu_reserved)
+                init_boot_pages(s, map_s);
         }
     }
 
@@ -1489,6 +1572,29 @@ void __init noreturn __start_xen(unsigned long mbi_p)
 
     numa_initmem_init(0, raw_max_page);
 
+    if ( lu_bootmem_start )
+    {
+        unsigned long limit = virt_to_mfn(HYPERVISOR_VIRT_END - 1);
+        uint64_t mask = PAGE_SIZE - 1;
+
+        for ( i = 0; i < boot_e820.nr_map; i++ )
+        {
+            uint64_t s, e;
+
+            if ( boot_e820.map[i].type != E820_RAM )
+                continue;
+            s = (boot_e820.map[i].addr + mask) & ~mask;
+            e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask;
+            s = max_t(uint64_t, s, 1<<20);
+            if ( PFN_DOWN(s) > limit )
+                continue;
+            if ( PFN_DOWN(e) > limit )
+                e = pfn_to_paddr(limit);
+
+            init_boot_pages(s, e);
+        }
+    }
+
     if ( max_page - 1 > virt_to_mfn(HYPERVISOR_VIRT_END - 1) )
     {
         unsigned long limit = virt_to_mfn(HYPERVISOR_VIRT_END - 1);
diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h
index d0cfbb70a8..d59e5101c3 100644
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -279,6 +279,7 @@ extern unsigned char boot_edid_info[128];
 
 #ifndef __ASSEMBLY__
 extern unsigned long xen_phys_start;
+extern unsigned long lu_bootmem_start, lu_bootmem_size;
 #endif
 
 /* GDT/LDT shadow mapping area. The first per-domain-mapping sub-area. */
-- 
2.21.0


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.