[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [MINI-OS PATCH 12/19] mini-os: kexec: add support for handing over some memory across kexec


  • To: Juergen Gross <jgross@xxxxxxxx>, <minios-devel@xxxxxxxxxxxxxxxxxxxx>, <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Jason Andryuk <jason.andryuk@xxxxxxx>
  • Date: Thu, 10 Jul 2025 19:21:18 -0400
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass (sender ip is 165.204.84.17) smtp.rcpttodomain=suse.com smtp.mailfrom=amd.com; dmarc=pass (p=quarantine sp=quarantine pct=100) action=none header.from=amd.com; dkim=none (message not signed); arc=none (0)
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector10001; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=Mu8HczdrIOu4FRdPm2mPE+j0YN4Icvri6gDA0ab8h0c=; b=t2BvLkyJESoUiRBJ2G1/WC7ouwIlZ/8kdcTLjpsqIBYMp825OwE7BHXTgFbSlZGY7B1x1ElKuaDveY4rkCnuIxQiG/gPBfzGynoFD6V4xmvO4imTCrCyrDwGVyAMN8nNlSut9r1Nqa8Wn36m//BhU1ontyZ5Zf02WVYlqnp2XvSgFRthBzQ7klLvtsU0Em0vp6uNjpMbrBLyTuxaYF3KSmqR5XI0J564RYK0HwZ9+yj8BmmN/AS6Db//zte69RaXCU9ylIP4hbBfAhXLvszH54zgCEOre8cjK1V2kEueS1bpJ3ncK+9XtgjCpmasYpJ3pXcubAr9jrext7SQA0hSaA==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector10001; d=microsoft.com; cv=none; b=gAJ3tdtnJHx7+YBLXu7Yc9tX9XiazvJXT3FUbk47j9oFlaFxyXp/XW2e4K6KaxQ3co4YiA/ZCY+pP6PIvl6tC0R3S3pUen6GcyPVoGNdcbswxJywraLq7gu0PxiBY7qnC+S9nph/Pe+YnuSu2zig6aRkXc3DIJekfzMHPzaBTsUksxfH5/N+hVEhTdMJIHec8vWvXuIoW+p/pAADwjMrs29BNvoYDcpl5CuUV2zFdwo6KmWIEuOjU0hf4ePgzbwfqGtbrbTsMejZnqa438ukQv6THGw5JyU6WBEjBPpzVKUzKUnqcUtPVA5y1iOg764H6NJ/AyFLMN5FOU7LMNlOOA==
  • Cc: <samuel.thibault@xxxxxxxxxxxx>
  • Delivery-date: Fri, 11 Jul 2025 00:36:55 +0000
  • List-id: Mini-os development list <minios-devel.lists.xenproject.org>

On 2025-07-02 04:12, Juergen Gross wrote:
Especially for support of Xenstore-stubdom live update some memory must
be handed over to the new kernel without moving it around: as the
9pfs device used for storing and retrieving the state of Xenstore
needs to be kept operational across kexec (it can't be reopened due to
Xenstore not being available without access to the device), the ring
pages need to be accessible via active grants by the backend all the
time.

Add the basic support for that by reserving a pre-defined number of
memory pages at the top of the memory. This memory area will be
handed over to the new kernel via specifying it as a module in
struct hvm_start_info.

The contents of the memory area are described via a generic table of
contents in the last page of the memory.

Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
---
  Config.mk             |  2 ++
  arch/x86/kexec.c      | 77 +++++++++++++++++++++++++++++++++++++++++++
  arch/x86/mm.c         | 18 ++++++++++
  arch/x86/setup.c      | 28 ++++++++++++++++
  include/kernel.h      |  1 +
  include/kexec.h       | 45 +++++++++++++++++++++++++
  include/x86/arch_mm.h |  1 +
  kexec.c               |  3 ++
  mm.c                  |  6 ++++
  9 files changed, 181 insertions(+)

diff --git a/Config.mk b/Config.mk
index b9675e61..0e4e86d8 100644
--- a/Config.mk
+++ b/Config.mk
@@ -220,6 +220,8 @@ CONFIG-$(lwip) += CONFIG_LWIP
  $(foreach i,$(CONFIG-y),$(eval $(i) ?= y))
  $(foreach i,$(CONFIG-n),$(eval $(i) ?= n))
+CONFIG-val-$(CONFIG_KEXEC) += CONFIG_KEXEC_MODULE_PAGES
+

I don't know Makefiles well enough to review the preceding patch. This doesn't seem to be used?

  $(foreach i,$(CONFIG-val-y),$(eval $(i) ?= 0))
CONFIG-x += CONFIG_LIBXS
diff --git a/arch/x86/kexec.c b/arch/x86/kexec.c
index 804e7b6d..7fb98473 100644
--- a/arch/x86/kexec.c
+++ b/arch/x86/kexec.c
@@ -201,10 +201,73 @@ static unsigned long kexec_param_loc;
  static unsigned int kexec_param_size;
  static unsigned long kexec_param_mem;
+static struct kexec_module *kexec_check_module(void)
+{
+    unsigned long mod_size;
+    unsigned long mod;
+    struct kexec_module *module_ptr;
+
+    mod = get_module(&mod_size);
+    if ( !mod )
+        return NULL;
+    /* Size must be a multiple of PAGE_SIZE. */
+    if ( mod_size & ~PAGE_MASK )
+        return NULL;
+
+    /* Kxec module description is at start of the last page of the module. */

Kexec

+    module_ptr = (void *)(mod + mod_size - (unsigned long)PAGE_SIZE);
+
+    /* Check eye catcher. */
+    if ( memcmp(module_ptr->eye_catcher, KEXECMOD_EYECATCHER,
+                sizeof(module_ptr->eye_catcher)) )
+        return NULL;
+    if ( module_ptr->n_pages != (mod_size >> PAGE_SHIFT) - 1 )
+        return NULL;
+
+    return module_ptr;
+}

+#define min(a, b) ((a) < (b) ? (a) : (b))
+void kexec_module(unsigned long start_pfn, unsigned long max_pfn)
+{
+    /* Reuse already existing kexec module. */
+    mod_ptr = kexec_check_module();
+    if ( !mod_ptr && CONFIG_KEXEC_MODULE_PAGES )

What if CONFIG_KEXEC_MODULE_PAGES changes between the old and the new stubdom?

+    {
+        max_pfn = min(max_pfn, PHYS_PFN(0xffffffff));
+
+        iterate_memory_range(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn),
+                             get_mod_addr);
+        BUG_ON(!kexec_mod_start);
+
+        mod_ptr = (void *)(kexec_mod_start +
+                           ((CONFIG_KEXEC_MODULE_PAGES - 1) << PAGE_SHIFT));
+        memset(mod_ptr, 0, PAGE_SIZE);
+        memcpy(mod_ptr->eye_catcher, KEXECMOD_EYECATCHER,
+               sizeof(mod_ptr->eye_catcher));
+        mod_ptr->n_pages = CONFIG_KEXEC_MODULE_PAGES - 1;
+        memset(mod_ptr->pg2rec, KEXECMOD_PG_FREE, mod_ptr->n_pages);

I was wondering about a BUILD_BUG_ON for CONFIG_KEXEC_MODULE_PAGES versus some limit but I can't think of one.
+        mod_ptr->recs_off = sizeof(struct kexec_module) +
+                            CONFIG_KEXEC_MODULE_PAGES + (mod_ptr->n_pages & 1);

mod_ptr->n_pages & 1 is to ensure 16bit alignment?

mod_ptr->n_pages = CONFIG_KEXEC_MODULE_PAGES - 1, and pg2rec is n_pages, so using CONFIG_KEXEC_MODULE_PAGES makes this off by 1?

+
+        set_reserved_range(kexec_mod_start, (unsigned long)mod_ptr + 
PAGE_SIZE);
+    }
+}
+

@@ -252,6 +316,19 @@ int kexec_get_entry(const char *cmdline)
      info->memmap_entries = mmap - (struct hvm_memmap_table_entry *)next;
      next = mmap;
+ if ( mod_ptr )
+    {
+        mod = next;
+        memset(mod, 0, sizeof(*mod));
+        info->nr_modules = 1;
+        info->modlist_paddr = kexec_param_loc +

Looking at this again, I wonder if kexec_param_loc would be better named _pa or _paddr.

+                              (unsigned long)next - kexec_param_mem;
+        mod->paddr = kexec_mod_start;
+        mod->size = PFN_PHYS(mod_ptr->n_pages + 1);
+        mod->cmdline_paddr = 0;
+        next = mod + 1;
+    }
+
      info->cmdline_paddr = kexec_param_loc + (unsigned long)next - 
kexec_param_mem;
      strcpy(next, cmdline);


diff --git a/include/kexec.h b/include/kexec.h
index b89c3000..0200005f 100644
--- a/include/kexec.h
+++ b/include/kexec.h
@@ -2,6 +2,48 @@
  #define _KEXEC_H
  #include <mini-os/elf.h>
+/*
+ * Kexec module used to hand over memory across kexec().
+ *
+ * This is an ABI which should be modified only in a compatible way.
+ * struct kexec_module is located at the start of the last page of the module.

Why is kexec_module, which seems like a header, placed in the last page?

+ *
+ * The module can contain data/pages of multiple users. Each user has an own
+ * record which layout is depending on the user. Records are linked via a table
+ * of record offsets.
+ *
+ * All admin data (struct kexec_module, record offset table and records) must
+ * fit into the last page of the module.
+ */
+struct kexec_module {
+    uint8_t eye_catcher[8];
+#define KEXECMOD_EYECATCHER "KexecMem"
+    uint16_t n_pages;       /* Number of allocatable pages in the module.    */
+    uint16_t n_records;     /* Size of record table (max. 255).              */
+#define KEXECMOD_REC_MAX    255
+    uint16_t recs_off;      /* Offset to record table from start of page.    */
+                            /* The record table is an array of               */
+                            /* struct kexec_module_rec.                      */
+    uint8_t pg2rec[];       /* One entry per allocatable module page, value  */
+                            /* is record number (starting from 0) associated */
+                            /* with it. Free pages have value 255.           */

This reads like it is 1 page per record...

+#define KEXECMOD_PG_FREE    255
+};
+
+struct kexec_module_rec {
+    uint16_t offset;        /* Offset to record from start of page.          */

... but then why would you need an offset? How do you identify which "page" or do you mean from the start of the module?

+    uint8_t type;           /* Type of record.                               */
+#define KEXECMOD_REC_NONE   0
+    uint8_t size;           /* Size of record.                               */
+};
+
+#ifndef CONFIG_KEXEC_MODULE_PAGES
+#define CONFIG_KEXEC_MODULE_PAGES 0
+#endif
+
+extern unsigned long kexec_mod_start;
+extern struct kexec_module *mod_ptr;
+
  /* One element of kexec actions (last element must have action KEXEC_CALL): */
  struct kexec_action {
      enum {

diff --git a/kexec.c b/kexec.c
index 2607c819..ded29882 100644
--- a/kexec.c
+++ b/kexec.c
@@ -250,3 +250,6 @@ int kexec_add_action(int action, void *dest, void *src, 
unsigned int len)
return 0;
  }
+
+unsigned long kexec_mod_start;
+struct kexec_module *mod_ptr;

Personally, I find it a little unusual to add variables at the end of the file. But it's fine and you don't have to change it.

Regards,
Jason



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.