|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [PATCH] Fix XEN_SYSCTL_numainfo[node].memsize for memory holes
Fix a long-standing issue (known since at least 2014) with the numainfo call.
The Hypercall `XEN_SYSCTL_numainfo` returns `memsize` for each NUMA node:
xl info -n:
node: memsize memfree distances
0: -> 67584 <- 60672 10,21 <- memsize is off by 2048 MB
1: 65536 60958 21,10
So far, `memsize` is calculated from `NODE_DATA->node_spanned_pages`.
It includes memory holes, leading to wrong memsize on x86.
This patch gets the sum of E820_RAM entries for each NUMA node on boot,
stores it in NODE_DATA->node_present_pages and uses it for `memsize`.
The patch also increases this value in memory_add() on memory hotplug, in the same way as `total_pages`.
The new NODE_DATA->node_present_pages can be slightly lower than the
physical node's RAM due to reserved memory for some of the NUMA nodes.
For example, on the system shown above, NODE_DATA->node_present_pages
reports 63.5GB of usable memory on the 1st NUMA node with this patch.
This patch uses `arch_get_ram_range()` which is an architecture-provided
call that all NUMA architectures already need to provide for iterating
over the usable RAM of the system.
- Tested on a 2-socket and a 4-socket x86 system
- Memory hot-add not tested, but is identical to bumping total_pages.
Signed-off-by: Bernhard Kaindl <bernhard.kaindl@xxxxxxxxx>
---
xen/arch/x86/x86_64/mm.c | 2 ++
xen/common/numa.c | 14 +++++++++++++-
xen/common/sysctl.c | 2 +-
xen/include/xen/numa.h | 3 +++
4 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index b2a280fba3..a22aa45060 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -1333,6 +1333,8 @@ int memory_add(unsigned long spfn, unsigned long epfn,
unsigned int pxm)
/* We can't revert any more */
share_hotadd_m2p_table(&info);
transfer_pages_to_heap(&info);
+ /* Update the node's present pages (like the total_pages of the system) */
+ NODE_DATA(node)->node_present_pages += epfn - spfn;
return 0;
diff --git a/xen/common/numa.c b/xen/common/numa.c
index 28a09766fa..d68cbea44c 100644
--- a/xen/common/numa.c
+++ b/xen/common/numa.c
@@ -504,10 +504,22 @@ void __init setup_node_bootmem(nodeid_t nodeid, paddr_t
start, paddr_t end)
{
unsigned long start_pfn = paddr_to_pfn(start);
unsigned long end_pfn = paddr_to_pfn(end);
+ paddr_t map_start, map_end;
+ int i = 0, err;
NODE_DATA(nodeid)->node_start_pfn = start_pfn;
NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
+ /* Add RAM pages within the node to get the present pages for memsize
infos */
+ NODE_DATA(nodeid)->node_present_pages = 0;
+ while ( (err = arch_get_ram_range(i++, &map_start, &map_end)) != -ENOENT )
{
+ if ( err || map_start >= end || map_end <= start )
+ continue; /* Skip non-RAM and maps outside of the node's memory
range */
+ /* Add memory that is in the node's memory range (within start and
end): */
+ map_start = max(map_start, start);
+ map_end = min(map_end, end);
+ NODE_DATA(nodeid)->node_present_pages += (map_end - map_start) >>
PAGE_SHIFT;
+ }
node_set_online(nodeid);
}
@@ -675,7 +687,7 @@ static void cf_check dump_numa(unsigned char key)
mfn_t mfn = _mfn(node_start_pfn(i) + 1);
printk("NODE%u start->%lu size->%lu free->%lu\n",
- i, node_start_pfn(i), node_spanned_pages(i),
+ i, node_start_pfn(i), node_present_pages(i),
avail_node_heap_pages(i));
/* Sanity check mfn_to_nid() */
if ( node_spanned_pages(i) > 1 && mfn_to_nid(mfn) != i )
diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c
index d02f44fe3a..cba6d3cfea 100644
--- a/xen/common/sysctl.c
+++ b/xen/common/sysctl.c
@@ -316,7 +316,7 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t)
u_sysctl)
{
if ( node_online(i) )
{
- meminfo.memsize = node_spanned_pages(i) << PAGE_SHIFT;
+ meminfo.memsize = node_present_pages(i) << PAGE_SHIFT;
meminfo.memfree = avail_node_heap_pages(i) <<
PAGE_SHIFT;
}
else
diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
index fd1511a6fb..c860f3ad1c 100644
--- a/xen/include/xen/numa.h
+++ b/xen/include/xen/numa.h
@@ -71,6 +71,7 @@ extern nodeid_t *memnodemap;
struct node_data {
unsigned long node_start_pfn;
unsigned long node_spanned_pages;
+ unsigned long node_present_pages;
};
extern struct node_data node_data[];
@@ -91,6 +92,7 @@ static inline nodeid_t mfn_to_nid(mfn_t mfn)
#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
#define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages)
+#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \
NODE_DATA(nid)->node_spanned_pages)
@@ -123,6 +125,7 @@ extern void numa_set_processor_nodes_parsed(nodeid_t node);
extern mfn_t first_valid_mfn;
#define node_spanned_pages(nid) (max_page - mfn_x(first_valid_mfn))
+#define node_present_pages(nid) total_pages
#define node_start_pfn(nid) mfn_x(first_valid_mfn)
#define __node_distance(a, b) 20
--
2.43.0
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |