
[Xen-devel] [PATCH v8 01/21] xen: make two memory hypercalls vNUMA-aware



Make XENMEM_increase_reservation and XENMEM_populate_physmap
vNUMA-aware.

That is, if the guest asks Xen to allocate memory for a specific vnode,
Xen can translate that vnode to a pnode using the guest's vNUMA
information.

XENMEMF_vnode is introduced so that the guest can indicate that the node
number is in fact a virtual node number and should be translated by Xen.
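
For illustration only (not part of this patch), a guest could use the new
flag roughly as below, assuming the usual HYPERVISOR_memory_op() guest
wrapper and a vNUMA topology with at least two vnodes:

    /* Sketch: ask Xen to populate 16 frames, preferably from vnode 1.
     * gpfns[] must be filled by the guest with the GPFNs to be backed. */
    xen_pfn_t gpfns[16];
    long rc;
    struct xen_memory_reservation res = {
        .nr_extents   = 16,
        .extent_order = 0,
        /* XENMEMF_vnode tells Xen the node number is a virtual node. */
        .mem_flags    = XENMEMF_node(1) | XENMEMF_vnode |
                        XENMEMF_exact_node_request,
        .domid        = DOMID_SELF,
    };

    set_xen_guest_handle(res.extent_start, gpfns);
    rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &res);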

XENFEAT_memory_op_vnode_supported is introduced to indicate that Xen is
able to translate a virtual node to a physical node.
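
A guest would probe the feature bit before relying on the translation,
e.g. (illustrative sketch, using the standard XENVER_get_features
interface and the HYPERVISOR_xen_version() guest wrapper):

    /* Sketch: query submap 0 and test the new feature bit. */
    struct xen_feature_info fi = { .submap_idx = 0 };
    bool vnode_supported = false;

    if ( HYPERVISOR_xen_version(XENVER_get_features, &fi) == 0 &&
         (fi.submap & (1U << XENFEAT_memory_op_vnode_supported)) )
        vnode_supported = true; /* safe to pass XENMEMF_vnode */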

Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
Cc: Jan Beulich <JBeulich@xxxxxxxx>
Cc: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
Changes in v8:
1. Move all "args.* = " after construct_memop_from_reservation.

Changes in v7:
1. Remove XEN_NUMA_NO_NODE.
2. Use nodeid_t for vnode and pnode variables.

Changes in v6:
1. Add logic in construct_memop_from_reservation.
---
 xen/common/kernel.c           |  2 +-
 xen/common/memory.c           | 60 +++++++++++++++++++++++++++++++++----------
 xen/include/public/features.h |  3 +++
 xen/include/public/memory.h   |  2 ++
 4 files changed, 53 insertions(+), 14 deletions(-)

diff --git a/xen/common/kernel.c b/xen/common/kernel.c
index 8a04d8b..6f359c1 100644
--- a/xen/common/kernel.c
+++ b/xen/common/kernel.c
@@ -307,7 +307,7 @@ DO(xen_version)(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
         switch ( fi.submap_idx )
         {
         case 0:
-            fi.submap = 0;
+            fi.submap = (1U << XENFEAT_memory_op_vnode_supported);
             if ( VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3) )
                 fi.submap |= (1U << XENFEAT_pae_pgdir_above_4gb);
             if ( paging_mode_translate(current->domain) )
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 9d9d43c..0737811 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -692,11 +692,12 @@ out:
     return rc;
 }
 
-static int construct_memop_from_reservation(
+static int construct_memop_from_reservation(struct domain *d,
                const struct xen_memory_reservation *r,
                struct memop_args *a)
 {
     unsigned int address_bits;
+    int rc;
 
     a->extent_list  = r->extent_start;
     a->nr_extents   = r->nr_extents;
@@ -712,11 +713,41 @@ static int construct_memop_from_reservation(
         a->memflags = MEMF_bits(address_bits);
     }
 
-    a->memflags |= MEMF_node(XENMEMF_get_node(r->mem_flags));
-    if ( r->mem_flags & XENMEMF_exact_node_request )
-        a->memflags |= MEMF_exact_node;
+    if ( r->mem_flags & XENMEMF_vnode )
+    {
+        nodeid_t vnode, pnode;
 
-    return 0;
+        read_lock(&d->vnuma_rwlock);
+        if ( d->vnuma )
+        {
+            vnode = XENMEMF_get_node(r->mem_flags);
+            if ( vnode >= d->vnuma->nr_vnodes )
+            {
+                rc = -EINVAL;
+                read_unlock(&d->vnuma_rwlock);
+                goto out;
+            }
+
+            pnode = d->vnuma->vnode_to_pnode[vnode];
+            if ( pnode != NUMA_NO_NODE )
+            {
+                a->memflags |= MEMF_node(pnode);
+                if ( r->mem_flags & XENMEMF_exact_node_request )
+                    a->memflags |= MEMF_exact_node;
+            }
+        }
+        read_unlock(&d->vnuma_rwlock);
+    }
+    else
+    {
+        a->memflags |= MEMF_node(XENMEMF_get_node(r->mem_flags));
+        if ( r->mem_flags & XENMEMF_exact_node_request )
+            a->memflags |= MEMF_exact_node;
+    }
+
+    rc = 0;
+out:
+    return rc;
 }
 
 long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
@@ -744,21 +775,24 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
         if ( unlikely(start_extent >= reservation.nr_extents) )
             return start_extent;
 
-        if ( construct_memop_from_reservation(&reservation, &args) )
+        d = rcu_lock_domain_by_any_id(reservation.domid);
+        if ( d == NULL )
+            return start_extent;
+
+        if ( construct_memop_from_reservation(d, &reservation, &args) )
+        {
+            rcu_unlock_domain(d);
             return start_extent;
-        args.nr_done      = start_extent;
-        args.preempted    = 0;
+        }
 
+        args.domain    = d;
+        args.nr_done   = start_extent;
+        args.preempted = 0;
 
         if ( op == XENMEM_populate_physmap
              && (reservation.mem_flags & XENMEMF_populate_on_demand) )
             args.memflags |= MEMF_populate_on_demand;
 
-        d = rcu_lock_domain_by_any_id(reservation.domid);
-        if ( d == NULL )
-            return start_extent;
-        args.domain = d;
-
         if ( xsm_memory_adjust_reservation(XSM_TARGET, current->domain, d) )
         {
             rcu_unlock_domain(d);
diff --git a/xen/include/public/features.h b/xen/include/public/features.h
index 16d92aa..2110b04 100644
--- a/xen/include/public/features.h
+++ b/xen/include/public/features.h
@@ -99,6 +99,9 @@
 #define XENFEAT_grant_map_identity        12
  */
 
+/* Guest can use XENMEMF_vnode to specify virtual node for memory op. */
+#define XENFEAT_memory_op_vnode_supported 13
+
 #define XENFEAT_NR_SUBMAPS 1
 
 #endif /* __XEN_PUBLIC_FEATURES_H__ */
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 595f953..2b5206b 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -55,6 +55,8 @@
 /* Flag to request allocation only from the node specified */
 #define XENMEMF_exact_node_request  (1<<17)
 #define XENMEMF_exact_node(n) (XENMEMF_node(n) | XENMEMF_exact_node_request)
+/* Flag to indicate the node specified is virtual node */
+#define XENMEMF_vnode  (1<<18)
 #endif
 
 struct xen_memory_reservation {
-- 
1.9.1

