[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v2 3/7] libxc: vnodes allocation on NUMA nodes.



On 11/14/2013 03:26 AM, Elena Ufimtseva wrote:
If vnuma topology is defined and its a hardware NUMA machine,
allocate vnodes on physical numa nodes based on vnuma_nodemap.
Otherwise, use default allocation mechanism.

Signed-off-by: Elena Ufimtseva <ufimtseva@xxxxxxxxx>
---
  tools/libxc/xc_dom.h     |    1 +
  tools/libxc/xc_dom_x86.c |   85 +++++++++++++++++++++++++++++++++++++++++-----
  tools/libxc/xg_private.h |    1 +
  3 files changed, 78 insertions(+), 9 deletions(-)

diff --git a/tools/libxc/xc_dom.h b/tools/libxc/xc_dom.h
index 42a16c9..da7472e 100644
--- a/tools/libxc/xc_dom.h
+++ b/tools/libxc/xc_dom.h
@@ -371,6 +371,7 @@ static inline xen_pfn_t xc_dom_p2m_guest(struct 
xc_dom_image *dom,
  int arch_setup_meminit(struct xc_dom_image *dom);
  int arch_setup_bootearly(struct xc_dom_image *dom);
  int arch_setup_bootlate(struct xc_dom_image *dom);
+int arch_boot_numa_alloc(struct xc_dom_image *dom);

  /*
   * Local variables:
diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c
index 60fc544..96658bb 100644
--- a/tools/libxc/xc_dom_x86.c
+++ b/tools/libxc/xc_dom_x86.c
@@ -790,27 +790,47 @@ int arch_setup_meminit(struct xc_dom_image *dom)
      else
      {
          /* try to claim pages for early warning of insufficient memory avail 
*/
+        rc = 0;
          if ( dom->claim_enabled ) {
              rc = xc_domain_claim_pages(dom->xch, dom->guest_domid,
                                         dom->total_pages);
              if ( rc )
+            {
+                xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                             "%s: Failed to claim mem for dom\n",
+                             __FUNCTION__);
                  return rc;
+            }
          }
          /* setup initial p2m */
          for ( pfn = 0; pfn < dom->total_pages; pfn++ )
              dom->p2m_host[pfn] = pfn;

          /* allocate guest memory */
-        for ( i = rc = allocsz = 0;
-              (i < dom->total_pages) && !rc;
-              i += allocsz )
+        if (dom->nr_vnodes > 0)
          {
-            allocsz = dom->total_pages - i;
-            if ( allocsz > 1024*1024 )
-                allocsz = 1024*1024;
-            rc = xc_domain_populate_physmap_exact(
-                dom->xch, dom->guest_domid, allocsz,
-                0, 0, &dom->p2m_host[i]);
+            rc = arch_boot_numa_alloc(dom);
+            if ( rc )
+            {
+                xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                             "%s: Failed to allocate memory on NUMA nodes\n",
+                             __FUNCTION__);
+                return rc;
+            }
+        }
+        else
+        {
+            for ( i = rc = allocsz = 0;
+                  (i < dom->total_pages) && !rc;
+                  i += allocsz )
+            {
+                allocsz = dom->total_pages - i;
+                if ( allocsz > 1024*1024 )
+                    allocsz = 1024*1024;
+                rc = xc_domain_populate_physmap_exact(
+                    dom->xch, dom->guest_domid, allocsz,
+                    0, 0, &dom->p2m_host[i]);
+            }
          }

I can appreciate the desire to make leave the non-vNUMA case as unchanged as possible, but I think in the long run it would be better to unify the two. As it is, you already have code to handle "nr_nodes==1", "vnode_numamap[0]==VNUMA_NO_NODE", which should end up behaving exacatly as the original code, AFAICT.

The libxc interface isn't stable, so it might even be worth just saying, "From now on you have to specify a NUMA topology, even if it's just a 'null' one (nr_nodes==1, vnode_numamap[0]==VNUMA_NO_NODE)". If not, then it should have a way to default to the "null" vNUMA topology.


          /* Ensure no unclaimed pages are left unused.
@@ -818,7 +838,54 @@ int arch_setup_meminit(struct xc_dom_image *dom)
          (void)xc_domain_claim_pages(dom->xch, dom->guest_domid,
                                      0 /* cancels the claim */);
      }
+    return rc;
+}
+
+int arch_boot_numa_alloc(struct xc_dom_image *dom)
+{
+    int rc;
+    unsigned int n;
+    unsigned long long guest_pages;
+    unsigned long long allocsz = 0, k, i;
+    unsigned long memflags;
+
+    rc = allocsz = k = 0;
+    if(dom->nr_vnodes == 0)
+        return -EINVAL;

+    for (n = 0; n < dom->nr_vnodes; n++)
+        allocsz += (dom->vnuma_memszs[n] << 20) >> PAGE_SHIFT_X86;
+
+    allocsz = 0;

Wait, what? Why did we just do through and calculate the total allocation size, only to set it to zero again? I'm guessing the loop above is vestigal...

+    for(n = 0; n < dom->nr_vnodes; n++)
+    {
+        memflags = 0;
+        if ( dom->vnode_numamap[n] != VNUMA_NO_NODE )
+        {
+            memflags |= XENMEMF_exact_node(dom->vnode_numamap[n]);
+            memflags |= XENMEMF_exact_node_request;
+        }
+        guest_pages = (dom->vnuma_memszs[n] << 20) >> PAGE_SHIFT_X86;
+        for ( i = 0;
+            (i < guest_pages) && !rc;
+                i += allocsz )
+        {
+            allocsz = guest_pages - i;
+            if ( allocsz > 1024*1024 )
+                allocsz = 1024*1024;
+                rc = xc_domain_populate_physmap_exact(
+                                    dom->xch, dom->guest_domid, allocsz,
+                                    0, memflags, &dom->p2m_host[i + k]);
+        }
+        if ( rc )
+        {
+            xc_dom_panic(dom->xch, XC_INTERNAL_ERROR,
+                    "%s: Failed allocation of %Lu pages for vnode %d on pnode %d 
out of %lu\n",
+                    __FUNCTION__, guest_pages, n, dom->vnode_numamap[n], 
dom->total_pages);
+            return rc;
+        }
+        k += i;

I think this would be better to have a more useful name -- node_pfn_base, maybe? Just to make it easier to understand what's going on here.

Also, is there any attempt to verify that the resulting pfn -> node map will match the map we're going to pass to the guest? Or are we just leaving it to the toolstack to make sure that the memory layout we get from what is passed in xc_dom_image resembles what is passed in to xc_domain_setvnodes()?

 -George

+    }
      return rc;
  }

diff --git a/tools/libxc/xg_private.h b/tools/libxc/xg_private.h
index 5ff2124..9554b71 100644
--- a/tools/libxc/xg_private.h
+++ b/tools/libxc/xg_private.h
@@ -127,6 +127,7 @@ typedef uint64_t l4_pgentry_64_t;
  #define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & 
~((1UL<<(_w))-1))
  #define NRPAGES(x) (ROUNDUP(x, PAGE_SHIFT) >> PAGE_SHIFT)

+#define VNUMA_NO_NODE ~((unsigned int)0)

  /* XXX SMH: following skanky macros rely on variable p2m_size being set */
  /* XXX TJD: also, "guest_width" should be the guest's sizeof(unsigned long) */



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.