[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 08 of 11 v4] libxl: optimize the calculation of how many VCPUs can run on a candidate
On Fri, 2013-03-15 at 02:30 +0000, Dario Faggioli wrote: > For choosing the best NUMA placement candidate, we need to figure out > how many VCPUs are runnable on each of them. That requires going through > all the VCPUs of all the domains and check their affinities. > > With this change, instead of doing the above for each candidate, we > do it once for all, populating an array while counting. This way, when > we later are evaluating candidates, all we need is summing up the right > elements of the array itself. > > This reduces the complexity of the overall algorithm, as it moves a > potentially expensive operation (for_each_vcpu_of_each_domain {}) > outside from the core placement loop, so that it is performed only > once instead of (potentially) tens or hundreds of times. More > specifically, we go from a worst case computation time complaxity of: > > O(2^n_nodes) * O(n_domains*n_domain_vcpus) > > To, with this change: > > O(n_domains*n_domains_vcpus) + O(2^n_nodes) = O(2^n_nodes) > > (with n_nodes<=16, otherwise the algorithm suggests partitioning with > cpupools and does not even start.) > > Signed-off-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx> > Acked-by: Juergen Gross <juergen.gross@xxxxxxxxxxxxxx> > Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx> Acked-by: Ian Campbell <ian.campbell@xxxxxxxxxx> > --- > Changes from v2: > * in nr_vcpus_on_nodes(), vcpu_nodemap renamed to something more > sensible, as suggested during review. > > diff --git a/tools/libxl/libxl_numa.c b/tools/libxl/libxl_numa.c > --- a/tools/libxl/libxl_numa.c > +++ b/tools/libxl/libxl_numa.c > @@ -165,22 +165,34 @@ static uint32_t nodemap_to_free_memkb(li > return free_memkb; > } > > -/* Retrieve the number of vcpus able to run on the cpus of the nodes > - * that are part of the nodemap. 
*/ > -static int nodemap_to_nr_vcpus(libxl__gc *gc, libxl_cputopology *tinfo, > +/* Retrieve the number of vcpus able to run on the nodes in nodemap */ > +static int nodemap_to_nr_vcpus(libxl__gc *gc, int vcpus_on_node[], > const libxl_bitmap *nodemap) > { > + int i, nr_vcpus = 0; > + > + libxl_for_each_set_bit(i, *nodemap) > + nr_vcpus += vcpus_on_node[i]; > + > + return nr_vcpus; > +} > + > +/* Number of vcpus able to run on the cpus of the various nodes > + * (reported by filling the array vcpus_on_node[]). */ > +static int nr_vcpus_on_nodes(libxl__gc *gc, libxl_cputopology *tinfo, > + const libxl_bitmap *suitable_cpumap, > + int vcpus_on_node[]) > +{ > libxl_dominfo *dinfo = NULL; > - libxl_bitmap vcpu_nodemap; > + libxl_bitmap nodes_counted; > int nr_doms, nr_cpus; > - int nr_vcpus = 0; > int i, j, k; > > dinfo = libxl_list_domain(CTX, &nr_doms); > if (dinfo == NULL) > return ERROR_FAIL; > > - if (libxl_node_bitmap_alloc(CTX, &vcpu_nodemap, 0) < 0) { > + if (libxl_node_bitmap_alloc(CTX, &nodes_counted, 0) < 0) { > libxl_dominfo_list_free(dinfo, nr_doms); > return ERROR_FAIL; > } > @@ -193,19 +205,17 @@ static int nodemap_to_nr_vcpus(libxl__gc > if (vinfo == NULL) > continue; > > - /* For each vcpu of each domain ... 
*/ > for (j = 0; j < nr_dom_vcpus; j++) { > + /* For each vcpu of each domain, increment the elements of > + * the array corresponding to the nodes where the vcpu runs */ > + libxl_bitmap_set_none(&nodes_counted); > + libxl_for_each_set_bit(k, vinfo[j].cpumap) { > + int node = tinfo[k].node; > > - /* Build up a map telling on which nodes the vcpu is runnable on > */ > - libxl_bitmap_set_none(&vcpu_nodemap); > - libxl_for_each_set_bit(k, vinfo[j].cpumap) > - libxl_bitmap_set(&vcpu_nodemap, tinfo[k].node); > - > - /* And check if that map has any intersection with our nodemap */ > - libxl_for_each_set_bit(k, vcpu_nodemap) { > - if (libxl_bitmap_test(nodemap, k)) { > - nr_vcpus++; > - break; > + if (libxl_bitmap_test(suitable_cpumap, k) && > + !libxl_bitmap_test(&nodes_counted, node)) { > + libxl_bitmap_set(&nodes_counted, node); > + vcpus_on_node[node]++; > } > } > } > @@ -213,9 +223,9 @@ static int nodemap_to_nr_vcpus(libxl__gc > libxl_vcpuinfo_list_free(vinfo, nr_dom_vcpus); > } > > - libxl_bitmap_dispose(&vcpu_nodemap); > + libxl_bitmap_dispose(&nodes_counted); > libxl_dominfo_list_free(dinfo, nr_doms); > - return nr_vcpus; > + return 0; > } > > /* > @@ -270,7 +280,7 @@ int libxl__get_numa_candidate(libxl__gc > libxl_numainfo *ninfo = NULL; > int nr_nodes = 0, nr_suit_nodes, nr_cpus = 0; > libxl_bitmap suitable_nodemap, nodemap; > - int rc = 0; > + int *vcpus_on_node, rc = 0; > > libxl_bitmap_init(&nodemap); > libxl_bitmap_init(&suitable_nodemap); > @@ -281,6 +291,8 @@ int libxl__get_numa_candidate(libxl__gc > if (ninfo == NULL) > return ERROR_FAIL; > > + GCNEW_ARRAY(vcpus_on_node, nr_nodes); > + > /* > * The good thing about this solution is that it is based on heuristics > * (implemented in numa_cmpf() ), but we at least can evaluate it on > @@ -330,6 +342,19 @@ int libxl__get_numa_candidate(libxl__gc > goto out; > > /* > + * Later on, we will try to figure out how many vcpus are runnable on > + * each candidate (as a part of choosing the best one of them). 
That > + * requires going through all the vcpus of all the domains and check > + * their affinities. So, instead of doing that for each candidate, > + * let's count here the number of vcpus runnable on each node, so that > + * all we have to do later is summing up the right elements of the > + * vcpus_on_node array. > + */ > + rc = nr_vcpus_on_nodes(gc, tinfo, suitable_cpumap, vcpus_on_node); > + if (rc) > + goto out; > + > + /* > * If the minimum number of NUMA nodes is not explicitly specified > * (i.e., min_nodes == 0), we try to figure out a sensible number of > nodes > * from where to start generating candidates, if possible (or just start > @@ -414,7 +439,8 @@ int libxl__get_numa_candidate(libxl__gc > * current best one (if any). > */ > libxl__numa_candidate_put_nodemap(gc, &new_cndt, &nodemap); > - new_cndt.nr_vcpus = nodemap_to_nr_vcpus(gc, tinfo, &nodemap); > + new_cndt.nr_vcpus = nodemap_to_nr_vcpus(gc, vcpus_on_node, > + &nodemap); > new_cndt.free_memkb = nodes_free_memkb; > new_cndt.nr_nodes = libxl_bitmap_count_set(&nodemap); > new_cndt.nr_cpus = nodes_cpus; _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
Lists.xenproject.org is hosted with RackSpace, monitoring our servers 24x7x365 and backed by RackSpace's Fanatical Support.