
Re: [Xen-devel] [PATCH 05 of 11 v3] xen: allow for explicitly specifying node-affinity



On 01.02.2013 12:01, Dario Faggioli wrote:
Make it possible to pass the node-affinity of a domain to the hypervisor
from the upper layers, instead of always being computed automatically.

Note that this also required generalizing the Flask hooks for setting
and getting the affinity, so that they now deal with both vcpu and
node affinity.

Signed-off-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
Acked-by: Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>

Except for one minor note (see below):

Acked-by: Juergen Gross <juergen.gross@xxxxxxxxxxxxxx>

---
Changes from v2:
  * reworked as needed after the merge of Daniel's IS_PRIV work;
  * xen.te taken care of, as requested during review.

Changes from v1:
  * added the missing dummy hook for nodeaffinity;
  * let the permission renaming affect flask policies too.
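
For context, here is a rough sketch (not part of the patch) of how an upper layer could consume the new interface, pinning a domain's node-affinity to a single NUMA node. It assumes the xc_domain_node_setaffinity() wrapper and xc_get_max_nodes() helper provided by the libxc changes elsewhere in this series, and the usual one-bit-per-node, eight-nodes-per-byte xc_nodemap_t layout:

/*
 * Sketch only, not part of this patch: set a domain's node-affinity to a
 * single node from the toolstack side. xc_domain_node_setaffinity() and
 * xc_get_max_nodes() are assumed to come from the libxc patches in this
 * series.
 */
#include <stdlib.h>
#include <xenctrl.h>

static int pin_domain_to_node(xc_interface *xch, uint32_t domid, int node)
{
    int max_nodes = xc_get_max_nodes(xch);
    xc_nodemap_t nodemap;
    int rc;

    if ( max_nodes <= 0 || node >= max_nodes )
        return -1;

    /* One bit per node, rounded up to whole bytes. */
    nodemap = calloc(1, (max_nodes + 7) / 8);
    if ( !nodemap )
        return -1;

    /* Set only the bit of the requested node ... */
    nodemap[node / 8] |= 1 << (node % 8);
    /* ... and hand the mask to Xen (XEN_DOMCTL_setnodeaffinity underneath). */
    rc = xc_domain_node_setaffinity(xch, domid, nodemap);

    free(nodemap);
    return rc;
}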

diff --git a/tools/flask/policy/policy/mls b/tools/flask/policy/policy/mls
--- a/tools/flask/policy/policy/mls
+++ b/tools/flask/policy/policy/mls
@@ -70,11 +70,11 @@ mlsconstrain domain transition
        (( h1 dom h2 ) and (( l1 eq l2 ) or (t1 == mls_priv)));

  # all the domain "read" ops
-mlsconstrain domain { getvcpuaffinity getdomaininfo getvcpuinfo getvcpucontext getaddrsize getextvcpucontext }
+mlsconstrain domain { getaffinity getdomaininfo getvcpuinfo getvcpucontext getaddrsize getextvcpucontext }
        ((l1 dom l2) or (t1 == mls_priv));

  # all the domain "write" ops
-mlsconstrain domain { setvcpucontext pause unpause resume create max_vcpus destroy setvcpuaffinity scheduler setdomainmaxmem setdomainhandle setdebugging hypercall settime set_target shutdown setaddrsize trigger setextvcpucontext }
+mlsconstrain domain { setvcpucontext pause unpause resume create max_vcpus destroy setaffinity scheduler setdomainmaxmem setdomainhandle setdebugging hypercall settime set_target shutdown setaddrsize trigger setextvcpucontext }
        ((l1 eq l2) or (t1 == mls_priv));

  # This is incomplete - similar constraints must be written for all classes
diff --git a/tools/flask/policy/policy/modules/xen/xen.if b/tools/flask/policy/policy/modules/xen/xen.if
--- a/tools/flask/policy/policy/modules/xen/xen.if
+++ b/tools/flask/policy/policy/modules/xen/xen.if
@@ -48,7 +48,7 @@ define(`create_domain_common', `
        allow $1 $2:domain { create max_vcpus setdomainmaxmem setaddrsize
                        getdomaininfo hypercall setvcpucontext setextvcpucontext
                        getscheduler getvcpuinfo getvcpuextstate getaddrsize
-                       getvcpuaffinity setvcpuaffinity };
+                       getaffinity setaffinity };
        allow $1 $2:domain2 { set_cpuid settsc setscheduler };
        allow $1 $2:security check_context;
        allow $1 $2:shadow enable;
@@ -77,9 +77,9 @@ define(`create_domain_build_label', `
  # manage_domain(priv, target)
  #   Allow managing a running domain
  define(`manage_domain', `
-       allow $1 $2:domain { getdomaininfo getvcpuinfo getvcpuaffinity
+       allow $1 $2:domain { getdomaininfo getvcpuinfo getaffinity
                        getaddrsize pause unpause trigger shutdown destroy
-                       setvcpuaffinity setdomainmaxmem getscheduler };
+                       setaffinity setdomainmaxmem getscheduler };
  ')

  # migrate_domain_out(priv, target)
diff --git a/tools/flask/policy/policy/modules/xen/xen.te b/tools/flask/policy/policy/modules/xen/xen.te
--- a/tools/flask/policy/policy/modules/xen/xen.te
+++ b/tools/flask/policy/policy/modules/xen/xen.te
@@ -62,7 +62,7 @@ allow dom0_t security_t:security { check
        compute_member load_policy compute_relabel compute_user setenforce
        setbool setsecparam add_ocontext del_ocontext };

-allow dom0_t dom0_t:domain { getdomaininfo getvcpuinfo getvcpuaffinity };
+allow dom0_t dom0_t:domain { getdomaininfo getvcpuinfo getaffinity };
  allow dom0_t dom0_t:resource { add remove };

  admin_device(dom0_t, device_t)
diff --git a/xen/common/domain.c b/xen/common/domain.c
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -222,6 +222,7 @@ struct domain *domain_create(

      spin_lock_init(&d->node_affinity_lock);
      d->node_affinity = NODE_MASK_ALL;
+    d->auto_node_affinity = 1;

      spin_lock_init(&d->shutdown_lock);
      d->shutdown_code = -1;
@@ -362,11 +363,26 @@ void domain_update_node_affinity(struct
          cpumask_or(cpumask, cpumask, online_affinity);
      }

-    for_each_online_node ( node )
-        if ( cpumask_intersects(&node_to_cpumask(node), cpumask) )
-            node_set(node, nodemask);
+    if ( d->auto_node_affinity )
+    {
+        /* Node-affinity is automatically computed from all vcpu-affinities */
+        for_each_online_node ( node )
+            if ( cpumask_intersects(&node_to_cpumask(node), cpumask) )
+                node_set(node, nodemask);

-    d->node_affinity = nodemask;
+        d->node_affinity = nodemask;
+    }
+    else
+    {
+        /* Node-affinity is provided by someone else; just filter out the
+         * nodes whose cpus are all either offline or outside the affinity
+         * of every vcpu. */
+        for_each_node_mask ( node, d->node_affinity )
+            if ( !cpumask_intersects(&node_to_cpumask(node), cpumask) )
+                node_clear(node, d->node_affinity);
+    }
+
+    sched_set_node_affinity(d, &d->node_affinity);
+
      spin_unlock(&d->node_affinity_lock);

      free_cpumask_var(online_affinity);
@@ -374,6 +390,36 @@ void domain_update_node_affinity(struct
  }


+int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity)
+{
+    /* Being affine with no nodes is just wrong */
+    if ( nodes_empty(*affinity) )
+        return -EINVAL;
+
+    spin_lock(&d->node_affinity_lock);
+
+    /*
+     * Being/becoming explicitly affine to all nodes is not particularly
+     * useful. Let's take it as the `reset node affinity` command.
+     */
+    if ( nodes_full(*affinity) )
+    {
+        d->auto_node_affinity = 1;
+        goto out;
+    }
+
+    d->auto_node_affinity = 0;
+    d->node_affinity = *affinity;
+
+out:
+    spin_unlock(&d->node_affinity_lock);
+
+    domain_update_node_affinity(d);
+
+    return 0;
+}
+
+
  struct domain *get_domain_by_id(domid_t dom)
  {
      struct domain *d;
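
As an aside, with the convention above, putting a domain back into automatic mode from inside the hypervisor is just a matter of passing a full mask. A minimal sketch (not in the patch, assumes normal hypervisor context):

/* Sketch only: revert a domain to automatically computed node-affinity,
 * relying on the "all nodes means reset" convention introduced above. */
static void reset_node_affinity(struct domain *d)
{
    nodemask_t all = NODE_MASK_ALL;

    /* nodes_full() is true here, so auto_node_affinity goes back to 1 and
     * domain_update_node_affinity() recomputes the mask from the vcpus. */
    domain_set_node_affinity(d, &all);
}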
diff --git a/xen/common/domctl.c b/xen/common/domctl.c
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -559,6 +559,26 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe
      }
      break;

+    case XEN_DOMCTL_setnodeaffinity:
+    case XEN_DOMCTL_getnodeaffinity:
+    {
+        if ( op->cmd == XEN_DOMCTL_setnodeaffinity )
+        {
+            nodemask_t new_affinity;
+
+            ret = xenctl_bitmap_to_nodemask(&new_affinity,
+                                            &op->u.nodeaffinity.nodemap);
+            if ( !ret )
+                ret = domain_set_node_affinity(d, &new_affinity);
+        }
+        else
+        {
+            ret = nodemask_to_xenctl_bitmap(&op->u.nodeaffinity.nodemap,
+                                            &d->node_affinity);
+        }
+    }
+    break;
+

The two cases shouldn't be handled in one block, I think. Only the break
statement seems to be common here.
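
Something along these lines (untested sketch, whitespace aside) would keep them separate:

    case XEN_DOMCTL_setnodeaffinity:
    {
        nodemask_t new_affinity;

        ret = xenctl_bitmap_to_nodemask(&new_affinity,
                                        &op->u.nodeaffinity.nodemap);
        if ( !ret )
            ret = domain_set_node_affinity(d, &new_affinity);
    }
    break;

    case XEN_DOMCTL_getnodeaffinity:
        ret = nodemask_to_xenctl_bitmap(&op->u.nodeaffinity.nodemap,
                                        &d->node_affinity);
        break;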


Jürgen

--
Juergen Gross                 Principal Developer Operating Systems
PBG PDG ES&S SWE OS6                   Telephone: +49 (0) 89 3222 2967
Fujitsu Technology Solutions              e-mail: juergen.gross@xxxxxxxxxxxxxx
Domagkstr. 28                           Internet: ts.fujitsu.com
D-80807 Muenchen                 Company details: ts.fujitsu.com/imprint.html

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 

