[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH v5 2/2] allow hardware domain != dom0



This adds a hypervisor command line option "hardware_dom=" which takes a
domain ID.  When the domain with this ID is created, it will be used
as the hardware domain.

This is intended to be used when domain 0 is a dedicated stub domain for
domain building, allowing the hardware domain to be de-privileged and
act only as a driver domain.

Signed-off-by: Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>
Cc: Ian Campbell <ian.campbell@xxxxxxxxxx>
Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Keir Fraser <keir@xxxxxxx>
Cc: Tim Deegan <tim@xxxxxxx>
---

Changes since v4:
 - Use user-visible XSM_ENABLE configuration option in documentation
 - Guard arch.ioport_caps reference with #ifdef CONFIG_X86
 - Explicitly BUG if reserved domain IDs are set as the hardware domain
 - Fix lock ordering conditional in rangeset_swap
 - Move late_hwdom_init call above domlist update to better handle errors

 docs/misc/xen-command-line.markdown | 10 ++++++
 xen/arch/x86/domain_build.c         |  4 ++-
 xen/common/domain.c                 | 61 +++++++++++++++++++++++++++++++++++--
 xen/common/rangeset.c               | 40 ++++++++++++++++++++++++
 xen/include/xen/rangeset.h          |  3 ++
 xen/include/xen/sched.h             |  6 ++++
 xen/include/xsm/dummy.h             |  6 ++++
 xen/include/xsm/xsm.h               |  6 ++++
 xen/xsm/dummy.c                     |  2 ++
 xen/xsm/flask/hooks.c               |  6 ++++
 xen/xsm/flask/policy/access_vectors |  2 ++
 11 files changed, 142 insertions(+), 4 deletions(-)

diff --git a/docs/misc/xen-command-line.markdown 
b/docs/misc/xen-command-line.markdown
index 87de2dc..e8d23b4 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -590,6 +590,16 @@ Paging (HAP).
 Flag to enable 2 MB host page table support for Hardware Assisted
 Paging (HAP).
 
+### hardware\_dom
+> `= <domid>`
+
+> Default: `0`
+
+Enable late hardware domain creation using the specified domain ID.  This is
+intended to be used when domain 0 is a stub domain which builds a disaggregated
+system including a hardware domain with the specified domain ID.  This option 
is
+supported only when compiled with XSM\_ENABLE=y on x86.
+
 ### hpetbroadcast
 > `= <boolean>`
 
diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c
index f75f6e7..10fcd43 100644
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -1150,7 +1150,9 @@ int __init construct_dom0(
         printk(" Xen warning: dom0 kernel broken ELF: %s\n",
                elf_check_broken(&elf));
 
-    iommu_hwdom_init(hardware_domain);
+    if ( d->domain_id == hardware_domid )
+        iommu_hwdom_init(d);
+
     return 0;
 
 out:
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 3c05711..2d2cdbe 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -61,6 +61,11 @@ struct domain *domain_list;
 
 struct domain *hardware_domain __read_mostly;
 
+#ifdef CONFIG_LATE_HWDOM
+domid_t hardware_domid __read_mostly;
+integer_param("hardware_dom", hardware_domid);
+#endif
+
 struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;
 
 vcpu_info_t dummy_vcpu_info;
@@ -178,6 +183,51 @@ struct vcpu *alloc_vcpu(
     return v;
 }
 
+static int late_hwdom_init(struct domain *d)
+{
+#ifdef CONFIG_LATE_HWDOM
+    struct domain *dom0;
+    int rv;
+
+    if ( d != hardware_domain || d->domain_id == 0 )
+        return 0;
+
+    rv = xsm_init_hardware_domain(XSM_HOOK, d);
+    if ( rv )
+        return rv;
+
+    printk("Initialising hardware domain %d\n", hardware_domid);
+
+    dom0 = rcu_lock_domain_by_id(0);
+    ASSERT(dom0 != NULL);
+    /*
+     * Hardware resource ranges for domain 0 have been set up from
+     * various sources intended to restrict the hardware domain's
+     * access.  Apply these ranges to the actual hardware domain.
+     *
+     * Because the lists are being swapped, a side effect of this
+     * operation is that Domain 0's rangesets are cleared.  Since
+     * domain 0 should not be accessing the hardware when it constructs
+     * a hardware domain, this should not be a problem.  Both lists
+     * may be modified after this hypercall returns if a more complex
+     * device model is desired.
+     */
+    rangeset_swap(d->irq_caps, dom0->irq_caps);
+    rangeset_swap(d->iomem_caps, dom0->iomem_caps);
+#ifdef CONFIG_X86
+    rangeset_swap(d->arch.ioport_caps, dom0->arch.ioport_caps);
+#endif
+
+    rcu_unlock_domain(dom0);
+
+    iommu_hwdom_init(d);
+
+    return rv;
+#else
+    return 0;
+#endif
+}
+
 static unsigned int __read_mostly extra_dom0_irqs = 256;
 static unsigned int __read_mostly extra_domU_irqs = 32;
 static void __init parse_extra_guest_irqs(const char *s)
@@ -192,7 +242,7 @@ custom_param("extra_guest_irqs", parse_extra_guest_irqs);
 struct domain *domain_create(
     domid_t domid, unsigned int domcr_flags, uint32_t ssidref)
 {
-    struct domain *d, **pd;
+    struct domain *d, **pd, *old_hwdom = NULL;
     enum { INIT_xsm = 1u<<0, INIT_watchdog = 1u<<1, INIT_rangeset = 1u<<2,
            INIT_evtchn = 1u<<3, INIT_gnttab = 1u<<4, INIT_arch = 1u<<5 };
     int err, init_status = 0;
@@ -237,10 +287,12 @@ struct domain *domain_create(
     else if ( domcr_flags & DOMCRF_pvh )
         d->guest_type = guest_type_pvh;
 
-    if ( domid == 0 )
+    if ( domid == 0 || domid == hardware_domid )
     {
+        BUG_ON(domid >= DOMID_FIRST_RESERVED);
         d->is_pinned = opt_dom0_vcpus_pin;
         d->disable_migrate = 1;
+        old_hwdom = hardware_domain;
         hardware_domain = d;
     }
 
@@ -302,6 +354,9 @@ struct domain *domain_create(
     if ( (err = sched_init_domain(d)) != 0 )
         goto fail;
 
+    if ( (err = late_hwdom_init(d)) != 0 )
+        goto fail;
+
     if ( !is_idle_domain(d) )
     {
         spin_lock(&domlist_update_lock);
@@ -321,7 +376,7 @@ struct domain *domain_create(
  fail:
     d->is_dying = DOMDYING_dead;
     if ( hardware_domain == d )
-        hardware_domain = NULL;
+        hardware_domain = old_hwdom;
     atomic_set(&d->refcnt, DOMAIN_DESTROYED);
     xfree(d->mem_event);
     xfree(d->pbuf);
diff --git a/xen/common/rangeset.c b/xen/common/rangeset.c
index f09c0c4..9a4d277 100644
--- a/xen/common/rangeset.c
+++ b/xen/common/rangeset.c
@@ -380,6 +380,46 @@ void rangeset_domain_destroy(
     }
 }
 
+void rangeset_swap(struct rangeset *a, struct rangeset *b)
+{
+    struct list_head tmp;
+    if ( a < b )
+    {
+        spin_lock(&a->lock);
+        spin_lock(&b->lock);
+    }
+    else
+    {
+        spin_lock(&b->lock);
+        spin_lock(&a->lock);
+    }
+    memcpy(&tmp, &a->range_list, sizeof(tmp));
+    memcpy(&a->range_list, &b->range_list, sizeof(tmp));
+    memcpy(&b->range_list, &tmp, sizeof(tmp));
+    if ( a->range_list.next == &b->range_list )
+    {
+        a->range_list.next = &a->range_list;
+        a->range_list.prev = &a->range_list;
+    }
+    else
+    {
+        a->range_list.next->prev = &a->range_list;
+        a->range_list.prev->next = &a->range_list;
+    }
+    if ( b->range_list.next == &a->range_list )
+    {
+        b->range_list.next = &b->range_list;
+        b->range_list.prev = &b->range_list;
+    }
+    else
+    {
+        b->range_list.next->prev = &b->range_list;
+        b->range_list.prev->next = &b->range_list;
+    }
+    spin_unlock(&a->lock);
+    spin_unlock(&b->lock);
+}
+
 /*****************************
  * Pretty-printing functions
  */
diff --git a/xen/include/xen/rangeset.h b/xen/include/xen/rangeset.h
index 1e16a6b..2c122c1 100644
--- a/xen/include/xen/rangeset.h
+++ b/xen/include/xen/rangeset.h
@@ -67,6 +67,9 @@ int __must_check rangeset_remove_singleton(
 int __must_check rangeset_contains_singleton(
     struct rangeset *r, unsigned long s);
 
+/* swap contents */
+void rangeset_swap(struct rangeset *a, struct rangeset *b);
+
 /* Rangeset pretty printing. */
 void rangeset_printk(
     struct rangeset *r);
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 734f7a9..44851ae 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -46,6 +46,12 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t);
 /* A global pointer to the hardware domain (usually DOM0). */
 extern struct domain *hardware_domain;
 
+#ifdef CONFIG_LATE_HWDOM
+extern domid_t hardware_domid;
+#else
+#define hardware_domid 0
+#endif
+
 #ifndef CONFIG_COMPAT
 #define BITS_PER_EVTCHN_WORD(d) BITS_PER_XEN_ULONG
 #else
diff --git a/xen/include/xsm/dummy.h b/xen/include/xsm/dummy.h
index e722155..8ca1117 100644
--- a/xen/include/xsm/dummy.h
+++ b/xen/include/xsm/dummy.h
@@ -299,6 +299,12 @@ static XSM_INLINE char *xsm_show_security_evtchn(struct 
domain *d, const struct
     return NULL;
 }
 
+static XSM_INLINE int xsm_init_hardware_domain(XSM_DEFAULT_ARG struct domain 
*d)
+{
+    XSM_ASSERT_ACTION(XSM_HOOK);
+    return xsm_default_action(action, current->domain, d);
+}
+
 static XSM_INLINE int xsm_get_pod_target(XSM_DEFAULT_ARG struct domain *d)
 {
     XSM_ASSERT_ACTION(XSM_PRIV);
diff --git a/xen/include/xsm/xsm.h b/xen/include/xsm/xsm.h
index 2cd3a3b..ef1c584 100644
--- a/xen/include/xsm/xsm.h
+++ b/xen/include/xsm/xsm.h
@@ -82,6 +82,7 @@ struct xsm_operations {
     int (*alloc_security_evtchn) (struct evtchn *chn);
     void (*free_security_evtchn) (struct evtchn *chn);
     char *(*show_security_evtchn) (struct domain *d, const struct evtchn *chn);
+    int (*init_hardware_domain) (struct domain *d);
 
     int (*get_pod_target) (struct domain *d);
     int (*set_pod_target) (struct domain *d);
@@ -311,6 +312,11 @@ static inline char *xsm_show_security_evtchn (struct 
domain *d, const struct evt
     return xsm_ops->show_security_evtchn(d, chn);
 }
 
+static inline int xsm_init_hardware_domain (xsm_default_t def, struct domain 
*d)
+{
+    return xsm_ops->init_hardware_domain(d);
+}
+
 static inline int xsm_get_pod_target (xsm_default_t def, struct domain *d)
 {
     return xsm_ops->get_pod_target(d);
diff --git a/xen/xsm/dummy.c b/xen/xsm/dummy.c
index b79e10f..c2804f2 100644
--- a/xen/xsm/dummy.c
+++ b/xen/xsm/dummy.c
@@ -58,6 +58,8 @@ void xsm_fixup_ops (struct xsm_operations *ops)
     set_to_dummy_if_null(ops, alloc_security_evtchn);
     set_to_dummy_if_null(ops, free_security_evtchn);
     set_to_dummy_if_null(ops, show_security_evtchn);
+    set_to_dummy_if_null(ops, init_hardware_domain);
+
     set_to_dummy_if_null(ops, get_pod_target);
     set_to_dummy_if_null(ops, set_pod_target);
 
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index 4ce31c9..f1a4a2d 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -327,6 +327,11 @@ static char *flask_show_security_evtchn(struct domain *d, 
const struct evtchn *c
     return ctx;
 }
 
+static int flask_init_hardware_domain(struct domain *d)
+{
+    return current_has_perm(d, SECCLASS_DOMAIN2, 
DOMAIN2__CREATE_HARDWARE_DOMAIN);
+}
+
 static int flask_grant_mapref(struct domain *d1, struct domain *d2, 
                               uint32_t flags)
 {
@@ -1500,6 +1505,7 @@ static struct xsm_operations flask_ops = {
     .alloc_security_evtchn = flask_alloc_security_evtchn,
     .free_security_evtchn = flask_free_security_evtchn,
     .show_security_evtchn = flask_show_security_evtchn,
+    .init_hardware_domain = flask_init_hardware_domain,
 
     .get_pod_target = flask_get_pod_target,
     .set_pod_target = flask_set_pod_target,
diff --git a/xen/xsm/flask/policy/access_vectors 
b/xen/xsm/flask/policy/access_vectors
index a0ed13d..32371a9 100644
--- a/xen/xsm/flask/policy/access_vectors
+++ b/xen/xsm/flask/policy/access_vectors
@@ -198,6 +198,8 @@ class domain2
     set_max_evtchn
 # XEN_DOMCTL_cacheflush
     cacheflush
+# Creation of the hardware domain when it is not dom0
+    create_hardware_domain
 }
 
 # Similar to class domain, but primarily contains domctls related to HVM 
domains
-- 
1.9.0


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.