
[PATCH] x86/ioreq: Extend ioreq server to support multiple ioreq pages



A single shared ioreq page provides PAGE_SIZE / sizeof(struct ioreq) = 128
slots, limiting HVM guests to 128 vCPUs. To support more vCPUs, extend the
ioreq server to allocate NR_IOREQ_PAGES = DIV_ROUND_UP(HVM_MAX_VCPUS,
IOREQS_PER_PAGE) contiguous ioreq pages. Replace the single ioreq page in
struct ioreq_server with an array of pages (struct ioreq_pages ioreqs) and
extend the GFN allocation to find a run of contiguous free GFNs for the
multi-page mapping. All existing single-page paths (bufioreq, legacy
clients) remain unchanged.

Signed-off-by: Julian Vetter <julian.vetter@xxxxxxxxxx>
---
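Note for reviewers (not part of the commit message): the snippet below is a
minimal, self-contained sketch of the two calculations this change relies
on, namely the per-vCPU page/slot split used by get_ioreq() and the search
for a run of free GFNs done by hvm_alloc_ioreq_gfns(). The constants are
illustrative assumptions rather than values taken from the patch (PAGE_SIZE
of 4096, a 32-byte struct ioreq, and a hypothetical HVM_MAX_VCPUS of 256 so
that the multi-page case is visible).

/*
 * Standalone sketch; compile with any C99 compiler. All constants below
 * are assumptions for illustration only.
 */
#include <stdio.h>

#define PAGE_SIZE          4096u
#define SIZEOF_IOREQ       32u                     /* sizeof(struct ioreq) */
#define HVM_MAX_VCPUS      256u                    /* illustrative value */
#define IOREQS_PER_PAGE    (PAGE_SIZE / SIZEOF_IOREQ)
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#define NR_IOREQ_PAGES     DIV_ROUND_UP(HVM_MAX_VCPUS, IOREQS_PER_PAGE)

/*
 * Find 'nr' consecutive set bits (free GFN slots) in 'mask', mirroring the
 * run search in hvm_alloc_ioreq_gfns(); returns the first bit of the run,
 * or -1 if no run is long enough.
 */
static int find_gfn_run(unsigned long mask, unsigned int nr)
{
    unsigned int i, run = 0;

    for ( i = 0; i < 8 * sizeof(mask); i++ )
    {
        run = ((mask >> i) & 1) ? run + 1 : 0;
        if ( run == nr )
            return i - nr + 1;
    }

    return -1;
}

int main(void)
{
    unsigned int ids[] = { 0, 127, 128, 255 };
    unsigned long mask = 0xf3ul; /* bits 0,1 and 4-7 free; bits 2,3 in use */
    unsigned int i;

    printf("IOREQS_PER_PAGE=%u NR_IOREQ_PAGES=%u\n",
           IOREQS_PER_PAGE, NR_IOREQ_PAGES);

    /* Same page/slot split as get_ioreq() after this change. */
    for ( i = 0; i < sizeof(ids) / sizeof(ids[0]); i++ )
        printf("vcpu %3u -> page %u, slot %u\n",
               ids[i], ids[i] / IOREQS_PER_PAGE, ids[i] % IOREQS_PER_PAGE);

    /* A run of 2 free GFNs starts at bit 0; a run of 3 has to start at 4. */
    printf("run of 2 at bit %d, run of 3 at bit %d\n",
           find_gfn_run(mask, 2), find_gfn_run(mask, 3));

    return 0;
}

Built as a normal user-space program, it prints the page/slot mapping for a
few vCPU IDs and the start bit of the first suitable GFN run in the example
mask.
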
 xen/arch/x86/hvm/ioreq.c | 160 ++++++++++++++++++++++++++++++---------
 xen/common/ioreq.c       | 147 +++++++++++++++++++++++------------
 xen/include/xen/ioreq.h  |  13 +++-
 3 files changed, 232 insertions(+), 88 deletions(-)

diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c
index a5fa97e149..a5c2a4baca 100644
--- a/xen/arch/x86/hvm/ioreq.c
+++ b/xen/arch/x86/hvm/ioreq.c
@@ -71,6 +71,38 @@ static gfn_t hvm_alloc_legacy_ioreq_gfn(struct ioreq_server *s)
     return INVALID_GFN;
 }
 
+static gfn_t hvm_alloc_ioreq_gfns(struct ioreq_server *s,
+                                  unsigned int nr_pages)
+{
+    struct domain *d = s->target;
+    unsigned long mask = d->arch.hvm.ioreq_gfn.mask;
+    unsigned int i, run;
+
+    /* Find nr_pages consecutive set bits */
+    for ( i = 0, run = 0; i < BITS_PER_LONG; i++ )
+    {
+        if ( test_bit(i, &mask) )
+        {
+            if ( ++run == nr_pages )
+            {
+                /* Found a run - clear all bits and return base GFN */
+                unsigned int start = i - nr_pages + 1;
+                for ( unsigned int j = start; j <= i; j++ )
+                    clear_bit(j, &d->arch.hvm.ioreq_gfn.mask);
+                return _gfn(d->arch.hvm.ioreq_gfn.base + start);
+            }
+        }
+        else
+            run = 0;
+    }
+
+    /* Fall back to legacy for single page only */
+    if ( nr_pages == 1 )
+        return hvm_alloc_legacy_ioreq_gfn(s);
+
+    return INVALID_GFN;
+}
+
 static gfn_t hvm_alloc_ioreq_gfn(struct ioreq_server *s)
 {
     struct domain *d = s->target;
@@ -121,52 +153,95 @@ static void hvm_free_ioreq_gfn(struct ioreq_server *s, gfn_t gfn)
     }
 }
 
+static void hvm_free_ioreq_gfns(struct ioreq_server *s, gfn_t gfn,
+                                unsigned int nr_pages)
+{
+    unsigned int i;
+
+    for ( i = 0; i < nr_pages; i++ )
+        hvm_free_ioreq_gfn(s, _gfn(gfn_x(gfn) + i));
+}
+
 static void hvm_unmap_ioreq_gfn(struct ioreq_server *s, bool buf)
 {
-    struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq;
+    unsigned int i, nr_pages = buf ? 1 : NR_IOREQ_PAGES;
 
-    if ( gfn_eq(iorp->gfn, INVALID_GFN) )
-        return;
+    for ( i = 0; i < nr_pages; i++ )
+    {
+        struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreqs.page[i];
+
+        if ( gfn_eq(iorp->gfn, INVALID_GFN) )
+            continue;
 
-    destroy_ring_for_helper(&iorp->va, iorp->page);
-    iorp->page = NULL;
+        destroy_ring_for_helper(&iorp->va, iorp->page);
+        iorp->page = NULL;
 
-    hvm_free_ioreq_gfn(s, iorp->gfn);
-    iorp->gfn = INVALID_GFN;
+        hvm_free_ioreq_gfn(s, iorp->gfn);
+        iorp->gfn = INVALID_GFN;
+    }
 }
 
 static int hvm_map_ioreq_gfn(struct ioreq_server *s, bool buf)
 {
     struct domain *d = s->target;
-    struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq;
+    unsigned int i, nr_pages = buf ? 1 : NR_IOREQ_PAGES;
+    gfn_t base_gfn;
     int rc;
 
-    if ( iorp->page )
+    /* Check if already mapped */
+    for ( i = 0; i < nr_pages; i++ )
     {
-        /*
-         * If a page has already been allocated (which will happen on
-         * demand if ioreq_server_get_frame() is called), then
-         * mapping a guest frame is not permitted.
-         */
-        if ( gfn_eq(iorp->gfn, INVALID_GFN) )
-            return -EPERM;
+        struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreqs.page[i];
 
-        return 0;
+        if ( iorp->page )
+        {
+            /*
+             * If a page has already been allocated (which will happen on
+             * demand if ioreq_server_get_frame() is called), then
+             * mapping a guest frame is not permitted.
+             */
+            if ( gfn_eq(iorp->gfn, INVALID_GFN) )
+                return -EPERM;
+
+            return 0;
+        }
     }
 
     if ( d->is_dying )
         return -EINVAL;
 
-    iorp->gfn = hvm_alloc_ioreq_gfn(s);
+    /* Allocate contiguous GFNs for all pages */
+    base_gfn = buf ? hvm_alloc_ioreq_gfn(s) : hvm_alloc_ioreq_gfns(s, nr_pages);
 
-    if ( gfn_eq(iorp->gfn, INVALID_GFN) )
+    if ( gfn_eq(base_gfn, INVALID_GFN) )
         return -ENOMEM;
 
-    rc = prepare_ring_for_helper(d, gfn_x(iorp->gfn), &iorp->page,
-                                 &iorp->va);
+    /* Map each page */
+    for ( i = 0; i < nr_pages; i++ )
+    {
+        struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreqs.page[i];
+
+        iorp->gfn = _gfn(gfn_x(base_gfn) + i);
 
-    if ( rc )
-        hvm_unmap_ioreq_gfn(s, buf);
+        rc = prepare_ring_for_helper(d, gfn_x(iorp->gfn), &iorp->page,
+                                     &iorp->va);
+        if ( rc )
+            goto fail;
+    }
+
+    return 0;
+
+fail:
+    /* Unmap any pages we successfully mapped */
+    while ( i-- > 0 )
+    {
+        struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreqs.page[i];
+
+        destroy_ring_for_helper(&iorp->va, iorp->page);
+        iorp->page = NULL;
+        iorp->gfn = INVALID_GFN;
+    }
+    hvm_free_ioreq_gfns(s, base_gfn, nr_pages);
 
     return rc;
 }
@@ -174,32 +249,43 @@ static int hvm_map_ioreq_gfn(struct ioreq_server *s, bool buf)
 static void hvm_remove_ioreq_gfn(struct ioreq_server *s, bool buf)
 {
     struct domain *d = s->target;
-    struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq;
+    unsigned int i, nr_pages = buf ? 1 : NR_IOREQ_PAGES;
 
-    if ( gfn_eq(iorp->gfn, INVALID_GFN) )
-        return;
+    for ( i = 0; i < nr_pages; i++ )
+    {
+        struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreqs.page[i];
+
+        if ( gfn_eq(iorp->gfn, INVALID_GFN) )
+            continue;
 
-    if ( p2m_remove_page(d, iorp->gfn, page_to_mfn(iorp->page), 0) )
-        domain_crash(d);
-    clear_page(iorp->va);
+        if ( p2m_remove_page(d, iorp->gfn, page_to_mfn(iorp->page), 0) )
+            domain_crash(d);
+        clear_page(iorp->va);
+    }
 }
 
 static int hvm_add_ioreq_gfn(struct ioreq_server *s, bool buf)
 {
     struct domain *d = s->target;
-    struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq;
+    unsigned int i, nr_pages = buf ? 1 : NR_IOREQ_PAGES;
     int rc;
 
-    if ( gfn_eq(iorp->gfn, INVALID_GFN) )
-        return 0;
+    for ( i = 0; i < nr_pages; i++ )
+    {
+        struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreqs.page[i];
 
-    clear_page(iorp->va);
+        if ( gfn_eq(iorp->gfn, INVALID_GFN) )
+            continue;
 
-    rc = p2m_add_page(d, iorp->gfn, page_to_mfn(iorp->page), 0, p2m_ram_rw);
-    if ( rc == 0 )
-        paging_mark_pfn_dirty(d, _pfn(gfn_x(iorp->gfn)));
+        clear_page(iorp->va);
 
-    return rc;
+        rc = p2m_add_page(d, iorp->gfn, page_to_mfn(iorp->page), 0, p2m_ram_rw);
+        if ( rc )
+            return rc;
+
+        paging_mark_pfn_dirty(d, _pfn(gfn_x(iorp->gfn)));
+    }
+    return 0;
 }
 
 int arch_ioreq_server_map_pages(struct ioreq_server *s)
diff --git a/xen/common/ioreq.c b/xen/common/ioreq.c
index f5fd30ce12..13c638db53 100644
--- a/xen/common/ioreq.c
+++ b/xen/common/ioreq.c
@@ -95,12 +95,15 @@ static struct ioreq_server *get_ioreq_server(const struct domain *d,
 
 static ioreq_t *get_ioreq(struct ioreq_server *s, struct vcpu *v)
 {
-    shared_iopage_t *p = s->ioreq.va;
+    unsigned int vcpu_id = v->vcpu_id;
+    unsigned int page_idx = vcpu_id / IOREQS_PER_PAGE;
+    unsigned int slot_idx = vcpu_id % IOREQS_PER_PAGE;
+    shared_iopage_t *p = s->ioreqs.page[page_idx].va;
 
     ASSERT((v == current) || !vcpu_runnable(v));
     ASSERT(p != NULL);
 
-    return &p->vcpu_ioreq[v->vcpu_id];
+    return &p->vcpu_ioreq[slot_idx];
 }
 
 /*
@@ -260,84 +263,120 @@ bool vcpu_ioreq_handle_completion(struct vcpu *v)
 
 static int ioreq_server_alloc_mfn(struct ioreq_server *s, bool buf)
 {
-    struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq;
     struct page_info *page;
+    unsigned int i, j, nr_pages = buf ? 1 : NR_IOREQ_PAGES;
 
-    if ( iorp->page )
+    for ( i = 0; i < nr_pages; i++ )
     {
-        /*
-         * If a guest frame has already been mapped (which may happen
-         * on demand if ioreq_server_get_info() is called), then
-         * allocating a page is not permitted.
-         */
-        if ( !gfn_eq(iorp->gfn, INVALID_GFN) )
-            return -EPERM;
+        struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreqs.page[i];
 
-        return 0;
-    }
+        if ( iorp->page )
+        {
+            /*
+             * If a guest frame has already been mapped (which may happen
+             * on demand if ioreq_server_get_info() is called), then
+             * allocating a page is not permitted.
+             */
+            if ( !gfn_eq(iorp->gfn, INVALID_GFN) )
+                return -EPERM;
+            continue;  /* Already allocated */
+        }
 
-    page = alloc_domheap_page(s->target, MEMF_no_refcount);
+        page = alloc_domheap_page(s->target, MEMF_no_refcount);
+        if ( !page )
+            goto fail;
 
-    if ( !page )
-        return -ENOMEM;
+        if ( !get_page_and_type(page, s->target, PGT_writable_page) )
+        {
+            /*
+             * The domain can't possibly know about this page yet, so failure
+             * here is a clear indication of something fishy going on.
+             */
+            put_page_alloc_ref(page);
+            domain_crash(s->emulator);
+            return -ENODATA;
+        }
 
-    if ( !get_page_and_type(page, s->target, PGT_writable_page) )
-    {
-        /*
-         * The domain can't possibly know about this page yet, so failure
-         * here is a clear indication of something fishy going on.
-         */
-        domain_crash(s->emulator);
-        return -ENODATA;
-    }
+        /* Assign early so cleanup can find it */
+        iorp->page = page;
 
-    iorp->va = __map_domain_page_global(page);
-    if ( !iorp->va )
-        goto fail;
+        iorp->va = __map_domain_page_global(page);
+        if ( !iorp->va )
+            goto fail;
+
+        clear_page(iorp->va);
+    }
 
-    iorp->page = page;
-    clear_page(iorp->va);
     return 0;
 
- fail:
-    put_page_alloc_ref(page);
-    put_page_and_type(page);
+fail:
+    /* Free all previously allocated pages */
+    for ( j = 0; j <= i; j++ )
+    {
+        struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreqs.page[j];
+        if ( iorp->page )
+        {
+            if ( iorp->va )
+                unmap_domain_page_global(iorp->va);
+            iorp->va = NULL;
+            put_page_alloc_ref(iorp->page);
+            put_page_and_type(iorp->page);
+            iorp->page = NULL;
+        }
+    }
 
     return -ENOMEM;
 }
 
 static void ioreq_server_free_mfn(struct ioreq_server *s, bool buf)
 {
-    struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq;
-    struct page_info *page = iorp->page;
+    unsigned int i, nr_pages = buf ? 1 : NR_IOREQ_PAGES;
 
-    if ( !page )
-        return;
+    for ( i = 0; i < nr_pages; i++ )
+    {
+        struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreqs.page[i];
+        struct page_info *page = iorp->page;
 
-    iorp->page = NULL;
+        if ( !page )
+            continue;
+
+        iorp->page = NULL;
 
-    unmap_domain_page_global(iorp->va);
-    iorp->va = NULL;
+        unmap_domain_page_global(iorp->va);
+        iorp->va = NULL;
 
-    put_page_alloc_ref(page);
-    put_page_and_type(page);
+        put_page_alloc_ref(page);
+        put_page_and_type(page);
+    }
 }
 
 bool is_ioreq_server_page(struct domain *d, const struct page_info *page)
 {
     const struct ioreq_server *s;
-    unsigned int id;
+    unsigned int id, i;
     bool found = false;
 
     rspin_lock(&d->ioreq_server.lock);
 
     FOR_EACH_IOREQ_SERVER(d, id, s)
     {
-        if ( (s->ioreq.page == page) || (s->bufioreq.page == page) )
+        if ( s->bufioreq.page == page )
         {
             found = true;
             break;
         }
+
+        for ( i = 0; i < NR_IOREQ_PAGES; i++ )
+        {
+            if ( s->ioreqs.page[i].page == page )
+            {
+                found = true;
+                break;
+            }
+        }
+
+        if ( found )
+            break;
     }
 
     rspin_unlock(&d->ioreq_server.lock);
@@ -348,9 +387,11 @@ bool is_ioreq_server_page(struct domain *d, const struct page_info *page)
 static void ioreq_server_update_evtchn(struct ioreq_server *s,
                                        struct ioreq_vcpu *sv)
 {
+    unsigned int page_idx = sv->vcpu->vcpu_id / IOREQS_PER_PAGE;
+
     ASSERT(spin_is_locked(&s->lock));
 
-    if ( s->ioreq.va != NULL )
+    if ( s->ioreqs.page[page_idx].va != NULL )
     {
         ioreq_t *p = get_ioreq(s, sv->vcpu);
 
@@ -579,6 +620,7 @@ static int ioreq_server_init(struct ioreq_server *s,
 {
     struct domain *currd = current->domain;
     struct vcpu *v;
+    unsigned int i;
     int rc;
 
     s->target = d;
@@ -590,7 +632,8 @@ static int ioreq_server_init(struct ioreq_server *s,
     INIT_LIST_HEAD(&s->ioreq_vcpu_list);
     spin_lock_init(&s->bufioreq_lock);
 
-    s->ioreq.gfn = INVALID_GFN;
+    for ( i = 0; i < NR_IOREQ_PAGES; i++ )
+        s->ioreqs.page[i].gfn = INVALID_GFN;
     s->bufioreq.gfn = INVALID_GFN;
 
     rc = ioreq_server_alloc_rangesets(s, id);
@@ -768,8 +811,9 @@ static int ioreq_server_get_info(struct domain *d, ioservid_t id,
             goto out;
     }
 
+    /* Just return the first ioreq page because the region is contiguous */
     if ( ioreq_gfn )
-        *ioreq_gfn = gfn_x(s->ioreq.gfn);
+        *ioreq_gfn = gfn_x(s->ioreqs.page[0].gfn);
 
     if ( HANDLE_BUFIOREQ(s) )
     {
@@ -822,12 +866,15 @@ int ioreq_server_get_frame(struct domain *d, ioservid_t id,
         *mfn = page_to_mfn(s->bufioreq.page);
         rc = 0;
         break;
+    case XENMEM_resource_ioreq_server_frame_ioreq(0) ...
+         XENMEM_resource_ioreq_server_frame_ioreq(NR_IOREQ_PAGES - 1):
+    {
+        unsigned int page_idx = idx - XENMEM_resource_ioreq_server_frame_ioreq(0);
 
-    case XENMEM_resource_ioreq_server_frame_ioreq(0):
-        *mfn = page_to_mfn(s->ioreq.page);
+        *mfn = page_to_mfn(s->ioreqs.page[page_idx].page);
         rc = 0;
         break;
+    }
-
     default:
         rc = -EINVAL;
         break;
diff --git a/xen/include/xen/ioreq.h b/xen/include/xen/ioreq.h
index e86f0869fa..8604311cb4 100644
--- a/xen/include/xen/ioreq.h
+++ b/xen/include/xen/ioreq.h
@@ -19,9 +19,16 @@
 #ifndef __XEN_IOREQ_H__
 #define __XEN_IOREQ_H__
 
+#include <xen/lib.h>
 #include <xen/sched.h>
 
 #include <public/hvm/dm_op.h>
+#include <public/hvm/hvm_info_table.h>
+#include <public/hvm/ioreq.h>
+
+/* 4096 / 32 = 128 ioreq slots per page */
+#define IOREQS_PER_PAGE  (PAGE_SIZE / sizeof(struct ioreq))
+#define NR_IOREQ_PAGES   DIV_ROUND_UP(HVM_MAX_VCPUS, IOREQS_PER_PAGE)
 
 struct ioreq_page {
     gfn_t gfn;
@@ -29,6 +36,10 @@ struct ioreq_page {
     void *va;
 };
 
+struct ioreq_pages {
+    struct ioreq_page page[NR_IOREQ_PAGES];
+};
+
 struct ioreq_vcpu {
     struct list_head list_entry;
     struct vcpu      *vcpu;
@@ -45,7 +56,7 @@ struct ioreq_server {
     /* Lock to serialize toolstack modifications */
     spinlock_t             lock;
 
-    struct ioreq_page      ioreq;
+    struct ioreq_pages     ioreqs;
     struct list_head       ioreq_vcpu_list;
     struct ioreq_page      bufioreq;
 
-- 
2.51.0



--
Julian Vetter | Vates Hypervisor & Kernel Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech
