|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [PATCH v4 2/2] x86/ioreq: Extend ioreq server to support multiple ioreq pages
A single shared ioreq page provides PAGE_SIZE/sizeof(ioreq_t) = 128
slots, limiting HVM guests to 128 vCPUs. To support more vCPUs, the
single struct ioreq_page in the ioreq_server is replaced with an ioreq_t
pointer backed by a dynamically sized allocation.
For the resource mapping path (XENMEM_acquire_resource), the common
ioreq_server_alloc_mfn() allocates nr_ioreq_pages(d) domain heap pages
with MEMF_no_refcount and writable type references, then maps them as a
single contiguous VA via vmap(). Teardown recovers the pages from the
vmap address via vmap_size() and vmap_to_page().
The legacy GFN-mapped path continues to support only a single ioreq
page. For domains whose vCPU count exceeds PAGE_SIZE/sizeof(ioreq_t),
hvm_map_ioreq_gfn() returns -EOPNOTSUPP. Those domains need to use the
resource mapping interface. This also avoids fragmentation when
allocating multiple slots from the GFN pool. The GFN path uses vmap()
for the Xen-side mapping, consistent with ioreq_server_alloc_mfn(),
allowing vmap_to_page() to recover the page during teardown.
Signed-off-by: Julian Vetter <julian.vetter@xxxxxxxxxx>
---
Changes in v4:
- Dropped the multi-page support for the legacy GFN-mapped path
- When 'nr_ioreq_pages(d) > 1' -> -EOPNOTSUPP
- But now also use vmap(), same as in resource mapping path, so new
s->ioreq pointer is used in either case
- Mirror exactly what prepare_ring_for_helper/destroy_ring_for_helper
is doing except the __map_domain_page_global (replaced by
vmap/vunmap)
- Replaced xvzalloc_array() by xvmalloc_array() -> No need to have mfns
array zero'ed
- Inverted logic in ioreq_server_alloc_mfn() and ioreq_server_free_mfn()
to check for 'if (buf)' (same as in the GFN-mapped path)
- Fixed ioreq_server_alloc_mfn -> Correctly call alloc_domheap_page +
get_page_and_type
- Fixed error handling and teardown path to correctly call
put_page_alloc_ref + put_page_and_type just like for the bufioreq
- Kept shared_iopage_t so QEMU, varstored, etc. can just reference into
it via p->vcpu_ioreq[cpu_id] -> No modification needed
---
xen/arch/x86/hvm/ioreq.c | 153 ++++++++++++++++++++++++++++++++----
xen/common/ioreq.c | 166 ++++++++++++++++++++++++++++++++-------
xen/include/xen/ioreq.h | 10 ++-
3 files changed, 286 insertions(+), 43 deletions(-)
diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c
index 355b2ba12c..ec4f210768 100644
--- a/xen/arch/x86/hvm/ioreq.c
+++ b/xen/arch/x86/hvm/ioreq.c
@@ -15,6 +15,7 @@
#include <xen/sched.h>
#include <xen/softirq.h>
#include <xen/trace.h>
+#include <xen/vmap.h>
#include <xen/vpci.h>
#include <asm/hvm/emulate.h>
@@ -123,9 +124,10 @@ static void hvm_free_ioreq_gfn(struct ioreq_server *s, gfn_t gfn)
static void hvm_unmap_ioreq_gfn(struct ioreq_server *s, bool buf)
{
- struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq;
-
+ if ( buf )
{
+ struct ioreq_page *iorp = &s->bufioreq;
+
if ( gfn_eq(iorp->gfn, INVALID_GFN) )
return;
@@ -134,16 +136,33 @@ static void hvm_unmap_ioreq_gfn(struct ioreq_server *s, bool buf)
hvm_free_ioreq_gfn(s, iorp->gfn);
iorp->gfn = INVALID_GFN;
+ return;
}
+
+ if ( gfn_eq(s->ioreq_gfn, INVALID_GFN) )
+ return;
+
+ put_page_and_type(vmap_to_page((void *)s->ioreq));
+ vunmap(s->ioreq);
+ s->ioreq = NULL;
+
+ hvm_free_ioreq_gfn(s, s->ioreq_gfn);
+ s->ioreq_gfn = INVALID_GFN;
}
static int hvm_map_ioreq_gfn(struct ioreq_server *s, bool buf)
{
struct domain *d = s->target;
- struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq;
+ gfn_t base_gfn;
+ struct page_info *pg;
+ p2m_type_t p2mt;
+ mfn_t mfn;
int rc;
+ if ( buf )
{
+ struct ioreq_page *iorp = &s->bufioreq;
+
if ( iorp->page )
{
/*
@@ -173,35 +192,141 @@ static int hvm_map_ioreq_gfn(struct ioreq_server *s, bool buf)
return rc;
}
+
+ if ( s->ioreq )
+ {
+ /*
+ * If pages have already been allocated (which will happen on
+ * demand if ioreq_server_get_frame() is called), then
+ * mapping a guest frame is not permitted.
+ */
+ if ( gfn_eq(s->ioreq_gfn, INVALID_GFN) )
+ return -EPERM;
+ return 0;
+ }
+
+ /*
+ * The legacy GFN path supports only a single ioreq page. Guests requiring
+ * more ioreq slots must use the resource mapping interface
+ * (XENMEM_acquire_resource).
+ */
+ if ( nr_ioreq_pages(d) > 1 )
+ return -EOPNOTSUPP;
+
+ if ( d->is_dying )
+ return -EINVAL;
+
+ base_gfn = hvm_alloc_ioreq_gfn(s);
+
+ if ( gfn_eq(base_gfn, INVALID_GFN) )
+ return -ENOMEM;
+
+ /*
+ * The page management is the same as prepare_ring_for_helper(), but vmap()
+ * is used instead of __map_domain_page_global() to be consistent with
+ * ioreq_server_alloc_mfn(), which uses vmap() to map potentially multiple
+ * pages. This also allows vmap_to_page() to recover the struct page_info *
+ * from s->ioreq during teardown.
+ */
+ rc = check_get_page_from_gfn(d, base_gfn, false, &p2mt, &pg);
+ if ( rc )
+ {
+ if ( rc == -EAGAIN )
+ rc = -ENOENT;
+ goto fail;
+ }
+
+ if ( !get_page_type(pg, PGT_writable_page) )
+ {
+ put_page(pg);
+ rc = -EINVAL;
+ goto fail;
+ }
+
+ mfn = page_to_mfn(pg);
+ s->ioreq = vmap(&mfn, 1);
+ if ( !s->ioreq )
+ {
+ put_page_and_type(pg);
+ rc = -ENOMEM;
+ goto fail;
+ }
+
+ s->ioreq_gfn = base_gfn;
+ return 0;
+
+ fail:
+ hvm_free_ioreq_gfn(s, base_gfn);
+ return rc;
}
static void hvm_remove_ioreq_gfn(struct ioreq_server *s, bool buf)
{
struct domain *d = s->target;
- struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq;
+ struct page_info *pg;
+ void *va;
+ gfn_t gfn;
- if ( gfn_eq(iorp->gfn, INVALID_GFN) )
- return;
+ if ( buf )
+ {
+ struct ioreq_page *iorp = &s->bufioreq;
+
+ if ( gfn_eq(iorp->gfn, INVALID_GFN) )
+ return;
+
+ gfn = iorp->gfn;
+ pg = iorp->page;
+ va = iorp->va;
+ }
+ else
+ {
+ if ( gfn_eq(s->ioreq_gfn, INVALID_GFN) )
+ return;
- if ( p2m_remove_page(d, iorp->gfn, page_to_mfn(iorp->page), 0) )
+ gfn = s->ioreq_gfn;
+ pg = vmap_to_page(s->ioreq);
+ va = s->ioreq;
+ }
+
+ if ( p2m_remove_page(d, gfn, page_to_mfn(pg), 0) )
domain_crash(d);
- clear_page(iorp->va);
+ clear_page(va);
}
static int hvm_add_ioreq_gfn(struct ioreq_server *s, bool buf)
{
struct domain *d = s->target;
- struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq;
+ struct page_info *pg;
+ void *va;
+ gfn_t gfn;
int rc;
- if ( gfn_eq(iorp->gfn, INVALID_GFN) )
- return 0;
+ if ( buf )
+ {
+ struct ioreq_page *iorp = &s->bufioreq;
+
+ if ( gfn_eq(iorp->gfn, INVALID_GFN) )
+ return 0;
+
+ gfn = iorp->gfn;
+ pg = iorp->page;
+ va = iorp->va;
+ }
+ else
+ {
+ if ( gfn_eq(s->ioreq_gfn, INVALID_GFN) )
+ return 0;
+
+ gfn = s->ioreq_gfn;
+ pg = vmap_to_page(s->ioreq);
+ va = s->ioreq;
+ }
- clear_page(iorp->va);
+ clear_page(va);
- rc = p2m_add_page(d, iorp->gfn, page_to_mfn(iorp->page), 0, p2m_ram_rw);
+ rc = p2m_add_page(d, gfn, page_to_mfn(pg), 0, p2m_ram_rw);
if ( rc == 0 )
- paging_mark_pfn_dirty(d, _pfn(gfn_x(iorp->gfn)));
+ paging_mark_pfn_dirty(d, _pfn(gfn_x(gfn)));
return rc;
}
diff --git a/xen/common/ioreq.c b/xen/common/ioreq.c
index 2e284ad26c..5a09e2ba36 100644
--- a/xen/common/ioreq.c
+++ b/xen/common/ioreq.c
@@ -26,6 +26,8 @@
#include <xen/paging.h>
#include <xen/sched.h>
#include <xen/trace.h>
+#include <xen/vmap.h>
+#include <xen/xvmalloc.h>
#include <asm/guest_atomics.h>
#include <asm/ioreq.h>
@@ -95,12 +97,10 @@ static struct ioreq_server *get_ioreq_server(const struct domain *d,
static ioreq_t *get_ioreq(struct ioreq_server *s, struct vcpu *v)
{
- shared_iopage_t *p = s->ioreq.va;
-
ASSERT((v == current) || !vcpu_runnable(v));
- ASSERT(p != NULL);
+ ASSERT(s->ioreq != NULL);
- return &p->vcpu_ioreq[v->vcpu_id];
+ return &s->ioreq[v->vcpu_id];
}
/*
@@ -260,10 +260,16 @@ bool vcpu_ioreq_handle_completion(struct vcpu *v)
static int ioreq_server_alloc_mfn(struct ioreq_server *s, bool buf)
{
- struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq;
- struct page_info *page;
+ unsigned int i, nr_pages;
+ mfn_t *mfns;
+ int rc;
+ /* bufioreq: single page allocation */
+ if ( buf )
{
+ struct ioreq_page *iorp = &s->bufioreq;
+ struct page_info *page;
+
if ( iorp->page )
{
/*
@@ -294,26 +300,92 @@ static int ioreq_server_alloc_mfn(struct ioreq_server *s, bool buf)
iorp->va = __map_domain_page_global(page);
if ( !iorp->va )
- goto fail;
+ {
+ put_page_alloc_ref(page);
+ put_page_and_type(page);
+ return -ENOMEM;
+ }
iorp->page = page;
clear_page(iorp->va);
return 0;
}
- fail:
- put_page_alloc_ref(page);
- put_page_and_type(page);
+ /* ioreq: multi-page allocation */
+ if ( s->ioreq )
+ {
+ /*
+ * If a guest frame has already been mapped (which may happen
+ * on demand if ioreq_server_get_info() is called), then
+ * allocating a page is not permitted.
+ */
+ if ( !gfn_eq(s->ioreq_gfn, INVALID_GFN) )
+ return -EPERM;
+
+ return 0;
+ }
+
+ nr_pages = nr_ioreq_pages(s->target);
+ mfns = xvmalloc_array(mfn_t, nr_pages);
+
+ if ( !mfns )
+ return -ENOMEM;
+
+ for ( i = 0; i < nr_pages; i++ )
+ {
+ struct page_info *pg = alloc_domheap_page(s->target,
+ MEMF_no_refcount);
+
+ rc = -ENOMEM;
+ if ( !pg )
+ goto fail_pages;
+
+ if ( !get_page_and_type(pg, s->target, PGT_writable_page) )
+ {
+ /*
+ * The domain can't possibly know about this page yet, so
+ * failure here is a clear indication of something fishy
+ * going on.
+ */
+ domain_crash(s->emulator);
+ rc = -ENODATA;
+ goto fail_pages;
+ }
+
+ mfns[i] = page_to_mfn(pg);
+ }
+
+ s->ioreq = vmap(mfns, nr_pages);
+ if ( !s->ioreq )
+ {
+ rc = -ENOMEM;
+ i = nr_pages;
+ goto fail_pages;
+ }
+
+ memset(s->ioreq, 0, nr_pages * PAGE_SIZE);
+ xvfree(mfns);
+ return 0;
- return -ENOMEM;
+ fail_pages:
+ while ( i-- > 0 )
+ {
+ struct page_info *pg = mfn_to_page(mfns[i]);
+
+ put_page_alloc_ref(pg);
+ put_page_and_type(pg);
+ }
+ xvfree(mfns);
+ return rc;
}
static void ioreq_server_free_mfn(struct ioreq_server *s, bool buf)
{
- struct ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq;
- struct page_info *page = iorp->page;
-
+ if ( buf )
{
+ struct ioreq_page *iorp = &s->bufioreq;
+ struct page_info *page = iorp->page;
+
if ( !page )
return;
@@ -324,6 +396,23 @@ static void ioreq_server_free_mfn(struct ioreq_server *s, bool buf)
put_page_alloc_ref(page);
put_page_and_type(page);
+ return;
+ }
+
+ if ( s->ioreq )
+ {
+ unsigned int i, nr_pages = vmap_size(s->ioreq);
+
+ for ( i = 0; i < nr_pages; i++ )
+ {
+ struct page_info *pg = vmap_to_page((void *)s->ioreq +
+ i * PAGE_SIZE);
+
+ put_page_alloc_ref(pg);
+ put_page_and_type(pg);
+ }
+ vunmap(s->ioreq);
+ s->ioreq = NULL;
}
}
@@ -337,11 +426,29 @@ bool is_ioreq_server_page(struct domain *d, const struct page_info *page)
FOR_EACH_IOREQ_SERVER(d, id, s)
{
- if ( (s->ioreq.page == page) || (s->bufioreq.page == page) )
+ if ( s->bufioreq.page == page )
{
found = true;
break;
}
+
+ if ( s->ioreq )
+ {
+ unsigned int i;
+
+ for ( i = 0; i < nr_ioreq_pages(d); i++ )
+ {
+ if ( vmap_to_page((char *)s->ioreq +
+ i * PAGE_SIZE) == page )
+ {
+ found = true;
+ break;
+ }
+ }
+
+ if ( found )
+ break;
+ }
}
rspin_unlock(&d->ioreq_server.lock);
@@ -354,7 +461,7 @@ static void ioreq_server_update_evtchn(struct ioreq_server *s,
{
ASSERT(spin_is_locked(&s->lock));
- if ( s->ioreq.va != NULL )
+ if ( s->ioreq != NULL )
{
ioreq_t *p = get_ioreq(s, sv->vcpu);
@@ -594,7 +701,7 @@ static int ioreq_server_init(struct ioreq_server *s,
INIT_LIST_HEAD(&s->ioreq_vcpu_list);
spin_lock_init(&s->bufioreq_lock);
- s->ioreq.gfn = INVALID_GFN;
+ s->ioreq_gfn = INVALID_GFN;
s->bufioreq.gfn = INVALID_GFN;
rc = ioreq_server_alloc_rangesets(s, id);
@@ -773,7 +880,7 @@ static int ioreq_server_get_info(struct domain *d, ioservid_t id,
}
if ( ioreq_gfn )
- *ioreq_gfn = gfn_x(s->ioreq.gfn);
+ *ioreq_gfn = gfn_x(s->ioreq_gfn);
if ( HANDLE_BUFIOREQ(s) )
{
@@ -816,26 +923,29 @@ int ioreq_server_get_frame(struct domain *d, ioservid_t id,
if ( rc )
goto out;
- switch ( idx )
+ if ( idx == XENMEM_resource_ioreq_server_frame_bufioreq )
{
- case XENMEM_resource_ioreq_server_frame_bufioreq:
rc = -ENOENT;
if ( !HANDLE_BUFIOREQ(s) )
goto out;
*mfn = page_to_mfn(s->bufioreq.page);
rc = 0;
- break;
-
- case XENMEM_resource_ioreq_server_frame_ioreq(0):
- *mfn = page_to_mfn(s->ioreq.page);
- rc = 0;
- break;
+ }
+ else if ( idx >= XENMEM_resource_ioreq_server_frame_ioreq(0) &&
+ idx < XENMEM_resource_ioreq_server_frame_ioreq(nr_ioreq_pages(d)) )
+ {
+ unsigned int page_idx = idx - XENMEM_resource_ioreq_server_frame_ioreq(0);
- default:
rc = -EINVAL;
- break;
+ if ( s->ioreq )
+ {
+ *mfn = vmap_to_mfn((void *)s->ioreq + page_idx * PAGE_SIZE);
+ rc = 0;
+ }
}
+ else
+ rc = -EINVAL;
out:
rspin_unlock(&d->ioreq_server.lock);
diff --git a/xen/include/xen/ioreq.h b/xen/include/xen/ioreq.h
index e86f0869fa..41650a59ca 100644
--- a/xen/include/xen/ioreq.h
+++ b/xen/include/xen/ioreq.h
@@ -19,9 +19,16 @@
#ifndef __XEN_IOREQ_H__
#define __XEN_IOREQ_H__
+#include <xen/macros.h>
#include <xen/sched.h>
#include <public/hvm/dm_op.h>
+#include <public/hvm/ioreq.h>
+
+static inline unsigned int nr_ioreq_pages(const struct domain *d)
+{
+ return DIV_ROUND_UP(d->max_vcpus, PAGE_SIZE / sizeof(ioreq_t));
+}
struct ioreq_page {
gfn_t gfn;
@@ -45,7 +52,8 @@ struct ioreq_server {
/* Lock to serialize toolstack modifications */
spinlock_t lock;
- struct ioreq_page ioreq;
+ ioreq_t *ioreq;
+ gfn_t ioreq_gfn;
struct list_head ioreq_vcpu_list;
struct ioreq_page bufioreq;
--
2.51.0
--
Julian Vetter | Vates Hypervisor & Kernel Developer
XCP-ng & Xen Orchestra - Vates solutions
web: https://vates.tech
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |