[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 11/11] evtchn: add FIFO-based event channel hypercalls and port ops



From: David Vrabel <david.vrabel@xxxxxxxxxx>

Add the implementation for the FIFO-based event channel ABI.  This
comprises the new hypercall sub-ops (EVTCHNOP_init_control,
EVTCHNOP_expand_array) and the required evtchn_ops (set_pending,
unmask, etc.).

Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
---
 xen/common/Makefile          |    1 +
 xen/common/event_channel.c   |   20 ++
 xen/common/event_fifo.c      |  455 ++++++++++++++++++++++++++++++++++++++++++
 xen/include/xen/event_fifo.h |   54 +++++
 xen/include/xen/sched.h      |    7 +-
 5 files changed, 536 insertions(+), 1 deletions(-)
 create mode 100644 xen/common/event_fifo.c
 create mode 100644 xen/include/xen/event_fifo.h

diff --git a/xen/common/Makefile b/xen/common/Makefile
index 0a3a367..533b603 100644
--- a/xen/common/Makefile
+++ b/xen/common/Makefile
@@ -7,6 +7,7 @@ obj-y += domctl.o
 obj-y += domain.o
 obj-y += event_2l.o
 obj-y += event_channel.o
+obj-y += event_fifo.o
 obj-y += grant_table.o
 obj-y += irq.o
 obj-y += kernel.o
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index 45ff115..9e0832f 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -26,6 +26,7 @@
 #include <xen/compat.h>
 #include <xen/guest_access.h>
 #include <xen/keyhandler.h>
+#include <xen/event_fifo.h>
 #include <asm/current.h>
 
 #include <public/xen.h>
@@ -1096,6 +1097,24 @@ long do_event_channel_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
         break;
     }
 
+    case EVTCHNOP_init_control: {
+        struct evtchn_init_control init_control;
+        if ( copy_from_guest(&init_control, arg, 1) != 0 )
+            return -EFAULT;
+        rc = evtchn_fifo_init_control(&init_control);
+        if ( !rc && __copy_to_guest(arg, &init_control, 1) )
+            rc = -EFAULT;
+        break;
+    }
+
+    case EVTCHNOP_expand_array: {
+        struct evtchn_expand_array expand_array;
+        if ( copy_from_guest(&expand_array, arg, 1) != 0 )
+            return -EFAULT;
+        rc = evtchn_fifo_expand_array(&expand_array);
+        break;
+    }
+
     case EVTCHNOP_set_priority: {
         struct evtchn_set_priority set_priority;
         if ( copy_from_guest(&set_priority, arg, 1) != 0 )
@@ -1300,6 +1319,7 @@ void evtchn_destroy(struct domain *d)
 
     clear_global_virq_handlers(d);
 
+    evtchn_fifo_destroy(d);
     xfree(d->evtchn);
 }
 
diff --git a/xen/common/event_fifo.c b/xen/common/event_fifo.c
new file mode 100644
index 0000000..c674178
--- /dev/null
+++ b/xen/common/event_fifo.c
@@ -0,0 +1,455 @@
+/*
+ * FIFO event channel management.
+ *
+ * Copyright (C) 2013 Citrix Systems R&D Ltd.
+ * 
+ * This source code is licensed under the GNU General Public License,
+ * Version 2 or later.  See the file COPYING for more details.
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/event.h>
+#include <xen/event_fifo.h>
+#include <xen/paging.h>
+#include <xen/mm.h>
+
+#include <public/event_channel.h>
+
+/*
+ * Look up the event word for @port in domain @d's event array.
+ *
+ * Returns NULL when @port lies beyond the number of event words the
+ * array currently holds (d->evtchn_fifo->num_evtchns).
+ */
+static inline event_word_t *evtchn_fifo_word_from_port(struct domain *d,
+                                                       unsigned port)
+{
+    const struct evtchn_fifo_domain *fifo = d->evtchn_fifo;
+    unsigned page_idx, word_idx;
+
+    if ( unlikely(port >= fifo->num_evtchns) )
+        return NULL;
+
+    /* Split the port number into an array page and a word within it. */
+    page_idx = port / EVTCHN_FIFO_EVENT_WORDS_PER_PAGE;
+    word_idx = port % EVTCHN_FIFO_EVENT_WORDS_PER_PAGE;
+
+    return &fifo->event_array[page_idx].virt[word_idx];
+}
+
+/*
+ * Atomically replace the link field of @word with @link.
+ *
+ * The update is only made while the LINKED bit is set in @word.  If the
+ * bit is seen clear (on the first read or after a failed cmpxchg)
+ * nothing is written and 0 is returned.  Returns 1 once the new link
+ * value has been stored.
+ *
+ * NOTE(review): @link is OR-ed in without masking, so this assumes the
+ * caller passes a value that fits within EVTCHN_FIFO_LINK_MASK --
+ * confirm against callers.
+ */
+static bool_t evtchn_fifo_set_link(event_word_t *word, uint32_t link)
+{
+    event_word_t n, o, w;
+
+    w = *word;
+
+    do {
+        /* Give up as soon as the word is observed unlinked. */
+        if ( !(w & (1 << EVTCHN_FIFO_LINKED)) )
+            return 0;
+        o = w;
+        n = (w & ~EVTCHN_FIFO_LINK_MASK) | link;
+        /* On a lost race, retry with the value cmpxchg observed. */
+    } while ( (w = cmpxchg(word, o, n)) != o );
+
+    return 1;
+}
+
+/*
+ * Mark @evtchn pending and, if it is unmasked and not already linked,
+ * link it on to the tail of the queue of @v selected by the event's
+ * priority.
+ *
+ * The vCPU is only marked as having events pending when the queue's bit
+ * in the control block's ready word was previously clear.  Pollers are
+ * checked when the PENDING bit makes a 0 -> 1 transition.
+ *
+ * Nothing is done if the event array has no word for the port.  If @v
+ * has no control block the event is left pending but unlinked.
+ */
+static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn)
+{
+    struct domain *d = v->domain;
+    unsigned port;
+    event_word_t *word;
+    struct evtchn_fifo_queue *q;
+    unsigned long flags;
+    bool_t was_pending;
+
+    port = evtchn->port;
+    word = evtchn_fifo_word_from_port(d, port);
+    if ( unlikely(!word) )
+        return;
+
+    was_pending = test_and_set_bit(EVTCHN_FIFO_PENDING, word);
+
+    /*
+     * The FIFO port ops are installed for the whole domain when the
+     * first control block is set up, so @v may not have one yet.
+     * Without a control block there is no queue to link on to: leave
+     * the event pending but unlinked rather than dereferencing NULL.
+     */
+    if ( unlikely(!v->evtchn_fifo) )
+        goto done;
+
+    /*
+     * No locking around getting the queue. This may race with
+     * changing the priority but we are allowed to signal the event
+     * once on the old priority.
+     */
+    q = &v->evtchn_fifo->queue[evtchn->priority];
+
+    /*
+     * Link the event if it is unmasked and not already linked.
+     */
+    if ( !test_bit(EVTCHN_FIFO_MASKED, word)
+         && !test_and_set_bit(EVTCHN_FIFO_LINKED, word) )
+    {
+        event_word_t *tail_word;
+        bool_t linked = 0;
+
+        spin_lock_irqsave(&q->lock, flags);
+
+        /*
+         * Atomically link the tail to port iff the tail is linked.
+         * If the tail is unlinked the queue is empty.
+         *
+         * If port is the same as tail, the queue is empty but q->tail
+         * will appear linked as we just set LINKED above.
+         *
+         * If the queue is empty (i.e., we haven't linked to the new
+         * event), head must be updated.
+         */
+        if ( port != q->tail )
+        {
+            tail_word = evtchn_fifo_word_from_port(d, q->tail);
+            linked = evtchn_fifo_set_link(tail_word, port);
+        }
+        if ( !linked )
+            write_atomic(q->head, port);
+        q->tail = port;
+
+        spin_unlock_irqrestore(&q->lock, flags);
+
+        /* Only kick the vCPU if this queue was not already ready. */
+        if ( !test_and_set_bit(q->priority,
+                               &v->evtchn_fifo->control_block->ready) )
+            vcpu_mark_events_pending(v);
+    }
+
+ done:
+    if ( !was_pending )
+        evtchn_check_pollers(d, port);
+}
+
+/*
+ * Clear the PENDING bit of @evtchn's event word, if the event array has
+ * a word for the port.
+ */
+static void evtchn_fifo_clear_pending(struct domain *d, struct evtchn *evtchn)
+{
+    event_word_t *w = evtchn_fifo_word_from_port(d, evtchn->port);
+
+    if ( unlikely(w == NULL) )
+        return;
+
+    /*
+     * Only the P bit is touched.  The event is deliberately left
+     * linked: the guest unlinks events itself and ignores any that are
+     * no longer pending.
+     */
+    clear_bit(EVTCHN_FIFO_PENDING, w);
+}
+
+/*
+ * Clear the MASKED bit of @evtchn's event word and, if the event is
+ * pending, deliver it again to the vCPU it notifies.
+ *
+ * Does nothing if the event array has no word for the port.
+ */
+static void evtchn_fifo_unmask(struct domain *d, struct evtchn *evtchn)
+{
+    struct vcpu *target = d->vcpu[evtchn->notify_vcpu_id];
+    event_word_t *w = evtchn_fifo_word_from_port(d, evtchn->port);
+
+    if ( unlikely(w == NULL) )
+        return;
+
+    clear_bit(EVTCHN_FIFO_MASKED, w);
+
+    /* Nothing to relink unless the event is still pending. */
+    if ( !test_bit(EVTCHN_FIFO_PENDING, w) )
+        return;
+
+    evtchn_fifo_set_pending(target, evtchn);
+}
+
+/*
+ * Report whether the PENDING bit is set in @evtchn's event word.
+ * A port without an event word is reported as not pending.
+ */
+static bool_t evtchn_fifo_is_pending(struct domain *d,
+                                     const struct evtchn *evtchn)
+{
+    event_word_t *w = evtchn_fifo_word_from_port(d, evtchn->port);
+
+    return unlikely(w == NULL) ? 0 : test_bit(EVTCHN_FIFO_PENDING, w);
+}
+
+/*
+ * Report whether the MASKED bit is set in @evtchn's event word.
+ * A port without an event word is reported as masked.
+ */
+static bool_t evtchn_fifo_is_masked(struct domain *d,
+                                    const struct evtchn *evtchn)
+{
+    event_word_t *w = evtchn_fifo_word_from_port(d, evtchn->port);
+
+    return unlikely(w == NULL) ? 1 : test_bit(EVTCHN_FIFO_MASKED, w);
+}
+
+/*
+ * Record @priority as the priority of @evtchn.
+ *
+ * Returns 0 on success, or -EINVAL if @priority is greater than
+ * EVTCHN_FIFO_PRIORITY_MIN.
+ */
+static int evtchn_fifo_set_priority(struct domain *d, struct evtchn *evtchn,
+                                    unsigned priority)
+{
+    if ( priority <= EVTCHN_FIFO_PRIORITY_MIN )
+    {
+        /*
+         * The new queue is only used for events raised from now on.
+         * An event that is already pending, or is in the middle of
+         * being linked, stays on its old queue -- this is fine.
+         */
+        evtchn->priority = priority;
+        return 0;
+    }
+
+    return -EINVAL;
+}
+
+/*
+ * Print a four-column summary of @evtchn's link state:
+ *   "?"     the event array has no word for the port,
+ *   "-"     the event is not linked,
+ *   <n>     the link field of a linked event.
+ */
+static void evtchn_fifo_print_state(struct domain *d,
+                                    const struct evtchn *evtchn)
+{
+    event_word_t *w = evtchn_fifo_word_from_port(d, evtchn->port);
+
+    if ( w == NULL )
+    {
+        printk("?   ");
+        return;
+    }
+
+    if ( !test_bit(EVTCHN_FIFO_LINKED, w) )
+    {
+        printk("-   ");
+        return;
+    }
+
+    printk("%-4u", *w & EVTCHN_FIFO_LINK_MASK);
+}
+
+/*
+ * Port operations implementing the FIFO-based ABI.  Installed as the
+ * domain's evtchn_port_ops by evtchn_fifo_init_control().
+ */
+static const struct evtchn_port_ops evtchn_port_ops_fifo =
+{
+    .set_pending   = evtchn_fifo_set_pending,
+    .clear_pending = evtchn_fifo_clear_pending,
+    .unmask        = evtchn_fifo_unmask,
+    .is_pending    = evtchn_fifo_is_pending,
+    .is_masked     = evtchn_fifo_is_masked,
+    .set_priority  = evtchn_fifo_set_priority,
+    .print_state   = evtchn_fifo_print_state,
+};
+
+/*
+ * Take a writable-type reference on guest frame @gfn of domain @d and
+ * map it globally into Xen.
+ *
+ * On success returns 0 with *@page and *@virt set; the caller releases
+ * both with unmap_guest_page().  On failure *@page and *@virt are left
+ * untouched and no references are held:
+ *   -EINVAL  the frame could not be looked up, or a writable type
+ *            reference could not be taken on it;
+ *   -ENOMEM  the global mapping could not be established.
+ */
+static int map_guest_page(struct domain *d, uint64_t gfn,
+                          struct page_info **page, void **virt)
+{
+    struct page_info *p;
+    void *va;
+
+    p = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
+    if ( !p )
+        return -EINVAL;
+
+    if ( !get_page_type(p, PGT_writable_page) )
+    {
+        put_page(p);
+        return -EINVAL;
+    }
+
+    /*
+     * map_domain_page_global() takes a machine frame number.  The
+     * guest supplied a GFN, which only equals the MFN for
+     * non-translated guests, so map the frame we actually hold a
+     * reference on.
+     */
+    va = map_domain_page_global(page_to_mfn(p));
+    if ( !va )
+    {
+        put_page_and_type(p);
+        return -ENOMEM;
+    }
+
+    *virt = va;
+    *page = p;
+    return 0;
+}
+
+/*
+ * Undo map_guest_page(): remove the global mapping @virt and drop the
+ * page and type references on @page.  A NULL @page is a no-op.
+ */
+static void unmap_guest_page(struct page_info *page, void *virt)
+{
+    if ( page != NULL )
+    {
+        unmap_domain_page_global(virt);
+        put_page_and_type(page);
+    }
+}
+
+/*
+ * Release @v's FIFO state: unmap the control block page, free the
+ * per-vCPU structure and clear the pointer.  Safe to call when @v has
+ * no FIFO state.
+ */
+static void cleanup_control_block(struct vcpu *v)
+{
+    if ( !v->evtchn_fifo )
+        return;
+
+    unmap_guest_page(v->evtchn_fifo->cb_page,
+                     v->evtchn_fifo->control_block);
+    xfree(v->evtchn_fifo);
+    v->evtchn_fifo = NULL;
+}
+
+/*
+ * Initialise queue @q of @v for priority @i: point its head at slot @i
+ * of the control block's head array, record the priority and set up
+ * the queue lock.
+ */
+static void init_queue(struct vcpu *v, struct evtchn_fifo_queue *q, unsigned i)
+{
+    struct evtchn_fifo_control_block *cb = v->evtchn_fifo->control_block;
+
+    q->head = &cb->head[i];
+    q->priority = i;
+    spin_lock_init(&q->lock);
+}
+
+/*
+ * Allocate @v's FIFO state and map its control block, which lives at
+ * byte @offset within guest frame @gfn.  All queues are initialised.
+ *
+ * Returns 0 on success, -EINVAL if @v already has FIFO state, -ENOMEM
+ * if the allocation fails, or the error from map_guest_page().
+ */
+static int setup_control_block(struct vcpu *v, uint64_t gfn, uint32_t offset)
+{
+    struct evtchn_fifo_vcpu *fifo;
+    struct page_info *cb_page;
+    void *cb_virt;
+    unsigned prio;
+    int rc;
+
+    if ( v->evtchn_fifo )
+        return -EINVAL;
+
+    fifo = xzalloc(struct evtchn_fifo_vcpu);
+    if ( !fifo )
+        return -ENOMEM;
+
+    rc = map_guest_page(v->domain, gfn, &cb_page, &cb_virt);
+    if ( rc < 0 )
+    {
+        xfree(fifo);
+        return rc;
+    }
+
+    v->evtchn_fifo = fifo;
+
+    fifo->cb_page       = cb_page;
+    fifo->control_block = cb_virt + offset;
+
+    /* Priorities run from 0 up to and including EVTCHN_FIFO_PRIORITY_MIN. */
+    for ( prio = 0; prio <= EVTCHN_FIFO_PRIORITY_MIN; prio++ )
+        init_queue(v, &fifo->queue[prio], prio);
+
+    return 0;
+}
+
+/*
+ * Give @d an event array that has no pages yet.
+ *
+ * Returns 0 if the array exists on return (including when it already
+ * existed), or -ENOMEM if it could not be allocated.
+ */
+static int setup_event_array(struct domain *d)
+{
+    struct evtchn_fifo_domain *fifo;
+
+    if ( d->evtchn_fifo != NULL )
+        return 0;
+
+    fifo = xzalloc(struct evtchn_fifo_domain);
+    if ( !fifo )
+        return -ENOMEM;
+
+    fifo->num_evtchns = 0;
+    d->evtchn_fifo = fifo;
+
+    return 0;
+}
+
+/*
+ * Unmap every page of @d's event array, free the array and clear the
+ * pointer.  Safe to call when @d has no event array.
+ */
+static void cleanup_event_array(struct domain *d)
+{
+    unsigned i;
+
+    if ( d->evtchn_fifo == NULL )
+        return;
+
+    /* Unused slots have a NULL page, which unmap_guest_page() ignores. */
+    for ( i = 0; i < EVTCHN_FIFO_MAX_EVENT_ARRAY_PAGES; i++ )
+    {
+        unmap_guest_page(d->evtchn_fifo->event_array[i].page,
+                         d->evtchn_fifo->event_array[i].virt);
+    }
+    xfree(d->evtchn_fifo);
+    /* Don't leave a dangling pointer (mirrors cleanup_control_block()). */
+    d->evtchn_fifo = NULL;
+}
+
+/*
+ * Set @priority on every port of @d, starting at port 1 and stopping
+ * at the first port that is not valid (or at d->max_evtchns).
+ */
+static void set_priority_all(struct domain *d, unsigned priority)
+{
+    unsigned p;
+
+    for ( p = 1; p < d->max_evtchns && port_is_valid(d, p); p++ )
+        evtchn_port_set_priority(d, evtchn_from_port(d, p), priority);
+}
+
+/*
+ * EVTCHNOP_init_control: set up the FIFO control block of one vCPU of
+ * the calling domain.
+ *
+ * @init_control supplies the vCPU, the guest frame holding the control
+ * block and the block's byte offset within that frame.  Its link_bits
+ * field is filled in with EVTCHN_FIFO_LINK_BITS.
+ *
+ * The first successful call for a domain also creates an empty event
+ * array, switches the domain to the FIFO port ops and resets the
+ * priority of all bound ports to the default.
+ *
+ * Returns 0 on success, -ENOENT if the vCPU does not exist, -EINVAL if
+ * the offset is misaligned or the block would cross a page boundary,
+ * or the error from setting up the control block or the event array.
+ */
+int evtchn_fifo_init_control(struct evtchn_init_control *init_control)
+{
+    struct domain *d = current->domain;
+    uint32_t vcpu_id;
+    uint64_t gfn;
+    uint32_t offset;
+    struct vcpu *v;
+    int rc;
+
+    init_control->link_bits = EVTCHN_FIFO_LINK_BITS;
+
+    vcpu_id = init_control->vcpu;
+    gfn     = init_control->control_mfn;
+    offset  = init_control->offset;
+
+    if ( (vcpu_id >= d->max_vcpus) || (d->vcpu[vcpu_id] == NULL) )
+        return -ENOENT;
+    v = d->vcpu[vcpu_id];
+
+    /* Must not cross page boundary. */
+    if ( offset > (PAGE_SIZE - sizeof(evtchn_fifo_control_block_t)) )
+        return -EINVAL;
+
+    /* Must be 8-bytes aligned. */
+    if ( offset & (8 - 1) )
+        return -EINVAL;
+
+    spin_lock(&d->event_lock);
+
+    rc = setup_control_block(v, gfn, offset);
+
+    /*
+     * If this is the first control block, setup an empty event array
+     * and switch to the fifo port ops.
+     *
+     * Any ports currently bound will have their priority set to the
+     * default.
+     *
+     * This is skipped when the control block could not be set up.
+     * Otherwise that error would be overwritten and the domain would be
+     * switched to the FIFO ABI while the guest is told the call
+     * succeeded for a vCPU that has no control block.
+     */
+    if ( rc == 0 && d->evtchn_fifo == NULL )
+    {
+        rc = setup_event_array(d);
+        if ( rc < 0 )
+            cleanup_control_block(v);
+        else
+        {
+            d->evtchn_port_ops = &evtchn_port_ops_fifo;
+            d->max_evtchns = 1 << EVTCHN_FIFO_LINK_BITS;
+            set_priority_all(d, EVTCHN_FIFO_PRIORITY_DEFAULT);
+        }
+    }
+
+    spin_unlock(&d->event_lock);
+
+    return rc;
+}
+
+/*
+ * Map guest frame @gfn and append it to @d's event array, growing the
+ * number of usable event words by one page's worth.
+ *
+ * Returns 0 on success, -ENOSPC if the array already holds its maximum
+ * number of pages, or the error from map_guest_page().
+ */
+static int add_page_to_event_array(struct domain *d, unsigned long gfn)
+{
+    struct evtchn_fifo_domain *fifo = d->evtchn_fifo;
+    struct evtchn_fifo_array_page *entry;
+    struct page_info *pg = NULL;
+    void *va;
+    unsigned idx = fifo->num_evtchns / EVTCHN_FIFO_EVENT_WORDS_PER_PAGE;
+    int rc;
+
+    if ( idx >= EVTCHN_FIFO_MAX_EVENT_ARRAY_PAGES )
+        return -ENOSPC;
+
+    rc = map_guest_page(d, gfn, &pg, &va);
+    if ( rc < 0 )
+        return rc;
+
+    /* Fill in the next free slot before exposing the extra words. */
+    entry = &fifo->event_array[idx];
+    entry->page = pg;
+    entry->virt = va;
+
+    fifo->num_evtchns += EVTCHN_FIFO_EVENT_WORDS_PER_PAGE;
+
+    return 0;
+}
+
+/*
+ * EVTCHNOP_expand_array: add the guest frame named by
+ * @expand_array->array_mfn to the calling domain's event array.
+ *
+ * Returns -ENOSYS if the domain has no event array, otherwise the
+ * result of add_page_to_event_array(), which is called with the
+ * domain's event lock held.
+ */
+int evtchn_fifo_expand_array(const struct evtchn_expand_array *expand_array)
+{
+    struct domain *currd = current->domain;
+    int ret;
+
+    if ( currd->evtchn_fifo == NULL )
+        return -ENOSYS;
+
+    spin_lock(&currd->event_lock);
+    ret = add_page_to_event_array(currd, expand_array->array_mfn);
+    spin_unlock(&currd->event_lock);
+
+    return ret;
+}
+
+/*
+ * Tear down all FIFO state of @d: every vCPU's control block first,
+ * then the domain's event array.
+ */
+void evtchn_fifo_destroy(struct domain *d)
+{
+    struct vcpu *vcpu;
+
+    for_each_vcpu ( d, vcpu )
+    {
+        cleanup_control_block(vcpu);
+    }
+
+    cleanup_event_array(d);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/xen/event_fifo.h b/xen/include/xen/event_fifo.h
new file mode 100644
index 0000000..702d692
--- /dev/null
+++ b/xen/include/xen/event_fifo.h
@@ -0,0 +1,54 @@
+/*
+ * FIFO-based event channel ABI.
+ *
+ * Copyright (C) 2013 Citrix Systems R&D Ltd.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2 or later.  See the file COPYING for more details.
+ */
+#ifndef __XEN_EVENT_FIFO_H__
+#define __XEN_EVENT_FIFO_H__
+
+/* One event queue of a vCPU; there is one queue per priority. */
+struct evtchn_fifo_queue {
+    uint32_t *head; /* points into control block */
+    uint32_t tail;  /* port most recently linked on to this queue */
+    spinlock_t lock; /* held while linking an event on to the queue */
+    uint8_t priority; /* this queue's priority; also its bit in 'ready' */
+};
+
+/* Per-vCPU FIFO state, allocated when the vCPU's control block is set up. */
+struct evtchn_fifo_vcpu {
+    struct page_info *cb_page; /* guest page holding the control block */
+    struct evtchn_fifo_control_block *control_block; /* Xen mapping of it */
+    struct evtchn_fifo_queue queue[EVTCHN_FIFO_MAX_QUEUES];
+};
+
+/* Number of event words that fit in one page of the event array. */
+#define EVTCHN_FIFO_EVENT_WORDS_PER_PAGE (PAGE_SIZE / sizeof(event_word_t))
+/* Pages needed to give each of the 2^LINK_BITS ports an event word. */
+#define EVTCHN_FIFO_MAX_EVENT_ARRAY_PAGES \
+    ((1 << EVTCHN_FIFO_LINK_BITS) / EVTCHN_FIFO_EVENT_WORDS_PER_PAGE)
+
+
+/* One guest-provided page of the event array. */
+struct evtchn_fifo_array_page {
+    struct page_info *page; /* NULL while the slot is unused */
+    event_word_t *virt;     /* Xen mapping of the page's event words */
+};
+
+/* Per-domain FIFO state: the event array and its current size. */
+struct evtchn_fifo_domain {
+    struct evtchn_fifo_array_page
+        event_array[EVTCHN_FIFO_MAX_EVENT_ARRAY_PAGES];
+    unsigned num_evtchns; /* event words available in event_array */
+};
+
+/* EVTCHNOP_init_control: set up a vCPU's control block (calling domain). */
+int evtchn_fifo_init_control(struct evtchn_init_control *init_control);
+/* EVTCHNOP_expand_array: add a page to the calling domain's event array. */
+int evtchn_fifo_expand_array(const struct evtchn_expand_array *expand_array);
+/* Release all FIFO event channel state held by @domain. */
+void evtchn_fifo_destroy(struct domain *domain);
+
+#endif /* __XEN_EVENT_FIFO_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index ba3714d..5ec57af 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -61,7 +61,8 @@ extern struct domain *dom0;
 #define next_power_of_2(x)      (__RDU32((x)-1) + 1)
 
 /* Maximum number of event channels for any ABI. */
-#define MAX_NR_EVTCHNS NR_EVENT_CHANNELS
+#define MAX_NR_EVTCHNS (max_t(unsigned, NR_EVENT_CHANNELS, \
+                              1 << EVTCHN_FIFO_LINK_BITS))
 
 #define EVTCHNS_PER_BUCKET (PAGE_SIZE / next_power_of_2(sizeof(struct evtchn)))
 #define EVTCHNS_PER_GROUP  (BUCKETS_PER_GROUP * EVTCHNS_PER_BUCKET)
@@ -95,6 +96,7 @@ struct evtchn
         } pirq;        /* state == ECS_PIRQ */
         u16 virq;      /* state == ECS_VIRQ */
     } u;
+    u8 priority;
 #ifdef FLASK_ENABLE
     void *ssid;
 #endif
@@ -209,6 +211,8 @@ struct vcpu
     /* Guest-specified relocation of vcpu_info. */
     unsigned long vcpu_info_mfn;
 
+    struct evtchn_fifo_vcpu *evtchn_fifo;
+
     struct arch_vcpu arch;
 };
 
@@ -290,6 +294,7 @@ struct domain
     unsigned         max_evtchn_port;
     spinlock_t       event_lock;
     const struct evtchn_port_ops *evtchn_port_ops;
+    struct evtchn_fifo_domain *evtchn_fifo;
 
     struct grant_table *grant_table;
 
-- 
1.7.2.5


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.