[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 12/12] xen/events: use the FIFO-based ABI if available



From: David Vrabel <david.vrabel@xxxxxxxxxx>

If the hypervisor supports the FIFO-based ABI, enable it by
initializing the control block for the boot VCPU and subsequent VCPUs
as they are brought up.  The event array is expanded as required when
event ports are setup.

This implementation has some known limitations:

- The number of event channels is not raised above 4096 as this
  requires changing the way some internal structures are allocated.

- Migration will not work as the control blocks or event arrays are
  not remapped by Xen at the destination.

- The timer VIRQ which previously was treated as the highest priority
  event has the default priority.

Signed-off-by: David Vrabel <david.vrabel@xxxxxxxxxx>
---
 drivers/xen/events/Kbuild            |    1 +
 drivers/xen/events/events.c          |    7 +-
 drivers/xen/events/events_internal.h |    2 +
 drivers/xen/events/fifo.c            |  312 ++++++++++++++++++++++++++++++++++
 4 files changed, 321 insertions(+), 1 deletions(-)
 create mode 100644 drivers/xen/events/fifo.c

diff --git a/drivers/xen/events/Kbuild b/drivers/xen/events/Kbuild
index aea331e..74644d0 100644
--- a/drivers/xen/events/Kbuild
+++ b/drivers/xen/events/Kbuild
@@ -1,2 +1,3 @@
 obj-y += events.o
+obj-y += fifo.o
 obj-y += n-level.o
diff --git a/drivers/xen/events/events.c b/drivers/xen/events/events.c
index e6895b9..a7124f8 100644
--- a/drivers/xen/events/events.c
+++ b/drivers/xen/events/events.c
@@ -1512,8 +1512,13 @@ void xen_callback_vector(void) {}
 void __init xen_init_IRQ(void)
 {
        int i;
+       int ret;
 
-       evtchn_ops = evtchn_ops_nlevel;
+       ret = xen_evtchn_init_fifo_based();
+       if (ret < 0) {
+               printk(KERN_INFO "xen: falling back to n-level event channels");
+               evtchn_ops = evtchn_ops_nlevel;
+       }
 
        evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
                                    GFP_KERNEL);
diff --git a/drivers/xen/events/events_internal.h 
b/drivers/xen/events/events_internal.h
index 1c71a5d..d6bedb6 100644
--- a/drivers/xen/events/events_internal.h
+++ b/drivers/xen/events/events_internal.h
@@ -124,4 +124,6 @@ static inline void xen_evtchn_handle_events(int cpu)
        return evtchn_ops.handle_events(cpu);
 }
 
+int xen_evtchn_init_fifo_based(void);
+
 #endif /* #ifndef __EVENTS_INTERNAL_H__ */
diff --git a/drivers/xen/events/fifo.c b/drivers/xen/events/fifo.c
new file mode 100644
index 0000000..8f8e390
--- /dev/null
+++ b/drivers/xen/events/fifo.c
@@ -0,0 +1,312 @@
+/*
+ * Xen event channels (FIFO-based ABI)
+ *
+ * Copyright (C) 2013 Citrix Systems R&D ltd.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2 or later.  See the file COPYING for more details.
+ */
+
+#include <linux/linkage.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
+#include <linux/cpu.h>
+
+#include <asm/sync_bitops.h>
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/page.h>
+
+#include <xen/xen.h>
+#include <xen/xen-ops.h>
+#include <xen/events.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/event_channel.h>
+
+#include "events_internal.h"
+
+#define EVENT_WORDS_PER_PAGE (PAGE_SIZE / sizeof(event_word_t))
+#define MAX_EVENT_ARRAY_PAGES ((1 << EVTCHN_FIFO_LINK_BITS)    \
+                              / EVENT_WORDS_PER_PAGE)
+
+static DEFINE_PER_CPU(struct evtchn_fifo_control_block *, cpu_control_block);
+static event_word_t *event_array[MAX_EVENT_ARRAY_PAGES];
+static unsigned event_array_pages;
+
+#define BM(w) ((unsigned long *)(w))
+
+static inline event_word_t *event_word_from_port(int port)
+{
+       int i = port / EVENT_WORDS_PER_PAGE;
+
+       if (i >= event_array_pages)
+               return NULL;
+       return event_array[i] + port;
+}
+
+static int fifo_setup(struct irq_info *info)
+{
+       int port = info->evtchn;
+       int i;
+       int ret = -ENOMEM;
+
+       i = port / EVENT_WORDS_PER_PAGE;
+
+       if (i >= MAX_EVENT_ARRAY_PAGES)
+               return -EINVAL;
+
+       while (i >= event_array_pages) {
+               struct page *array_page = NULL;
+               struct evtchn_expand_array expand_array;
+
+               array_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+               if (array_page == NULL)
+                       goto error;
+
+               expand_array.array_mfn = virt_to_mfn(page_address(array_page));
+
+               ret = HYPERVISOR_event_channel_op(EVTCHNOP_expand_array, 
&expand_array);
+               if (ret < 0) {
+                       __free_page(array_page);
+                       goto error;
+               }
+
+               event_array[event_array_pages++] = page_address(array_page);
+       }
+       return 0;
+
+  error:
+       if (event_array_pages == 0)
+               panic("xen: unable to expand event array with initial page 
(%d)\n", ret);
+       else
+               printk(KERN_ERR "xen: unable to expand event array (%d)\n", 
ret);
+       return ret;
+}
+
/*
 * Nothing to do when a port is rebound to another VCPU: the FIFO ABI
 * keeps no guest-side per-port CPU state to update here.
 * NOTE(review): presumably the bind_vcpu hypercall issued by common
 * code is sufficient -- confirm against events.c.
 */
static void fifo_bind_to_cpu(struct irq_info *info, int cpu)
{
	/* no-op */
}
+
+static void fifo_clear_pending(int port)
+{
+       event_word_t *word = event_word_from_port(port);
+       sync_clear_bit(EVTCHN_FIFO_PENDING, BM(word));
+}
+
+static void fifo_set_pending(int port)
+{
+       event_word_t *word = event_word_from_port(port);
+       sync_set_bit(EVTCHN_FIFO_PENDING, BM(word));
+}
+
+static bool fifo_is_pending(int port)
+{
+       event_word_t *word = event_word_from_port(port);
+       return sync_test_bit(EVTCHN_FIFO_PENDING, BM(word));
+}
+
+static bool fifo_test_and_set_mask(int port)
+{
+       event_word_t *word = event_word_from_port(port);
+       return sync_test_and_set_bit(EVTCHN_FIFO_MASKED, BM(word));
+}
+
+static void fifo_mask(int port)
+{
+       event_word_t *word = event_word_from_port(port);
+       if (word)
+               sync_set_bit(EVTCHN_FIFO_MASKED, BM(word));
+}
+
/*
 * Unmask a port.
 *
 * The fast path clears MASKED locally.  A hypercall is needed when:
 * - the port is bound to a different VCPU (we cannot re-raise the
 *   event on that VCPU from here), or
 * - the event is already pending (Xen has to relink the event onto
 *   its queue for it to be delivered).
 */
static void fifo_unmask(int port)
{
	unsigned int cpu = get_cpu();
	bool do_hypercall = false;
	bool evtchn_pending = false;

	BUG_ON(!irqs_disabled());

	if (unlikely((cpu != cpu_from_evtchn(port))))
		do_hypercall = true;
	else {
		event_word_t *word = event_word_from_port(port);

		/* Clear MASKED before testing PENDING so a concurrent
		   delivery is not lost between the two operations. */
		sync_clear_bit(EVTCHN_FIFO_MASKED, BM(word));
		evtchn_pending = sync_test_bit(EVTCHN_FIFO_PENDING, BM(word));
		if (evtchn_pending)
			do_hypercall = true;
	}

	if (do_hypercall) {
		struct evtchn_unmask unmask = { .port = port };
		(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
	}

	put_cpu();
}
+
+static uint32_t clear_linked(volatile event_word_t *word)
+{
+    event_word_t n, o, w;
+
+    w = *word;
+
+    do {
+        o = w;
+        n = (w & ~((1 << EVTCHN_FIFO_LINKED) | EVTCHN_FIFO_LINK_MASK));
+    } while ( (w = sync_cmpxchg(word, o, n)) != o );
+
+    return w & EVTCHN_FIFO_LINK_MASK;
+}
+
+static void handle_irq_for_port(int port)
+{
+       int irq;
+       struct irq_desc *desc;
+
+       irq = evtchn_to_irq[port];
+       if (irq != -1) {
+               desc = irq_to_desc(irq);
+               if (desc)
+                       generic_handle_irq_desc(irq, desc);
+       }
+}
+
/*
 * Take one event off the head of the queue with the given priority,
 * advancing (or emptying) the queue, and deliver the event if it is
 * still pending and unmasked.
 */
static void consume_one_event(struct evtchn_fifo_control_block *control_block,
			      int priority, uint32_t *ready)
{
	volatile uint32_t *head;
	int port;
	event_word_t *word;
	uint32_t link;

	head = &control_block->head[priority];

	rmb(); /* Ensure word is up-to-date before reading head. */
	port = *head;
	/*
	 * NOTE(review): word may be NULL if Xen queued a port whose
	 * event array page is not mapped -- confirm that cannot
	 * happen before fifo_setup() has run for the port.
	 */
	word = event_word_from_port(port);

	link = clear_linked(word);

	/*
	 * If the link is non-zero, there are more events in the
	 * queue, otherwise the queue is empty.
	 *
	 * We don't set HEAD if the queue is empty as this may race
	 * with Xen adding a new event to the now empty list and
	 * setting HEAD.
	 */
	if (link != 0)
		*head = link;
	else
		clear_bit(priority, BM(ready));

	/* Only deliver if still pending and not masked meanwhile. */
	if (sync_test_bit(EVTCHN_FIFO_PENDING, BM(word))
	    && !sync_test_bit(EVTCHN_FIFO_MASKED, BM(word)))
		handle_irq_for_port(port);
}
+
+#define EVTCHN_FIFO_READY_MASK ((1 << EVTCHN_FIFO_MAX_QUEUES) - 1)
+
+static void fifo_handle_events(int cpu)
+{
+       struct evtchn_fifo_control_block *control_block;
+       uint32_t ready;
+       int q;
+
+       control_block = per_cpu(cpu_control_block, cpu);
+
+       ready = xchg(&control_block->ready, 0);
+
+       while (ready & EVTCHN_FIFO_READY_MASK) {
+               q = find_first_bit(BM(&ready), EVTCHN_FIFO_MAX_QUEUES);
+               consume_one_event(control_block, q, &ready);
+       }
+}
+
/*
 * FIFO-based ABI implementation of the event channel hooks; copied
 * into the global evtchn_ops by xen_evtchn_init_fifo_based().
 */
struct evtchn_ops evtchn_ops_fifo = {
	.setup             = fifo_setup,
	.bind_to_cpu       = fifo_bind_to_cpu,
	.clear_pending     = fifo_clear_pending,
	.set_pending       = fifo_set_pending,
	.is_pending        = fifo_is_pending,
	.test_and_set_mask = fifo_test_and_set_mask,
	.mask              = fifo_mask,
	.unmask            = fifo_unmask,
	.handle_events     = fifo_handle_events,
};
+
+static int __cpuinit fifo_init_control_block(int cpu)
+{
+       struct page *control_block = NULL;
+       struct evtchn_init_control init_control;
+       int ret = -ENOMEM;
+
+       control_block = alloc_page(GFP_KERNEL|__GFP_ZERO);
+       if (control_block == NULL)
+               goto error;
+
+       init_control.control_mfn = virt_to_mfn(page_address(control_block));
+       init_control.offset      = 0;
+       init_control.vcpu        = cpu;
+
+       ret = HYPERVISOR_event_channel_op(EVTCHNOP_init_control, &init_control);
+       if (ret < 0)
+               goto error;
+
+       per_cpu(cpu_control_block, cpu) = page_address(control_block);
+
+       return 0;
+
+  error:
+       __free_page(control_block);
+       return ret;
+}
+
+static int __cpuinit fifo_cpu_notification(struct notifier_block *self,
+                                          unsigned long action, void *hcpu)
+{
+       int cpu = (long)hcpu;
+       int ret = 0;
+
+       switch (action) {
+       case CPU_UP_PREPARE:
+               ret = fifo_init_control_block(cpu);
+               break;
+       default:
+               break;
+       }
+       return ret < 0 ? NOTIFY_BAD : NOTIFY_OK;
+}
+
/* Registered only after the FIFO ABI is selected at boot. */
static struct notifier_block fifo_cpu_notifier __cpuinitdata = {
	.notifier_call	= fifo_cpu_notification,
};
+
+
+int __init xen_evtchn_init_fifo_based(void)
+{
+       int cpu = get_cpu();
+       int ret;
+
+       ret = fifo_init_control_block(cpu);
+       if (ret < 0)
+               goto error;
+
+       printk(KERN_INFO "xen: switching to FIFO-based event channels\n");
+
+       evtchn_ops = evtchn_ops_fifo;
+
+       register_cpu_notifier(&fifo_cpu_notifier);
+
+       put_cpu();
+       return 0;
+
+  error:
+       put_cpu();
+       return ret;
+}
-- 
1.7.2.5


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.