[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [RFC PATCH 3/3] Xen: implement 3-level event channel routines.



Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
---
 arch/x86/xen/enlighten.c              |    7 +
 drivers/xen/events.c                  |  419 +++++++++++++++++++++++++++++++--
 include/xen/events.h                  |    2 +
 include/xen/interface/event_channel.h |   24 ++
 include/xen/interface/xen.h           |    2 +-
 5 files changed, 437 insertions(+), 17 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bc893e7..f471881 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -43,6 +43,7 @@
 #include <xen/hvm.h>
 #include <xen/hvc-console.h>
 #include <xen/acpi.h>
+#include <xen/events.h>
 
 #include <asm/paravirt.h>
 #include <asm/apic.h>
@@ -195,6 +196,9 @@ void xen_vcpu_restore(void)
                    HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
                        BUG();
        }
+
+       if (evtchn_level_param == 3)
+               xen_event_channel_setup_3level();
 }
 
 static void __init xen_banner(void)
@@ -1028,6 +1032,9 @@ void xen_setup_vcpu_info_placement(void)
        for_each_possible_cpu(cpu)
                xen_vcpu_setup(cpu);
 
+       if (evtchn_level_param == 3)
+               xen_event_channel_setup_3level();
+
        /* xen_vcpu_setup managed to place the vcpu_info within the
           percpu area for all cpus, so make use of it */
        if (have_vcpu_info_placement) {
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index f60ba76..adb94e9 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -52,9 +52,15 @@
 #include <xen/interface/hvm/params.h>
 
 /* N-level event channel, starting from 2 */
+/*
+ * Level requested with the "evtchn_level=" boot parameter.  Keeps its
+ * -1 (i.e. UINT_MAX) default when the parameter is not given.
+ */
+unsigned int evtchn_level_param = -1;
 unsigned int evtchn_level = 2;
 EXPORT_SYMBOL_GPL(evtchn_level);
 
+/*
+ * 3-level event channel state.
+ *
+ * evtchn_sel_l2 is the per-cpu second-level selector: it has
+ * sizeof(unsigned long)*8 words, and bit (port / BITS_PER_LONG) of it is
+ * set when a word of evtchn_pending needs scanning.
+ *
+ * evtchn_pending and evtchn_mask are the third-level bitmaps, one bit per
+ * port.  xen_event_channel_setup_3level() hands them to the hypervisor one
+ * page at a time, hence the page alignment.
+ */
+DEFINE_PER_CPU(unsigned long [sizeof(unsigned long)*8], evtchn_sel_l2);
+unsigned long evtchn_pending[NR_EVENT_CHANNELS_L3/BITS_PER_LONG] __page_aligned_bss;
+unsigned long evtchn_mask[NR_EVENT_CHANNELS_L3/BITS_PER_LONG] __page_aligned_bss;
+
 struct evtchn_ops {
        unsigned long (*active_evtchns)(unsigned int,
                                        struct shared_info*, unsigned int);
@@ -142,6 +148,29 @@ static struct irq_chip xen_pirq_chip;
 static void enable_dynirq(struct irq_data *data);
 static void disable_dynirq(struct irq_data *data);
 
+/*
+ * Parse the "evtchn_level=" early parameter.
+ *
+ * "3" requests the 3-level event channel ABI, "2" explicitly requests the
+ * 2-level one.  A missing or unrecognised value is rejected with -EINVAL
+ * rather than being silently ignored, so that a typo on the command line
+ * is reported by the early-param machinery.
+ */
+static int __init parse_evtchn_level(char *arg)
+{
+       if (!arg)
+               return -EINVAL;
+
+       if (strcmp(arg, "3") == 0)
+               evtchn_level_param = 3;
+       else if (strcmp(arg, "2") == 0)
+               evtchn_level_param = 2;
+       else
+               return -EINVAL;
+
+       return 0;
+}
+early_param("evtchn_level", parse_evtchn_level);
+
+/*
+ * Set @chn's bit in the shared-info mask bitmap with
+ * sync_test_and_set_bit() and return that call's result.  Note that this
+ * masks the channel as a side effect; it is not a pure query.
+ */
+static inline int __is_masked_l2(int chn)
+{
+       return sync_test_and_set_bit(chn, HYPERVISOR_shared_info->evtchn_mask);
+}
+
+/*
+ * Set @chn's bit in the 3-level evtchn_mask bitmap with
+ * sync_test_and_set_bit() and return that call's result.  The channel is
+ * left masked afterwards.
+ */
+static inline int __is_masked_l3(int chn)
+{
+       int was_masked = sync_test_and_set_bit(chn, evtchn_mask);
+
+       return was_masked;
+}
+
 /* Get info for IRQ */
 static struct irq_info *info_for_irq(unsigned irq)
 {
@@ -311,6 +340,15 @@ static inline unsigned long __active_evtchns_l2(unsigned int cpu,
                ~sh->evtchn_mask[idx];
 }
 
+/*
+ * Return the bits of word @idx that are set in evtchn_pending, set in
+ * @cpu's cpu_evtchn_mask and clear in evtchn_mask.  @sh is not used: the
+ * 3-level bitmaps are the file-scope evtchn_pending/evtchn_mask arrays.
+ */
+static inline unsigned long __active_evtchns_l3(unsigned int cpu,
+                                               struct shared_info *sh,
+                                               unsigned int idx)
+{
+       unsigned long pending = evtchn_pending[idx];
+       unsigned long routed = per_cpu(cpu_evtchn_mask, cpu)[idx];
+       unsigned long masked = evtchn_mask[idx];
+
+       return pending & routed & ~masked;
+}
+
 static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
 {
        int irq = evtchn_to_irq[chn];
@@ -351,18 +389,33 @@ static inline void __clear_evtchn_l2(int port)
        sync_clear_bit(port, &s->evtchn_pending[0]);
 }
 
+/* Clear @port's bit in the 3-level pending bitmap. */
+static inline void __clear_evtchn_l3(int port)
+{
+       sync_clear_bit(port, evtchn_pending);
+}
+
 static inline void __set_evtchn_l2(int port)
 {
        struct shared_info *s = HYPERVISOR_shared_info;
        sync_set_bit(port, &s->evtchn_pending[0]);
 }
 
+/* Set @port's bit in the 3-level pending bitmap. */
+static inline void __set_evtchn_l3(int port)
+{
+       sync_set_bit(port, evtchn_pending);
+}
+
 static inline int __test_evtchn_l2(int port)
 {
        struct shared_info *s = HYPERVISOR_shared_info;
        return sync_test_bit(port, &s->evtchn_pending[0]);
 }
 
+/* Return the result of testing @port's bit in the 3-level pending bitmap. */
+static inline int __test_evtchn_l3(int port)
+{
+       int is_pending = sync_test_bit(port, evtchn_pending);
+
+       return is_pending;
+}
+
 /**
  * notify_remote_via_irq - send event to remote end of event channel via irq
  * @irq: irq of event channel to send event to
@@ -386,6 +439,11 @@ static void __mask_evtchn_l2(int port)
        sync_set_bit(port, &s->evtchn_mask[0]);
 }
 
+/* Set @port's bit in the 3-level mask bitmap. */
+static void __mask_evtchn_l3(int port)
+{
+       sync_set_bit(port, evtchn_mask);
+}
+
 static void __unmask_evtchn_l2(int port)
 {
        struct shared_info *s = HYPERVISOR_shared_info;
@@ -416,6 +474,36 @@ static void __unmask_evtchn_l2(int port)
        put_cpu();
 }
 
+/*
+ * Unmask @port for the 3-level ABI.
+ *
+ * If the port is bound to another cpu the unmask is delegated to the
+ * hypervisor through EVTCHNOP_unmask.  Otherwise the mask bit is cleared
+ * locally and, if the event is already pending, the selector bits and the
+ * upcall-pending flag are set by hand so the event is not lost.
+ */
+static void __unmask_evtchn_l3(int port)
+{
+       unsigned int cpu = get_cpu();
+       /* Number of ports covered by one bit of the first-level selector. */
+       int l1cb = BITS_PER_LONG * BITS_PER_LONG;
+       /* Number of ports covered by one bit of the second-level selector. */
+       int l2cb = BITS_PER_LONG;
+
+       if (unlikely(cpu != cpu_from_evtchn(port))) {
+               /* Not our port: let the hypervisor do the unmask. */
+               struct evtchn_unmask unmask = { .port = port };
+               (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
+       } else {
+               struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+
+               sync_clear_bit(port, &evtchn_mask[0]);
+
+               /*
+                * The following is basically the equivalent of
+                * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
+                * the interrupt edge' if the channel is masked.
+                */
+               /*
+                * The && chain stops at the first selector level whose bit
+                * was already set; the upcall flag is raised only when both
+                * selector bits were newly set here.
+                */
+               if (sync_test_bit(port, &evtchn_pending[0]) &&
+                   !sync_test_and_set_bit(port / l2cb,
+                                          &per_cpu(evtchn_sel_l2, cpu)[0]) &&
+                   !sync_test_and_set_bit(port / l1cb,
+                                          &vcpu_info->evtchn_pending_sel))
+                       vcpu_info->evtchn_upcall_pending = 1;
+       }
+
+       put_cpu();
+}
+
 static void xen_irq_init(unsigned irq)
 {
        struct irq_info *info;
@@ -1181,6 +1269,7 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
        notify_remote_via_irq(irq);
 }
 
+/* Taken by both the 2-level and the 3-level debug interrupt handlers. */
+static DEFINE_SPINLOCK(debug_lock);
 static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id)
 {
        struct shared_info *sh = HYPERVISOR_shared_info;
@@ -1188,7 +1277,6 @@ static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id)
        unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
        int i;
        unsigned long flags;
-       static DEFINE_SPINLOCK(debug_lock);
        struct vcpu_info *v;
 
        spin_lock_irqsave(&debug_lock, flags);
@@ -1196,13 +1284,13 @@ static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id)
        printk("\nvcpu %d\n  ", cpu);
 
        for_each_online_cpu(i) {
-               int pending;
+               int masked;
                v = per_cpu(xen_vcpu, i);
-               pending = (get_irq_regs() && i == cpu)
+               masked = (get_irq_regs() && i == cpu)
                        ? xen_irqs_disabled(get_irq_regs())
                        : v->evtchn_upcall_mask;
                printk("%d: masked=%d pending=%d event_sel %0*lx\n  ", i,
-                      pending, v->evtchn_upcall_pending,
+                      masked, v->evtchn_upcall_pending,
                       (int)(sizeof(v->evtchn_pending_sel)*2),
                       v->evtchn_pending_sel);
        }
@@ -1227,7 +1315,7 @@ static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id)
                       i % 8 == 0 ? "\n   " : " ");
 
        printk("\nlocal cpu%d mask:\n   ", cpu);
-       for (i = (NR_EVENT_CHANNELS(evtchn_level)/BITS_PER_LONG)-1; i >= 0; i--)
+       for (i = (NR_EVENT_CHANNELS(2)/BITS_PER_LONG)-1; i >= 0; i--)
                printk("%0*lx%s", (int)(sizeof(cpu_evtchn[0])*2),
                       cpu_evtchn[i],
                       i % 8 == 0 ? "\n   " : " ");
@@ -1242,7 +1330,7 @@ static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id)
        }
 
        printk("\npending list:\n");
-       for (i = 0; i < NR_EVENT_CHANNELS(evtchn_level); i++) {
+       for (i = 0; i < NR_EVENT_CHANNELS(2); i++) {
                if (sync_test_bit(i, sh->evtchn_pending)) {
                        int word_idx = i / BITS_PER_LONG;
                        printk("  %d: event %d -> irq %d%s%s%s\n",
@@ -1262,15 +1350,110 @@ static irqreturn_t __xen_debug_interrupt_l2(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
+/*
+ * Debug interrupt handler for the 3-level ABI.
+ *
+ * Under debug_lock, prints for every online cpu its upcall mask/pending
+ * state, first-level selector and second-level selector words, then the
+ * global pending and mask bitmaps, the local cpu's binding mask, and
+ * finally one line per pending port.
+ *
+ * In the per-port list the "l1-clear"/"l2-clear" tags are evaluated
+ * against the selectors of the cpu running this handler, while the first
+ * column shows cpu_from_evtchn() for the port.
+ *
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t __xen_debug_interrupt_l3(int irq, void *dev_id)
+{
+       int cpu = smp_processor_id();
+       unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
+       int i, j;
+       unsigned long flags;
+       struct vcpu_info *v;
+
+       spin_lock_irqsave(&debug_lock, flags);
+
+       printk("\nvcpu %d\n  ", cpu);
+
+       for_each_online_cpu(i) {
+               int masked;
+
+               v = per_cpu(xen_vcpu, i);
+               /*
+                * For the local cpu, when interrupted register state is
+                * available, report the interrupted context's state rather
+                * than the live upcall mask.
+                */
+               masked = (get_irq_regs() && i == cpu)
+                       ? xen_irqs_disabled(get_irq_regs())
+                       : v->evtchn_upcall_mask;
+               printk("%d: masked=%d pending=%d event_sel_l1 %0*lx\n  ", i,
+                      masked, v->evtchn_upcall_pending,
+                      (int)(sizeof(v->evtchn_pending_sel)*2),
+                      v->evtchn_pending_sel);
+
+               printk("\nevtchn_sel_l2:\n   ");
+               for (j = (sizeof(unsigned long)*8)-1; j >= 0; j--)
+                       printk("%0*lx%s",
+                              (int)(sizeof(evtchn_sel_l2[0])*2),
+                              per_cpu(evtchn_sel_l2, i)[j],
+                              j % 8 == 0 ? "\n   " : " ");
+       }
+
+       /* From here on 'v' is the local cpu's vcpu_info. */
+       v = per_cpu(xen_vcpu, cpu);
+
+       printk("\npending:\n   ");
+       for (i = ARRAY_SIZE(evtchn_pending)-1; i >= 0; i--)
+               printk("%0*lx%s", (int)(sizeof(evtchn_pending[0])*2),
+                      evtchn_pending[i],
+                      i % 8 == 0 ? "\n   " : " ");
+
+       printk("\nglobal mask:\n   ");
+       for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--)
+               printk("%0*lx%s", (int)(sizeof(evtchn_mask[0])*2),
+                      evtchn_mask[i],
+                      i % 8 == 0 ? "\n   " : " ");
+
+       printk("\nglobally unmasked:\n   ");
+       for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--)
+               printk("%0*lx%s", (int)(sizeof(evtchn_mask[0])*2),
+                      evtchn_pending[i] & ~evtchn_mask[i],
+                      i % 8 == 0 ? "\n   " : " ");
+
+       printk("\nlocal cpu%d mask:\n   ", cpu);
+       for (i = (NR_EVENT_CHANNELS(3)/BITS_PER_LONG)-1; i >= 0; i--)
+               printk("%0*lx%s", (int)(sizeof(cpu_evtchn[0])*2),
+                      cpu_evtchn[i],
+                      i % 8 == 0 ? "\n   " : " ");
+
+       printk("\nlocally unmasked:\n   ");
+       for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--) {
+               unsigned long pending = evtchn_pending[i]
+                       & ~evtchn_mask[i]
+                       & cpu_evtchn[i];
+               printk("%0*lx%s", (int)(sizeof(evtchn_mask[0])*2),
+                      pending, i % 8 == 0 ? "\n   " : " ");
+       }
+
+       printk("\npending list:\n");
+       for (i = 0; i < NR_EVENT_CHANNELS(3); i++) {
+               if (sync_test_bit(i, evtchn_pending)) {
+                       /* Selector bit positions covering port i. */
+                       int word_idx_l1 = i / (BITS_PER_LONG * BITS_PER_LONG);
+                       int word_idx_l2 = i / BITS_PER_LONG;
+                       printk("  %d: event %d -> irq %d%s%s%s%s\n",
+                              cpu_from_evtchn(i), i,
+                              evtchn_to_irq[i],
+                              sync_test_bit(word_idx_l1,
+                                            &v->evtchn_pending_sel)
+                                            ? "" : " l1-clear",
+                              sync_test_bit(word_idx_l2,
+                                            per_cpu(evtchn_sel_l2, cpu))
+                                            ? "" : " l2-clear",
+                              !sync_test_bit(i, evtchn_mask)
+                                            ? "" : " globally-masked",
+                              sync_test_bit(i, cpu_evtchn)
+                                            ? "" : " locally-masked");
+               }
+       }
+
+       spin_unlock_irqrestore(&debug_lock, flags);
+
+       return IRQ_HANDLED;
+}
+
 irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
 {
        return eops->xen_debug_interrupt(irq, dev_id);
 }
 
 static DEFINE_PER_CPU(unsigned, xed_nesting_count);
+
+/* 2-level event channel does not use current_word_idx_l2 */
 static DEFINE_PER_CPU(unsigned int, current_word_idx);
+static DEFINE_PER_CPU(unsigned int, current_word_idx_l2);
 static DEFINE_PER_CPU(unsigned int, current_bit_idx);
 
+
 /*
  * Mask out the i least significant bits of w
  */
@@ -1303,7 +1486,8 @@ static void __xen_evtchn_do_upcall_l2(void)
                if (__this_cpu_inc_return(xed_nesting_count) - 1)
                        goto out;
 
-#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
+#ifndef CONFIG_X86
+               /* No need for a barrier -- XCHG is a barrier on x86. */
                /* Clear master flag /before/ clearing selector flag. */
                wmb();
 #endif
@@ -1392,6 +1576,155 @@ out:
        put_cpu();
 }
 
+/*
+ * Record where the next 3-level upcall on this cpu resumes scanning:
+ * one position past the port just handled.  @next_bit_idx is the already
+ * advanced bit index; when it wrapped to 0 the second-level word index is
+ * advanced, and when that wraps too the first-level index is advanced.
+ */
+static inline void __save_scan_cursor_l3(int word_idx_l1, int word_idx_l2,
+                                         int next_bit_idx)
+{
+       int next_l1 = word_idx_l1;
+       int next_l2 = word_idx_l2;
+
+       if (next_bit_idx == 0) {
+               next_l2 = (word_idx_l2 + 1) % BITS_PER_LONG;
+               if (next_l2 == 0)
+                       next_l1 = (word_idx_l1 + 1) % BITS_PER_LONG;
+       }
+
+       __this_cpu_write(current_bit_idx, next_bit_idx);
+       __this_cpu_write(current_word_idx_l2, next_l2);
+       __this_cpu_write(current_word_idx, next_l1);
+}
+
+/*
+ * Upcall handler for the 3-level event channel ABI.
+ *
+ * Walks the hierarchy first-level selector (vcpu_info->evtchn_pending_sel)
+ * -> second-level selector (per-cpu evtchn_sel_l2) -> pending bitmap and
+ * calls generic_handle_irq_desc() for every active port that has an irq.
+ * Scanning starts at the position saved by the previous upcall
+ * (current_word_idx / current_word_idx_l2 / current_bit_idx); the word the
+ * scan started in is visited a second time after wrapping around.
+ *
+ * Nested invocations only bump xed_nesting_count and return; the
+ * outermost one loops until no further upcall was requested.
+ */
+static void __xen_evtchn_do_upcall_l3(void)
+{
+       int cpu = get_cpu();
+       struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+       unsigned count;
+       int start_word_idx_l1, start_word_idx_l2, start_bit_idx;
+       int word_idx_l1, word_idx_l2, bit_idx;
+       int i, j;
+       unsigned long l1cb, l2cb;
+
+       /* Ports covered by one first-level / second-level selector bit. */
+       l1cb = BITS_PER_LONG * BITS_PER_LONG;
+       l2cb = BITS_PER_LONG;
+
+       do {
+               unsigned long pending_words_l1;
+
+               vcpu_info->evtchn_upcall_pending = 0;
+
+               if (__this_cpu_inc_return(xed_nesting_count) - 1)
+                       goto out;
+#ifndef CONFIG_X86
+               /* No need for a barrier -- XCHG is a barrier on x86. */
+               /* Clear master flag /before/ clearing selector flag. */
+               wmb();
+#endif
+               /* here we get l1 pending selector */
+               pending_words_l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
+
+               start_word_idx_l1 = __this_cpu_read(current_word_idx);
+               start_word_idx_l2 = __this_cpu_read(current_word_idx_l2);
+               start_bit_idx = __this_cpu_read(current_bit_idx);
+
+               word_idx_l1 = start_word_idx_l1;
+
+               /* loop through l1, try to pick up l2 */
+               for (i = 0; pending_words_l1 != 0; i++) {
+                       unsigned long words_l1;
+                       unsigned long pending_words_l2;
+
+                       words_l1 = MASK_LSBS(pending_words_l1, word_idx_l1);
+
+                       /* If we masked out all events, wrap to beginning. */
+                       if (words_l1 == 0) {
+                               word_idx_l1 = 0;
+                               start_word_idx_l2 = 0;
+                               continue;
+                       }
+
+                       word_idx_l1 = __ffs(words_l1);
+
+                       /*
+                        * evtchn_sel_l2 holds one word per first-level bit,
+                        * so it is indexed by word_idx_l1 itself.
+                        */
+                       pending_words_l2 =
+                               xchg(&per_cpu(evtchn_sel_l2, cpu)[word_idx_l1],
+                                    0);
+
+                       word_idx_l2 = 0;
+                       if (word_idx_l1 == start_word_idx_l1) {
+                               if (i == 0)
+                                       word_idx_l2 = start_word_idx_l2;
+                               else
+                                       word_idx_l2 &=
+                                               (1UL << start_word_idx_l2) - 1;
+                       }
+
+                       for (j = 0; pending_words_l2 != 0; j++) {
+                               unsigned long pending_bits;
+                               unsigned long words_l2;
+                               unsigned long idx;
+
+                               words_l2 = MASK_LSBS(pending_words_l2,
+                                                    word_idx_l2);
+
+                               if (words_l2 == 0) {
+                                       word_idx_l2 = 0;
+                                       bit_idx = 0;
+                                       continue;
+                               }
+
+                               word_idx_l2 = __ffs(words_l2);
+
+                               /* Index of the word in the pending bitmap. */
+                               idx = word_idx_l1*BITS_PER_LONG+word_idx_l2;
+                               pending_bits =
+                                       eops->active_evtchns(cpu, NULL, idx);
+
+                               bit_idx = 0;
+                               if (word_idx_l2 == start_word_idx_l2) {
+                                       if (j == 0)
+                                               bit_idx = start_bit_idx;
+                                       else
+                                               bit_idx &=
+                                                       (1UL<<start_bit_idx)-1;
+                               }
+
+                               /* process port */
+                               do {
+                                       unsigned long bits;
+                                       int port, irq;
+                                       struct irq_desc *desc;
+
+                                       bits = MASK_LSBS(pending_bits, bit_idx);
+
+                                       if (bits == 0)
+                                               break;
+
+                                       bit_idx = __ffs(bits);
+
+                                       port = word_idx_l1 * l1cb +
+                                               word_idx_l2 * l2cb +
+                                               bit_idx;
+
+                                       irq = evtchn_to_irq[port];
+
+                                       if (irq != -1) {
+                                               desc = irq_to_desc(irq);
+                                               if (desc)
+                                                       generic_handle_irq_desc(irq, desc);
+                                       }
+
+                                       bit_idx = (bit_idx + 1) % BITS_PER_LONG;
+
+                                       /* Next caller starts at last processed + 1 */
+                                       __save_scan_cursor_l3(word_idx_l1,
+                                                             word_idx_l2,
+                                                             bit_idx);
+                               } while (bit_idx != 0);
+
+                               /* Scan start word twice; all others once. */
+                               if ((word_idx_l2 != start_word_idx_l2) || (j != 0))
+                                       pending_words_l2 &= ~(1UL << word_idx_l2);
+
+                               word_idx_l2 = (word_idx_l2 + 1) % BITS_PER_LONG;
+                       }
+
+                       /* Scan start word twice; all others once. */
+                       if ((word_idx_l1 != start_word_idx_l1) || (i != 0))
+                               pending_words_l1 &= ~(1UL << word_idx_l1);
+
+                       word_idx_l1 = (word_idx_l1 + 1) % BITS_PER_LONG;
+               }
+
+               BUG_ON(!irqs_disabled());
+               count = __this_cpu_read(xed_nesting_count);
+               __this_cpu_write(xed_nesting_count, 0);
+       } while (count != 1 || vcpu_info->evtchn_upcall_pending);
+
+out:
+       put_cpu();
+}
+
 void xen_evtchn_do_upcall(struct pt_regs *regs)
 {
        struct pt_regs *old_regs = set_irq_regs(regs);
@@ -1525,12 +1858,6 @@ static void mask_ack_dynirq(struct irq_data *data)
        ack_dynirq(data);
 }
 
-static inline int __is_masked_l2(int chn)
-{
-       struct shared_info *sh = HYPERVISOR_shared_info;
-       return sync_test_and_set_bit(chn, sh->evtchn_mask);
-}
-
 static int retrigger_dynirq(struct irq_data *data)
 {
        int evtchn = evtchn_from_irq(data->irq);
@@ -1821,14 +2148,74 @@ static struct evtchn_ops evtchn_ops_l2 __read_mostly = {
        .xen_debug_interrupt = __xen_debug_interrupt_l2,
 };
 
+/*
+ * Operations table for the 3-level event channel ABI; installed as 'eops'
+ * by xen_init_IRQ() when evtchn_level is 3.
+ */
+static struct evtchn_ops evtchn_ops_l3 __read_mostly = {
+       .active_evtchns = __active_evtchns_l3,
+       .clear_evtchn = __clear_evtchn_l3,
+       .set_evtchn = __set_evtchn_l3,
+       .test_evtchn = __test_evtchn_l3,
+       .mask_evtchn = __mask_evtchn_l3,
+       .unmask_evtchn = __unmask_evtchn_l3,
+       .is_masked = __is_masked_l3,
+       .xen_evtchn_do_upcall = __xen_evtchn_do_upcall_l3,
+       .xen_debug_interrupt = __xen_debug_interrupt_l3,
+};
+
+
+/*
+ * Register the 3-level event channel bitmaps with the hypervisor.
+ *
+ * Passes the MFN of every page of evtchn_pending and evtchn_mask, plus
+ * the MFN and in-page offset of each possible cpu's evtchn_sel_l2 array,
+ * to EVTCHNOP_register_nlevel.
+ *
+ * Return: 0 on success, in which case evtchn_level is set to 3; otherwise
+ * the hypercall's error code, with evtchn_level left untouched so that
+ * xen_init_IRQ() keeps selecting the 2-level operations.
+ */
+int xen_event_channel_setup_3level(void)
+{
+       /*
+        * Static and sized by NR_CPUS instead of on-stack VLAs sized by
+        * nr_cpu_ids: two such VLAs can exceed the kernel stack on large
+        * configurations, and static storage also leaves the slots of
+        * non-possible cpus zeroed instead of uninitialised.
+        * NOTE(review): this assumes callers are serialised (the visible
+        * callers are the vcpu setup and vcpu restore paths) -- confirm.
+        */
+       static unsigned long mfns[NR_CPUS];
+       static unsigned long offsets[NR_CPUS];
+       evtchn_register_nlevel_t reg;
+       int i, nr_pages, cpu;
+       int rc;
+
+       memset(&reg, 0, sizeof(reg));
+
+       reg.level = 3;
+       /* One page of bitmap with 32-bit longs, eight with 64-bit longs. */
+       nr_pages = (sizeof(unsigned long) == 4 ? 1 : 8);
+
+       for (i = 0; i < nr_pages; i++) {
+               unsigned long offset = PAGE_SIZE * i;
+               reg.u.l3.evtchn_pending[i] =
+                       arbitrary_virt_to_mfn(
+                               (void *)((unsigned long)evtchn_pending+offset));
+               reg.u.l3.evtchn_mask[i] =
+                       arbitrary_virt_to_mfn(
+                               (void *)((unsigned long)evtchn_mask+offset));
+       }
+
+       reg.u.l3.l2sel_mfn = mfns;
+       reg.u.l3.l2sel_offset = offsets;
+       reg.u.l3.nr_vcpus = nr_cpu_ids;
+
+       for_each_possible_cpu(cpu) {
+               reg.u.l3.l2sel_mfn[cpu] =
+                       arbitrary_virt_to_mfn(&per_cpu(evtchn_sel_l2, cpu));
+               reg.u.l3.l2sel_offset[cpu] =
+                       offset_in_page(&per_cpu(evtchn_sel_l2, cpu));
+       }
+
+       rc = HYPERVISOR_event_channel_op(EVTCHNOP_register_nlevel, &reg);
+
+       if (rc == 0)
+               evtchn_level = 3;
+
+       return rc;
+}
+EXPORT_SYMBOL_GPL(xen_event_channel_setup_3level);
+
 void __init xen_init_IRQ(void)
 {
        int i, rc;
        int cpu;
 
-       /* Setup 2-level event channel */
-       eops = &evtchn_ops_l2;
-       evtchn_level = 2;
+       switch (evtchn_level) {
+       case 2:
+               eops = &evtchn_ops_l2; break;
+       case 3:
+               eops = &evtchn_ops_l3; break;
+       default:
+               BUG();
+       }
 
        evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS(evtchn_level),
                                sizeof(*evtchn_to_irq),
diff --git a/include/xen/events.h b/include/xen/events.h
index bc10f22..87696fc 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -111,5 +111,7 @@ int xen_test_irq_shared(int irq);
 
 /* N-level event channels */
 extern unsigned int evtchn_level;
+/* Level requested with the "evtchn_level=" boot parameter. */
+extern unsigned int evtchn_level_param;
+/* Register the 3-level ABI with the hypervisor; returns 0 on success. */
+int xen_event_channel_setup_3level(void);
 
 #endif /* _XEN_EVENTS_H */
diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h
index f494292..f764d21 100644
--- a/include/xen/interface/event_channel.h
+++ b/include/xen/interface/event_channel.h
@@ -190,6 +190,30 @@ struct evtchn_reset {
 };
 typedef struct evtchn_reset evtchn_reset_t;
 
+/*
+ * EVTCHNOP_register_nlevel: Register N level event channels.
+ * NOTES:
+ *   1. currently only 3-level is supported.
+ *   2. should fall back to basic 2-level if this call fails.
+ */
+#define EVTCHNOP_register_nlevel 11
+#define MAX_L3_PAGES 8         /* 8 pages for 64 bits */
+/* Argument of EVTCHNOP_register_nlevel when registering 3 levels. */
+struct evtchn_register_3level {
+       /* One frame number per page of the guest's pending bitmap. */
+       unsigned long evtchn_pending[MAX_L3_PAGES];
+       /* One frame number per page of the guest's mask bitmap. */
+       unsigned long evtchn_mask[MAX_L3_PAGES];
+       /* Array of nr_vcpus frame numbers, one per second-level selector. */
+       unsigned long *l2sel_mfn;
+       /* Array of nr_vcpus offsets of the selector within its frame. */
+       unsigned long *l2sel_offset;
+       /* Number of entries in l2sel_mfn and l2sel_offset. */
+       unsigned int nr_vcpus;
+};
+
+/* Argument of EVTCHNOP_register_nlevel. */
+struct evtchn_register_nlevel {
+       /* Requested number of levels; selects the member of 'u' in use. */
+       uint32_t level;
+       union {
+               struct evtchn_register_3level l3;
+       } u;
+};
+typedef struct evtchn_register_nlevel evtchn_register_nlevel_t;
+
 struct evtchn_op {
        uint32_t cmd; /* EVTCHNOP_* */
        union {
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index c66e1ff..7cb9d8f 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -289,7 +289,7 @@ DEFINE_GUEST_HANDLE_STRUCT(multicall_entry);
  *  32k if a long is 32 bits; 256k if a long is 64 bits.
  */
 #define NR_EVENT_CHANNELS_L2 (sizeof(unsigned long) * sizeof(unsigned long) * 
64)
-#define NR_EVENT_CHANNELS_L3 (NR_EVENT_CHANNELS_L2 * sizeof(unsigned long))
+#define NR_EVENT_CHANNELS_L3 (NR_EVENT_CHANNELS_L2 * sizeof(unsigned long) * 8)
 #define NR_EVENT_CHANNELS(x) ({ unsigned int __v = 0;  \
        switch (x) {                                    \
        case 2:                                         \
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.