|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC PATCH V2 5/8] xen: implement 3-level event channel routines
Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
---
drivers/xen/events.c | 407 +++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 385 insertions(+), 22 deletions(-)
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 913ef0c..5b45441 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -57,6 +57,16 @@ EXPORT_SYMBOL_GPL(evtchn_level);
unsigned int nr_event_channels;
EXPORT_SYMBOL_GPL(nr_event_channels);
+/*
+ * Per-CPU second-level selector for the 3-level event channel ABI.
+ * It holds sizeof(unsigned long) * 8 words; the 3-level unmask path in
+ * this file sets bit (port >> LONG_BITORDER) in it, i.e. the index of
+ * the evtchn_pending[] word that contains the port.
+ */
+DEFINE_PER_CPU(unsigned long [sizeof(unsigned long) * 8], evtchn_sel_l2);
+/*
+ * Pending and mask bitmaps for the 3-level event channel ABI: one bit
+ * per port, NR_EVENT_CHANNELS_L3 bits each, in page-aligned BSS.
+ * NOTE(review): "shared" presumably means shared with the hypervisor;
+ * the registration is not visible in this hunk -- confirm.
+ */
+#define __NR_ELEMS (NR_EVENT_CHANNELS_L3/BITS_PER_LONG)
+unsigned long evtchn_pending[__NR_ELEMS] __page_aligned_bss;
+unsigned long evtchn_mask [__NR_ELEMS] __page_aligned_bss;
+#undef __NR_ELEMS
+/*
+ * Helper macros.
+ * LONG_BITORDER is log2(BITS_PER_LONG): shifting a bit number right by
+ * it yields the index of the word that holds the bit.
+ */
+#define LONG_BITORDER (BITS_PER_LONG == 64 ? 6 : 5)
+
struct evtchn_ops {
unsigned long (*active_evtchns) (unsigned int cpu,
struct shared_info *sh,
@@ -314,6 +324,15 @@ static inline unsigned long active_evtchns_l2(unsigned int
cpu,
~sh->evtchn_mask[idx];
}
+/*
+ * Return the events in word @idx of the 3-level pending bitmap that are
+ * pending, not globally masked, and enabled in @cpu's cpu_evtchn_mask.
+ * @sh is unused: the 3-level bitmaps are file-scope arrays.
+ */
+static inline unsigned long active_evtchns_l3(unsigned int cpu,
+					      struct shared_info *sh,
+					      unsigned int idx)
+{
+	unsigned long unmasked = evtchn_pending[idx] & ~evtchn_mask[idx];
+
+	return unmasked & per_cpu(cpu_evtchn_mask, cpu)[idx];
+}
+
static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
{
int irq = evtchn_to_irq[chn];
@@ -353,18 +372,32 @@ static inline void clear_evtchn_l2(int port)
sync_clear_bit(port, &s->evtchn_pending[0]);
}
+/* Clear the pending bit of @port in the 3-level pending bitmap. */
+static inline void clear_evtchn_l3(int port)
+{
+	sync_clear_bit(port, evtchn_pending);
+}
+
static inline void set_evtchn_l2(int port)
{
struct shared_info *s = HYPERVISOR_shared_info;
sync_set_bit(port, &s->evtchn_pending[0]);
}
+/* Set the pending bit of @port in the 3-level pending bitmap. */
+static inline void set_evtchn_l3(int port)
+{
+	sync_set_bit(port, evtchn_pending);
+}
+
static inline int test_evtchn_l2(int port)
{
struct shared_info *s = HYPERVISOR_shared_info;
return sync_test_bit(port, &s->evtchn_pending[0]);
}
+/* Return non-zero if @port is marked pending in the 3-level bitmap. */
+static inline int test_evtchn_l3(int port)
+{
+	return sync_test_bit(port, evtchn_pending);
+}
/**
* notify_remote_via_irq - send event to remote end of event channel via irq
@@ -389,6 +422,11 @@ static void mask_evtchn_l2(int port)
sync_set_bit(port, &s->evtchn_mask[0]);
}
+/* Set the mask bit of @port in the 3-level mask bitmap. */
+static void mask_evtchn_l3(int port)
+{
+	sync_set_bit(port, evtchn_mask);
+}
+
static void unmask_evtchn_l2(int port)
{
struct shared_info *s = HYPERVISOR_shared_info;
@@ -419,6 +457,40 @@ static void unmask_evtchn_l2(int port)
put_cpu();
}
+/*
+ * Unmask @port for the 3-level event channel ABI.
+ *
+ * A port bound to another CPU is unmasked through the EVTCHNOP_unmask
+ * hypercall.  A local port has its mask bit cleared directly and, if it
+ * is already pending, the notification is re-raised by setting the L2
+ * selector bit, then the L1 selector bit, then evtchn_upcall_pending;
+ * each step is taken only if the previous bit was not already set.
+ * Must be called with interrupts disabled.
+ */
+static void unmask_evtchn_l3(int port)
+{
+	unsigned int this_cpu = get_cpu();
+	unsigned int sel_l1_bit = port >> (LONG_BITORDER << 1);
+	unsigned int sel_l2_bit = port >> LONG_BITORDER;
+	struct vcpu_info *vi;
+
+	BUG_ON(!irqs_disabled());
+
+	/* Slow path (hypercall) if this is a non-local port. */
+	if (unlikely(this_cpu != cpu_from_evtchn(port))) {
+		struct evtchn_unmask op = { .port = port };
+
+		(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &op);
+		goto done;
+	}
+
+	vi = __this_cpu_read(xen_vcpu);
+
+	sync_clear_bit(port, evtchn_mask);
+
+	/*
+	 * The following is basically the equivalent of
+	 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
+	 * the interrupt edge' if the channel is masked.
+	 */
+	if (!sync_test_bit(port, evtchn_pending))
+		goto done;
+	if (sync_test_and_set_bit(sel_l2_bit, per_cpu(evtchn_sel_l2, this_cpu)))
+		goto done;
+	if (sync_test_and_set_bit(sel_l1_bit, &vi->evtchn_pending_sel))
+		goto done;
+
+	vi->evtchn_upcall_pending = 1;
+
+done:
+	put_cpu();
+}
+
+
static void xen_irq_init(unsigned irq)
{
struct irq_info *info;
@@ -1190,25 +1262,8 @@ static irqreturn_t debug_interrupt_l2(int irq, void
*dev_id)
int cpu = smp_processor_id();
unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
int i;
- unsigned long flags;
- static DEFINE_SPINLOCK(debug_lock);
struct vcpu_info *v;
- spin_lock_irqsave(&debug_lock, flags);
-
- printk("\nvcpu %d\n ", cpu);
-
- for_each_online_cpu(i) {
- int pending;
- v = per_cpu(xen_vcpu, i);
- pending = (get_irq_regs() && i == cpu)
- ? xen_irqs_disabled(get_irq_regs())
- : v->evtchn_upcall_mask;
- printk("%d: masked=%d pending=%d event_sel %0*lx\n ", i,
- pending, v->evtchn_upcall_pending,
- (int)(sizeof(v->evtchn_pending_sel)*2),
- v->evtchn_pending_sel);
- }
v = per_cpu(xen_vcpu, cpu);
printk("\npending:\n ");
@@ -1260,18 +1315,143 @@ static irqreturn_t debug_interrupt_l2(int irq, void
*dev_id)
}
}
- spin_unlock_irqrestore(&debug_lock, flags);
+ return IRQ_HANDLED;
+}
+
+/*
+ * Debug dump for the 3-level event channel ABI.
+ *
+ * Prints, for the CPU handling the interrupt: the non-zero words of the
+ * pending bitmap, the words of the global mask that are not all-ones,
+ * the pending-and-not-masked words, this CPU's cpu_evtchn_mask words,
+ * their intersection, and finally one line per pending port annotated
+ * with the state of its selector and mask bits.
+ *
+ * @irq and @dev_id are unused.  Always returns IRQ_HANDLED.
+ */
+static irqreturn_t debug_interrupt_l3(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+	unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
+	int i;
+	struct vcpu_info *v;
+
+	v = per_cpu(xen_vcpu, cpu);
+
+	printk("\npending (only show words which have bits set to 1):\n ");
+	for (i = ARRAY_SIZE(evtchn_pending)-1; i >= 0; i--)
+		if (evtchn_pending[i] != 0UL) {
+			printk(" word index %d %0*lx\n",
+			       i,
+			       (int)(sizeof(evtchn_pending[0])*2),
+			       evtchn_pending[i]);
+		}
+
+	printk("\nglobal mask (only show words which have bits set to 0):\n ");
+	for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--)
+		if (evtchn_mask[i] != ~0UL) {
+			printk(" word index %d %0*lx\n",
+			       i,
+			       (int)(sizeof(evtchn_mask[0])*2),
+			       evtchn_mask[i]);
+		}
+
+	printk("\nglobally unmasked (only show result words which have bits set to 1):\n ");
+	for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--)
+		if ((evtchn_pending[i] & ~evtchn_mask[i]) != 0UL) {
+			printk(" word index %d %0*lx\n",
+			       i,
+			       (int)(sizeof(evtchn_mask[0])*2),
+			       evtchn_pending[i] & ~evtchn_mask[i]);
+		}
+
+	printk("\nlocal cpu%d mask (only show words which have bits set to 1):\n ", cpu);
+	for (i = (NR_EVENT_CHANNELS_L3/BITS_PER_LONG)-1; i >= 0; i--)
+		if (cpu_evtchn[i] != 0UL) {
+			printk(" word index %d %0*lx\n",
+			       i,
+			       (int)(sizeof(cpu_evtchn[0])*2),
+			       cpu_evtchn[i]);
+		}
+
+	printk("\nlocally unmasked (only show result words which have bits set to 1):\n ");
+	for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--) {
+		unsigned long pending = evtchn_pending[i]
+			& ~evtchn_mask[i]
+			& cpu_evtchn[i];
+		if (pending != 0UL) {
+			printk(" word index %d %0*lx\n",
+			       i,
+			       (int)(sizeof(evtchn_mask[0])*2),
+			       pending);
+		}
+	}
+
+	printk("\npending list:\n");
+	for (i = 0; i < NR_EVENT_CHANNELS_L3; i++) {
+		if (sync_test_bit(i, evtchn_pending)) {
+			/* Selector bits that cover port i at each level. */
+			int word_idx = i / (BITS_PER_LONG * BITS_PER_LONG);
+			int word_idx_l2 = i / BITS_PER_LONG;
+
+			/*
+			 * Each suffix is printed only when the condition it
+			 * names holds: "-clear" when the selector bit is not
+			 * set, "globally-masked" when the mask bit is set,
+			 * "locally-masked" when this CPU's bit is not set.
+			 */
+			printk(" %d: event %d -> irq %d%s%s%s%s\n",
+			       cpu_from_evtchn(i), i,
+			       evtchn_to_irq[i],
+			       sync_test_bit(word_idx, &v->evtchn_pending_sel)
+					? "" : " l1-clear",
+			       sync_test_bit(word_idx_l2,
+					     per_cpu(evtchn_sel_l2, cpu))
+					? "" : " l2-clear",
+			       sync_test_bit(i, evtchn_mask)
+					? " globally-masked" : "",
+			       sync_test_bit(i, cpu_evtchn)
+					? "" : " locally-masked");
+		}
+	}
 	return IRQ_HANDLED;
 }
irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
{
- return eops->debug_interrupt(irq, dev_id);
+ irqreturn_t rc;
+ static DEFINE_SPINLOCK(debug_lock);
+ unsigned long flags;
+ int cpu = smp_processor_id();
+ struct vcpu_info *v;
+ int i;
+ /*
+  * Print the per-vCPU upcall state for every online CPU, then call
+  * the ABI-specific dumper through eops->debug_interrupt.  The whole
+  * dump runs under debug_lock with interrupts saved and disabled, and
+  * the dumper's return value is passed back to the caller.
+  */
+ spin_lock_irqsave(&debug_lock, flags);
+
+ printk("\nvcpu %d\n ", cpu);
+ /*
+  * Despite its name, 'pending' holds the value printed as "masked=":
+  * xen_irqs_disabled() on the saved registers for the current CPU when
+  * get_irq_regs() is non-NULL, otherwise that vCPU's evtchn_upcall_mask.
+  */
+ for_each_online_cpu(i) {
+ int pending;
+ v = per_cpu(xen_vcpu, i);
+ pending = (get_irq_regs() && i == cpu)
+ ? xen_irqs_disabled(get_irq_regs())
+ : v->evtchn_upcall_mask;
+ printk("%d: masked=%d pending=%d event_sel %0*lx\n ", i,
+ pending, v->evtchn_upcall_pending,
+ (int)(sizeof(v->evtchn_pending_sel)*2),
+ v->evtchn_pending_sel);
+ }
+ /* ABI-specific part of the dump, still under debug_lock. */
+ rc = eops->debug_interrupt(irq, dev_id);
+
+ spin_unlock_irqrestore(&debug_lock, flags);
+
+ return rc;
}
+/* The following per-cpu variables save the current state of the event
+ * processing loop.
+ *
+ * 2-level event channel:
+ * current_word_idx is the bit index in the L1 selector indicating the word
+ * of the shared bitmap that is currently being processed.
+ * current_bit_idx is the bit index within the word of the shared bitmap that
+ * is currently being processed.
+ * N.B. current_word_idx_l2 is not used.
+ *
+ * 3-level event channel:
+ * current_word_idx is the bit index in the L1 selector indicating the word
+ * of the L2 selector that is currently being processed.
+ * current_word_idx_l2 is the bit index in that L2 selector word indicating
+ * the word of the shared bitmap that is currently being processed.
+ * current_bit_idx is the bit index within the word of the shared bitmap that
+ * is currently being processed.
+ *
+ */
static DEFINE_PER_CPU(unsigned, xed_nesting_count);
static DEFINE_PER_CPU(unsigned int, current_word_idx);
+static DEFINE_PER_CPU(unsigned int, current_word_idx_l2);
static DEFINE_PER_CPU(unsigned int, current_bit_idx);
/*
@@ -1395,6 +1575,163 @@ out:
put_cpu();
}
+/*
+ * Upcall handler for the 3-level event channel ABI.
+ *
+ * In the 3-level event channel implementation, the first level
+ * (vcpu_info->evtchn_pending_sel) is a bitset of words which contain
+ * pending bits in the second level.  The second level (the per-CPU
+ * evtchn_sel_l2) is another bitset whose bits name words which contain
+ * pending bits in the third level.  The third level (evtchn_pending) is
+ * the bitset of pending events themselves.
+ *
+ * Scanning starts at the position saved in current_word_idx,
+ * current_word_idx_l2 and current_bit_idx, and after each handled port
+ * the position just behind it is saved for the next caller.  The word
+ * the scan started in is visited twice at each level: first from the
+ * saved position upwards, then again from its beginning.
+ *
+ * A nested call only bumps xed_nesting_count and returns; the outermost
+ * call repeats its scan while that count is not 1 or while
+ * evtchn_upcall_pending has been set again.
+ */
+static void do_upcall_l3(void)
+{
+	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+	unsigned count;
+	int start_word_idx_l1, start_word_idx_l2, start_bit_idx;
+	int word_idx_l1, word_idx_l2, bit_idx;
+	int i, j;
+	unsigned long l1cb, l2cb;
+	int cpu = get_cpu();
+
+	/* Ports covered by one L1 selector bit and by one L2 selector bit. */
+	l1cb = BITS_PER_LONG * BITS_PER_LONG;
+	l2cb = BITS_PER_LONG;
+
+	do {
+		unsigned long pending_words_l1;
+
+		vcpu_info->evtchn_upcall_pending = 0;
+
+		if (__this_cpu_inc_return(xed_nesting_count) - 1)
+			goto out;
+#ifndef CONFIG_X86
+		/* No need for a barrier -- XCHG is a barrier on x86. */
+		/* Clear master flag /before/ clearing selector flag. */
+		wmb();
+#endif
+		/* Fetch and clear the L1 pending selector in one step. */
+		pending_words_l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
+
+		start_word_idx_l1 = __this_cpu_read(current_word_idx);
+		start_word_idx_l2 = __this_cpu_read(current_word_idx_l2);
+		start_bit_idx = __this_cpu_read(current_bit_idx);
+
+		word_idx_l1 = start_word_idx_l1;
+
+		/* Loop through L1; every set bit names one L2 selector word. */
+		for (i = 0; pending_words_l1 != 0; i++) {
+			unsigned long words_l1;
+			unsigned long pending_words_l2;
+
+			words_l1 = MASK_LSBS(pending_words_l1, word_idx_l1);
+
+			/* Nothing left at or above word_idx_l1: wrap around. */
+			if (words_l1 == 0) {
+				word_idx_l1 = 0;
+				start_word_idx_l2 = 0;
+				continue;
+			}
+
+			word_idx_l1 = __ffs(words_l1);
+
+			/*
+			 * Bit N of the L1 selector corresponds to word N of
+			 * evtchn_sel_l2, which has only BITS_PER_LONG words,
+			 * so it is indexed by word_idx_l1 itself.
+			 */
+			pending_words_l2 =
+				xchg(&per_cpu(evtchn_sel_l2, cpu)[word_idx_l1],
+				     0);
+
+			/*
+			 * Usually scan the L2 word from its start; only the
+			 * first visit of the starting L1 word resumes at the
+			 * saved L2 position.
+			 */
+			word_idx_l2 = 0;
+			if (word_idx_l1 == start_word_idx_l1 && i == 0)
+				word_idx_l2 = start_word_idx_l2;
+
+			for (j = 0; pending_words_l2 != 0; j++) {
+				unsigned long pending_bits;
+				unsigned long words_l2;
+				unsigned long idx;
+
+				words_l2 = MASK_LSBS(pending_words_l2,
+						     word_idx_l2);
+
+				/* Nothing left at or above word_idx_l2: wrap. */
+				if (words_l2 == 0) {
+					word_idx_l2 = 0;
+					bit_idx = 0;
+					continue;
+				}
+
+				word_idx_l2 = __ffs(words_l2);
+
+				/* Index of the word in the pending bitmap. */
+				idx = word_idx_l1 * BITS_PER_LONG + word_idx_l2;
+				pending_bits =
+					eops->active_evtchns(cpu, NULL, idx);
+
+				/* Same two-part scan for the starting word. */
+				bit_idx = 0;
+				if (word_idx_l2 == start_word_idx_l2 && j == 0)
+					bit_idx = start_bit_idx;
+
+				/* process port */
+				do {
+					unsigned long bits;
+					int port, irq;
+					int next_l1, next_l2;
+					struct irq_desc *desc;
+
+					bits = MASK_LSBS(pending_bits, bit_idx);
+
+					if (bits == 0)
+						break;
+
+					bit_idx = __ffs(bits);
+
+					port = word_idx_l1 * l1cb +
+						word_idx_l2 * l2cb +
+						bit_idx;
+
+					irq = evtchn_to_irq[port];
+
+					if (irq != -1) {
+						desc = irq_to_desc(irq);
+						if (desc)
+							generic_handle_irq_desc(irq, desc);
+					}
+
+					bit_idx = (bit_idx + 1) % BITS_PER_LONG;
+
+					/*
+					 * Next caller starts at last processed
+					 * + 1: a wrap of the bit index carries
+					 * into the L2 index, and a wrap of both
+					 * carries into the L1 index.
+					 */
+					next_l2 = bit_idx ? word_idx_l2 :
+						(word_idx_l2 + 1) % BITS_PER_LONG;
+					next_l1 = (bit_idx || next_l2) ?
+						word_idx_l1 :
+						(word_idx_l1 + 1) % BITS_PER_LONG;
+
+					__this_cpu_write(current_bit_idx,
+							 bit_idx);
+					__this_cpu_write(current_word_idx_l2,
+							 next_l2);
+					__this_cpu_write(current_word_idx,
+							 next_l1);
+				} while (bit_idx != 0);
+
+				/* The starting word stays set until revisited. */
+				if ((word_idx_l2 != start_word_idx_l2) ||
+				    (j != 0))
+					pending_words_l2 &=
+						~(1UL << word_idx_l2);
+
+				word_idx_l2 = (word_idx_l2 + 1) % BITS_PER_LONG;
+			}
+
+			/* The starting word stays set until revisited. */
+			if ((word_idx_l1 != start_word_idx_l1) || (i != 0))
+				pending_words_l1 &= ~(1UL << word_idx_l1);
+
+			word_idx_l1 = (word_idx_l1 + 1) % BITS_PER_LONG;
+		}
+
+		BUG_ON(!irqs_disabled());
+		count = __this_cpu_read(xed_nesting_count);
+		__this_cpu_write(xed_nesting_count, 0);
+	} while (count != 1 || vcpu_info->evtchn_upcall_pending);
+
+out:
+	put_cpu();
+}
+
+
void xen_evtchn_do_upcall(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
@@ -1534,6 +1871,11 @@ static inline int test_and_set_mask_l2(int chn)
return sync_test_and_set_bit(chn, sh->evtchn_mask);
}
+/*
+ * Set the mask bit of @chn in the 3-level mask bitmap and return the
+ * result of sync_test_and_set_bit().
+ */
+static inline int test_and_set_mask_l3(int chn)
+{
+	return sync_test_and_set_bit(chn, &evtchn_mask[0]);
+}
+
static int retrigger_dynirq(struct irq_data *data)
{
int evtchn = evtchn_from_irq(data->irq);
@@ -1824,14 +2166,35 @@ static struct evtchn_ops evtchn_ops_l2 __read_mostly = {
.debug_interrupt = debug_interrupt_l2,
};
+/* Operations table selecting the 3-level event channel routines. */
+static struct evtchn_ops evtchn_ops_l3 __read_mostly = {
+	/* pending bitmap */
+	.set_evtchn = set_evtchn_l3,
+	.clear_evtchn = clear_evtchn_l3,
+	.test_evtchn = test_evtchn_l3,
+	.active_evtchns = active_evtchns_l3,
+	/* mask bitmap */
+	.mask_evtchn = mask_evtchn_l3,
+	.unmask_evtchn = unmask_evtchn_l3,
+	.test_and_set_mask = test_and_set_mask_l3,
+	/* interrupt entry points */
+	.do_upcall = do_upcall_l3,
+	.debug_interrupt = debug_interrupt_l3,
+};
+
void __init xen_init_IRQ(void)
{
int i, rc;
int cpu;
- evtchn_level = 2;
- nr_event_channels = NR_EVENT_CHANNELS_L2;
- eops = &evtchn_ops_l2;
+ switch (evtchn_level) {
+ case 2:
+ nr_event_channels = NR_EVENT_CHANNELS_L2;
+ eops = &evtchn_ops_l2;
+ break;
+ case 3:
+ nr_event_channels = NR_EVENT_CHANNELS_L3;
+ eops = &evtchn_ops_l3;
+ break;
+ default:
+ BUG();
+ }
/* Setup 2-level event channel */
evtchn_to_irq = kcalloc(nr_event_channels, sizeof(*evtchn_to_irq),
--
1.7.10.4
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |