[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [RFC PATCH V2 5/8] xen: implement 3-level event channel routines
Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx> --- drivers/xen/events.c | 407 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 385 insertions(+), 22 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 913ef0c..5b45441 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -57,6 +57,16 @@ EXPORT_SYMBOL_GPL(evtchn_level); unsigned int nr_event_channels; EXPORT_SYMBOL_GPL(nr_event_channels); +/* 2nd level selector for 3-level event channel */ +DEFINE_PER_CPU(unsigned long [sizeof(unsigned long) * 8], evtchn_sel_l2); +/* shared bitmaps for 3-level event channel */ +#define __NR_ELEMS (NR_EVENT_CHANNELS_L3/BITS_PER_LONG) +unsigned long evtchn_pending[__NR_ELEMS] __page_aligned_bss; +unsigned long evtchn_mask [__NR_ELEMS] __page_aligned_bss; +#undef __NR_ELEMS +/* Helper macros */ +#define LONG_BITORDER (BITS_PER_LONG == 64 ? 6 : 5) + struct evtchn_ops { unsigned long (*active_evtchns) (unsigned int cpu, struct shared_info *sh, @@ -314,6 +324,15 @@ static inline unsigned long active_evtchns_l2(unsigned int cpu, ~sh->evtchn_mask[idx]; } +static inline unsigned long active_evtchns_l3(unsigned int cpu, + struct shared_info *sh, + unsigned int idx) +{ + return evtchn_pending[idx] & + per_cpu(cpu_evtchn_mask, cpu)[idx] & + ~evtchn_mask[idx]; +} + static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) { int irq = evtchn_to_irq[chn]; @@ -353,18 +372,32 @@ static inline void clear_evtchn_l2(int port) sync_clear_bit(port, &s->evtchn_pending[0]); } +static inline void clear_evtchn_l3(int port) +{ + sync_clear_bit(port, &evtchn_pending[0]); +} + static inline void set_evtchn_l2(int port) { struct shared_info *s = HYPERVISOR_shared_info; sync_set_bit(port, &s->evtchn_pending[0]); } +static inline void set_evtchn_l3(int port) +{ + sync_set_bit(port, &evtchn_pending[0]); +} + static inline int test_evtchn_l2(int port) { struct shared_info *s = HYPERVISOR_shared_info; return sync_test_bit(port, &s->evtchn_pending[0]); } 
+static inline int test_evtchn_l3(int port) +{ + return sync_test_bit(port, &evtchn_pending[0]); +} /** * notify_remote_via_irq - send event to remote end of event channel via irq @@ -389,6 +422,11 @@ static void mask_evtchn_l2(int port) sync_set_bit(port, &s->evtchn_mask[0]); } +static void mask_evtchn_l3(int port) +{ + sync_set_bit(port, &evtchn_mask[0]); +} + static void unmask_evtchn_l2(int port) { struct shared_info *s = HYPERVISOR_shared_info; @@ -419,6 +457,40 @@ static void unmask_evtchn_l2(int port) put_cpu(); } +static void unmask_evtchn_l3(int port) +{ + unsigned int cpu = get_cpu(); + unsigned int l1bit = port >> (LONG_BITORDER << 1); + unsigned int l2bit = port >> LONG_BITORDER; + + BUG_ON(!irqs_disabled()); + + /* Slow path (hypercall) if this is a non-local port. */ + if (unlikely(cpu != cpu_from_evtchn(port))) { + struct evtchn_unmask unmask = { .port = port }; + (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); + } else { + struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); + + sync_clear_bit(port, &evtchn_mask[0]); + + /* + * The following is basically the equivalent of + * 'hw_resend_irq'. Just like a real IO-APIC we 'lose + * the interrupt edge' if the channel is masked. 
+ */ + if (sync_test_bit(port, &evtchn_pending[0]) && + !sync_test_and_set_bit(l2bit, + &per_cpu(evtchn_sel_l2, cpu)[0]) && + !sync_test_and_set_bit(l1bit, + &vcpu_info->evtchn_pending_sel)) + vcpu_info->evtchn_upcall_pending = 1; + } + + put_cpu(); +} + + static void xen_irq_init(unsigned irq) { struct irq_info *info; @@ -1190,25 +1262,8 @@ static irqreturn_t debug_interrupt_l2(int irq, void *dev_id) int cpu = smp_processor_id(); unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu); int i; - unsigned long flags; - static DEFINE_SPINLOCK(debug_lock); struct vcpu_info *v; - spin_lock_irqsave(&debug_lock, flags); - - printk("\nvcpu %d\n ", cpu); - - for_each_online_cpu(i) { - int pending; - v = per_cpu(xen_vcpu, i); - pending = (get_irq_regs() && i == cpu) - ? xen_irqs_disabled(get_irq_regs()) - : v->evtchn_upcall_mask; - printk("%d: masked=%d pending=%d event_sel %0*lx\n ", i, - pending, v->evtchn_upcall_pending, - (int)(sizeof(v->evtchn_pending_sel)*2), - v->evtchn_pending_sel); - } v = per_cpu(xen_vcpu, cpu); printk("\npending:\n "); @@ -1260,18 +1315,143 @@ static irqreturn_t debug_interrupt_l2(int irq, void *dev_id) } } - spin_unlock_irqrestore(&debug_lock, flags); + return IRQ_HANDLED; +} +/* Debug handler for 3-level event channels: prints the non-trivial words of the pending, global mask and per-cpu mask bitmaps, then lists every pending port. */ +static irqreturn_t debug_interrupt_l3(int irq, void *dev_id) +{ + int cpu = smp_processor_id(); + unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu); + int i; + struct vcpu_info *v; + + v = per_cpu(xen_vcpu, cpu); + + printk("\npending (only show words which have bits set to 1):\n "); + for (i = ARRAY_SIZE(evtchn_pending)-1; i >= 0; i--) + if (evtchn_pending[i] != 0UL) { + printk(" word index %d %0*lx\n", + i, + (int)sizeof(evtchn_pending[0])*2, + evtchn_pending[i]); + } + + printk("\nglobal mask (only show words which have bits set to 0):\n "); + for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--) + if (evtchn_mask[i] != ~0UL) { + printk(" word index %d %0*lx\n", + i, + (int)sizeof(evtchn_mask[0])*2, + evtchn_mask[i]); + } + + printk("\nglobally unmasked (only 
show result words which have bits set to 1):\n "); + for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--) + if ((evtchn_pending[i] & ~evtchn_mask[i]) != 0UL) { + printk(" word index %d %0*lx\n", + i, + (int)(sizeof(evtchn_mask[0])*2), + evtchn_pending[i] & ~evtchn_mask[i]); + } + + printk("\nlocal cpu%d mask (only show words which have bits set to 1):\n ", cpu); + for (i = (NR_EVENT_CHANNELS_L3/BITS_PER_LONG)-1; i >= 0; i--) + if (cpu_evtchn[i] != 0UL) { + printk(" word index %d %0*lx\n", + i, + (int)(sizeof(cpu_evtchn[0])*2), + cpu_evtchn[i]); + } + + printk("\nlocally unmasked (only show result words which have bits set to 1):\n "); + for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--) { + unsigned long pending = evtchn_pending[i] + & ~evtchn_mask[i] + & cpu_evtchn[i]; + if (pending != 0UL) { + printk(" word index %d %0*lx\n", + i, + (int)(sizeof(evtchn_mask[0])*2), + pending); + } + } + /* Each suffix is printed only when its condition holds: selector bit clear, port set in the global mask, port absent from this cpu's mask. */ + printk("\npending list:\n"); + for (i = 0; i < NR_EVENT_CHANNELS_L3; i++) { + if (sync_test_bit(i, evtchn_pending)) { + int word_idx = i / (BITS_PER_LONG * BITS_PER_LONG); + int word_idx_l2 = i / BITS_PER_LONG; + printk(" %d: event %d -> irq %d%s%s%s%s\n", + cpu_from_evtchn(i), i, + evtchn_to_irq[i], + sync_test_bit(word_idx, &v->evtchn_pending_sel) + ? "" : " l1-clear", + sync_test_bit(word_idx_l2, per_cpu(evtchn_sel_l2, cpu)) + ? "" : " l2-clear", + !sync_test_bit(i, evtchn_mask) + ? "" : " globally-masked", + sync_test_bit(i, cpu_evtchn) + ? "" : " locally-masked"); + } + } return IRQ_HANDLED; } irqreturn_t xen_debug_interrupt(int irq, void *dev_id) { - return eops->debug_interrupt(irq, dev_id); + irqreturn_t rc; + static DEFINE_SPINLOCK(debug_lock); + unsigned long flags; + int cpu = smp_processor_id(); + struct vcpu_info *v; + int i; + + spin_lock_irqsave(&debug_lock, flags); + + printk("\nvcpu %d\n ", cpu); + + for_each_online_cpu(i) { + int pending; + v = per_cpu(xen_vcpu, i); + pending = (get_irq_regs() && i == cpu) + ? 
xen_irqs_disabled(get_irq_regs()) + : v->evtchn_upcall_mask; + printk("%d: masked=%d pending=%d event_sel %0*lx\n ", i, + pending, v->evtchn_upcall_pending, + (int)(sizeof(v->evtchn_pending_sel)*2), + v->evtchn_pending_sel); + } + + rc = eops->debug_interrupt(irq, dev_id); + + spin_unlock_irqrestore(&debug_lock, flags); + + return rc; } +/* The following per-cpu variables are used to save current state of event + * processing loop. + * + * 2-level event channel: + * current_word_idx is the bit index in L1 selector indicating the currently + * processing word in shared bitmap. + * current_bit_idx is the bit index in the currently processing word in shared + * bitmap. + * N.B. current_word_idx_l2 is not used. + * + * 3-level event channel: + * current_word_idx is the bit index in L1 selector indicating the currently + * processing word in L2 selector. + * current_word_idx_l2 is the bit index in L2 selector word indicating the + * currently processing word in shared bitmap. + * current_bit_idx is the bit index in the currently processing word in shared + * bitmap. + * + */ static DEFINE_PER_CPU(unsigned, xed_nesting_count); static DEFINE_PER_CPU(unsigned int, current_word_idx); +static DEFINE_PER_CPU(unsigned int, current_word_idx_l2); static DEFINE_PER_CPU(unsigned int, current_bit_idx); /* @@ -1395,6 +1575,163 @@ out: put_cpu(); } +/* + * In the 3-level event channel implementation, the first level is a + * bitset of words which contain pending bits in the second level. + * The second level is another bitsets which contain pending bits in + * the third level. The third level is a bit set of pending events + * themselves. 
+ */ +static void do_upcall_l3(void) +{ + struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); + unsigned count; + int start_word_idx_l1, start_word_idx_l2, start_bit_idx; + int word_idx_l1, word_idx_l2, bit_idx; + int i, j; + unsigned long l1cb, l2cb; + int cpu = get_cpu(); + + l1cb = BITS_PER_LONG * BITS_PER_LONG; + l2cb = BITS_PER_LONG; + + do { + unsigned long pending_words_l1; + + vcpu_info->evtchn_upcall_pending = 0; + + if (__this_cpu_inc_return(xed_nesting_count) - 1) + goto out; +#ifndef CONFIG_X86 + /* No need for a barrier -- XCHG is a barrier on x86. */ + /* Clear master flag /before/ clearing selector flag. */ + wmb(); +#endif + /* here we get l1 pending selector */ + pending_words_l1 = xchg(&vcpu_info->evtchn_pending_sel, 0); + + start_word_idx_l1 = __this_cpu_read(current_word_idx); + start_word_idx_l2 = __this_cpu_read(current_word_idx_l2); + start_bit_idx = __this_cpu_read(current_bit_idx); + + word_idx_l1 = start_word_idx_l1; + + /* loop through l1, try to pick up l2 */ + for (i = 0; pending_words_l1 != 0; i++) { + unsigned long words_l1; + unsigned long pending_words_l2; + unsigned long pwl2idx; + + words_l1 = MASK_LSBS(pending_words_l1, word_idx_l1); + + if (words_l1 == 0) { + word_idx_l1 = 0; + start_word_idx_l2 = 0; + continue; + } + + word_idx_l1 = __ffs(words_l1); + /* evtchn_sel_l2 holds one selector word per L1 bit, so it is indexed by the L1 bit number itself */ + pwl2idx = word_idx_l1; + + pending_words_l2 = + xchg(&per_cpu(evtchn_sel_l2, cpu)[pwl2idx], + 0); + + word_idx_l2 = 0; + if (word_idx_l1 == start_word_idx_l1) { + if (i == 0) + word_idx_l2 = start_word_idx_l2; + else + word_idx_l2 &= (1UL << start_word_idx_l2) - 1; + } + + for (j = 0; pending_words_l2 != 0; j++) { + unsigned long pending_bits; + unsigned long words_l2; + unsigned long idx; + + words_l2 = MASK_LSBS(pending_words_l2, + word_idx_l2); + + if (words_l2 == 0) { + word_idx_l2 = 0; + bit_idx = 0; + continue; + } + + word_idx_l2 = __ffs(words_l2); + + idx = word_idx_l1*BITS_PER_LONG+word_idx_l2; + pending_bits = + eops->active_evtchns(cpu, NULL, 
idx); + + bit_idx = 0; + if (word_idx_l2 == start_word_idx_l2) { + if (j == 0) + bit_idx = start_bit_idx; + else + bit_idx &= (1UL<<start_bit_idx)-1; + } + + /* process port */ + do { + unsigned long bits; + int port, irq; + struct irq_desc *desc; + + bits = MASK_LSBS(pending_bits, bit_idx); + + if (bits == 0) + break; + + bit_idx = __ffs(bits); + + port = word_idx_l1 * l1cb + + word_idx_l2 * l2cb + + bit_idx; + + irq = evtchn_to_irq[port]; + + if (irq != -1) { + desc = irq_to_desc(irq); + if (desc) + generic_handle_irq_desc(irq, desc); + } + + bit_idx = (bit_idx + 1) % BITS_PER_LONG; + /* Save the resume point (last processed + 1), carrying from bit index into the L2 word index and from there into the L1 word index */ + __this_cpu_write(current_bit_idx, bit_idx); + __this_cpu_write(current_word_idx_l2, + bit_idx ? word_idx_l2 : + (word_idx_l2+1) % BITS_PER_LONG); + __this_cpu_write(current_word_idx, + (bit_idx || word_idx_l2 != BITS_PER_LONG - 1) ? word_idx_l1 : + (word_idx_l1+1) % BITS_PER_LONG); + } while (bit_idx != 0); + + if ((word_idx_l2 != start_word_idx_l2) || (j != 0)) + pending_words_l2 &= ~(1UL << word_idx_l2); + /* move on to the next L2 word; the start word is revisited after wrapping */ + word_idx_l2 = (word_idx_l2 + 1) % BITS_PER_LONG; + } + + if ((word_idx_l1 != start_word_idx_l1) || (i != 0)) + pending_words_l1 &= ~(1UL << word_idx_l1); + /* move on to the next L1 word; the start word is revisited after wrapping */ + word_idx_l1 = (word_idx_l1 + 1) % BITS_PER_LONG; + } + + BUG_ON(!irqs_disabled()); + count = __this_cpu_read(xed_nesting_count); + __this_cpu_write(xed_nesting_count, 0); + } while (count != 1 || vcpu_info->evtchn_upcall_pending); + +out: + put_cpu(); +} + + void xen_evtchn_do_upcall(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -1534,6 +1871,11 @@ static inline int test_and_set_mask_l2(int chn) return sync_test_and_set_bit(chn, sh->evtchn_mask); } +static inline int test_and_set_mask_l3(int chn) +{ + return sync_test_and_set_bit(chn, evtchn_mask); +} + static int retrigger_dynirq(struct irq_data *data) { int evtchn = evtchn_from_irq(data->irq); @@ -1824,14 +2166,35 @@ static struct evtchn_ops evtchn_ops_l2 __read_mostly = { .debug_interrupt = debug_interrupt_l2, }; +static struct evtchn_ops evtchn_ops_l3 __read_mostly = { + 
.active_evtchns = active_evtchns_l3, + .clear_evtchn = clear_evtchn_l3, + .set_evtchn = set_evtchn_l3, + .test_evtchn = test_evtchn_l3, + .mask_evtchn = mask_evtchn_l3, + .unmask_evtchn = unmask_evtchn_l3, + .test_and_set_mask = test_and_set_mask_l3, + .do_upcall = do_upcall_l3, + .debug_interrupt = debug_interrupt_l3, +}; + void __init xen_init_IRQ(void) { int i, rc; int cpu; - evtchn_level = 2; - nr_event_channels = NR_EVENT_CHANNELS_L2; - eops = &evtchn_ops_l2; + switch (evtchn_level) { + case 2: + nr_event_channels = NR_EVENT_CHANNELS_L2; + eops = &evtchn_ops_l2; + break; + case 3: + nr_event_channels = NR_EVENT_CHANNELS_L3; + eops = &evtchn_ops_l3; + break; + default: + BUG(); + } /* Setup 2-level event channel */ evtchn_to_irq = kcalloc(nr_event_channels, sizeof(*evtchn_to_irq), -- 1.7.10.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |