[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [RFC] Interrupt latency measurement technique
Hi Jonatan I didn't pass xen_timer_val to dom0. I stored time counters in the static buffer in Xen, and after in dom0, and then compare them using Exel. During printing of buffer I can do with counters whatever I want - convert them to nanosecond, etc Let me show the code which does this in Xen: diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c index 39e0f2f..340dfa5 100644 --- a/xen/arch/arm/gic.c +++ b/xen/arch/arm/gic.c @@ -847,12 +847,39 @@ static void do_sgi(struct cpu_user_regs *regs, int othercpu, enum gic_sgi sgi) GICC[GICC_DIR] = sgi; } + +s_time_t ticks_to_ns(uint64_t ticks); + +static const u32 test_irq = 57; <-- IRQ, which I profiled (Display IRQ on my platform) +static u32 total_irqs; + +#define NUM_TICKS_TO_STORE 100 +struct irq_tick { + u64 cnt_tick; + u64 cnt_off; +}; + +static struct irq_tick irq_ticks[NUM_TICKS_TO_STORE]; <-- buffer to store ticks +static u32 offs; + +static void print_irq_ticks(void) +{ + u32 i; + for ( i = 0; i < NUM_TICKS_TO_STORE; i++ ) + { + printk("[%02u] irq %u tick -> %llu off -> %llu off_ns -> %llu dom_tick %llu dom_tick_ns -> %llu cpu_khz %lu total %u\n", + i, test_irq, irq_ticks[i].cnt_tick, irq_ticks[i].cnt_off, ticks_to_ns(irq_ticks[i].cnt_off), + irq_ticks[i].cnt_tick - irq_ticks[i].cnt_off, + ticks_to_ns(irq_ticks[i].cnt_tick - irq_ticks[i].cnt_off), cpu_khz, total_irqs); + } +} + /* Accept an interrupt from the GIC and dispatch its handler */ void gic_interrupt(struct cpu_user_regs *regs, int is_fiq) { uint32_t intack; unsigned int irq; - + static u32 step; do { intack = GICC[GICC_IAR]; @@ -860,6 +887,22 @@ void gic_interrupt(struct cpu_user_regs *regs, int is_fiq) if ( likely(irq >= 16 && irq < 1021) ) { + if ( irq == test_irq ) + { + total_irqs++; + irq_ticks[step].cnt_tick = READ_SYSREG64(CNTPCT_EL0); <-- Timer base value + irq_ticks[step].cnt_off = READ_SYSREG64(CNTVOFF_EL2); <--Time offset for domain, dom0 in my case + + step++; + if ( step % NUM_TICKS_TO_STORE == 0 ) + { + print_irq_ticks(); + step = 0; + } + } CNTVOFF_EL2 register contains constant value, it is a difference between tick in Xen and tick in dom0. If you subtract base tick value and this offset you'll get tick value in dom0. After, in dom0 you check CNTPCT_EL0 and that's it. You have 2 tick values for dom0 - one from Xen, second from dom0 itself. My diff for dom0 contains some overhead - I copy pasted some Xen code there: diff --git a/drivers/video/omap2/dss/dispc-compat.c b/drivers/video/omap2/dss/dispc-compat.c index 4b31058..71c807e 100644 --- a/drivers/video/omap2/dss/dispc-compat.c +++ b/drivers/video/omap2/dss/dispc-compat.c @@ -255,6 +255,63 @@ static void print_irq_status(u32 status) #undef PIS } + +#define CNTPCT p15,0,c14 /* Time counter value */ +#define CNTPCT_EL0 CNTPCT + +#define __CP64(r1, r2, coproc, opc, crm) coproc, opc, r1, r2, crm +#define CP64(r, name...) __CP64(r, name) +#define LOAD_CP64(r, name...) "mrrc " __stringify(CP64(%r, %H##r, name)) ";" +#define READ_CP64(name...) ({ \ + register uint64_t _r; \ + asm volatile(LOAD_CP64(0, name) : "=r" (_r)); \ + _r; }) +#define READ_SYSREG64(R...) READ_CP64(R) + +static u64 muldiv64(u64 a, u32 b, u32 c) +{ +#define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) + union { + u64 ll; + struct { + u32 low, high; + } l; + } u, res; + u64 rl, rh; + + u.ll = a; + rl = (u64)u.l.low * (u64)b; + rh = (u64)u.l.high * (u64)b; + rh += (rl >> 32); + res.l.high = div64_u64(rh, c); + res.l.low = div64_u64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c); + return res.ll; +} + +static const u32 cpu_khz = 6144; +static inline s64 ticks_to_ns(uint64_t ticks) +{ +#define SECONDS(_s) ((s64)((_s) * 1000000000ULL)) + + return muldiv64(ticks, SECONDS(1), 1000 * cpu_khz); +} + +static const u32 test_irq = 57; +static u32 total_irqs; + +#define NUM_TICKS_TO_STORE 100 +static u64 irq_ticks[NUM_TICKS_TO_STORE]; + +static void print_irq_ticks(void) +{ + u32 i; + for ( i = 0; i < NUM_TICKS_TO_STORE; i++ ) + { + printk("(Dom0)[%02u] irq %u tick -> %lld tick_ns %lld cpu_khz %u total %u\n", + i, test_irq, irq_ticks[i], ticks_to_ns(irq_ticks[i]), cpu_khz, total_irqs); + } +} + /* Called from dss.c. Note that we don't touch clocks here, * but we presume they are on because we got an IRQ. However, * an irq handler may turn the clocks off, so we may not have @@ -267,6 +324,17 @@ static irqreturn_t omap_dispc_irq_handler(int irq, void *arg) u32 unhandled_errors; struct omap_dispc_isr_data *isr_data; struct omap_dispc_isr_data registered_isr[DISPC_MAX_NR_ISRS]; + static u32 step; + + total_irqs++; + irq_ticks[step] = READ_SYSREG64(CNTPCT_EL0); + + step++; + if ( step % NUM_TICKS_TO_STORE == 0 ) + { + print_irq_ticks(); + step = 0; + } It would be nice to know whether this works for you. Regards, Andrii On Fri, Aug 29, 2014 at 9:07 PM, Jonathan Fraser <jfraser@xxxxxxxxxxxx> wrote: > I need to make the same measurements. > How did you pass the value of 'xen_timer_val' to the dom0 kernel? I can > store it in a scratch hardware register in the SoC, but how did you do > it? > > Thanks, > Jon > > > On Wed, 2014-08-27 at 13:07 +0300, Andrii Tseglytskyi wrote: >> Test setup is the following: >> >> Platform - Jacinto6 (OMAP5) with two ARMv7 cores (Cortex A15), 1.5 Gb of RAM >> Hypervisor - Xen 4.4 stable >> Dom0 - Linux 3.8 running with 2 vcpus, 128 Mb RAM >> DomU was not launched during the test >> >> Regards, >> Andrii >> >> >> On Wed, Aug 27, 2014 at 12:14 PM, Andrii Tseglytskyi >> <andrii.tseglytskyi@xxxxxxxxxxxxxxx> wrote: >> > Hi, >> > >> > I need to measure IRQ latency introduced by Xen: (GIC -> Xen IRQ >> > handler -> Dom0 IRQ handler) >> > I need to know how many time I spend in Xen IRQ handler. >> > >> > Can someone comment - is the following algorithm works: >> > >> > - in function xen/arch/arm/gic.c: gic_interrupt() store timer counter >> > value: >> > xen_timer_val = READ_SYSREG64(CNTPCT_EL0) - READ_SYSREG64(CNTVOFF_EL2) >> > >> > - in any IRQ handler in dom0 store timer counter value: >> > dom0_timer_val = READ_SYSREG64(CNTPCT_EL0) >> > >> > - calculate time diff in nanoseconds: >> > time_diff_ns = ticks_to_ns(dom0_timer_val - xen_timer_val) >> > >> > Using this technique I measured display IRQ latency and got about >> > - 20 to 30 usec latency on 1 GHz MPU frequency >> > - 10 to 20 usec latencyon 1.5 GHz MPU frequency >> > >> > Are these numbers expectable? Can this technique be used? >> > >> > Regards, >> > Andrii >> > >> > -- >> > >> > Andrii Tseglytskyi | Embedded Dev >> > GlobalLogic >> > www.globallogic.com >> >> >> > -- Andrii Tseglytskyi | Embedded Dev GlobalLogic www.globallogic.com _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |