|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [RFC] Interrupt latency measurement technique
Hi Jonatan
I didn't pass xen_timer_val to dom0. I stored time counters in the
static buffer in Xen, and after in dom0, and then compare them using
Exel.
During printing of buffer I can do with counters whatever I want -
convert them to nanosecond, etc
Let me show the code which does this in Xen:
diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
index 39e0f2f..340dfa5 100644
--- a/xen/arch/arm/gic.c
+++ b/xen/arch/arm/gic.c
@@ -847,12 +847,39 @@ static void do_sgi(struct cpu_user_regs *regs,
int othercpu, enum gic_sgi sgi)
GICC[GICC_DIR] = sgi;
}
+
+s_time_t ticks_to_ns(uint64_t ticks);
+
+static const u32 test_irq = 57; <-- IRQ, which I profiled (Display
IRQ on my platform)
+static u32 total_irqs;
+
+#define NUM_TICKS_TO_STORE 100
+struct irq_tick {
+ u64 cnt_tick;
+ u64 cnt_off;
+};
+
+static struct irq_tick irq_ticks[NUM_TICKS_TO_STORE]; <-- buffer to store ticks
+static u32 offs;
+
+static void print_irq_ticks(void)
+{
+ u32 i;
+ for ( i = 0; i < NUM_TICKS_TO_STORE; i++ )
+ {
+ printk("[%02u] irq %u tick -> %llu off -> %llu off_ns -> %llu
dom_tick %llu dom_tick_ns -> %llu cpu_khz %lu total %u\n",
+ i, test_irq, irq_ticks[i].cnt_tick,
irq_ticks[i].cnt_off, ticks_to_ns(irq_ticks[i].cnt_off),
+ irq_ticks[i].cnt_tick - irq_ticks[i].cnt_off,
+ ticks_to_ns(irq_ticks[i].cnt_tick -
irq_ticks[i].cnt_off), cpu_khz, total_irqs);
+ }
+}
+
/* Accept an interrupt from the GIC and dispatch its handler */
void gic_interrupt(struct cpu_user_regs *regs, int is_fiq)
{
uint32_t intack;
unsigned int irq;
-
+ static u32 step;
do {
intack = GICC[GICC_IAR];
@@ -860,6 +887,22 @@ void gic_interrupt(struct cpu_user_regs *regs, int is_fiq)
if ( likely(irq >= 16 && irq < 1021) )
{
+ if ( irq == test_irq )
+ {
+ total_irqs++;
+ irq_ticks[step].cnt_tick = READ_SYSREG64(CNTPCT_EL0);
<-- Timer base value
+ irq_ticks[step].cnt_off = READ_SYSREG64(CNTVOFF_EL2);
<--Time offset for domain, dom0 in my case
+
+ step++;
+ if ( step % NUM_TICKS_TO_STORE == 0 )
+ {
+ print_irq_ticks();
+ step = 0;
+ }
+ }
CNTVOFF_EL2 register contains constant value, it is a difference
between tick in Xen and tick in dom0. If you subtract base tick value
and this offset you'll get tick value in dom0.
After, in dom0 you check CNTPCT_EL0 and that's it. You have 2 tick
values for dom0 - one from Xen, second from dom0 itself.
My diff for dom0 contains some overhead - I copy pasted some Xen code there:
diff --git a/drivers/video/omap2/dss/dispc-compat.c
b/drivers/video/omap2/dss/dispc-compat.c
index 4b31058..71c807e 100644
--- a/drivers/video/omap2/dss/dispc-compat.c
+++ b/drivers/video/omap2/dss/dispc-compat.c
@@ -255,6 +255,63 @@ static void print_irq_status(u32 status)
#undef PIS
}
+
+#define CNTPCT p15,0,c14 /* Time counter value */
+#define CNTPCT_EL0 CNTPCT
+
+#define __CP64(r1, r2, coproc, opc, crm) coproc, opc, r1, r2, crm
+#define CP64(r, name...) __CP64(r, name)
+#define LOAD_CP64(r, name...) "mrrc " __stringify(CP64(%r, %H##r, name)) ";"
+#define READ_CP64(name...) ({ \
+ register uint64_t _r; \
+ asm volatile(LOAD_CP64(0, name) : "=r" (_r)); \
+ _r; })
+#define READ_SYSREG64(R...) READ_CP64(R)
+
+static u64 muldiv64(u64 a, u32 b, u32 c)
+{
+#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
+ union {
+ u64 ll;
+ struct {
+ u32 low, high;
+ } l;
+ } u, res;
+ u64 rl, rh;
+
+ u.ll = a;
+ rl = (u64)u.l.low * (u64)b;
+ rh = (u64)u.l.high * (u64)b;
+ rh += (rl >> 32);
+ res.l.high = div64_u64(rh, c);
+ res.l.low = div64_u64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
+ return res.ll;
+}
+
+static const u32 cpu_khz = 6144;
+static inline s64 ticks_to_ns(uint64_t ticks)
+{
+#define SECONDS(_s) ((s64)((_s) * 1000000000ULL))
+
+ return muldiv64(ticks, SECONDS(1), 1000 * cpu_khz);
+}
+
+static const u32 test_irq = 57;
+static u32 total_irqs;
+
+#define NUM_TICKS_TO_STORE 100
+static u64 irq_ticks[NUM_TICKS_TO_STORE];
+
+static void print_irq_ticks(void)
+{
+ u32 i;
+ for ( i = 0; i < NUM_TICKS_TO_STORE; i++ )
+ {
+ printk("(Dom0)[%02u] irq %u tick -> %lld tick_ns %lld cpu_khz
%u total %u\n",
+ i, test_irq, irq_ticks[i], ticks_to_ns(irq_ticks[i]),
cpu_khz, total_irqs);
+ }
+}
+
/* Called from dss.c. Note that we don't touch clocks here,
* but we presume they are on because we got an IRQ. However,
* an irq handler may turn the clocks off, so we may not have
@@ -267,6 +324,17 @@ static irqreturn_t omap_dispc_irq_handler(int
irq, void *arg)
u32 unhandled_errors;
struct omap_dispc_isr_data *isr_data;
struct omap_dispc_isr_data registered_isr[DISPC_MAX_NR_ISRS];
+ static u32 step;
+
+ total_irqs++;
+ irq_ticks[step] = READ_SYSREG64(CNTPCT_EL0);
+
+ step++;
+ if ( step % NUM_TICKS_TO_STORE == 0 )
+ {
+ print_irq_ticks();
+ step = 0;
+ }
It would be nice to know whether this works for you.
Regards,
Andrii
On Fri, Aug 29, 2014 at 9:07 PM, Jonathan Fraser <jfraser@xxxxxxxxxxxx> wrote:
> I need to make the same measurements.
> How did you pass the value of 'xen_timer_val' to the dom0 kernel? I can
> store it in a scratch hardware register in the SoC, but how did you do
> it?
>
> Thanks,
> Jon
>
>
> On Wed, 2014-08-27 at 13:07 +0300, Andrii Tseglytskyi wrote:
>> Test setup is the following:
>>
>> Platform - Jacinto6 (OMAP5) with two ARMv7 cores (Cortex A15), 1.5 Gb of RAM
>> Hypervisor - Xen 4.4 stable
>> Dom0 - Linux 3.8 running with 2 vcpus, 128 Mb RAM
>> DomU was not launched during the test
>>
>> Regards,
>> Andrii
>>
>>
>> On Wed, Aug 27, 2014 at 12:14 PM, Andrii Tseglytskyi
>> <andrii.tseglytskyi@xxxxxxxxxxxxxxx> wrote:
>> > Hi,
>> >
>> > I need to measure IRQ latency introduced by Xen: (GIC -> Xen IRQ
>> > handler -> Dom0 IRQ handler)
>> > I need to know how many time I spend in Xen IRQ handler.
>> >
>> > Can someone comment - is the following algorithm works:
>> >
>> > - in function xen/arch/arm/gic.c: gic_interrupt() store timer counter
>> > value:
>> > xen_timer_val = READ_SYSREG64(CNTPCT_EL0) - READ_SYSREG64(CNTVOFF_EL2)
>> >
>> > - in any IRQ handler in dom0 store timer counter value:
>> > dom0_timer_val = READ_SYSREG64(CNTPCT_EL0)
>> >
>> > - calculate time diff in nanoseconds:
>> > time_diff_ns = ticks_to_ns(dom0_timer_val - xen_timer_val)
>> >
>> > Using this technique I measured display IRQ latency and got about
>> > - 20 to 30 usec latency on 1 GHz MPU frequency
>> > - 10 to 20 usec latencyon 1.5 GHz MPU frequency
>> >
>> > Are these numbers expectable? Can this technique be used?
>> >
>> > Regards,
>> > Andrii
>> >
>> > --
>> >
>> > Andrii Tseglytskyi | Embedded Dev
>> > GlobalLogic
>> > www.globallogic.com
>>
>>
>>
>
--
Andrii Tseglytskyi | Embedded Dev
GlobalLogic
www.globallogic.com
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |