[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH v3] VT-d PI: track the vcpu number in pi blocking list
This patch adds a field, counter, in struct vmx_pi_blocking_vcpu to track how many entries are on the pi blocking list. In order to analyze list operation frequency and obtain the list length, add some relevant events to xentrace and some associated code in xenalyze. As to xenalyze, since removal from the pi list can happen in various contexts (interrupt context, and non-interrupt context) and be done by the vcpu itself or others, some of the contexts would cause toplevel_assert_check() to fail. To bypass this check, this patch adds a new type TRC_HVM_ASYNC and for events of this new type, the context is not checked. Signed-off-by: Chao Gao <chao.gao@xxxxxxxxx> --- tools/xentrace/formats | 3 + tools/xentrace/xenalyze.c | 154 +++++++++++++++++++++++++++++++++++++--- xen/arch/x86/hvm/vmx/vmx.c | 33 +++++++-- xen/include/asm-x86/hvm/trace.h | 1 + xen/include/public/trace.h | 5 ++ 5 files changed, 182 insertions(+), 14 deletions(-) diff --git a/tools/xentrace/formats b/tools/xentrace/formats index 8b31780..54e0b11 100644 --- a/tools/xentrace/formats +++ b/tools/xentrace/formats @@ -125,6 +125,9 @@ 0x00082020 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) INTR_WINDOW [ value = 0x%(1)08x ] 0x00082021 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) NPF [ gpa = 0x%(2)08x%(1)08x mfn = 0x%(4)08x%(3)08x qual = 0x%(5)04x p2mt = 0x%(6)04x ] 0x00082023 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) TRAP [ vector = 0x%(1)02x ] +0x00082026 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) PI_LIST_ADD [ domid = 0x%(1)04x vcpu = 0x%(2)04x, pcpu = 0x%(3)04x, #entry = 0x%(4)04x ] + +0x00088001 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) ASYNC_PI_LIST_DEL [ domid = 0x%(1)04x vcpu = 0x%(2)04x ] 0x0010f001 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) page_grant_map [ domid = %(1)d ] 0x0010f002 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) page_grant_unmap [ domid = %(1)d ] diff --git a/tools/xentrace/xenalyze.c b/tools/xentrace/xenalyze.c index fa608ad..fbc2429 100644 --- a/tools/xentrace/xenalyze.c +++ b/tools/xentrace/xenalyze.c @@ -296,6 +296,7 @@ struct symbol_struct { 
}; void error(enum error_level l, struct record_info *ri); +struct vcpu_data * vcpu_find(int did, int vid); void parse_symbol_file(char *fn) { unsigned long long last_addr = 0; @@ -944,6 +945,7 @@ enum { HVM_EVENT_TRAP, HVM_EVENT_TRAP_DEBUG, HVM_EVENT_VLAPIC, + HVM_EVENT_PI_LIST_ADD, HVM_EVENT_HANDLER_MAX }; char * hvm_event_handler_name[HVM_EVENT_HANDLER_MAX] = { @@ -979,13 +981,15 @@ char * hvm_event_handler_name[HVM_EVENT_HANDLER_MAX] = { "realmode_emulate", "trap", "trap_debug", - "vlapic" + "vlapic", + "pi_list_add", }; enum { HVM_VOL_VMENTRY, HVM_VOL_VMEXIT, HVM_VOL_HANDLER, + HVM_VOL_ASYNC, HVM_VOL_MAX }; @@ -1012,6 +1016,7 @@ char *hvm_vol_name[HVM_VOL_MAX] = { [HVM_VOL_VMENTRY]="vmentry", [HVM_VOL_VMEXIT] ="vmexit", [HVM_VOL_HANDLER]="handler", + [HVM_VOL_ASYNC]="async", }; enum { @@ -1337,6 +1342,9 @@ struct hvm_data { struct { struct io_address *mmio, *pio; } io; + struct { + int pi_list_add, pi_list_del; + } pi; } summary; /* In-flight accumulation information */ @@ -1391,6 +1399,9 @@ struct hvm_data { /* Historical info */ tsc_t last_rdtsc; + + /* Destination pcpu of posted interrupt's wakeup interrupt */ + int pi_cpu; }; enum { @@ -1457,6 +1468,8 @@ void init_hvm_data(struct hvm_data *h, struct vcpu_data *v) { } for(i=0; i<GUEST_INTERRUPT_MAX+1; i++) h->summary.guest_interrupt[i].count=0; + + h->pi_cpu = -1; } /* PV data */ @@ -1766,6 +1779,14 @@ char * toplevel_name[TOPLEVEL_MAX] = { [TOPLEVEL_HW]="hw", }; +enum { + SUBLEVEL_HVM_ENTRYEXIT=1, + SUBLEVEL_HVM_HANDLER, + SUBLEVEL_HVM_EMUL, + SUBLEVEL_HVM_ASYNC, + SUBLEVEL_HVM_MAX=SUBLEVEL_HVM_ASYNC+1, +}; + struct trace_volume { unsigned long long toplevel[TOPLEVEL_MAX]; unsigned long long sched_verbose; @@ -1852,6 +1873,9 @@ struct pcpu_info { tsc_t tsc; struct cycle_summary idle, running, lost; } time; + + /* Posted Interrupt List Length */ + int pi_list_length; }; void __fill_in_record_info(struct pcpu_info *p); @@ -4726,6 +4750,71 @@ void hvm_generic_dump(struct record_info *ri, char * prefix) 
printf(" ]\n"); } +void hvm_pi_list_add_process(struct record_info *ri, struct hvm_data *h) +{ + struct { + int did; + int vid; + int pcpu; + int len; + } *data; + struct vcpu_data *v; + + data = (typeof(data))ri->rec.u.tsc.data; + v = vcpu_find(data->did, data->vid); + if ( !v->hvm.init ) + init_hvm_data(&v->hvm, v); + + if ( opt.dump_all ) + printf("d%uv%u is added to pi blocking list of pcpu%u. " + "The list length is now %d\n", + data->did, data->vid, data->pcpu, data->len); + + v->hvm.pi_cpu = data->pcpu; + v->hvm.summary.pi.pi_list_add++; + if ( data->pcpu > P.max_active_pcpu || !P.pcpu[data->pcpu].active ) + fprintf(stderr, "Strange! pcpu%u is inactive but a vcpu is added " + "to it", data->pcpu); + else if ( P.pcpu[data->pcpu].pi_list_length == -1 ) + P.pcpu[data->pcpu].pi_list_length = data->len; + else if ( data->len != ++P.pcpu[data->pcpu].pi_list_length ) + /* + * Correct pi list length. Removing one vcpu that is already on the + * list before tracing starts would not decrease the pi list length; + * the list length would be inaccurate. 
+ */ + P.pcpu[data->pcpu].pi_list_length = data->len; +} + +void hvm_pi_list_del_process(struct record_info *ri) +{ + struct { + int did; + int vid; + } *data; + struct vcpu_data *v; + + data = (typeof(data))ri->rec.u.tsc.data; + v = vcpu_find(data->did, data->vid); + if ( !v->hvm.init ) + init_hvm_data(&v->hvm, v); + + if ( opt.dump_all ) + { + if ( v->hvm.pi_cpu != -1 ) + printf("d%uv%u is removed from pi blocking list of pcpu%u\n", + data->did, data->vid, v->hvm.pi_cpu); + else + printf("d%uv%u is removed from pi blocking list\n", + data->did, data->vid); + } + + if ( P.pcpu[v->hvm.pi_cpu].pi_list_length != -1 ) + P.pcpu[v->hvm.pi_cpu].pi_list_length--; + v->hvm.pi_cpu = -1; + v->hvm.summary.pi.pi_list_del++; +} + void hvm_handler_process(struct record_info *ri, struct hvm_data *h) { /* Wait for first vmexit to initialize */ if(!h->init) @@ -4763,6 +4852,9 @@ void hvm_handler_process(struct record_info *ri, struct hvm_data *h) { case TRC_HVM_INTR_WINDOW: hvm_intr_window_process(ri, h); break; + case TRC_HVM_PI_LIST_ADD: + hvm_pi_list_add_process(ri, h); + break; case TRC_HVM_OP_DESTROY_PROC: if(h->v->cr3.data) { struct cr3_value_struct *cur = h->v->cr3.data; @@ -4862,7 +4954,6 @@ needs_vmexit: void vcpu_next_update(struct pcpu_info *p, struct vcpu_data *next, tsc_t tsc); void vcpu_prev_update(struct pcpu_info *p, struct vcpu_data *prev, tsc_t tsc, int new_runstate); -struct vcpu_data * vcpu_find(int did, int vid); void lose_vcpu(struct vcpu_data *v, tsc_t tsc); int domain_runstate(struct domain_data *d) { @@ -5267,6 +5358,21 @@ void hvm_process(struct pcpu_info *p) struct vcpu_data *v = p->current; struct hvm_data *h = &v->hvm; + if(ri->evt.sub == 8) + { + UPDATE_VOLUME(p, hvm[HVM_VOL_ASYNC], ri->size); + + switch(ri->event) { + case TRC_HVM_ASYNC_PI_LIST_DEL: + hvm_pi_list_del_process(ri); + break; + + default: + fprintf(warn, "Unknown hvm event: %x\n", ri->event); + } + return; + } + assert(p->current); if(vcpu_set_data_type(p->current, VCPU_DATA_HVM)) @@ 
-5359,6 +5465,10 @@ void hvm_summary(struct hvm_data *h) { i, h->summary.ipi_count[i]); hvm_io_address_summary(h->summary.io.pio, "IO address summary:"); hvm_io_address_summary(h->summary.io.mmio, "MMIO address summary:"); + + printf("Posted Interrupt:\n"); + printf(" List Add: %u\n", h->summary.pi.pi_list_add); + printf(" List Del: %u\n", h->summary.pi.pi_list_del); } /* ---- Shadow records ---- */ @@ -8962,6 +9072,7 @@ off_t scan_for_new_pcpu(off_t offset) { p->file_offset = offset; p->next_cpu_change_offset = offset; + p->pi_list_length = -1; record_order_insert(p); @@ -9142,7 +9253,6 @@ int find_toplevel_event(struct record_info *ri) return toplevel; } - void process_cpu_change(struct pcpu_info *p) { struct record_info *ri = &p->ri; struct cpu_change_data *r = (typeof(r))ri->d; @@ -9190,6 +9300,7 @@ void process_cpu_change(struct pcpu_info *p) { p2->ri.d = p2->ri.rec.u.notsc.data; p2->file_offset = p->file_offset; p2->next_cpu_change_offset = p->file_offset; + p2->pi_list_length = -1; fprintf(warn, "%s: Activating pcpu %d at offset %lld\n", __func__, r->cpu, (unsigned long long)p->file_offset); @@ -9276,26 +9387,52 @@ void process_cpu_change(struct pcpu_info *p) { } } -struct tl_assert_mask { +struct assert_mask { unsigned p_current:1, - not_idle_domain:1; + not_idle_domain:1, + check_sublevel:1; int vcpu_data_mode; + int sub_max; + struct assert_mask *sub; }; -static struct tl_assert_mask tl_assert_checks[TOPLEVEL_MAX] = { - [TRC_HVM_MAIN]={ .p_current=1, .not_idle_domain=1, .vcpu_data_mode=VCPU_DATA_HVM }, + +static struct assert_mask sl_hvm_assert_mask[SUBLEVEL_HVM_MAX] = { + [SUBLEVEL_HVM_ENTRYEXIT] = { .p_current=1, .not_idle_domain=1, .vcpu_data_mode=VCPU_DATA_HVM }, + [SUBLEVEL_HVM_HANDLER] = { .p_current=1, .not_idle_domain=1, .vcpu_data_mode=VCPU_DATA_HVM }, + [SUBLEVEL_HVM_EMUL] = { .p_current=1, .not_idle_domain=1, .vcpu_data_mode=VCPU_DATA_HVM }, +}; + +static struct assert_mask tl_assert_checks[TOPLEVEL_MAX] = { + [TRC_HVM_MAIN]={ 
.check_sublevel=1, .sub=sl_hvm_assert_mask, .sub_max=SUBLEVEL_HVM_MAX }, [TRC_SHADOW_MAIN]={ .p_current=1, .not_idle_domain=1, .vcpu_data_mode=VCPU_DATA_HVM }, [TRC_PV_MAIN]={ .p_current=1, .not_idle_domain=1, .vcpu_data_mode=VCPU_DATA_PV }, }; +/* Other sub types are reserved */ +static int sublevel_to_index[16] = { + [1] = 1, + [2] = 2, + [4] = 3, + [8] = 4, +}; + /* There are a lot of common assumptions for the various processing * routines. Check them all in one place, doing something else if * they don't pass. */ int toplevel_assert_check(int toplevel, struct pcpu_info *p) { - struct tl_assert_mask mask; + struct assert_mask mask; mask = tl_assert_checks[toplevel]; + if (mask.check_sublevel) + { + int sub = sublevel_to_index[p->ri.evt.sub]; + + assert(mask.sub && (sub < mask.sub_max)); + mask = mask.sub[sub]; + } + if (mask.p_current && p->current == NULL) { fprintf(warn, "WARNING: p->current null! Not processing\n"); @@ -9362,7 +9499,6 @@ void process_record(struct pcpu_info *p) { if(opt.dump_all) create_dump_header(ri, p); - toplevel = find_toplevel_event(ri); if ( toplevel < 0 ) return; diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c index c8ef18a..abbf16b 100644 --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -82,6 +82,7 @@ static int vmx_vmfunc_intercept(struct cpu_user_regs *regs); struct vmx_pi_blocking_vcpu { struct list_head list; spinlock_t lock; + uint32_t counter; }; /* @@ -119,6 +120,9 @@ static void vmx_vcpu_block(struct vcpu *v) */ ASSERT(old_lock == NULL); + per_cpu(vmx_pi_blocking, v->processor).counter++; + TRACE_4D(TRC_HVM_PI_LIST_ADD, v->domain->domain_id, v->vcpu_id, + v->processor, per_cpu(vmx_pi_blocking, v->processor).counter); list_add_tail(&v->arch.hvm_vmx.pi_blocking.list, &per_cpu(vmx_pi_blocking, v->processor).list); spin_unlock_irqrestore(pi_blocking_list_lock, flags); @@ -186,6 +190,9 @@ static void vmx_pi_unblock_vcpu(struct vcpu *v) { ASSERT(v->arch.hvm_vmx.pi_blocking.lock == 
pi_blocking_list_lock); list_del(&v->arch.hvm_vmx.pi_blocking.list); + container_of(pi_blocking_list_lock, + struct vmx_pi_blocking_vcpu, lock)->counter--; + TRACE_2D(TRC_HVM_ASYNC_PI_LIST_DEL, v->domain->domain_id, v->vcpu_id); v->arch.hvm_vmx.pi_blocking.lock = NULL; } @@ -220,6 +227,8 @@ void vmx_pi_desc_fixup(unsigned int cpu) list_for_each_entry_safe(vmx, tmp, blocked_vcpus, pi_blocking.list) { + struct vcpu *v = container_of(vmx, struct vcpu, arch.hvm_vmx); + /* * Suppress notification or we may miss an interrupt when the * target cpu is dying. @@ -234,8 +243,11 @@ void vmx_pi_desc_fixup(unsigned int cpu) if ( pi_test_on(&vmx->pi_desc) ) { list_del(&vmx->pi_blocking.list); + TRACE_2D(TRC_HVM_ASYNC_PI_LIST_DEL, v->domain->domain_id, + v->vcpu_id); + per_cpu(vmx_pi_blocking, cpu).counter--; vmx->pi_blocking.lock = NULL; - vcpu_unblock(container_of(vmx, struct vcpu, arch.hvm_vmx)); + vcpu_unblock(v); } else { @@ -258,6 +270,12 @@ void vmx_pi_desc_fixup(unsigned int cpu) list_move(&vmx->pi_blocking.list, &per_cpu(vmx_pi_blocking, new_cpu).list); + per_cpu(vmx_pi_blocking, cpu).counter--; + per_cpu(vmx_pi_blocking, new_cpu).counter++; + TRACE_2D(TRC_HVM_ASYNC_PI_LIST_DEL, v->domain->domain_id, + v->vcpu_id); + TRACE_4D(TRC_HVM_PI_LIST_ADD, v->domain->domain_id, v->vcpu_id, + new_cpu, per_cpu(vmx_pi_blocking, new_cpu).counter); vmx->pi_blocking.lock = new_lock; spin_unlock(new_lock); @@ -2358,9 +2376,9 @@ static struct hvm_function_table __initdata vmx_function_table = { static void pi_wakeup_interrupt(struct cpu_user_regs *regs) { struct arch_vmx_struct *vmx, *tmp; - spinlock_t *lock = &per_cpu(vmx_pi_blocking, smp_processor_id()).lock; - struct list_head *blocked_vcpus = - &per_cpu(vmx_pi_blocking, smp_processor_id()).list; + unsigned int cpu = smp_processor_id(); + spinlock_t *lock = &per_cpu(vmx_pi_blocking, cpu).lock; + struct list_head *blocked_vcpus = &per_cpu(vmx_pi_blocking, cpu).list; ack_APIC_irq(); this_cpu(irq_count)++; @@ -2376,10 +2394,15 @@ static 
void pi_wakeup_interrupt(struct cpu_user_regs *regs) { if ( pi_test_on(&vmx->pi_desc) ) { + struct vcpu *v = container_of(vmx, struct vcpu, arch.hvm_vmx); + list_del(&vmx->pi_blocking.list); + TRACE_2D(TRC_HVM_ASYNC_PI_LIST_DEL, v->domain->domain_id, + v->vcpu_id); + per_cpu(vmx_pi_blocking, cpu).counter--; ASSERT(vmx->pi_blocking.lock == lock); vmx->pi_blocking.lock = NULL; - vcpu_unblock(container_of(vmx, struct vcpu, arch.hvm_vmx)); + vcpu_unblock(v); } } diff --git a/xen/include/asm-x86/hvm/trace.h b/xen/include/asm-x86/hvm/trace.h index de802a6..b74ffdd 100644 --- a/xen/include/asm-x86/hvm/trace.h +++ b/xen/include/asm-x86/hvm/trace.h @@ -54,6 +54,7 @@ #define DO_TRC_HVM_TRAP DEFAULT_HVM_MISC #define DO_TRC_HVM_TRAP_DEBUG DEFAULT_HVM_MISC #define DO_TRC_HVM_VLAPIC DEFAULT_HVM_MISC +#define DO_TRC_HVM_PI_LIST_ADD DEFAULT_HVM_MISC #define TRC_PAR_LONG(par) ((par)&0xFFFFFFFF),((par)>>32) diff --git a/xen/include/public/trace.h b/xen/include/public/trace.h index 7f2e891..15ea87c 100644 --- a/xen/include/public/trace.h +++ b/xen/include/public/trace.h @@ -53,6 +53,7 @@ #define TRC_HVM_ENTRYEXIT 0x00081000 /* VMENTRY and #VMEXIT */ #define TRC_HVM_HANDLER 0x00082000 /* various HVM handlers */ #define TRC_HVM_EMUL 0x00084000 /* emulated devices */ +#define TRC_HVM_ASYNC 0x00088000 /* Asynchronous events */ #define TRC_SCHED_MIN 0x00021000 /* Just runstate changes */ #define TRC_SCHED_CLASS 0x00022000 /* Scheduler-specific */ @@ -234,6 +235,7 @@ #define TRC_HVM_TRAP (TRC_HVM_HANDLER + 0x23) #define TRC_HVM_TRAP_DEBUG (TRC_HVM_HANDLER + 0x24) #define TRC_HVM_VLAPIC (TRC_HVM_HANDLER + 0x25) +#define TRC_HVM_PI_LIST_ADD (TRC_HVM_HANDLER + 0x26) #define TRC_HVM_IOPORT_WRITE (TRC_HVM_HANDLER + 0x216) #define TRC_HVM_IOMEM_WRITE (TRC_HVM_HANDLER + 0x217) @@ -257,6 +259,9 @@ #define TRC_HVM_EMUL_PIC_PEND_IRQ_CALL (TRC_HVM_EMUL + 0x10) #define TRC_HVM_EMUL_LAPIC_PIC_INTR (TRC_HVM_EMUL + 0x11) +/* Trace asynchronous events for HVM */ +#define TRC_HVM_ASYNC_PI_LIST_DEL 
(TRC_HVM_ASYNC + 0x1) + /* trace events for per class */ #define TRC_PM_FREQ_CHANGE (TRC_HW_PM + 0x01) #define TRC_PM_IDLE_ENTRY (TRC_HW_PM + 0x02) -- 1.8.3.1 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx https://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |