
[Xen-devel] Encountering a problem with xenalyze



Hi, George.

Per your suggestion, I tried to add some code to xenalyze.c to analyze
some newly added trace events; please see the patch at the bottom for
the details. In this patch, I try to count how many times a given vcpu
is added to and removed from a pi blocking list. After collecting some
data, I compared the results of the two decoders, xenalyze and
xentrace_format. xentrace_format shows that one vcpu is added and
removed 2499 times each, but xenalyze shows that the same vcpu is added
2499 times and removed only about 700 times. Some other vcpus show the
same discrepancy.
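
For reference, here is roughly how the per-vcpu counts can be tallied
from the xentrace_format output (a minimal sketch, not part of the
patch; the helper name pi_count, the 128-vcpu limit and the fact that
it ignores the domid are assumptions made purely for illustration):

/* pi_count.c - count PI_LIST_ADD/PI_LIST_DEL lines per vcpu id in
 * xentrace_format output, assuming the format strings added below.
 * Build: gcc -o pi_count pi_count.c
 * Use:   xentrace_format formats < trace.bin | ./pi_count
 */
#include <stdio.h>
#include <string.h>

#define MAX_VCPU 128

int main(void)
{
    char line[1024];
    unsigned long add[MAX_VCPU] = {0}, del[MAX_VCPU] = {0};
    unsigned int vcpu;

    while ( fgets(line, sizeof(line), stdin) )
    {
        char *p = strstr(line, "vcpu = ");

        /* Skip lines without a "vcpu = 0x...." field or with an
         * out-of-range vcpu id. */
        if ( !p || sscanf(p, "vcpu = %x", &vcpu) != 1 || vcpu >= MAX_VCPU )
            continue;

        if ( strstr(line, "PI_LIST_ADD") )
            add[vcpu]++;
        else if ( strstr(line, "PI_LIST_DEL") )
            del[vcpu]++;
    }

    for ( vcpu = 0; vcpu < MAX_VCPU; vcpu++ )
        if ( add[vcpu] || del[vcpu] )
            printf("vcpu %u: added %lu times, removed %lu times\n",
                   vcpu, add[vcpu], del[vcpu]);

    return 0;
}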

Could you give me some suggestions on how to debug this problem?
Or are there bugs in this patch?

Thanks
Chao

---8<---
From 01c10aa7edcded99017d88ac4a7fff19168f132d Mon Sep 17 00:00:00 2001
From: Chao Gao <chao.gao@xxxxxxxxx>
Date: Wed, 26 Apr 2017 03:39:06 +0800
Subject: [PATCH 1/3] VT-d PI: track the vcpu number in pi blocking list

This patch adds a field, counter, to struct vmx_pi_blocking_vcpu to track
how many entries are on the pi blocking list. In order to analyze the
frequency of list operations and obtain the list length, add some relevant
events to xentrace and the associated decoding code to xenalyze.

Signed-off-by: Chao Gao <chao.gao@xxxxxxxxx>
---
 tools/xentrace/formats          |  2 +
 tools/xentrace/xenalyze.c       | 95 ++++++++++++++++++++++++++++++++++++++++-
 xen/arch/x86/hvm/vmx/vmx.c      | 30 ++++++++++---
 xen/include/asm-x86/hvm/trace.h |  2 +
 xen/include/public/trace.h      |  2 +
 5 files changed, 124 insertions(+), 7 deletions(-)

diff --git a/tools/xentrace/formats b/tools/xentrace/formats
index 8b31780..e372d4d 100644
--- a/tools/xentrace/formats
+++ b/tools/xentrace/formats
@@ -125,6 +125,8 @@
 0x00082020  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  INTR_WINDOW [ value = 0x%(1)08x ]
 0x00082021  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  NPF         [ gpa = 0x%(2)08x%(1)08x mfn = 0x%(4)08x%(3)08x qual = 0x%(5)04x p2mt = 0x%(6)04x ]
 0x00082023  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  TRAP        [ vector = 0x%(1)02x ]
+0x00082026  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PI_LIST_ADD [ domid = 0x%(1)04x vcpu = 0x%(2)04x, pcpu = 0x%(3)04x, #entry = 0x%(4)04x ]
+0x00082027  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  PI_LIST_DEL [ domid = 0x%(1)04x vcpu = 0x%(2)04x ]
 
 0x0010f001  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  page_grant_map      [ domid = %(1)d ]
 0x0010f002  CPU%(cpu)d  %(tsc)d (+%(reltsc)8d)  page_grant_unmap    [ domid = %(1)d ]
diff --git a/tools/xentrace/xenalyze.c b/tools/xentrace/xenalyze.c
index fa608ad..18f7bfc 100644
--- a/tools/xentrace/xenalyze.c
+++ b/tools/xentrace/xenalyze.c
@@ -296,6 +296,7 @@ struct symbol_struct {
 };
 
 void error(enum error_level l, struct record_info *ri);
+struct vcpu_data * vcpu_find(int did, int vid);
 
 void parse_symbol_file(char *fn) {
     unsigned long long last_addr = 0;
@@ -944,6 +945,8 @@ enum {
     HVM_EVENT_TRAP,
     HVM_EVENT_TRAP_DEBUG,
     HVM_EVENT_VLAPIC,
+    HVM_EVENT_PI_LIST_ADD,
+    HVM_EVENT_PI_LIST_DEL,
     HVM_EVENT_HANDLER_MAX
 };
 char * hvm_event_handler_name[HVM_EVENT_HANDLER_MAX] = {
@@ -979,7 +982,9 @@ char * hvm_event_handler_name[HVM_EVENT_HANDLER_MAX] = {
     "realmode_emulate",
     "trap",
     "trap_debug",
-    "vlapic"
+    "vlapic",
+    "pi_list_add",
+    "pi_list_del"
 };
 
 enum {
@@ -1337,6 +1342,9 @@ struct hvm_data {
         struct {
             struct io_address *mmio, *pio;
         } io;
+        struct {
+            int pi_list_add, pi_list_del;
+        } pi;
     } summary;
 
     /* In-flight accumulation information */
@@ -1391,6 +1399,9 @@ struct hvm_data {
 
     /* Historical info */
     tsc_t last_rdtsc;
+
+    /* Destination pcpu of posted interrupt's wakeup interrupt */
+    int pi_cpu;
 };
 
 enum {
@@ -1457,6 +1468,8 @@ void init_hvm_data(struct hvm_data *h, struct vcpu_data *v) {
     }
     for(i=0; i<GUEST_INTERRUPT_MAX+1; i++)
         h->summary.guest_interrupt[i].count=0;
+
+    h->pi_cpu = -1;
 }
 
 /* PV data */
@@ -1852,6 +1865,9 @@ struct pcpu_info {
         tsc_t tsc;
         struct cycle_summary idle, running, lost;
     } time;
+
+    /* Posted Interrupt List Length */
+    int pi_list_length;
 };
 
 void __fill_in_record_info(struct pcpu_info *p);
@@ -4726,6 +4742,71 @@ void hvm_generic_dump(struct record_info *ri, char * prefix)
     printf(" ]\n");
 }
 
+void hvm_pi_list_add_process(struct record_info *ri, struct hvm_data *h)
+{
+    struct {
+        int did;
+        int vid;
+        int pcpu;
+        int len;
+    } *data;
+    struct vcpu_data *v;
+
+    data = (typeof(data))ri->rec.u.tsc.data;
+    v = vcpu_find(data->did, data->vid);
+    if ( !v->hvm.init )
+        init_hvm_data(&v->hvm, v);
+
+    if ( opt.dump_all )
+        printf("d%uv%u is added to pi blocking list of pcpu%u. "
+               "The list length is now %d\n",
+               data->did, data->vid, data->pcpu, data->len);
+
+    v->hvm.pi_cpu = data->pcpu;
+    v->hvm.summary.pi.pi_list_add++;
+    if ( data->pcpu > P.max_active_pcpu || !P.pcpu[data->pcpu].active )
+        fprintf(stderr, "Strange! pcpu%u is inactive but a vcpu is added"
+                "to it", data->pcpu);
+    else if ( P.pcpu[data->pcpu].pi_list_length == -1 )
+        P.pcpu[data->pcpu].pi_list_length = data->len;
+    else if ( data->len != ++P.pcpu[data->pcpu].pi_list_length )
+    {
+        fprintf(stderr, "pcpu%u's pi list length %d isn't equal to "
+                "value %d got by analysis. Correct it!\n",
+                data->pcpu, data->len, P.pcpu[data->pcpu].pi_list_length);
+        P.pcpu[data->pcpu].pi_list_length = data->len;
+    }
+}
+
+void hvm_pi_list_del_process(struct record_info *ri, struct hvm_data *h)
+{
+    struct {
+        int did;
+        int vid;
+    } *data;
+    struct vcpu_data *v;
+
+    data = (typeof(data))ri->rec.u.tsc.data;
+    v = vcpu_find(data->did, data->vid);
+    if ( !v->hvm.init )
+        init_hvm_data(&v->hvm, v);
+
+    if ( opt.dump_all )
+    {
+        if ( v->hvm.pi_cpu != -1 )
+            printf("d%uv%u is removed from pi blocking list of pcpu%u\n",
+                   data->did, data->vid, v->hvm.pi_cpu);
+        else
+            printf("d%uv%u is removed from pi blocking list\n",
+                   data->did, data->vid);
+    }
+
+    if ( v->hvm.pi_cpu != -1 && P.pcpu[v->hvm.pi_cpu].pi_list_length != -1 )
+        P.pcpu[v->hvm.pi_cpu].pi_list_length--;
+    v->hvm.pi_cpu = -1;
+    v->hvm.summary.pi.pi_list_del++;
+}
+
 void hvm_handler_process(struct record_info *ri, struct hvm_data *h) {
     /* Wait for first vmexit to initialize */
     if(!h->init)
@@ -4763,6 +4844,12 @@ void hvm_handler_process(struct record_info *ri, struct hvm_data *h) {
     case TRC_HVM_INTR_WINDOW:
         hvm_intr_window_process(ri, h);
         break;
+    case TRC_HVM_PI_LIST_ADD:
+        hvm_pi_list_add_process(ri, h);
+        break;
+    case TRC_HVM_PI_LIST_DEL:
+        hvm_pi_list_del_process(ri, h);
+        break;
     case TRC_HVM_OP_DESTROY_PROC:
         if(h->v->cr3.data) {
             struct cr3_value_struct *cur = h->v->cr3.data;
@@ -4862,7 +4949,6 @@ needs_vmexit:
 void vcpu_next_update(struct pcpu_info *p, struct vcpu_data *next, tsc_t tsc);
 void vcpu_prev_update(struct pcpu_info *p, struct vcpu_data *prev,
                       tsc_t tsc, int new_runstate);
-struct vcpu_data * vcpu_find(int did, int vid);
 void lose_vcpu(struct vcpu_data *v, tsc_t tsc);
 
 int domain_runstate(struct domain_data *d) {
@@ -5359,6 +5445,10 @@ void hvm_summary(struct hvm_data *h) {
                   i, h->summary.ipi_count[i]);
    hvm_io_address_summary(h->summary.io.pio, "IO address summary:");
    hvm_io_address_summary(h->summary.io.mmio, "MMIO address summary:");
+
+   printf("Posted Interrupt:\n");
+   printf(" List Add: %u\n", h->summary.pi.pi_list_add);
+   printf(" List Del: %u\n", h->summary.pi.pi_list_del);
 }
 
 /* ---- Shadow records ---- */
@@ -8962,6 +9052,7 @@ off_t scan_for_new_pcpu(off_t offset) {
 
         p->file_offset = offset;
         p->next_cpu_change_offset = offset;
+        p->pi_list_length = -1;
 
         record_order_insert(p);
 
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index c8ef18a..4417adb 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -82,6 +82,7 @@ static int vmx_vmfunc_intercept(struct cpu_user_regs *regs);
 struct vmx_pi_blocking_vcpu {
     struct list_head     list;
     spinlock_t           lock;
+    uint32_t             counter;
 };
 
 /*
@@ -119,6 +120,9 @@ static void vmx_vcpu_block(struct vcpu *v)
      */
     ASSERT(old_lock == NULL);
 
+    per_cpu(vmx_pi_blocking, v->processor).counter++;
+    TRACE_4D(TRC_HVM_PI_LIST_ADD, v->domain->domain_id, v->vcpu_id,
+             v->processor, per_cpu(vmx_pi_blocking, v->processor).counter);
     list_add_tail(&v->arch.hvm_vmx.pi_blocking.list,
                   &per_cpu(vmx_pi_blocking, v->processor).list);
     spin_unlock_irqrestore(pi_blocking_list_lock, flags);
@@ -186,6 +190,9 @@ static void vmx_pi_unblock_vcpu(struct vcpu *v)
     {
         ASSERT(v->arch.hvm_vmx.pi_blocking.lock == pi_blocking_list_lock);
         list_del(&v->arch.hvm_vmx.pi_blocking.list);
+        container_of(pi_blocking_list_lock,
+                     struct vmx_pi_blocking_vcpu, lock)->counter--;
+        TRACE_2D(TRC_HVM_PI_LIST_DEL, v->domain->domain_id, v->vcpu_id);
         v->arch.hvm_vmx.pi_blocking.lock = NULL;
     }
 
@@ -220,6 +227,8 @@ void vmx_pi_desc_fixup(unsigned int cpu)
 
     list_for_each_entry_safe(vmx, tmp, blocked_vcpus, pi_blocking.list)
     {
+        struct vcpu *v = container_of(vmx, struct vcpu, arch.hvm_vmx);
+
         /*
          * Suppress notification or we may miss an interrupt when the
          * target cpu is dying.
@@ -234,8 +243,10 @@ void vmx_pi_desc_fixup(unsigned int cpu)
         if ( pi_test_on(&vmx->pi_desc) )
         {
             list_del(&vmx->pi_blocking.list);
+            TRACE_2D(TRC_HVM_PI_LIST_DEL, v->domain->domain_id, v->vcpu_id);
+            per_cpu(vmx_pi_blocking, cpu).counter--;
             vmx->pi_blocking.lock = NULL;
-            vcpu_unblock(container_of(vmx, struct vcpu, arch.hvm_vmx));
+            vcpu_unblock(v);
         }
         else
         {
@@ -258,6 +269,11 @@ void vmx_pi_desc_fixup(unsigned int cpu)
 
             list_move(&vmx->pi_blocking.list,
                       &per_cpu(vmx_pi_blocking, new_cpu).list);
+            per_cpu(vmx_pi_blocking, cpu).counter--;
+            per_cpu(vmx_pi_blocking, new_cpu).counter++;
+            TRACE_2D(TRC_HVM_PI_LIST_DEL, v->domain->domain_id, v->vcpu_id);
+            TRACE_4D(TRC_HVM_PI_LIST_ADD, v->domain->domain_id, v->vcpu_id,
+                     new_cpu, per_cpu(vmx_pi_blocking, new_cpu).counter);
             vmx->pi_blocking.lock = new_lock;
 
             spin_unlock(new_lock);
@@ -2358,9 +2374,9 @@ static struct hvm_function_table __initdata vmx_function_table = {
 static void pi_wakeup_interrupt(struct cpu_user_regs *regs)
 {
     struct arch_vmx_struct *vmx, *tmp;
-    spinlock_t *lock = &per_cpu(vmx_pi_blocking, smp_processor_id()).lock;
-    struct list_head *blocked_vcpus =
-               &per_cpu(vmx_pi_blocking, smp_processor_id()).list;
+    unsigned int cpu = smp_processor_id();
+    spinlock_t *lock = &per_cpu(vmx_pi_blocking, cpu).lock;
+    struct list_head *blocked_vcpus = &per_cpu(vmx_pi_blocking, cpu).list;
 
     ack_APIC_irq();
     this_cpu(irq_count)++;
@@ -2376,10 +2392,14 @@ static void pi_wakeup_interrupt(struct cpu_user_regs *regs)
     {
         if ( pi_test_on(&vmx->pi_desc) )
         {
+            struct vcpu *v = container_of(vmx, struct vcpu, arch.hvm_vmx);
+
             list_del(&vmx->pi_blocking.list);
+            TRACE_2D(TRC_HVM_PI_LIST_DEL, v->domain->domain_id, v->vcpu_id);
+            per_cpu(vmx_pi_blocking, cpu).counter--;
             ASSERT(vmx->pi_blocking.lock == lock);
             vmx->pi_blocking.lock = NULL;
-            vcpu_unblock(container_of(vmx, struct vcpu, arch.hvm_vmx));
+            vcpu_unblock(v);
         }
     }
 
diff --git a/xen/include/asm-x86/hvm/trace.h b/xen/include/asm-x86/hvm/trace.h
index de802a6..97e9eff 100644
--- a/xen/include/asm-x86/hvm/trace.h
+++ b/xen/include/asm-x86/hvm/trace.h
@@ -54,6 +54,8 @@
 #define DO_TRC_HVM_TRAP             DEFAULT_HVM_MISC
 #define DO_TRC_HVM_TRAP_DEBUG       DEFAULT_HVM_MISC
 #define DO_TRC_HVM_VLAPIC           DEFAULT_HVM_MISC
+#define DO_TRC_HVM_PI_LIST_ADD      DEFAULT_HVM_MISC
+#define DO_TRC_HVM_PI_LIST_DEL      DEFAULT_HVM_MISC
 
 
 #define TRC_PAR_LONG(par) ((par)&0xFFFFFFFF),((par)>>32)
diff --git a/xen/include/public/trace.h b/xen/include/public/trace.h
index 7f2e891..7d182ea 100644
--- a/xen/include/public/trace.h
+++ b/xen/include/public/trace.h
@@ -234,6 +234,8 @@
 #define TRC_HVM_TRAP             (TRC_HVM_HANDLER + 0x23)
 #define TRC_HVM_TRAP_DEBUG       (TRC_HVM_HANDLER + 0x24)
 #define TRC_HVM_VLAPIC           (TRC_HVM_HANDLER + 0x25)
+#define TRC_HVM_PI_LIST_ADD      (TRC_HVM_HANDLER + 0x26)
+#define TRC_HVM_PI_LIST_DEL      (TRC_HVM_HANDLER + 0x27)
 
 #define TRC_HVM_IOPORT_WRITE    (TRC_HVM_HANDLER + 0x216)
 #define TRC_HVM_IOMEM_WRITE     (TRC_HVM_HANDLER + 0x217)
-- 
1.8.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 

