[Xen-changelog] [xen master] xenalyze: add to tools/xentrace/
commit fd19a9976e7ce63bf07f8af1b9b5e8bb5812d51d
Author: Olaf Hering <olaf@xxxxxxxxx>
AuthorDate: Thu Jun 11 16:30:38 2015 +0000
Commit: Ian Campbell <ian.campbell@xxxxxxxxxx>
CommitDate: Tue Jun 16 11:34:59 2015 +0100
xenalyze: add to tools/xentrace/
This merges xenalyze.hg, changeset 150:24308507be1d,
into tools/xentrace/xenalyze.c to have the tool and
public/trace.h in one place.
Adjust code to use public/trace.h instead of private trace.h
Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
Acked-by: Wei Liu <wei.liu2@xxxxxxxxxx>
Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>
Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Cc: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
Cc: Ian Campbell <ian.campbell@xxxxxxxxxx>
Cc: Wei Liu <wei.liu2@xxxxxxxxxx>
[ ijc -- wrap $(BIN) install in a check in case it is empty (which it
is on !x86); avoid BIN += since it results in BIN = ' ' on !x86 ]
---
.gitignore | 1 +
tools/xentrace/Makefile | 9 +-
tools/xentrace/analyze.h | 107 +
tools/xentrace/mread.c | 160 +
tools/xentrace/mread.h | 18 +
tools/xentrace/pv.h | 41 +
tools/xentrace/xenalyze.c |10407 +++++++++++++++++++++++++++++++++++++++++++++
7 files changed, 10742 insertions(+), 1 deletions(-)
diff --git a/.gitignore b/.gitignore
index 3bc9cd9..3f42ded 100644
--- a/.gitignore
+++ b/.gitignore
@@ -173,6 +173,7 @@ tools/misc/gtracestat
tools/misc/xenlockprof
tools/misc/lowmemd
tools/misc/xencov
+tools/xentrace/xenalyze
tools/pygrub/build/*
tools/python/build/*
tools/security/secpol_tool
diff --git a/tools/xentrace/Makefile b/tools/xentrace/Makefile
index 5360960..7d874a3 100644
--- a/tools/xentrace/Makefile
+++ b/tools/xentrace/Makefile
@@ -6,7 +6,8 @@ CFLAGS += -Werror
CFLAGS += $(CFLAGS_libxenctrl)
LDLIBS += $(LDLIBS_libxenctrl)
-BIN =
+BIN-$(CONFIG_X86) = xenalyze
+BIN = $(BIN-y)
SBIN = xentrace xentrace_setsize
LIBBIN = xenctx
SCRIPTS = xentrace_format
@@ -26,6 +27,9 @@ install: build
[ -z "$(LIBBIN)" ] || $(INSTALL_DIR) $(DESTDIR)$(LIBEXEC_BIN)
$(INSTALL_DIR) $(DESTDIR)$(MAN1DIR)
$(INSTALL_DIR) $(DESTDIR)$(MAN8DIR)
+ifneq ($(BIN),)
+ $(INSTALL_PROG) $(BIN) $(DESTDIR)$(bindir)
+endif
$(INSTALL_PROG) $(SBIN) $(DESTDIR)$(sbindir)
$(INSTALL_PYTHON_PROG) $(SCRIPTS) $(DESTDIR)$(bindir)
[ -z "$(LIBBIN)" ] || $(INSTALL_PROG) $(LIBBIN) $(DESTDIR)$(LIBEXEC_BIN)
@@ -48,5 +52,8 @@ xenctx: xenctx.o
xentrace_setsize: setsize.o
$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) $(APPEND_LDFLAGS)
+xenalyze: xenalyze.o mread.o
+ $(CC) $(LDFLAGS) -o $@ $^ $(LDLIBS) $(APPEND_LDFLAGS)
+
-include $(DEPS)
diff --git a/tools/xentrace/analyze.h b/tools/xentrace/analyze.h
new file mode 100644
index 0000000..40ee551
--- /dev/null
+++ b/tools/xentrace/analyze.h
@@ -0,0 +1,107 @@
+#ifndef __ANALYZE_H
+# define __ANALYZE_H
+
+#include <stdint.h>
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+#define TRC_GEN_MAIN 0
+#define TRC_SCHED_MAIN 1
+#define TRC_DOM0OP_MAIN 2
+#define TRC_HVM_MAIN 3
+#define TRC_MEM_MAIN 4
+#define TRC_PV_MAIN 5
+#define TRC_SHADOW_MAIN 6
+#define TRC_HW_MAIN 7
+
+#define TRC_LOST_RECORDS_END (TRC_GEN + 50)
+
+#define NR_CPUS 128
+#if __x86_64__
+# define BITS_PER_LONG 64
+#else
+# define BITS_PER_LONG 32
+#endif
+
+#define BITS_TO_LONGS(bits) \
+ (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG)
+#define DECLARE_BITMAP(name,bits) \
+ unsigned long name[BITS_TO_LONGS(bits)]
+typedef struct cpumask{ DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
+
+enum {
+ TRCE_SFLAG_SET_AD,
+ TRCE_SFLAG_SET_A,
+ TRCE_SFLAG_SHADOW_L1_GET_REF,
+ TRCE_SFLAG_SHADOW_L1_PUT_REF,
+ TRCE_SFLAG_L2_PROPAGATE,
+ TRCE_SFLAG_SET_CHANGED,
+ TRCE_SFLAG_SET_FLUSH,
+ TRCE_SFLAG_SET_ERROR,
+ TRCE_SFLAG_DEMOTE,
+ TRCE_SFLAG_PROMOTE,
+ TRCE_SFLAG_WRMAP,
+ TRCE_SFLAG_WRMAP_GUESS_FOUND,
+ TRCE_SFLAG_WRMAP_BRUTE_FORCE,
+ TRCE_SFLAG_EARLY_UNSHADOW,
+ TRCE_SFLAG_EMULATION_2ND_PT_WRITTEN,
+ TRCE_SFLAG_EMULATION_LAST_FAILED,
+ TRCE_SFLAG_EMULATE_FULL_PT,
+ TRCE_SFLAG_PREALLOC_UNPIN,
+ TRCE_SFLAG_PREALLOC_UNHOOK
+};
+
+#define TRC_HVM_OP_DESTROY_PROC (TRC_HVM_HANDLER + 0x100)
+
+typedef unsigned long long tsc_t;
+
+/* -- on-disk trace buffer definitions -- */
+struct trace_record {
+ union {
+ struct {
+ unsigned event:28,
+ extra_words:3,
+ cycle_flag:1;
+ union {
+ struct {
+ uint32_t tsc_lo, tsc_hi;
+ uint32_t data[7];
+ } tsc;
+ struct {
+ uint32_t data[7];
+ } notsc;
+ } u;
+ };
+ uint32_t raw[8];
+ };
+};
+
+/* -- General info about a current record -- */
+struct time_struct {
+ unsigned long long time;
+ unsigned int s, ns;
+};
+
+#define DUMP_HEADER_MAX 256
+
+struct record_info {
+ int cpu;
+ tsc_t tsc;
+ union {
+ unsigned event;
+ struct {
+ unsigned minor:12,
+ sub:4,
+ main:12,
+ unused:4;
+ } evt;
+ };
+ int extra_words;
+ int size;
+ uint32_t *d;
+ char dump_header[DUMP_HEADER_MAX];
+ struct time_struct t;
+ struct trace_record rec;
+};
+
+#endif
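
A minimal illustration, not part of the patch: the evt bitfield in struct record_info above splits a raw event word into minor (low 12 bits), sub (next 4 bits) and main (next 12 bits, matching the TRC_*_MAIN numbering at the top of the header). The same split done with explicit shifts, assuming the usual allocation of bitfields from the least-significant bit upwards; the event value is made up:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint32_t event = 0x0008100a;            /* made-up word: main bit 3 (HVM), sub 1, minor 0xa */
    unsigned minor = event & 0xfff;         /* evt.minor: bits 0-11 */
    unsigned sub   = (event >> 12) & 0xf;   /* evt.sub:   bits 12-15 */
    unsigned cls   = (event >> 16) & 0xfff; /* evt.main:  bits 16-27, normally one-hot */

    printf("main %#x sub %#x minor %#x\n", cls, sub, minor);
    return 0;
}
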
diff --git a/tools/xentrace/mread.c b/tools/xentrace/mread.c
new file mode 100644
index 0000000..a63d16c
--- /dev/null
+++ b/tools/xentrace/mread.c
@@ -0,0 +1,160 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <errno.h>
+#include "mread.h"
+
+mread_handle_t mread_init(int fd)
+{
+ struct stat64 s;
+ mread_handle_t h;
+
+ h=malloc(sizeof(struct mread_ctrl));
+
+ if (!h)
+ {
+ perror("malloc");
+ exit(1);
+ }
+
+ bzero(h, sizeof(struct mread_ctrl));
+
+ h->fd = fd;
+
+ fstat64(fd, &s);
+ h->file_size = s.st_size;
+
+ return h;
+}
+
+ssize_t mread64(mread_handle_t h, void *rec, ssize_t len, loff_t offset)
+{
+ /* Idea: have a "cache" of N mmaped regions. If the offset is
+ * in one of the regions, just copy it. If not, evict one of the
+ * regions and map the appropriate range.
+ *
+ * Basic algorithm:
+ * - See if the offset is in one of the regions
+ * - If not, map it
+ * - evict an old region
+ * - map the new region
+ * - Copy
+ */
+ char * b=NULL;
+ int bind=-1;
+ loff_t boffset=0;
+ ssize_t bsize;
+
+#define dprintf(x...)
+//#define dprintf fprintf
+
+ dprintf(warn, "%s: offset %llx len %d\n", __func__,
+ offset, len);
+ if ( offset > h->file_size )
+ {
+ dprintf(warn, " offset > file size %llx, returning 0\n",
+ h->file_size);
+ return 0;
+ }
+ if ( offset + len > h->file_size )
+ {
+ dprintf(warn, " offset+len > file size %llx, truncating\n",
+ h->file_size);
+ len = h->file_size - offset;
+ }
+
+ /* Try to find the offset in our range */
+ dprintf(warn, " Trying last, %d\n", last);
+ if ( h->map[h->last].buffer
+ && (offset & MREAD_BUF_MASK) == h->map[h->last].start_offset )
+ {
+ bind=h->last;
+ goto copy;
+ }
+
+ /* Scan to see if it's anywhere else */
+ dprintf(warn, " Scanning\n");
+ for(bind=0; bind<MREAD_MAPS; bind++)
+ if ( h->map[bind].buffer
+ && (offset & MREAD_BUF_MASK) == h->map[bind].start_offset )
+ {
+ dprintf(warn, " Found, index %d\n", bind);
+ break;
+ }
+
+ /* If we didn't find it, evict someone and map it */
+ if ( bind == MREAD_MAPS )
+ {
+ dprintf(warn, " Clock\n");
+ while(1)
+ {
+ h->clock++;
+ if(h->clock >= MREAD_MAPS)
+ h->clock=0;
+ dprintf(warn, " %d\n", h->clock);
+ if(h->map[h->clock].buffer == NULL)
+ {
+ dprintf(warn, " Buffer null, using\n");
+ break;
+ }
+ if(!h->map[h->clock].accessed)
+ {
+ dprintf(warn, " Not accessed, using\n");
+ break;
+ }
+ h->map[h->clock].accessed=0;
+ }
+ if(h->map[h->clock].buffer)
+ {
+ dprintf(warn, " Unmapping\n");
+ munmap(h->map[h->clock].buffer, MREAD_BUF_SIZE);
+ }
+ /* FIXME: Try MAP_HUGETLB? */
+ /* FIXME: Make sure this works on large files... */
+ h->map[h->clock].start_offset = offset & MREAD_BUF_MASK;
+ dprintf(warn, " Mapping %llx from offset %llx\n",
+ MREAD_BUF_SIZE, h->map[h->clock].start_offset);
+ h->map[h->clock].buffer = mmap(NULL, MREAD_BUF_SIZE, PROT_READ,
+ MAP_SHARED,
+ h->fd,
+ h->map[h->clock].start_offset);
+ dprintf(warn, " mmap returned %p\n", h->map[h->clock].buffer);
+ if ( h->map[h->clock].buffer == MAP_FAILED )
+ {
+ h->map[h->clock].buffer = NULL;
+ perror("mmap");
+ exit(1);
+ }
+ bind = h->clock;
+ }
+
+ h->last=bind;
+copy:
+ h->map[bind].accessed=1;
+ b=h->map[bind].buffer;
+ boffset=offset - h->map[bind].start_offset;
+ if ( boffset + len > MREAD_BUF_SIZE )
+ bsize = MREAD_BUF_SIZE - boffset;
+ else
+ bsize = len;
+ dprintf(warn, " Using index %d, buffer at %p, buffer offset %llx len %d\n",
+ bind, b, boffset, bsize);
+
+ bcopy(b+boffset, rec, bsize);
+
+ /* Handle the boundary case; make sure this is after doing anything
+ * with the static variables*/
+ if ( len > bsize )
+ {
+ dprintf(warn, " Finishing up by reading l %d o %llx\n",
+ len-bsize, offset+bsize);
+ mread64(h, rec+bsize, len-bsize, offset+bsize);
+ }
+
+ /* FIXME: ?? */
+ return len;
+#undef dprintf
+}
diff --git a/tools/xentrace/mread.h b/tools/xentrace/mread.h
new file mode 100644
index 0000000..8df41a8
--- /dev/null
+++ b/tools/xentrace/mread.h
@@ -0,0 +1,18 @@
+#define MREAD_MAPS 8
+#define MREAD_BUF_SHIFT 9
+#define PAGE_SHIFT 12
+#define MREAD_BUF_SIZE (1ULL<<(PAGE_SHIFT+MREAD_BUF_SHIFT))
+#define MREAD_BUF_MASK (~(MREAD_BUF_SIZE-1))
+typedef struct mread_ctrl {
+ int fd;
+ loff_t file_size;
+ struct mread_buffer {
+ char * buffer;
+ loff_t start_offset;
+ int accessed;
+ } map[MREAD_MAPS];
+ int clock, last;
+} *mread_handle_t;
+
+mread_handle_t mread_init(int fd);
+ssize_t mread64(mread_handle_t h, void *dst, ssize_t len, loff_t offset);
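
As a usage illustration, not part of the patch: a consumer of this interface opens the trace file, gets a handle from mread_init(), and reads records by absolute file offset with mread64(); window mapping and eviction happen internally. The file name below is a placeholder.

#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include "analyze.h"
#include "mread.h"

int main(void)
{
    int fd = open("trace.bin", O_RDONLY);   /* hypothetical xentrace output */
    struct trace_record rec;
    mread_handle_t h;

    if ( fd < 0 )
        return 1;

    h = mread_init(fd);

    /* Read one 32-byte record slot from the start of the file; a short
     * or empty file simply yields a short (or zero) read. */
    if ( mread64(h, &rec, sizeof(rec), 0) == (ssize_t)sizeof(rec) )
        printf("event %#x, %d extra words\n", (unsigned)rec.event, rec.extra_words);

    return 0;
}
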
diff --git a/tools/xentrace/pv.h b/tools/xentrace/pv.h
new file mode 100644
index 0000000..3e6ad77
--- /dev/null
+++ b/tools/xentrace/pv.h
@@ -0,0 +1,41 @@
+/*
+ * PV event decoding.
+ *
+ * Copyright (C) 2012 Citrix Systems R&D Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+#ifndef __PV_H
+
+#include "analyze.h"
+#include <xen/trace.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ARG_MISSING 0x0
+#define ARG_32BIT 0x1
+#define ARG_64BIT 0x2
+
+#define MMU_UPDATE_PREEMPTED (~(~0U>>1))
+
+static inline uint32_t pv_hypercall_op(const struct record_info *ri)
+{
+ return ri->d[0] & ~TRC_PV_HYPERCALL_V2_ARG_MASK;
+}
+
+static inline int pv_hypercall_arg_present(const struct record_info *ri, int arg)
+{
+ return (ri->d[0] >> (20 + 2*arg)) & 0x3;
+}
+
+void pv_hypercall_gather_args(const struct record_info *ri, uint64_t *args);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif
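
For illustration, not part of the patch: a sketch of how the helpers above might report a TRC_PV_HYPERCALL_V2 record's op and argument sizes. It assumes ri already describes such a record; the loop bound of six follows from the 20 + 2*arg encoding, which leaves room for six two-bit slots in a 32-bit word.

#include <stdio.h>
#include "analyze.h"
#include "pv.h"

static void print_pv_hypercall(const struct record_info *ri)
{
    int i;

    printf("hypercall op %#x:", pv_hypercall_op(ri));
    for ( i = 0; i < 6; i++ )
    {
        switch ( pv_hypercall_arg_present(ri, i) )
        {
        case ARG_32BIT: printf(" arg%d:32-bit", i); break;
        case ARG_64BIT: printf(" arg%d:64-bit", i); break;
        default:        printf(" arg%d:absent", i); break;
        }
    }
    printf("\n");
}
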
diff --git a/tools/xentrace/xenalyze.c b/tools/xentrace/xenalyze.c
new file mode 100644
index 0000000..a0a3ac1
--- /dev/null
+++ b/tools/xentrace/xenalyze.c
@@ -0,0 +1,10407 @@
+/*
+ * xenalyze.c: Analyzing xentrace output
+ *
+ * Written by George Dunlap.
+ *
+ * Copyright (c) 2006-2007, XenSource Inc.
+ * Copyright (c) 2007-2008, Citrix Systems R&D Ltd, UK
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#define _XOPEN_SOURCE 600
+#include <stdio.h>
+#include <stdlib.h>
+#include <argp.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <xen/trace.h>
+#include "analyze.h"
+#include "mread.h"
+#include "pv.h"
+#include <errno.h>
+#include <strings.h>
+#include <string.h>
+#include <assert.h>
+
+struct mread_ctrl;
+
+
+#define DEFAULT_CPU_HZ 2400000000LL
+#define QHZ_FROM_HZ(_hz) (((_hz) << 10)/ 1000000000)
+
+#define ADDR_SPACE_BITS 48
+#define DEFAULT_SAMPLE_SIZE 10240
+#define DEFAULT_INTERVAL_LENGTH 1000
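
/* Illustrative arithmetic, not part of the patch: QHZ_FROM_HZ() above
 * pre-scales the clock rate by 2^10 so that a cycle count can later be
 * turned into nanoseconds with one shift and one divide.  A small
 * self-contained check of the maths (the helper name is made up): */
static unsigned long long example_cycles_to_ns(unsigned long long cycles)
{
    long long qhz = QHZ_FROM_HZ(DEFAULT_CPU_HZ);  /* ~2.4 cycles/ns, scaled by 1024 */

    return (cycles << 10) / qhz;                  /* undo the 2^10 scaling */
}
/* e.g. example_cycles_to_ns(4800000000ULL) is roughly 2e9 ns, i.e. two
 * seconds of wall-clock time at the default 2.4 GHz. */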
+
+struct array_struct {
+ unsigned long long *values;
+ int count;
+};
+
+#define warn_once(_x...) \
+ do { \
+ static int _w=1; \
+ if ( _w ) { \
+ _w=0; \
+ fprintf(warn, ##_x); \
+ } \
+ } while(0) \
+
+/* -- Global variables -- */
+struct {
+ int fd;
+ struct mread_ctrl *mh;
+ struct symbol_struct * symbols;
+ char * symbol_file;
+ char * trace_file;
+ int output_defined;
+ loff_t file_size;
+ struct {
+ loff_t update_offset;
+ int pipe[2];
+ FILE* out;
+ int pid;
+ } progress;
+} G = {
+ .fd=-1,
+ .symbols = NULL,
+ .symbol_file = NULL,
+ .trace_file = NULL,
+ .output_defined = 0,
+ .file_size = 0,
+ .progress = { .update_offset = 0 },
+};
+
+/*
+ Kinds of errors:
+ Unexpected values
+ - RIP with information in high bits (not all 0 or 1)
+ - exit reason too high
+ Unexpected record layout
+ - x64 bit set in PIO, PV_PTWR_EMULATION_PAE
+ - Unknown minor type (PV_PTWR_EMULATION, RUNSTATE_CHANGE)
+ - Wrong record size
+ - More than one bit set in evt.main field
+ Unexpected sequences
+ - wake tsc tracking
+ - TSC dependency loop
+ - Mismatch between non-running old event states
+ - Runstate continue while running on another pcpu
+ - lost_record_end seen in non-lost pcpu
+ - Unexpected non-CPU_CHANGE record during new_pcpu scan
+ - record tsc < interval start tsc
+ - lost_record tsc !> order tsc
+ Limited resources
+ - interrupt interval slots
+ - record cpu > MAX_CPUS
+ Algorithm asserts
+ - Duplicate CR3/domain values
+ - Logic holes
+ - domain runstates
+ - runstate / tsc skew
+ - vcpu_{prev,next}_update p->current{==,!=}null
+ - vcpu start conditions
+ - lost_cpu count higher than # of seen cpus / < 0
+ - lost cpu has non-null p->current
+ Symbol file
+ -file doesn't open
+ -file not ordered
+ System
+ - short read
+ - malloc failed
+ Args
+ - Invalid cpu_hz value / suffix
+ - No trace file
+ - Can't open trace file
+*/
+enum error_level {
+ ERR_NONE=0,
+ ERR_STRICT, /* Be unreasonably picky */
+ ERR_WARN, /* Something mildly unexpected */
+ ERR_SANITY, /* Sanity checks: RIP with info in high bits */
+ ERR_RECORD, /* Something that keeps you from processing the record */
+ ERR_FILE, /* Probably caused by a corrupt file */
+ ERR_LIMIT, /* Exceeded limits; data will be lost */
+ ERR_MAX_TOLERABLE=ERR_LIMIT,
+ /* -- Unrecoverable past this point -- */
+ ERR_ASSERT, /* Algorithm assert */
+ ERR_SYSTEM, /* System error: cannot allocate memory, short read, &c */
+};
+
+int verbosity = 5;
+
+struct {
+ unsigned
+ scatterplot_interrupt_eip:1,
+ scatterplot_cpi:1,
+ scatterplot_unpin_promote:1,
+ scatterplot_cr3_switch:1,
+ scatterplot_wake_to_halt:1,
+ scatterplot_io:1,
+ scatterplot_vmexit_eip:1,
+ scatterplot_runstate:1,
+ scatterplot_runstate_time:1,
+ scatterplot_pcpu:1,
+ scatterplot_extint_cycles:1,
+ scatterplot_rdtsc:1,
+ scatterplot_irq:1,
+ histogram_interrupt_eip:1,
+ interval_mode:1,
+ dump_all:1,
+ dump_raw_process:1,
+ dump_raw_reads:1,
+ dump_no_processing:1,
+ dump_ipi_latency:1,
+ dump_trace_volume_on_lost_record:1,
+ dump_show_power_states:1,
+ with_cr3_enumeration:1,
+ with_pio_enumeration:1,
+ with_mmio_enumeration:1,
+ with_interrupt_eip_enumeration:1,
+ show_default_domain_summary:1,
+ mmio_enumeration_skip_vga:1,
+ progress:1,
+ svm_mode:1,
+ summary:1,
+ report_pcpu:1,
+ tsc_loop_fatal:1,
+ summary_info;
+ long long cpu_qhz, cpu_hz;
+ int scatterplot_interrupt_vector;
+ int scatterplot_extint_cycles_vector;
+ int scatterplot_io_port;
+ int histogram_interrupt_vector;
+ unsigned long long histogram_interrupt_increment;
+ int interrupt_eip_enumeration_vector;
+ int default_guest_paging_levels;
+ int sample_size;
+ enum error_level tolerance; /* Tolerate up to this level of error */
+ struct {
+ tsc_t cycles;
+ /* Used if interval is specified in seconds to delay calculating
+ * time_interval until all arguments have been processed (specifically,
+ * cpu_hz). */
+ unsigned msec;
+ enum {
+ INTERVAL_CR3_SCHEDULE_TIME,
+ INTERVAL_CR3_SCHEDULE_ORDERED,
+ INTERVAL_CR3_SHORT_SUMMARY,
+ INTERVAL_DOMAIN_TOTAL_TIME,
+ INTERVAL_DOMAIN_SHORT_SUMMARY,
+ INTERVAL_DOMAIN_GUEST_INTERRUPT,
+ INTERVAL_DOMAIN_GRANT_MAPS
+ } output;
+ enum {
+ INTERVAL_MODE_CUSTOM,
+ INTERVAL_MODE_ARRAY,
+ INTERVAL_MODE_LIST
+ } mode;
+ enum {
+ INTERVAL_CHECK_NONE,
+ INTERVAL_CHECK_CR3,
+ INTERVAL_CHECK_DOMAIN
+ } check;
+ /* Options for specific interval output types */
+ union {
+ struct array_struct array;
+ };
+ int count;
+ } interval;
+} opt = {
+ .scatterplot_interrupt_eip=0,
+ .scatterplot_cpi=0,
+ .scatterplot_unpin_promote=0,
+ .scatterplot_cr3_switch=0,
+ .scatterplot_wake_to_halt=0,
+ .scatterplot_vmexit_eip=0,
+ .scatterplot_runstate=0,
+ .scatterplot_runstate_time=0,
+ .scatterplot_pcpu=0,
+ .scatterplot_extint_cycles=0,
+ .scatterplot_rdtsc=0,
+ .scatterplot_irq=0,
+ .histogram_interrupt_eip=0,
+ .dump_all = 0,
+ .dump_raw_process = 0,
+ .dump_raw_reads = 0,
+ .dump_no_processing = 0,
+ .dump_ipi_latency = 0,
+ .dump_trace_volume_on_lost_record = 0,
+ .dump_show_power_states = 0,
+ .with_cr3_enumeration = 0,
+ .with_pio_enumeration = 1,
+ .with_mmio_enumeration = 0,
+ .with_interrupt_eip_enumeration = 0,
+ .show_default_domain_summary = 0,
+ .mmio_enumeration_skip_vga = 1,
+ .progress = 0,
+ .svm_mode = 0,
+ .summary = 0,
+ .report_pcpu = 0,
+ .tsc_loop_fatal = 0,
+ .cpu_hz = DEFAULT_CPU_HZ,
+ /* Pre-calculate a multiplier that makes the rest of the
+ * calculations easier */
+ .cpu_qhz = QHZ_FROM_HZ(DEFAULT_CPU_HZ),
+ .default_guest_paging_levels = 2,
+ .sample_size = DEFAULT_SAMPLE_SIZE,
+ .tolerance = ERR_SANITY,
+ .interval = { .msec = DEFAULT_INTERVAL_LENGTH },
+};
+
+FILE *warn = NULL;
+
+/* -- Summary data -- */
+struct cycle_framework {
+ tsc_t first_tsc, last_tsc, total_cycles;
+};
+
+struct interval_element {
+ int count;
+ long long cycles;
+ long long instructions;
+};
+
+struct event_cycle_summary {
+ int count, cycles_count;
+ long long cycles;
+ long long *cycles_sample;
+ struct interval_element interval;
+};
+
+struct cycle_summary {
+ int count;
+ unsigned long long cycles;
+ long long *sample;
+ struct interval_element interval;
+};
+
+struct weighted_cpi_summary {
+ int count;
+ unsigned long long instructions;
+ unsigned long long cycles;
+ float *cpi;
+ unsigned long long *cpi_weight;
+ struct interval_element interval;
+};
+
+/* -- Symbol list information -- */
+#define SYMBOL_ENTRIES_PER_STRUCT 1023
+#define SYMBOL_NAME_SIZE 124
+struct symbol_struct {
+ int count;
+ struct {
+ unsigned long long addr;
+ char name[SYMBOL_NAME_SIZE];
+ } symbols[SYMBOL_ENTRIES_PER_STRUCT];
+ struct symbol_struct *next;
+};
+
+void error(enum error_level l, struct record_info *ri);
+
+void parse_symbol_file(char *fn) {
+ unsigned long long last_addr = 0;
+ FILE * symbol_file;
+ struct symbol_struct ** p=&G.symbols;
+
+ if((symbol_file=fopen(fn, "rb"))==NULL) {
+ fprintf(stderr, "Could not open symbol file %s\n", fn);
+ perror("open");
+ error(ERR_SYSTEM, NULL);
+ }
+ while(!feof(symbol_file)) {
+ /* Allocate a new struct if we need it */
+ if(!*p) {
+ *p = malloc(sizeof(**p));
+ if(!*p) {
+ fprintf(stderr, "Malloc failed!\n");
+ error(ERR_SYSTEM, NULL);
+ }
+ (*p)->count=0;
+ (*p)->next=NULL;
+ }
+
+ /* FIXME -- use SYMBOL_NAME_SIZE */
+ /* FIXME -- use regexp. This won't work for symbols with spaces (yes they exist) */
+ (*p)->symbols[(*p)->count].addr = 0xDEADBEEF;
+ if ( fscanf(symbol_file, "%llx %128s",
+ &(*p)->symbols[(*p)->count].addr,
+ (*p)->symbols[(*p)->count].name) == 0 )
+ break;
+
+
+ if( ((*p)->symbols[(*p)->count].addr > 0)
+ && ((*p)->symbols[(*p)->count].addr < last_addr) ) {
+ fprintf(stderr, "Symbol file not properly ordered: %llx %s <
%llx!\n",
+ (*p)->symbols[(*p)->count].addr,
+ (*p)->symbols[(*p)->count].name,
+ last_addr);
+ /* Could be recovered from; just free existing strings and set symbols to NULL */
+ error(ERR_ASSERT, NULL);
+ } else
+ last_addr = (*p)->symbols[(*p)->count].addr;
+
+ (*p)->count++;
+
+ /* If this struct is full, point to the next. It will be allocated
+ if needed. */
+ if((*p)->count == SYMBOL_ENTRIES_PER_STRUCT) {
+ p=&((*p)->next);
+ }
+ }
+}
+
+/* WARNING not thread safe */
+char * find_symbol(unsigned long long addr) {
+ struct symbol_struct * p=G.symbols;
+ int i;
+ char * lastname="ZERO";
+ unsigned long long offset=addr;
+ static char name[128];
+
+ if(!p) {
+ name[0]=0;
+ return name;
+ }
+
+ while(1) {
+ if(!p)
+ goto finish;
+ for(i=0; i<p->count; i++) {
+ if(p->symbols[i].addr > addr)
+ goto finish;
+ else {
+ lastname=p->symbols[i].name;
+ offset=addr - p->symbols[i].addr;
+ }
+ }
+ p=p->next;
+ }
+ finish:
+ snprintf(name, 128, "(%s +%llx)",
+ lastname, offset);
+ return name;
+}
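
/* Usage illustration, not part of the patch: load a symbol table and
 * resolve an address with the two helpers above.  The file name and
 * address are placeholders; the file is expected to hold ascending
 * "hex-address name" lines, matching the fscanf() format above. */
static void example_symbol_lookup(void)
{
    parse_symbol_file("xen-syms.map");                  /* hypothetical map file */
    printf("%s\n", find_symbol(0xffff82d080200123ULL)); /* prints "(symbol +offset)" */
}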
+
+/* -- Eip list data -- */
+enum {
+ EIP_LIST_TYPE_NONE=0,
+ EIP_LIST_TYPE_MAX
+};
+
+struct eip_list_struct {
+ struct eip_list_struct *next;
+ unsigned long long eip;
+ struct event_cycle_summary summary;
+ int type;
+ void * extra;
+};
+
+struct {
+ void (*update)(struct eip_list_struct *, void *);
+ void (*new)(struct eip_list_struct *, void *);
+ void (*dump)(struct eip_list_struct *);
+} eip_list_type[EIP_LIST_TYPE_MAX] = {
+ [EIP_LIST_TYPE_NONE] = {
+ .update=NULL,
+ .new=NULL,
+ .dump=NULL },
+};
+
+
+/* --- HVM class of events --- */
+
+/*
+ * -- Algorithms --
+ *
+ * Interrupt Wake-to-halt detection
+ *
+ * Purpose: To correlate device interrupts to vcpu runtime.
+ *
+ * Diagram:
+ * ...
+ * blocked -> runnable <- set to waking
+ * ...
+ * runnable -> running
+ * inj_virq A <- Note "waking" interrupt
+ * vmenter <- Start tsc of "wake-to-halt" interval.
+ Turn off 'waking'.
+ * ...
+ * inj_virq B <- Note alternate interrupt
+ * vmenter <- Start tsc of "interrupt-to-halt" interval
+ * ...
+ * vmexit <- End tsc of "x-to-halt" interval
+ * running -> blocked <- Process
+ *
+ * The "waking" interrupts we want to sub-classify into
+ * "wake-only" (when interrupt was the only interrupt from wake to halt) and
+ * "wake-all" (whether this was the only interrupt or not).
+ */
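
/* Illustrative sketch, not part of the patch: the wake-to-halt half of
 * the bookkeeping described above, reduced to a hypothetical simplified
 * state machine.  The real implementation lives in the hvm_data.w2h
 * fields later in this file and also handles the wake-only / wake-all
 * sub-classification, which is omitted here. */
struct w2h_sketch {
    int waking;                    /* set on blocked -> runnable */
    int wake_vector;               /* the interrupt that woke the vcpu */
    unsigned long long start_tsc;  /* vmentry tsc starting the interval */
};

static void w2h_sketch_inj_virq(struct w2h_sketch *w, int vector)
{
    if ( w->waking && !w->wake_vector )
        w->wake_vector = vector;           /* note the "waking" interrupt */
}

static void w2h_sketch_vmentry(struct w2h_sketch *w, unsigned long long tsc)
{
    if ( w->wake_vector && !w->start_tsc )
    {
        w->start_tsc = tsc;                /* start of the wake-to-halt interval */
        w->waking = 0;
    }
}

static unsigned long long w2h_sketch_halt(struct w2h_sketch *w, unsigned long long tsc)
{
    unsigned long long cycles = w->start_tsc ? tsc - w->start_tsc : 0;

    w->wake_vector = 0;
    w->start_tsc = 0;
    return cycles;                         /* wake-to-halt runtime */
}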
+
+/* VMX data */
+#define EXIT_REASON_EXCEPTION_NMI 0
+#define EXIT_REASON_EXTERNAL_INTERRUPT 1
+#define EXIT_REASON_TRIPLE_FAULT 2
+#define EXIT_REASON_INIT 3
+#define EXIT_REASON_SIPI 4
+#define EXIT_REASON_IO_SMI 5
+#define EXIT_REASON_OTHER_SMI 6
+#define EXIT_REASON_PENDING_INTERRUPT 7
+#define EXIT_REASON_PENDING_VIRT_NMI 8
+#define EXIT_REASON_TASK_SWITCH 9
+#define EXIT_REASON_CPUID 10
+#define EXIT_REASON_GETSEC 11
+#define EXIT_REASON_HLT 12
+#define EXIT_REASON_INVD 13
+#define EXIT_REASON_INVLPG 14
+#define EXIT_REASON_RDPMC 15
+#define EXIT_REASON_RDTSC 16
+#define EXIT_REASON_RSM 17
+#define EXIT_REASON_VMCALL 18
+#define EXIT_REASON_VMCLEAR 19
+#define EXIT_REASON_VMLAUNCH 20
+#define EXIT_REASON_VMPTRLD 21
+#define EXIT_REASON_VMPTRST 22
+#define EXIT_REASON_VMREAD 23
+#define EXIT_REASON_VMRESUME 24
+#define EXIT_REASON_VMWRITE 25
+#define EXIT_REASON_VMOFF 26
+#define EXIT_REASON_VMON 27
+#define EXIT_REASON_CR_ACCESS 28
+#define EXIT_REASON_DR_ACCESS 29
+#define EXIT_REASON_IO_INSTRUCTION 30
+#define EXIT_REASON_MSR_READ 31
+#define EXIT_REASON_MSR_WRITE 32
+#define EXIT_REASON_INVALID_GUEST_STATE 33
+#define EXIT_REASON_MSR_LOADING 34
+#define EXIT_REASON_MWAIT_INSTRUCTION 36
+#define EXIT_REASON_MONITOR_TRAP_FLAG 37
+#define EXIT_REASON_MONITOR_INSTRUCTION 39
+#define EXIT_REASON_PAUSE_INSTRUCTION 40
+#define EXIT_REASON_MACHINE_CHECK 41
+#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS 44
+#define EXIT_REASON_ACCESS_GDTR_OR_IDTR 46
+#define EXIT_REASON_ACCESS_LDTR_OR_TR 47
+#define EXIT_REASON_EPT_VIOLATION 48
+#define EXIT_REASON_EPT_MISCONFIG 49
+#define EXIT_REASON_INVEPT 50
+#define EXIT_REASON_RDTSCP 51
+#define EXIT_REASON_VMX_PREEMPTION_TIMER_EXPIRED 52
+#define EXIT_REASON_INVVPID 53
+#define EXIT_REASON_WBINVD 54
+#define EXIT_REASON_XSETBV 55
+
+#define HVM_VMX_EXIT_REASON_MAX (EXIT_REASON_XSETBV+1)
+
+char * hvm_vmx_exit_reason_name[HVM_VMX_EXIT_REASON_MAX] = {
+ [0] = "NONE",
+ [EXIT_REASON_EXCEPTION_NMI]="EXCEPTION_NMI",
+ [EXIT_REASON_EXTERNAL_INTERRUPT]="EXTERNAL_INTERRUPT",
+ [EXIT_REASON_TRIPLE_FAULT]="TRIPLE_FAULT",
+ [EXIT_REASON_INIT]="INIT",
+ [EXIT_REASON_SIPI]="SIPI",
+ [EXIT_REASON_IO_SMI]="IO_SMI",
+ [EXIT_REASON_OTHER_SMI]="OTHER_SMI",
+ [EXIT_REASON_PENDING_INTERRUPT]="PENDING_INTERRUPT",
+ [EXIT_REASON_PENDING_VIRT_NMI]="PENDING_VIRT_NMI",
+ [EXIT_REASON_TASK_SWITCH]="TASK_SWITCH",
+ [EXIT_REASON_CPUID]="CPUID",
+ [EXIT_REASON_GETSEC]="GETSEC",
+ [EXIT_REASON_HLT]="HLT",
+ [EXIT_REASON_INVD]="INVD",
+ [EXIT_REASON_INVLPG]="INVLPG",
+ [EXIT_REASON_RDPMC]="RDPMC",
+ [EXIT_REASON_RDTSC]="RDTSC",
+ [EXIT_REASON_RSM]="RSM",
+ [EXIT_REASON_VMCALL]="VMCALL",
+ [EXIT_REASON_VMCLEAR]="VMCLEAR",
+ [EXIT_REASON_VMLAUNCH]="VMLAUNCH",
+ [EXIT_REASON_VMPTRLD]="VMPTRLD",
+ [EXIT_REASON_VMPTRST]="VMPTRST",
+ [EXIT_REASON_VMREAD]="VMREAD",
+ [EXIT_REASON_VMRESUME]="VMRESUME",
+ [EXIT_REASON_VMWRITE]="VMWRITE",
+ [EXIT_REASON_VMOFF]="VMOFF",
+ [EXIT_REASON_VMON]="VMON",
+ [EXIT_REASON_CR_ACCESS]="CR_ACCESS",
+ [EXIT_REASON_DR_ACCESS]="DR_ACCESS",
+ [EXIT_REASON_IO_INSTRUCTION]="IO_INSTRUCTION",
+ [EXIT_REASON_MSR_READ]="MSR_READ",
+ [EXIT_REASON_MSR_WRITE]="MSR_WRITE",
+ [EXIT_REASON_INVALID_GUEST_STATE]="INVALID_GUEST_STATE",
+ [EXIT_REASON_MSR_LOADING]="MSR_LOADING",
+ [EXIT_REASON_MWAIT_INSTRUCTION]="MWAIT_INSTRUCTION",
+ [EXIT_REASON_MONITOR_TRAP_FLAG]="MONITOR_TRAP_FLAG",
+ [EXIT_REASON_MONITOR_INSTRUCTION]="MONITOR_INSTRUCTION",
+ [EXIT_REASON_PAUSE_INSTRUCTION]="PAUSE_INSTRUCTION",
+ [EXIT_REASON_MACHINE_CHECK]="MACHINE_CHECK",
+ [EXIT_REASON_TPR_BELOW_THRESHOLD]="TPR_BELOW_THRESHOLD",
+ [EXIT_REASON_APIC_ACCESS]="APIC_ACCESS",
+ [EXIT_REASON_EPT_VIOLATION]="EPT_VIOLATION",
+ [EXIT_REASON_EPT_MISCONFIG]="EPT_MISCONFIG",
+ [EXIT_REASON_INVEPT]="INVEPT",
+ [EXIT_REASON_RDTSCP]="RDTSCP",
+ [EXIT_REASON_VMX_PREEMPTION_TIMER_EXPIRED]="VMX_PREEMPTION_TIMER_EXPIRED",
+ [EXIT_REASON_INVVPID]="INVVPID",
+ [EXIT_REASON_WBINVD]="WBINVD",
+ [EXIT_REASON_XSETBV]="XSETBV",
+};
+
+/* SVM data */
+enum VMEXIT_EXITCODE
+{
+ /* control register read exitcodes */
+ VMEXIT_CR0_READ = 0,
+ VMEXIT_CR1_READ = 1,
+ VMEXIT_CR2_READ = 2,
+ VMEXIT_CR3_READ = 3,
+ VMEXIT_CR4_READ = 4,
+ VMEXIT_CR5_READ = 5,
+ VMEXIT_CR6_READ = 6,
+ VMEXIT_CR7_READ = 7,
+ VMEXIT_CR8_READ = 8,
+ VMEXIT_CR9_READ = 9,
+ VMEXIT_CR10_READ = 10,
+ VMEXIT_CR11_READ = 11,
+ VMEXIT_CR12_READ = 12,
+ VMEXIT_CR13_READ = 13,
+ VMEXIT_CR14_READ = 14,
+ VMEXIT_CR15_READ = 15,
+
+ /* control register write exitcodes */
+ VMEXIT_CR0_WRITE = 16,
+ VMEXIT_CR1_WRITE = 17,
+ VMEXIT_CR2_WRITE = 18,
+ VMEXIT_CR3_WRITE = 19,
+ VMEXIT_CR4_WRITE = 20,
+ VMEXIT_CR5_WRITE = 21,
+ VMEXIT_CR6_WRITE = 22,
+ VMEXIT_CR7_WRITE = 23,
+ VMEXIT_CR8_WRITE = 24,
+ VMEXIT_CR9_WRITE = 25,
+ VMEXIT_CR10_WRITE = 26,
+ VMEXIT_CR11_WRITE = 27,
+ VMEXIT_CR12_WRITE = 28,
+ VMEXIT_CR13_WRITE = 29,
+ VMEXIT_CR14_WRITE = 30,
+ VMEXIT_CR15_WRITE = 31,
+
+ /* debug register read exitcodes */
+ VMEXIT_DR0_READ = 32,
+ VMEXIT_DR1_READ = 33,
+ VMEXIT_DR2_READ = 34,
+ VMEXIT_DR3_READ = 35,
+ VMEXIT_DR4_READ = 36,
+ VMEXIT_DR5_READ = 37,
+ VMEXIT_DR6_READ = 38,
+ VMEXIT_DR7_READ = 39,
+ VMEXIT_DR8_READ = 40,
+ VMEXIT_DR9_READ = 41,
+ VMEXIT_DR10_READ = 42,
+ VMEXIT_DR11_READ = 43,
+ VMEXIT_DR12_READ = 44,
+ VMEXIT_DR13_READ = 45,
+ VMEXIT_DR14_READ = 46,
+ VMEXIT_DR15_READ = 47,
+
+ /* debug register write exitcodes */
+ VMEXIT_DR0_WRITE = 48,
+ VMEXIT_DR1_WRITE = 49,
+ VMEXIT_DR2_WRITE = 50,
+ VMEXIT_DR3_WRITE = 51,
+ VMEXIT_DR4_WRITE = 52,
+ VMEXIT_DR5_WRITE = 53,
+ VMEXIT_DR6_WRITE = 54,
+ VMEXIT_DR7_WRITE = 55,
+ VMEXIT_DR8_WRITE = 56,
+ VMEXIT_DR9_WRITE = 57,
+ VMEXIT_DR10_WRITE = 58,
+ VMEXIT_DR11_WRITE = 59,
+ VMEXIT_DR12_WRITE = 60,
+ VMEXIT_DR13_WRITE = 61,
+ VMEXIT_DR14_WRITE = 62,
+ VMEXIT_DR15_WRITE = 63,
+
+ /* processor exception exitcodes (VMEXIT_EXCP[0-31]) */
+ VMEXIT_EXCEPTION_DE = 64, /* divide-by-zero-error */
+ VMEXIT_EXCEPTION_DB = 65, /* debug */
+ VMEXIT_EXCEPTION_NMI = 66, /* non-maskable-interrupt */
+ VMEXIT_EXCEPTION_BP = 67, /* breakpoint */
+ VMEXIT_EXCEPTION_OF = 68, /* overflow */
+ VMEXIT_EXCEPTION_BR = 69, /* bound-range */
+ VMEXIT_EXCEPTION_UD = 70, /* invalid-opcode*/
+ VMEXIT_EXCEPTION_NM = 71, /* device-not-available */
+ VMEXIT_EXCEPTION_DF = 72, /* double-fault */
+ VMEXIT_EXCEPTION_09 = 73, /* unsupported (reserved) */
+ VMEXIT_EXCEPTION_TS = 74, /* invalid-tss */
+ VMEXIT_EXCEPTION_NP = 75, /* segment-not-present */
+ VMEXIT_EXCEPTION_SS = 76, /* stack */
+ VMEXIT_EXCEPTION_GP = 77, /* general-protection */
+ VMEXIT_EXCEPTION_PF = 78, /* page-fault */
+ VMEXIT_EXCEPTION_15 = 79, /* reserved */
+ VMEXIT_EXCEPTION_MF = 80, /* x87 floating-point exception-pending */
+ VMEXIT_EXCEPTION_AC = 81, /* alignment-check */
+ VMEXIT_EXCEPTION_MC = 82, /* machine-check */
+ VMEXIT_EXCEPTION_XF = 83, /* simd floating-point */
+
+ /* exceptions 20-31 (exitcodes 84-95) are reserved */
+
+ /* ...and the rest of the #VMEXITs */
+ VMEXIT_INTR = 96,
+ VMEXIT_NMI = 97,
+ VMEXIT_SMI = 98,
+ VMEXIT_INIT = 99,
+ VMEXIT_VINTR = 100,
+ VMEXIT_CR0_SEL_WRITE = 101,
+ VMEXIT_IDTR_READ = 102,
+ VMEXIT_GDTR_READ = 103,
+ VMEXIT_LDTR_READ = 104,
+ VMEXIT_TR_READ = 105,
+ VMEXIT_IDTR_WRITE = 106,
+ VMEXIT_GDTR_WRITE = 107,
+ VMEXIT_LDTR_WRITE = 108,
+ VMEXIT_TR_WRITE = 109,
+ VMEXIT_RDTSC = 110,
+ VMEXIT_RDPMC = 111,
+ VMEXIT_PUSHF = 112,
+ VMEXIT_POPF = 113,
+ VMEXIT_CPUID = 114,
+ VMEXIT_RSM = 115,
+ VMEXIT_IRET = 116,
+ VMEXIT_SWINT = 117,
+ VMEXIT_INVD = 118,
+ VMEXIT_PAUSE = 119,
+ VMEXIT_HLT = 120,
+ VMEXIT_INVLPG = 121,
+ VMEXIT_INVLPGA = 122,
+ VMEXIT_IOIO = 123,
+ VMEXIT_MSR = 124,
+ VMEXIT_TASK_SWITCH = 125,
+ VMEXIT_FERR_FREEZE = 126,
+ VMEXIT_SHUTDOWN = 127,
+ VMEXIT_VMRUN = 128,
+ VMEXIT_VMMCALL = 129,
+ VMEXIT_VMLOAD = 130,
+ VMEXIT_VMSAVE = 131,
+ VMEXIT_STGI = 132,
+ VMEXIT_CLGI = 133,
+ VMEXIT_SKINIT = 134,
+ VMEXIT_RDTSCP = 135,
+ VMEXIT_ICEBP = 136,
+ VMEXIT_WBINVD = 137,
+ VMEXIT_MONITOR = 138,
+ VMEXIT_MWAIT = 139,
+ VMEXIT_MWAIT_CONDITIONAL= 140,
+ VMEXIT_NPF = 1024, /* nested paging fault */
+ VMEXIT_INVALID = -1
+};
+
+#define HVM_SVM_EXIT_REASON_MAX 1025
+char * hvm_svm_exit_reason_name[HVM_SVM_EXIT_REASON_MAX] = {
+ /* 0-15 */
+ "VMEXIT_CR0_READ",
+ "VMEXIT_CR1_READ",
+ "VMEXIT_CR2_READ",
+ "VMEXIT_CR3_READ",
+ "VMEXIT_CR4_READ",
+ "VMEXIT_CR5_READ",
+ "VMEXIT_CR6_READ",
+ "VMEXIT_CR7_READ",
+ "VMEXIT_CR8_READ",
+ "VMEXIT_CR9_READ",
+ "VMEXIT_CR10_READ",
+ "VMEXIT_CR11_READ",
+ "VMEXIT_CR12_READ",
+ "VMEXIT_CR13_READ",
+ "VMEXIT_CR14_READ",
+ "VMEXIT_CR15_READ",
+ /* 16-31 */
+ "VMEXIT_CR0_WRITE",
+ "VMEXIT_CR1_WRITE",
+ "VMEXIT_CR2_WRITE",
+ "VMEXIT_CR3_WRITE",
+ "VMEXIT_CR4_WRITE",
+ "VMEXIT_CR5_WRITE",
+ "VMEXIT_CR6_WRITE",
+ "VMEXIT_CR7_WRITE",
+ "VMEXIT_CR8_WRITE",
+ "VMEXIT_CR9_WRITE",
+ "VMEXIT_CR10_WRITE",
+ "VMEXIT_CR11_WRITE",
+ "VMEXIT_CR12_WRITE",
+ "VMEXIT_CR13_WRITE",
+ "VMEXIT_CR14_WRITE",
+ "VMEXIT_CR15_WRITE",
+ /* 32-47 */
+ "VMEXIT_DR0_READ",
+ "VMEXIT_DR1_READ",
+ "VMEXIT_DR2_READ",
+ "VMEXIT_DR3_READ",
+ "VMEXIT_DR4_READ",
+ "VMEXIT_DR5_READ",
+ "VMEXIT_DR6_READ",
+ "VMEXIT_DR7_READ",
+ "VMEXIT_DR8_READ",
+ "VMEXIT_DR9_READ",
+ "VMEXIT_DR10_READ",
+ "VMEXIT_DR11_READ",
+ "VMEXIT_DR12_READ",
+ "VMEXIT_DR13_READ",
+ "VMEXIT_DR14_READ",
+ "VMEXIT_DR15_READ",
+ /* 48-63 */
+ "VMEXIT_DR0_WRITE",
+ "VMEXIT_DR1_WRITE",
+ "VMEXIT_DR2_WRITE",
+ "VMEXIT_DR3_WRITE",
+ "VMEXIT_DR4_WRITE",
+ "VMEXIT_DR5_WRITE",
+ "VMEXIT_DR6_WRITE",
+ "VMEXIT_DR7_WRITE",
+ "VMEXIT_DR8_WRITE",
+ "VMEXIT_DR9_WRITE",
+ "VMEXIT_DR10_WRITE",
+ "VMEXIT_DR11_WRITE",
+ "VMEXIT_DR12_WRITE",
+ "VMEXIT_DR13_WRITE",
+ "VMEXIT_DR14_WRITE",
+ "VMEXIT_DR15_WRITE",
+ /* 64-83 */
+ "VMEXIT_EXCEPTION_DE",
+ "VMEXIT_EXCEPTION_DB",
+ "VMEXIT_EXCEPTION_NMI",
+ "VMEXIT_EXCEPTION_BP",
+ "VMEXIT_EXCEPTION_OF",
+ "VMEXIT_EXCEPTION_BR",
+ "VMEXIT_EXCEPTION_UD",
+ "VMEXIT_EXCEPTION_NM",
+ "VMEXIT_EXCEPTION_DF",
+ "VMEXIT_EXCEPTION_09",
+ "VMEXIT_EXCEPTION_TS",
+ "VMEXIT_EXCEPTION_NP",
+ "VMEXIT_EXCEPTION_SS",
+ "VMEXIT_EXCEPTION_GP",
+ "VMEXIT_EXCEPTION_PF",
+ "VMEXIT_EXCEPTION_15",
+ "VMEXIT_EXCEPTION_MF",
+ "VMEXIT_EXCEPTION_AC",
+ "VMEXIT_EXCEPTION_MC",
+ "VMEXIT_EXCEPTION_XF",
+ /* 84-95 */
+ "VMEXIT_EXCEPTION_20",
+ "VMEXIT_EXCEPTION_21",
+ "VMEXIT_EXCEPTION_22",
+ "VMEXIT_EXCEPTION_23",
+ "VMEXIT_EXCEPTION_24",
+ "VMEXIT_EXCEPTION_25",
+ "VMEXIT_EXCEPTION_26",
+ "VMEXIT_EXCEPTION_27",
+ "VMEXIT_EXCEPTION_28",
+ "VMEXIT_EXCEPTION_29",
+ "VMEXIT_EXCEPTION_30",
+ "VMEXIT_EXCEPTION_31",
+ /* 96-99 */
+ "VMEXIT_INTR",
+ "VMEXIT_NMI",
+ "VMEXIT_SMI",
+ "VMEXIT_INIT",
+ /* 100-109 */
+ "VMEXIT_VINTR",
+ "VMEXIT_CR0_SEL_WRITE",
+ "VMEXIT_IDTR_READ",
+ "VMEXIT_GDTR_READ",
+ "VMEXIT_LDTR_READ",
+ "VMEXIT_TR_READ",
+ "VMEXIT_IDTR_WRITE",
+ "VMEXIT_GDTR_WRITE",
+ "VMEXIT_LDTR_WRITE",
+ "VMEXIT_TR_WRITE",
+ /* 110-119 */
+ "VMEXIT_RDTSC",
+ "VMEXIT_RDPMC",
+ "VMEXIT_PUSHF",
+ "VMEXIT_POPF",
+ "VMEXIT_CPUID",
+ "VMEXIT_RSM",
+ "VMEXIT_IRET",
+ "VMEXIT_SWINT",
+ "VMEXIT_INVD",
+ "VMEXIT_PAUSE",
+ /* 120-129 */
+ "VMEXIT_HLT",
+ "VMEXIT_INVLPG",
+ "VMEXIT_INVLPGA",
+ "VMEXIT_IOIO",
+ "VMEXIT_MSR",
+ "VMEXIT_TASK_SWITCH",
+ "VMEXIT_FERR_FREEZE",
+ "VMEXIT_SHUTDOWN",
+ "VMEXIT_VMRUN",
+ "VMEXIT_VMMCALL",
+ /* 130-139 */
+ "VMEXIT_VMLOAD",
+ "VMEXIT_VMSAVE",
+ "VMEXIT_STGI",
+ "VMEXIT_CLGI",
+ "VMEXIT_SKINIT",
+ "VMEXIT_RDTSCP",
+ "VMEXIT_ICEBP",
+ "VMEXIT_WBINVD",
+ "VMEXIT_MONITOR",
+ "VMEXIT_MWAIT",
+ /* 140 */
+ "VMEXIT_MWAIT_CONDITIONAL",
+ [VMEXIT_NPF] = "VMEXIT_NPF", /* nested paging fault */
+};
+
+
+#if ( HVM_VMX_EXIT_REASON_MAX > HVM_SVM_EXIT_REASON_MAX )
+# define HVM_EXIT_REASON_MAX HVM_VMX_EXIT_REASON_MAX
+# error - Strange!
+#else
+# define HVM_EXIT_REASON_MAX HVM_SVM_EXIT_REASON_MAX
+#endif
+
+/* General hvm information */
+#define SPURIOUS_APIC_VECTOR 0xff
+#define ERROR_APIC_VECTOR 0xfe
+#define INVALIDATE_TLB_VECTOR 0xfd
+#define EVENT_CHECK_VECTOR 0xfc
+#define CALL_FUNCTION_VECTOR 0xfb
+#define THERMAL_APIC_VECTOR 0xfa
+#define LOCAL_TIMER_VECTOR 0xf9
+
+#define EXTERNAL_INTERRUPT_MAX 256
+
+/* Stringify numbers */
+char * hvm_extint_vector_name[EXTERNAL_INTERRUPT_MAX] = {
+ [SPURIOUS_APIC_VECTOR] = "SPURIOUS_APIC",
+ [ERROR_APIC_VECTOR] = "ERROR_APIC",
+ [INVALIDATE_TLB_VECTOR]= "INVALIDATE_TLB",
+ [EVENT_CHECK_VECTOR]= "EVENT_CHECK",
+ [CALL_FUNCTION_VECTOR]= "CALL_FUNCTION",
+ [THERMAL_APIC_VECTOR]= "THERMAL_APIC",
+ [LOCAL_TIMER_VECTOR] = "LOCAL_TIMER",
+};
+
+#define HVM_TRAP_MAX 20
+
+char * hvm_trap_name[HVM_TRAP_MAX] = {
+ [0] = "Divide",
+ [1] = "RESERVED",
+ [2] = "NMI",
+ [3] = "Breakpoint",
+ [4] = "Overflow",
+ [5] = "BOUND",
+ [6] = "Invalid Op",
+ [7] = "Coprocessor not present",
+ [8] = "Double Fault",
+ [9] = "Coprocessor segment overrun",
+ [10] = "TSS",
+ [11] = "Segment not present",
+ [12] = "Stack-segment fault",
+ [13] = "GP",
+ [14] = "Page fault",
+ [15] = "RESERVED",
+ [16] = "FPU",
+ [17] = "Alignment check",
+ [18] = "Machine check",
+ [19] = "SIMD",
+};
+
+
+enum {
+ HVM_EVENT_HANDLER_NONE = 0,
+ HVM_EVENT_HANDLER_PF_XEN = 1,
+ HVM_EVENT_HANDLER_PF_INJECT,
+ HVM_EVENT_HANDLER_INJ_EXC,
+ HVM_EVENT_HANDLER_INJ_VIRQ,
+ HVM_EVENT_HANDLER_REINJ_VIRQ,
+ HVM_EVENT_HANDLER_IO_READ,
+ HVM_EVENT_HANDLER_IO_WRITE,
+ HVM_EVENT_HANDLER_CR_READ, /* 8 */
+ HVM_EVENT_HANDLER_CR_WRITE,
+ HVM_EVENT_HANDLER_DR_READ,
+ HVM_EVENT_HANDLER_DR_WRITE,
+ HVM_EVENT_HANDLER_MSR_READ,
+ HVM_EVENT_HANDLER_MSR_WRITE,
+ HVM_EVENT_HANDLER_CPUID,
+ HVM_EVENT_HANDLER_INTR,
+ HVM_EVENT_HANDLER_NMI, /* 16 */
+ HVM_EVENT_HANDLER_SMI,
+ HVM_EVENT_HANDLER_VMCALL,
+ HVM_EVENT_HANDLER_HLT,
+ HVM_EVENT_HANDLER_INVLPG,
+ HVM_EVENT_HANDLER_MCE,
+ HVM_EVENT_HANDLER_IO_ASSIST,
+ HVM_EVENT_HANDLER_MMIO_ASSIST,
+ HVM_EVENT_HANDLER_CLTS,
+ HVM_EVENT_HANDLER_LMSW,
+ HVM_EVENT_RDTSC,
+ HVM_EVENT_INTR_WINDOW=0x20, /* Oops... skipped 0x1b-1f */
+ HVM_EVENT_NPF,
+ HVM_EVENT_REALMODE_EMULATE,
+ HVM_EVENT_TRAP,
+ HVM_EVENT_TRAP_DEBUG,
+ HVM_EVENT_VLAPIC,
+ HVM_EVENT_HANDLER_MAX
+};
+char * hvm_event_handler_name[HVM_EVENT_HANDLER_MAX] = {
+ "(no handler)",
+ "pf_xen",
+ "pf_inject",
+ "inj_exc",
+ "inj_virq",
+ "reinj_virq",
+ "io_read",
+ "io_write",
+ "cr_read", /* 8 */
+ "cr_write",
+ "dr_read",
+ "dr_write",
+ "msr_read",
+ "msr_write",
+ "cpuid",
+ "intr",
+ "nmi", /* 16 */
+ "smi",
+ "vmcall",
+ "hlt",
+ "invlpg",
+ "mce",
+ "io_assist",
+ "mmio_assist",
+ "clts", /* 24 */
+ "lmsw",
+ "rdtsc",
+ [HVM_EVENT_INTR_WINDOW]="intr_window",
+ "npf",
+ "realmode_emulate",
+ "trap",
+ "trap_debug",
+ "vlapic"
+};
+
+enum {
+ HVM_VOL_VMENTRY,
+ HVM_VOL_VMEXIT,
+ HVM_VOL_HANDLER,
+ HVM_VOL_MAX
+};
+
+enum {
+ GUEST_INTERRUPT_CASE_NONE,
+ /* This interrupt woke, no other interrupts until halt */
+ GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ALONE,
+ /* This interrupt woke, maybe another interrupt before halt */
+ GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ANY,
+ /* Time from interrupt (running) to halt */
+ GUEST_INTERRUPT_CASE_INTERRUPT_TO_HALT,
+ GUEST_INTERRUPT_CASE_MAX,
+};
+
+char *guest_interrupt_case_name[] = {
+ [GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ALONE]="wake to halt alone",
+ /* This interrupt woke, maybe another interrupt before halt */
+ [GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ANY] ="wake to halt any ",
+ /* Time from interrupt (running) to halt */
+ [GUEST_INTERRUPT_CASE_INTERRUPT_TO_HALT] ="intr to halt ",
+};
+
+char *hvm_vol_name[HVM_VOL_MAX] = {
+ [HVM_VOL_VMENTRY]="vmentry",
+ [HVM_VOL_VMEXIT] ="vmexit",
+ [HVM_VOL_HANDLER]="handler",
+};
+
+enum {
+ HYPERCALL_set_trap_table = 0,
+ HYPERCALL_mmu_update,
+ HYPERCALL_set_gdt,
+ HYPERCALL_stack_switch,
+ HYPERCALL_set_callbacks,
+ HYPERCALL_fpu_taskswitch,
+ HYPERCALL_sched_op_compat,
+ HYPERCALL_platform_op,
+ HYPERCALL_set_debugreg,
+ HYPERCALL_get_debugreg,
+ HYPERCALL_update_descriptor,
+ HYPERCALL_memory_op=12,
+ HYPERCALL_multicall,
+ HYPERCALL_update_va_mapping,
+ HYPERCALL_set_timer_op,
+ HYPERCALL_event_channel_op_compat,
+ HYPERCALL_xen_version,
+ HYPERCALL_console_io,
+ HYPERCALL_physdev_op_compat,
+ HYPERCALL_grant_table_op,
+ HYPERCALL_vm_assist,
+ HYPERCALL_update_va_mapping_otherdomain,
+ HYPERCALL_iret,
+ HYPERCALL_vcpu_op,
+ HYPERCALL_set_segment_base,
+ HYPERCALL_mmuext_op,
+ HYPERCALL_acm_op,
+ HYPERCALL_nmi_op,
+ HYPERCALL_sched_op,
+ HYPERCALL_callback_op,
+ HYPERCALL_xenoprof_op,
+ HYPERCALL_event_channel_op,
+ HYPERCALL_physdev_op,
+ HYPERCALL_hvm_op,
+ HYPERCALL_sysctl,
+ HYPERCALL_domctl,
+ HYPERCALL_kexec_op,
+ HYPERCALL_MAX
+};
+
+char *hypercall_name[HYPERCALL_MAX] = {
+ [HYPERCALL_set_trap_table]="set_trap_table",
+ [HYPERCALL_mmu_update]="mmu_update",
+ [HYPERCALL_set_gdt]="set_gdt",
+ [HYPERCALL_stack_switch]="stack_switch",
+ [HYPERCALL_set_callbacks]="set_callbacks",
+ [HYPERCALL_fpu_taskswitch]="fpu_taskswitch",
+ [HYPERCALL_sched_op_compat]="sched_op(compat)",
+ [HYPERCALL_platform_op]="platform_op",
+ [HYPERCALL_set_debugreg]="set_debugreg",
+ [HYPERCALL_get_debugreg]="get_debugreg",
+ [HYPERCALL_update_descriptor]="update_descriptor",
+ [HYPERCALL_memory_op]="memory_op",
+ [HYPERCALL_multicall]="multicall",
+ [HYPERCALL_update_va_mapping]="update_va_mapping",
+ [HYPERCALL_set_timer_op]="set_timer_op",
+ [HYPERCALL_event_channel_op_compat]="evtchn_op(compat)",
+ [HYPERCALL_xen_version]="xen_version",
+ [HYPERCALL_console_io]="console_io",
+ [HYPERCALL_physdev_op_compat]="physdev_op(compat)",
+ [HYPERCALL_grant_table_op]="grant_table_op",
+ [HYPERCALL_vm_assist]="vm_assist",
+ [HYPERCALL_update_va_mapping_otherdomain]="update_va_mapping_otherdomain",
+ [HYPERCALL_iret]="iret",
+ [HYPERCALL_vcpu_op]="vcpu_op",
+ [HYPERCALL_set_segment_base]="set_segment_base",
+ [HYPERCALL_mmuext_op]="mmuext_op",
+ [HYPERCALL_acm_op]="acm_op",
+ [HYPERCALL_nmi_op]="nmi_op",
+ [HYPERCALL_sched_op]="sched_op",
+ [HYPERCALL_callback_op]="callback_op",
+ [HYPERCALL_xenoprof_op]="xenoprof_op",
+ [HYPERCALL_event_channel_op]="evtchn_op",
+ [HYPERCALL_physdev_op]="physdev_op",
+ [HYPERCALL_hvm_op]="hvm_op",
+ [HYPERCALL_sysctl]="sysctl",
+ [HYPERCALL_domctl]="domctl",
+ [HYPERCALL_kexec_op]="kexec_op"
+};
+
+enum {
+ PF_XEN_EMUL_LVL_0,
+ PF_XEN_EMUL_LVL_1,
+ PF_XEN_EMUL_LVL_2,
+ PF_XEN_EMUL_LVL_3,
+ PF_XEN_EMUL_LVL_4,
+ PF_XEN_EMUL_EARLY_UNSHADOW,
+ PF_XEN_EMUL_SET_CHANGED,
+ PF_XEN_EMUL_SET_UNCHANGED,
+ PF_XEN_EMUL_SET_FLUSH,
+ PF_XEN_EMUL_SET_ERROR,
+ PF_XEN_EMUL_PROMOTE,
+ PF_XEN_EMUL_DEMOTE,
+ PF_XEN_EMUL_PREALLOC_UNPIN,
+ PF_XEN_EMUL_PREALLOC_UNHOOK,
+ PF_XEN_EMUL_MAX,
+};
+
+char * pf_xen_emul_name[PF_XEN_EMUL_MAX] = {
+ [PF_XEN_EMUL_LVL_0]="non-linmap",
+ [PF_XEN_EMUL_LVL_1]="linmap l1",
+ [PF_XEN_EMUL_LVL_2]="linmap l2",
+ [PF_XEN_EMUL_LVL_3]="linmap l3",
+ [PF_XEN_EMUL_LVL_4]="linmap l4",
+ [PF_XEN_EMUL_EARLY_UNSHADOW]="early unshadow",
+ [PF_XEN_EMUL_SET_UNCHANGED]="set unchanged",
+ [PF_XEN_EMUL_SET_CHANGED]="set changed",
+ [PF_XEN_EMUL_SET_FLUSH]="set flush",
+ [PF_XEN_EMUL_SET_ERROR]="set error",
+ [PF_XEN_EMUL_PROMOTE]="promote",
+ [PF_XEN_EMUL_DEMOTE]="demote",
+ [PF_XEN_EMUL_PREALLOC_UNPIN]="unpin",
+ [PF_XEN_EMUL_PREALLOC_UNHOOK]="unhook",
+};
+
+/* Rio only */
+enum {
+ PF_XEN_NON_EMUL_VA_USER,
+ PF_XEN_NON_EMUL_VA_KERNEL,
+ PF_XEN_NON_EMUL_EIP_USER,
+ PF_XEN_NON_EMUL_EIP_KERNEL,
+ PF_XEN_NON_EMUL_MAX,
+};
+
+char * pf_xen_non_emul_name[PF_XEN_NON_EMUL_MAX] = {
+ [PF_XEN_NON_EMUL_VA_USER]="va user",
+ [PF_XEN_NON_EMUL_VA_KERNEL]="va kernel",
+ [PF_XEN_NON_EMUL_EIP_USER]="eip user",
+ [PF_XEN_NON_EMUL_EIP_KERNEL]="eip kernel",
+};
+
+enum {
+ PF_XEN_FIXUP_PREALLOC_UNPIN,
+ PF_XEN_FIXUP_PREALLOC_UNHOOK,
+ PF_XEN_FIXUP_UNSYNC,
+ PF_XEN_FIXUP_OOS_ADD,
+ PF_XEN_FIXUP_OOS_EVICT,
+ PF_XEN_FIXUP_PROMOTE,
+ PF_XEN_FIXUP_UPDATE_ONLY,
+ PF_XEN_FIXUP_WRMAP,
+ PF_XEN_FIXUP_BRUTE_FORCE,
+ PF_XEN_FIXUP_MAX,
+};
+
+char * pf_xen_fixup_name[PF_XEN_FIXUP_MAX] = {
+ [PF_XEN_FIXUP_PREALLOC_UNPIN] = "unpin",
+ [PF_XEN_FIXUP_PREALLOC_UNHOOK] = "unhook",
+ [PF_XEN_FIXUP_UNSYNC] = "unsync",
+ [PF_XEN_FIXUP_OOS_ADD] = "oos-add",
+ [PF_XEN_FIXUP_OOS_EVICT] = "oos-evict",
+ [PF_XEN_FIXUP_PROMOTE] = "promote",
+ [PF_XEN_FIXUP_UPDATE_ONLY] = "update",
+ [PF_XEN_FIXUP_WRMAP] = "wrmap",
+ [PF_XEN_FIXUP_BRUTE_FORCE] = "wrmap-bf",
+};
+
+enum {
+ PF_XEN_NOT_SHADOW = 1,
+ PF_XEN_FAST_PROPAGATE,
+ PF_XEN_FAST_MMIO,
+ PF_XEN_FALSE_FAST_PATH,
+ PF_XEN_MMIO,
+ PF_XEN_FIXUP,
+ PF_XEN_DOMF_DYING,
+ PF_XEN_EMULATE,
+ PF_XEN_EMULATE_UNSHADOW_USER,
+ PF_XEN_EMULATE_UNSHADOW_EVTINJ,
+ PF_XEN_EMULATE_UNSHADOW_UNHANDLED,
+ PF_XEN_LAST_FAULT=PF_XEN_EMULATE_UNSHADOW_UNHANDLED,
+ PF_XEN_NON_EMULATE,
+ PF_XEN_NO_HANDLER,
+ PF_XEN_MAX,
+};
+
+#define SHADOW_WRMAP_BF 12
+#define SHADOW_PREALLOC_UNPIN 13
+#define SHADOW_RESYNC_FULL 14
+#define SHADOW_RESYNC_ONLY 15
+
+char * pf_xen_name[PF_XEN_MAX] = {
+ [PF_XEN_NOT_SHADOW]="propagate",
+ [PF_XEN_FAST_PROPAGATE]="fast propagate",
+ [PF_XEN_FAST_MMIO]="fast mmio",
+ [PF_XEN_FALSE_FAST_PATH]="false fast path",
+ [PF_XEN_MMIO]="mmio",
+ [PF_XEN_FIXUP]="fixup",
+ [PF_XEN_DOMF_DYING]="dom dying",
+ [PF_XEN_EMULATE]="emulate",
+ [PF_XEN_EMULATE_UNSHADOW_USER]="unshadow:user-mode",
+ [PF_XEN_EMULATE_UNSHADOW_EVTINJ]="unshadow:evt inj",
+ [PF_XEN_EMULATE_UNSHADOW_UNHANDLED]="unshadow:unhandled instr",
+ [PF_XEN_NON_EMULATE]="fixup|mmio",
+ [PF_XEN_NO_HANDLER]="(no handler)",
+};
+
+#define CORR_VA_INVALID (0ULL-1)
+
+enum {
+ NONPF_MMIO_APIC,
+ NONPF_MMIO_NPF,
+ NONPF_MMIO_UNKNOWN,
+ NONPF_MMIO_MAX
+};
+
+struct mmio_info {
+ unsigned long long gpa;
+ unsigned long long va; /* Filled only by shadow */
+ unsigned data;
+ unsigned data_valid:1, is_write:1;
+};
+
+struct pf_xen_extra {
+ unsigned long long va;
+ union {
+ unsigned flags;
+ struct {
+ unsigned flag_set_ad:1,
+ flag_set_a:1,
+ flag_shadow_l1_get_ref:1,
+ flag_shadow_l1_put_ref:1,
+ flag_l2_propagate:1,
+ flag_set_changed:1,
+ flag_set_flush:1,
+ flag_set_error:1,
+ flag_demote:1,
+ flag_promote:1,
+ flag_wrmap:1,
+ flag_wrmap_guess_found:1,
+ flag_wrmap_brute_force:1,
+ flag_early_unshadow:1,
+ flag_emulation_2nd_pt_written:1,
+ flag_emulation_last_failed:1,
+ flag_emulate_full_pt:1,
+ flag_prealloc_unhook:1,
+ flag_unsync:1,
+ flag_oos_fixup_add:1,
+ flag_oos_fixup_evict:1;
+ };
+ }; /* Miami + ; fixup & emulate */
+ unsigned int error_code; /* Rio only */
+
+ /* Calculated */
+ int pf_case; /* Rio */
+
+ /* MMIO only */
+ unsigned long long gpa;
+ unsigned int data;
+
+ /* Emulate only */
+ unsigned long long gl1e; /* Miami + */
+ unsigned long long wval; /* Miami */
+ unsigned long long corresponding_va;
+ unsigned int pt_index[5], pt_is_lo;
+ int pt_level;
+
+ /* Other */
+ unsigned long long gfn;
+
+ /* Flags */
+ unsigned corr_valid:1,
+ corr_is_kernel:1,
+ va_is_kernel:1;
+};
+
+struct pcpu_info;
+
+#define GUEST_INTERRUPT_MAX 350
+#define FAKE_VECTOR 349
+#define CR_MAX 9
+#define RESYNCS_MAX 17
+#define PF_XEN_FIXUP_UNSYNC_RESYNC_MAX 2
+
+struct hvm_data;
+
+struct hvm_summary_handler_node {
+ void (*handler)(struct hvm_data *, void* data);
+ void *data;
+ struct hvm_summary_handler_node *next;
+};
+
+struct hvm_data {
+ /* Summary information */
+ int init;
+ int vmexit_valid;
+ int summary_info;
+ struct vcpu_data *v; /* up-pointer */
+
+ /* SVM / VMX compatibility. FIXME - should be global */
+ char ** exit_reason_name;
+ int exit_reason_max;
+ struct hvm_summary_handler_node *exit_reason_summary_handler_list[HVM_EXIT_REASON_MAX];
+
+ /* Information about particular exit reasons */
+ struct {
+ struct event_cycle_summary exit_reason[HVM_EXIT_REASON_MAX];
+ int extint[EXTERNAL_INTERRUPT_MAX+1];
+ int *extint_histogram;
+ struct event_cycle_summary trap[HVM_TRAP_MAX];
+ struct event_cycle_summary pf_xen[PF_XEN_MAX];
+ struct event_cycle_summary pf_xen_emul[PF_XEN_EMUL_MAX];
+ struct event_cycle_summary pf_xen_emul_early_unshadow[5];
+ struct event_cycle_summary pf_xen_non_emul[PF_XEN_NON_EMUL_MAX];
+ struct event_cycle_summary pf_xen_fixup[PF_XEN_FIXUP_MAX];
+ struct event_cycle_summary pf_xen_fixup_unsync_resync[PF_XEN_FIXUP_UNSYNC_RESYNC_MAX+1];
+ struct event_cycle_summary cr_write[CR_MAX];
+ struct event_cycle_summary cr3_write_resyncs[RESYNCS_MAX+1];
+ struct event_cycle_summary vmcall[HYPERCALL_MAX+1];
+ struct event_cycle_summary generic[HVM_EVENT_HANDLER_MAX];
+ struct event_cycle_summary mmio[NONPF_MMIO_MAX];
+ struct hvm_gi_struct {
+ int count;
+ struct cycle_summary runtime[GUEST_INTERRUPT_CASE_MAX];
+ /* OK, not summary info, but still... */
+ int is_wake;
+ tsc_t start_tsc;
+ } guest_interrupt[GUEST_INTERRUPT_MAX + 1];
+ /* IPI Latency */
+ struct event_cycle_summary ipi_latency;
+ int ipi_count[256];
+ struct {
+ struct io_address *mmio, *pio;
+ } io;
+ } summary;
+
+ /* In-flight accumulation information */
+ struct {
+ union {
+ struct {
+ unsigned port:31,
+ is_write:1;
+ unsigned int val;
+ } io;
+ struct pf_xen_extra pf_xen;
+ struct {
+ unsigned cr;
+ unsigned long long val;
+ int repromote;
+ } cr_write;
+ struct {
+ unsigned addr;
+ unsigned long long val;
+ } msr;
+ struct {
+ unsigned int event;
+ uint32_t d[4];
+ } generic;
+ struct {
+ unsigned eax;
+ } vmcall;
+ struct {
+ unsigned vec;
+ } intr;
+ };
+ /* MMIO gets its separate area, since many exits may use it */
+ struct mmio_info mmio;
+ }inflight;
+ int resyncs;
+ void (*post_process)(struct hvm_data *);
+ tsc_t exit_tsc, arc_cycles, entry_tsc;
+ unsigned long long rip;
+ unsigned exit_reason, event_handler;
+ int short_summary_done:1, prealloc_unpin:1, wrmap_bf:1;
+
+ /* Immediate processing */
+ void *d;
+
+ /* Wake-to-halt detection. See comment above. */
+ struct {
+ unsigned waking:1;
+ /* Wake vector: keep track of time from vmentry until:
+ next halt, or next interrupt */
+ int vector, interrupts, interrupts_wanting_tsc;
+ } w2h;
+
+ /* Historical info */
+ tsc_t last_rdtsc;
+};
+
+enum {
+ HVM_SHORT_SUMMARY_EMULATE,
+ HVM_SHORT_SUMMARY_UNSYNC,
+ HVM_SHORT_SUMMARY_FIXUP,
+ HVM_SHORT_SUMMARY_MMIO,
+ HVM_SHORT_SUMMARY_PROPAGATE,
+ HVM_SHORT_SUMMARY_CR3,
+ HVM_SHORT_SUMMARY_VMCALL,
+ HVM_SHORT_SUMMARY_INTERRUPT,
+ HVM_SHORT_SUMMARY_HLT,
+ HVM_SHORT_SUMMARY_OTHER,
+ HVM_SHORT_SUMMARY_MAX,
+};
+
+char *hvm_short_summary_name[HVM_SHORT_SUMMARY_MAX] = {
+ [HVM_SHORT_SUMMARY_EMULATE] ="emulate",
+ [HVM_SHORT_SUMMARY_UNSYNC] ="unsync",
+ [HVM_SHORT_SUMMARY_FIXUP] ="fixup",
+ [HVM_SHORT_SUMMARY_MMIO] ="mmio",
+ [HVM_SHORT_SUMMARY_PROPAGATE]="propagate",
+ [HVM_SHORT_SUMMARY_CR3] ="cr3",
+ [HVM_SHORT_SUMMARY_VMCALL] ="vmcall",
+ [HVM_SHORT_SUMMARY_INTERRUPT]="intr",
+ [HVM_SHORT_SUMMARY_HLT] ="hlt",
+ [HVM_SHORT_SUMMARY_OTHER] ="other",
+};
+
+struct hvm_short_summary_struct {
+ struct cycle_summary s[HVM_SHORT_SUMMARY_MAX];
+};
+
+void init_hvm_data(struct hvm_data *h, struct vcpu_data *v) {
+ int i;
+
+ if(h->init)
+ return;
+
+ h->v = v;
+
+ h->init = 1;
+
+ if(opt.svm_mode) {
+ h->exit_reason_max = HVM_SVM_EXIT_REASON_MAX;
+ h->exit_reason_name = hvm_svm_exit_reason_name;
+ } else {
+ h->exit_reason_max = HVM_VMX_EXIT_REASON_MAX;
+ h->exit_reason_name = hvm_vmx_exit_reason_name;
+ }
+
+ if(opt.histogram_interrupt_eip) {
+ int count = ((1ULL<<ADDR_SPACE_BITS)/opt.histogram_interrupt_increment);
+ size_t size = count * sizeof(int);
+ h->summary.extint_histogram = malloc(size);
+ if(h->summary.extint_histogram)
+ bzero(h->summary.extint_histogram, size);
+ else {
+ fprintf(stderr, "FATAL: Could not allocate %zd bytes for interrupt
histogram!\n",
+ size);
+ error(ERR_SYSTEM, NULL);
+ }
+
+ }
+ for(i=0; i<GUEST_INTERRUPT_MAX+1; i++)
+ h->summary.guest_interrupt[i].count=0;
+}
+
+/* PV data */
+enum {
+ PV_HYPERCALL=1,
+ PV_TRAP=3,
+ PV_PAGE_FAULT,
+ PV_FORCED_INVALID_OP,
+ PV_EMULATE_PRIVOP,
+ PV_EMULATE_4GB,
+ PV_MATH_STATE_RESTORE,
+ PV_PAGING_FIXUP,
+ PV_GDT_LDT_MAPPING_FAULT,
+ PV_PTWR_EMULATION,
+ PV_PTWR_EMULATION_PAE,
+ PV_HYPERCALL_V2 = 13,
+ PV_HYPERCALL_SUBCALL = 14,
+ PV_MAX
+};
+
+char *pv_name[PV_MAX] = {
+ [PV_HYPERCALL]="hypercall",
+ [PV_TRAP]="trap",
+ [PV_PAGE_FAULT]="page_fault",
+ [PV_FORCED_INVALID_OP]="forced_invalid_op",
+ [PV_EMULATE_PRIVOP]="emulate privop",
+ [PV_EMULATE_4GB]="emulate 4g",
+ [PV_MATH_STATE_RESTORE]="math state restore",
+ [PV_PAGING_FIXUP]="paging fixup",
+ [PV_GDT_LDT_MAPPING_FAULT]="gdt/ldt mapping fault",
+ [PV_PTWR_EMULATION]="ptwr",
+ [PV_PTWR_EMULATION_PAE]="ptwr(pae)",
+ [PV_HYPERCALL_V2]="hypercall",
+ [PV_HYPERCALL_SUBCALL]="hypercall (subcall)",
+};
+
+#define PV_HYPERCALL_MAX 56
+#define PV_TRAP_MAX 20
+
+struct pv_data {
+ unsigned summary_info:1;
+ int count[PV_MAX];
+ int hypercall_count[PV_HYPERCALL_MAX];
+ int trap_count[PV_TRAP_MAX];
+};
+
+/* Sched data */
+
+enum {
+ SCHED_DOM_ADD=1,
+ SCHED_DOM_REM,
+ SCHED_SLEEP,
+ SCHED_WAKE,
+ SCHED_YIELD,
+ SCHED_BLOCK,
+ SCHED_SHUTDOWN,
+ SCHED_CTL,
+ SCHED_ADJDOM,
+ SCHED_SWITCH,
+ SCHED_S_TIMER_FN,
+ SCHED_T_TIMER_FN,
+ SCHED_DOM_TIMER_FN,
+ SCHED_SWITCH_INFPREV,
+ SCHED_SWITCH_INFNEXT,
+ SCHED_SHUTDOWN_CODE,
+ SCHED_MAX
+};
+
+enum {
+ RUNSTATE_RUNNING=0,
+ RUNSTATE_RUNNABLE,
+ RUNSTATE_BLOCKED,
+ RUNSTATE_OFFLINE,
+ RUNSTATE_LOST,
+ RUNSTATE_QUEUED,
+ RUNSTATE_INIT,
+ RUNSTATE_MAX
+};
+
+int runstate_graph[RUNSTATE_MAX] =
+{
+ [RUNSTATE_BLOCKED]=0,
+ [RUNSTATE_OFFLINE]=1,
+ [RUNSTATE_RUNNABLE]=2,
+ [RUNSTATE_RUNNING]=3,
+ [RUNSTATE_LOST]=-1,
+ [RUNSTATE_QUEUED]=-2,
+ [RUNSTATE_INIT]=-2,
+};
+
+char * runstate_name[RUNSTATE_MAX]={
+ [RUNSTATE_RUNNING]= "running",
+ [RUNSTATE_RUNNABLE]="runnable",
+ [RUNSTATE_BLOCKED]= "blocked", /* to be blocked */
+ [RUNSTATE_OFFLINE]= "offline",
+ [RUNSTATE_QUEUED]= "queued",
+ [RUNSTATE_INIT]= "init",
+ [RUNSTATE_LOST]= "lost",
+};
+
+enum {
+ RUNNABLE_STATE_INVALID,
+ RUNNABLE_STATE_WAKE,
+ RUNNABLE_STATE_PREEMPT,
+ RUNNABLE_STATE_OTHER,
+ RUNNABLE_STATE_MAX
+};
+
+char * runnable_state_name[RUNNABLE_STATE_MAX]={
+ [RUNNABLE_STATE_INVALID]="invalid", /* Should never show up */
+ [RUNNABLE_STATE_WAKE]="wake",
+ [RUNNABLE_STATE_PREEMPT]="preempt",
+ [RUNNABLE_STATE_OTHER]="other",
+};
+
+/* Memory data */
+enum {
+ MEM_PAGE_GRANT_MAP = 1,
+ MEM_PAGE_GRANT_UNMAP,
+ MEM_PAGE_GRANT_TRANSFER,
+ MEM_SET_P2M_ENTRY,
+ MEM_DECREASE_RESERVATION,
+ MEM_POD_POPULATE = 16,
+ MEM_POD_ZERO_RECLAIM,
+ MEM_POD_SUPERPAGE_SPLINTER,
+ MEM_MAX
+};
+
+char *mem_name[MEM_MAX] = {
+ [MEM_PAGE_GRANT_MAP] = "grant-map",
+ [MEM_PAGE_GRANT_UNMAP] = "grant-unmap",
+ [MEM_PAGE_GRANT_TRANSFER] = "grant-transfer",
+ [MEM_SET_P2M_ENTRY] = "set-p2m",
+ [MEM_DECREASE_RESERVATION] = "decrease-reservation",
+ [MEM_POD_POPULATE] = "pod-populate",
+ [MEM_POD_ZERO_RECLAIM] = "pod-zero-reclaim",
+ [MEM_POD_SUPERPAGE_SPLINTER] = "pod-superpage-splinter",
+};
+
+/* Per-unit information. */
+
+struct cr3_value_struct {
+ struct cr3_value_struct * next;
+ struct cr3_value_struct * gnext;
+ unsigned long long gmfn;
+ int cr3_id;
+ unsigned long long first_time, last_time, run_time;
+ struct cycle_summary total_time, guest_time, hv_time;
+ int switch_count, flush_count;
+
+ struct hvm_short_summary_struct hvm;
+
+ struct {
+ int now;
+ int count;
+ } prealloc_unpin;
+
+ struct {
+ unsigned callback:1;
+ unsigned flush_count, switch_count;
+ unsigned fixup_user, emulate_corr_user;
+ } destroy;
+};
+
+#ifndef MAX_CPUS
+#define MAX_CPUS 256
+#endif
+typedef uint32_t cpu_mask_t;
+
+#define IDLE_DOMAIN 32767
+#define DEFAULT_DOMAIN 32768
+
+#define MAX_VLAPIC_LIST 8
+struct vlapic_struct {
+ struct {
+ struct outstanding_ipi {
+ tsc_t first_tsc;
+ int vec, count;
+ int injected, valid;
+ } list[MAX_VLAPIC_LIST];
+ } outstanding;
+};
+
+struct vcpu_data {
+ int vid;
+ struct domain_data *d; /* up-pointer */
+ unsigned activated:1;
+
+ int guest_paging_levels;
+
+ /* Schedule info */
+ struct {
+ int state;
+ int runnable_state; /* Only valid when state==RUNSTATE_RUNNABLE */
+ tsc_t tsc;
+ /* TSC skew detection/correction */
+ struct last_oldstate_struct {
+ int wrong, actual, pid;
+ tsc_t tsc;
+ } last_oldstate;
+ /* Performance counters */
+ unsigned long long p1_start, p2_start;
+ } runstate;
+ struct pcpu_info *p;
+ tsc_t pcpu_tsc;
+
+ /* Hardware tracking */
+ struct {
+ long long val;
+ tsc_t start_time;
+ struct cr3_value_struct *data;
+ } cr3;
+
+ /* IPI latency tracking */
+ struct vlapic_struct vlapic;
+
+ /* Summary info */
+ struct cycle_framework f;
+ struct cycle_summary runstates[RUNSTATE_MAX];
+ struct cycle_summary runnable_states[RUNNABLE_STATE_MAX];
+ struct weighted_cpi_summary cpi;
+ struct cycle_summary cpu_affinity_all,
+ cpu_affinity_pcpu[MAX_CPUS];
+ enum {
+ VCPU_DATA_NONE=0,
+ VCPU_DATA_HVM,
+ VCPU_DATA_PV
+ } data_type;
+ union {
+ struct hvm_data hvm;
+ struct pv_data pv;
+ };
+};
+
+enum {
+ DOMAIN_RUNSTATE_BLOCKED=0,
+ DOMAIN_RUNSTATE_PARTIAL_RUN,
+ DOMAIN_RUNSTATE_FULL_RUN,
+ DOMAIN_RUNSTATE_PARTIAL_CONTENTION,
+ DOMAIN_RUNSTATE_CONCURRENCY_HAZARD,
+ DOMAIN_RUNSTATE_FULL_CONTENTION,
+ DOMAIN_RUNSTATE_LOST,
+ DOMAIN_RUNSTATE_MAX
+};
+
+char * domain_runstate_name[] = {
+ [DOMAIN_RUNSTATE_BLOCKED]="blocked",
+ [DOMAIN_RUNSTATE_PARTIAL_RUN]="partial run",
+ [DOMAIN_RUNSTATE_FULL_RUN]="full run",
+ [DOMAIN_RUNSTATE_PARTIAL_CONTENTION]="partial contention",
+ [DOMAIN_RUNSTATE_CONCURRENCY_HAZARD]="concurrency_hazard",
+ [DOMAIN_RUNSTATE_FULL_CONTENTION]="full_contention",
+ [DOMAIN_RUNSTATE_LOST]="lost",
+};
+
+enum {
+ POD_RECLAIM_CONTEXT_UNKNOWN=0,
+ POD_RECLAIM_CONTEXT_FAULT,
+ POD_RECLAIM_CONTEXT_BALLOON,
+ POD_RECLAIM_CONTEXT_MAX
+};
+
+char * pod_reclaim_context_name[] = {
+ [POD_RECLAIM_CONTEXT_UNKNOWN]="unknown",
+ [POD_RECLAIM_CONTEXT_FAULT]="fault",
+ [POD_RECLAIM_CONTEXT_BALLOON]="balloon",
+};
+
+#define POD_ORDER_MAX 4
+
+struct domain_data {
+ struct domain_data *next;
+ int did;
+ struct vcpu_data *vcpu[MAX_CPUS];
+
+ int max_vid;
+
+ int runstate;
+ tsc_t runstate_tsc;
+ struct cycle_summary total_time;
+ struct cycle_summary runstates[DOMAIN_RUNSTATE_MAX];
+ struct cr3_value_struct *cr3_value_head;
+ struct eip_list_struct *emulate_eip_list;
+ struct eip_list_struct *interrupt_eip_list;
+
+ int guest_interrupt[GUEST_INTERRUPT_MAX+1];
+ struct hvm_short_summary_struct hvm_short;
+ struct {
+ int done[MEM_MAX];
+ int done_interval[MEM_MAX];
+
+ int done_for[MEM_MAX];
+ int done_for_interval[MEM_MAX];
+ } memops;
+
+ struct {
+ int reclaim_order[POD_ORDER_MAX];
+ int reclaim_context[POD_RECLAIM_CONTEXT_MAX];
+ int reclaim_context_order[POD_RECLAIM_CONTEXT_MAX][POD_ORDER_MAX];
+ /* FIXME: Do a full cycle summary */
+ int populate_order[POD_ORDER_MAX];
+ } pod;
+};
+
+struct domain_data * domain_list=NULL;
+
+struct domain_data default_domain;
+
+enum {
+ TOPLEVEL_GEN=0,
+ TOPLEVEL_SCHED,
+ TOPLEVEL_DOM0OP,
+ TOPLEVEL_HVM,
+ TOPLEVEL_MEM,
+ TOPLEVEL_PV,
+ TOPLEVEL_SHADOW,
+ TOPLEVEL_HW,
+ TOPLEVEL_MAX=TOPLEVEL_HW+1,
+};
+
+char * toplevel_name[TOPLEVEL_MAX] = {
+ [TOPLEVEL_GEN]="gen",
+ [TOPLEVEL_SCHED]="sched",
+ [TOPLEVEL_DOM0OP]="dom0op",
+ [TOPLEVEL_HVM]="hvm",
+ [TOPLEVEL_MEM]="mem",
+ [TOPLEVEL_PV]="pv",
+ [TOPLEVEL_SHADOW]="shadow",
+ [TOPLEVEL_HW]="hw",
+};
+
+struct trace_volume {
+ unsigned long long toplevel[TOPLEVEL_MAX];
+ unsigned long long sched_verbose;
+ unsigned long long hvm[HVM_VOL_MAX];
+} volume;
+
+#define UPDATE_VOLUME(_p,_x,_s) \
+ do { \
+ (_p)->volume.total._x += _s; \
+ (_p)->volume.last_buffer._x += _s; \
+ } while(0)
+
+void volume_clear(struct trace_volume *vol)
+{
+ bzero(vol, sizeof(*vol));
+}
+
+void volume_summary(struct trace_volume *vol)
+{
+ int j, k;
+ for(j=0; j<TOPLEVEL_MAX; j++)
+ if(vol->toplevel[j]) {
+ printf(" %-6s: %10lld\n",
+ toplevel_name[j], vol->toplevel[j]);
+ switch(j) {
+ case TOPLEVEL_SCHED:
+ if(vol->sched_verbose)
+ printf(" +-verbose: %10lld\n",
+ vol->sched_verbose);
+ break;
+ case TOPLEVEL_HVM:
+ for(k=0; k<HVM_VOL_MAX; k++) {
+ if(vol->hvm[k])
+ printf(" +-%-7s: %10lld\n",
+ hvm_vol_name[k], vol->hvm[k]);
+ }
+
+ break;
+ }
+ }
+}
+
+struct pcpu_info {
+ /* Information about this pcpu */
+ unsigned active:1, summary:1;
+ int pid;
+
+ /* Information related to scanning thru the file */
+ tsc_t first_tsc, last_tsc, order_tsc;
+ loff_t file_offset;
+ loff_t next_cpu_change_offset;
+ struct record_info ri;
+ int last_cpu_change_pid;
+ int power_state;
+
+ /* Information related to tsc skew detection / correction */
+ struct {
+ tsc_t offset;
+ cpu_mask_t downstream; /* To detect cycles in dependencies */
+ } tsc_skew;
+
+ /* Information related to domain tracking */
+ struct vcpu_data * current;
+ struct {
+ unsigned active:1,
+ domain_valid:1,
+            seen_valid_schedule:1; /* Seen an actual schedule since lost records */
+ unsigned did:16,vid:16;
+ tsc_t tsc;
+ } lost_record;
+
+ /* Record volume */
+ struct {
+ tsc_t buffer_first_tsc,
+ buffer_dom0_runstate_tsc,
+ buffer_dom0_runstate_cycles[RUNSTATE_MAX];
+ int buffer_dom0_runstate;
+ unsigned buffer_size;
+ struct trace_volume total, last_buffer;
+ } volume;
+
+ /* Time report */
+ struct {
+ tsc_t tsc;
+ struct cycle_summary idle, running, lost;
+ } time;
+};
+
+void __fill_in_record_info(struct pcpu_info *p);
+
+#define INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX 10
+
+struct {
+ int max_active_pcpu;
+ loff_t last_epoch_offset;
+ int early_eof;
+ int lost_cpus;
+ tsc_t now;
+ struct cycle_framework f;
+ tsc_t buffer_trace_virq_tsc;
+ struct pcpu_info pcpu[MAX_CPUS];
+
+ struct {
+ int id;
+ /* Invariant: head null => tail null; head !null => tail valid */
+ struct cr3_value_struct *head, **tail;
+ } cr3;
+
+ struct {
+ tsc_t start_tsc;
+ /* Information about specific interval output types */
+ union {
+ struct {
+ struct interval_element ** values;
+ int count;
+ } array;
+ struct {
+ struct interval_list *head, **tail;
+ } list;
+ struct cr3_value_struct *cr3;
+ struct {
+ struct domain_data *d;
+ int guest_vector[INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX];
+ } domain;
+ };
+ } interval;
+} P = { 0 };
+
+/* Function prototypes */
+char * pcpu_string(int pcpu);
+void pcpu_string_draw(struct pcpu_info *p);
+void process_generic(struct record_info *ri);
+void dump_generic(FILE *f, struct record_info *ri);
+ssize_t __read_record(struct trace_record *rec, loff_t offset);
+void error(enum error_level l, struct record_info *ri);
+void update_io_address(struct io_address ** list, unsigned int pa, int dir,
+ tsc_t arc_cycles, unsigned int va);
+int check_extra_words(struct record_info *ri, int expected_size, const char *record);
+int vcpu_set_data_type(struct vcpu_data *v, int type);
+
+void cpumask_init(cpu_mask_t *c) {
+ *c = 0UL;
+}
+
+void cpumask_clear(cpu_mask_t *c, int cpu) {
+ *c &= ~(1UL << cpu);
+}
+
+void cpumask_set(cpu_mask_t *c, int cpu) {
+ *c |= (1UL << cpu);
+}
+
+int cpumask_isset(const cpu_mask_t *c, int cpu) {
+ if(*c & (1UL<<cpu))
+ return 1;
+ else
+ return 0;
+}
+
+void cpumask_union(cpu_mask_t *d, const cpu_mask_t *s) {
+ *d |= *s;
+}
+
+/* -- Time code -- */
+
+void cycles_to_time(unsigned long long c, struct time_struct *t) {
+ t->time = ((c - P.f.first_tsc) << 10) / opt.cpu_qhz;
+ t->s = t->time / 1000000000;
+ t->ns = t->time - (t->s * 1000000000);
+}
+
+void abs_cycles_to_time(unsigned long long ac, struct time_struct *t) {
+ if(ac > P.f.first_tsc) {
+        /* t->time = ((ac - P.f.first_tsc) * 1000) / (opt.cpu_hz / 1000000 ); */
+ /* t->s = t->time / 1000000000; */
+ /* t->ns = t->time % 1000000000; */
+ t->time = ((ac - P.f.first_tsc) << 10) / opt.cpu_qhz;
+ t->s = t->time / 1000000000;
+ t->ns = t->time - (t->s * 1000000000);
+ } else {
+ t->time = t->s = t->ns = 0;
+ }
+}
+
+tsc_t abs_cycles_to_global(unsigned long long ac) {
+ if(ac > P.f.first_tsc)
+ return ac - P.f.first_tsc;
+ else
+ return 0;
+}
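[For reference, a minimal standalone sketch of the cycles-to-time arithmetic used by cycles_to_time()/abs_cycles_to_time() above. It assumes opt.cpu_qhz is the CPU frequency pre-scaled as (cpu_hz << 10) / 10^9; that definition is not part of this hunk, so treat it and the frequency value below as assumptions.]

    /* Illustration only, not part of the patch. */
    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t cpu_hz  = 2400000000ULL;                  /* hypothetical 2.4 GHz */
        uint64_t cpu_qhz = (cpu_hz << 10) / 1000000000ULL; /* assumed pre-scaling */
        uint64_t cycles  = 3600000000ULL;                  /* ~1.5 s worth of cycles */

        /* Same arithmetic as above: (cycles << 10) / qhz yields nanoseconds */
        uint64_t ns = (cycles << 10) / cpu_qhz;

        printf("%llu.%09llu s\n",
               (unsigned long long)(ns / 1000000000ULL),
               (unsigned long long)(ns % 1000000000ULL));
        return 0;
    }

[The integer pre-scaling introduces a small rounding error, which is acceptable for the reporting done here.]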
+
+void scatterplot_vs_time(tsc_t atsc, long long y) {
+ struct time_struct t;
+
+ abs_cycles_to_time(atsc, &t);
+
+ printf("%u.%09u %lld\n", t.s, t.ns, y);
+}
+
+/* -- Summary Code -- */
+
+/* With compliments to "Numerical Recipes in C", which provided the algorithm
+ * and basic template for this function. */
+long long percentile(long long * A, int N, int ple) {
+ int I, J, L, R, K;
+
+ long long X, W;
+
+ /* No samples! */
+ if ( N == 0 )
+ return 0;
+
+ /* Find K, the element # we want */
+ K=N*ple/100;
+
+ /* Set the left and right boundaries of the current search space */
+ L=0; R=N-1;
+
+ while(L < R) {
+ /* X: The value to order everything higher / lower than */
+ X=A[K];
+
+ /* Starting at the left and the right... */
+ I=L;
+ J=R;
+
+ do {
+ /* Find the first element on the left that is out-of-order w/ X */
+ while(A[I]<X)
+ I++;
+ /* Find the first element on the right that is out-of-order w/ X */
+ while(X<A[J])
+ J--;
+
+ /* If we found something out-of-order */
+ if(I<=J) {
+ /* Switch the values */
+ W=A[I];
+ A[I]=A[J];
+ A[J]=W;
+
+ /* And move on */
+ I++; J--;
+ }
+ } while (I <= J); /* Keep going until our pointers meet or pass */
+
+ /* Re-adjust L and R, based on which element we're looking for */
+ if(J<K)
+ L=I;
+ if(K<I)
+ R=J;
+ }
+
+ return A[K];
+}
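[A small usage sketch for percentile(), with hypothetical sample values; it is meant to be compiled together with the function above. Note the function partially reorders A in place, quickselect-style, rather than fully sorting it.]

    /* Illustration only, not part of the patch. */
    #include <stdio.h>

    long long percentile(long long *A, int N, int ple);   /* defined above */

    int main(void)
    {
        /* Hypothetical per-event cycle counts */
        long long samples[] = { 900, 120, 480, 3000, 250, 760, 50, 1400 };
        int n = sizeof(samples) / sizeof(samples[0]);

        /* K = N*ple/100 selects the element without a full sort */
        printf("p50 ~ %lld cycles\n", percentile(samples, n, 50));
        printf("p95 ~ %lld cycles\n", percentile(samples, n, 95));
        return 0;
    }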
+
+float weighted_percentile(float * A, /* values */
+ unsigned long long * w, /* weights */
+ int N, /* total */
+ int ple) /* percentile */
+{
+ int L, R, I, J, K;
+ unsigned long long L_weight, R_weight, I_weight, J_weight,
+ K_weight, N_weight;
+
+ float X, t1;
+ unsigned long long t2;
+
+ /* Calculate total weight */
+ N_weight=0;
+
+ for(I=0; I<N; I++) {
+ assert(w[I]!=0);
+ N_weight += w[I];
+ }
+
+ /* Find K_weight, the target weight we want */
+ K_weight = N_weight * ple / 100;
+
+ /* Set the left and right boundaries of the current search space */
+ L=0;
+ L_weight = 0;
+ R=N-1;
+ R_weight = N_weight - w[R];
+
+ /* Search between L and R, narrowing down until we're done */
+ while(L < R) {
+        /* Choose an ordering value from right in the middle */
+ K = (L + R) >> 1;
+ /* X: The value to order everything higher / lower than */
+ X=A[K];
+
+ /* Starting at the left and the right... */
+ I=L; I_weight = L_weight;
+ J=R; J_weight = R_weight;
+
+ do {
+ /* Find the first element on the left that is out-of-order w/ X */
+ while(A[I]<X) {
+ I_weight += w[I];
+ I++;
+ }
+ /* Find the first element on the right that is out-of-order w/ X */
+ while(X<A[J]) {
+ J_weight -= w[J];
+ J--;
+ }
+
+ /* If we actually found something... */
+ if(I<=J) {
+ /* Switch the values */
+ t1=A[I];
+ A[I]=A[J];
+ A[J]=t1;
+
+ t2=w[I];
+ w[I]=w[J];
+ w[J]=t2;
+
+ /* And move in */
+ I_weight += w[I];
+ I++;
+
+ J_weight -= w[J];
+ J--;
+ }
+ } while (I <= J); /* Keep going until our pointers meet or pass */
+
+ /* Re-adjust L and R, based on which element we're looking for */
+        if(J_weight<K_weight) {
+            L=I; L_weight = I_weight;
+        }
+        if(K_weight<I_weight) {
+            R=J; R_weight = J_weight;
+        }
+ }
+
+ return A[L];
+}
+
+long long self_weighted_percentile(long long * A,
+ int N, /* total */
+ int ple) /* percentile */
+{
+ int L, R, I, J, K;
+ long long L_weight, R_weight, I_weight, J_weight,
+ K_weight, N_weight;
+
+ long long X, t1;
+
+ /* Calculate total weight */
+ N_weight=0;
+
+ for(I=0; I<N; I++) {
+ if(A[I] < 0)
+ fprintf(warn, "%s: Value %lld less than zero!\n",
+ __func__, A[I]);
+ assert(A[I]!=0);
+ N_weight += A[I];
+ }
+
+ /* Find K_weight, the target weight we want */
+ K_weight = N_weight * ple / 100;
+
+ /* Set the left and right boundaries of the current search space */
+ L=0;
+ L_weight = 0;
+ R=N-1;
+ R_weight = N_weight - A[R];
+
+ /* Search between L and R, narrowing down until we're done */
+ while(L < R) {
+        /* Choose an ordering value from right in the middle */
+ K = (L + R) >> 1;
+ /* X: The value to order everything higher / lower than */
+ X=A[K];
+
+ /* Starting at the left and the right... */
+ I=L; I_weight = L_weight;
+ J=R; J_weight = R_weight;
+
+ do {
+ /* Find the first element on the left that is out-of-order w/ X */
+ while(A[I]<X) {
+ I_weight += A[I];
+ I++;
+ }
+ /* Find the first element on the right that is out-of-order w/ X */
+ while(X<A[J]) {
+ J_weight -= A[J];
+ J--;
+ }
+
+ /* If we actually found something... */
+ if(I<=J) {
+ /* Switch the values */
+ t1=A[I];
+ A[I]=A[J];
+ A[J]=t1;
+
+ /* And move in */
+ I_weight += A[I];
+ I++;
+
+ J_weight -= A[J];
+ J--;
+ }
+ } while (I <= J); /* Keep going until our pointers meet or pass */
+
+ /* Re-adjust L and R, based on which element we're looking for */
+        if(J_weight<K_weight) {
+            L=I; L_weight = I_weight;
+        }
+        if(K_weight<I_weight) {
+            R=J; R_weight = J_weight;
+        }
+ }
+
+ return A[L];
+}
+
+static inline double __cycles_percent(long long cycles, long long total) {
+ return (double)(cycles*100) / total;
+}
+
+static inline double __summary_percent(struct event_cycle_summary *s,
+ struct cycle_framework *f) {
+ return __cycles_percent(s->cycles, f->total_cycles);
+}
+
+static inline double summary_percent_global(struct event_cycle_summary *s) {
+ return __summary_percent(s, &P.f);
+}
+
+static inline void update_summary(struct event_cycle_summary *s, long long c) {
+/* We don't know ahead of time how many samples there are, and working
+ * with dynamic stuff is a pain, and unnecessary. This algorithm will
+ * generate a sample set that approximates an even sample. We can
+ * then take the percentiles on this, and get an approximate value. */
+ if(c) {
+ if(opt.sample_size) {
+ int lap = (s->cycles_count/opt.sample_size)+1,
+ index =s->cycles_count % opt.sample_size;
+ if((index - (lap/3))%lap == 0) {
+ if(!s->cycles_sample) {
+                    s->cycles_sample = malloc(sizeof(*s->cycles_sample) * opt.sample_size);
+ if(!s->cycles_sample) {
+ fprintf(stderr, "%s: malloc failed!\n", __func__);
+ error(ERR_SYSTEM, NULL);
+ }
+ }
+ s->cycles_sample[index]=c;
+ }
+ }
+ s->cycles_count++;
+ s->cycles += c;
+
+ s->interval.count++;
+ s->interval.cycles += c;
+ }
+ s->count++;
+}
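[The comment above describes the sampling trick used throughout the summary code: the buffer has opt.sample_size slots, each "lap" through the stream keeps only every lap-th value, and older entries are gradually overwritten so the retained set stays roughly evenly spread over the stream. Below is a standalone sketch of which stream positions land in the buffer; the sample size is hypothetical, while the selection condition is the one used by update_summary() and update_cycles().]

    /* Illustration only, not part of the patch. */
    #include <stdio.h>

    #define SAMPLE_SIZE 4   /* hypothetical opt.sample_size */

    int main(void)
    {
        int count;

        for ( count = 0; count < 20; count++ )
        {
            int lap   = (count / SAMPLE_SIZE) + 1;
            int index = count % SAMPLE_SIZE;

            /* Same condition as the code above */
            if ( (index - (lap / 3)) % lap == 0 )
                printf("stream item %2d -> slot %d (lap %d)\n",
                       count, index, lap);
        }
        return 0;
    }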
+
+static inline void clear_interval_summary(struct event_cycle_summary *s) {
+ s->interval.count = 0;
+ s->interval.cycles = 0;
+}
+
+static inline void update_cycles(struct cycle_summary *s, long long c) {
+/* We don't know ahead of time how many samples there are, and working
+ * with dynamic stuff is a pain, and unnecessary. This algorithm will
+ * generate a sample set that approximates an even sample. We can
+ * then take the percentiles on this, and get an approximate value. */
+ int lap, index;
+
+ if ( c == 0 )
+ {
+ fprintf(warn, "%s: cycles 0! Not updating...\n",
+ __func__);
+ return;
+ }
+
+ if ( opt.sample_size ) {
+ lap = (s->count/opt.sample_size)+1;
+ index =s->count % opt.sample_size;
+
+ if((index - (lap/3))%lap == 0) {
+ if(!s->sample) {
+ s->sample = malloc(sizeof(*s->sample) * opt.sample_size);
+ if(!s->sample) {
+ fprintf(stderr, "%s: malloc failed!\n", __func__);
+ error(ERR_SYSTEM, NULL);
+ }
+ }
+ s->sample[index] = c;
+ }
+ }
+
+ if(c > 0) {
+ s->cycles += c;
+ s->interval.cycles += c;
+ } else {
+ s->cycles += -c;
+ s->interval.cycles += -c;
+ }
+ s->count++;
+ s->interval.count++;
+}
+
+static inline void clear_interval_cycles(struct interval_element *e) {
+ e->cycles = 0;
+ e->count = 0;
+ e->instructions = 0;
+}
+
+static inline void update_cpi(struct weighted_cpi_summary *s,
+ unsigned long long i,
+ unsigned long long c) {
+/* We don't know ahead of time how many samples there are, and working
+ * with dynamic stuff is a pain, and unnecessary. This algorithm will
+ * generate a sample set that approximates an even sample. We can
+ * then take the percentiles on this, and get an approximate value. */
+ int lap, index;
+
+ if ( opt.sample_size ) {
+ lap = (s->count/opt.sample_size)+1;
+ index =s->count % opt.sample_size;
+
+ if((index - (lap/3))%lap == 0) {
+ if(!s->cpi) {
+ assert(!s->cpi_weight);
+
+ s->cpi = malloc(sizeof(*s->cpi) * opt.sample_size);
+                s->cpi_weight = malloc(sizeof(*s->cpi_weight) * opt.sample_size);
+ if(!s->cpi || !s->cpi_weight) {
+ fprintf(stderr, "%s: malloc failed!\n", __func__);
+ error(ERR_SYSTEM, NULL);
+ }
+ }
+ assert(s->cpi_weight);
+
+ s->cpi[index] = (float) c / i;
+ s->cpi_weight[index]=c;
+ }
+ }
+
+ s->instructions += i;
+ s->cycles += c;
+ s->count++;
+
+ s->interval.instructions += i;
+ s->interval.cycles += c;
+ s->interval.count++;
+}
+
+static inline void clear_interval_cpi(struct weighted_cpi_summary *s) {
+ s->interval.cycles = 0;
+ s->interval.count = 0;
+ s->interval.instructions = 0;
+}
+
+static inline void print_cpu_affinity(struct cycle_summary *s, char *p) {
+ if(s->count) {
+ long long avg;
+
+ avg = s->cycles / s->count;
+
+ if ( opt.sample_size ) {
+ long long p5, p50, p95;
+ int data_size = s->count;
+ if(data_size > opt.sample_size)
+ data_size = opt.sample_size;
+
+ p50 = percentile(s->sample, data_size, 50);
+ p5 = percentile(s->sample, data_size, 5);
+ p95 = percentile(s->sample, data_size, 95);
+
+ printf("%s: %7d %6lld {%6lld|%6lld|%6lld}\n",
+ p, s->count, avg, p5, p50, p95);
+ } else {
+ printf("%s: %7d %6lld\n",
+ p, s->count, avg);
+ }
+ }
+}
+
+static inline void print_cpi_summary(struct weighted_cpi_summary *s) {
+ if(s->count) {
+ float avg;
+
+ avg = (float)s->cycles / s->instructions;
+
+ if ( opt.sample_size ) {
+ float p5, p50, p95;
+ int data_size = s->count;
+
+ if(data_size > opt.sample_size)
+ data_size = opt.sample_size;
+
+ p50 = weighted_percentile(s->cpi, s->cpi_weight, data_size, 50);
+ p5 = weighted_percentile(s->cpi, s->cpi_weight, data_size, 5);
+ p95 = weighted_percentile(s->cpi, s->cpi_weight, data_size, 95);
+
+ printf(" CPI summary: %2.2f {%2.2f|%2.2f|%2.2f}\n",
+ avg, p5, p50, p95);
+ } else {
+ printf(" CPI summary: %2.2f\n", avg);
+ }
+ }
+}
+
+static inline void print_cycle_percent_summary(struct cycle_summary *s,
+ tsc_t total, char *p) {
+ if(s->count) {
+ long long avg;
+ double percent, seconds;
+
+ avg = s->cycles / s->count;
+
+ seconds = ((double)s->cycles) / opt.cpu_hz;
+
+ percent = ((double)(s->cycles * 100)) / total;
+
+ if ( opt.sample_size ) {
+ long long p5, p50, p95;
+ int data_size = s->count;
+
+ if(data_size > opt.sample_size)
+ data_size = opt.sample_size;
+
+ p50 = self_weighted_percentile(s->sample, data_size, 50);
+ p5 = self_weighted_percentile(s->sample, data_size, 5);
+ p95 = self_weighted_percentile(s->sample, data_size, 95);
+
+ printf("%s: %7d %5.2lfs %5.2lf%% %6lld {%6lld|%6lld|%6lld}\n",
+ p, s->count,
+ seconds,
+ percent,
+ avg, p5, p50, p95);
+ } else {
+ printf("%s: %7d %5.2lfs %5.2lf%% %6lld\n",
+ p, s->count,
+ seconds,
+ percent,
+ avg);
+ }
+ }
+}
+
+static inline void print_cycle_summary(struct cycle_summary *s, char *p) {
+ if(s->count) {
+ long long avg;
+
+ avg = s->cycles / s->count;
+
+ if ( opt.sample_size ) {
+ long long p5, p50, p95;
+ int data_size = s->count;
+
+ if(data_size > opt.sample_size)
+ data_size = opt.sample_size;
+
+ p50 = self_weighted_percentile(s->sample, data_size, 50);
+ p5 = self_weighted_percentile(s->sample, data_size, 5);
+ p95 = self_weighted_percentile(s->sample, data_size, 95);
+
+ printf("%s: %7d %5.2lfs %6lld {%6lld|%6lld|%6lld}\n",
+ p, s->count, ((double)s->cycles)/opt.cpu_hz,
+ avg, p5, p50, p95);
+ } else {
+ printf("%s: %7d %5.2lfs %6lld\n",
+ p, s->count, ((double)s->cycles)/opt.cpu_hz, avg);
+ }
+ }
+}
+
+#define PRINT_SUMMARY(_s, _p...) \
+ do { \
+ if((_s).count) { \
+ if ( opt.sample_size ) { \
+ unsigned long long p5, p50, p95; \
+ int data_size=(_s).cycles_count; \
+ if(data_size > opt.sample_size) \
+ data_size=opt.sample_size; \
+ p50=percentile((_s).cycles_sample, data_size, 50); \
+ p5=percentile((_s).cycles_sample, data_size, 5); \
+ p95=percentile((_s).cycles_sample, data_size, 95); \
+ printf(_p); \
+            printf(" %7d %5.2lfs %5.2lf%% %5lld cyc {%5lld|%5lld|%5lld}\n", \
+ (_s).count, \
+ ((double)(_s).cycles)/opt.cpu_hz, \
+ summary_percent_global(&(_s)), \
+ (_s).cycles_count ? (_s).cycles / (_s).cycles_count:0, \
+ p5, p50, p95); \
+ } else { \
+ printf(_p); \
+ printf(" %7d %5.2lfs %5.2lf%% %5lld cyc\n", \
+ (_s).count, \
+ ((double)(_s).cycles)/opt.cpu_hz, \
+ summary_percent_global(&(_s)), \
+                   (_s).cycles_count ? (_s).cycles / (_s).cycles_count:0); \
+ } \
+ } \
+ } while(0)
+
+#define INTERVAL_DESC_MAX 31
+struct interval_list {
+ struct interval_element *elem;
+ struct interval_list *next;
+ char desc[INTERVAL_DESC_MAX+1]; /* +1 for the null terminator */
+};
+
+void __interval_cycle_percent_output(struct interval_element *e, tsc_t cycles) {
+ printf(" %.02lf",
+ __cycles_percent(e->cycles, cycles));
+ clear_interval_cycles(e);
+}
+
+void interval_cycle_percent_output(struct interval_element *e) {
+ __interval_cycle_percent_output(e, opt.interval.cycles);
+}
+
+void interval_time_output(void) {
+ struct time_struct t;
+ abs_cycles_to_time(P.interval.start_tsc, &t);
+
+ printf("%u.%09u", t.s, t.ns);
+}
+
+void interval_table_output(void) {
+ int i;
+
+ interval_time_output();
+
+ if(opt.interval.mode == INTERVAL_MODE_ARRAY) {
+ for(i=0; i<P.interval.array.count; i++) {
+ struct interval_element *e = P.interval.array.values[i];
+ if(e) {
+ interval_cycle_percent_output(e);
+ } else {
+ printf(" 0.0");
+ }
+ }
+ } else if(opt.interval.mode == INTERVAL_MODE_LIST) {
+ struct interval_list *p;
+ for(p = P.interval.list.head; p; p = p->next)
+ interval_cycle_percent_output(p->elem);
+ }
+ printf("\n");
+}
+
+void interval_table_tail(void) {
+ struct interval_list *p;
+
+ printf("time");
+
+ for(p=P.interval.list.head; p; p = p->next)
+ printf(" %s", p->desc);
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog