Re: [Xen-devel] [PATCH v5 1/8] xenalyze: add to tools/xentrace/
On Tue, Jun 9, 2015 at 12:23 PM, Olaf Hering <olaf@xxxxxxxxx> wrote:
> This merges xenalyze.hg, changeset 150:24308507be1d,
> into tools/xentrace/xenalyze.c to have the tool and
> public/trace.h in one place.
>
> Signed-off-by: Olaf Hering <olaf@xxxxxxxxx>
> Acked-by: Wei Liu <wei.liu2@xxxxxxxxxx>
> Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>

Normally in this case you wouldn't retain the Acked-by with this
change, even though I was the one that suggested it. :-)

First, it looks like maybe this wasn't based on 24308507be1d, but on
8083183a7238?  At least there seem to be changes in it that aren't in
the other one.

Secondly, it's not identical to what's checked in there, but has a
handful of changes; namely:

* Use <public/trace.h> rather than "trace.h", both in xenalyze.c and pv.h.

* Remove __attribute__((packed)) from various structures.

With an updated changelog, then:

Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>

> Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
> Cc: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>
> Cc: Ian Campbell <ian.campbell@xxxxxxxxxx>
> Cc: Wei Liu <wei.liu2@xxxxxxxxxx>
> ---
>  .gitignore                |     1 +
>  tools/xentrace/Makefile   |     6 +
>  tools/xentrace/analyze.h  |   107 +
>  tools/xentrace/mread.c    |   160 +
>  tools/xentrace/mread.h    |    18 +
>  tools/xentrace/pv.h       |    41 +
>  tools/xentrace/xenalyze.c | 10407 ++++++++++++++++++++++++++++++++++++++++++++
>  7 files changed, 10740 insertions(+)
>
> diff --git a/.gitignore b/.gitignore
> index 3bc9cd9..3f42ded 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -173,6 +173,7 @@ tools/misc/gtracestat
>  tools/misc/xenlockprof
>  tools/misc/lowmemd
>  tools/misc/xencov
> +tools/xentrace/xenalyze
>  tools/pygrub/build/*
>  tools/python/build/*
>  tools/security/secpol_tool
> diff --git a/tools/xentrace/Makefile b/tools/xentrace/Makefile
> index 5360960..8932b73 100644
> --- a/tools/xentrace/Makefile
> +++ b/tools/xentrace/Makefile
> @@ -7,6 +7,8 @@ CFLAGS += $(CFLAGS_libxenctrl)
>  LDLIBS += $(LDLIBS_libxenctrl)
>
>  BIN =
> +BIN-$(CONFIG_X86) = xenalyze
> +BIN += $(BIN-y)
>  SBIN = xentrace xentrace_setsize
>  LIBBIN = xenctx
>  SCRIPTS = xentrace_format
> @@ -26,6 +28,7 @@ install: build
>  	[ -z "$(LIBBIN)" ] || $(INSTALL_DIR) $(DESTDIR)$(LIBEXEC_BIN)
>  	$(INSTALL_DIR) $(DESTDIR)$(MAN1DIR)
>  	$(INSTALL_DIR) $(DESTDIR)$(MAN8DIR)
> +	$(INSTALL_PROG) $(BIN) $(DESTDIR)$(bindir)
>  	$(INSTALL_PROG) $(SBIN) $(DESTDIR)$(sbindir)
>  	$(INSTALL_PYTHON_PROG) $(SCRIPTS) $(DESTDIR)$(bindir)
>  	[ -z "$(LIBBIN)" ] || $(INSTALL_PROG) $(LIBBIN) $(DESTDIR)$(LIBEXEC_BIN)
> @@ -48,5 +51,8 @@ xenctx: xenctx.o
>  xentrace_setsize: setsize.o
>  	$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) $(APPEND_LDFLAGS)
>
> +xenalyze: xenalyze.o mread.o
> +	$(CC) $(LDFLAGS) -o $@ $^ $(LDLIBS) $(APPEND_LDFLAGS)
> +
>  -include $(DEPS)
>
> diff --git a/tools/xentrace/analyze.h b/tools/xentrace/analyze.h
> new file mode 100644
> index 0000000..40ee551
> --- /dev/null
> +++ b/tools/xentrace/analyze.h
> @@ -0,0 +1,107 @@
> +#ifndef __ANALYZE_H
> +# define __ANALYZE_H
> +
> +#include <stdint.h>
> +
> +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
> +
> +#define TRC_GEN_MAIN    0
> +#define TRC_SCHED_MAIN  1
> +#define TRC_DOM0OP_MAIN 2
> +#define TRC_HVM_MAIN    3
> +#define TRC_MEM_MAIN    4
> +#define TRC_PV_MAIN     5
> +#define TRC_SHADOW_MAIN 6
> +#define TRC_HW_MAIN     7
> +
> +#define TRC_LOST_RECORDS_END (TRC_GEN + 50)
> +
> +#define NR_CPUS 128
> +#if __x86_64__
> +# define BITS_PER_LONG 64
> +#else
> +# define BITS_PER_LONG 32
> +#endif
> +
> +#define BITS_TO_LONGS(bits) \
> + 
(((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) > +#define DECLARE_BITMAP(name,bits) \ > + unsigned long name[BITS_TO_LONGS(bits)] > +typedef struct cpumask{ DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; > + > +enum { > + TRCE_SFLAG_SET_AD, > + TRCE_SFLAG_SET_A, > + TRCE_SFLAG_SHADOW_L1_GET_REF, > + TRCE_SFLAG_SHADOW_L1_PUT_REF, > + TRCE_SFLAG_L2_PROPAGATE, > + TRCE_SFLAG_SET_CHANGED, > + TRCE_SFLAG_SET_FLUSH, > + TRCE_SFLAG_SET_ERROR, > + TRCE_SFLAG_DEMOTE, > + TRCE_SFLAG_PROMOTE, > + TRCE_SFLAG_WRMAP, > + TRCE_SFLAG_WRMAP_GUESS_FOUND, > + TRCE_SFLAG_WRMAP_BRUTE_FORCE, > + TRCE_SFLAG_EARLY_UNSHADOW, > + TRCE_SFLAG_EMULATION_2ND_PT_WRITTEN, > + TRCE_SFLAG_EMULATION_LAST_FAILED, > + TRCE_SFLAG_EMULATE_FULL_PT, > + TRCE_SFLAG_PREALLOC_UNPIN, > + TRCE_SFLAG_PREALLOC_UNHOOK > +}; > + > +#define TRC_HVM_OP_DESTROY_PROC (TRC_HVM_HANDLER + 0x100) > + > +typedef unsigned long long tsc_t; > + > +/* -- on-disk trace buffer definitions -- */ > +struct trace_record { > + union { > + struct { > + unsigned event:28, > + extra_words:3, > + cycle_flag:1; > + union { > + struct { > + uint32_t tsc_lo, tsc_hi; > + uint32_t data[7]; > + } tsc; > + struct { > + uint32_t data[7]; > + } notsc; > + } u; > + }; > + uint32_t raw[8]; > + }; > +}; > + > +/* -- General info about a current record -- */ > +struct time_struct { > + unsigned long long time; > + unsigned int s, ns; > +}; > + > +#define DUMP_HEADER_MAX 256 > + > +struct record_info { > + int cpu; > + tsc_t tsc; > + union { > + unsigned event; > + struct { > + unsigned minor:12, > + sub:4, > + main:12, > + unused:4; > + } evt; > + }; > + int extra_words; > + int size; > + uint32_t *d; > + char dump_header[DUMP_HEADER_MAX]; > + struct time_struct t; > + struct trace_record rec; > +}; > + > +#endif > diff --git a/tools/xentrace/mread.c b/tools/xentrace/mread.c > new file mode 100644 > index 0000000..a63d16c > --- /dev/null > +++ b/tools/xentrace/mread.c > @@ -0,0 +1,160 @@ > +#include <stdio.h> > +#include <stdlib.h> > +#include <strings.h> > +#include <sys/types.h> > +#include <sys/stat.h> > +#include <sys/mman.h> > +#include <errno.h> > +#include "mread.h" > + > +mread_handle_t mread_init(int fd) > +{ > + struct stat64 s; > + mread_handle_t h; > + > + h=malloc(sizeof(struct mread_ctrl)); > + > + if (!h) > + { > + perror("malloc"); > + exit(1); > + } > + > + bzero(h, sizeof(struct mread_ctrl)); > + > + h->fd = fd; > + > + fstat64(fd, &s); > + h->file_size = s.st_size; > + > + return h; > +} > + > +ssize_t mread64(mread_handle_t h, void *rec, ssize_t len, loff_t offset) > +{ > + /* Idea: have a "cache" of N mmaped regions. If the offset is > + * in one of the regions, just copy it. If not, evict one of the > + * regions and map the appropriate range. > + * > + * Basic algorithm: > + * - See if the offset is in one of the regions > + * - If not, map it > + * - evict an old region > + * - map the new region > + * - Copy > + */ > + char * b=NULL; > + int bind=-1; > + loff_t boffset=0; > + ssize_t bsize; > + > +#define dprintf(x...) 
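Aside, for anyone reading along: the "cache of N mmaped regions" described in the comment above is a clock-eviction scheme — mread keeps MREAD_MAPS (8) mmap()ed windows of MREAD_BUF_SIZE (2MB) each, and the clock hand skips (and clears) slots whose `accessed` bit is set. From the caller's side it behaves like pread(). A minimal sketch of a caller, using only the two entry points the patch exports (mread_init/mread64); the file name is made up and error handling is elided:

    #include <sys/types.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <stdint.h>
    #include "mread.h"

    int main(void)
    {
        /* Hypothetical trace file; mread_init() itself exits on
         * malloc failure, so no NULL check is needed on h. */
        int fd = open("trace.bin", O_RDONLY);
        mread_handle_t h = mread_init(fd);
        uint32_t hdr[8];

        /* Reads that straddle a 2MB window boundary are completed by
         * the recursive call at the bottom of mread64(). */
        if ( mread64(h, hdr, sizeof(hdr), 0) == sizeof(hdr) )
            printf("first record word: %08x\n", hdr[0]);
        return 0;
    }
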
> +//#define dprintf fprintf > + > + dprintf(warn, "%s: offset %llx len %d\n", __func__, > + offset, len); > + if ( offset > h->file_size ) > + { > + dprintf(warn, " offset > file size %llx, returning 0\n", > + h->file_size); > + return 0; > + } > + if ( offset + len > h->file_size ) > + { > + dprintf(warn, " offset+len > file size %llx, truncating\n", > + h->file_size); > + len = h->file_size - offset; > + } > + > + /* Try to find the offset in our range */ > + dprintf(warn, " Trying last, %d\n", last); > + if ( h->map[h->last].buffer > + && (offset & MREAD_BUF_MASK) == h->map[h->last].start_offset ) > + { > + bind=h->last; > + goto copy; > + } > + > + /* Scan to see if it's anywhere else */ > + dprintf(warn, " Scanning\n"); > + for(bind=0; bind<MREAD_MAPS; bind++) > + if ( h->map[bind].buffer > + && (offset & MREAD_BUF_MASK) == h->map[bind].start_offset ) > + { > + dprintf(warn, " Found, index %d\n", bind); > + break; > + } > + > + /* If we didn't find it, evict someone and map it */ > + if ( bind == MREAD_MAPS ) > + { > + dprintf(warn, " Clock\n"); > + while(1) > + { > + h->clock++; > + if(h->clock >= MREAD_MAPS) > + h->clock=0; > + dprintf(warn, " %d\n", h->clock); > + if(h->map[h->clock].buffer == NULL) > + { > + dprintf(warn, " Buffer null, using\n"); > + break; > + } > + if(!h->map[h->clock].accessed) > + { > + dprintf(warn, " Not accessed, using\n"); > + break; > + } > + h->map[h->clock].accessed=0; > + } > + if(h->map[h->clock].buffer) > + { > + dprintf(warn, " Unmapping\n"); > + munmap(h->map[h->clock].buffer, MREAD_BUF_SIZE); > + } > + /* FIXME: Try MAP_HUGETLB? */ > + /* FIXME: Make sure this works on large files... */ > + h->map[h->clock].start_offset = offset & MREAD_BUF_MASK; > + dprintf(warn, " Mapping %llx from offset %llx\n", > + MREAD_BUF_SIZE, h->map[h->clock].start_offset); > + h->map[h->clock].buffer = mmap(NULL, MREAD_BUF_SIZE, PROT_READ, > + MAP_SHARED, > + h->fd, > + h->map[h->clock].start_offset); > + dprintf(warn, " mmap returned %p\n", h->map[h->clock].buffer); > + if ( h->map[h->clock].buffer == MAP_FAILED ) > + { > + h->map[h->clock].buffer = NULL; > + perror("mmap"); > + exit(1); > + } > + bind = h->clock; > + } > + > + h->last=bind; > +copy: > + h->map[bind].accessed=1; > + b=h->map[bind].buffer; > + boffset=offset - h->map[bind].start_offset; > + if ( boffset + len > MREAD_BUF_SIZE ) > + bsize = MREAD_BUF_SIZE - boffset; > + else > + bsize = len; > + dprintf(warn, " Using index %d, buffer at %p, buffer offset %llx len > %d\n", > + bind, b, boffset, bsize); > + > + bcopy(b+boffset, rec, bsize); > + > + /* Handle the boundary case; make sure this is after doing anything > + * with the static variables*/ > + if ( len > bsize ) > + { > + dprintf(warn, " Finishing up by reading l %d o %llx\n", > + len-bsize, offset+bsize); > + mread64(h, rec+bsize, len-bsize, offset+bsize); > + } > + > + /* FIXME: ?? 
*/ > + return len; > +#undef dprintf > +} > diff --git a/tools/xentrace/mread.h b/tools/xentrace/mread.h > new file mode 100644 > index 0000000..8df41a8 > --- /dev/null > +++ b/tools/xentrace/mread.h > @@ -0,0 +1,18 @@ > +#define MREAD_MAPS 8 > +#define MREAD_BUF_SHIFT 9 > +#define PAGE_SHIFT 12 > +#define MREAD_BUF_SIZE (1ULL<<(PAGE_SHIFT+MREAD_BUF_SHIFT)) > +#define MREAD_BUF_MASK (~(MREAD_BUF_SIZE-1)) > +typedef struct mread_ctrl { > + int fd; > + loff_t file_size; > + struct mread_buffer { > + char * buffer; > + loff_t start_offset; > + int accessed; > + } map[MREAD_MAPS]; > + int clock, last; > +} *mread_handle_t; > + > +mread_handle_t mread_init(int fd); > +ssize_t mread64(mread_handle_t h, void *dst, ssize_t len, loff_t offset); > diff --git a/tools/xentrace/pv.h b/tools/xentrace/pv.h > new file mode 100644 > index 0000000..3e6ad77 > --- /dev/null > +++ b/tools/xentrace/pv.h > @@ -0,0 +1,41 @@ > +/* > + * PV event decoding. > + * > + * Copyright (C) 2012 Citrix Systems R&D Ltd. > + * > + * This program is free software; you can redistribute it and/or modify it > + * under the terms and conditions of the GNU General Public License, > + * version 2, as published by the Free Software Foundation. > + */ > +#ifndef __PV_H > + > +#include "analyze.h" > +#include <xen/trace.h> > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +#define ARG_MISSING 0x0 > +#define ARG_32BIT 0x1 > +#define ARG_64BIT 0x2 > + > +#define MMU_UPDATE_PREEMPTED (~(~0U>>1)) > + > +static inline uint32_t pv_hypercall_op(const struct record_info *ri) > +{ > + return ri->d[0] & ~TRC_PV_HYPERCALL_V2_ARG_MASK; > +} > + > +static inline int pv_hypercall_arg_present(const struct record_info *ri, int > arg) > +{ > + return (ri->d[0] >> (20 + 2*arg)) & 0x3; > +} > + > +void pv_hypercall_gather_args(const struct record_info *ri, uint64_t *args); > + > +#ifdef __cplusplus > +} /* extern "C" */ > +#endif > + > +#endif > diff --git a/tools/xentrace/xenalyze.c b/tools/xentrace/xenalyze.c > new file mode 100644 > index 0000000..a0a3ac1 > --- /dev/null > +++ b/tools/xentrace/xenalyze.c > @@ -0,0 +1,10407 @@ > +/* > + * xenalyze.c: Analyzing xentrace output > + * > + * Written by George Dunlap. > + * > + * Copyright (c) 2006-2007, XenSource Inc. > + * Copyright (c) 2007-2008, Citrix Systems R&D Ltd, UK > + * > + * This program is free software; you can redistribute it and/or modify it > + * under the terms and conditions of the GNU General Public License, > + * version 2, as published by the Free Software Foundation. > + * > + * This program is distributed in the hope it will be useful, but WITHOUT > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for > + * more details. > + * > + * You should have received a copy of the GNU General Public License along > with > + * this program; if not, write to the Free Software Foundation, Inc., 59 > Temple > + * Place - Suite 330, Boston, MA 02111-1307 USA. 
> + */ > +#define _XOPEN_SOURCE 600 > +#include <stdio.h> > +#include <stdlib.h> > +#include <argp.h> > +#include <inttypes.h> > +#include <sys/types.h> > +#include <sys/wait.h> > +#include <sys/stat.h> > +#include <fcntl.h> > +#include <unistd.h> > +#include <xen/trace.h> > +#include "analyze.h" > +#include "mread.h" > +#include "pv.h" > +#include <errno.h> > +#include <strings.h> > +#include <string.h> > +#include <assert.h> > + > +struct mread_ctrl; > + > + > +#define DEFAULT_CPU_HZ 2400000000LL > +#define QHZ_FROM_HZ(_hz) (((_hz) << 10)/ 1000000000) > + > +#define ADDR_SPACE_BITS 48 > +#define DEFAULT_SAMPLE_SIZE 10240 > +#define DEFAULT_INTERVAL_LENGTH 1000 > + > +struct array_struct { > + unsigned long long *values; > + int count; > +}; > + > +#define warn_once(_x...) \ > + do { \ > + static int _w=1; \ > + if ( _w ) { \ > + _w=0; \ > + fprintf(warn, ##_x); \ > + } \ > + } while(0) \ > + > +/* -- Global variables -- */ > +struct { > + int fd; > + struct mread_ctrl *mh; > + struct symbol_struct * symbols; > + char * symbol_file; > + char * trace_file; > + int output_defined; > + loff_t file_size; > + struct { > + loff_t update_offset; > + int pipe[2]; > + FILE* out; > + int pid; > + } progress; > +} G = { > + .fd=-1, > + .symbols = NULL, > + .symbol_file = NULL, > + .trace_file = NULL, > + .output_defined = 0, > + .file_size = 0, > + .progress = { .update_offset = 0 }, > +}; > + > +/* > + Kinds of errors: > + Unexpected values > + - RIP with information in high bits (not all 0 or 1) > + - exit reason too high > + Unexpected record layout > + - x64 bit set in PIO,PV_PTWR_EMULATION_PAE, > + - Unknown minor type (PV_PTWR_EMULATION, RUNSTATE_CHANGE > + - Wrong record size > + - More than one bit set in evt.main field > + Unexpected sequences > + - wake tsc tracking > + - TSC dependency loop > + - Mismatch between non-running old event states > + - Runstate continue while running on another pcpu > + - lost_record_end seen in non-lost pcpu > + - Unexpected non-CPU_CHANGE record during new_pcpu scan > + - record tsc < interval start tsc > + - lost_record tsc !> order tsc > + Limited resources > + - interrupt interval slots > + - record cpu > MAX_CPUS > + Algorithm asserts > + - Duplicate CR3/domain values > + - Logic holes > + - domain runstates > + - runstate / tsc skew > + - vcpu_{prev,next}_update p->current{==,!=}null > + - vcpu start conditions > + - lost_cpu count higher than # of seen cpus / < 0 > + - lost cpu has non-null p->current > + Symbol file > + -file doesn't open > + -file not ordered > + System > + - short read > + - malloc failed > + Args > + - Invalid cpu_hz value / suffix > + - No trace file > + - Can't open trace file > +*/ > +enum error_level { > + ERR_NONE=0, > + ERR_STRICT, /* Be unreasonably picky */ > + ERR_WARN, /* Something midly unexpected */ > + ERR_SANITY, /* Sanity checks: RIP with info in high bits */ > + ERR_RECORD, /* Something that keeps you from processing the record */ > + ERR_FILE, /* Probably caused by a corrupt file */ > + ERR_LIMIT, /* Exceeded limits; data will be lost */ > + ERR_MAX_TOLERABLE=ERR_LIMIT, > + /* -- Unrecoverable past this point -- */ > + ERR_ASSERT, /* Algoritm assert */ > + ERR_SYSTEM, /* System error: cannot allocate memory, short read, &c */ > +}; > + > +int verbosity = 5; > + > +struct { > + unsigned > + scatterplot_interrupt_eip:1, > + scatterplot_cpi:1, > + scatterplot_unpin_promote:1, > + scatterplot_cr3_switch:1, > + scatterplot_wake_to_halt:1, > + scatterplot_io:1, > + scatterplot_vmexit_eip:1, > + scatterplot_runstate:1, > 
+ scatterplot_runstate_time:1, > + scatterplot_pcpu:1, > + scatterplot_extint_cycles:1, > + scatterplot_rdtsc:1, > + scatterplot_irq:1, > + histogram_interrupt_eip:1, > + interval_mode:1, > + dump_all:1, > + dump_raw_process:1, > + dump_raw_reads:1, > + dump_no_processing:1, > + dump_ipi_latency:1, > + dump_trace_volume_on_lost_record:1, > + dump_show_power_states:1, > + with_cr3_enumeration:1, > + with_pio_enumeration:1, > + with_mmio_enumeration:1, > + with_interrupt_eip_enumeration:1, > + show_default_domain_summary:1, > + mmio_enumeration_skip_vga:1, > + progress:1, > + svm_mode:1, > + summary:1, > + report_pcpu:1, > + tsc_loop_fatal:1, > + summary_info; > + long long cpu_qhz, cpu_hz; > + int scatterplot_interrupt_vector; > + int scatterplot_extint_cycles_vector; > + int scatterplot_io_port; > + int histogram_interrupt_vector; > + unsigned long long histogram_interrupt_increment; > + int interrupt_eip_enumeration_vector; > + int default_guest_paging_levels; > + int sample_size; > + enum error_level tolerance; /* Tolerate up to this level of error */ > + struct { > + tsc_t cycles; > + /* Used if interval is specified in seconds to delay calculating > + * time_interval until all arguments have been processed > (specifically, > + * cpu_hz). */ > + unsigned msec; > + enum { > + INTERVAL_CR3_SCHEDULE_TIME, > + INTERVAL_CR3_SCHEDULE_ORDERED, > + INTERVAL_CR3_SHORT_SUMMARY, > + INTERVAL_DOMAIN_TOTAL_TIME, > + INTERVAL_DOMAIN_SHORT_SUMMARY, > + INTERVAL_DOMAIN_GUEST_INTERRUPT, > + INTERVAL_DOMAIN_GRANT_MAPS > + } output; > + enum { > + INTERVAL_MODE_CUSTOM, > + INTERVAL_MODE_ARRAY, > + INTERVAL_MODE_LIST > + } mode; > + enum { > + INTERVAL_CHECK_NONE, > + INTERVAL_CHECK_CR3, > + INTERVAL_CHECK_DOMAIN > + } check; > + /* Options for specific interval output types */ > + union { > + struct array_struct array; > + }; > + int count; > + } interval; > +} opt = { > + .scatterplot_interrupt_eip=0, > + .scatterplot_cpi=0, > + .scatterplot_unpin_promote=0, > + .scatterplot_cr3_switch=0, > + .scatterplot_wake_to_halt=0, > + .scatterplot_vmexit_eip=0, > + .scatterplot_runstate=0, > + .scatterplot_runstate_time=0, > + .scatterplot_pcpu=0, > + .scatterplot_extint_cycles=0, > + .scatterplot_rdtsc=0, > + .scatterplot_irq=0, > + .histogram_interrupt_eip=0, > + .dump_all = 0, > + .dump_raw_process = 0, > + .dump_raw_reads = 0, > + .dump_no_processing = 0, > + .dump_ipi_latency = 0, > + .dump_trace_volume_on_lost_record = 0, > + .dump_show_power_states = 0, > + .with_cr3_enumeration = 0, > + .with_pio_enumeration = 1, > + .with_mmio_enumeration = 0, > + .with_interrupt_eip_enumeration = 0, > + .show_default_domain_summary = 0, > + .mmio_enumeration_skip_vga = 1, > + .progress = 0, > + .svm_mode = 0, > + .summary = 0, > + .report_pcpu = 0, > + .tsc_loop_fatal = 0, > + .cpu_hz = DEFAULT_CPU_HZ, > + /* Pre-calculate a multiplier that makes the rest of the > + * calculations easier */ > + .cpu_qhz = QHZ_FROM_HZ(DEFAULT_CPU_HZ), > + .default_guest_paging_levels = 2, > + .sample_size = DEFAULT_SAMPLE_SIZE, > + .tolerance = ERR_SANITY, > + .interval = { .msec = DEFAULT_INTERVAL_LENGTH }, > +}; > + > +FILE *warn = NULL; > + > +/* -- Summary data -- */ > +struct cycle_framework { > + tsc_t first_tsc, last_tsc, total_cycles; > +}; > + > +struct interval_element { > + int count; > + long long cycles; > + long long instructions; > +}; > + > +struct event_cycle_summary { > + int count, cycles_count; > + long long cycles; > + long long *cycles_sample; > + struct interval_element interval; > +}; > + > +struct cycle_summary { 
> + int count; > + unsigned long long cycles; > + long long *sample; > + struct interval_element interval; > +}; > + > +struct weighted_cpi_summary { > + int count; > + unsigned long long instructions; > + unsigned long long cycles; > + float *cpi; > + unsigned long long *cpi_weight; > + struct interval_element interval; > +}; > + > +/* -- Symbol list information -- */ > +#define SYMBOL_ENTRIES_PER_STRUCT 1023 > +#define SYMBOL_NAME_SIZE 124 > +struct symbol_struct { > + int count; > + struct { > + unsigned long long addr; > + char name[SYMBOL_NAME_SIZE]; > + } symbols[SYMBOL_ENTRIES_PER_STRUCT]; > + struct symbol_struct *next; > +}; > + > +void error(enum error_level l, struct record_info *ri); > + > +void parse_symbol_file(char *fn) { > + unsigned long long last_addr = 0; > + FILE * symbol_file; > + struct symbol_struct ** p=&G.symbols; > + > + if((symbol_file=fopen(fn, "rb"))==NULL) { > + fprintf(stderr, "Could not open symbol file %s\n", fn); > + perror("open"); > + error(ERR_SYSTEM, NULL); > + } > + while(!feof(symbol_file)) { > + /* Allocate a new struct if we need it */ > + if(!*p) { > + *p = malloc(sizeof(**p)); > + if(!*p) { > + fprintf(stderr, "Malloc failed!\n"); > + error(ERR_SYSTEM, NULL); > + } > + (*p)->count=0; > + (*p)->next=NULL; > + } > + > + /* FIXME -- use SYMBOL_NAME_SIZE */ > + /* FIXME -- use regexp. This won't work for symbols with spaces > (yes they exist) */ > + (*p)->symbols[(*p)->count].addr = 0xDEADBEEF; > + if ( fscanf(symbol_file, "%llx %128s", > + &(*p)->symbols[(*p)->count].addr, > + (*p)->symbols[(*p)->count].name) == 0 ) > + break; > + > + > + if( ((*p)->symbols[(*p)->count].addr > 0) > + && ((*p)->symbols[(*p)->count].addr < last_addr) ) { > + fprintf(stderr, "Symbol file not properly ordered: %llx %s < > %llx!\n", > + (*p)->symbols[(*p)->count].addr, > + (*p)->symbols[(*p)->count].name, > + last_addr); > + /* Could be recovered from; just free existing strings and set > symbols to NULL */ > + error(ERR_ASSERT, NULL); > + } else > + last_addr = (*p)->symbols[(*p)->count].addr; > + > + (*p)->count++; > + > + /* If this struct is full, point to the next. It will be allocated > + if needed. 
*/ > + if((*p)->count == SYMBOL_ENTRIES_PER_STRUCT) { > + p=&((*p)->next); > + } > + } > +} > + > +/* WARNING not thread safe */ > +char * find_symbol(unsigned long long addr) { > + struct symbol_struct * p=G.symbols; > + int i; > + char * lastname="ZERO"; > + unsigned long long offset=addr; > + static char name[128]; > + > + if(!p) { > + name[0]=0; > + return name; > + } > + > + while(1) { > + if(!p) > + goto finish; > + for(i=0; i<p->count; i++) { > + if(p->symbols[i].addr > addr) > + goto finish; > + else { > + lastname=p->symbols[i].name; > + offset=addr - p->symbols[i].addr; > + } > + } > + p=p->next; > + } > + finish: > + snprintf(name, 128, "(%s +%llx)", > + lastname, offset); > + return name; > +} > + > +/* -- Eip list data -- */ > +enum { > + EIP_LIST_TYPE_NONE=0, > + EIP_LIST_TYPE_MAX > +}; > + > +struct eip_list_struct { > + struct eip_list_struct *next; > + unsigned long long eip; > + struct event_cycle_summary summary; > + int type; > + void * extra; > +}; > + > +struct { > + void (*update)(struct eip_list_struct *, void *); > + void (*new)(struct eip_list_struct *, void *); > + void (*dump)(struct eip_list_struct *); > +} eip_list_type[EIP_LIST_TYPE_MAX] = { > + [EIP_LIST_TYPE_NONE] = { > + .update=NULL, > + .new=NULL, > + .dump=NULL }, > +}; > + > + > +/* --- HVM class of events --- */ > + > +/* > + * -- Algorithms -- > + * > + * Interrupt Wake-to-halt detection > + * > + * Purpose: To correlate device interrupts to vcpu runtime. > + * > + * Diagram: > + * ... > + * blocked -> runnable <- set to waking > + * ... > + * runnable -> running > + * inj_virq A <- Note "waking" interrupt > + * vmenter <- Start tsc of "wake-to-halt" interval. > + Turn off 'waking'. > + * ... > + * inj_virq B <- Note alternate interrupt > + * vmenter <- Start tsc of "interrupt-to-halt" interval > + * ... > + * vmexit <- End tsc of "x-to-halt" interval > + * running -> blocked <- Process > + * > + * The "waking" interrupts we want to sub-classify into > + * "wake-only" (when interrupt was the only interrupt from wake to halt) and > + * "wake-all" (whether this was the only interrupt or not). 
> + */ > + > +/* VMX data */ > +#define EXIT_REASON_EXCEPTION_NMI 0 > +#define EXIT_REASON_EXTERNAL_INTERRUPT 1 > +#define EXIT_REASON_TRIPLE_FAULT 2 > +#define EXIT_REASON_INIT 3 > +#define EXIT_REASON_SIPI 4 > +#define EXIT_REASON_IO_SMI 5 > +#define EXIT_REASON_OTHER_SMI 6 > +#define EXIT_REASON_PENDING_INTERRUPT 7 > +#define EXIT_REASON_PENDING_VIRT_NMI 8 > +#define EXIT_REASON_TASK_SWITCH 9 > +#define EXIT_REASON_CPUID 10 > +#define EXIT_REASON_GETSEC 11 > +#define EXIT_REASON_HLT 12 > +#define EXIT_REASON_INVD 13 > +#define EXIT_REASON_INVLPG 14 > +#define EXIT_REASON_RDPMC 15 > +#define EXIT_REASON_RDTSC 16 > +#define EXIT_REASON_RSM 17 > +#define EXIT_REASON_VMCALL 18 > +#define EXIT_REASON_VMCLEAR 19 > +#define EXIT_REASON_VMLAUNCH 20 > +#define EXIT_REASON_VMPTRLD 21 > +#define EXIT_REASON_VMPTRST 22 > +#define EXIT_REASON_VMREAD 23 > +#define EXIT_REASON_VMRESUME 24 > +#define EXIT_REASON_VMWRITE 25 > +#define EXIT_REASON_VMOFF 26 > +#define EXIT_REASON_VMON 27 > +#define EXIT_REASON_CR_ACCESS 28 > +#define EXIT_REASON_DR_ACCESS 29 > +#define EXIT_REASON_IO_INSTRUCTION 30 > +#define EXIT_REASON_MSR_READ 31 > +#define EXIT_REASON_MSR_WRITE 32 > +#define EXIT_REASON_INVALID_GUEST_STATE 33 > +#define EXIT_REASON_MSR_LOADING 34 > +#define EXIT_REASON_MWAIT_INSTRUCTION 36 > +#define EXIT_REASON_MONITOR_TRAP_FLAG 37 > +#define EXIT_REASON_MONITOR_INSTRUCTION 39 > +#define EXIT_REASON_PAUSE_INSTRUCTION 40 > +#define EXIT_REASON_MACHINE_CHECK 41 > +#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 > +#define EXIT_REASON_APIC_ACCESS 44 > +#define EXIT_REASON_ACCESS_GDTR_OR_IDTR 46 > +#define EXIT_REASON_ACCESS_LDTR_OR_TR 47 > +#define EXIT_REASON_EPT_VIOLATION 48 > +#define EXIT_REASON_EPT_MISCONFIG 49 > +#define EXIT_REASON_INVEPT 50 > +#define EXIT_REASON_RDTSCP 51 > +#define EXIT_REASON_VMX_PREEMPTION_TIMER_EXPIRED 52 > +#define EXIT_REASON_INVVPID 53 > +#define EXIT_REASON_WBINVD 54 > +#define EXIT_REASON_XSETBV 55 > + > +#define HVM_VMX_EXIT_REASON_MAX (EXIT_REASON_XSETBV+1) > + > +char * hvm_vmx_exit_reason_name[HVM_VMX_EXIT_REASON_MAX] = { > + [0] = "NONE", > + [EXIT_REASON_EXCEPTION_NMI]="EXCEPTION_NMI", > + [EXIT_REASON_EXTERNAL_INTERRUPT]="EXTERNAL_INTERRUPT", > + [EXIT_REASON_TRIPLE_FAULT]="TRIPLE_FAULT", > + [EXIT_REASON_INIT]="INIT", > + [EXIT_REASON_SIPI]="SIPI", > + [EXIT_REASON_IO_SMI]="IO_SMI", > + [EXIT_REASON_OTHER_SMI]="OTHER_SMI", > + [EXIT_REASON_PENDING_INTERRUPT]="PENDING_INTERRUPT", > + [EXIT_REASON_PENDING_VIRT_NMI]="PENDING_VIRT_NMI", > + [EXIT_REASON_TASK_SWITCH]="TASK_SWITCH", > + [EXIT_REASON_CPUID]="CPUID", > + [EXIT_REASON_GETSEC]="GETSEC", > + [EXIT_REASON_HLT]="HLT", > + [EXIT_REASON_INVD]="INVD", > + [EXIT_REASON_INVLPG]="INVLPG", > + [EXIT_REASON_RDPMC]="RDPMC", > + [EXIT_REASON_RDTSC]="RDTSC", > + [EXIT_REASON_RSM]="RSM", > + [EXIT_REASON_VMCALL]="VMCALL", > + [EXIT_REASON_VMCLEAR]="VMCLEAR", > + [EXIT_REASON_VMLAUNCH]="VMLAUNCH", > + [EXIT_REASON_VMPTRLD]="VMPTRLD", > + [EXIT_REASON_VMPTRST]="VMPTRST", > + [EXIT_REASON_VMREAD]="VMREAD", > + [EXIT_REASON_VMRESUME]="VMRESUME", > + [EXIT_REASON_VMWRITE]="VMWRITE", > + [EXIT_REASON_VMOFF]="VMOFF", > + [EXIT_REASON_VMON]="VMON", > + [EXIT_REASON_CR_ACCESS]="CR_ACCESS", > + [EXIT_REASON_DR_ACCESS]="DR_ACCESS", > + [EXIT_REASON_IO_INSTRUCTION]="IO_INSTRUCTION", > + [EXIT_REASON_MSR_READ]="MSR_READ", > + [EXIT_REASON_MSR_WRITE]="MSR_WRITE", > + [EXIT_REASON_INVALID_GUEST_STATE]="INVALID_GUEST_STATE", > + [EXIT_REASON_MSR_LOADING]="MSR_LOADING", > + [EXIT_REASON_MWAIT_INSTRUCTION]="MWAIT_INSTRUCTION", > + 
[EXIT_REASON_MONITOR_TRAP_FLAG]="MONITOR_TRAP_FLAG", > + [EXIT_REASON_MONITOR_INSTRUCTION]="MONITOR_INSTRUCTION", > + [EXIT_REASON_PAUSE_INSTRUCTION]="PAUSE_INSTRUCTION", > + [EXIT_REASON_MACHINE_CHECK]="MACHINE_CHECK", > + [EXIT_REASON_TPR_BELOW_THRESHOLD]="TPR_BELOW_THRESHOLD", > + [EXIT_REASON_APIC_ACCESS]="APIC_ACCESS", > + [EXIT_REASON_EPT_VIOLATION]="EPT_VIOLATION", > + [EXIT_REASON_EPT_MISCONFIG]="EPT_MISCONFIG", > + [EXIT_REASON_INVEPT]="INVEPT", > + [EXIT_REASON_RDTSCP]="RDTSCP", > + > [EXIT_REASON_VMX_PREEMPTION_TIMER_EXPIRED]="VMX_PREEMPTION_TIMER_EXPIRED", > + [EXIT_REASON_INVVPID]="INVVPID", > + [EXIT_REASON_WBINVD]="WBINVD", > + [EXIT_REASON_XSETBV]="XSETBV", > +}; > + > +/* SVM data */ > +enum VMEXIT_EXITCODE > +{ > + /* control register read exitcodes */ > + VMEXIT_CR0_READ = 0, > + VMEXIT_CR1_READ = 1, > + VMEXIT_CR2_READ = 2, > + VMEXIT_CR3_READ = 3, > + VMEXIT_CR4_READ = 4, > + VMEXIT_CR5_READ = 5, > + VMEXIT_CR6_READ = 6, > + VMEXIT_CR7_READ = 7, > + VMEXIT_CR8_READ = 8, > + VMEXIT_CR9_READ = 9, > + VMEXIT_CR10_READ = 10, > + VMEXIT_CR11_READ = 11, > + VMEXIT_CR12_READ = 12, > + VMEXIT_CR13_READ = 13, > + VMEXIT_CR14_READ = 14, > + VMEXIT_CR15_READ = 15, > + > + /* control register write exitcodes */ > + VMEXIT_CR0_WRITE = 16, > + VMEXIT_CR1_WRITE = 17, > + VMEXIT_CR2_WRITE = 18, > + VMEXIT_CR3_WRITE = 19, > + VMEXIT_CR4_WRITE = 20, > + VMEXIT_CR5_WRITE = 21, > + VMEXIT_CR6_WRITE = 22, > + VMEXIT_CR7_WRITE = 23, > + VMEXIT_CR8_WRITE = 24, > + VMEXIT_CR9_WRITE = 25, > + VMEXIT_CR10_WRITE = 26, > + VMEXIT_CR11_WRITE = 27, > + VMEXIT_CR12_WRITE = 28, > + VMEXIT_CR13_WRITE = 29, > + VMEXIT_CR14_WRITE = 30, > + VMEXIT_CR15_WRITE = 31, > + > + /* debug register read exitcodes */ > + VMEXIT_DR0_READ = 32, > + VMEXIT_DR1_READ = 33, > + VMEXIT_DR2_READ = 34, > + VMEXIT_DR3_READ = 35, > + VMEXIT_DR4_READ = 36, > + VMEXIT_DR5_READ = 37, > + VMEXIT_DR6_READ = 38, > + VMEXIT_DR7_READ = 39, > + VMEXIT_DR8_READ = 40, > + VMEXIT_DR9_READ = 41, > + VMEXIT_DR10_READ = 42, > + VMEXIT_DR11_READ = 43, > + VMEXIT_DR12_READ = 44, > + VMEXIT_DR13_READ = 45, > + VMEXIT_DR14_READ = 46, > + VMEXIT_DR15_READ = 47, > + > + /* debug register write exitcodes */ > + VMEXIT_DR0_WRITE = 48, > + VMEXIT_DR1_WRITE = 49, > + VMEXIT_DR2_WRITE = 50, > + VMEXIT_DR3_WRITE = 51, > + VMEXIT_DR4_WRITE = 52, > + VMEXIT_DR5_WRITE = 53, > + VMEXIT_DR6_WRITE = 54, > + VMEXIT_DR7_WRITE = 55, > + VMEXIT_DR8_WRITE = 56, > + VMEXIT_DR9_WRITE = 57, > + VMEXIT_DR10_WRITE = 58, > + VMEXIT_DR11_WRITE = 59, > + VMEXIT_DR12_WRITE = 60, > + VMEXIT_DR13_WRITE = 61, > + VMEXIT_DR14_WRITE = 62, > + VMEXIT_DR15_WRITE = 63, > + > + /* processor exception exitcodes (VMEXIT_EXCP[0-31]) */ > + VMEXIT_EXCEPTION_DE = 64, /* divide-by-zero-error */ > + VMEXIT_EXCEPTION_DB = 65, /* debug */ > + VMEXIT_EXCEPTION_NMI = 66, /* non-maskable-interrupt */ > + VMEXIT_EXCEPTION_BP = 67, /* breakpoint */ > + VMEXIT_EXCEPTION_OF = 68, /* overflow */ > + VMEXIT_EXCEPTION_BR = 69, /* bound-range */ > + VMEXIT_EXCEPTION_UD = 70, /* invalid-opcode*/ > + VMEXIT_EXCEPTION_NM = 71, /* device-not-available */ > + VMEXIT_EXCEPTION_DF = 72, /* double-fault */ > + VMEXIT_EXCEPTION_09 = 73, /* unsupported (reserved) */ > + VMEXIT_EXCEPTION_TS = 74, /* invalid-tss */ > + VMEXIT_EXCEPTION_NP = 75, /* segment-not-present */ > + VMEXIT_EXCEPTION_SS = 76, /* stack */ > + VMEXIT_EXCEPTION_GP = 77, /* general-protection */ > + VMEXIT_EXCEPTION_PF = 78, /* page-fault */ > + VMEXIT_EXCEPTION_15 = 79, /* reserved */ > + VMEXIT_EXCEPTION_MF = 80, /* x87 floating-point 
exception-pending */ > + VMEXIT_EXCEPTION_AC = 81, /* alignment-check */ > + VMEXIT_EXCEPTION_MC = 82, /* machine-check */ > + VMEXIT_EXCEPTION_XF = 83, /* simd floating-point */ > + > + /* exceptions 20-31 (exitcodes 84-95) are reserved */ > + > + /* ...and the rest of the #VMEXITs */ > + VMEXIT_INTR = 96, > + VMEXIT_NMI = 97, > + VMEXIT_SMI = 98, > + VMEXIT_INIT = 99, > + VMEXIT_VINTR = 100, > + VMEXIT_CR0_SEL_WRITE = 101, > + VMEXIT_IDTR_READ = 102, > + VMEXIT_GDTR_READ = 103, > + VMEXIT_LDTR_READ = 104, > + VMEXIT_TR_READ = 105, > + VMEXIT_IDTR_WRITE = 106, > + VMEXIT_GDTR_WRITE = 107, > + VMEXIT_LDTR_WRITE = 108, > + VMEXIT_TR_WRITE = 109, > + VMEXIT_RDTSC = 110, > + VMEXIT_RDPMC = 111, > + VMEXIT_PUSHF = 112, > + VMEXIT_POPF = 113, > + VMEXIT_CPUID = 114, > + VMEXIT_RSM = 115, > + VMEXIT_IRET = 116, > + VMEXIT_SWINT = 117, > + VMEXIT_INVD = 118, > + VMEXIT_PAUSE = 119, > + VMEXIT_HLT = 120, > + VMEXIT_INVLPG = 121, > + VMEXIT_INVLPGA = 122, > + VMEXIT_IOIO = 123, > + VMEXIT_MSR = 124, > + VMEXIT_TASK_SWITCH = 125, > + VMEXIT_FERR_FREEZE = 126, > + VMEXIT_SHUTDOWN = 127, > + VMEXIT_VMRUN = 128, > + VMEXIT_VMMCALL = 129, > + VMEXIT_VMLOAD = 130, > + VMEXIT_VMSAVE = 131, > + VMEXIT_STGI = 132, > + VMEXIT_CLGI = 133, > + VMEXIT_SKINIT = 134, > + VMEXIT_RDTSCP = 135, > + VMEXIT_ICEBP = 136, > + VMEXIT_WBINVD = 137, > + VMEXIT_MONITOR = 138, > + VMEXIT_MWAIT = 139, > + VMEXIT_MWAIT_CONDITIONAL= 140, > + VMEXIT_NPF = 1024, /* nested paging fault */ > + VMEXIT_INVALID = -1 > +}; > + > +#define HVM_SVM_EXIT_REASON_MAX 1025 > +char * hvm_svm_exit_reason_name[HVM_SVM_EXIT_REASON_MAX] = { > + /* 0-15 */ > + "VMEXIT_CR0_READ", > + "VMEXIT_CR1_READ", > + "VMEXIT_CR2_READ", > + "VMEXIT_CR3_READ", > + "VMEXIT_CR4_READ", > + "VMEXIT_CR5_READ", > + "VMEXIT_CR6_READ", > + "VMEXIT_CR7_READ", > + "VMEXIT_CR8_READ", > + "VMEXIT_CR9_READ", > + "VMEXIT_CR10_READ", > + "VMEXIT_CR11_READ", > + "VMEXIT_CR12_READ", > + "VMEXIT_CR13_READ", > + "VMEXIT_CR14_READ", > + "VMEXIT_CR15_READ", > + /* 16-31 */ > + "VMEXIT_CR0_WRITE", > + "VMEXIT_CR1_WRITE", > + "VMEXIT_CR2_WRITE", > + "VMEXIT_CR3_WRITE", > + "VMEXIT_CR4_WRITE", > + "VMEXIT_CR5_WRITE", > + "VMEXIT_CR6_WRITE", > + "VMEXIT_CR7_WRITE", > + "VMEXIT_CR8_WRITE", > + "VMEXIT_CR9_WRITE", > + "VMEXIT_CR10_WRITE", > + "VMEXIT_CR11_WRITE", > + "VMEXIT_CR12_WRITE", > + "VMEXIT_CR13_WRITE", > + "VMEXIT_CR14_WRITE", > + "VMEXIT_CR15_WRITE", > + /* 32-47 */ > + "VMEXIT_DR0_READ", > + "VMEXIT_DR1_READ", > + "VMEXIT_DR2_READ", > + "VMEXIT_DR3_READ", > + "VMEXIT_DR4_READ", > + "VMEXIT_DR5_READ", > + "VMEXIT_DR6_READ", > + "VMEXIT_DR7_READ", > + "VMEXIT_DR8_READ", > + "VMEXIT_DR9_READ", > + "VMEXIT_DR10_READ", > + "VMEXIT_DR11_READ", > + "VMEXIT_DR12_READ", > + "VMEXIT_DR13_READ", > + "VMEXIT_DR14_READ", > + "VMEXIT_DR15_READ", > + /* 48-63 */ > + "VMEXIT_DR0_WRITE", > + "VMEXIT_DR1_WRITE", > + "VMEXIT_DR2_WRITE", > + "VMEXIT_DR3_WRITE", > + "VMEXIT_DR4_WRITE", > + "VMEXIT_DR5_WRITE", > + "VMEXIT_DR6_WRITE", > + "VMEXIT_DR7_WRITE", > + "VMEXIT_DR8_WRITE", > + "VMEXIT_DR9_WRITE", > + "VMEXIT_DR10_WRITE", > + "VMEXIT_DR11_WRITE", > + "VMEXIT_DR12_WRITE", > + "VMEXIT_DR13_WRITE", > + "VMEXIT_DR14_WRITE", > + "VMEXIT_DR15_WRITE", > + /* 64-83 */ > + "VMEXIT_EXCEPTION_DE", > + "VMEXIT_EXCEPTION_DB", > + "VMEXIT_EXCEPTION_NMI", > + "VMEXIT_EXCEPTION_BP", > + "VMEXIT_EXCEPTION_OF", > + "VMEXIT_EXCEPTION_BR", > + "VMEXIT_EXCEPTION_UD", > + "VMEXIT_EXCEPTION_NM", > + "VMEXIT_EXCEPTION_DF", > + "VMEXIT_EXCEPTION_09", > + "VMEXIT_EXCEPTION_TS", > + "VMEXIT_EXCEPTION_NP", > + 
"VMEXIT_EXCEPTION_SS", > + "VMEXIT_EXCEPTION_GP", > + "VMEXIT_EXCEPTION_PF", > + "VMEXIT_EXCEPTION_15", > + "VMEXIT_EXCEPTION_MF", > + "VMEXIT_EXCEPTION_AC", > + "VMEXIT_EXCEPTION_MC", > + "VMEXIT_EXCEPTION_XF", > + /* 84-95 */ > + "VMEXIT_EXCEPTION_20", > + "VMEXIT_EXCEPTION_21", > + "VMEXIT_EXCEPTION_22", > + "VMEXIT_EXCEPTION_23", > + "VMEXIT_EXCEPTION_24", > + "VMEXIT_EXCEPTION_25", > + "VMEXIT_EXCEPTION_26", > + "VMEXIT_EXCEPTION_27", > + "VMEXIT_EXCEPTION_28", > + "VMEXIT_EXCEPTION_29", > + "VMEXIT_EXCEPTION_30", > + "VMEXIT_EXCEPTION_31", > + /* 96-99 */ > + "VMEXIT_INTR", > + "VMEXIT_NMI", > + "VMEXIT_SMI", > + "VMEXIT_INIT", > + /* 100-109 */ > + "VMEXIT_VINTR", > + "VMEXIT_CR0_SEL_WRITE", > + "VMEXIT_IDTR_READ", > + "VMEXIT_GDTR_READ", > + "VMEXIT_LDTR_READ", > + "VMEXIT_TR_READ", > + "VMEXIT_IDTR_WRITE", > + "VMEXIT_GDTR_WRITE", > + "VMEXIT_LDTR_WRITE", > + "VMEXIT_TR_WRITE", > + /* 110-119 */ > + "VMEXIT_RDTSC", > + "VMEXIT_RDPMC", > + "VMEXIT_PUSHF", > + "VMEXIT_POPF", > + "VMEXIT_CPUID", > + "VMEXIT_RSM", > + "VMEXIT_IRET", > + "VMEXIT_SWINT", > + "VMEXIT_INVD", > + "VMEXIT_PAUSE", > + /* 120-129 */ > + "VMEXIT_HLT", > + "VMEXIT_INVLPG", > + "VMEXIT_INVLPGA", > + "VMEXIT_IOIO", > + "VMEXIT_MSR", > + "VMEXIT_TASK_SWITCH", > + "VMEXIT_FERR_FREEZE", > + "VMEXIT_SHUTDOWN", > + "VMEXIT_VMRUN", > + "VMEXIT_VMMCALL", > + /* 130-139 */ > + "VMEXIT_VMLOAD", > + "VMEXIT_VMSAVE", > + "VMEXIT_STGI", > + "VMEXIT_CLGI", > + "VMEXIT_SKINIT", > + "VMEXIT_RDTSCP", > + "VMEXIT_ICEBP", > + "VMEXIT_WBINVD", > + "VMEXIT_MONITOR", > + "VMEXIT_MWAIT", > + /* 140 */ > + "VMEXIT_MWAIT_CONDITIONAL", > + [VMEXIT_NPF] = "VMEXIT_NPF", /* nested paging fault */ > +}; > + > + > +#if ( HVM_VMX_EXIT_REASON_MAX > HVM_SVM_EXIT_REASON_MAX ) > +# define HVM_EXIT_REASON_MAX HVM_VMX_EXIT_REASON_MAX > +# error - Strange! 
> +#else > +# define HVM_EXIT_REASON_MAX HVM_SVM_EXIT_REASON_MAX > +#endif > + > +/* General hvm information */ > +#define SPURIOUS_APIC_VECTOR 0xff > +#define ERROR_APIC_VECTOR 0xfe > +#define INVALIDATE_TLB_VECTOR 0xfd > +#define EVENT_CHECK_VECTOR 0xfc > +#define CALL_FUNCTION_VECTOR 0xfb > +#define THERMAL_APIC_VECTOR 0xfa > +#define LOCAL_TIMER_VECTOR 0xf9 > + > +#define EXTERNAL_INTERRUPT_MAX 256 > + > +/* Stringify numbers */ > +char * hvm_extint_vector_name[EXTERNAL_INTERRUPT_MAX] = { > + [SPURIOUS_APIC_VECTOR] = "SPURIOS_APIC", > + [ERROR_APIC_VECTOR] = "ERROR_APIC", > + [INVALIDATE_TLB_VECTOR]= "INVALIDATE_TLB", > + [EVENT_CHECK_VECTOR]= "EVENT_CHECK", > + [CALL_FUNCTION_VECTOR]= "CALL_FUNCTION", > + [THERMAL_APIC_VECTOR]= "THERMAL_APIC", > + [LOCAL_TIMER_VECTOR] = "LOCAL_TIMER", > +}; > + > +#define HVM_TRAP_MAX 20 > + > +char * hvm_trap_name[HVM_TRAP_MAX] = { > + [0] = "Divide", > + [1] = "RESERVED", > + [2] = "NMI", > + [3] = "Breakpoint", > + [4] = "Overflow", > + [5] = "BOUND", > + [6] = "Invalid Op", > + [7] = "Coprocessor not present", > + [8] = "Double Fault", > + [9] = "Coprocessor segment overrun", > + [10] = "TSS", > + [11] = "Segment not present", > + [12] = "Stack-segment fault", > + [13] = "GP", > + [14] = "Page fault", > + [15] = "RESERVED", > + [16] = "FPU", > + [17] = "Alignment check", > + [18] = "Machine check", > + [19] = "SIMD", > +}; > + > + > +enum { > + HVM_EVENT_HANDLER_NONE = 0, > + HVM_EVENT_HANDLER_PF_XEN = 1, > + HVM_EVENT_HANDLER_PF_INJECT, > + HVM_EVENT_HANDLER_INJ_EXC, > + HVM_EVENT_HANDLER_INJ_VIRQ, > + HVM_EVENT_HANDLER_REINJ_VIRQ, > + HVM_EVENT_HANDLER_IO_READ, > + HVM_EVENT_HANDLER_IO_WRITE, > + HVM_EVENT_HANDLER_CR_READ, /* 8 */ > + HVM_EVENT_HANDLER_CR_WRITE, > + HVM_EVENT_HANDLER_DR_READ, > + HVM_EVENT_HANDLER_DR_WRITE, > + HVM_EVENT_HANDLER_MSR_READ, > + HVM_EVENT_HANDLER_MSR_WRITE, > + HVM_EVENT_HANDLER_CPUID, > + HVM_EVENT_HANDLER_INTR, > + HVM_EVENT_HANDLER_NMI, /* 16 */ > + HVM_EVENT_HANDLER_SMI, > + HVM_EVENT_HANDLER_VMCALL, > + HVM_EVENT_HANDLER_HLT, > + HVM_EVENT_HANDLER_INVLPG, > + HVM_EVENT_HANDLER_MCE, > + HVM_EVENT_HANDLER_IO_ASSIST, > + HVM_EVENT_HANDLER_MMIO_ASSIST, > + HVM_EVENT_HANDLER_CLTS, > + HVM_EVENT_HANDLER_LMSW, > + HVM_EVENT_RDTSC, > + HVM_EVENT_INTR_WINDOW=0x20, /* Oops... 
skipped 0x1b-1f */ > + HVM_EVENT_NPF, > + HVM_EVENT_REALMODE_EMULATE, > + HVM_EVENT_TRAP, > + HVM_EVENT_TRAP_DEBUG, > + HVM_EVENT_VLAPIC, > + HVM_EVENT_HANDLER_MAX > +}; > +char * hvm_event_handler_name[HVM_EVENT_HANDLER_MAX] = { > + "(no handler)", > + "pf_xen", > + "pf_inject", > + "inj_exc", > + "inj_virq", > + "reinj_virq", > + "io_read", > + "io_write", > + "cr_read", /* 8 */ > + "cr_write", > + "dr_read", > + "dr_write", > + "msr_read", > + "msr_write", > + "cpuid", > + "intr", > + "nmi", /* 16 */ > + "smi", > + "vmcall", > + "hlt", > + "invlpg", > + "mce", > + "io_assist", > + "mmio_assist", > + "clts", /* 24 */ > + "lmsw", > + "rdtsc", > + [HVM_EVENT_INTR_WINDOW]="intr_window", > + "npf", > + "realmode_emulate", > + "trap", > + "trap_debug", > + "vlapic" > +}; > + > +enum { > + HVM_VOL_VMENTRY, > + HVM_VOL_VMEXIT, > + HVM_VOL_HANDLER, > + HVM_VOL_MAX > +}; > + > +enum { > + GUEST_INTERRUPT_CASE_NONE, > + /* This interrupt woke, no other interrupts until halt */ > + GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ALONE, > + /* This interrupt woke, maybe another interrupt before halt */ > + GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ANY, > + /* Time from interrupt (running) to halt */ > + GUEST_INTERRUPT_CASE_INTERRUPT_TO_HALT, > + GUEST_INTERRUPT_CASE_MAX, > +}; > + > +char *guest_interrupt_case_name[] = { > + [GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ALONE]="wake to halt alone", > + /* This interrupt woke, maybe another interrupt before halt */ > + [GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ANY] ="wake to halt any ", > + /* Time from interrupt (running) to halt */ > + [GUEST_INTERRUPT_CASE_INTERRUPT_TO_HALT] ="intr to halt ", > +}; > + > +char *hvm_vol_name[HVM_VOL_MAX] = { > + [HVM_VOL_VMENTRY]="vmentry", > + [HVM_VOL_VMEXIT] ="vmexit", > + [HVM_VOL_HANDLER]="handler", > +}; > + > +enum { > + HYPERCALL_set_trap_table = 0, > + HYPERCALL_mmu_update, > + HYPERCALL_set_gdt, > + HYPERCALL_stack_switch, > + HYPERCALL_set_callbacks, > + HYPERCALL_fpu_taskswitch, > + HYPERCALL_sched_op_compat, > + HYPERCALL_platform_op, > + HYPERCALL_set_debugreg, > + HYPERCALL_get_debugreg, > + HYPERCALL_update_descriptor, > + HYPERCALL_memory_op=12, > + HYPERCALL_multicall, > + HYPERCALL_update_va_mapping, > + HYPERCALL_set_timer_op, > + HYPERCALL_event_channel_op_compat, > + HYPERCALL_xen_version, > + HYPERCALL_console_io, > + HYPERCALL_physdev_op_compat, > + HYPERCALL_grant_table_op, > + HYPERCALL_vm_assist, > + HYPERCALL_update_va_mapping_otherdomain, > + HYPERCALL_iret, > + HYPERCALL_vcpu_op, > + HYPERCALL_set_segment_base, > + HYPERCALL_mmuext_op, > + HYPERCALL_acm_op, > + HYPERCALL_nmi_op, > + HYPERCALL_sched_op, > + HYPERCALL_callback_op, > + HYPERCALL_xenoprof_op, > + HYPERCALL_event_channel_op, > + HYPERCALL_physdev_op, > + HYPERCALL_hvm_op, > + HYPERCALL_sysctl, > + HYPERCALL_domctl, > + HYPERCALL_kexec_op, > + HYPERCALL_MAX > +}; > + > +char *hypercall_name[HYPERCALL_MAX] = { > + [HYPERCALL_set_trap_table]="set_trap_table", > + [HYPERCALL_mmu_update]="mmu_update", > + [HYPERCALL_set_gdt]="set_gdt", > + [HYPERCALL_stack_switch]="stack_switch", > + [HYPERCALL_set_callbacks]="set_callbacks", > + [HYPERCALL_fpu_taskswitch]="fpu_taskswitch", > + [HYPERCALL_sched_op_compat]="sched_op(compat)", > + [HYPERCALL_platform_op]="platform_op", > + [HYPERCALL_set_debugreg]="set_debugreg", > + [HYPERCALL_get_debugreg]="get_debugreg", > + [HYPERCALL_update_descriptor]="update_descriptor", > + [HYPERCALL_memory_op]="memory_op", > + [HYPERCALL_multicall]="multicall", > + [HYPERCALL_update_va_mapping]="update_va_mapping", > + 
[HYPERCALL_set_timer_op]="set_timer_op", > + [HYPERCALL_event_channel_op_compat]="evtchn_op(compat)", > + [HYPERCALL_xen_version]="xen_version", > + [HYPERCALL_console_io]="console_io", > + [HYPERCALL_physdev_op_compat]="physdev_op(compat)", > + [HYPERCALL_grant_table_op]="grant_table_op", > + [HYPERCALL_vm_assist]="vm_assist", > + > [HYPERCALL_update_va_mapping_otherdomain]="update_va_mapping_otherdomain", > + [HYPERCALL_iret]="iret", > + [HYPERCALL_vcpu_op]="vcpu_op", > + [HYPERCALL_set_segment_base]="set_segment_base", > + [HYPERCALL_mmuext_op]="mmuext_op", > + [HYPERCALL_acm_op]="acm_op", > + [HYPERCALL_nmi_op]="nmi_op", > + [HYPERCALL_sched_op]="sched_op", > + [HYPERCALL_callback_op]="callback_op", > + [HYPERCALL_xenoprof_op]="xenoprof_op", > + [HYPERCALL_event_channel_op]="evtchn_op", > + [HYPERCALL_physdev_op]="physdev_op", > + [HYPERCALL_hvm_op]="hvm_op", > + [HYPERCALL_sysctl]="sysctl", > + [HYPERCALL_domctl]="domctl", > + [HYPERCALL_kexec_op]="kexec_op" > +}; > + > +enum { > + PF_XEN_EMUL_LVL_0, > + PF_XEN_EMUL_LVL_1, > + PF_XEN_EMUL_LVL_2, > + PF_XEN_EMUL_LVL_3, > + PF_XEN_EMUL_LVL_4, > + PF_XEN_EMUL_EARLY_UNSHADOW, > + PF_XEN_EMUL_SET_CHANGED, > + PF_XEN_EMUL_SET_UNCHANGED, > + PF_XEN_EMUL_SET_FLUSH, > + PF_XEN_EMUL_SET_ERROR, > + PF_XEN_EMUL_PROMOTE, > + PF_XEN_EMUL_DEMOTE, > + PF_XEN_EMUL_PREALLOC_UNPIN, > + PF_XEN_EMUL_PREALLOC_UNHOOK, > + PF_XEN_EMUL_MAX, > +}; > + > +char * pf_xen_emul_name[PF_XEN_EMUL_MAX] = { > + [PF_XEN_EMUL_LVL_0]="non-linmap", > + [PF_XEN_EMUL_LVL_1]="linmap l1", > + [PF_XEN_EMUL_LVL_2]="linmap l2", > + [PF_XEN_EMUL_LVL_3]="linmap l3", > + [PF_XEN_EMUL_LVL_4]="linmap l4", > + [PF_XEN_EMUL_EARLY_UNSHADOW]="early unshadow", > + [PF_XEN_EMUL_SET_UNCHANGED]="set unchanged", > + [PF_XEN_EMUL_SET_CHANGED]="set changed", > + [PF_XEN_EMUL_SET_FLUSH]="set changed", > + [PF_XEN_EMUL_SET_ERROR]="set changed", > + [PF_XEN_EMUL_PROMOTE]="promote", > + [PF_XEN_EMUL_DEMOTE]="demote", > + [PF_XEN_EMUL_PREALLOC_UNPIN]="unpin", > + [PF_XEN_EMUL_PREALLOC_UNHOOK]="unhook", > +}; > + > +/* Rio only */ > +enum { > + PF_XEN_NON_EMUL_VA_USER, > + PF_XEN_NON_EMUL_VA_KERNEL, > + PF_XEN_NON_EMUL_EIP_USER, > + PF_XEN_NON_EMUL_EIP_KERNEL, > + PF_XEN_NON_EMUL_MAX, > +}; > + > +char * pf_xen_non_emul_name[PF_XEN_NON_EMUL_MAX] = { > + [PF_XEN_NON_EMUL_VA_USER]="va user", > + [PF_XEN_NON_EMUL_VA_KERNEL]="va kernel", > + [PF_XEN_NON_EMUL_EIP_USER]="eip user", > + [PF_XEN_NON_EMUL_EIP_KERNEL]="eip kernel", > +}; > + > +enum { > + PF_XEN_FIXUP_PREALLOC_UNPIN, > + PF_XEN_FIXUP_PREALLOC_UNHOOK, > + PF_XEN_FIXUP_UNSYNC, > + PF_XEN_FIXUP_OOS_ADD, > + PF_XEN_FIXUP_OOS_EVICT, > + PF_XEN_FIXUP_PROMOTE, > + PF_XEN_FIXUP_UPDATE_ONLY, > + PF_XEN_FIXUP_WRMAP, > + PF_XEN_FIXUP_BRUTE_FORCE, > + PF_XEN_FIXUP_MAX, > +}; > + > +char * pf_xen_fixup_name[PF_XEN_FIXUP_MAX] = { > + [PF_XEN_FIXUP_PREALLOC_UNPIN] = "unpin", > + [PF_XEN_FIXUP_PREALLOC_UNHOOK] = "unhook", > + [PF_XEN_FIXUP_UNSYNC] = "unsync", > + [PF_XEN_FIXUP_OOS_ADD] = "oos-add", > + [PF_XEN_FIXUP_OOS_EVICT] = "oos-evict", > + [PF_XEN_FIXUP_PROMOTE] = "promote", > + [PF_XEN_FIXUP_UPDATE_ONLY] = "update", > + [PF_XEN_FIXUP_WRMAP] = "wrmap", > + [PF_XEN_FIXUP_BRUTE_FORCE] = "wrmap-bf", > +}; > + > +enum { > + PF_XEN_NOT_SHADOW = 1, > + PF_XEN_FAST_PROPAGATE, > + PF_XEN_FAST_MMIO, > + PF_XEN_FALSE_FAST_PATH, > + PF_XEN_MMIO, > + PF_XEN_FIXUP, > + PF_XEN_DOMF_DYING, > + PF_XEN_EMULATE, > + PF_XEN_EMULATE_UNSHADOW_USER, > + PF_XEN_EMULATE_UNSHADOW_EVTINJ, > + PF_XEN_EMULATE_UNSHADOW_UNHANDLED, > + 
PF_XEN_LAST_FAULT=PF_XEN_EMULATE_UNSHADOW_UNHANDLED, > + PF_XEN_NON_EMULATE, > + PF_XEN_NO_HANDLER, > + PF_XEN_MAX, > +}; > + > +#define SHADOW_WRMAP_BF 12 > +#define SHADOW_PREALLOC_UNPIN 13 > +#define SHADOW_RESYNC_FULL 14 > +#define SHADOW_RESYNC_ONLY 15 > + > +char * pf_xen_name[PF_XEN_MAX] = { > + [PF_XEN_NOT_SHADOW]="propagate", > + [PF_XEN_FAST_PROPAGATE]="fast propagate", > + [PF_XEN_FAST_MMIO]="fast mmio", > + [PF_XEN_FALSE_FAST_PATH]="false fast path", > + [PF_XEN_MMIO]="mmio", > + [PF_XEN_FIXUP]="fixup", > + [PF_XEN_DOMF_DYING]="dom dying", > + [PF_XEN_EMULATE]="emulate", > + [PF_XEN_EMULATE_UNSHADOW_USER]="unshadow:user-mode", > + [PF_XEN_EMULATE_UNSHADOW_EVTINJ]="unshadow:evt inj", > + [PF_XEN_EMULATE_UNSHADOW_UNHANDLED]="unshadow:unhandled instr", > + [PF_XEN_NON_EMULATE]="fixup|mmio", > + [PF_XEN_NO_HANDLER]="(no handler)", > +}; > + > +#define CORR_VA_INVALID (0ULL-1) > + > +enum { > + NONPF_MMIO_APIC, > + NONPF_MMIO_NPF, > + NONPF_MMIO_UNKNOWN, > + NONPF_MMIO_MAX > +}; > + > +struct mmio_info { > + unsigned long long gpa; > + unsigned long long va; /* Filled only by shadow */ > + unsigned data; > + unsigned data_valid:1, is_write:1; > +}; > + > +struct pf_xen_extra { > + unsigned long long va; > + union { > + unsigned flags; > + struct { > + unsigned flag_set_ad:1, > + flag_set_a:1, > + flag_shadow_l1_get_ref:1, > + flag_shadow_l1_put_ref:1, > + flag_l2_propagate:1, > + flag_set_changed:1, > + flag_set_flush:1, > + flag_set_error:1, > + flag_demote:1, > + flag_promote:1, > + flag_wrmap:1, > + flag_wrmap_guess_found:1, > + flag_wrmap_brute_force:1, > + flag_early_unshadow:1, > + flag_emulation_2nd_pt_written:1, > + flag_emulation_last_failed:1, > + flag_emulate_full_pt:1, > + flag_prealloc_unhook:1, > + flag_unsync:1, > + flag_oos_fixup_add:1, > + flag_oos_fixup_evict:1; > + }; > + }; /* Miami + ; fixup & emulate */ > + unsigned int error_code; /* Rio only */ > + > + /* Calculated */ > + int pf_case; /* Rio */ > + > + /* MMIO only */ > + unsigned long long gpa; > + unsigned int data; > + > + /* Emulate only */ > + unsigned long long gl1e; /* Miami + */ > + unsigned long long wval; /* Miami */ > + unsigned long long corresponding_va; > + unsigned int pt_index[5], pt_is_lo; > + int pt_level; > + > + /* Other */ > + unsigned long long gfn; > + > + /* Flags */ > + unsigned corr_valid:1, > + corr_is_kernel:1, > + va_is_kernel:1; > +}; > + > +struct pcpu_info; > + > +#define GUEST_INTERRUPT_MAX 350 > +#define FAKE_VECTOR 349 > +#define CR_MAX 9 > +#define RESYNCS_MAX 17 > +#define PF_XEN_FIXUP_UNSYNC_RESYNC_MAX 2 > + > +struct hvm_data; > + > +struct hvm_summary_handler_node { > + void (*handler)(struct hvm_data *, void* data); > + void *data; > + struct hvm_summary_handler_node *next; > +}; > + > +struct hvm_data { > + /* Summary information */ > + int init; > + int vmexit_valid; > + int summary_info; > + struct vcpu_data *v; /* up-pointer */ > + > + /* SVM / VMX compatibility. 
FIXME - should be global */ > + char ** exit_reason_name; > + int exit_reason_max; > + struct hvm_summary_handler_node > *exit_reason_summary_handler_list[HVM_EXIT_REASON_MAX]; > + > + /* Information about particular exit reasons */ > + struct { > + struct event_cycle_summary exit_reason[HVM_EXIT_REASON_MAX]; > + int extint[EXTERNAL_INTERRUPT_MAX+1]; > + int *extint_histogram; > + struct event_cycle_summary trap[HVM_TRAP_MAX]; > + struct event_cycle_summary pf_xen[PF_XEN_MAX]; > + struct event_cycle_summary pf_xen_emul[PF_XEN_EMUL_MAX]; > + struct event_cycle_summary pf_xen_emul_early_unshadow[5]; > + struct event_cycle_summary pf_xen_non_emul[PF_XEN_NON_EMUL_MAX]; > + struct event_cycle_summary pf_xen_fixup[PF_XEN_FIXUP_MAX]; > + struct event_cycle_summary > pf_xen_fixup_unsync_resync[PF_XEN_FIXUP_UNSYNC_RESYNC_MAX+1]; > + struct event_cycle_summary cr_write[CR_MAX]; > + struct event_cycle_summary cr3_write_resyncs[RESYNCS_MAX+1]; > + struct event_cycle_summary vmcall[HYPERCALL_MAX+1]; > + struct event_cycle_summary generic[HVM_EVENT_HANDLER_MAX]; > + struct event_cycle_summary mmio[NONPF_MMIO_MAX]; > + struct hvm_gi_struct { > + int count; > + struct cycle_summary runtime[GUEST_INTERRUPT_CASE_MAX]; > + /* OK, not summary info, but still... */ > + int is_wake; > + tsc_t start_tsc; > + } guest_interrupt[GUEST_INTERRUPT_MAX + 1]; > + /* IPI Latency */ > + struct event_cycle_summary ipi_latency; > + int ipi_count[256]; > + struct { > + struct io_address *mmio, *pio; > + } io; > + } summary; > + > + /* In-flight accumulation information */ > + struct { > + union { > + struct { > + unsigned port:31, > + is_write:1; > + unsigned int val; > + } io; > + struct pf_xen_extra pf_xen; > + struct { > + unsigned cr; > + unsigned long long val; > + int repromote; > + } cr_write; > + struct { > + unsigned addr; > + unsigned long long val; > + } msr; > + struct { > + unsigned int event; > + uint32_t d[4]; > + } generic; > + struct { > + unsigned eax; > + } vmcall; > + struct { > + unsigned vec; > + } intr; > + }; > + /* MMIO gets its separate area, since many exits may use it */ > + struct mmio_info mmio; > + }inflight; > + int resyncs; > + void (*post_process)(struct hvm_data *); > + tsc_t exit_tsc, arc_cycles, entry_tsc; > + unsigned long long rip; > + unsigned exit_reason, event_handler; > + int short_summary_done:1, prealloc_unpin:1, wrmap_bf:1; > + > + /* Immediate processing */ > + void *d; > + > + /* Wake-to-halt detection. See comment above. 
*/ > + struct { > + unsigned waking:1; > + /* Wake vector: keep track of time from vmentry until: > + next halt, or next interrupt */ > + int vector, interrupts, interrupts_wanting_tsc; > + } w2h; > + > + /* Historical info */ > + tsc_t last_rdtsc; > +}; > + > +enum { > + HVM_SHORT_SUMMARY_EMULATE, > + HVM_SHORT_SUMMARY_UNSYNC, > + HVM_SHORT_SUMMARY_FIXUP, > + HVM_SHORT_SUMMARY_MMIO, > + HVM_SHORT_SUMMARY_PROPAGATE, > + HVM_SHORT_SUMMARY_CR3, > + HVM_SHORT_SUMMARY_VMCALL, > + HVM_SHORT_SUMMARY_INTERRUPT, > + HVM_SHORT_SUMMARY_HLT, > + HVM_SHORT_SUMMARY_OTHER, > + HVM_SHORT_SUMMARY_MAX, > +}; > + > +char *hvm_short_summary_name[HVM_SHORT_SUMMARY_MAX] = { > + [HVM_SHORT_SUMMARY_EMULATE] ="emulate", > + [HVM_SHORT_SUMMARY_UNSYNC] ="unsync", > + [HVM_SHORT_SUMMARY_FIXUP] ="fixup", > + [HVM_SHORT_SUMMARY_MMIO] ="mmio", > + [HVM_SHORT_SUMMARY_PROPAGATE]="propagate", > + [HVM_SHORT_SUMMARY_CR3] ="cr3", > + [HVM_SHORT_SUMMARY_VMCALL] ="vmcall", > + [HVM_SHORT_SUMMARY_INTERRUPT]="intr", > + [HVM_SHORT_SUMMARY_HLT] ="hlt", > + [HVM_SHORT_SUMMARY_OTHER] ="other", > +}; > + > +struct hvm_short_summary_struct { > + struct cycle_summary s[HVM_SHORT_SUMMARY_MAX]; > +}; > + > +void init_hvm_data(struct hvm_data *h, struct vcpu_data *v) { > + int i; > + > + if(h->init) > + return; > + > + h->v = v; > + > + h->init = 1; > + > + if(opt.svm_mode) { > + h->exit_reason_max = HVM_SVM_EXIT_REASON_MAX; > + h->exit_reason_name = hvm_svm_exit_reason_name; > + } else { > + h->exit_reason_max = HVM_VMX_EXIT_REASON_MAX; > + h->exit_reason_name = hvm_vmx_exit_reason_name; > + } > + > + if(opt.histogram_interrupt_eip) { > + int count = > ((1ULL<<ADDR_SPACE_BITS)/opt.histogram_interrupt_increment); > + size_t size = count * sizeof(int); > + h->summary.extint_histogram = malloc(size); > + if(h->summary.extint_histogram) > + bzero(h->summary.extint_histogram, size); > + else { > + fprintf(stderr, "FATAL: Could not allocate %zd bytes for > interrupt histogram!\n", > + size); > + error(ERR_SYSTEM, NULL); > + } > + > + } > + for(i=0; i<GUEST_INTERRUPT_MAX+1; i++) > + h->summary.guest_interrupt[i].count=0; > +} > + > +/* PV data */ > +enum { > + PV_HYPERCALL=1, > + PV_TRAP=3, > + PV_PAGE_FAULT, > + PV_FORCED_INVALID_OP, > + PV_EMULATE_PRIVOP, > + PV_EMULATE_4GB, > + PV_MATH_STATE_RESTORE, > + PV_PAGING_FIXUP, > + PV_GDT_LDT_MAPPING_FAULT, > + PV_PTWR_EMULATION, > + PV_PTWR_EMULATION_PAE, > + PV_HYPERCALL_V2 = 13, > + PV_HYPERCALL_SUBCALL = 14, > + PV_MAX > +}; > + > +char *pv_name[PV_MAX] = { > + [PV_HYPERCALL]="hypercall", > + [PV_TRAP]="trap", > + [PV_PAGE_FAULT]="page_fault", > + [PV_FORCED_INVALID_OP]="forced_invalid_op", > + [PV_EMULATE_PRIVOP]="emulate privop", > + [PV_EMULATE_4GB]="emulate 4g", > + [PV_MATH_STATE_RESTORE]="math state restore", > + [PV_PAGING_FIXUP]="paging fixup", > + [PV_GDT_LDT_MAPPING_FAULT]="gdt/ldt mapping fault", > + [PV_PTWR_EMULATION]="ptwr", > + [PV_PTWR_EMULATION_PAE]="ptwr(pae)", > + [PV_HYPERCALL_V2]="hypercall", > + [PV_HYPERCALL_SUBCALL]="hypercall (subcall)", > +}; > + > +#define PV_HYPERCALL_MAX 56 > +#define PV_TRAP_MAX 20 > + > +struct pv_data { > + unsigned summary_info:1; > + int count[PV_MAX]; > + int hypercall_count[PV_HYPERCALL_MAX]; > + int trap_count[PV_TRAP_MAX]; > +}; > + > +/* Sched data */ > + > +enum { > + SCHED_DOM_ADD=1, > + SCHED_DOM_REM, > + SCHED_SLEEP, > + SCHED_WAKE, > + SCHED_YIELD, > + SCHED_BLOCK, > + SCHED_SHUTDOWN, > + SCHED_CTL, > + SCHED_ADJDOM, > + SCHED_SWITCH, > + SCHED_S_TIMER_FN, > + SCHED_T_TIMER_FN, > + SCHED_DOM_TIMER_FN, > + SCHED_SWITCH_INFPREV, > + 
SCHED_SWITCH_INFNEXT, > + SCHED_SHUTDOWN_CODE, > + SCHED_MAX > +}; > + > +enum { > + RUNSTATE_RUNNING=0, > + RUNSTATE_RUNNABLE, > + RUNSTATE_BLOCKED, > + RUNSTATE_OFFLINE, > + RUNSTATE_LOST, > + RUNSTATE_QUEUED, > + RUNSTATE_INIT, > + RUNSTATE_MAX > +}; > + > +int runstate_graph[RUNSTATE_MAX] = > +{ > + [RUNSTATE_BLOCKED]=0, > + [RUNSTATE_OFFLINE]=1, > + [RUNSTATE_RUNNABLE]=2, > + [RUNSTATE_RUNNING]=3, > + [RUNSTATE_LOST]=-1, > + [RUNSTATE_QUEUED]=-2, > + [RUNSTATE_INIT]=-2, > +}; > + > +char * runstate_name[RUNSTATE_MAX]={ > + [RUNSTATE_RUNNING]= "running", > + [RUNSTATE_RUNNABLE]="runnable", > + [RUNSTATE_BLOCKED]= "blocked", /* to be blocked */ > + [RUNSTATE_OFFLINE]= "offline", > + [RUNSTATE_QUEUED]= "queued", > + [RUNSTATE_INIT]= "init", > + [RUNSTATE_LOST]= "lost", > +}; > + > +enum { > + RUNNABLE_STATE_INVALID, > + RUNNABLE_STATE_WAKE, > + RUNNABLE_STATE_PREEMPT, > + RUNNABLE_STATE_OTHER, > + RUNNABLE_STATE_MAX > +}; > + > +char * runnable_state_name[RUNNABLE_STATE_MAX]={ > + [RUNNABLE_STATE_INVALID]="invalid", /* Should never show up */ > + [RUNNABLE_STATE_WAKE]="wake", > + [RUNNABLE_STATE_PREEMPT]="preempt", > + [RUNNABLE_STATE_OTHER]="other", > +}; > + > +/* Memory data */ > +enum { > + MEM_PAGE_GRANT_MAP = 1, > + MEM_PAGE_GRANT_UNMAP, > + MEM_PAGE_GRANT_TRANSFER, > + MEM_SET_P2M_ENTRY, > + MEM_DECREASE_RESERVATION, > + MEM_POD_POPULATE = 16, > + MEM_POD_ZERO_RECLAIM, > + MEM_POD_SUPERPAGE_SPLINTER, > + MEM_MAX > +}; > + > +char *mem_name[MEM_MAX] = { > + [MEM_PAGE_GRANT_MAP] = "grant-map", > + [MEM_PAGE_GRANT_UNMAP] = "grant-unmap", > + [MEM_PAGE_GRANT_TRANSFER] = "grant-transfer", > + [MEM_SET_P2M_ENTRY] = "set-p2m", > + [MEM_DECREASE_RESERVATION] = "decrease-reservation", > + [MEM_POD_POPULATE] = "pod-populate", > + [MEM_POD_ZERO_RECLAIM] = "pod-zero-reclaim", > + [MEM_POD_SUPERPAGE_SPLINTER] = "pod-superpage-splinter", > +}; > + > +/* Per-unit information. 
*/ > + > +struct cr3_value_struct { > + struct cr3_value_struct * next; > + struct cr3_value_struct * gnext; > + unsigned long long gmfn; > + int cr3_id; > + unsigned long long first_time, last_time, run_time; > + struct cycle_summary total_time, guest_time, hv_time; > + int switch_count, flush_count; > + > + struct hvm_short_summary_struct hvm; > + > + struct { > + int now; > + int count; > + } prealloc_unpin; > + > + struct { > + unsigned callback:1; > + unsigned flush_count, switch_count; > + unsigned fixup_user, emulate_corr_user; > + } destroy; > +}; > + > +#ifndef MAX_CPUS > +#define MAX_CPUS 256 > +#endif > +typedef uint32_t cpu_mask_t; > + > +#define IDLE_DOMAIN 32767 > +#define DEFAULT_DOMAIN 32768 > + > +#define MAX_VLAPIC_LIST 8 > +struct vlapic_struct { > + struct { > + struct outstanding_ipi { > + tsc_t first_tsc; > + int vec, count; > + int injected, valid; > + } list[MAX_VLAPIC_LIST]; > + } outstanding; > +}; > + > +struct vcpu_data { > + int vid; > + struct domain_data *d; /* up-pointer */ > + unsigned activated:1; > + > + int guest_paging_levels; > + > + /* Schedule info */ > + struct { > + int state; > + int runnable_state; /* Only valid when state==RUNSTATE_RUNNABLE */ > + tsc_t tsc; > + /* TSC skew detection/correction */ > + struct last_oldstate_struct { > + int wrong, actual, pid; > + tsc_t tsc; > + } last_oldstate; > + /* Performance counters */ > + unsigned long long p1_start, p2_start; > + } runstate; > + struct pcpu_info *p; > + tsc_t pcpu_tsc; > + > + /* Hardware tracking */ > + struct { > + long long val; > + tsc_t start_time; > + struct cr3_value_struct *data; > + } cr3; > + > + /* IPI latency tracking */ > + struct vlapic_struct vlapic; > + > + /* Summary info */ > + struct cycle_framework f; > + struct cycle_summary runstates[RUNSTATE_MAX]; > + struct cycle_summary runnable_states[RUNNABLE_STATE_MAX]; > + struct weighted_cpi_summary cpi; > + struct cycle_summary cpu_affinity_all, > + cpu_affinity_pcpu[MAX_CPUS]; > + enum { > + VCPU_DATA_NONE=0, > + VCPU_DATA_HVM, > + VCPU_DATA_PV > + } data_type; > + union { > + struct hvm_data hvm; > + struct pv_data pv; > + }; > +}; > + > +enum { > + DOMAIN_RUNSTATE_BLOCKED=0, > + DOMAIN_RUNSTATE_PARTIAL_RUN, > + DOMAIN_RUNSTATE_FULL_RUN, > + DOMAIN_RUNSTATE_PARTIAL_CONTENTION, > + DOMAIN_RUNSTATE_CONCURRENCY_HAZARD, > + DOMAIN_RUNSTATE_FULL_CONTENTION, > + DOMAIN_RUNSTATE_LOST, > + DOMAIN_RUNSTATE_MAX > +}; > + > +char * domain_runstate_name[] = { > + [DOMAIN_RUNSTATE_BLOCKED]="blocked", > + [DOMAIN_RUNSTATE_PARTIAL_RUN]="partial run", > + [DOMAIN_RUNSTATE_FULL_RUN]="full run", > + [DOMAIN_RUNSTATE_PARTIAL_CONTENTION]="partial contention", > + [DOMAIN_RUNSTATE_CONCURRENCY_HAZARD]="concurrency_hazard", > + [DOMAIN_RUNSTATE_FULL_CONTENTION]="full_contention", > + [DOMAIN_RUNSTATE_LOST]="lost", > +}; > + > +enum { > + POD_RECLAIM_CONTEXT_UNKNOWN=0, > + POD_RECLAIM_CONTEXT_FAULT, > + POD_RECLAIM_CONTEXT_BALLOON, > + POD_RECLAIM_CONTEXT_MAX > +}; > + > +char * pod_reclaim_context_name[] = { > + [POD_RECLAIM_CONTEXT_UNKNOWN]="unknown", > + [POD_RECLAIM_CONTEXT_FAULT]="fault", > + [POD_RECLAIM_CONTEXT_BALLOON]="balloon", > +}; > + > +#define POD_ORDER_MAX 4 > + > +struct domain_data { > + struct domain_data *next; > + int did; > + struct vcpu_data *vcpu[MAX_CPUS]; > + > + int max_vid; > + > + int runstate; > + tsc_t runstate_tsc; > + struct cycle_summary total_time; > + struct cycle_summary runstates[DOMAIN_RUNSTATE_MAX]; > + struct cr3_value_struct *cr3_value_head; > + struct eip_list_struct *emulate_eip_list; > + struct 
eip_list_struct *interrupt_eip_list; > + > + int guest_interrupt[GUEST_INTERRUPT_MAX+1]; > + struct hvm_short_summary_struct hvm_short; > + struct { > + int done[MEM_MAX]; > + int done_interval[MEM_MAX]; > + > + int done_for[MEM_MAX]; > + int done_for_interval[MEM_MAX]; > + } memops; > + > + struct { > + int reclaim_order[POD_ORDER_MAX]; > + int reclaim_context[POD_RECLAIM_CONTEXT_MAX]; > + int reclaim_context_order[POD_RECLAIM_CONTEXT_MAX][POD_ORDER_MAX]; > + /* FIXME: Do a full cycle summary */ > + int populate_order[POD_ORDER_MAX]; > + } pod; > +}; > + > +struct domain_data * domain_list=NULL; > + > +struct domain_data default_domain; > + > +enum { > + TOPLEVEL_GEN=0, > + TOPLEVEL_SCHED, > + TOPLEVEL_DOM0OP, > + TOPLEVEL_HVM, > + TOPLEVEL_MEM, > + TOPLEVEL_PV, > + TOPLEVEL_SHADOW, > + TOPLEVEL_HW, > + TOPLEVEL_MAX=TOPLEVEL_HW+1, > +}; > + > +char * toplevel_name[TOPLEVEL_MAX] = { > + [TOPLEVEL_GEN]="gen", > + [TOPLEVEL_SCHED]="sched", > + [TOPLEVEL_DOM0OP]="dom0op", > + [TOPLEVEL_HVM]="hvm", > + [TOPLEVEL_MEM]="mem", > + [TOPLEVEL_PV]="pv", > + [TOPLEVEL_SHADOW]="shadow", > + [TOPLEVEL_HW]="hw", > +}; > + > +struct trace_volume { > + unsigned long long toplevel[TOPLEVEL_MAX]; > + unsigned long long sched_verbose; > + unsigned long long hvm[HVM_VOL_MAX]; > +} volume; > + > +#define UPDATE_VOLUME(_p,_x,_s) \ > + do { \ > + (_p)->volume.total._x += _s; \ > + (_p)->volume.last_buffer._x += _s; \ > + } while(0) > + > +void volume_clear(struct trace_volume *vol) > +{ > + bzero(vol, sizeof(*vol)); > +} > + > +void volume_summary(struct trace_volume *vol) > +{ > + int j, k; > + for(j=0; j<TOPLEVEL_MAX; j++) > + if(vol->toplevel[j]) { > + printf(" %-6s: %10lld\n", > + toplevel_name[j], vol->toplevel[j]); > + switch(j) { > + case TOPLEVEL_SCHED: > + if(vol->sched_verbose) > + printf(" +-verbose: %10lld\n", > + vol->sched_verbose); > + break; > + case TOPLEVEL_HVM: > + for(k=0; k<HVM_VOL_MAX; k++) { > + if(vol->hvm[k]) > + printf(" +-%-7s: %10lld\n", > + hvm_vol_name[k], vol->hvm[k]); > + } > + > + break; > + } > + } > +} > + > +struct pcpu_info { > + /* Information about this pcpu */ > + unsigned active:1, summary:1; > + int pid; > + > + /* Information related to scanning thru the file */ > + tsc_t first_tsc, last_tsc, order_tsc; > + loff_t file_offset; > + loff_t next_cpu_change_offset; > + struct record_info ri; > + int last_cpu_change_pid; > + int power_state; > + > + /* Information related to tsc skew detection / correction */ > + struct { > + tsc_t offset; > + cpu_mask_t downstream; /* To detect cycles in dependencies */ > + } tsc_skew; > + > + /* Information related to domain tracking */ > + struct vcpu_data * current; > + struct { > + unsigned active:1, > + domain_valid:1, > + seen_valid_schedule:1; /* Seen an actual schedule since lost > records */ > + unsigned did:16,vid:16; > + tsc_t tsc; > + } lost_record; > + > + /* Record volume */ > + struct { > + tsc_t buffer_first_tsc, > + buffer_dom0_runstate_tsc, > + buffer_dom0_runstate_cycles[RUNSTATE_MAX]; > + int buffer_dom0_runstate; > + unsigned buffer_size; > + struct trace_volume total, last_buffer; > + } volume; > + > + /* Time report */ > + struct { > + tsc_t tsc; > + struct cycle_summary idle, running, lost; > + } time; > +}; > + > +void __fill_in_record_info(struct pcpu_info *p); > + > +#define INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX 10 > + > +struct { > + int max_active_pcpu; > + loff_t last_epoch_offset; > + int early_eof; > + int lost_cpus; > + tsc_t now; > + struct cycle_framework f; > + tsc_t buffer_trace_virq_tsc; > + struct 
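(A couple of cosmetic points on the name tables above, while they're being imported: domain_runstate_name[] mixes spaces and underscores in its display strings, "partial run" and "partial contention" vs "concurrency_hazard" and "full_contention", which makes grepping summary output mildly annoying; and all of these tables (runstate_name, runnable_state_name, mem_name, and so on) could be const char * const. Neither needs to block the merge.)
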
pcpu_info pcpu[MAX_CPUS]; > + > + struct { > + int id; > + /* Invariant: head null => tail null; head !null => tail valid */ > + struct cr3_value_struct *head, **tail; > + } cr3; > + > + struct { > + tsc_t start_tsc; > + /* Information about specific interval output types */ > + union { > + struct { > + struct interval_element ** values; > + int count; > + } array; > + struct { > + struct interval_list *head, **tail; > + } list; > + struct cr3_value_struct *cr3; > + struct { > + struct domain_data *d; > + int guest_vector[INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX]; > + } domain; > + }; > + } interval; > +} P = { 0 }; > + > +/* Function prototypes */ > +char * pcpu_string(int pcpu); > +void pcpu_string_draw(struct pcpu_info *p); > +void process_generic(struct record_info *ri); > +void dump_generic(FILE *f, struct record_info *ri); > +ssize_t __read_record(struct trace_record *rec, loff_t offset); > +void error(enum error_level l, struct record_info *ri); > +void update_io_address(struct io_address ** list, unsigned int pa, int dir, > + tsc_t arc_cycles, unsigned int va); > +int check_extra_words(struct record_info *ri, int expected_size, const char > *record); > +int vcpu_set_data_type(struct vcpu_data *v, int type); > + > +void cpumask_init(cpu_mask_t *c) { > + *c = 0UL; > +} > + > +void cpumask_clear(cpu_mask_t *c, int cpu) { > + *c &= ~(1UL << cpu); > +} > + > +void cpumask_set(cpu_mask_t *c, int cpu) { > + *c |= (1UL << cpu); > +} > + > +int cpumask_isset(const cpu_mask_t *c, int cpu) { > + if(*c & (1UL<<cpu)) > + return 1; > + else > + return 0; > +} > + > +void cpumask_union(cpu_mask_t *d, const cpu_mask_t *s) { > + *d |= *s; > +} > + > +/* -- Time code -- */ > + > +void cycles_to_time(unsigned long long c, struct time_struct *t) { > + t->time = ((c - P.f.first_tsc) << 10) / opt.cpu_qhz; > + t->s = t->time / 1000000000; > + t->ns = t->time - (t->s * 1000000000); > +} > + > +void abs_cycles_to_time(unsigned long long ac, struct time_struct *t) { > + if(ac > P.f.first_tsc) { > + /* t->time = ((ac - P.f.first_tsc) * 1000) / (opt.cpu_hz / 1000000 > ); */ > + /* t->s = t->time / 1000000000; */ > + /* t->ns = t->time % 1000000000; */ > + t->time = ((ac - P.f.first_tsc) << 10) / opt.cpu_qhz; > + t->s = t->time / 1000000000; > + t->ns = t->time - (t->s * 1000000000); > + } else { > + t->time = t->s = t->ns = 0; > + } > +} > + > +tsc_t abs_cycles_to_global(unsigned long long ac) { > + if(ac > P.f.first_tsc) > + return ac - P.f.first_tsc; > + else > + return 0; > +} > + > +void scatterplot_vs_time(tsc_t atsc, long long y) { > + struct time_struct t; > + > + abs_cycles_to_time(atsc, &t); > + > + printf("%u.%09u %lld\n", t.s, t.ns, y); > +} > + > +/* -- Summary Code -- */ > + > +/* With compliments to "Numerical Recipes in C", which provided the algorithm > + * and basic template for this function. */ > +long long percentile(long long * A, int N, int ple) { > + int I, J, L, R, K; > + > + long long X, W; > + > + /* No samples! */ > + if ( N == 0 ) > + return 0; > + > + /* Find K, the element # we want */ > + K=N*ple/100; > + > + /* Set the left and right boundaries of the current search space */ > + L=0; R=N-1; > + > + while(L < R) { > + /* X: The value to order everything higher / lower than */ > + X=A[K]; > + > + /* Starting at the left and the right... 
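One real issue in the cpumask helpers above: cpu_mask_t is a uint32_t, but MAX_CPUS defaults to 256 and tsc_skew.downstream is indexed by pcpu id, so cpumask_set()/cpumask_isset() silently lose bits for any cpu >= 32, and for cpu >= 64 the shift itself is undefined behaviour (as it already is for cpu >= 32 on a 32-bit build, where 1UL is 32 bits wide). A minimal sketch of a widened mask, reusing the BITS_PER_LONG/BITS_TO_LONGS macros already in analyze.h (sketch only, not tested):

    /* One unsigned long per BITS_PER_LONG cpus. */
    typedef struct {
        unsigned long bits[BITS_TO_LONGS(MAX_CPUS)];
    } cpu_mask_t;

    void cpumask_init(cpu_mask_t *c) {
        bzero(c, sizeof(*c));
    }

    void cpumask_set(cpu_mask_t *c, int cpu) {
        c->bits[cpu / BITS_PER_LONG] |= 1UL << (cpu % BITS_PER_LONG);
    }

    int cpumask_isset(const cpu_mask_t *c, int cpu) {
        return (c->bits[cpu / BITS_PER_LONG] >> (cpu % BITS_PER_LONG)) & 1;
    }

cpumask_clear() and cpumask_union() would loop the same way. Fine as a follow-up patch, since the TSC-skew path only matters on larger machines.
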
*/ > + I=L; > + J=R; > + > + do { > + /* Find the first element on the left that is out-of-order w/ X > */ > + while(A[I]<X) > + I++; > + /* Find the first element on the right that is out-of-order w/ X > */ > + while(X<A[J]) > + J--; > + > + /* If we found something out-of-order */ > + if(I<=J) { > + /* Switch the values */ > + W=A[I]; > + A[I]=A[J]; > + A[J]=W; > + > + /* And move on */ > + I++; J--; > + } > + } while (I <= J); /* Keep going until our pointers meet or pass */ > + > + /* Re-adjust L and R, based on which element we're looking for */ > + if(J<K) > + L=I; > + if(K<I) > + R=J; > + } > + > + return A[K]; > +} > + > +float weighted_percentile(float * A, /* values */ > + unsigned long long * w, /* weights */ > + int N, /* total */ > + int ple) /* percentile > */ > +{ > + int L, R, I, J, K; > + unsigned long long L_weight, R_weight, I_weight, J_weight, > + K_weight, N_weight; > + > + float X, t1; > + unsigned long long t2; > + > + /* Calculate total weight */ > + N_weight=0; > + > + for(I=0; I<N; I++) { > + assert(w[I]!=0); > + N_weight += w[I]; > + } > + > + /* Find K_weight, the target weight we want */ > + K_weight = N_weight * ple / 100; > + > + /* Set the left and right boundaries of the current search space */ > + L=0; > + L_weight = 0; > + R=N-1; > + R_weight = N_weight - w[R]; > + > + /* Search between L and R, narrowing down until we're done */ > + while(L < R) { > + /* Chose an ordering value from right in the middle */ > + K = (L + R) >> 1; > + /* X: The value to order everything higher / lower than */ > + X=A[K]; > + > + /* Starting at the left and the right... */ > + I=L; I_weight = L_weight; > + J=R; J_weight = R_weight; > + > + do { > + /* Find the first element on the left that is out-of-order w/ X > */ > + while(A[I]<X) { > + I_weight += w[I]; > + I++; > + } > + /* Find the first element on the right that is out-of-order w/ X > */ > + while(X<A[J]) { > + J_weight -= w[J]; > + J--; > + } > + > + /* If we actually found something... */ > + if(I<=J) { > + /* Switch the values */ > + t1=A[I]; > + A[I]=A[J]; > + A[J]=t1; > + > + t2=w[I]; > + w[I]=w[J]; > + w[J]=t2; > + > + /* And move in */ > + I_weight += w[I]; > + I++; > + > + J_weight -= w[J]; > + J--; > + } > + } while (I <= J); /* Keep going until our pointers meet or pass */ > + > + /* Re-adjust L and R, based on which element we're looking for */ > + if(J_weight<K_weight) > + L=I; L_weight = I_weight; > + if(K_weight<I_weight) > + R=J; R_weight = J_weight; > + } > + > + return A[L]; > +} > + > +long long self_weighted_percentile(long long * A, > + int N, /* total */ > + int ple) /* percentile */ > +{ > + int L, R, I, J, K; > + long long L_weight, R_weight, I_weight, J_weight, > + K_weight, N_weight; > + > + long long X, t1; > + > + /* Calculate total weight */ > + N_weight=0; > + > + for(I=0; I<N; I++) { > + if(A[I] < 0) > + fprintf(warn, "%s: Value %lld less than zero!\n", > + __func__, A[I]); > + assert(A[I]!=0); > + N_weight += A[I]; > + } > + > + /* Find K_weight, the target weight we want */ > + K_weight = N_weight * ple / 100; > + > + /* Set the left and right boundaries of the current search space */ > + L=0; > + L_weight = 0; > + R=N-1; > + R_weight = N_weight - A[R]; > + > + /* Search between L and R, narrowing down until we're done */ > + while(L < R) { > + /* Chose an ordering value from right in the middle */ > + K = (L + R) >> 1; > + /* X: The value to order everything higher / lower than */ > + X=A[K]; > + > + /* Starting at the left and the right... 
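In weighted_percentile() just above (self_weighted_percentile() below carries a copy of the same pattern), the final re-adjustment reads:

    if(J_weight<K_weight)
        L=I; L_weight = I_weight;
    if(K_weight<I_weight)
        R=J; R_weight = J_weight;

so L_weight and R_weight are reassigned unconditionally and can fall out of sync with L and R whenever the corresponding condition is false. Judging from the indentation, and from the unweighted percentile() above where only L and R are updated, the intent was presumably:

    if (J_weight < K_weight) {
        L = I;
        L_weight = I_weight;
    }
    if (K_weight < I_weight) {
        R = J;
        R_weight = J_weight;
    }

Worth fixing in a follow-up so the weighted percentiles stay trustworthy.
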
*/ > + I=L; I_weight = L_weight; > + J=R; J_weight = R_weight; > + > + do { > + /* Find the first element on the left that is out-of-order w/ X > */ > + while(A[I]<X) { > + I_weight += A[I]; > + I++; > + } > + /* Find the first element on the right that is out-of-order w/ X > */ > + while(X<A[J]) { > + J_weight -= A[J]; > + J--; > + } > + > + /* If we actually found something... */ > + if(I<=J) { > + /* Switch the values */ > + t1=A[I]; > + A[I]=A[J]; > + A[J]=t1; > + > + /* And move in */ > + I_weight += A[I]; > + I++; > + > + J_weight -= A[J]; > + J--; > + } > + } while (I <= J); /* Keep going until our pointers meet or pass */ > + > + /* Re-adjust L and R, based on which element we're looking for */ > + if(J_weight<K_weight) > + L=I; L_weight = I_weight; > + if(K_weight<I_weight) > + R=J; R_weight = J_weight; > + } > + > + return A[L]; > +} > + > +static inline double __cycles_percent(long long cycles, long long total) { > + return (double)(cycles*100) / total; > +} > + > +static inline double __summary_percent(struct event_cycle_summary *s, > + struct cycle_framework *f) { > + return __cycles_percent(s->cycles, f->total_cycles); > +} > + > +static inline double summary_percent_global(struct event_cycle_summary *s) { > + return __summary_percent(s, &P.f); > +} > + > +static inline void update_summary(struct event_cycle_summary *s, long long > c) { > +/* We don't know ahead of time how many samples there are, and working > + * with dynamic stuff is a pain, and unnecessary. This algorithm will > + * generate a sample set that approximates an even sample. We can > + * then take the percentiles on this, and get an approximate value. */ > + if(c) { > + if(opt.sample_size) { > + int lap = (s->cycles_count/opt.sample_size)+1, > + index =s->cycles_count % opt.sample_size; > + if((index - (lap/3))%lap == 0) { > + if(!s->cycles_sample) { > + s->cycles_sample = malloc(sizeof(*s->cycles_sample) * > opt.sample_size); > + if(!s->cycles_sample) { > + fprintf(stderr, "%s: malloc failed!\n", __func__); > + error(ERR_SYSTEM, NULL); > + } > + } > + s->cycles_sample[index]=c; > + } > + } > + s->cycles_count++; > + s->cycles += c; > + > + s->interval.count++; > + s->interval.cycles += c; > + } > + s->count++; > +} > + > +static inline void clear_interval_summary(struct event_cycle_summary *s) { > + s->interval.count = 0; > + s->interval.cycles = 0; > +} > + > +static inline void update_cycles(struct cycle_summary *s, long long c) { > +/* We don't know ahead of time how many samples there are, and working > + * with dynamic stuff is a pain, and unnecessary. This algorithm will > + * generate a sample set that approximates an even sample. We can > + * then take the percentiles on this, and get an approximate value. */ > + int lap, index; > + > + if ( c == 0 ) > + { > + fprintf(warn, "%s: cycles 0! 
Not updating...\n", > + __func__); > + return; > + } > + > + if ( opt.sample_size ) { > + lap = (s->count/opt.sample_size)+1; > + index =s->count % opt.sample_size; > + > + if((index - (lap/3))%lap == 0) { > + if(!s->sample) { > + s->sample = malloc(sizeof(*s->sample) * opt.sample_size); > + if(!s->sample) { > + fprintf(stderr, "%s: malloc failed!\n", __func__); > + error(ERR_SYSTEM, NULL); > + } > + } > + s->sample[index] = c; > + } > + } > + > + if(c > 0) { > + s->cycles += c; > + s->interval.cycles += c; > + } else { > + s->cycles += -c; > + s->interval.cycles += -c; > + } > + s->count++; > + s->interval.count++; > +} > + > +static inline void clear_interval_cycles(struct interval_element *e) { > + e->cycles = 0; > + e->count = 0; > + e->instructions = 0; > +} > + > +static inline void update_cpi(struct weighted_cpi_summary *s, > + unsigned long long i, > + unsigned long long c) { > +/* We don't know ahead of time how many samples there are, and working > + * with dynamic stuff is a pain, and unnecessary. This algorithm will > + * generate a sample set that approximates an even sample. We can > + * then take the percentiles on this, and get an approximate value. */ > + int lap, index; > + > + if ( opt.sample_size ) { > + lap = (s->count/opt.sample_size)+1; > + index =s->count % opt.sample_size; > + > + if((index - (lap/3))%lap == 0) { > + if(!s->cpi) { > + assert(!s->cpi_weight); > + > + s->cpi = malloc(sizeof(*s->cpi) * opt.sample_size); > + s->cpi_weight = malloc(sizeof(*s->cpi_weight) * > opt.sample_size); > + if(!s->cpi || !s->cpi_weight) { > + fprintf(stderr, "%s: malloc failed!\n", __func__); > + error(ERR_SYSTEM, NULL); > + } > + } > + assert(s->cpi_weight); > + > + s->cpi[index] = (float) c / i; > + s->cpi_weight[index]=c; > + } > + } > + > + s->instructions += i; > + s->cycles += c; > + s->count++; > + > + s->interval.instructions += i; > + s->interval.cycles += c; > + s->interval.count++; > +} > + > +static inline void clear_interval_cpi(struct weighted_cpi_summary *s) { > + s->interval.cycles = 0; > + s->interval.count = 0; > + s->interval.instructions = 0; > +} > + > +static inline void print_cpu_affinity(struct cycle_summary *s, char *p) { > + if(s->count) { > + long long avg; > + > + avg = s->cycles / s->count; > + > + if ( opt.sample_size ) { > + long long p5, p50, p95; > + int data_size = s->count; > + if(data_size > opt.sample_size) > + data_size = opt.sample_size; > + > + p50 = percentile(s->sample, data_size, 50); > + p5 = percentile(s->sample, data_size, 5); > + p95 = percentile(s->sample, data_size, 95); > + > + printf("%s: %7d %6lld {%6lld|%6lld|%6lld}\n", > + p, s->count, avg, p5, p50, p95); > + } else { > + printf("%s: %7d %6lld\n", > + p, s->count, avg); > + } > + } > +} > + > +static inline void print_cpi_summary(struct weighted_cpi_summary *s) { > + if(s->count) { > + float avg; > + > + avg = (float)s->cycles / s->instructions; > + > + if ( opt.sample_size ) { > + float p5, p50, p95; > + int data_size = s->count; > + > + if(data_size > opt.sample_size) > + data_size = opt.sample_size; > + > + p50 = weighted_percentile(s->cpi, s->cpi_weight, data_size, 50); > + p5 = weighted_percentile(s->cpi, s->cpi_weight, data_size, 5); > + p95 = weighted_percentile(s->cpi, s->cpi_weight, data_size, 95); > + > + printf(" CPI summary: %2.2f {%2.2f|%2.2f|%2.2f}\n", > + avg, p5, p50, p95); > + } else { > + printf(" CPI summary: %2.2f\n", avg); > + } > + } > +} > + > +static inline void print_cycle_percent_summary(struct cycle_summary *s, > + tsc_t total, char *p) { > + 
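The "lap" sampling shared by update_summary(), update_cycles() and update_cpi() could use a comment; it took me a while to convince myself what it does. Reading the arithmetic: with opt.sample_size == S, the n-th sample (0-based, n == count on entry) targets slot n % S but is only stored when ((n % S) - lap/3) % lap == 0 with lap = n/S + 1, so the first S samples fill every slot, the next S overwrite every 2nd slot, the next every 3rd, and so on. That is what keeps the S retained samples an approximately even spread over everything seen, as the block comment promises. A tiny standalone check of that reading (my code, not from the patch):

    #include <stdio.h>

    int main(void) {
        const int S = 10;              /* stands in for opt.sample_size */
        int n;
        for (n = 0; n < 40; n++) {
            int lap = n / S + 1, index = n % S;
            if ((index - lap / 3) % lap == 0)
                printf("sample %2d kept in slot %d (lap %d)\n", n, index, lap);
        }
        return 0;
    }

While there: update_cycles() silently accumulates -c when c is negative; a warning on that path might catch TSC-skew accounting bugs earlier.
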
if(s->count) { > + long long avg; > + double percent, seconds; > + > + avg = s->cycles / s->count; > + > + seconds = ((double)s->cycles) / opt.cpu_hz; > + > + percent = ((double)(s->cycles * 100)) / total; > + > + if ( opt.sample_size ) { > + long long p5, p50, p95; > + int data_size = s->count; > + > + if(data_size > opt.sample_size) > + data_size = opt.sample_size; > + > + p50 = self_weighted_percentile(s->sample, data_size, 50); > + p5 = self_weighted_percentile(s->sample, data_size, 5); > + p95 = self_weighted_percentile(s->sample, data_size, 95); > + > + printf("%s: %7d %5.2lfs %5.2lf%% %6lld {%6lld|%6lld|%6lld}\n", > + p, s->count, > + seconds, > + percent, > + avg, p5, p50, p95); > + } else { > + printf("%s: %7d %5.2lfs %5.2lf%% %6lld\n", > + p, s->count, > + seconds, > + percent, > + avg); > + } > + } > +} > + > +static inline void print_cycle_summary(struct cycle_summary *s, char *p) { > + if(s->count) { > + long long avg; > + > + avg = s->cycles / s->count; > + > + if ( opt.sample_size ) { > + long long p5, p50, p95; > + int data_size = s->count; > + > + if(data_size > opt.sample_size) > + data_size = opt.sample_size; > + > + p50 = self_weighted_percentile(s->sample, data_size, 50); > + p5 = self_weighted_percentile(s->sample, data_size, 5); > + p95 = self_weighted_percentile(s->sample, data_size, 95); > + > + printf("%s: %7d %5.2lfs %6lld {%6lld|%6lld|%6lld}\n", > + p, s->count, ((double)s->cycles)/opt.cpu_hz, > + avg, p5, p50, p95); > + } else { > + printf("%s: %7d %5.2lfs %6lld\n", > + p, s->count, ((double)s->cycles)/opt.cpu_hz, avg); > + } > + } > +} > + > +#define PRINT_SUMMARY(_s, _p...) \ > + do { \ > + if((_s).count) { \ > + if ( opt.sample_size ) { \ > + unsigned long long p5, p50, p95; \ > + int data_size=(_s).cycles_count; \ > + if(data_size > opt.sample_size) \ > + data_size=opt.sample_size; \ > + p50=percentile((_s).cycles_sample, data_size, 50); \ > + p5=percentile((_s).cycles_sample, data_size, 5); \ > + p95=percentile((_s).cycles_sample, data_size, 95); \ > + printf(_p); \ > + printf(" %7d %5.2lfs %5.2lf%% %5lld cyc > {%5lld|%5lld|%5lld}\n", \ > + (_s).count, \ > + ((double)(_s).cycles)/opt.cpu_hz, \ > + summary_percent_global(&(_s)), \ > + (_s).cycles_count ? (_s).cycles / > (_s).cycles_count:0, \ > + p5, p50, p95); \ > + } else { \ > + printf(_p); \ > + printf(" %7d %5.2lfs %5.2lf%% %5lld cyc\n", \ > + (_s).count, \ > + ((double)(_s).cycles)/opt.cpu_hz, \ > + summary_percent_global(&(_s)), \ > + (_s).cycles_count ? 
(_s).cycles / > (_s).cycles_count:0); \ > + } \ > + } \ > + } while(0) > + > +#define INTERVAL_DESC_MAX 31 > +struct interval_list { > + struct interval_element *elem; > + struct interval_list *next; > + char desc[INTERVAL_DESC_MAX+1]; /* +1 for the null terminator */ > +}; > + > +void __interval_cycle_percent_output(struct interval_element *e, tsc_t > cycles) { > + printf(" %.02lf", > + __cycles_percent(e->cycles, cycles)); > + clear_interval_cycles(e); > +} > + > +void interval_cycle_percent_output(struct interval_element *e) { > + __interval_cycle_percent_output(e, opt.interval.cycles); > +} > + > +void interval_time_output(void) { > + struct time_struct t; > + abs_cycles_to_time(P.interval.start_tsc, &t); > + > + printf("%u.%09u", t.s, t.ns); > +} > + > +void interval_table_output(void) { > + int i; > + > + interval_time_output(); > + > + if(opt.interval.mode == INTERVAL_MODE_ARRAY) { > + for(i=0; i<P.interval.array.count; i++) { > + struct interval_element *e = P.interval.array.values[i]; > + if(e) { > + interval_cycle_percent_output(e); > + } else { > + printf(" 0.0"); > + } > + } > + } else if(opt.interval.mode == INTERVAL_MODE_LIST) { > + struct interval_list *p; > + for(p = P.interval.list.head; p; p = p->next) > + interval_cycle_percent_output(p->elem); > + } > + printf("\n"); > +} > + > +void interval_table_tail(void) { > + struct interval_list *p; > + > + printf("time"); > + > + for(p=P.interval.list.head; p; p = p->next) > + printf(" %s", p->desc); > + > + printf("\n"); > +} > + > +void interval_table_alloc(int count) { > + P.interval.array.count = count; > + P.interval.array.values = malloc(count * sizeof(struct interval_list *)); > + > + if(!P.interval.array.values) { > + fprintf(stderr, "Malloc failed!\n"); > + error(ERR_SYSTEM, NULL); > + } > + > + bzero(P.interval.array.values, count*sizeof(struct interval_list *)); > +} > + > +void interval_list_add(struct interval_element *e, char *desc) { > + struct interval_list *p; > + > + fprintf(warn, "%s: Adding element '%s'\n", __func__, desc); > + > + if((p=malloc(sizeof(*p)))==NULL) { > + fprintf(stderr, "malloc() failed.\n"); > + error(ERR_SYSTEM, NULL); > + } > + > + bzero(p, sizeof(*p)); > + > + p->elem = e; > + strncpy(p->desc, desc, INTERVAL_DESC_MAX); > + > + p->next=NULL; > + > + if(P.interval.list.head) > + *P.interval.list.tail = p; > + else > + P.interval.list.head = p; > + P.interval.list.tail = &p->next; > +} > + > +void interval_cr3_schedule_time_header(void) { > + if( opt.interval.mode == INTERVAL_MODE_ARRAY ) { > + int i; > + > + printf("time"); > + for(i=0; i<opt.interval.array.count; i++) { > + printf(" %llx", opt.interval.array.values[i]); > + } > + printf("\n"); > + } > + /* Can't see into the future, so no header if cr3 values are > + not specified. 
*/ > +} > + > +void interval_cr3_value_check(struct cr3_value_struct *cr3) { > + if( opt.interval.mode == INTERVAL_MODE_ARRAY ) { > + int i; > + > + for(i=0; i<opt.interval.array.count; i++) { > + if(cr3->gmfn == opt.interval.array.values[i]) { > + if(P.interval.array.values[i]) { > + fprintf(stderr, "Fatal: duplicate cr3 value %llx!\n", > + cr3->gmfn); > + error(ERR_ASSERT, NULL); > + } > + fprintf(stderr, "%s: found gmfn %llx\n", > + __func__, cr3->gmfn); > + > + P.interval.array.values[i] = &cr3->total_time.interval; > + } > + } > + } else if(opt.interval.mode == INTERVAL_MODE_LIST) { > + char desc[32]; > + snprintf(desc, 32, "%llx", cr3->gmfn); > + interval_list_add(&cr3->total_time.interval, desc); > + } else { > + /* Custom */ > + if(cr3->gmfn == opt.interval.array.values[0]) > + P.interval.cr3 = cr3; > + } > +} > + > +void interval_cr3_schedule_ordered_output(void) { > + struct cr3_value_struct *p; > + int i; > + > + struct cr3_value_struct **qsort_array; > + int N=0; > + > + int cr3_time_compare(const void *_a, const void *_b) { > + struct cr3_value_struct *a=*(typeof(&a))_a; > + struct cr3_value_struct *b=*(typeof(&a))_b; > + > + if(a->total_time.interval.cycles < b->total_time.interval.cycles) > + return 1; > + else if(b->total_time.interval.cycles == > a->total_time.interval.cycles) { > + if(a->total_time.interval.count < b->total_time.interval.count) > + return 1; > + else if(a->total_time.interval.count == > b->total_time.interval.count) > + return 0; > + else > + return -1; > + } else > + return -1; > + } > + > + for(p=P.cr3.head; p; p=p->gnext) > + N++; > + > + if(!N) > + return; > + > + qsort_array = malloc(N * sizeof(struct eip_list_struct *)); > + > + for(i=0, p=P.cr3.head; p; p=p->gnext, i++) > + qsort_array[i]=p; > + > + qsort(qsort_array, N, sizeof(struct eip_list_struct *), > + cr3_time_compare); > + > + interval_time_output(); > + > + for(i=0; i<N; i++) { > + p = qsort_array[i]; > + /* Rounding down means this will get ..1]% */ > + if(p->total_time.interval.cycles > 0) { > + printf(" %8llx: %.02lf %c\n", > + p->gmfn, > + __cycles_percent(p->total_time.interval.cycles, > + opt.interval.cycles), > + (p->first_time > P.interval.start_tsc)?'*':' '); > + } > + clear_interval_cycles(&p->total_time.interval); > + } > + > + free(qsort_array); > +} > + > +void interval_cr3_short_summary_header(void) { > + int i; > + > + printf("time guest"); > + for(i=0; i<HVM_SHORT_SUMMARY_MAX; i++) > + printf(" %s", hvm_short_summary_name[i]); > + printf("\n"); > +} > + > +void interval_cr3_short_summary_output(void) { > + struct cycle_summary *hss_array; > + int i; > + > + if(P.interval.cr3) { > + struct cr3_value_struct *p = P.interval.cr3; > + > + interval_time_output(); > + > + hss_array = p->hvm.s; > + > + printf(" %.02lf", > + __cycles_percent(p->total_time.interval.cycles, > + opt.interval.cycles)); > + > + for(i=0; i<HVM_SHORT_SUMMARY_MAX; i++) > + __interval_cycle_percent_output(&hss_array[i].interval, > + p->total_time.interval.cycles); > + > + clear_interval_cycles(&p->total_time.interval); > + > + printf("\n"); > + } > +} > + > +void interval_domain_value_check(struct domain_data *d) { > + if( opt.interval.mode == INTERVAL_MODE_ARRAY ) { > + int i; > + > + for(i=0; i<opt.interval.array.count; i++) { > + if(d->did == opt.interval.array.values[i]) { > + if(P.interval.array.values[i]) { > + fprintf(stderr, "Fatal: duplicate domain value %d!\n", > + d->did); > + error(ERR_ASSERT, NULL); > + } > + > + P.interval.array.values[i] = &d->total_time.interval; > + } > + } > + } else 
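Two nits around the interval/qsort machinery, probably inherited wholesale from the out-of-tree history:

 * interval_table_alloc() mallocs count * sizeof(struct interval_list *) for an array of struct interval_element *, and interval_cr3_schedule_ordered_output() above (plus cr3_dump_list() further down) use sizeof(struct eip_list_struct *) for arrays of struct cr3_value_struct *. All pointer types, so nothing actually breaks, but the self-describing form says what's meant and survives refactoring:

    qsort_array = malloc(N * sizeof(*qsort_array));
    qsort(qsort_array, N, sizeof(*qsort_array), cr3_time_compare);

 * cr3_time_compare(), and the other comparison functions defined inside their callers, are nested functions, i.e. a GNU C extension. Fine while xenalyze is gcc-only, but clang rejects nested functions, so worth keeping in mind.
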
if(opt.interval.mode == INTERVAL_MODE_LIST) { > + char desc[32]; > + snprintf(desc, 32, "%d", d->did); > + interval_list_add(&d->total_time.interval, desc); > + } else { > + if(d->did == opt.interval.array.values[0]) > + P.interval.domain.d = d; > + } > +} > + > +void interval_domain_short_summary_header(void) { > + int i; > + > + printf("time running"); > + for(i=0; i<HVM_SHORT_SUMMARY_MAX; i++) > + printf(" %s", hvm_short_summary_name[i]); > + printf("\n"); > +} > + > +void interval_domain_short_summary_output(void) { > + > + if(P.interval.domain.d) { > + struct domain_data *d; > + int i; > + > + d=P.interval.domain.d; > + > + interval_time_output(); > + > + interval_cycle_percent_output(&d->total_time.interval); > + > + for(i=0; i<HVM_SHORT_SUMMARY_MAX; i++) > + interval_cycle_percent_output(&d->hvm_short.s[i].interval); > + > + printf("\n"); > + } > +} > + > +void interval_domain_guest_interrupt(struct hvm_data *h, int vector) { > + struct domain_data *d = h->v->d; > + int i; > + > + /* Check to see if this vector is in the "print list" */ > + for(i=0; i<INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX; i++) { > + if(P.interval.domain.guest_vector[i] == 0) { > + P.interval.domain.guest_vector[i] = vector; > + break; > + } > + if(P.interval.domain.guest_vector[i] == vector) > + break; > + } > + > + if(i == INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX) { > + fprintf(stderr, "FATAL: used up all %d guest interrupt slots!\n", > + INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX); > + error(ERR_LIMIT, NULL); > + } else { > + d->guest_interrupt[vector]++; > + } > +} > + > +void interval_domain_guest_interrupt_tail(void) { > + int i; > + > + printf("time running"); > + for(i=0; i<INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX; i++) { > + if(P.interval.domain.guest_vector[i] == 0) > + break; > + printf(" %d", P.interval.domain.guest_vector[i]); > + } > + printf("\n"); > +} > + > +void interval_domain_guest_interrupt_output(void) { > + > + if(P.interval.domain.d) { > + struct domain_data *d; > + int i; > + > + d=P.interval.domain.d; > + > + interval_time_output(); > + > + for(i=0; i<INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX; i++) { > + int v = P.interval.domain.guest_vector[i]; > + > + if(v == 0) > + break; > + > + printf(" %d", d->guest_interrupt[v]); > + > + d->guest_interrupt[v]=0; > + } > + > + printf("\n"); > + } > + > +} > + > +void interval_domain_grant_maps_output(void) { > + > + if(P.interval.domain.d) { > + struct domain_data *d; > + > + d=P.interval.domain.d; > + > + interval_time_output(); > + > + printf(" %d", d->memops.done_for_interval[MEM_PAGE_GRANT_MAP]); > + > + d->memops.done_for_interval[MEM_PAGE_GRANT_MAP] = 0; > + > + printf("\n"); > + } > +} > + > +/* General interval gateways */ > + > +void interval_callback(void) { > + /* First, see if we're in generic mode. 
*/ > + switch(opt.interval.mode) { > + case INTERVAL_MODE_LIST: > + case INTERVAL_MODE_ARRAY: > + interval_table_output(); > + return; > + default: > + break; > + } > + > + switch(opt.interval.output) { > + case INTERVAL_CR3_SCHEDULE_ORDERED: > + interval_cr3_schedule_ordered_output(); > + break; > + case INTERVAL_CR3_SHORT_SUMMARY: > + interval_cr3_short_summary_output(); > + break; > + case INTERVAL_DOMAIN_SHORT_SUMMARY: > + interval_domain_short_summary_output(); > + break; > + case INTERVAL_DOMAIN_GUEST_INTERRUPT: > + interval_domain_guest_interrupt_output(); > + break; > + case INTERVAL_DOMAIN_GRANT_MAPS: > + interval_domain_grant_maps_output(); > + break; > + default: > + break; > + } > +} > + > +void interval_header(void) { > + switch(opt.interval.output) { > + case INTERVAL_CR3_SHORT_SUMMARY: > + interval_cr3_short_summary_header(); > + break; > + case INTERVAL_DOMAIN_SHORT_SUMMARY: > + interval_domain_short_summary_header(); > + break; > + default: > + break; > + } > +} > + > +void interval_tail(void) { > + if(opt.interval.mode == INTERVAL_MODE_LIST) { > + interval_table_tail(); > + return; > + } > + > + switch(opt.interval.output) { > + case INTERVAL_DOMAIN_GUEST_INTERRUPT: > + interval_domain_guest_interrupt_tail(); > + break; > + default: > + break; > + } > +} > + > +/* -- Eip list data -- */ > + > +void update_eip(struct eip_list_struct **head, unsigned long long eip, > + unsigned long long cycles, int type, void * extra) { > + struct eip_list_struct *p, **last=head; > + > + for(p=*head; p; last = (&p->next), p=p->next) > + if(p->eip >= eip) > + break; > + > + if(!p || p->eip != eip) { > + p=malloc(sizeof(*p)); > + if(!p) { > + perror("malloc failed"); > + error(ERR_SYSTEM, NULL); > + } > + > + bzero(p, sizeof(*p)); > + > + p->eip=eip; > + p->type = type; > + > + if(eip_list_type[type].new) { > + eip_list_type[type].new(p, extra); > + } > + p->next = *last; > + *last=p; > + } else if(p->type != type) { > + fprintf(stderr, "WARNING, mixed types! 
%d %d\n", p->type, type); > + } else if(eip_list_type[type].update) { > + eip_list_type[type].update(p, extra); > + } > + > + update_summary(&p->summary, cycles); > +} > + > +void dump_eip(struct eip_list_struct *head) { > + struct eip_list_struct *p; > + int i; > + int total = 0; > + > + struct eip_list_struct **qsort_array; > + int N=0; > + > + int eip_compare(const void *_a, const void *_b) { > + struct eip_list_struct *a=*(typeof(&a))_a; > + struct eip_list_struct *b=*(typeof(&a))_b; > + > + if(a->summary.cycles < b->summary.cycles) > + return 1; > + else if(b->summary.cycles == a->summary.cycles) { > + if(a->summary.count < b->summary.count) > + return 1; > + else if(a->summary.count == b->summary.count) > + return 0; > + else > + return -1; > + } else > + return -1; > + } > + > + for(p=head; p; p=p->next) > + { > + total += p->summary.count; > + N++; > + } > + > + if(!N) > + return; > + > + qsort_array = malloc(N * sizeof(struct eip_list_struct *)); > + > + for(i=0, p=head; p; p=p->next, i++) > + qsort_array[i]=p; > + > + qsort(qsort_array, N, sizeof(struct eip_list_struct *), > + eip_compare); > + > + /* WARNING: don't use N after this point unless you copy this variable */ > +#if 0 > + if(opt.summary_eip_limit && opt.summary_eip_limit < N) > + N=opt.summary_eip_limit; > +#endif > + > + printf(" Total samples: %d\n", total); > + > + for(i=0; i<N; i++) { > + p = qsort_array[i]; > + if ( p->summary.cycles ) > + PRINT_SUMMARY(p->summary, " %12llx%-45s: ", > + p->eip, > + find_symbol(p->eip)); > + else > + { > + printf(" %12llx%-45s: ", > + p->eip, > + find_symbol(p->eip)); > + printf(" %7d %5.2lf%%\n", > + p->summary.count, > + ((double)p->summary.count*100)/total); > + } > + > + > + if(eip_list_type[p->type].dump) { > + eip_list_type[p->type].dump(p); > + } > + } > + > + free(qsort_array); > +} > + > +/* -- HVM code -- */ > +struct hvm_pf_xen_record { > + //unsigned vcpu:16, domain:16; > + union { > + struct { > + unsigned long long va; > + unsigned int error_code; > + } x64; > + struct { > + unsigned int va; > + unsigned int error_code; > + } x32; > + }; > +}; > + > +void hvm_update_short_summary(struct hvm_data *h, int element) { > + struct vcpu_data *v = h->v; > + > + if(v->cr3.data) > + update_cycles(&v->cr3.data->hvm.s[element], h->arc_cycles); > + > + update_cycles(&v->d->hvm_short.s[element], h->arc_cycles); > + > + h->short_summary_done=1; > +} > + > +void hvm_short_summary(struct hvm_short_summary_struct *hss, > + tsc_t total, char *prefix) { > + char desc[80]; > + int i; > + > + for(i=0; i<HVM_SHORT_SUMMARY_MAX; i++) { > + snprintf(desc, 80, "%s%s", prefix, hvm_short_summary_name[i]); > + print_cycle_percent_summary(hss->s + i, total, desc); > + } > +} > + > +/* Wrapper to try to make sure this is only called once per > + * call site, rather than walking through the list each time */ > +#define hvm_set_summary_handler(_h, _s, _d) \ > + do { \ > + static int done=0; \ > + int ret; \ > + if(!done) { \ > + if ((ret=__hvm_set_summary_handler(_h, _s, _d))) \ > + fprintf(stderr, "%s: hvm_set_summary_handler returned %d\n", > \ > + __func__, ret); \ > + done=1; \ > + } \ > + } while(0) > + > +int __hvm_set_summary_handler(struct hvm_data *h, void (*s)(struct hvm_data > *h, void*d), void*d) { > + /* Set summary handler */ > + if(h->exit_reason < h->exit_reason_max) > + { > + struct hvm_summary_handler_node *p, **q; > + > + /* Find the end of the list, checking to make sure there are no > + * duplicates along the way */ > + q=&h->exit_reason_summary_handler_list[h->exit_reason]; > + 
p = *q; > + while(p) > + { > + if(p->handler == s && p->data == d) > + { > + fprintf(stderr, "%s: Unexpected duplicate handler %p,%p\n", > + __func__, s, d); > + error(ERR_STRICT, NULL); > + return -EBUSY; > + } > + q=&p->next; > + p=*q; > + } > + > + assert(p==NULL); > + > + /* Insert the new handler */ > + p=malloc(sizeof(*p)); > + if (!p) { > + fprintf(stderr, "%s: Malloc failed!\n", __func__); > + error(ERR_SYSTEM, NULL); > + } > + p->handler=s; > + p->data = d; > + p->next=*q; > + *q=p; > + return 0; > + } > + return -EINVAL; > +} > + > +void hvm_generic_postprocess(struct hvm_data *h); > + > +static int hvm_set_postprocess(struct hvm_data *h, void (*s)(struct hvm_data > *h)) > +{ > + if ( h->post_process == NULL > + || h->post_process == hvm_generic_postprocess ) > + { > + h->post_process = s; > + return 0; > + } > + else > + return 1; > +} > + > +#define SIGN_EXTENDED_BITS (~((1ULL<<48)-1)) > +#define HIGH_BIT(_v) ((_v) & (1ULL<<47)) > +static inline int is_valid_addr64(unsigned long long va) > +{ > + if(HIGH_BIT(va)) > + return ((va & SIGN_EXTENDED_BITS) == SIGN_EXTENDED_BITS); > + else > + return ((va & SIGN_EXTENDED_BITS) == 0); > +} > + > +void hvm_pf_xen_summary(struct hvm_data *h, void *d) { > + int i,j, k; > + > + printf(" page_fault\n"); > + for(i=0; i<PF_XEN_MAX; i++) > + { > + if( pf_xen_name[i] ) > + { > + PRINT_SUMMARY(h->summary.pf_xen[i], > + " %-25s ", pf_xen_name[i]); > + } > + else > + { > + PRINT_SUMMARY(h->summary.pf_xen[i], > + " [%23d] ", i); > + } > + switch(i){ > + case PF_XEN_NON_EMULATE: > + for(j=0; j<PF_XEN_NON_EMUL_MAX; j++) > + PRINT_SUMMARY(h->summary.pf_xen_non_emul[j], > + " *%-13s ", pf_xen_non_emul_name[j]); > + break; > + case PF_XEN_EMULATE: > + for(j=0; j<PF_XEN_EMUL_MAX; j++) { > + PRINT_SUMMARY(h->summary.pf_xen_emul[j], > + " *%-13s ", pf_xen_emul_name[j]); > + if(j == PF_XEN_EMUL_EARLY_UNSHADOW) { > + int k; > + for(k=0; k<5; k++) { > + > PRINT_SUMMARY(h->summary.pf_xen_emul_early_unshadow[k], > + " +[%d] ", k); > + } > + } > + } > + break; > + case PF_XEN_FIXUP: > + for(j=0; j<PF_XEN_FIXUP_MAX; j++) { > + PRINT_SUMMARY(h->summary.pf_xen_fixup[j], > + " *%-13s ", pf_xen_fixup_name[j]); > + if(j == PF_XEN_FIXUP_UNSYNC ) { > + for(k=0; k<PF_XEN_FIXUP_UNSYNC_RESYNC_MAX; k++) { > + > PRINT_SUMMARY(h->summary.pf_xen_fixup_unsync_resync[k], > + " +[%3d] ", k); > + } > + PRINT_SUMMARY(h->summary.pf_xen_fixup_unsync_resync[k], > + " +[max] "); > + } > + } > + break; > + } > + } > +} > + > +void pf_preprocess(struct pf_xen_extra *e, int guest_paging_levels) > +{ > + switch(guest_paging_levels) { > + /* Select a subfield of _bits bits starting at bit _shift from _x */ > +#define _SUBFIELD(_bits, _shift, _x) \ > + (((_x)>>(_shift)) & ((1ULL<<(_bits))-1)) > + case 4: > + /* Verify sign-extension */ > + if((HIGH_BIT(e->va) > + &&((e->va & SIGN_EXTENDED_BITS) != SIGN_EXTENDED_BITS)) > + || (!HIGH_BIT(e->va) > + && ((e->va & SIGN_EXTENDED_BITS) != 0))) { > + fprintf(warn, "Strange, va %llx not properly sign extended for > 4-level pagetables\n", > + e->va); > + } > + e->pt_index[4]=_SUBFIELD(9,39,e->va); > + e->pt_index[3]=_SUBFIELD(9,30,e->va); > + e->pt_index[2]=_SUBFIELD(9,21,e->va); > + e->pt_index[1]=_SUBFIELD(9,12,e->va); > + /* These are only useful for the linear-pagetable code */ > + e->pt_index[0]=_SUBFIELD(9,3,e->va); > + if(e->va & 0x4) > + e->pt_is_lo=0; > + break; > + case 3: > + e->pt_index[3]=_SUBFIELD(2,30,e->va); > + e->pt_index[2]=_SUBFIELD(9,21,e->va); > + e->pt_index[1]=_SUBFIELD(9,12,e->va); > + /* These are only useful for the 
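(Aside, mostly for readers: is_valid_addr64() is a canonical-address check, i.e. bits 63:48 must be copies of bit 47. Illustratively (my examples, not from the patch):

    assert( is_valid_addr64(0x00007fffffffffffULL));  /* top of lower half  */
    assert( is_valid_addr64(0xffff800000000000ULL));  /* base of upper half */
    assert(!is_valid_addr64(0x0000800000000000ULL));  /* non-canonical      */

pf_preprocess() below re-implements the same test inline for its 4-level warning; it could simply warn on !is_valid_addr64(e->va).)
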
linear-pagetable code */ > + e->pt_index[0]=_SUBFIELD(9,3,e->va); > + if(e->va & 0x4) > + e->pt_is_lo=0; > + break; > + case 2: > + e->pt_index[2]=_SUBFIELD(10,22,e->va); > + e->pt_index[1]=_SUBFIELD(10,12,e->va); > + /* This is only useful for the linear pagetable code */ > + e->pt_index[0]=_SUBFIELD(10,2,e->va); > + break; > + case 0: > + break; > + default: > + fprintf(warn, "Don't know how to handle %d-level pagetables\n", > + guest_paging_levels); > + } > + > + e->corresponding_va = CORR_VA_INVALID; > + e->pt_level = 0; > + > + /* Detect accesses to Windows linear pagetables */ > + switch(guest_paging_levels) > + { > + case 2: > + if(e->pt_index[2] == 768) { > + if(e->pt_index[1] == 768) > + { > + e->pt_level = 2; > + e->corresponding_va=((1UL<<22)-1) > + | e->pt_index[0]<<22; > + } > + else > + { > + e->pt_level = 1; > + e->corresponding_va = ((1UL<<12)-1) > + | e->pt_index[1]<<22 > + | e->pt_index[0]<<12; > + } > + } > + break; > + case 3: > + if(e->pt_index[3]==3 && (e->pt_index[2]>>2==0)) > + { > + if(e->pt_index[2]==3 && e->pt_index[1]>>2==0) > + { > + if(e->pt_index[1] == 3 && e->pt_index[0]>>2==0) > + { > + e->pt_level = 3; > + e->corresponding_va=((1UL<<30)-1) > + | e->pt_index[0]<<30; > + } > + else > + { > + e->pt_level = 2; > + e->corresponding_va=((1UL<<21)-1) > + | e->pt_index[1]<<30 > + | e->pt_index[2]<<21; > + } > + } > + else > + { > + e->pt_level = 1; > + e->corresponding_va = ((1UL<<12)-1) > + | e->pt_index[0]<<12 > + | e->pt_index[1]<<21 > + | e->pt_index[2]<<30; > + } > + } > + break; > + case 4: > + if(e->pt_index[4] == 0x1ed) > + { > + if(e->pt_index[3] == 0x1ed) > + { > + if(e->pt_index[2] == 0x1ed) > + { > + if(e->pt_index[1] == 0x1ed) > + { > + e->pt_level = 4; > + e->corresponding_va = ((1ULL<<39)-1) > + | (unsigned long long)e->pt_index[0]<<39; > + } > + else > + { > + e->pt_level = 3; > + e->corresponding_va = ((1ULL<<30)-1) > + | (unsigned long long)e->pt_index[0]<<30 > + | (unsigned long long)e->pt_index[1]<<39; > + } > + } > + else > + { > + e->pt_level = 2; > + e->corresponding_va = ((1ULL<<21)-1) > + | (unsigned long long)e->pt_index[0]<<21 > + | (unsigned long long)e->pt_index[1]<<30 > + | (unsigned long long)e->pt_index[2]<<39; > + } > + } > + else > + { > + e->pt_level = 1; > + e->corresponding_va = ((1ULL<<12)-1) > + | (unsigned long long)e->pt_index[0]<<12 > + | (unsigned long long)e->pt_index[1]<<21 > + | (unsigned long long)e->pt_index[2]<<30 > + | (unsigned long long)e->pt_index[3]<<39; > + } > + > + if(HIGH_BIT(e->corresponding_va)) > + e->corresponding_va |= SIGN_EXTENDED_BITS; > + } > + break; > + default: > + break; > + } > +} > + > +void hvm_pf_xen_preprocess(unsigned event, struct hvm_data *h) { > + struct pf_xen_extra *e = &h->inflight.pf_xen; > + struct mmio_info *m = &h->inflight.mmio; > + struct hvm_pf_xen_record *r = (typeof(r))h->d; > + > + if(event == TRC_HVM_PF_XEN64) > + { > + if(!is_valid_addr64(r->x64.va)) > + fprintf(warn, "%s: invalid va %llx", > + __func__, r->x64.va); > + e->va = r->x64.va; > + e->error_code = r->x64.error_code; > + } > + else > + { > + e->va = r->x32.va; > + e->error_code = r->x32.error_code; > + } > + > + if(m->data_valid) > + e->pf_case = PF_XEN_MMIO; > + else > + { > + pf_preprocess(e, h->v->guest_paging_levels); > + > + /* On rio traces, we try to infer emulation by looking for accesses > + in the linear pagetable */ > + if(e->pt_level > 0) > + e->pf_case = PF_XEN_EMULATE; > + else > + e->pf_case = PF_XEN_NON_EMULATE; > + } > +} > + > +static inline int is_kernel(int paging_levels, unsigned long long 
va) { > + switch(paging_levels) { > + case 2: > + case 3: > + if(va & 0x80000000) > + return 1; > + else > + return 0; > + break; > + case 4: > + if(HIGH_BIT(va)) > + return 1; > + else return 0; > + default: > + return 0; > + } > + > +} > + > +void hvm_pf_xen_postprocess(struct hvm_data *h) { > + struct pf_xen_extra *e = &h->inflight.pf_xen; > + > + if(opt.summary_info) { > + if(e->pf_case) > + update_summary(&h->summary.pf_xen[e->pf_case], > + h->arc_cycles); > + else > + fprintf(warn, "Strange, pf_case 0!\n"); > + switch(e->pf_case) > + { > + case PF_XEN_EMULATE: > + update_eip(&h->v->d->emulate_eip_list, > + h->rip, > + h->arc_cycles, > + 0, NULL); > + break; > + case PF_XEN_NON_EMULATE: > + if(is_kernel(h->v->guest_paging_levels, h->rip)) > + > update_summary(&h->summary.pf_xen_non_emul[PF_XEN_NON_EMUL_EIP_KERNEL], > + h->arc_cycles); > + else > + > update_summary(&h->summary.pf_xen_non_emul[PF_XEN_NON_EMUL_EIP_USER], > + h->arc_cycles); > + if(is_kernel(h->v->guest_paging_levels, e->va)) > + > update_summary(&h->summary.pf_xen_non_emul[PF_XEN_NON_EMUL_VA_KERNEL], > + h->arc_cycles); > + > + else > + > update_summary(&h->summary.pf_xen_non_emul[PF_XEN_NON_EMUL_VA_USER], > + h->arc_cycles); > + } > + > + /* Set summary handler */ > + hvm_set_summary_handler(h, hvm_pf_xen_summary, NULL); > + } > +} > + > +void hvm_pf_xen_process(struct record_info *ri, struct hvm_data *h) { > + struct pf_xen_extra *e = &h->inflight.pf_xen; > + > + if(ri->event == TRC_HVM_PF_XEN64 > + && h->v->guest_paging_levels != 4) > + fprintf(warn, "Strange, PF_XEN64 but guest_paging_levels %d!\n", > + h->v->guest_paging_levels); > + else if(ri->event == TRC_HVM_PF_XEN > + && h->v->guest_paging_levels == 4) > + fprintf(warn, "Strange, PF_XEN but guest_paging_levels %d!\n", > + h->v->guest_paging_levels); > + > + hvm_pf_xen_preprocess(ri->event, h); > + > + if(opt.dump_all) > + { > + if(e->pf_case == PF_XEN_EMULATE) > + printf("]%s pf_xen:emulate va %llx ec %x level %d corr %llx > e->pt_index[%d %d %d %d %d]\n", > + ri->dump_header, e->va, e->error_code, > + e->pt_level, e->corresponding_va, > + e->pt_index[0], e->pt_index[1], e->pt_index[2], > + e->pt_index[3], > + e->pt_index[4]); > + else > + printf("]%s pf_xen va %llx ec %x e->pt_index[%d %d %d %d %d]\n", > + ri->dump_header, e->va, e->error_code, > + e->pt_index[0], e->pt_index[1], e->pt_index[2], > + e->pt_index[3], > + e->pt_index[4]); > + } > + > + if ( hvm_set_postprocess(h, hvm_pf_xen_postprocess) ) > + fprintf(warn, "%s: Strange, postprocess already set\n", __func__); > +} > + > +char * hvm_vlapic_icr_dest_shorthand_name[4] = { > + "dest_field", "self", "all-inc", "all-exc" > +}; > + > +void hvm_vlapic_vmentry_cleanup(struct vcpu_data *v, tsc_t tsc) > +{ > + int i; > + > + struct vlapic_struct *vla = &v->vlapic; > + > + for(i=0; i<MAX_VLAPIC_LIST; i++) > + { > + unsigned long long lat=0; > + struct outstanding_ipi *o = vla->outstanding.list + i; > + > + if(!(o->valid && o->injected)) > + continue; > + > + if(tsc >= o->first_tsc) > + lat = tsc - o->first_tsc; > + else > + fprintf(warn, "Strange, vec %d first_tsc %lld > ri->tsc %lld!\n", > + o->vec, o->first_tsc, tsc); > + > + if(opt.dump_ipi_latency > + || (opt.dump_all && o->count > 1)) { > + struct time_struct t; > + cycles_to_time(lat, &t); > + printf(" [vla] d%dv%d vec %d ipis %d, latency %lld (%u.%09u > s)\n", > + v->d->did, v->vid, o->vec, o->count, lat, > + t.s, t.ns); > + } > + > +#if 0 > + /* FIXME: make general somehow */ > + if(opt.summary_info) > + { > + update_summary(&h->summary.ipi_latency, 
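Two observations on the pagefault classification above:

 * The 0x1ed PML4 slot that pf_preprocess() keys on is the 64-bit Windows linear-pagetable ("self-map") slot, matching the 'Detect accesses to Windows linear pagetables' comment. When it hits, corresponding_va reconstructs the guest VA whose pagetable entry the guest was actually touching, which is what makes the emulation inference on rio traces work.

 * is_kernel() treats everything at or above 0x80000000 as kernel for 2- and 3-level guests. That matches Windows' default 2G/2G split, but a default 32-bit Linux guest puts the kernel at 0xc0000000, so user addresses in 0x80000000-0xbfffffff get binned as kernel there. Probably acceptable for a heuristic, but a comment noting the assumed split would help.
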
lat); > + h->summary.ipi_count[vla->outstanding_ipis]++; > + } > +#endif > + > + o->vec = o->count = o->injected = o->valid = o->first_tsc = 0; > + } > +} > + > +void hvm_vlapic_clear(struct vlapic_struct *vla) > +{ > + bzero(vla, sizeof(*vla)); > +} > + > +struct outstanding_ipi *find_vec(struct vlapic_struct *vla, int vec) > +{ > + struct outstanding_ipi *o = NULL; > + int i; > + > + /* Find the entry for this vector, or the first empty one. */ > + for(i=0; i<MAX_VLAPIC_LIST; i++) > + { > + if(vla->outstanding.list[i].valid && vla->outstanding.list[i].vec == > vec) > + { > + o = vla->outstanding.list + i; > + break; > + } else if(!vla->outstanding.list[i].valid && !o) > + o = vla->outstanding.list + i; > + } > + > + if(!o->valid) { > + o->vec = vec; > + o->valid = 1; > + } > + > + return o; > +} > + > +void hvm_vlapic_icr_handler(struct hvm_data *h) > +{ > + struct mmio_info *m = &h->inflight.mmio; > + union { > + unsigned int val; > + struct { > + unsigned vec:8, > + delivery_mode:3, > + dest_mode:1, > + delivery_status:1, > + _res1:1, > + level:1, > + trigger:1, > + _res2:2, > + dest_shorthand:2; > + }; > + } icr = { .val = m->data }; > + > + void ipi_send(struct vcpu_data *ov, int vec) > + { > + struct vlapic_struct *vla; > + struct outstanding_ipi *o = NULL; > + > + if(ov->runstate.state == RUNSTATE_LOST) { > + if(opt.dump_all) > + fprintf(warn, "%s: v%d in state RUNSTATE_LOST, not counting > ipi\n", > + __func__, ov->vid); > + return; > + } > + > + vla = &ov->vlapic; > + > + o = find_vec(vla, vec); > + > + if(!o) > + { > + fprintf(warn, "%s: Couldn't find an open slot!\n", > + __func__); > + return; > + } > + > + if(!o->first_tsc) > + o->first_tsc = P.now; > + > + if(opt.dump_all && o->count == 0 && o->injected) > + printf(" [vla] Pre-injection\n"); > + > + o->count++; > + > + if((opt.dump_all) > +#if 0 > + && (ov->runstate.state != RUNSTATE_RUNNING > + || ov->hvm.vmexit_valid) > +#endif > + ) > + printf(" [vla] d%dv%d vec %d state %s (outstanding ipis %d)\n", > + ov->d->did, ov->vid, > + o->vec, > + runstate_name[ov->runstate.state], > + o->count); > + } > + > + if(m->is_write) { > + if(opt.dump_all) { > + printf(" [vla] d%dv%d icr vec %d %s\n", > + h->v->d->did, h->v->vid, > + icr.vec, > + hvm_vlapic_icr_dest_shorthand_name[icr.dest_shorthand]); > + } > + > + if(icr.dest_shorthand == 3) > + { > + struct vcpu_data *ov, *v = h->v; > + struct domain_data *d = v->d; > + int i; > + > + for(i=0; i<MAX_CPUS; i++) > + { > + ov = d->vcpu[i]; > + if(!ov || ov == v) > + continue; > + > + ipi_send(ov, icr.vec); > + > + } > + } else if(icr.dest_shorthand != 1) { > +#if 0 > + fprintf(warn, "Strange, vlapic icr %s vec %d!\n", > + hvm_vlapic_icr_dest_shorthand_name[icr.dest_shorthand], > + icr.vec); > +#endif > + } > + } else { > + /* Read */ > + if(opt.dump_all) { > + printf(" [vla] d%dv%d icr status %s\n", > + h->v->d->did, h->v->vid, > + icr.delivery_status?"pending":"idle"); > + } > + } > + > +} > + > +void hvm_vlapic_inject(struct vcpu_data *v, int vec) > +{ > + struct vlapic_struct *vla = &v->vlapic; > + struct outstanding_ipi *o = NULL; > + > + o = find_vec(vla, vec); > + > + if(o) { > + if(opt.dump_all) > + printf(" [vla] d%dv%d vec %d injecting\n", > + v->d->did, v->vid, vec); > + o->injected=1; > + } else { > + fprintf(stderr, "%s: Couldn't find an open ipi slot!\n", > + __func__); > + } > +} > + > +void hvm_vlapic_eoi_handler(struct hvm_data *h) { > + if(opt.dump_all) > + printf(" [vla] d%dv%d eoi\n", > + h->v->d->did, h->v->vid); > +} > + > +void hvm_vlapic_handler(struct hvm_data *h) 
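find_vec() has a latent NULL dereference: if all MAX_VLAPIC_LIST slots are valid and none matches vec, o is still NULL when control reaches the `if(!o->valid)` test, so the `if(!o)` checks in ipi_send() and hvm_vlapic_inject() can never actually fire. It wants an early bail-out, something like:

    if(!o)
        return NULL;

    if(!o->valid) {
        o->vec = vec;
        o->valid = 1;
    }

    return o;

Eight outstanding vectors may well be plenty in practice, but the failure mode should be the existing "Couldn't find an open slot" warning, not a crash.
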
> +{ > + struct mmio_info *m = &h->inflight.mmio; > + > + switch(m->gpa) { > + case 0xfee00300: > + hvm_vlapic_icr_handler(h); > + break; > + case 0xfee000b0: > + hvm_vlapic_eoi_handler(h); > + break; > + } > + > +} > + > +/* Also called by shadow_mmio_postprocess */ > +#define MMIO_VGA_START (0xa0000) > +#define MMIO_VGA_END (0xbffff) > +void enumerate_mmio(struct hvm_data *h) > +{ > + struct mmio_info *m = &h->inflight.mmio; > + > + /* Skip vga area */ > + if ( opt.mmio_enumeration_skip_vga > + && m->gpa >= MMIO_VGA_START > + && m->gpa < MMIO_VGA_END) > + { > + warn_once("WARNING: Not enumerationg MMIO in VGA range. Use > --mmio-enumeration-skip-vga=0 to override.\n"); > + return; > + } > + > + if ( m->data_valid ) > + update_io_address(&h->summary.io.mmio, m->gpa, m->is_write, > h->arc_cycles, m->va); > +} > + > +void hvm_mmio_summary(struct hvm_data *h, void *data) > +{ > + long reason=(long)data; > + > + PRINT_SUMMARY(h->summary.mmio[reason], > + " mmio "); > +} > + > +void hvm_mmio_assist_postprocess(struct hvm_data *h) > +{ > + long reason; > + > + switch(h->exit_reason) > + { > + case VMEXIT_NPF: > + case EXIT_REASON_EPT_VIOLATION: > + reason=NONPF_MMIO_NPF; > + hvm_set_summary_handler(h, hvm_mmio_summary, (void *)reason); > + break; > + case EXIT_REASON_APIC_ACCESS: > + reason=NONPF_MMIO_APIC; > + hvm_set_summary_handler(h, hvm_mmio_summary, (void *)reason); > + break; > + default: > + { > + static int warned = 0; > + if (!warned) > + { > + fprintf(stderr, "%s: Strange, MMIO with unexpected exit reason > %d\n", > + __func__, h->exit_reason); > + warned=1; > + } > + reason=NONPF_MMIO_UNKNOWN; > + hvm_set_summary_handler(h, hvm_mmio_summary, (void *)reason); > + break; > + } > + } > + > + if(opt.summary_info) > + { > + update_summary(&h->summary.mmio[reason], > + h->arc_cycles); > + } > + > + if ( opt.with_mmio_enumeration ) > + enumerate_mmio(h); > +} > + > +#define HVM_IO_ASSIST_WRITE 0x200 > +void hvm_mmio_assist_process(struct record_info *ri, struct hvm_data *h) > +{ > + struct mmio_info *m = &h->inflight.mmio; > + union { > + struct { > + unsigned int gpa; > + unsigned int data; > + } x32; > + struct { > + unsigned long long gpa; > + unsigned int data; > + } x64; > + } *r = (typeof(r))h->d; > + > + union { > + unsigned event; > + struct { > + unsigned minor:8, > + x64:1, > + write:2; > + }; > + } mevt = { .event = ri->event }; > + > + if(mevt.x64) { > + m->gpa = r->x64.gpa; > + m->data = r->x64.data; > + if(ri->extra_words*(sizeof(unsigned int))==sizeof(r->x64)) > + m->data_valid=1; > + } else { > + m->gpa = r->x32.gpa; > + m->data = r->x32.data; > + if(ri->extra_words*(sizeof(unsigned int))==sizeof(r->x32)) > + m->data_valid=1; > + } > + > + m->is_write = mevt.write; > + > + if(opt.dump_all) > + { > + if(m->data_valid) > + printf("]%s mmio_assist %c gpa %llx data %x\n", > + ri->dump_header, > + mevt.write?'w':'r', > + m->gpa, m->data); > + else > + printf("]%s mmio_assist %c gpa %llx (no data)\n", > ri->dump_header, > + mevt.write?'w':'r', m->gpa); > + } > + > + if((m->gpa & 0xfffff000) == 0xfee00000) > + hvm_vlapic_handler(h); > + > + /* Catch MMIOs that don't go through the shadow code; tolerate > + * failures to set (probably shadow_mmio) */ > + hvm_set_postprocess(h, hvm_mmio_assist_postprocess); > +} > + > +void hvm_inj_virq_process(struct record_info *ri, struct hvm_data *h) { > + struct { > + int vector, fake; > + } *r = (typeof(r))h->d; > + > + if(opt.dump_all) { > + printf(" %s inj_virq vec %u %s\n", > + ri->dump_header, > + r->vector, r->fake?"fake":"real"); > + } > 
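In enumerate_mmio(): s/enumerationg/enumerating/ in the warn_once() string. Also, MMIO_VGA_END is 0xbffff, which reads as an inclusive last byte, but the skip test is `m->gpa < MMIO_VGA_END`, so an access at 0xbffff itself still gets enumerated; either `<=` there or defining the end as 0xc0000 would make the name and the test agree.
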
+ > + if(opt.summary_info) > + { > + int vector = r->vector; > + > + if(vector >= GUEST_INTERRUPT_MAX) > + vector = GUEST_INTERRUPT_MAX; > + h->summary.guest_interrupt[vector].count++; > + > + if(opt.interval.output == INTERVAL_DOMAIN_GUEST_INTERRUPT) > + interval_domain_guest_interrupt(h, vector); > + } > + > + /* If we're waking, make this the wake vector */ > + if(r->vector < GUEST_INTERRUPT_MAX ) { > + int vector = r->vector; > + if ( h->w2h.waking && h->w2h.vector == 0 ) { > + if(h->summary.guest_interrupt[vector].start_tsc) { > + fprintf(warn, "Strange, d%dv%d waking && wake_vector 0 but > vec %d start_tsc %lld!\n", > + h->v->d->did, h->v->vid, > + vector, > + h->summary.guest_interrupt[vector].start_tsc); > + error(ERR_WARN, NULL); > + } > + if(h->w2h.interrupts) > + fprintf(warn, "Strange, waking && wake_vector 0 but > interrupts_this_wait_to_halt %d!\n", > + h->w2h.interrupts); > + > + if(opt.dump_all) > + printf(" [w2h] d%dv%d Setting wake_vector %d\n", > + h->v->d->did, h->v->vid, vector); > + > + /* In svm mode, vector information is invalid */ > + if ( opt.svm_mode && r->fake ) > + h->w2h.vector = FAKE_VECTOR; > + else > + h->w2h.vector = vector; > + h->summary.guest_interrupt[vector].is_wake = 1; > + } > + > + if( h->summary.guest_interrupt[vector].start_tsc == 0 ) { > + /* Note that we want start_tsc set at the next vmentry */ > + h->summary.guest_interrupt[vector].start_tsc = 1; > + h->w2h.interrupts_wanting_tsc++; > + h->w2h.interrupts++; > + > + if(opt.dump_all) > + printf(" [w2h] d%dv%d Starting vec %d\n", > + h->v->d->did, h->v->vid, vector); > + } > + } > + > + hvm_vlapic_inject(h->v, r->vector); > +} > + > +/* I/O Handling */ > +struct io_address { > + struct io_address *next; > + unsigned int pa; > + unsigned int va; > + struct event_cycle_summary summary[2]; > +}; > + > +void update_io_address(struct io_address ** list, unsigned int pa, int dir, > + tsc_t arc_cycles, unsigned int va) { > + struct io_address *p, *q=NULL; > + > + /* Keep list in order */ > + for(p=*list; p && (p->pa != pa) && (p->pa < pa); q=p, p=p->next); > + > + /* If we didn't find it, make a new element. 
*/ > + if(!p || (p->pa != pa)) { > + if((p=malloc(sizeof(*p)))==NULL) { > + fprintf(stderr, "malloc() failed.\n"); > + error(ERR_SYSTEM, NULL); > + } > + > + bzero(p, sizeof(*p)); > + > + p->pa=pa; > + p->va=va; > + > + /* If we stopped in the middle or at the end, add it in */ > + if(q) { > + p->next=q->next; > + q->next=p; > + } else { > + /* Otherwise, we stopped after the first element; put it at the > beginning */ > + p->next = *list; > + *list = p; > + } > + } > + update_summary(&p->summary[dir], arc_cycles); > +} > + > +void hvm_io_address_summary(struct io_address *list, char * s) { > + if(!list) > + return; > + > + printf("%s\n", s); > + > + for(; list; list=list->next) { > + if ( list->va ) > + { > + PRINT_SUMMARY(list->summary[0], "%8x@%8x:[r] ", list->pa, > list->va); > + PRINT_SUMMARY(list->summary[1], "%8x@%8x:[w] ", list->pa, > list->va); > + } > + else > + { > + PRINT_SUMMARY(list->summary[0], "%8x:[r] ", list->pa); > + PRINT_SUMMARY(list->summary[1], "%8x:[w] ", list->pa); > + } > + } > +} > + > +void hvm_io_write_postprocess(struct hvm_data *h) > +{ > + if(opt.with_pio_enumeration) > + update_io_address(&h->summary.io.pio, h->inflight.io.port, 1, > h->arc_cycles, 0); > +} > + > +void hvm_io_read_postprocess(struct hvm_data *h) > +{ > + if(opt.with_pio_enumeration) > + update_io_address(&h->summary.io.pio, h->inflight.io.port, 0, > h->arc_cycles, 0); > + if(opt.scatterplot_io && h->inflight.io.port == opt.scatterplot_io_port) > + scatterplot_vs_time(h->exit_tsc, P.now - h->exit_tsc); > +} > + > +void hvm_io_assist_process(struct record_info *ri, struct hvm_data *h) > +{ > + union { > + struct { > + unsigned int port; > + unsigned int data; > + } x32; > + } *r = (typeof(r))h->d; > + > + union { > + unsigned event; > + struct { > + unsigned minor:8, > + x64:1, > + write:2; > + }; > + } mevt = { .event = ri->event }; > + > + if(mevt.x64) { > + fprintf(stderr, "FATAL: Unexpected 64-bit PIO\n"); > + error(ERR_RECORD, ri); > + return; > + } > + > + h->inflight.io.port = r->x32.port; > + h->inflight.io.val = r->x32.data; > + > + if(mevt.write) { > + h->inflight.io.is_write = 1; > + if ( hvm_set_postprocess(h, hvm_io_write_postprocess) ) > + fprintf(warn, "%s: Strange, postprocess already set\n", > __func__); > + } else { > + h->inflight.io.is_write = 0; > + if ( hvm_set_postprocess(h, hvm_io_read_postprocess) ) > + fprintf(warn, "%s: Strange, postprocess already set\n", > __func__); > + } > + > + if(opt.dump_all) > + { > + printf(" %s io %s port %x val %x\n", > + ri->dump_header, > + mevt.write?"write":"read", > + r->x32.port, > + r->x32.data); > + } > +} > + > +/* cr_write */ > +/* CR3 list */ > +void cr3_switch(unsigned long long val, struct hvm_data *h) { > + struct vcpu_data *v = h->v; > + /* Really only need absolute tsc here. Later change to global time. 
*/ > + unsigned long long now = P.now; > + unsigned long long gmfn = val >> 12; > + > + if ( !h->init ) > + return; > + > + if(opt.with_cr3_enumeration) { > + if(v->cr3.data) { > + struct cr3_value_struct *cur = v->cr3.data; > + unsigned long long cycles = now - v->cr3.start_time; > + > + if(opt.summary_info) > + update_cycles(&cur->total_time, cycles); > + > + cur->last_time = now; > + } > + > + if(gmfn) { > + struct cr3_value_struct *p, **last=&v->d->cr3_value_head; > + > + /* Always add to tail, so that we get consistent interval > + ouptut as the number of cr3s grow */ > + for(p=*last; p; last = (&p->next), p=p->next) > + if(p->gmfn == gmfn) > + break; > + > + if(!p) { > + if((p=malloc(sizeof(*p)))==NULL) { > + fprintf(stderr, "malloc() failed.\n"); > + error(ERR_SYSTEM, NULL); > + } > + > + bzero(p, sizeof(*p)); > + > + p->gmfn = gmfn; > + p->cr3_id = P.cr3.id; > + p->first_time = now; > + > + p->next=*last; > + *last=p; > + > + p->gnext = NULL; > + if(P.cr3.head) > + *P.cr3.tail = p; > + else > + P.cr3.head = p; > + P.cr3.tail = &p->gnext; > + > + P.cr3.id++; > + > + /* Add to the interval list if appropriate */ > + if(opt.interval.check == INTERVAL_CHECK_CR3 > + && v->d->did != DEFAULT_DOMAIN) > + interval_cr3_value_check(p); > + } > + > + if(p->prealloc_unpin.now) { > + fprintf(warn, "Re-promoting previously unpinned cr3 %llx!\n", > + p->gmfn); > + p->prealloc_unpin.now = 0; > + h->inflight.cr_write.repromote = 1; > + } > + > + /* Accounting for new toplevel */ > + v->cr3.start_time = now; > + p->switch_count++; > + if(p->destroy.callback) > + p->destroy.switch_count++; > + v->cr3.data = p; > + } else { > + v->cr3.data = NULL; > + } > + > + if (opt.scatterplot_cr3_switch) { > + scatterplot_vs_time(h->exit_tsc, > + v->cr3.data ? (v->cr3.data->cr3_id) : 0); > + } > + } else { > + if (opt.scatterplot_cr3_switch) > + scatterplot_vs_time(h->exit_tsc, gmfn); > + } > + > + v->cr3.val = val; > +}; > + > +void cr3_prealloc_unpin(struct vcpu_data *v, unsigned long long gmfn) { > + struct cr3_value_struct *cr3; > + > + /* Look for it in the list */ > + for(cr3 = v->d->cr3_value_head; cr3; cr3=cr3->next) > + if(cr3->gmfn == gmfn) > + break; > + > + if(!cr3) > + return; > + > + if(cr3->prealloc_unpin.now) > + fprintf(warn, "Strange, gmfn %llx multiple unpins w/o access!\n", > + gmfn); > + > + cr3->prealloc_unpin.now = 1; > + cr3->prealloc_unpin.count++; > + > + if(opt.dump_all) > + printf(" cr3 %llx unpinned %d times\n", > + gmfn, cr3->prealloc_unpin.count); > +} > + > +void cr3_dump_list(struct cr3_value_struct *head){ > + struct cr3_value_struct *p; > + struct cr3_value_struct **qsort_array; > + int i, N=0; > + > + int cr3_compare_total(const void *_a, const void *_b) { > + struct cr3_value_struct *a=*(typeof(&a))_a; > + struct cr3_value_struct *b=*(typeof(&a))_b; > + > + if(a->total_time.cycles < b->total_time.cycles) > + return 1; > + else if(b->total_time.cycles == a->total_time.cycles) { > + if(a->total_time.count < b->total_time.count) > + return 1; > + else if(a->total_time.count == b->total_time.count) > + return 0; > + else > + return -1; > + } else > + return -1; > + } > + > + int cr3_compare_start(const void *_a, const void *_b) { > + struct cr3_value_struct *a=*(typeof(&a))_a; > + struct cr3_value_struct *b=*(typeof(&a))_b; > + > + if(a->first_time > b->first_time) > + return 1; > + else if(b->first_time == a->first_time) > + return 0; > + else > + return -1; > + } > + > + if(!head) > + return; > + > + /* Count the number of elements */ > + for(p=head; p; p=p->next) > + N++; > + > 
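Two trivia in cr3_switch(): s/ouptut/output/ in the "Always add to tail" comment, and there is a stray semicolon after the function's closing brace. Both harmless, both easy to fold in before committing.
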
> + if(!N) > + return; > + > + /* Allocate an array of pointers of the right size */ > + qsort_array = malloc(N * sizeof(struct cr3_value_struct *)); > + > + /* Point the array into it */ > + for(i=0, p=head; p; p=p->next, i++) > + qsort_array[i]=p; > + > + /* Sort the array by time */ > + qsort(qsort_array, N, sizeof(struct cr3_value_struct *), > + cr3_compare_start); > + > + /* WARNING: don't use N after this point unless you copy this variable */ > +#if 0 > + if(opt.summary_eip_limit && opt.summary_eip_limit < N) > + N=opt.summary_eip_limit; > +#endif > + > + /* Now print the results */ > + printf(" cr3 values:\n"); > + for(i=0; i<N; i++) { > + char desc[30]; > + struct time_struct first, last; > + > + p = qsort_array[i]; > + > + abs_cycles_to_time(p->first_time, &first); > + abs_cycles_to_time(p->last_time, &last); > + > + > + snprintf(desc, 30, " %8llx (id %d)", p->gmfn, p->cr3_id); > + print_cycle_summary(&p->total_time, desc); > + snprintf(desc, 30, " guest"); > + print_cycle_percent_summary(&p->guest_time, p->run_time, desc); > + snprintf(desc, 30, " hv "); > + print_cycle_percent_summary(&p->hv_time, p->run_time, desc); > + > + hvm_short_summary(&p->hvm, p->run_time, " + "); > + printf(" Seen: %4u.%09u-%4u.%09u switch %d flush %d\n", > + first.s, first.ns, > + last.s, last.ns, > + p->switch_count, p->flush_count); > + if(p->destroy.callback) > + printf(" destroy: flush %u switch %u fixup %u emulate > %u\n", > + p->destroy.flush_count, > + p->destroy.switch_count, > + p->destroy.fixup_user, > + p->destroy.emulate_corr_user); > + } > + > + free(qsort_array); > +} > + > +void hvm_cr3_write_summary(struct hvm_data *h) { > + int j; > + > + for(j=0; j<RESYNCS_MAX; j++) > + PRINT_SUMMARY(h->summary.cr3_write_resyncs[j], > + " *[%3d] ", j); > + PRINT_SUMMARY(h->summary.cr3_write_resyncs[j], > + " *[MAX] "); > +} > + > +void hvm_cr_write_summary(struct hvm_data *h, void *data) > +{ > + long cr=(long)data; > + > + PRINT_SUMMARY(h->summary.cr_write[cr], > + " cr%ld ", cr); > + if ( cr==3 ) > + hvm_cr3_write_summary(h); > +} > + > +void hvm_cr_write_postprocess(struct hvm_data *h) > +{ > + if(h->inflight.cr_write.cr == 3) { > + struct vcpu_data *v = h->v; > + unsigned long long new_val = h->inflight.cr_write.val; > + unsigned long long oval; > + int flush=0; > + > + if(v->cr3.val) { > + oval = v->cr3.val; > + > + if(new_val == oval) { > + if(v->cr3.data) { > + v->cr3.data->flush_count++; > + if(v->cr3.data->destroy.callback) > + v->cr3.data->destroy.flush_count++; > + } > + flush=1; > + } > + } > + > + if(opt.summary_info) { > + int resyncs = h->resyncs; > + > + if(resyncs > RESYNCS_MAX) > + resyncs = RESYNCS_MAX; > + > + update_summary(&h->summary.cr3_write_resyncs[resyncs], > + h->arc_cycles); > + > + update_summary(&h->summary.cr_write[3], > + h->arc_cycles); > + > + hvm_update_short_summary(h, HVM_SHORT_SUMMARY_CR3); > + } > + > + if(!flush) > + cr3_switch(new_val, h); > + } else { > + if(opt.summary_info) > + { > + if(h->inflight.cr_write.cr < CR_MAX) > + update_summary(&h->summary.cr_write[h->inflight.cr_write.cr], > + h->arc_cycles); > + > + } > + } > + > + /* Set summary handler */ > + /* FIXME - deal with cr_read_summary */ > + if(h->exit_reason < h->exit_reason_max) > + { > + /* Want a different "set" for each cr */ > + switch(h->inflight.cr_write.cr) > + { > +#define case_cr(_x) \ > + case (_x): \ > + hvm_set_summary_handler(h, hvm_cr_write_summary, (void > *)(_x)); \ > + break > + case_cr(0); > + case_cr(1); > + case_cr(2); > + case_cr(3); > + case_cr(4); > + case_cr(5); > + case_cr(6); > +
case_cr(7); > + case_cr(8); > + case_cr(9); > + case_cr(10); > + case_cr(11); > + case_cr(12); > + case_cr(13); > + case_cr(14); > + case_cr(15); > +#undef case_cr > + default: > + fprintf(stderr, "Unexpected cr: %d\n", h->inflight.cr_write.cr); > + error(ERR_SANITY, NULL); > + break; > + } > + } > +} > + > +void hvm_cr_write_process(struct record_info *ri, struct hvm_data *h) > +{ > + union { > + struct { > + unsigned cr; > + unsigned int val; > + } x32; > + struct { > + unsigned cr; > + unsigned long long val; > + } __attribute__((packed)) x64; > + } *r = (typeof(r))h->d; > + unsigned cr; > + unsigned long long val; > + > + if(ri->event & TRC_64_FLAG) { > + h->inflight.cr_write.cr = cr = r->x64.cr; > + h->inflight.cr_write.val = val = r->x64.val; > + } else { > + h->inflight.cr_write.cr = cr = r->x32.cr; > + h->inflight.cr_write.val = val = r->x32.val; > + } > + > + /* In vmx, in real mode, cr accesses may cause EXNMI vmexits. > + * Account them under that heading; otherwise, complain */ > + if ( hvm_set_postprocess(h, hvm_cr_write_postprocess) ) > + fprintf(warn, "%s: Strange, h->postprocess already set!\n", > + __func__); > + > + if(opt.dump_all) > + { > + if(cr == 3 && h->v->cr3.val) { > + printf("]%s cr_write cr3 val %llx oval %llx %s\n", > + ri->dump_header, > + val, > + h->v->cr3.val, > + (h->v->cr3.val == val)?"flush":"switch"); > + } else { > + printf(" %s cr_write cr%d val %llx\n", > + ri->dump_header, > + cr, val); > + > + } > + } > + > +} > + > +/* msr_write */ > +void hvm_msr_write_summary(struct hvm_data *h, void *d) > +{ > +} > + > +void hvm_msr_write_postprocess(struct hvm_data *h) > +{ > + if(opt.summary_info) { > + } > + > + /* Set summary handler */ > + hvm_set_summary_handler(h, hvm_msr_write_summary, NULL); > +} > + > +void hvm_msr_write_process(struct record_info *ri, struct hvm_data *h) > +{ > + struct { > + unsigned int addr; > + unsigned long long val; > + } __attribute__((packed)) *r = (typeof(r))h->d; > + > + if(check_extra_words(ri, sizeof(*r), "msr_write")) > + return; > + > + h->inflight.msr.addr = r->addr; > + h->inflight.msr.val = r->val; > + > + if(opt.dump_all) > + { > + printf(" %s msr_write addr %x val %llx\n", > + ri->dump_header, > + r->addr, r->val); > + } > + > + if ( hvm_set_postprocess(h, hvm_msr_write_postprocess) ) > + fprintf(warn, "%s: Strange, postprocess already set\n", __func__); > +} > + > +/* msr_read */ > +void hvm_msr_read_summary(struct hvm_data *h, void *d) > +{ > +} > + > +void hvm_msr_read_postprocess(struct hvm_data *h) > +{ > + if(opt.summary_info) { > + } > + > + /* Set summary handler */ > + hvm_set_summary_handler(h, hvm_msr_read_summary, NULL); > +} > + > +void hvm_msr_read_process(struct record_info *ri, struct hvm_data *h) > +{ > + struct { > + unsigned int addr; > + unsigned long long val; > + } __attribute__((packed)) *r = (typeof(r))h->d; > + > + if(check_extra_words(ri, sizeof(*r), "msr_read")) > + return; > + > + h->inflight.msr.addr = r->addr; > + h->inflight.msr.val = r->val; > + > + if(opt.dump_all) > + { > + printf(" %s msr_read addr %x val %llx\n", > + ri->dump_header, > + r->addr, r->val); > + } > + > + if ( hvm_set_postprocess(h, hvm_msr_read_postprocess) ) > + fprintf(warn, "%s: Strange, postprocess already set\n", __func__); > +} > + > +void hvm_vmcall_summary(struct hvm_data *h, void *d) > +{ > + int i; > + > + for ( i=0; i<HYPERCALL_MAX ; i++) > + { > + PRINT_SUMMARY(h->summary.vmcall[i], > + " [%10s] ", hypercall_name[i]); > + } > + PRINT_SUMMARY(h->summary.vmcall[HYPERCALL_MAX], > + " [%10s] ", "max"); > +} 
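The summary tables in this file share one accounting pattern: a fixed number of real buckets plus a single catch-all, so out-of-range values are still counted rather than silently dropped. Reduced to its essentials, with made-up names to keep the sketch standalone:

    #define NCALLS 64    /* stand-in for HYPERCALL_MAX */

    static unsigned int vmcall_count[NCALLS + 1];

    /* Out-of-range call numbers all land in the final slot, which the
     * summary printer above reports as "max". */
    static void account_vmcall(unsigned int eax)
    {
        vmcall_count[eax < NCALLS ? eax : NCALLS]++;
    }

The same shape appears above for the cr3 resync counts (clamped to RESYNCS_MAX) and below for external interrupts (EXTERNAL_INTERRUPT_MAX).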
> + > +void hvm_vmcall_postprocess(struct hvm_data *h) > +{ > + unsigned eax = h->inflight.vmcall.eax ; > + > + if(opt.summary) > + { > + if ( eax < HYPERCALL_MAX ) > + update_summary(&h->summary.vmcall[eax], > + h->arc_cycles); > + else > + update_summary(&h->summary.vmcall[HYPERCALL_MAX], > + h->arc_cycles); > + hvm_set_summary_handler(h, hvm_vmcall_summary, NULL); > + } > +} > + > +void hvm_vmcall_process(struct record_info *ri, struct hvm_data *h) > +{ > + struct { > + unsigned int eax; > + } *r = (typeof(r))h->d; > + > + if(opt.dump_all) { > + if(r->eax < HYPERCALL_MAX) > + printf(" %s vmcall %2x (%s)\n", > + ri->dump_header, > + r->eax, > + hypercall_name[r->eax]); > + else > + printf(" %s vmcall %2x\n", > + ri->dump_header, > + r->eax); > + } > + > + h->inflight.vmcall.eax = r->eax; > + > + if ( hvm_set_postprocess(h, hvm_vmcall_postprocess) ) > + fprintf(warn, "%s: Strange, postprocess already set\n", __func__); > +} > + > +void hvm_inj_exc_process(struct record_info *ri, struct hvm_data *h) > +{ > + struct { > + unsigned vec, ec; > + } *r = (typeof(r))h->d; > + > + if ( opt.dump_all ) > + { > + if(r->vec < HVM_TRAP_MAX) > + printf(" %3u.%09u %s inj_exc trap %s ec %x\n", > + ri->t.s, ri->t.ns, pcpu_string(ri->cpu), > + hvm_trap_name[r->vec], r->ec); > + else > + printf(" %3u.%09u %s inj_exc trap %u ec %x\n", > + ri->t.s, ri->t.ns, pcpu_string(ri->cpu), > + r->vec, r->ec); > + } > + > +} > + > +void hvm_intr_summary(struct hvm_data *h, void *d) > +{ > + int i; > + > + for(i=0; i<EXTERNAL_INTERRUPT_MAX; i++) > + if(h->summary.extint[i]) > + { > + if(hvm_extint_vector_name[i]) > + printf(" %10s(%3d): %d\n", > + hvm_extint_vector_name[i], > + i, > + h->summary.extint[i]); > + else > + printf(" [%3d]: %d\n", > + i, > + h->summary.extint[i]); > + } > + if(h->summary.extint[EXTERNAL_INTERRUPT_MAX]) > + printf(" Other: : %d\n", > + h->summary.extint[EXTERNAL_INTERRUPT_MAX]); > +} > + > + > +void hvm_intr_process(struct record_info *ri, struct hvm_data *h) > +{ > + unsigned vec = *(unsigned *)h->d; > + > + /* Vector is difficult to get in SVM mode */ > + if ( opt.svm_mode ) > + vec = 0; > + > + if( (h->rip >> ADDR_SPACE_BITS) != 00 > + && (h->rip >> ADDR_SPACE_BITS) != ((0ULL-1)>> ADDR_SPACE_BITS) ) { > + fprintf(stderr, "Unexpected rip %llx (shift %llx)\n", > + h->rip, > + h->rip >> ADDR_SPACE_BITS); > + error(ERR_RECORD, NULL); > + /* Can process with strange rip */ > + } > + > + h->inflight.intr.vec = vec; > + > + if ( opt.dump_all ) > + { > + if ( vec < EXTERNAL_INTERRUPT_MAX && > + hvm_extint_vector_name[vec] ) > + printf(" %s intr vec %s(%x)\n", > + ri->dump_header, > + hvm_extint_vector_name[vec], > + vec); > + else > + printf(" %s intr vec %x\n", > + ri->dump_header, vec); > + } > + > + if(opt.scatterplot_interrupt_eip > + && vec == opt.scatterplot_interrupt_vector) > + { > + struct time_struct t; > + /* Truncate to 40 bits */ > + unsigned long long rip = h->rip & ((1ULL << ADDR_SPACE_BITS)-1); > + > + /* Want absolute tsc to global tsc */ > + abs_cycles_to_time(h->exit_tsc, &t); > + printf("d%dv%d %u.%09u %lld\n", > + h->v->d->did, h->v->vid, > + t.s, t.ns, > + rip); > + } > + > + if(opt.histogram_interrupt_eip > + && vec == opt.histogram_interrupt_vector) > + { > + /* Truncate to 40 bits */ > + unsigned long long rip = h->rip & ((1ULL << ADDR_SPACE_BITS)-1); > + unsigned index = rip / opt.histogram_interrupt_increment; > + > + h->summary.extint_histogram[index]++; > + } > + > + if(opt.with_interrupt_eip_enumeration > + && vec == opt.interrupt_eip_enumeration_vector) > + { > + /* 
Truncate to 40 bits */ > + unsigned long long rip = h->rip & ((1ULL << ADDR_SPACE_BITS)-1); > + > + /* Want absolute tsc to global tsc */ > + update_eip(&h->v->d->interrupt_eip_list, rip, 0, 0, NULL); > + } > + > + /* Disable generic postprocessing */ > + /* FIXME: Do the summary stuff in a post-processor */ > + h->post_process = NULL; > + > + if(opt.summary_info) { > + if(opt.summary) > + hvm_set_summary_handler(h, hvm_intr_summary, NULL); > + > + if(vec < EXTERNAL_INTERRUPT_MAX) > + h->summary.extint[vec]++; > + else > + h->summary.extint[EXTERNAL_INTERRUPT_MAX]++; > + } > +} > + > + > +void hvm_intr_window_process(struct record_info *ri, struct hvm_data *h) > +{ > + struct { > + uint32_t vector; > + uint32_t source; > + int32_t intr; > + } *r = (typeof(r))h->d; > + > + char *intsrc_name[] = { > + "none", > + "pic", > + "lapic", > + "nmi", > + "mce", > + "vector" > + }; > + > + if ( opt.dump_all ) > + { > + printf(" %s intr_window vec %u src %u(%s) ", > + ri->dump_header, > + (unsigned)r->vector, > + (unsigned)r->source, > + r->source < 6 ? intsrc_name[r->source]: "?"); > + > + if ( r->intr > 0 ) > + printf("intr %x\n", > + (unsigned)r->intr); > + else > + printf("intr #\n"); > + } > +} > + > +void hvm_pf_inject_process(struct record_info *ri, struct hvm_data *h) > +{ > + union { > + struct { > + unsigned ec; > + unsigned int cr2; > + } x32; > + struct { > + unsigned ec; > + unsigned long long cr2; > + } __attribute__((packed)) x64; > + } *r = (typeof(r))h->d; > + unsigned int ec; > + unsigned long long cr2; > + int is_64 = 0; > + > + if(ri->event & TRC_64_FLAG) { > + is_64 = 1; > + cr2 = r->x64.cr2; > + ec = r->x64.ec; > + } else { > + cr2 = r->x32.cr2; > + ec = r->x32.ec; > + } > + > + if ( opt.dump_all ) > + { > + printf(" %3u.%09u %s pf_inject%s guest_cr2 %llx guest_ec %x\n", > + ri->t.s, ri->t.ns, pcpu_string(ri->cpu), > + is_64?"64":"", > + cr2, ec); > + } > +} > + > +void hvm_generic_postprocess_init(struct record_info *ri, struct hvm_data > *h); > + > +void hvm_npf_process(struct record_info *ri, struct hvm_data *h) > +{ > + struct { > + uint64_t gpa; > + uint64_t mfn; > + uint32_t qualification; > + uint32_t p2mt; > + } *r = (typeof(r))h->d; > + > + if ( opt.dump_all ) > + printf(" %s npf gpa %llx q %x mfn %llx t %d\n", > + ri->dump_header, > + (unsigned long long)r->gpa, r->qualification, > + (unsigned long long)r->mfn, r->p2mt); > + > + if ( opt.summary_info ) > + hvm_generic_postprocess_init(ri, h); > +} > + > +void hvm_rdtsc_process(struct record_info *ri, struct hvm_data *h) > +{ > + struct { > + unsigned long long tsc; > + } *r = (typeof(r))h->d; > + > + if ( opt.dump_all ) > + printf(" %s rdtsc %llx %lld %s\n", > + ri->dump_header, > + (unsigned long long)r->tsc, > + (unsigned long long)r->tsc, > + h->last_rdtsc > r->tsc ? 
"BACKWARDS" : ""); > + > + if ( opt.scatterplot_rdtsc ) > + { > + struct time_struct t; > + > + abs_cycles_to_time(ri->tsc, &t); > + > + printf("%dv%d %u.%09u %llu\n", > + h->v->d->did, h->v->vid, > + t.s, t.ns, > + r->tsc); > + } > + > + h->last_rdtsc = r->tsc; > +} > + > +void hvm_generic_summary(struct hvm_data *h, void *data) > +{ > + long evt = (long)data; > + > + assert(evt < HVM_EVENT_HANDLER_MAX); > + > + PRINT_SUMMARY(h->summary.generic[evt], > + " %s ", hvm_event_handler_name[evt]); > + > +} > + > +void hvm_generic_postprocess_init(struct record_info *ri, struct hvm_data *h) > +{ > + if ( h->post_process != hvm_generic_postprocess ) > + fprintf(warn, "%s: Strange, h->postprocess set!\n", > + __func__); > + h->inflight.generic.event = ri->event; > + bcopy(h->d, h->inflight.generic.d, sizeof(unsigned int) * 4); > +} > + > +void hvm_generic_postprocess(struct hvm_data *h) > +{ > + long evt = 0; > + static unsigned registered[HVM_EVENT_HANDLER_MAX] = { 0 }; > + > + if ( h->inflight.generic.event ) > + evt = (h->inflight.generic.event - TRC_HVM_HANDLER) > + & ~(TRC_64_FLAG|HVM_IO_ASSIST_WRITE); > + else { > + static unsigned warned[HVM_EXIT_REASON_MAX] = { 0 }; > + /* Some exits we don't expect a handler; just return */ > + if(opt.svm_mode) > + { > + } > + else > + { > + switch(h->exit_reason) > + { > + /* These just need us to go through the return path */ > + case EXIT_REASON_PENDING_INTERRUPT: > + case EXIT_REASON_TPR_BELOW_THRESHOLD: > + /* Not much to log now; may need later */ > + case EXIT_REASON_WBINVD: > + return; > + default: > + break; > + } > + } > + if ( !warned[h->exit_reason] ) > + { > + /* If we aren't a known exception, warn and log results */ > + fprintf(warn, "%s: Strange, exit %x(%s) missing a handler\n", > + __func__, h->exit_reason, > + (h->exit_reason > h->exit_reason_max) > + ? 
"[clipped]" > + : h->exit_reason_name[h->exit_reason]); > + warned[h->exit_reason]=1; > + } > + } > + > + if ( evt >= HVM_EVENT_HANDLER_MAX || evt < 0) > + { > + fprintf(warn, "%s: invalid hvm event %lx(%x)\n", > + __func__, evt, h->inflight.generic.event); > + error(ERR_RECORD, NULL); > + return; > + } > + > + if(opt.summary_info) { > + update_summary(&h->summary.generic[evt], > + h->arc_cycles); > + > + /* NB that h->exit_reason may be 0, so we offset by 1 */ > + if ( registered[evt] ) > + { > + static unsigned warned[HVM_EXIT_REASON_MAX] = { 0 }; > + if ( registered[evt] != h->exit_reason+1 && > !warned[h->exit_reason]) > + { > + fprintf(warn, "%s: HVM evt %lx in %x and %x!\n", > + __func__, evt, registered[evt]-1, h->exit_reason); > + warned[h->exit_reason]=1; > + } > + } > + else > + { > + int ret; > + if((ret=__hvm_set_summary_handler(h, hvm_generic_summary, (void > *)evt))) > + fprintf(stderr, "%s: hvm_set_summary_handler returned %d\n", > + __func__, ret); > + registered[evt]=h->exit_reason+1; > + } > + /* HLT checked at hvm_vmexit_close() */ > + } > +} > + > +void hvm_generic_dump(struct record_info *ri, char * prefix) > +{ > + struct { > + unsigned vcpu:16, domain:16; > + unsigned d[4]; > + } *cr = (typeof(cr))ri->d; > + > + char *evt_string, evt_number[256]; > + int i, evt, is_64 = 0; > + > + evt = ri->event - TRC_HVM_HANDLER; > + > + if(evt & TRC_64_FLAG) { > + evt &= ~(TRC_64_FLAG); > + is_64=1; > + } > + > + if(evt < HVM_EVENT_HANDLER_MAX) > + { > + evt_string = hvm_event_handler_name[evt]; > + } > + else > + { > + snprintf(evt_number, 256, "hvm_handler %d", evt); > + evt_string = evt_number; > + } > + > + printf("%s%s %s%s [", > + prefix, > + ri->dump_header, > + evt_string, > + is_64?"64":""); > + > + for(i=0; i<ri->extra_words; i++) { > + printf(" %x", ri->d[i]); > + } > + > + printf(" ]\n"); > +} > + > +void hvm_handler_process(struct record_info *ri, struct hvm_data *h) { > + /* Wait for first vmexit to initialize */ > + if(!h->init) > + { > + if(opt.dump_all) > + hvm_generic_dump(ri,"!"); > + return; > + } > + > + h->d = ri->d; > + > + /* Handle things that don't need a vmexit */ > + switch(ri->event) { > + default: > + goto needs_vmexit; > + /* Records about changing guest state */ > + case TRC_HVM_PF_INJECT: > + case TRC_HVM_PF_INJECT64: > + hvm_pf_inject_process(ri, h); > + break; > + case TRC_HVM_REINJ_VIRQ: > + if ( opt.dump_all ) > + { > + printf(" %3u.%09u %s inj_virq vec %u\n", > + ri->t.s, ri->t.ns, pcpu_string(ri->cpu), > + *(unsigned*)h->d); > + } > + break; > + case TRC_HVM_INJ_EXC: > + hvm_inj_exc_process(ri, h); > + break; > + case TRC_HVM_INJ_VIRQ: > + hvm_inj_virq_process(ri, h); > + break; > + case TRC_HVM_INTR_WINDOW: > + hvm_intr_window_process(ri, h); > + break; > + case TRC_HVM_OP_DESTROY_PROC: > + if(h->v->cr3.data) { > + struct cr3_value_struct *cur = h->v->cr3.data; > + if(cur->destroy.callback) > + fprintf(warn, "Strange, double callback for cr3 gmfn > %llx!\n", > + cur->gmfn); > + cur->destroy.callback = 1; > + } else if(opt.with_cr3_enumeration) { > + fprintf(warn, "Warning: destroy_proc: don't know current cr3\n"); > + } > + if ( opt.dump_all ) > + { > + printf(" %3u.%09u %s destroy_proc cur_cr3 %llx\n", > + ri->t.s, ri->t.ns, pcpu_string(ri->cpu), h->v->cr3.val); > + } > + break; > + } > + > + return; > + > +needs_vmexit: > + /* Wait for the next vmexit */ > + if(!h->vmexit_valid) > + { > + if(opt.dump_all) > + hvm_generic_dump(ri,"!"); > + return; > + } > + > + /* Keep generic "event handler" info */ > + h->event_handler = ri->event - 
TRC_HVM_HANDLER; > + > + switch(ri->event) { > + /* Records adding to the vmexit reason */ > + case TRC_HVM_INTR: > + hvm_intr_process(ri, h); > + break; > + case TRC_HVM_PF_XEN: > + case TRC_HVM_PF_XEN64: > + hvm_pf_xen_process(ri, h); > + break; > + case TRC_HVM_IOPORT_READ: > + case TRC_HVM_IOPORT_WRITE: > + hvm_io_assist_process(ri, h); > + break; > + case TRC_HVM_IOMEM_READ: > + case TRC_HVM_IOMEM_WRITE: > + case TRC_HVM_IOMEM_READ|TRC_64_FLAG: > + case TRC_HVM_IOMEM_WRITE|TRC_64_FLAG: > + hvm_mmio_assist_process(ri, h); > + break; > + case TRC_HVM_CR_WRITE: > + case TRC_HVM_CR_WRITE64: > + hvm_cr_write_process(ri, h); > + break; > + case TRC_HVM_MSR_WRITE: > + hvm_msr_write_process(ri, h); > + break; > + case TRC_HVM_MSR_READ: > + hvm_msr_read_process(ri, h); > + break; > + case TRC_HVM_VMMCALL: > + hvm_vmcall_process(ri, h); > + break; > + case TRC_HVM_NPF: > + hvm_npf_process(ri, h); > + break; > + case TRC_HVM_RDTSC: > + hvm_rdtsc_process(ri, h); > + break; > + case TRC_HVM_DR_READ: > + case TRC_HVM_DR_WRITE: > + case TRC_HVM_CPUID: > + case TRC_HVM_SMI: > + case TRC_HVM_HLT: > + case TRC_HVM_INVLPG: > + case TRC_HVM_INVLPG64: > + case TRC_HVM_MCE: > + case TRC_HVM_CLTS: > + case TRC_HVM_LMSW: > + case TRC_HVM_LMSW64: > + case TRC_HVM_NMI: > + case TRC_HVM_REALMODE_EMULATE: > + case TRC_HVM_TRAP: > + case TRC_HVM_TRAP_DEBUG: > + case TRC_HVM_CR_READ: > + case TRC_HVM_CR_READ64: > + default: > + if(opt.dump_all) > + hvm_generic_dump(ri, "]"); > + if(opt.summary_info) > + hvm_generic_postprocess_init(ri, h); > + break; > + } > +} > + > +void vcpu_next_update(struct pcpu_info *p, struct vcpu_data *next, tsc_t > tsc); > +void vcpu_prev_update(struct pcpu_info *p, struct vcpu_data *prev, > + tsc_t tsc, int new_runstate); > +struct vcpu_data * vcpu_find(int did, int vid); > +void lose_vcpu(struct vcpu_data *v, tsc_t tsc); > + > +int domain_runstate(struct domain_data *d) { > + int i; > + int runstates[RUNSTATE_MAX]; > + int ret=-1; > + int max_vcpus = 0; > + > + if(d->did == DEFAULT_DOMAIN) > + return 0; > + > + for(i=0; i<RUNSTATE_MAX; i++) > + runstates[i]=0; > + > + for(i=0; i<=d->max_vid; i++) > + if(d->vcpu[i] && d->vcpu[i]->runstate.state != RUNSTATE_INIT) { > + max_vcpus++; > + runstates[d->vcpu[i]->runstate.state]++; > + } > + > + if(runstates[RUNSTATE_LOST] == max_vcpus) > + ret=DOMAIN_RUNSTATE_LOST; > + else if(runstates[RUNSTATE_RUNNING]) > + { > + if(runstates[RUNSTATE_RUNNABLE]) > + ret=DOMAIN_RUNSTATE_CONCURRENCY_HAZARD; > + else if(runstates[RUNSTATE_BLOCKED]||runstates[RUNSTATE_OFFLINE]) > + ret= DOMAIN_RUNSTATE_PARTIAL_RUN; > + else > + ret= DOMAIN_RUNSTATE_FULL_RUN; > + } > + else if(runstates[RUNSTATE_RUNNABLE]) > + { > + if(runstates[RUNSTATE_BLOCKED]||runstates[RUNSTATE_OFFLINE]) > + ret= DOMAIN_RUNSTATE_PARTIAL_CONTENTION; > + else > + ret= DOMAIN_RUNSTATE_FULL_CONTENTION; > + } > + else if(runstates[RUNSTATE_BLOCKED]||runstates[RUNSTATE_OFFLINE]) > + { > + ret= DOMAIN_RUNSTATE_BLOCKED; > + } else { > + fprintf(warn, "Strange, no meaningful runstates for d%d!\n", > + d->did); > + } > + > + if ( ret < 0 ) > + { > + printf(" Max vid: %d (max_vcpus %d)\n", d->max_vid, max_vcpus); > + for(i=0; i<=d->max_vid; i++) > + if(d->vcpu[i]) > + fprintf(warn, " v%d: %s\n", > + i, runstate_name[d->vcpu[i]->runstate.state]); > + > + for(i=0; i<RUNSTATE_MAX; i++) > + fprintf(warn, " %s: %d\n", > + runstate_name[i], runstates[i]); > + } > + > + if(ret >= 0) > + return ret; > + > + error(ERR_ASSERT, NULL); > + return -1; /* Never happens */ > +} > + > +static inline void 
runstate_update(struct vcpu_data *v, int new_runstate, > + tsc_t tsc) > +{ > + struct domain_data *d = v->d; > + > + if ( opt.scatterplot_runstate ) > + { > + struct time_struct t; > + > + abs_cycles_to_time(tsc, &t); > + > + printf("%dv%d %u.%09u %d\n", > + d->did, v->vid, > + t.s, t.ns, > + runstate_graph[v->runstate.state]); > + printf("%dv%d %u.%09u %d\n", > + d->did, v->vid, > + t.s, t.ns, > + runstate_graph[new_runstate]); > + } > + > + if(v->runstate.tsc > 0 && v->runstate.tsc < tsc) { > + update_cycles(v->runstates + v->runstate.state, tsc - > v->runstate.tsc); > + > + if ( opt.scatterplot_runstate_time ) > + { > + struct time_struct t, dt; > + > + abs_cycles_to_time(tsc, &t); > + cycles_to_time(tsc - v->runstate.tsc, &dt); > + > + printf("%dv%d %u.%09u %u.%09u\n", > + d->did, v->vid, > + t.s, t.ns, > + dt.s, dt.ns); > + } > + > + if(v->runstate.state == RUNSTATE_RUNNING) > + update_cycles(&v->d->total_time, tsc - v->runstate.tsc); > + > + if(v->runstate.state == RUNSTATE_RUNNABLE) > + update_cycles(v->runnable_states + v->runstate.runnable_state, > tsc - v->runstate.tsc); > + > + /* How much did dom0 run this buffer? */ > + if(v->d->did == 0) { > + int i; > + for(i=0; i<MAX_CPUS; i++) { > + struct pcpu_info * p = P.pcpu + i; > + tsc_t start_tsc; > + if(!p->active) > + continue; > + start_tsc = (p->volume.buffer_first_tsc > v->runstate.tsc) ? > + p->volume.buffer_first_tsc : > + v->runstate.tsc; > + p->volume.buffer_dom0_runstate_cycles[v->runstate.state] > + += tsc - start_tsc; > +#if 0 > + printf(" - updated p%d dom0_runstate %s to %lld cycles > (+%lld)\n", > + p->pid, runstate_name[v->runstate.state], > + > p->volume.buffer_dom0_runstate_cycles[v->runstate.state], > + tsc - start_tsc); > +#endif > + p->volume.buffer_dom0_runstate = new_runstate; > + p->volume.buffer_dom0_runstate_tsc = tsc; > + } > + } > + } > + > + /* Detect "runnable" states */ > + if ( new_runstate == RUNSTATE_RUNNABLE ) > + { > + switch(v->runstate.state) > + { > + case RUNSTATE_RUNNING: > + v->runstate.runnable_state=RUNNABLE_STATE_PREEMPT; > + break; > + case RUNSTATE_BLOCKED: > + case RUNSTATE_OFFLINE: > + v->runstate.runnable_state=RUNNABLE_STATE_WAKE; > + break; > + default: > + v->runstate.runnable_state=RUNNABLE_STATE_OTHER; > + break; > + } > + } else > + v->runstate.runnable_state=RUNNABLE_STATE_INVALID; > + > + v->runstate.state = new_runstate; > + v->runstate.tsc = tsc; > + > + /* Determine the domain runstate */ > + if(d->runstate_tsc > 0 && d->runstate_tsc < tsc) > + update_cycles(d->runstates + d->runstate, tsc - d->runstate_tsc); > + > + d->runstate = domain_runstate(d); > + > + d->runstate_tsc = tsc; > +} > + > +void hvm_vmexit_process(struct record_info *ri, struct hvm_data *h, > + struct vcpu_data *v) { > + struct { > + union { > + struct { > + unsigned int exit_reason; > + unsigned long long rip; > + } __attribute__((packed)) x64; > + struct { > + unsigned int exit_reason; > + unsigned int eip; > + } x32; > + }; > + } *r; > + > + if ( ri->event & TRC_64_FLAG ) > + { > + if (check_extra_words(ri, sizeof(r->x64), "vmexit")) > + return; > + } > + else > + { > + if (check_extra_words(ri, sizeof(r->x32), "vmexit")) > + return; > + } > + > + r = (typeof(r))ri->d; > + > + if(!h->init) > + init_hvm_data(h, v); > + > + h->vmexit_valid=1; > + bzero(&h->inflight, sizeof(h->inflight)); > + > + if(ri->event == TRC_HVM_VMEXIT64) { > + if(v->guest_paging_levels != 4) > + { > + if ( verbosity >= 6 ) > + fprintf(warn, "%s: VMEXIT64, but guest_paging_levels %d. 
> Switching to 4.\n", > + __func__, v->guest_paging_levels); > + v->guest_paging_levels = 4; > + } > + if(!is_valid_addr64(r->x64.rip)) > + fprintf(warn, "%s: invalid va %llx\n", > + __func__, r->x64.rip); > + h->rip = r->x64.rip; > + h->exit_reason = r->x64.exit_reason; > + } else { > + if(v->guest_paging_levels == 4) > + { > + int new_paging_levels = opt.default_guest_paging_levels; > + > + if(new_paging_levels == 4) > + new_paging_levels = 2; /* Wild guess */ > + > + if ( verbosity >= 6 ) > + fprintf(warn, "%s: VMEXIT, but guest_paging_levels %d. > Switching to %d(default).\n", > + __func__, v->guest_paging_levels, new_paging_levels); > + > + v->guest_paging_levels = new_paging_levels; > + } > + h->rip = r->x32.eip; > + h->exit_reason = r->x32.exit_reason; > + } > + > + if(opt.scatterplot_vmexit_eip) > + scatterplot_vs_time(ri->tsc, h->rip); > + > + if(h->exit_reason > h->exit_reason_max) > + { > + fprintf(warn, "h->exit_reason %x > exit_reason_max %x!\n", > + (unsigned int)h->exit_reason, > + (unsigned int)h->exit_reason_max); > + error(ERR_RECORD, ri); > + return; > + } > + > + if(opt.dump_all) { > + if ( h->exit_reason < h->exit_reason_max > + && h->exit_reason_name[h->exit_reason] != NULL) > + printf("]%s vmexit exit_reason %s eip %llx%s\n", > + ri->dump_header, > + h->exit_reason_name[h->exit_reason], > + h->rip, > + find_symbol(h->rip)); > + else > + printf("]%s vmexit exit_reason %x eip %llx%s\n", > + ri->dump_header, > + h->exit_reason, > + h->rip, > + find_symbol(h->rip)); > + } > + > + if(h->v->cr3.data && h->entry_tsc) { > + update_cycles(&h->v->cr3.data->guest_time, > + ri->tsc - h->entry_tsc); > + h->v->cr3.data->run_time += (ri->tsc - h->entry_tsc); > + } > + > + h->exit_tsc = ri->tsc; > + h->entry_tsc = 0; > + h->resyncs = 0; > + h->prealloc_unpin = 0; > + h->wrmap_bf = 0; > + h->short_summary_done = 0; > + > + h->post_process = hvm_generic_postprocess; > + h->inflight.generic.event = 0; > +} > + > +void hvm_close_vmexit(struct hvm_data *h, tsc_t tsc) { > + > + if(h->exit_tsc) { > + if(h->exit_tsc > tsc) > + h->arc_cycles = 0; > + else { > + h->arc_cycles = tsc - h->exit_tsc; > + > + if(opt.summary_info) { > + update_summary(&h->summary.exit_reason[h->exit_reason], > + h->arc_cycles); > + h->summary_info = 1; > + } > + > + if ( opt.scatterplot_extint_cycles > + && h->exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT > + && h->inflight.intr.vec == > opt.scatterplot_extint_cycles_vector ) > + { > + struct time_struct t; > + > + abs_cycles_to_time(tsc, &t); > + > + printf("d%dv%d %u.%09u %lld\n", > + h->v->d->did, > + h->v->vid, > + t.s, t.ns, > + h->arc_cycles); > + } > + } > + } > + > + if(h->post_process) > + (h->post_process)(h); > + > + if(h->arc_cycles) { > + if(opt.summary_info && !h->short_summary_done) { > + switch(h->event_handler) { > + case HVM_EVENT_HANDLER_VMCALL: > + hvm_update_short_summary(h, HVM_SHORT_SUMMARY_VMCALL); > + break; > + case HVM_EVENT_HANDLER_INTR: > + hvm_update_short_summary(h, HVM_SHORT_SUMMARY_INTERRUPT); > + break; > + case HVM_EVENT_HANDLER_HLT: > + hvm_update_short_summary(h, HVM_SHORT_SUMMARY_HLT); > + break; > + default: > + hvm_update_short_summary(h, HVM_SHORT_SUMMARY_OTHER); > + break; > + } > + } > + > + > + if(h->v->cr3.data) { > + h->v->cr3.data->run_time += h->arc_cycles; > + > + if(opt.summary_info) > + update_cycles(&h->v->cr3.data->hv_time, > + h->arc_cycles); > + } > + } > + > + h->exit_tsc = 0; > + h->vmexit_valid = 0; > + h->post_process = NULL; > + > +} > + > +void hvm_vmentry_process(struct record_info *ri, struct hvm_data 
*h) { > + if(!h->init) > + { > + if(opt.dump_all) > + printf("!%s vmentry\n", > + ri->dump_header); > + return; > + } > + > + /* Vista bug > + * This has to be done here because irqs are injected on the path out > + * to vmexit. */ > + hvm_vlapic_vmentry_cleanup(h->v, ri->tsc); > + > + if(h->w2h.waking && opt.dump_all) > + printf(" [w2h] d%dv%d Finishing waking\n", > + h->v->d->did, h->v->vid); > + > + h->w2h.waking = 0; > + > + if ( h->w2h.interrupts_wanting_tsc ) { > + int i; > + for(i=0; i<GUEST_INTERRUPT_MAX; i++) > + { > + if ( h->summary.guest_interrupt[i].start_tsc == 1 ) > + { > + if(opt.dump_all) > + printf(" [w2h] d%dv%d Setting vec %d tsc to %lld\n", > + h->v->d->did, h->v->vid, i, ri->tsc); > + h->summary.guest_interrupt[i].start_tsc = ri->tsc; > + h->w2h.interrupts_wanting_tsc--; > + if ( h->w2h.interrupts_wanting_tsc == 0 ) > + break; > + } > + } > + } > + > + if(!h->vmexit_valid) > + { > + if(opt.dump_all) > + printf("!%s vmentry\n", > + ri->dump_header); > + return; > + } > + > + if(opt.dump_all) { > + unsigned long long arc_cycles = ri->tsc - h->exit_tsc; > + printf("]%s vmentry cycles %lld %s\n", > + ri->dump_header, arc_cycles, (arc_cycles>10000)?"!":""); > + } > + > + hvm_close_vmexit(h, ri->tsc); > + h->entry_tsc = ri->tsc; > +} > + > +void hvm_process(struct pcpu_info *p) > +{ > + struct record_info *ri = &p->ri; > + struct vcpu_data *v = p->current; > + struct hvm_data *h = &v->hvm; > + > + assert(p->current); > + > + if(vcpu_set_data_type(p->current, VCPU_DATA_HVM)) > + return; > + > + if(ri->evt.sub == 2) > + { > + UPDATE_VOLUME(p, hvm[HVM_VOL_HANDLER], ri->size); > + hvm_handler_process(ri, h); > + } > + else > + { > + switch(ri->event) { > + /* HVM */ > + case TRC_HVM_VMEXIT: > + case TRC_HVM_VMEXIT64: > + UPDATE_VOLUME(p, hvm[HVM_VOL_VMEXIT], ri->size); > + hvm_vmexit_process(ri, h, v); > + break; > + case TRC_HVM_VMENTRY: > + UPDATE_VOLUME(p, hvm[HVM_VOL_VMENTRY], ri->size); > + hvm_vmentry_process(ri, &p->current->hvm); > + break; > + default: > + fprintf(warn, "Unknown hvm event: %x", ri->event); > + } > + } > +} > + > +void hvm_summary(struct hvm_data *h) { > + int i; > + > + if(!h->summary_info) > + return; > + > + printf("Exit reasons:\n"); > + for(i=0; i<h->exit_reason_max; i++) { > + struct hvm_summary_handler_node *p; > + > + if ( h->exit_reason_name[i] ) > + PRINT_SUMMARY(h->summary.exit_reason[i], > + " %-20s ", h->exit_reason_name[i]); > + else > + PRINT_SUMMARY(h->summary.exit_reason[i], > + " %20d ", i); > + > + p=h->exit_reason_summary_handler_list[i]; > + while(p) > + { > + p->handler(h, p->data); > + p=p->next; > + } > + } > + > + printf("Guest interrupt counts:\n"); > + for(i=0; i<GUEST_INTERRUPT_MAX; i++) > + if(h->summary.guest_interrupt[i].count) { > + int j; > + printf(" [%3d] %d\n", > + i, h->summary.guest_interrupt[i].count); > + for(j=1; j<GUEST_INTERRUPT_CASE_MAX; j++) { > + char desc[80]; > + snprintf(desc, 80, " * %s", guest_interrupt_case_name[j]); > + print_cycle_summary(h->summary.guest_interrupt[i].runtime+j, > desc); > + } > + } > + if(h->summary.guest_interrupt[i].count) > + printf(" [%d+] %d\n", > + i, h->summary.guest_interrupt[i].count); > + > + if(opt.histogram_interrupt_eip) > + { > + unsigned max = > ((1ULL<<ADDR_SPACE_BITS)/opt.histogram_interrupt_increment); > + printf("Interrupt eip histogram:\n"); > + for(i=0; i<max; i++) > + if(h->summary.extint_histogram[i]) > + { > + printf("[%llx-%llx]: %d\n", > + opt.histogram_interrupt_increment * i, > + (opt.histogram_interrupt_increment * (i+1)) - 1, > + 
h->summary.extint_histogram[i]); > + } > + } > + > + PRINT_SUMMARY(h->summary.ipi_latency, > + "IPI latency \n"); > + for(i=0; i<256; i++) > + if(h->summary.ipi_count[i]) > + printf(" [%3d] %10d\n", > + i, h->summary.ipi_count[i]); > + hvm_io_address_summary(h->summary.io.pio, "IO address summary:"); > + hvm_io_address_summary(h->summary.io.mmio, "MMIO address summary:"); > +} > + > +/* ---- Shadow records ---- */ > +union shadow_event > +{ > + unsigned event; > + struct { > + unsigned minor:8, > + paging_levels:4; > + }; > +}; > + > +/* WARNING - not thread safe */ > +#define FLAGSTRING(_name, _char) \ > + if(e->flag_ ## _name) \ > + flagstring[i] = _char; \ > + i++; > + > +char * flag_string(struct pf_xen_extra *e) > +{ > + static char flagstring[32]; > + int i=0; > + > + for(i=0; i<32; i++) > + flagstring[i]='-'; > + > + i=0; > + > + if(e->flag_set_ad) > + flagstring[i]='d'; > + else if(e->flag_set_a) > + flagstring[i]='a'; > + i++; > + > + FLAGSTRING(shadow_l1_get_ref, 'g'); > + FLAGSTRING(shadow_l1_put_ref, 'p'); > + //FLAGSTRING(l2_propagate, '2'); > + FLAGSTRING(demote, 'D'); > + FLAGSTRING(promote, 'P'); > + FLAGSTRING(wrmap, 'w'); > + FLAGSTRING(wrmap_guess_found, 'G'); > + //FLAGSTRING(wrmap_brute_force, 'b'); > + FLAGSTRING(early_unshadow, 'e'); > + FLAGSTRING(prealloc_unhook, 'H'); > + FLAGSTRING(unsync, 'u'); > + FLAGSTRING(oos_fixup_add, 'a'); > + FLAGSTRING(oos_fixup_evict, 'x'); > + > + flagstring[i]=0; > + > + return flagstring; > +} > + > +void shadow_emulate_postprocess(struct hvm_data *h) > +{ > + struct pf_xen_extra *e = &h->inflight.pf_xen; > + > + if ( opt.summary_info ) > + { > + update_eip(&h->v->d->emulate_eip_list, > + h->rip, > + h->arc_cycles, > + 0, NULL); > + update_summary(&h->summary.pf_xen[PF_XEN_EMULATE], h->arc_cycles); > + update_summary(&h->summary.pf_xen_emul[e->pt_level], h->arc_cycles); > + if(h->prealloc_unpin) > + > update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_PREALLOC_UNPIN], > h->arc_cycles); > + if(e->flag_prealloc_unhook) > + > update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_PREALLOC_UNHOOK], > h->arc_cycles); > + if(e->flag_early_unshadow) > + > update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_EARLY_UNSHADOW], > h->arc_cycles); > + if(e->flag_set_changed) > + update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_SET_CHANGED], > h->arc_cycles); > + else > + > update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_SET_UNCHANGED], > h->arc_cycles); > + if(e->flag_set_flush) > + update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_SET_FLUSH], > h->arc_cycles); > + if(e->flag_set_error) > + update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_SET_ERROR], > h->arc_cycles); > + if(e->flag_promote) > + update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_PROMOTE], > h->arc_cycles); > + if(e->flag_demote) > + update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_DEMOTE], > h->arc_cycles); > + /* more summary info */ > + > + hvm_update_short_summary(h, HVM_SHORT_SUMMARY_EMULATE); > + } > + > + if(opt.scatterplot_unpin_promote) { > + if(e->flag_early_unshadow) > + scatterplot_vs_time(h->exit_tsc, -10); > + if(h->prealloc_unpin) > + scatterplot_vs_time(h->exit_tsc, 0); > + if(e->flag_promote) { > + if(opt.with_cr3_enumeration) { > + if(h->v->cr3.data) > + scatterplot_vs_time(h->exit_tsc, h->v->cr3.data->cr3_id); > + } else > + scatterplot_vs_time(h->exit_tsc, 2); > + } > + } > + > + > +} > + > +void shadow_emulate_process(struct record_info *ri, struct hvm_data *h) > +{ > + struct pf_xen_extra *e = &h->inflight.pf_xen; > + union { > + /* for PAE, guest_l1e 
may be 64 while guest_va may be 32; > + so put it first for alignment sake. */ > + struct { > + unsigned gl1e, write_val; > + unsigned va; > + unsigned flags:29, emulation_count:3; > + } gpl2; > + struct { > + unsigned long long gl1e, write_val; > + unsigned va; > + unsigned flags:29, emulation_count:3; > + } gpl3; > + struct { > + unsigned long long gl1e, write_val; > + unsigned long long va; > + unsigned flags:29, emulation_count:3; > + } gpl4; > + } *r = (typeof(r))ri->d; > + > + union shadow_event sevt = { .event = ri->event }; > + int rec_gpl = sevt.paging_levels + 2; > + > + if ( rec_gpl != h->v->guest_paging_levels ) > + { > + fprintf(warn, "%s: record paging levels %d, guest paging levels %d. > Switching.\n", > + __func__, rec_gpl, h->v->guest_paging_levels); > + h->v->guest_paging_levels = rec_gpl; > + } > + > + /* Fill in extended information */ > + switch(rec_gpl) > + { > + case 2: > + if(sizeof(r->gpl2) != ri->extra_words * 4) > + { > + fprintf(warn, "%s: expected %zd bytes for %d-level guest, got > %d!\n", > + __func__, sizeof(r->gpl2), h->v->guest_paging_levels, > + ri->extra_words * 4); > + > + error(ERR_RECORD, ri); > + return; > + } > + e->va = r->gpl2.va; > + e->flags = r->gpl2.flags; > + e->gl1e = r->gpl2.gl1e; > + e->wval = r->gpl2.write_val; > + break; > + case 3: > + if(sizeof(r->gpl3) != ri->extra_words * 4) > + { > + fprintf(warn, "%s: expected %zd bytes for %d-level guest, got > %d!\n", > + __func__, sizeof(r->gpl3), h->v->guest_paging_levels, > + ri->extra_words * 4); > + error(ERR_RECORD, ri); > + return; > + } > + e->va = r->gpl3.va; > + e->flags = r->gpl3.flags; > + e->gl1e = r->gpl3.gl1e; > + e->wval = r->gpl3.write_val; > + break; > + case 4: > + if(sizeof(r->gpl4) != ri->extra_words * 4) > + { > + fprintf(warn, "%s: expected %zd bytes for %d-level guest, got > %d!\n", > + __func__, sizeof(r->gpl4), h->v->guest_paging_levels, > + ri->extra_words * 4); > + error(ERR_RECORD, ri); > + return; > + } > + e->va = r->gpl4.va; > + e->flags = r->gpl4.flags; > + e->gl1e = r->gpl4.gl1e; > + e->wval = r->gpl4.write_val; > + break; > + } > + > + pf_preprocess(e,rec_gpl); > + > + if(opt.dump_all) > + printf("]%s emulate va %llx gl1e %8llx wval %8llx flags %s(%x) > pt_level %d corr %8llx\n", > + ri->dump_header, > + e->va, > + e->gl1e, e->wval, > + flag_string(e), e->flags, > + e->pt_level, e->corresponding_va); > + > + if ( hvm_set_postprocess(h, shadow_emulate_postprocess) ) > + fprintf(warn, "%s: Strange, postprocess already set\n", __func__); > +} > + > +struct shadow_emulate_other { > + unsigned long long gfn, va; > +}; > + > +#define SHADOW_OTHER_LOGS_GFN_NOT_GMFN 1 > + > +void shadow_parse_other(struct record_info *ri, > + struct shadow_emulate_other *o, > + struct hvm_data *h) { > + union { > + /* for PAE, guest_l1e may be 64 while guest_va may be 32; > + so put it first for alignment sake. */ > +#if SHADOW_OTHER_LOGS_GFN_NOT_GMFN > + /* D'OH! Accidentally used mfn_t in the struct, so gmfns are always > + 64-bit... :-/ */ > + struct { > + unsigned int gfn, va; > + } gpl2; > +#endif > + struct { > + unsigned long long gfn; > + unsigned int va; > + } gpl3; > + struct { > + unsigned long long gfn, va; > + } gpl4; > + } *r = (typeof(r))ri->d; > + > + > + union shadow_event sevt = { .event = ri->event }; > + int rec_gpl = sevt.paging_levels + 2; > + > + if ( rec_gpl != h->v->guest_paging_levels ) > + { > + fprintf(warn, "%s: record paging levels %d, guest paging levels %d. 
> Switching.\n", > + __func__, rec_gpl, h->v->guest_paging_levels); > + h->v->guest_paging_levels = rec_gpl; > + } > + > + switch(rec_gpl) > + { > +#if SHADOW_OTHER_LOGS_GFN_NOT_GMFN > + case 2: > + if(sizeof(r->gpl2) != ri->extra_words * 4) > + { > + fprintf(warn, "%s: expected %zd bytes for %d-level guest, got > %d!\n", > + __func__, sizeof(r->gpl2), rec_gpl, > + ri->extra_words * 4); > + error(ERR_RECORD, ri); > + return; > + } > + o->va = r->gpl2.va; > + o->gfn = r->gpl2.gfn; > + break; > +#else > + case 2: > + /* FALLTHRU */ > +#endif > + case 3: > + if(sizeof(r->gpl3) != ri->extra_words * 4) > + { > + fprintf(warn, "%s: expected %zd bytes for %d-level guest, got > %d!\n", > + __func__, sizeof(r->gpl3), rec_gpl, > + ri->extra_words * 4); > + error(ERR_RECORD, ri); > + return; > + } > + o->va = r->gpl3.va; > + o->gfn = r->gpl3.gfn; > + break; > + case 4: > + if(sizeof(r->gpl4) != ri->extra_words * 4) > + { > + fprintf(warn, "%s: expected %zd bytes for %d-level guest, got > %d!\n", > + __func__, sizeof(r->gpl4), rec_gpl, > + ri->extra_words * 4); > + error(ERR_RECORD, ri); > + return; > + } > + o->va = r->gpl4.va; > + o->gfn = r->gpl4.gfn; > + break; > + } > +} > + > +#if 0 > +void shadow_unsync_postprocess(struct hvm_data *h) > +{ > + struct pf_xen_extra *e = &h->inflight.pf_xen; > + > + if(h->resyncs > 1) > + fprintf(warn, "Strange, %d resyncs for an unsync!\n", > + h->resyncs); > + > + if(opt.summary_info) { > + update_summary(&h->summary.pf_xen[PF_XEN_EMULATE_UNSYNC], > + h->arc_cycles); > + if(h->resyncs <= 1) > + update_summary(&h->summary.pf_xen_unsync[h->resyncs], > + h->arc_cycles); > + } > +} > + > + > +void shadow_unsync_process(struct record_info *ri, struct hvm_data *h) > +{ > + struct pf_xen_extra *e = &h->inflight.pf_xen; > + struct shadow_emulate_other r; > + > + shadow_parse_other(ri, &r, h); > + > + e->gmfn = r.gmfn; > + e->va = r.va; > + > + pf_preprocess(e, h->v->guest_paging_levels); > + > + if(opt.dump_all) > + printf("]%s shadow unsync gmfn %llx va %llx pt_level %d corr %llx\n", > + ri->dump_header, > + e->gmfn, > + e->va, > + e->pt_level, > + e->corresponding_va); > + > + if ( hvm_set_postprocess(h, shadow_unsync_postprocess) ) > + fprintf(warn, "%s: Strange, postprocess already set\n", __func__); > +} > +#endif > + > +void shadow_fault_generic_postprocess(struct hvm_data *h); > + > +void shadow_emulate_other_process(struct record_info *ri, struct hvm_data *h) > +{ > + struct pf_xen_extra *e = &h->inflight.pf_xen; > + struct shadow_emulate_other r; > + union shadow_event sevt = { .event = ri->event }; > + > + shadow_parse_other(ri, &r, h); > + > + e->gfn = r.gfn; > + e->va = r.va; > + e->pf_case = sevt.minor; > + > + pf_preprocess(e, h->v->guest_paging_levels); > + > + if(opt.dump_all) > + printf("]%s shadow %s gfn %llx va %llx\n", > + ri->dump_header, > + pf_xen_name[sevt.minor], > + e->gfn, > + e->va); > + > + if ( hvm_set_postprocess(h, shadow_fault_generic_postprocess) ) > + fprintf(warn, "%s: Strange, postprocess already set\n", __func__); > +} > + > +void shadow_fixup_postprocess(struct hvm_data *h) > +{ > + struct pf_xen_extra *e = &h->inflight.pf_xen; > + > + if ( opt.summary_info ) > + { > + update_summary(&h->summary.pf_xen[PF_XEN_FIXUP], h->arc_cycles); > + if(h->prealloc_unpin) { > + > update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_PREALLOC_UNPIN], > h->arc_cycles); > + } > + if(e->flag_unsync) { > + update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_UNSYNC], > h->arc_cycles); > + if(h->resyncs < PF_XEN_FIXUP_UNSYNC_RESYNC_MAX) > + > 
update_summary(&h->summary.pf_xen_fixup_unsync_resync[h->resyncs], > + h->arc_cycles); > + else > + > update_summary(&h->summary.pf_xen_fixup_unsync_resync[PF_XEN_FIXUP_UNSYNC_RESYNC_MAX], > + h->arc_cycles); > + } > + if(e->flag_oos_fixup_add) > + update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_OOS_ADD], > h->arc_cycles); > + if(e->flag_oos_fixup_evict) > + update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_OOS_EVICT], > h->arc_cycles); > + if(e->flag_promote) > + update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_PROMOTE], > h->arc_cycles); > + if(e->flag_wrmap) { > + update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_WRMAP], > h->arc_cycles); > + if(e->flag_wrmap_brute_force || h->wrmap_bf) > + > update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_BRUTE_FORCE], > h->arc_cycles); > + } else if(e->flag_wrmap_brute_force || h->wrmap_bf) { > + fprintf(warn, "Strange: wrmap_bf but not wrmap!\n"); > + } > + > + > + if(!(e->flag_promote || h->prealloc_unpin || e->flag_unsync)) > + > update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_UPDATE_ONLY], > h->arc_cycles); > + /* more summary info */ > + > + if(e->flag_unsync) > + hvm_update_short_summary(h, HVM_SHORT_SUMMARY_UNSYNC); > + else > + hvm_update_short_summary(h, HVM_SHORT_SUMMARY_FIXUP); > + } > + > + if(opt.scatterplot_unpin_promote) { > + if(h->prealloc_unpin) > + scatterplot_vs_time(h->exit_tsc, 0); > + if(e->flag_promote) { > + if(opt.with_cr3_enumeration) { > + if(h->v->cr3.data) > + scatterplot_vs_time(h->exit_tsc, h->v->cr3.data->cr3_id); > + } else > + scatterplot_vs_time(h->exit_tsc, 2); > + } > + } > +} > + > +void shadow_fixup_process(struct record_info *ri, struct hvm_data *h) > +{ > + struct pf_xen_extra *e = &h->inflight.pf_xen; > + union { > + /* for PAE, guest_l1e may be 64 while guest_va may be 32; > + so put it first for alignment sake. */ > + struct { > + unsigned int gl1e, va, flags; > + } gpl2; > + struct { > + unsigned long long gl1e; > + unsigned int va, flags; > + } gpl3; > + struct { > + unsigned long long gl1e, va; > + unsigned int flags; > + } gpl4; > + } *r = (typeof(r))ri->d; > + union shadow_event sevt = { .event = ri->event }; > + int rec_gpl = sevt.paging_levels + 2; > + > + if ( rec_gpl != h->v->guest_paging_levels ) > + { > + fprintf(warn, "%s: record paging levels %d, guest paging levels %d. 
> Switching.\n", > + __func__, rec_gpl, h->v->guest_paging_levels); > + h->v->guest_paging_levels = rec_gpl; > + } > + > + switch(rec_gpl) > + { > + case 2: > + if(sizeof(r->gpl2) != ri->extra_words * 4) > + { > + fprintf(warn, "%s: expected %zd bytes for %d-level guest, got > %d!\n", > + __func__, sizeof(r->gpl2), h->v->guest_paging_levels, > + ri->extra_words * 4); > + error(ERR_RECORD, ri); > + return; > + } > + e->va = r->gpl2.va; > + e->flags = r->gpl2.flags; > + e->gl1e = r->gpl2.gl1e; > + break; > + case 3: > + if(sizeof(r->gpl3) != ri->extra_words * 4) > + { > + fprintf(warn, "%s: expected %zd bytes for %d-level guest, got > %d!\n", > + __func__, sizeof(r->gpl3), h->v->guest_paging_levels, > + ri->extra_words * 4); > + error(ERR_RECORD, ri); > + return; > + } > + e->va = r->gpl3.va; > + e->flags = r->gpl3.flags; > + e->gl1e = r->gpl3.gl1e; > + break; > + case 4: > + if(sizeof(r->gpl4) != ri->extra_words * 4) > + { > + fprintf(warn, "%s: expected %zd bytes for %d-level guest, got > %d!\n", > + __func__, sizeof(r->gpl4), h->v->guest_paging_levels, > + ri->extra_words * 4); > + error(ERR_RECORD, ri); > + return; > + } > + e->va = r->gpl4.va; > + e->flags = r->gpl4.flags; > + e->gl1e = r->gpl4.gl1e; > + break; > + } > + > + pf_preprocess(e,rec_gpl); > + > + if(opt.dump_all) > + { > + if ( e->flag_unsync ) > + printf("]%s fixup:unsync va %llx gl1e %llx corr %llx flags > (%x)%s\n", > + ri->dump_header, > + e->va, e->gl1e, > + e->corresponding_va, > + e->flags, > + flag_string(e)); > + else > + printf("]%s fixup va %llx gl1e %llx flags (%x)%s\n", > + ri->dump_header, > + e->va, e->gl1e, e->flags, > + flag_string(e)); > + } > + > + if ( hvm_set_postprocess(h, shadow_fixup_postprocess) ) > + fprintf(warn, "%s: Strange, postprocess already set\n", __func__); > +} > + > +void shadow_mmio_postprocess(struct hvm_data *h) > +{ > + struct pf_xen_extra *e = &h->inflight.pf_xen; > + if ( opt.summary_info ) > + { > + if(e->pf_case) > + update_summary(&h->summary.pf_xen[e->pf_case], > + h->arc_cycles); > + else > + fprintf(warn, "Strange, pf_case 0!\n"); > + > + hvm_update_short_summary(h, HVM_SHORT_SUMMARY_MMIO); > + } > + > + if(opt.with_mmio_enumeration) > + enumerate_mmio(h); > +} > + > +void shadow_mmio_process(struct record_info *ri, struct hvm_data *h) > +{ > + struct pf_xen_extra *e = &h->inflight.pf_xen; > + struct mmio_info *m = &h->inflight.mmio; > + union { > + /* for PAE, guest_l1e may be 64 while guest_va may be 32; > + so put it first for alignment sake. */ > + struct { > + unsigned int va; > + } gpl2; > + struct { > + unsigned long long va; > + } gpl4; > + } *r = (typeof(r))ri->d; > + union shadow_event sevt = { .event = ri->event }; > + int rec_gpl = sevt.paging_levels + 2; > + > + if ( rec_gpl != h->v->guest_paging_levels ) > + { > + fprintf(warn, "%s: record paging levels %d, guest paging levels %d. 
> Switching.\n", > + __func__, rec_gpl, h->v->guest_paging_levels); > + h->v->guest_paging_levels = rec_gpl; > + } > + > + switch(rec_gpl) > + { > + case 2: > + case 3: > + if(sizeof(r->gpl2) != ri->extra_words * 4) > + { > + fprintf(warn, "%s: expected %zd bytes for %d-level guest, got > %d!\n", > + __func__, sizeof(r->gpl2), h->v->guest_paging_levels, > + ri->extra_words * 4); > + error(ERR_RECORD, ri); > + return; > + } > + e->va = m->va = r->gpl2.va; > + break; > + case 4: > + if(sizeof(r->gpl4) != ri->extra_words * 4) > + { > + fprintf(warn, "%s: expected %zd bytes for %d-level guest, got > %d!\n", > + __func__, sizeof(r->gpl4), h->v->guest_paging_levels, > + ri->extra_words * 4); > + error(ERR_RECORD, ri); > + return; > + } > + e->va = m->va = r->gpl4.va; > + break; > + } > + > + if(opt.dump_all) > + printf("]%s %smmio va %llx\n", > + ri->dump_header, > + (e->pf_case==PF_XEN_FAST_MMIO)?"fast ":"", > + e->va); > + > + if ( hvm_set_postprocess(h, shadow_mmio_postprocess) ) > + fprintf(warn, "%s: Strange, postprocess already set\n", __func__); > +} > + > +void shadow_propagate_postprocess(struct hvm_data *h) > +{ > + struct pf_xen_extra *e = &h->inflight.pf_xen; > + > + if ( opt.summary_info ) > + { > + if(e->pf_case) > + update_summary(&h->summary.pf_xen[e->pf_case], > + h->arc_cycles); > + else > + fprintf(warn, "Strange, pf_case 0!\n"); > + > + hvm_update_short_summary(h, HVM_SHORT_SUMMARY_PROPAGATE); > + } > +} > + > +void shadow_propagate_process(struct record_info *ri, struct hvm_data *h) > +{ > + struct pf_xen_extra *e = &h->inflight.pf_xen; > + union { > + /* for PAE, guest_l1e may be 64 while guest_va may be 32; > + so put it first for alignment sake. */ > + struct { > + unsigned int gl1e, va, flags; > + } gpl2; > + struct { > + unsigned long long gl1e; > + unsigned int va, flags; > + } gpl3; > + struct { > + unsigned long long gl1e, va; > + unsigned int flags; > + } gpl4; > + } *r = (typeof(r))ri->d; > + union shadow_event sevt = { .event = ri->event }; > + int rec_gpl = sevt.paging_levels + 2; > + > + if ( rec_gpl != h->v->guest_paging_levels ) > + { > + fprintf(warn, "%s: record paging levels %d, guest paging levels %d. 
> Switching.\n", > + __func__, rec_gpl, h->v->guest_paging_levels); > + h->v->guest_paging_levels = rec_gpl; > + } > + > + switch(rec_gpl) > + { > + case 2: > + if(sizeof(r->gpl2) != ri->extra_words * 4) > + { > + fprintf(warn, "%s: expected %zd bytes for %d-level guest, got > %d!\n", > + __func__, sizeof(r->gpl2), h->v->guest_paging_levels, > + ri->extra_words * 4); > + error(ERR_RECORD, ri); > + return; > + } > + e->va = r->gpl2.va; > + e->flags = r->gpl2.flags; > + e->gl1e = r->gpl2.gl1e; > + break; > + case 3: > + if(sizeof(r->gpl3) != ri->extra_words * 4) > + { > + fprintf(warn, "%s: expected %zd bytes for %d-level guest, got > %d!\n", > + __func__, sizeof(r->gpl3), h->v->guest_paging_levels, > + ri->extra_words * 4); > + error(ERR_RECORD, ri); > + return; > + } > + e->va = r->gpl3.va; > + e->flags = r->gpl3.flags; > + e->gl1e = r->gpl3.gl1e; > + break; > + case 4: > + if(sizeof(r->gpl4) != ri->extra_words * 4) > + { > + fprintf(warn, "%s: expected %zd bytes for %d-level guest, got > %d!\n", > + __func__, sizeof(r->gpl4), h->v->guest_paging_levels, > + ri->extra_words * 4); > + error(ERR_RECORD, ri); > + return; > + } > + e->va = r->gpl4.va; > + e->flags = r->gpl4.flags; > + e->gl1e = r->gpl4.gl1e; > + break; > + } > + > + if(opt.dump_all) > + printf("]%s propagate va %llx gl1e %llx flags (%x)%s\n", > + ri->dump_header, > + e->va, e->gl1e, e->flags, > + flag_string(e)); > + > + if ( hvm_set_postprocess(h, shadow_propagate_postprocess) ) > + fprintf(warn, "%s: Strange, postprocess already set\n", __func__); > +} > + > +void shadow_fault_generic_dump(unsigned int event, uint32_t *d, char *prefix, > + char * dump_header) > +{ > + char *evt_string, evt_number[10]; > + union shadow_event sevt = { .event = event }; > + int i; > + > + if(sevt.minor < PF_XEN_MAX && pf_xen_name[sevt.minor]) > + { > + evt_string = pf_xen_name[sevt.minor]; > + } > + else > + { > + snprintf(evt_number, 10, "%d", sevt.minor); > + evt_string = evt_number; > + } > + > + printf("%s%s shadow %s gl %d [", > + prefix, > + dump_header, > + evt_string, > + sevt.paging_levels); > + > + for(i=0; i<4; i++) > + { > + printf(" %x", d[i]); > + } > + > + printf(" ]\n"); > +} > + > +void shadow_fault_generic_postprocess(struct hvm_data *h) > +{ > + struct pf_xen_extra *e = &h->inflight.pf_xen; > + if ( e->pf_case < PF_XEN_NOT_SHADOW || e->pf_case > PF_XEN_LAST_FAULT ) > + { > + fprintf(warn, "%s: Strange, unexpected case %d\n", > + __func__, e->pf_case); > + return; > + } > + > + if(opt.summary_info) { > + update_summary(&h->summary.pf_xen[e->pf_case], > + h->arc_cycles); > + > + hvm_update_short_summary(h, HVM_SHORT_SUMMARY_PROPAGATE); > + } > +} > + > +void shadow_fault_generic_process(struct record_info *ri, struct hvm_data *h) > +{ > + union shadow_event sevt = { .event = ri->event }; > + > + /* pf-case traces, vs others */ > + h->inflight.generic.event = ri->event; > + bcopy(ri->d, h->inflight.generic.d, sizeof(unsigned int) * 4); > + > + if(opt.dump_all) > + shadow_fault_generic_dump(h->inflight.generic.event, > + h->inflight.generic.d, > + "]", ri->dump_header); > + > + h->inflight.pf_xen.pf_case = sevt.minor; > + if ( hvm_set_postprocess(h, shadow_fault_generic_postprocess) ) > + fprintf(warn, "%s: Strange, postprocess already set\n", __func__); > +} > + > +void shadow_resync_process(struct record_info *ri, struct hvm_data *h) > +{ > + struct { > + unsigned long long gfn; > + } *r = (typeof(r))ri->d; > + > + if(opt.dump_all) > + printf(" %s oos resync %s gfn %llx\n", > + ri->dump_header, > + (ri->event == 
TRC_SHADOW_RESYNC_FULL)?"full":"only", > + r->gfn); > + > + h->resyncs++; > +} > + > +void shadow_prealloc_unpin_process(struct record_info *ri, struct hvm_data > *h) { > + struct { > + unsigned long long gfn; > + } *r = (typeof(r))ri->d; > + > + if(opt.dump_all) > + printf(" %s prealloc-unpin gfn %llx\n", > + ri->dump_header, r->gfn); > + > + if(h->prealloc_unpin) > + fprintf(warn, "Strange, more than one prealloc_unpin per arc!\n"); > + > + h->prealloc_unpin = 1; > + > + if(opt.with_cr3_enumeration) > + cr3_prealloc_unpin(h->v, r->gfn); > +} > + > +void shadow_wrmap_bf_process(struct record_info *ri, struct hvm_data *h) { > + struct { > + unsigned long long gfn; > + } *r = (typeof(r))ri->d; > + > + if(opt.dump_all) > + printf(" %s wrmap-bf gfn %llx\n", > + ri->dump_header, r->gfn); > + > + h->wrmap_bf = 1; > +} > + > +void shadow_process(struct pcpu_info *p) > +{ > + struct record_info *ri = &p->ri; > + struct hvm_data *h; > + > + union shadow_event sevt = { .event = ri->event }; > + > + int gpl = sevt.paging_levels + 2; > + > + assert(p->current); > + if(vcpu_set_data_type(p->current, VCPU_DATA_HVM)) > + return; > + > + h = &p->current->hvm; > + > + if(!h->init || !h->vmexit_valid) > + { > + if(opt.dump_all) > + shadow_fault_generic_dump(ri->event, > + ri->d, > + "!", ri->dump_header); > + return; > + } > + > + if(sevt.minor <= PF_XEN_NOT_SHADOW) { > + if(p->current->guest_paging_levels != gpl) > + { > + fprintf(warn, "%s: Changing guest paging levels to %d\n", > + __func__, gpl); > + p->current->guest_paging_levels = gpl; > + } > + } > + > + if(sevt.minor <= PF_XEN_LAST_FAULT) { > + h->inflight.pf_xen.pf_case = sevt.minor; > + if(opt.summary) { > + hvm_set_summary_handler(h, hvm_pf_xen_summary, NULL); > + } > + } > + > + /* FIXME - mask out paging levels */ > + switch(sevt.minor) > + { > + case PF_XEN_NOT_SHADOW: > + shadow_propagate_process(ri, h); > + break; > + case PF_XEN_EMULATE: > + shadow_emulate_process(ri, h); > + break; > + case PF_XEN_FIXUP: > + shadow_fixup_process(ri, h); > + break; > + case PF_XEN_MMIO: > + case PF_XEN_FAST_MMIO: > + shadow_mmio_process(ri, h); > + break; > + case PF_XEN_EMULATE_UNSHADOW_USER: > + case PF_XEN_EMULATE_UNSHADOW_EVTINJ: > + case PF_XEN_EMULATE_UNSHADOW_UNHANDLED: > + shadow_emulate_other_process(ri, h); > + break; > +#if 0 > + case PF_XEN_EMULATE_UNSYNC: > + shadow_unsync_process(ri, h); > + break; > +#endif > + case SHADOW_RESYNC_FULL: > + case SHADOW_RESYNC_ONLY: > + shadow_resync_process(ri, h); > + break; > + case SHADOW_PREALLOC_UNPIN: > + shadow_prealloc_unpin_process(ri, h); > + break; > + case SHADOW_WRMAP_BF: > + shadow_wrmap_bf_process(ri, h); > + break; > + default: > + if(sevt.minor <= PF_XEN_LAST_FAULT) { > + shadow_fault_generic_process(ri, h); > + } else { > + warn_once("Warning: processing shadow as generic\n"); > + process_generic(ri); > + } > + break; > + } > +} > + > +/* ---- PV guests ---- */ > +union pv_event { > + unsigned event; > + struct { > + unsigned minor:8, > + x64:1, > + unused1:3, > + sub:4, > + main:12, > + unused:4; > + }; > +}; > + > +void pv_hypercall_process(struct record_info *ri, struct pv_data *pv) { > + union { > + struct { > + uint32_t eip, eax; > + } x32; > + struct { > + uint64_t eip; > + uint32_t eax; > + } x64; > + } * r = (typeof(r)) ri->d; > + union pv_event pevt = { .event = ri->event }; > + unsigned long long eip; > + unsigned int eax; > + > + if(pevt.x64) { > + eip = r->x64.eip; > + eax = r->x64.eax; > + } else { > + eip = r->x32.eip; > + eax = r->x32.eax; > + } > + > + if(opt.summary_info) { 
> + if(eax < PV_HYPERCALL_MAX) > + pv->hypercall_count[eax]++; > + } > + > + if(opt.dump_all) { > + if(eax < HYPERCALL_MAX) > + printf(" %s hypercall %2x (%s) eip %llx\n", > + ri->dump_header, eax, > + hypercall_name[eax], eip); > + else > + printf(" %s hypercall %x eip %llx\n", > + ri->dump_header, eax, eip); > + } > +} > + > +void pv_trap_process(struct record_info *ri, struct pv_data *pv) { > + union { > + struct { > + unsigned int eip; > + unsigned trapnr:15, > + use_error_code:1, > + error_code:16; > + } x32; > + struct { > + unsigned long long eip; > + unsigned trapnr:15, > + use_error_code:1, > + error_code:16; > + } x64; > + } * r = (typeof(r)) ri->d; > + union pv_event pevt = { .event = ri->event }; > + unsigned long long eip; > + unsigned trapnr, use_ec, ec; > + > + if(pevt.x64) { > + eip = r->x64.eip; > + trapnr = r->x64.trapnr; > + use_ec = r->x64.use_error_code; > + ec = r->x64.error_code; > + } else { > + eip = r->x32.eip; > + trapnr = r->x32.trapnr; > + use_ec = r->x32.use_error_code; > + ec = r->x32.error_code; > + } > + > + if(opt.summary_info) { > + if(trapnr < PV_TRAP_MAX) > + pv->trap_count[trapnr]++; > + } > + > + if(opt.dump_all) { > + printf(" %s trap %x eip %llx", > + ri->dump_header, trapnr, eip); > + if(use_ec) > + printf(" ec %x\n", ec); > + else > + printf("\n"); > + } > +} > + > +void pv_ptwr_emulation_process(struct record_info *ri, struct pv_data *pv) { > + union pv_event pevt = { .event = ri->event }; > + union { > + /* gpl2 is deprecated */ > + struct { > + unsigned long long pte; > + unsigned int addr, eip; > + } gpl3; > + struct { > + unsigned long long pte; > + unsigned long long addr, eip; > + } gpl4; > + } *r = (typeof(r))ri->d; > + struct { > + unsigned long long pte, addr, eip; > + } e; > + > + switch ( pevt.minor ) { > + case PV_PTWR_EMULATION_PAE: > + if ( pevt.x64 ) > + { > + fprintf(warn, "Strange: PV_PTWR_EMULATION, but x64! %x\n", > + ri->event); > + error(ERR_RECORD, ri); > + } > + e.pte = r->gpl3.pte; > + e.addr = r->gpl3.addr; > + e.eip = r->gpl3.eip; > + break; > + case PV_PTWR_EMULATION: > + if ( !pevt.x64 ) > + { > + fprintf(warn, "Strange: PV_PTWR_EMULATION, but !x64! 
%x\n", > + ri->event); > + error(ERR_RECORD, ri); > + } > + e.pte = r->gpl4.pte; > + e.addr = r->gpl4.addr; > + e.eip = r->gpl4.eip; > + break; > + default: > + fprintf(warn, "ERROR: Unknown PV_PTRW minor type %d!\n", > + pevt.minor); > + error(ERR_RECORD, ri); > + return; > + } > + > + if ( opt.dump_all ) > + { > + printf(" %s ptwr l1e %llx eip %llx addr %llx\n", > + ri->dump_header, > + e.pte, e.eip, e.addr); > + } > +} > + > +void pv_generic_process(struct record_info *ri, struct pv_data *pv) { > + union pv_event pevt = { .event = ri->event }; > + if ( opt.dump_all ) { > + if(pevt.minor < PV_MAX && pv_name[pevt.minor]) > + printf(" %s %s", > + ri->dump_header, > + pv_name[pevt.minor]); > + else > + printf(" %s PV-%d ", > + ri->dump_header, pevt.minor); > + if (ri->extra_words) { > + int i; > + printf("[ "); > + for(i=0; i<ri->extra_words; i++) { > + printf("%x ", (unsigned)ri->d[i]); > + } > + printf("]"); > + > + } > + printf("\n"); > + } > +} > + > +void pv_summary(struct pv_data *pv) { > + int i, j; > + > + if(!pv->summary_info) > + return; > + > + printf("PV events:\n"); > + for(i=0; i<PV_MAX; i++) { > + int count; > + > + count = pv->count[i]; > + if (i == PV_HYPERCALL_V2) > + count += pv->count[PV_HYPERCALL_SUBCALL]; > + > + if (count == 0) > + continue; > + > + printf(" %s %d\n", pv_name[i], count); > + > + switch(i) { > + case PV_HYPERCALL: > + case PV_HYPERCALL_V2: > + for(j=0; j<PV_HYPERCALL_MAX; j++) { > + if(pv->hypercall_count[j]) > + printf(" %-29s[%2d]: %6d\n", > + hypercall_name[j], > + j, > + pv->hypercall_count[j]); > + } > + break; > + case PV_TRAP: > + for(j=0; j<PV_TRAP_MAX; j++) { > + if(pv->trap_count[j]) > + printf(" [%d] %d\n", > + j, pv->trap_count[j]); > + } > + break; > + } > + } > +} > + > +static const char *grant_table_op_str[] = { > + "map_grant_ref", "unmap_grant_ref", "setup_table", "dump_table", > + "transfer", "copy", "query_size", "unmap_and_replace", > + "set_version", "get_status_frames", "get_version", "swap_grant_ref", > +}; > + > +static const char *vcpu_op_str[] = { > + "initialise", "up", "down", "is_up", "get_runstate_info", > + "register_runstate_memory_area", "set_periodic_timer", > + "stop_periodic_timer", "set_singleshot_timer", "stop_singleshot_timer", > + "register_vcpu_info", "send_nmi", "get_physid", > + "register_vcpu_time_memory_area", > +}; > + > +static const char *sched_op_str[] = { > + "yield", "block", "shutdown", "poll", "remote_shutdown", "shutdown_code", > + "watchdog", > +}; > + > +static const char *cmd_to_str(const char *strings[], size_t n, uint32_t cmd) > +{ > + static char buf[32]; > + > + if (cmd < n) > + return strings[cmd]; > + > + snprintf(buf, sizeof(buf), "unknown (%d)", cmd); > + return buf; > +} > + > +#define CMD_TO_STR(op) \ > + static const char * op ## _to_str(uint32_t cmd) { \ > + return cmd_to_str(op ## _str, ARRAY_SIZE(op ## _str), cmd); \ > + } > + > +CMD_TO_STR(grant_table_op); > +CMD_TO_STR(vcpu_op); > +CMD_TO_STR(sched_op); > + > +void pv_hypercall_gather_args(const struct record_info *ri, uint64_t *args) > +{ > + int i, word; > + > + /* Missing arguments are zeroed. */ > + memset(args, 0, 6 * sizeof(uint64_t)); > + > + for (i = 0, word = 1; i < 6 && word < ri->extra_words; i++) { > + int present = pv_hypercall_arg_present(ri, i); > + > + switch (present) { > + case ARG_32BIT: > + args[i] = ri->d[word]; > + break; > + case ARG_64BIT: > + args[i] = ((uint64_t)ri->d[word + 1] << 32) | ri->d[word]; > + break; > + } > + > + /* Skip over any words for this argument. 
> +        word += present;
> +    }
> +}
> +
> +static void pv_hypercall_print_args(const struct record_info *ri)
> +{
> +    int i, word;
> +
> +    for (i = 0, word = 1; i < 6 && word < ri->extra_words; i++) {
> +        int present = pv_hypercall_arg_present(ri, i);
> +
> +        switch (present) {
> +        case ARG_MISSING:
> +            printf(" ??");
> +            break;
> +        case ARG_32BIT:
> +            printf(" %08x", ri->d[word]);
> +            break;
> +        case ARG_64BIT:
> +            printf(" %016"PRIx64, ((uint64_t)ri->d[word + 1] << 32) | ri->d[word]);
> +            break;
> +        }
> +
> +        word += present;
> +    }
> +}
> +
> +void pv_hypercall_v2_process(struct record_info *ri, struct pv_data *pv,
> +                             const char *indent)
> +{
> +    int op = pv_hypercall_op(ri);
> +
> +    if(opt.summary_info) {
> +        if(op < PV_HYPERCALL_MAX)
> +            pv->hypercall_count[op]++;
> +    }
> +
> +    if(opt.dump_all) {
> +        uint64_t args[6];
> +
> +        if(op < HYPERCALL_MAX)
> +            printf(" %s%s hypercall %2x (%s)",
> +                   ri->dump_header, indent, op, hypercall_name[op]);
> +        else
> +            printf(" %s%s hypercall %2x",
> +                   ri->dump_header, indent, op);
> +
> +        switch(op) {
> +        case HYPERCALL_mmu_update:
> +            pv_hypercall_gather_args(ri, args);
> +            printf(" %d updates%s", (uint32_t)args[1] & ~MMU_UPDATE_PREEMPTED,
> +                   (args[1] & MMU_UPDATE_PREEMPTED) ? " (preempted)" : "");
> +            break;
> +        case HYPERCALL_multicall:
> +            pv_hypercall_gather_args(ri, args);
> +            printf(" %d calls", (uint32_t)args[1]);
> +            break;
> +        case HYPERCALL_grant_table_op:
> +            pv_hypercall_gather_args(ri, args);
> +            printf(" %s %d ops", grant_table_op_to_str(args[0]), (uint32_t)args[2]);
> +            break;
> +        case HYPERCALL_vcpu_op:
> +            pv_hypercall_gather_args(ri, args);
> +            printf(" %s vcpu %d", vcpu_op_to_str(args[0]), (uint32_t)args[1]);
> +            break;
> +        case HYPERCALL_mmuext_op:
> +            pv_hypercall_gather_args(ri, args);
> +            printf(" %d ops", (uint32_t)args[1]);
> +            break;
> +        case HYPERCALL_sched_op:
> +            pv_hypercall_gather_args(ri, args);
> +            printf(" %s", sched_op_to_str(args[0]));
> +            break;
> +        default:
> +            pv_hypercall_print_args(ri);
> +            break;
> +        }
> +        printf("\n");
> +    }
> +}
> +
> +void pv_process(struct pcpu_info *p)
> +{
> +    struct record_info *ri = &p->ri;
> +    struct vcpu_data *v = p->current;
> +    struct pv_data *pv = &v->pv;
> +
> +    union pv_event pevt = { .event = ri->event };
> +
> +    if(vcpu_set_data_type(p->current, VCPU_DATA_PV))
> +        return;
> +
> +    if(opt.summary_info) {
> +        pv->summary_info=1;
> +
> +        if(pevt.minor == PV_PTWR_EMULATION_PAE)
> +            pv->count[PV_PTWR_EMULATION]++;
> +        else
> +            pv->count[pevt.minor]++;
> +    }
> +
> +    switch(pevt.minor)
> +    {
> +    case PV_HYPERCALL:
> +        pv_hypercall_process(ri, pv);
> +        break;
> +    case PV_TRAP:
> +        pv_trap_process(ri, pv);
> +        break;
> +    case PV_PTWR_EMULATION:
> +    case PV_PTWR_EMULATION_PAE:
> +        pv_ptwr_emulation_process(ri, pv);
> +        break;
> +    case PV_HYPERCALL_V2:
> +        pv_hypercall_v2_process(ri, pv, "");
> +        break;
> +    case PV_HYPERCALL_SUBCALL:
> +        pv_hypercall_v2_process(ri, pv, " ");
> +        break;
> +    default:
> +        pv_generic_process(ri, pv);
> +        break;
> +    }
> +}
> +
> +/* ---- Schedule ---- */
> +struct vcpu_data * vcpu_create(struct domain_data *d, int vid)
> +{
> +    struct vcpu_data *v;
> +
> +    assert(d->vcpu[vid] == NULL);
> +
> +    fprintf(warn, "Creating vcpu %d for dom %d\n", vid, d->did);
> +
> +    if((v=malloc(sizeof(*v)))==NULL)
> +    {
> +        fprintf(stderr, "%s: malloc %zd failed!\n", __func__, sizeof(*v));
> +        error(ERR_SYSTEM, NULL);
> +    }
> +
> +    bzero(v, sizeof(*v));
> +
> +    v->vid = vid;
> +    v->d = d;
> +    v->p = NULL;
> +    v->runstate.state = RUNSTATE_INIT;
> +
v->runstate.last_oldstate.wrong = RUNSTATE_INIT; > + > + d->vcpu[vid] = v; > + > + assert(v == v->d->vcpu[v->vid]); > + > + if(vid > d->max_vid) > + d->max_vid = vid; > + > + return v; > +} > + > +/* Called by both domain_create and sched_default_domain_init */ > +void domain_init(struct domain_data *d, int did) > +{ > + bzero(d, sizeof(*d)); > + > + d->did = did; > + d->next = NULL; > + > + if(opt.interval.check == INTERVAL_CHECK_DOMAIN) > + interval_domain_value_check(d); > +} > + > +struct domain_data * domain_create(int did) > +{ > + struct domain_data *d; > + > + fprintf(warn, "Creating domain %d\n", did); > + > + if((d=malloc(sizeof(*d)))==NULL) > + { > + fprintf(stderr, "%s: malloc %zd failed!\n", __func__, sizeof(*d)); > + error(ERR_SYSTEM, NULL); > + } > + > + /* Initialize domain & vcpus */ > + domain_init(d, did); > + > + return d; > + } > + > +struct domain_data * domain_find(int did) > +{ > + struct domain_data *d, *n, **q; > + > + /* Look for domain, keeping track of the last pointer so we can add > + a domain if we need to. */ > + for ( d = domain_list, q=&domain_list ; > + d && (d->did < did) ; > + q = &d->next, d=d->next ) ; > + > + if(d && d->did == did) > + return d; > + > + /* Make a new domain */ > + n = domain_create(did); > + > + /* Insert it into the list */ > + n->next = d; > + *q = n; > + > + return n; > +} > + > +struct vcpu_data * vcpu_find(int did, int vid) > +{ > + struct domain_data *d; > + struct vcpu_data *v; > + > + d = domain_find(did); > + > + v = d->vcpu[vid]; > + > + if(!v) > + v = vcpu_create(d, vid); > + > + return v; > +} > + > +void pcpu_runstate_update(struct pcpu_info *p, tsc_t tsc) > +{ > + if ( p->time.tsc ) > + { > + if ( p->current->d->did == IDLE_DOMAIN ) > + update_cycles(&p->time.idle, tsc - p->time.tsc); > + else > + update_cycles(&p->time.running, tsc - p->time.tsc); > + p->time.tsc = 0; > + } > +} > + > +void vcpu_prev_update(struct pcpu_info *p, struct vcpu_data *prev, > + tsc_t tsc, int new_runstate) > +{ > + assert(prev == prev->d->vcpu[prev->vid]); > + > + if(prev->p != p) > + { > + fprintf(warn, "Strange, sched_switch on pcpu %d, prev->pcpu %d!\n", > + p->pid, prev->p->pid); > + prev->runstate.tsc = 0; > + goto set; > + } > + > + //assert(p->current); > + > + if ( !p->current ) > + { > + fprintf(warn, "%s: FATAL: p->current NULL!\n", __func__); > + error(ERR_ASSERT, NULL); > + } > + > + if(p->current != prev) > + { > + fprintf(warn, "Strange, sched_switch prev d%dv%d, pcpu %d current > d%dv%d!\n", > + prev->d->did, prev->vid, > + p->pid, p->current->d->did, p->current->vid); > + prev->runstate.tsc = 0; > + goto set; > + } > + > + if(prev->runstate.state != RUNSTATE_RUNNING) > + { > + fprintf(warn, "Strange, prev d%dv%d not running!\n", > + prev->d->did, prev->vid); > + prev->runstate.tsc = 0; > + goto set; > + } > + > +set: > + pcpu_runstate_update(p, tsc); > + p->current = NULL; > + pcpu_string_draw(p); > + runstate_update(prev, new_runstate, tsc); > +} > + > +void vcpu_next_update(struct pcpu_info *p, struct vcpu_data *next, tsc_t tsc) > +{ > + assert(next == next->d->vcpu[next->vid]); > + //assert(p->current == NULL); > + > + if ( p->current != NULL ) > + { > + if ( p->lost_record.seen_valid_schedule == 0 ) > + { > + fprintf(warn, "%s: p->current non-NULL, but seen_valid_schedule > 0. Ignoring.\n", > + __func__); > + runstate_update(p->current, RUNSTATE_LOST, tsc); > + p->current = NULL; > + } > + else > + { > + fprintf(warn, "%s: FATAL: p->current not NULL! 
(d%dv%d, runstate > %s)\n", > + __func__, > + p->current->d->did, > + p->current->vid, > + runstate_name[p->current->runstate.state]); > + error(ERR_ASSERT, NULL); > + } > + } > + > + if(next->activated) > + { > + /* We may get lost records at start-of-day, so ignore > + setting runstate of default vcpus */ > + if(next->runstate.state == RUNSTATE_RUNNING > + && next->d->did != DEFAULT_DOMAIN) > + { > + fprintf(warn, "Strange, next d%dv%d already running on proc > %d!\n", > + next->d->did, next->vid, > + next->p->pid); > + next->runstate.tsc = 0; > + } > + > + /* If we're moving from one pcpu to another, record change & update > tsc */ > + if(next->p != p) { > + if(next->pcpu_tsc) > + { > + update_cycles(&next->cpu_affinity_all, tsc - next->pcpu_tsc); > + update_cycles(&next->cpu_affinity_pcpu[p->pid], tsc - > next->pcpu_tsc); > + } > + next->pcpu_tsc = tsc; > + } > + } > + else > + { > + next->guest_paging_levels = opt.default_guest_paging_levels; > + next->activated = 1; > + next->pcpu_tsc = tsc; > + } > + > + runstate_update(next, RUNSTATE_RUNNING, tsc); > + > + if ( opt.scatterplot_pcpu > + && next->d->did != IDLE_DOMAIN > + && next->d->did != DEFAULT_DOMAIN ) > + { > + struct time_struct t; > + > + abs_cycles_to_time(tsc, &t); > + > + if ( next->p ) > + printf("%dv%d %u.%09u %d\n", > + next->d->did, next->vid, > + t.s, t.ns, > + next->p->pid); > + printf("%dv%d %u.%09u %d\n", > + next->d->did, next->vid, > + t.s, t.ns, > + p->pid); > + } > + > + next->p = p; > + p->current = next; > + pcpu_string_draw(p); > + p->time.tsc = tsc; > + p->lost_record.seen_valid_schedule = 1; > +} > + > +/* If current is the default domain, we're fixing up from something > + * like start-of-day. Update what we can. */ > +void vcpu_start(struct pcpu_info *p, struct vcpu_data *v) { > + /* If vcpus are created, or first show up, in a "dead zone", this will > + * fail. 
*/ > + if( !p->current || p->current->d->did != DEFAULT_DOMAIN) { > + fprintf(stderr, "Strange, p->current not default domain!\n"); > + error(ERR_FILE, NULL); > + return; > + } > + > + if(!p->first_tsc) { > + fprintf(stderr, "Strange, p%d first_tsc 0!\n", p->pid); > + error(ERR_FILE, NULL); > + } > + > + if(p->first_tsc <= p->current->runstate.tsc) { > + fprintf(stderr, "Strange, first_tsc %llx < default_domain runstate > tsc %llx!\n", > + p->first_tsc, > + p->current->runstate.tsc); > + error(ERR_FILE, NULL); > + } > + > + /* Change default domain to 'queued' */ > + runstate_update(p->current, RUNSTATE_QUEUED, p->first_tsc); > + > + /* FIXME: Copy over data from the default domain this interval */ > + fprintf(warn, "Using first_tsc for d%dv%d (%lld cycles)\n", > + v->d->did, v->vid, p->last_tsc - p->first_tsc); > + > + /* Simulate the time since the first tsc */ > + runstate_update(v, RUNSTATE_RUNNING, p->first_tsc); > + p->time.tsc = p->first_tsc; > + p->current = v; > + pcpu_string_draw(p); > + v->p = p; > +} > + > +void sched_runstate_process(struct pcpu_info *p) > +{ > + enum { > + CHANGE=0, > + CONTINUE > + } type; > + struct vcpu_data *v; > + struct record_info *ri = &p->ri; > + struct { > + unsigned vcpu:16, dom:16; > + unsigned long long p1, p2; > + } __attribute__((packed)) * r = (typeof(r))ri->d; > + union { > + unsigned int event; > + struct { > + unsigned lo:4, > + new_runstate:4, > + old_runstate:4, > + sub:4, > + main:12, > + unused:4; > + }; > + } _sevt = { .event = ri->event }; > + struct { > + int new_runstate, old_runstate; > + } sevt; > + int perfctrs; > + struct last_oldstate_struct last_oldstate; > + > + switch(_sevt.lo) > + { > + case 1: > + type = CHANGE; > + sevt.new_runstate = _sevt.new_runstate; > + sevt.old_runstate = _sevt.old_runstate; > + break; > + case 2: > + type = CONTINUE; > + sevt.new_runstate = sevt.old_runstate = RUNSTATE_RUNNING; > + break; > + default: > + fprintf(warn, "FATAL: Unexpected runstate change type %d!\n", > + _sevt.lo); > + error(ERR_RECORD, NULL); > + return; > + } > + > + perfctrs = (ri->extra_words == 5); > + > + if(opt.dump_all) { > + if( perfctrs ) { > + printf(" %s %s {%lld,%lld} d%uv%u %s->%s\n", > + ri->dump_header, > + type?"runstate_continue":"runstate_change", > + r->p1, r->p2, > + r->dom, r->vcpu, > + runstate_name[sevt.old_runstate], > + runstate_name[sevt.new_runstate]); > + } else { > + printf(" %s %s d%uv%u %s->%s\n", > + ri->dump_header, > + type?"runstate_continue":"runstate_change", > + r->dom, r->vcpu, > + runstate_name[sevt.old_runstate], > + runstate_name[sevt.new_runstate]); > + } > + } > + > + /* Sanity check: expected transitions */ > + if ( type == CHANGE ) > + { > + if( (sevt.new_runstate == RUNSTATE_RUNNING > + && sevt.old_runstate != RUNSTATE_RUNNABLE) > + || (sevt.new_runstate == RUNSTATE_BLOCKED > + && sevt.old_runstate == RUNSTATE_RUNNABLE ) ) > + { > + fprintf(warn, "Strange, d%dv%d unexpected runstate transition > %s->%s\n", > + r->dom, r->vcpu, > + runstate_name[sevt.old_runstate], > + runstate_name[sevt.new_runstate]); > + } > + } > + > + if(r->vcpu > MAX_CPUS) > + { > + fprintf(warn, "%s: vcpu %u > MAX_VCPUS %d!\n", > + __func__, r->vcpu, MAX_CPUS); > + return; > + } > + > + v = vcpu_find(r->dom, r->vcpu); > + > + /* We want last_oldstate reset every time; so copy the last one and use > + * that locally, clobbering the one in the vcpu struct. If it needs to > + * be reset, it will be reset below. 
*/
> +    last_oldstate = v->runstate.last_oldstate;
> +    v->runstate.last_oldstate.wrong = RUNSTATE_INIT;
> +
> +    /* Close vmexits when the putative reason for blocking / &c stops.
> +     * This way, we don't account cpu contention to some other overhead. */
> +    if(sevt.new_runstate == RUNSTATE_RUNNABLE
> +       && v->data_type == VCPU_DATA_HVM
> +       && v->hvm.vmexit_valid) {
> +        hvm_close_vmexit(&v->hvm, ri->tsc);
> +    }
> +
> +    /* Track waking state */
> +    if ( v->data_type == VCPU_DATA_HVM && v->runstate.state != RUNSTATE_LOST ) {
> +        if ( sevt.new_runstate == RUNSTATE_RUNNABLE
> +             && sevt.old_runstate == RUNSTATE_BLOCKED )
> +        {
> +            /* Hmm... want to make sure we're not in some weird
> +               vmexit state... have to look later. */
> +            if(opt.dump_all)
> +                printf(" [w2h] d%dv%d Setting waking\n", v->d->did, v->vid);
> +            v->hvm.w2h.waking = 1;
> +        }
> +        else if ( sevt.new_runstate != RUNSTATE_RUNNING
> +                  || sevt.old_runstate != RUNSTATE_RUNNABLE )
> +        {
> +            if( v->hvm.w2h.waking
> +                && sevt.old_runstate == RUNSTATE_RUNNING
> +                && sevt.new_runstate != RUNSTATE_OFFLINE )
> +            {
> +                /* NB: This is printed a lot unnecessarily when there is TSC skew */
> +                if ( sevt.old_runstate != v->runstate.state )
> +                    fprintf(warn, "Strange, unexpected waking transition for d%dv%d: %s -> %s\n",
> +                            v->d->did, v->vid,
> +                            runstate_name[sevt.old_runstate],
> +                            runstate_name[sevt.new_runstate]);
> +                v->hvm.w2h.waking = 0;
> +            }
> +
> +            /* Close wake-to-halt summary */
> +            /* FIXME: Need to think about handling preemption. */
> +            if (sevt.new_runstate == RUNSTATE_BLOCKED
> +                && sevt.old_runstate == RUNSTATE_RUNNING
> +                && v->hvm.w2h.interrupts ) {
> +                int i;
> +                for(i=0; i<GUEST_INTERRUPT_MAX; i++) {
> +                    struct hvm_gi_struct *g=v->hvm.summary.guest_interrupt + i;
> +                    tsc_t start_tsc = g->start_tsc;
> +                    if(start_tsc) {
> +                        tsc_t t = (start_tsc == 1) ? 0 : ri->tsc - start_tsc;
> +                        if(opt.dump_all)
> +                            printf(" [w2h] Halting vec %d is_wake %d time %lld\n",
> +                                   i,
> +                                   g->is_wake,
> +                                   t);
> +
> +                        if(opt.scatterplot_wake_to_halt
> +                           && t
> +                           && g->is_wake)
> +                            scatterplot_vs_time(ri->tsc, t);
> +
> +                        if(opt.summary && t) {
> +                            if(g->is_wake) {
> +                                if(v->hvm.w2h.interrupts==1)
> +                                    update_cycles(&g->runtime[GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ALONE],
> +                                                  t);
> +                                update_cycles(&g->runtime[GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ANY],
> +                                              t);
> +                            } else {
> +                                update_cycles(&g->runtime[GUEST_INTERRUPT_CASE_INTERRUPT_TO_HALT],
> +                                              t);
> +                            }
> +                        }
> +                        g->start_tsc = 0;
> +                        g->is_wake = 0;
> +                    }
> +                }
> +                v->hvm.w2h.interrupts = 0;
> +                v->hvm.w2h.vector = 0;
> +            }
> +        }
> +    }
> +
> +    /* Sanity checks / tsc skew detection */
> +    if( v->runstate.state != sevt.old_runstate
> +        && v->runstate.state != RUNSTATE_INIT )
> +    {
> +        if(v->runstate.state == RUNSTATE_LOST) {
> +            if( sevt.new_runstate == RUNSTATE_RUNNING )
> +                goto update;
> +            else if(opt.dump_all)
> +                fprintf(warn, "%s: d%dv%d in runstate lost, not updating to %s\n",
> +                        __func__, v->d->did, v->vid,
> +                        runstate_name[sevt.new_runstate]);
> +            goto no_update;
> +        } else if (last_oldstate.wrong == sevt.new_runstate
> +                   && last_oldstate.actual == sevt.old_runstate) {
> +            tsc_t lag, old_offset;
> +            struct pcpu_info *p2;
> +
> +            if(ri->tsc < last_oldstate.tsc) {
> +                fprintf(warn, "WARNING: new tsc %lld < detected runstate tsc %lld! 
Not updating\n", > + ri->tsc, last_oldstate.tsc); > + goto no_update; > + } > + > + p2 = P.pcpu + last_oldstate.pid; > + > + lag = ri->tsc > + - last_oldstate.tsc; > + > + old_offset = p2->tsc_skew.offset; > + > + cpumask_union(&p2->tsc_skew.downstream, &p->tsc_skew.downstream); > + cpumask_set(&p2->tsc_skew.downstream, p->pid); > + > + if(cpumask_isset(&p2->tsc_skew.downstream, p2->pid)) { > + if ( opt.tsc_loop_fatal ) > + { > + fprintf(stderr, "FATAL: tsc skew dependency loop > detected!\n"); > + error(ERR_FILE, NULL); > + } > + else > + { > + int i; > + fprintf(warn, "Tsc skew dependency loop detected! > Resetting...\n"); > + for ( i=0; i<=P.max_active_pcpu; i++) > + { > + struct pcpu_info *p = P.pcpu + i; > + > + p->tsc_skew.offset = 0; > + cpumask_init(&p->tsc_skew.downstream); > + } > + goto no_update; > + } > + } > + > + p2->tsc_skew.offset += lag * 2; > + > + fprintf(warn, "TSC skew detected p%d->p%d, %lld cycles. Changing > p%d offset from %lld to %lld\n", > + p->pid, p2->pid, lag, > + p2->pid, > + old_offset, > + p2->tsc_skew.offset); > + > + goto no_update; > + } else { > + fprintf(warn, "runstate_change old_runstate %s, d%dv%d runstate > %s. Possible tsc skew.\n", > + runstate_name[sevt.old_runstate], > + v->d->did, v->vid, > + runstate_name[v->runstate.state]); > + > + v->runstate.last_oldstate.wrong = sevt.old_runstate; > + v->runstate.last_oldstate.actual = v->runstate.state; > + v->runstate.last_oldstate.tsc = ri->tsc; > + v->runstate.last_oldstate.pid = p->pid; > + > + if ( v->runstate.state == RUNSTATE_RUNNING ) > + { > + fprintf(warn, " Not updating.\n"); > + goto no_update; > + } > + goto update; > + } > + fprintf(stderr, "FATAL: Logic hole in %s\n", __func__); > + error(ERR_ASSERT, NULL); > + } > + > +update: > + /* Actually update the runstate. Special things to do if we're starting > + * or stopping actually running on a physical cpu. */ > + if ( type == CONTINUE ) > + { > + if( v->runstate.state == RUNSTATE_INIT ) { > + /* Start-of-day; account first tsc -> now to v */ > + vcpu_start(p, v); > + } else { > + /* Continue running. First, do some sanity checks */ > + if ( v->runstate.state == RUNSTATE_LOST ) { > + fprintf(warn, "WARNING: continue with d%dv%d in > RUNSTATE_LOST. Resetting current.\n", > + v->d->did, v->vid); > + if ( p->current ) > + vcpu_prev_update(p, p->current, ri->tsc, RUNSTATE_LOST); > + vcpu_next_update(p, v, ri->tsc); > + } > + else if( v->runstate.state != RUNSTATE_RUNNING ) { > + /* This should never happen. */ > + fprintf(warn, "FATAL: sevt.old_runstate running, but d%dv%d > runstate %s!\n", > + v->d->did, v->vid, runstate_name[v->runstate.state]); > + error(ERR_FILE, NULL); > + } else if ( v->p != p ) { > + fprintf(warn, "FATAL: continue on p%d, but d%dv%d p%d!\n", > + p->pid, v->d->did, v->vid, > + v->p ? v->p->pid : -1); > + error(ERR_FILE, NULL); > + } > + > + runstate_update(v, RUNSTATE_RUNNING, ri->tsc); > + } > + } > + else if ( sevt.old_runstate == RUNSTATE_RUNNING > + || v->runstate.state == RUNSTATE_RUNNING ) > + { > +#if 0 > + /* A lot of traces include cpi that shouldn't... 
*/ > + if(perfctrs && v->runstate.tsc) { > + unsigned long long run_cycles, run_instr; > + double cpi; > + > + //run_cycles = r->p1 - v->runstate_p1_start; > + run_cycles = ri->tsc - v->runstate.tsc; > + run_instr = r->p2 - v->runstate.p2_start; > + > + cpi = ((double)run_cycles) / run_instr; > + > + if(opt.dump_all) { > + printf(" cpi: %2.2lf ( %lld / %lld )\n", > + cpi, run_cycles, run_instr); > + } > + > + if(opt.scatterplot_cpi && v->d->did == 1) > + printf("%lld,%2.2lf\n", > + ri->tsc, cpi); > + > + if(opt.summary_info) > + update_cpi(&v->cpi, run_instr, run_cycles); > + } > +#endif > + /* > + * Cases: > + * old running, v running: > + * normal (prev update p, lost record check) > + * v running, old ! running: > + * tsc skew (prev update v->p, lost record check) > + * old running, v init: > + start-of-day (fake update, prev p, lost record) > + * old running, v !{running,init}: > + * # (should never happen) > + */ > + if( sevt.old_runstate == RUNSTATE_RUNNING ) { > + if( v->runstate.state == RUNSTATE_INIT ) { > + /* Start-of-day; account first tsc -> now to v */ > + vcpu_start(p, v); > + } else if( v->runstate.state != RUNSTATE_RUNNING > + && v->runstate.state != RUNSTATE_LOST ) { > + /* This should never happen. */ > + fprintf(warn, "FATAL: sevt.old_runstate running, but d%dv%d > runstate %s!\n", > + v->d->did, v->vid, runstate_name[v->runstate.state]); > + error(ERR_FILE, NULL); > + } > + > + vcpu_prev_update(p, v, ri->tsc, sevt.new_runstate); > + } else { > + vcpu_prev_update(v->p, v, ri->tsc, sevt.new_runstate); > + } > + > + if(P.lost_cpus && v->d->did != IDLE_DOMAIN) { > + if(opt.dump_all) > + fprintf(warn, "%s: %d lost cpus, setting d%dv%d runstate to > RUNSTATE_LOST\n", > + __func__, P.lost_cpus, v->d->did, v->vid); > + lose_vcpu(v, ri->tsc); > + } > + } > + else if ( sevt.new_runstate == RUNSTATE_RUNNING ) > + { > + if(perfctrs) { > + v->runstate.p1_start = r->p1; > + v->runstate.p2_start = r->p2; > + } > + > + vcpu_next_update(p, v, ri->tsc); > + } > + else if ( v->runstate.state != RUNSTATE_INIT ) > + { > + /* TSC skew at start-of-day is hard to deal with. Don't > + * bring a vcpu out of INIT until it's seen to be actually > + * running somewhere. 
*/ > + runstate_update(v, sevt.new_runstate, ri->tsc); > + } > + > +no_update: > + return; > +} > + > +void sched_switch_process(struct pcpu_info *p) > +{ > + struct vcpu_data *prev, *next; > + struct record_info *ri = &p->ri; > + struct { > + unsigned int prev_dom, prev_vcpu, next_dom, next_vcpu; > + } * r = (typeof(r))ri->d; > + > + if(opt.dump_all) > + printf("%s sched_switch prev d%uv%u next d%uv%u\n", > + ri->dump_header, > + r->prev_dom, r->prev_vcpu, > + r->next_dom, r->next_vcpu); > + > + if(r->prev_vcpu > MAX_CPUS) > + { > + fprintf(warn, "%s: prev_vcpu %u > MAX_VCPUS %d!\n", > + __func__, r->prev_vcpu, MAX_CPUS); > + return; > + } > + > + if(r->next_vcpu > MAX_CPUS) > + { > + fprintf(warn, "%s: next_vcpu %u > MAX_VCPUS %d!\n", > + __func__, r->next_vcpu, MAX_CPUS); > + return; > + } > + > + prev = vcpu_find(r->prev_dom, r->prev_vcpu); > + next = vcpu_find(r->next_dom, r->next_vcpu); > + > + vcpu_prev_update(p, prev, ri->tsc, RUNSTATE_QUEUED); /* FIXME */ > + > + vcpu_next_update(p, next, ri->tsc); > +} > + > +void sched_default_vcpu_activate(struct pcpu_info *p) > +{ > + struct vcpu_data *v = default_domain.vcpu[p->pid]; > + > + if(!v) > + v = vcpu_create(&default_domain, p->pid); > + > + assert(v == v->d->vcpu[v->vid]); > + > + v->activated = 1; > + v->guest_paging_levels = opt.default_guest_paging_levels; > + v->p = p; > + v->runstate.state = RUNSTATE_RUNNING; > + > + p->current = v; > + pcpu_string_draw(p); > +} > + > +void sched_default_domain_init(void) > +{ > + struct domain_data *d = &default_domain; > + > + domain_init(d, DEFAULT_DOMAIN); > +} > + > +void runstate_clear(tsc_t * runstate_cycles) > +{ > + int i; > + for(i=0; i<RUNSTATE_MAX; i++) > + runstate_cycles[i]=0; > +} > + > +void runstate_summary(tsc_t * runstate_cycles) > +{ > + int i; > + for(i=0; i<RUNSTATE_MAX; i++) > + if(runstate_cycles[i]) { > + struct time_struct t; > + cycles_to_time(runstate_cycles[i], &t); > + printf(" %s: %u.%09u s\n", > + runstate_name[i], t.s, t.ns); > + } > +} > + > +void sched_summary_vcpu(struct vcpu_data *v) > +{ > + int i; > + char desc[30]; > + > + /* FIXME: Update all records like this */ > + if ( v->pcpu_tsc ) > + { > + update_cycles(&v->cpu_affinity_all, P.f.last_tsc - v->pcpu_tsc); > + update_cycles(&v->cpu_affinity_pcpu[v->p->pid], P.f.last_tsc - > v->pcpu_tsc); > + } > + > + printf(" Runstates:\n"); > + for(i=0; i<RUNSTATE_MAX; i++) { > + snprintf(desc,30, " %8s", runstate_name[i]); > + print_cycle_summary(v->runstates+i, desc); > + if ( i==RUNSTATE_RUNNABLE ) > + { > + int j; > + for(j=0; j<RUNNABLE_STATE_MAX; j++) { > + if ( j == RUNNABLE_STATE_INVALID ) > + continue; > + snprintf(desc,30, " %8s", runnable_state_name[j]); > + print_cycle_summary(v->runnable_states+j, desc); > + } > + } > + } > + print_cpi_summary(&v->cpi); > + print_cpu_affinity(&v->cpu_affinity_all, " cpu affinity"); > + for ( i = 0; i < MAX_CPUS ; i++) > + { > + snprintf(desc,30, " [%d]", i); > + print_cpu_affinity(v->cpu_affinity_pcpu+i, desc); > + } > +} > + > +void sched_summary_domain(struct domain_data *d) > +{ > + int i; > + char desc[30]; > + > + printf(" Runstates:\n"); > + for(i=0; i<DOMAIN_RUNSTATE_MAX; i++) { > + snprintf(desc,30, " %8s", domain_runstate_name[i]); > + print_cycle_summary(d->runstates+i, desc); > + } > +} > + > + > +void sched_process(struct pcpu_info *p) > +{ > + struct record_info *ri = &p->ri; > + > + if(ri->evt.sub == 0xf) { > + switch(ri->event) > + { > + case TRC_SCHED_SWITCH: > + sched_switch_process(p); > + break; > + default: > + process_generic(&p->ri); > + } > + } 
else { > + if(ri->evt.sub == 1) > + sched_runstate_process(p); > + else { > + UPDATE_VOLUME(p, sched_verbose, ri->size); > + process_generic(&p->ri); > + } > + } > +} > + > +/* ---- Memory ---- */ > +void mem_summary_domain(struct domain_data *d) { > + int i, j; > + > + printf(" Grant table ops:\n"); > + > + printf(" Done by:\n"); > + for(i=0; i<MEM_MAX; i++) > + if(d->memops.done[i]) > + printf(" %-14s: %d\n", > + mem_name[i], > + d->memops.done[i]); > + > + printf(" Done for:\n"); > + for(i=0; i<MEM_MAX; i++) > + if(d->memops.done_for[i]) > + printf(" %-14s: %d\n", > + mem_name[i], > + d->memops.done_for[i]); > + > + printf(" Populate-on-demand:\n"); > + printf(" Populated:\n"); > + for(i=0; i<4; i++) > + { > + if ( d->pod.populate_order[i] ) > + printf(" [%d] %d\n", i, > + d->pod.populate_order[i]); > + } > + printf(" Reclaim order:\n"); > + for(i=0; i<4; i++) > + { > + if ( d->pod.reclaim_order[i] ) > + printf(" [%d] %d\n", i, > + d->pod.reclaim_order[i]); > + } > + printf(" Reclaim contexts:\n"); > + for(j=0; j<POD_RECLAIM_CONTEXT_MAX; j++) > + { > + if ( d->pod.reclaim_context[j] ) > + { > + printf(" * [%s] %d\n", > + pod_reclaim_context_name[j], > + d->pod.reclaim_context[j]); > + for(i=0; i<4; i++) > + { > + if ( d->pod.reclaim_context_order[j][i] ) > + printf(" [%d] %d\n", i, > + d->pod.reclaim_context_order[j][i]); > + } > + } > + } > +} > + > +int p2m_canonical_order(int order) > +{ > + if ( order % 9 > + || (order / 9) > 2 ) > + { > + fprintf(warn, "%s: Strange, non-canonical order %d\n", > + __func__, order); > + order = 4; > + } else { > + order /= 9; > + } > + return order; > +} > + > +void mem_pod_zero_reclaim_process(struct pcpu_info *p) > +{ > + struct record_info *ri = &p->ri; > + int context = POD_RECLAIM_CONTEXT_UNKNOWN; > + struct vcpu_data *v = p->current; > + > + struct { > + uint64_t gfn, mfn; > + int d:16,order:16; > + } *r = (typeof(r))ri->d; > + > + if ( v && v->hvm.vmexit_valid ) > + { > + switch(v->hvm.exit_reason) > + { > + case EXIT_REASON_EPT_VIOLATION: > + case EXIT_REASON_EXCEPTION_NMI: > + context = POD_RECLAIM_CONTEXT_FAULT; > + break; > + case EXIT_REASON_VMCALL: > + context = POD_RECLAIM_CONTEXT_BALLOON; > + break; > + } > + } > + > + if ( opt.dump_all ) > + { > + printf(" %s pod_zero_reclaim d%d o%d g %llx m %llx ctx %s\n", > + ri->dump_header, > + r->d, r->order, > + (unsigned long long)r->gfn, (unsigned long long)r->mfn, > + pod_reclaim_context_name[context]); > + > + } > + > + if ( opt.summary_info ) > + { > + struct domain_data *d; > + > + if ( v && (d=v->d) ) > + { > + int order; > + > + order = p2m_canonical_order(r->order); > + > + d->pod.reclaim_order[order]++; > + d->pod.reclaim_context[context]++; > + d->pod.reclaim_context_order[context][order]++; > + } > + } > +} > + > +void mem_pod_populate_process(struct pcpu_info *p) > +{ > + struct record_info *ri = &p->ri; > + > + struct { > + uint64_t gfn, mfn; > + int d:16,order:16; > + } *r = (typeof(r))ri->d; > + > + if ( opt.dump_all ) > + { > + printf(" %s pod_populate d%d o%d g %llx m %llx\n", > + ri->dump_header, > + r->d, r->order, > + (unsigned long long)r->gfn, (unsigned long long)r->mfn); > + } > + > + if ( opt.summary_info ) > + { > + struct vcpu_data *v = p->current; > + struct domain_data *d; > + > + if ( v && (d=v->d) ) > + { > + int order; > + > + order = p2m_canonical_order(r->order); > + > + d->pod.populate_order[order]++; > + } > + } > +} > + > +void mem_pod_superpage_splinter_process(struct pcpu_info *p) > +{ > + struct record_info *ri = &p->ri; > + > + struct { > + uint64_t gfn; 
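> + /* the word after gfn holds the domain id in its low 16 bits */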
> + int d:16; > + } *r = (typeof(r))ri->d; > + > + if ( opt.dump_all ) > + { > + printf(" %s pod_spage_splinter d%d g %llx\n", > + ri->dump_header, > + r->d, (unsigned long long)r->gfn); > + } > +} > + > +void mem_page_grant(struct pcpu_info *p) > +{ > + struct record_info *ri = &p->ri; > + > + struct { > + unsigned domain; > + } *r = (typeof(r))ri->d; > + union pv_event pevt = { .event = ri->event }; > + > + if ( opt.dump_all ) > + { > + printf(" %s %s domain %u\n", ri->dump_header, mem_name[pevt.minor], > r->domain); > + } > +} > +void mem_set_p2m_entry_process(struct pcpu_info *p) > +{ > + struct record_info *ri = &p->ri; > + > + struct { > + uint64_t gfn, mfn; > + int p2mt; > + int d:16,order:16; > + } *r = (typeof(r))ri->d; > + > + if ( opt.dump_all ) > + { > + printf(" %s set_p2m_entry d%d o%d t %d g %llx m %llx\n", > + ri->dump_header, > + r->d, r->order, > + r->p2mt, > + (unsigned long long)r->gfn, (unsigned long long)r->mfn); > + } > +} > + > +void mem_decrease_reservation_process(struct pcpu_info *p) > +{ > + struct record_info *ri = &p->ri; > + > + struct { > + uint64_t gfn; > + int d:16,order:16; > + } *r = (typeof(r))ri->d; > + > + if ( opt.dump_all ) > + { > + printf(" %s decrease_reservation d%d o%d g %llx\n", > + ri->dump_header, > + r->d, r->order, > + (unsigned long long)r->gfn); > + } > +} > + > +void mem_process(struct pcpu_info *p) { > + struct record_info *ri = &p->ri; > + struct { > + int dom; > + } *r = (typeof(r))ri->d; > + > + int minor = ri->evt.minor; > + > + switch ( minor ) > + { > + case MEM_PAGE_GRANT_MAP: > + case MEM_PAGE_GRANT_UNMAP: > + case MEM_PAGE_GRANT_TRANSFER: > + mem_page_grant(p); > + break; > + case MEM_SET_P2M_ENTRY: > + mem_set_p2m_entry_process(p); > + break; > + case MEM_DECREASE_RESERVATION: > + mem_decrease_reservation_process(p); > + break; > + case MEM_POD_POPULATE: > + mem_pod_populate_process(p); > + break; > + case MEM_POD_ZERO_RECLAIM: > + mem_pod_zero_reclaim_process(p); > + break; > + case MEM_POD_SUPERPAGE_SPLINTER: > + mem_pod_superpage_splinter_process(p); > + break; > + default: > + if(opt.dump_all) { > + dump_generic(stdout, ri); > + } > + > + if(opt.summary_info && minor < MEM_MAX) { > + struct domain_data *d; > + > + if(p->current) { > + if (p->current->d) { > + p->current->d->memops.done[minor]++; > + p->current->d->memops.done_interval[minor]++; > + } > + if((d=domain_find(r->dom))) { > + d->memops.done_for[minor]++; > + d->memops.done_for_interval[minor]++; > + } > + } > + } > + break; > + } > + > +} > + > +/* ---- PM ---- */ > +#define CSTATE_MAX 5 > +#define CSTATE_INVALID ((CSTATE_MAX)+1) > +void pm_process(struct pcpu_info *p) { > + struct record_info *ri = &p->ri; > + > + switch ( ri->event ) > + { > + case TRC_PM_FREQ_CHANGE: > + if (opt.dump_all ) > + printf(" %s pm_freq_change o%d n%d\n", > + ri->dump_header, > + ri->d[0], > + ri->d[1]); > + break; > + case TRC_PM_IDLE_ENTRY: > + if (opt.dump_all ) > + printf(" %s pm_idle_start c%d\n", > + ri->dump_header, > + ri->d[0]); > + if ( ri->d[0] <= CSTATE_MAX ) > + { > + p->power_state=ri->d[0]; > + pcpu_string_draw(p); > + } > + break; > + case TRC_PM_IDLE_EXIT: > + if (opt.dump_all ) > + printf(" %s pm_idle_end c%d\n", > + ri->dump_header, > + ri->d[0]); > + if ( p->power_state != ri->d[0] > + && p->power_state != CSTATE_INVALID ) > + printf("Strange, pm_idle_end %d, power_state %d!\n", > + ri->d[0], p->power_state); > + p->power_state = 0; > + pcpu_string_draw(p); > + break; > + default: > + if(opt.dump_all) { > + dump_generic(stdout, ri); > + } > + break; > + } > + > 
+}
> +
> +/*
> + * IRQ related stuff
> + */
> +
> +#define MAX_VECTOR 256
> +int global_vector_used[MAX_VECTOR] = {0};
> +struct pci_dev {
> +    uint8_t bus;
> +    uint8_t devfn;
> +    int vector_used[MAX_VECTOR];
> +    struct pci_dev *next;
> +} *pdev_list;
> +
> +#define MAX_IRQ 512
> +struct irq_desc {
> +    enum {
> +        IRQ_NONE,
> +        IRQ_MSI,
> +        IRQ_GSI
> +    } type;
> +    struct pci_dev *dev;
> +} irq_table[MAX_IRQ];
> +
> +struct pci_dev * pdev_find(uint8_t bus, uint8_t devfn)
> +{
> +    struct pci_dev *d, *n, **q;
> +
> +    /* Look for the device, keeping track of the last pointer so we can add
> +       a device if we need to. */
> +    for ( d = pdev_list, q=&pdev_list ;
> +          d && ( (d->bus < bus)
> +                 || (d->bus == bus && d->devfn < devfn) ) ;
> +          q = &d->next, d=d->next ) ;
> +
> +    if(d && d->bus == bus && d->devfn == devfn)
> +        return d;
> +
> +    /* Make a new pdev */
> +    fprintf(warn, "Creating pdev %02x:%02x.%x\n", bus, devfn>>3, devfn&7);
> +
> +    if((n=malloc(sizeof(*n)))==NULL)
> +    {
> +        fprintf(stderr, "%s: malloc %zd failed!\n", __func__, sizeof(*n));
> +        error(ERR_SYSTEM, NULL);
> +    }
> +
> +    bzero(n, sizeof(*n));
> +
> +    n->bus=bus;
> +    n->devfn=devfn;
> +
> +    /* Insert it into the list */
> +    n->next = d;
> +    *q = n;
> +
> +    return n;
> +}
> +
> +void irq_process(struct pcpu_info *p) {
> +    struct record_info *ri = &p->ri;
> +
> +    switch ( ri->event )
> +    {
> +    case TRC_HW_IRQ_BIND_VECTOR:
> +    {
> +        struct {
> +            int irq, vec;
> +            unsigned mask[4];
> +        } *r = (typeof(r))ri->d;
> +        if ( opt.dump_all )
> +        {
> +            printf(" %s irq_bind_vector irq %x vec %x mask %04x %04x %04x %04x\n",
> +                   ri->dump_header,
> +                   r->irq, r->vec,
> +                   r->mask[3],
> +                   r->mask[2],
> +                   r->mask[1],
> +                   r->mask[0]);
> +        }
> +        break;
> +    }
> +    case TRC_HW_IRQ_HANDLED:
> +    {
> +        struct {
> +            int irq, start_tsc, end_tsc;
> +        } *r = (typeof(r))ri->d;
> +        int arctime;
> +
> +        arctime = r->end_tsc - r->start_tsc;
> +        if ( opt.dump_all )
> +        {
> +            printf(" %s irq_handled irq %x %d (%d,%d)\n",
> +                   ri->dump_header,
> +                   r->irq, arctime, r->start_tsc, r->end_tsc);
> +        }
> +        if ( opt.scatterplot_irq )
> +        {
> +            struct time_struct t;
> +
> +            abs_cycles_to_time(ri->tsc, &t);
> +
> +            printf("i%x %u.%09u %d\n",
> +                   (unsigned)r->irq,
> +                   t.s, t.ns,
> +                   p->pid);
> +        }
> +        break;
> +    }
> +    case TRC_HW_IRQ_ASSIGN_VECTOR:
> +    {
> +        struct {
> +            int irq, vec;
> +            unsigned mask[4];
> +        } *r = (typeof(r))ri->d;
> +        if ( opt.dump_all )
> +        {
> +            printf(" %s irq_assign_vector irq %x vec %x mask %04x %04x %04x %04x\n",
> +                   ri->dump_header,
> +                   r->irq, r->vec,
> +                   r->mask[3],
> +                   r->mask[2],
> +                   r->mask[1],
> +                   r->mask[0]);
> +        }
> +        if ( r->irq < MAX_IRQ
> +             && r->vec < MAX_VECTOR )
> +        {
> +            if ( irq_table[r->irq].type == IRQ_MSI )
> +            {
> +                if(global_vector_used[r->vec])
> +                    fprintf(warn, " Vector collision on global table!\n");
> +                global_vector_used[r->vec]=1;
> +            }
> +            if( irq_table[r->irq].dev )
> +            {
> +                struct pci_dev * pdev=irq_table[r->irq].dev;
> +
> +                if(pdev->vector_used[r->vec])
> +                    fprintf(warn, " Vector collision on %02x.%02x!\n",
> +                            pdev->bus, pdev->devfn);
> +                pdev->vector_used[r->vec]=1;
> +            }
> +        }
> +        break;
> +    }
> +    case TRC_HW_IRQ_MOVE_CLEANUP_DELAY:
> +    {
> +        struct {
> +            int irq, vec, cpu;
> +        } *r = (typeof(r))ri->d;
> +
> +        if ( opt.dump_all )
> +        {
> +            printf(" %s irq_move_cleanup_delay irq %x vec %x cpu %d\n",
> +                   ri->dump_header,
> +                   r->irq, r->vec, r->cpu);
> +        }
> +        break;
> +    }
> +    case TRC_HW_IRQ_MOVE_CLEANUP:
> +    {
> +        struct {
> +            int irq;
> +            int vec;
> +            int cpu;
> +        } *r = (typeof(r))ri->d;
> +
> +        if ( opt.dump_all )
> +        {
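> +            /* The cleanup path below mirrors TRC_HW_IRQ_ASSIGN_VECTOR:
> +             * it clears the global_vector_used / pdev->vector_used slots
> +             * that assignment marked busy. */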
> + printf(" %s irq_move_cleanup irq %x vec %x cpu %d\n", > + ri->dump_header, > + r->irq, r->vec, r->cpu); > + } > + if ( r->irq < MAX_IRQ > + && r->vec < MAX_VECTOR ) > + { > + if ( irq_table[r->irq].type == IRQ_MSI ) > + { > + if(!global_vector_used[r->vec]) > + fprintf(warn," Strange, cleanup on non-used vector\n"); > + global_vector_used[r->vec]=0; > + } > + if ( irq_table[r->irq].dev ) > + { > + struct pci_dev * pdev=irq_table[r->irq].dev; > + > + if(!pdev->vector_used[r->vec]) > + fprintf(warn," Strange, cleanup on non-used vector\n"); > + pdev->vector_used[r->vec]=0; > + } > + } > + break; > + } > + case TRC_HW_IRQ_UNMAPPED_VECTOR: > + { > + struct { > + int vec; > + } *r = (typeof(r))ri->d; > + > + if ( opt.dump_all ) > + { > + printf(" %s irq_unmapped_vector vec %x\n", > + ri->dump_header, > + r->vec); > + } > + break; > + } > + case TRC_HW_IRQ_CLEAR_VECTOR: > + case TRC_HW_IRQ_MOVE_FINISH : > + default: > + if(opt.dump_all) { > + dump_generic(stdout, ri); > + } > + break; > + } > +} > + > +#define TRC_HW_SUB_PM 1 > +#define TRC_HW_SUB_IRQ 2 > +void hw_process(struct pcpu_info *p) > +{ > + struct record_info *ri = &p->ri; > + > + switch(ri->evt.sub) > + { > + case TRC_HW_SUB_PM: > + pm_process(p); > + break; > + case TRC_HW_SUB_IRQ: > + irq_process(p); > + break; > + } > + > +} > +/* ---- Base ----- */ > +void dump_generic(FILE * f, struct record_info *ri) > +{ > + int i; > + > + fprintf(f, "]%s %7x(%x:%x:%x) %u [", > + ri->dump_header, > + ri->event, > + ri->evt.main, > + ri->evt.sub, > + ri->evt.minor, > + ri->extra_words); > + > + for(i=0; i<ri->extra_words; i++) { > + fprintf(f, " %x", ri->d[i]); > + } > + > + fprintf(f, " ]\n"); > +} > + > +void dump_raw(char * s, struct record_info *ri) > +{ > + int i; > + > + if(ri->rec.cycle_flag) > + printf("%s %7x %d %14lld [", > + s, ri->event, ri->extra_words, ri->tsc); > + else > + printf("%s %7x %d %14s [", > + s, ri->event, ri->extra_words, "-"); > + > + for(i=0; i<7; i++) { > + if ( i < ri->extra_words ) > + printf(" %8x", ri->d[i]); > + else > + printf(" "); > + } > + > + printf(" ] | "); > + > + for (i=0; i<8; i++) { > + printf(" %08x", ri->rec.raw[i]); > + } > + > + printf(" |\n"); > +} > + > +void error(enum error_level l, struct record_info *ri) > +{ > + if ( l > opt.tolerance ) > + { > + if ( ri ) > + dump_generic(warn, ri); > + exit(1); > + } > +} > + > +int check_extra_words(struct record_info *ri, > + int expected_size, > + const char *record) > +{ > + static int off_by_one = 0; > + int expected_extra = expected_size / sizeof(unsigned int); > + > + if(ri->extra_words != expected_extra > + && !(off_by_one && ri->extra_words == expected_extra + 1) ) > + { > + if ( !off_by_one && ri->extra_words == expected_extra + 1 ) > + { > + fprintf(warn, "Detected off-by-one bug; relaxing > expectations\n"); > + off_by_one=1; > + } > + else { > + fprintf(warn, "ERROR: %s extra_words %d, expected %d!\n", > + record, > + ri->extra_words, expected_extra); > + error(ERR_RECORD, ri); > + return 1; > + } > + } > + return 0; > +} > + > +void process_generic(struct record_info *ri) { > + > + error(ERR_STRICT, ri); > + > + if(opt.dump_all) { > + dump_generic(stdout, ri); > + } > +} > + > +int vcpu_set_data_type(struct vcpu_data *v, int type) > +{ > + if (v->data_type == VCPU_DATA_NONE ) > + { > + v->data_type = type; > + switch(type) > + { > + case VCPU_DATA_HVM: > + init_hvm_data(&v->hvm, v); > + break; > + default: > + break; > + } > + } > + else > + assert(v->data_type == type); > + return 0; > +} > + > + > +void lose_vcpu(struct vcpu_data 
*v, tsc_t tsc) > +{ > + if(v->data_type == VCPU_DATA_HVM) > + v->hvm.vmexit_valid=0; > + runstate_update(v, RUNSTATE_LOST, tsc); > + hvm_vlapic_clear(&v->vlapic); > + > + if(v->data_type == VCPU_DATA_HVM) { > + int i; > + if(opt.dump_all) > + printf(" [w2h] Clearing w2h state for d%dv%d\n", > + v->d->did, v->vid); > + v->hvm.w2h.interrupts=0; > + v->hvm.w2h.vector=0; > + v->hvm.w2h.waking = 0; > + for(i=0; i<GUEST_INTERRUPT_MAX; i++) { > + if(opt.dump_all && v->hvm.summary.guest_interrupt[i].start_tsc) { > + printf(" Interrupt %d clearing start_tsc %lld\n", > + i, v->hvm.summary.guest_interrupt[i].start_tsc); > + } > + v->hvm.summary.guest_interrupt[i].start_tsc = 0; > + } > + } > +} > + > +struct lost_record_struct { > + int lost_records; > + unsigned did:16,vid:16; > + tsc_t first_tsc; > +}; > + > +void process_lost_records(struct pcpu_info *p) > +{ > + struct record_info *ri = &p->ri; > + struct lost_record_struct *r = (typeof(r))ri->d; > + tsc_t first_tsc; /* TSC of first record that was lost */ > + > + /* Sanity checks */ > + if(ri->extra_words != 4) > + { > + fprintf(warn, "FATAL: Lost record has unexpected extra words %d!\n", > + ri->extra_words); > + error(ERR_RECORD, ri); > + return; > + } > + > + first_tsc = r->first_tsc; > + > + if(opt.dump_all) > + { > + if(p->current) > + printf(" %s lost_records count %d d%uv%u (cur d%dv%d) first_tsc > %lld\n", > + ri->dump_header, r->lost_records, > + r->did, r->vid, > + p->current->d->did, p->current->vid, > + r->first_tsc); > + else > + printf(" %s lost_records count %d d%uv%u (cur X) first_tsc > %lld\n", > + ri->dump_header, r->lost_records, > + r->did, r->vid, > + r->first_tsc); > + } > + > +#if 0 > + if(opt.dump_trace_volume_on_lost_record) > + volume_summary(&p->volume.last_buffer); > +#endif > + > + if ( p->current ) { > + > + hvm_vlapic_clear(&p->current->vlapic); > + if(p->current->data_type == VCPU_DATA_HVM) { > + p->current->hvm.vmexit_valid=0; > + cr3_switch(0, &p->current->hvm); > + } > + > + /* We may lose scheduling records; so we need to: > + * - Point all records until now to the next schedule in the > + * "default" domain > + * - Make sure there are no warnings / strangeness with the > + * current vcpu (if it gets scheduled elsewhere). > + */ > + vcpu_prev_update(p, p->current, first_tsc, RUNSTATE_LOST); > + } > +#if 0 > + vcpu_next_update(p, default_domain.vcpu[p->pid], first_tsc); > + if(p->current->data_type == VCPU_DATA_HVM) { > + p->current->hvm.vmexit_valid=0; > + } > +#endif > + > + /* The lost record trace is processed early -- i.e., > + * After the last good record, rather than when the next > + * record is processed. Between the time it's processed and > + * the time it actually went in, the vcpu may be scheduled on > + * other processors. So we can't switch vcpus until the first > + * TSC'd record after the lost record. */ > + if(!p->lost_record.active) { > + P.lost_cpus++; > + if(P.lost_cpus > P.max_active_pcpu + 1) { > + fprintf(warn, "ERROR: P.lost_cpus %d > P.max_active_pcpu + 1 > %d!\n", > + P.lost_cpus, P.max_active_pcpu + 1); > + error(ERR_ASSERT, NULL); > + } > + } else > + fprintf(warn, "Strange, lost record for pcpu %d, but lost_record > still active!\n", > + p->pid); > + > + p->lost_record.active = 1; > + p->lost_record.tsc = first_tsc; > + pcpu_string_draw(p); > + > + { > + /* Any vcpu which is not actively running may be scheduled on the > + * lost cpu. 
To avoid mis-accounting, we need to reset */ > + struct domain_data *d; > + int i; > + for(d=domain_list ; d; d=d->next) > + { > + if(d->did != DEFAULT_DOMAIN) { > + for(i=0; i<MAX_CPUS; i++) > + if(d->vcpu[i] && > + d->vcpu[i]->runstate.state != RUNSTATE_RUNNING) { > + if(opt.dump_all) > + fprintf(warn, "%s: setting d%dv%d to > RUNSTATE_LOST\n", > + __func__, d->did, i); > + lose_vcpu(d->vcpu[i], first_tsc); > + } > + } > + } > + } > + > + p->lost_record.domain_valid=1; > + p->lost_record.did=r->did; > + p->lost_record.vid=r->vid; > +} > + > + > +void process_lost_records_end(struct pcpu_info *p) > +{ > + struct record_info *ri = &p->ri; > + struct lost_record_struct *r = (typeof(r))ri->d; > + > + if(!p->lost_record.active) { > + fprintf(warn, "FATAL: lost_records_end but pid %d not lost!\n", > + p->pid); > + error(ERR_FILE, NULL); > + return; > + } > + > + /* Lost records. If this is the first record on a pcpu after the loss, > + * Update the information. */ > + if(ri->tsc > p->lost_record.tsc) > + { > + if(opt.dump_all) > + printf(" %s lost_records end ---\n", > + pcpu_string(p->pid)); > + > + update_cycles(&p->time.lost, ri->tsc - p->lost_record.tsc); > + > + if(p->lost_record.domain_valid) { > + int did = p->lost_record.did, > + vid = p->lost_record.vid; > + > + if(opt.dump_all) > + printf(" %s lost_records end d%dv%d---\n", > + pcpu_string(p->pid), > + did, vid); > + if(p->current) > + { > + fprintf(warn, "FATAL: lost_record valid (d%dv%d), but > current d%dv%d!\n", > + did, vid, > + p->current->d->did, p->current->vid); > + error(ERR_FILE, NULL); > + return; > + } > + > + if(opt.dump_all) > + fprintf(warn, "Changing p%d current to d%dv%d\n", > + p->pid, did, vid); > + vcpu_next_update(p, > + vcpu_find(did, vid), > + ri->tsc); > + p->lost_record.domain_valid=0; > + p->lost_record.seen_valid_schedule=0; /* Let next > vcpu_next_update know that > + this one was inferred */ > + } else { > + if(opt.dump_all) > + printf(" %s lost_records end (domain > invalid)---\n", > + pcpu_string(p->pid)); > + } > + > + > + p->lost_record.active = 0; > + pcpu_string_draw(p); > + P.lost_cpus--; > + if(P.lost_cpus < 0) { > + fprintf(warn, "ERROR: lost_cpus fell below 0 for pcpu %d!\n", > + p->pid); > + error(ERR_ASSERT, NULL); > + } > + } > +} > + > +void base_process(struct pcpu_info *p) { > + struct record_info *ri = &p->ri; > + switch(ri->event) > + { > + case TRC_LOST_RECORDS: > + process_lost_records(p); > + break; > + case TRC_LOST_RECORDS_END: > + process_lost_records_end(p); > + break; > + default: > + process_generic(ri); > + } > + } > + > + > + > +/* Non-compat only */ > +void record_order_insert(struct pcpu_info *new); > +void record_order_remove(struct pcpu_info *rem); > +void record_order_bubble(struct pcpu_info *last); > + > +struct cpu_change_data { > + int cpu; > + unsigned window_size; > +}; > + > +void activate_early_eof(void) { > + struct pcpu_info *p; > + int i; > + > + fprintf(warn, "Short cpu_change window, activating early_eof\n"); > + > + P.early_eof = 1; > + > + for(i=0; i<=P.max_active_pcpu; i++) { > + p = P.pcpu + i; > + if(p->active && p->file_offset > P.last_epoch_offset) { > + fprintf(warn, " deactivating pid %d\n", > + p->pid); > + p->active = 0; > + } > + } > +} > + > +loff_t scan_for_new_pcpu(loff_t offset) { > + ssize_t r; > + struct trace_record rec; > + struct cpu_change_data *cd; > + > + r=__read_record(&rec, offset); > + > + if(r==0) > + return 0; > + > + if(rec.event != TRC_TRACE_CPU_CHANGE > + || rec.cycle_flag) > + { > + fprintf(stderr, "%s: Unexpected record 
event %x!\n",
> +                __func__, rec.event);
> +        error(ERR_ASSERT, NULL); /* Actually file, but can't recover */
> +    }
> +
> +    cd = (typeof(cd))rec.u.notsc.data;
> +
> +    if ( cd->cpu > MAX_CPUS )
> +    {
> +        fprintf(stderr, "%s: cpu %d exceeds MAX_CPU %d!\n",
> +                __func__, cd->cpu, MAX_CPUS);
> +        /* FIXME: Figure out if we could handle this more gracefully */
> +        error(ERR_ASSERT, NULL);
> +    }
> +
> +    if(cd->cpu > P.max_active_pcpu || !P.pcpu[cd->cpu].active) {
> +        struct pcpu_info *p = P.pcpu + cd->cpu;
> +
> +        fprintf(warn, "%s: Activating pcpu %d at offset %lld\n",
> +                __func__, cd->cpu, (unsigned long long)offset);
> +
> +        p->active = 1;
> +        /* Process this cpu_change record first */
> +        p->ri.rec = rec;
> +        p->ri.size = r;
> +        __fill_in_record_info(p);
> +
> +        p->file_offset = offset;
> +        p->next_cpu_change_offset = offset;
> +
> +        record_order_insert(p);
> +
> +        offset += r + cd->window_size;
> +
> +        sched_default_vcpu_activate(p);
> +
> +        if ( cd->cpu > P.max_active_pcpu )
> +            P.max_active_pcpu = cd->cpu;
> +
> +        return offset;
> +    } else {
> +        return 0;
> +    }
> +}
> +
> +/*
> + * Conceptually, when we reach a cpu_change record that's not for our pcpu,
> + * we want to scan forward through the file until we reach one that's for us.
> + * However, looping through involves reading the file, which we'd rather
> + * do in one place. Because cpu_change records don't include a tsc,
> + * the same pcpu will be processed repeatedly until the cpu_change
> + * equals p->pid.
> + *
> + * There are two additional things we need to do in this algorithm:
> + * + Detect new pcpus as they come online
> + * + De-activate pcpus which don't have any more records
> + *
> + * Detecting new pcpus which are less than P.max_active_pcpu is straight-
> + * forward: when max_active_pcpu is searching for its next cpu window,
> + * it will pass by the new cpu's window, and can activate it then.
> + *
> + * Detecting new pcpus greater than P.max_active_pcpu is a little harder;
> + * When max_active_pcpu is scanning for its next cpu window, after it's found
> + * it, we need to scan one more window forward to see if it's an already-active
> + * pcpu; if not, activate it.
> + *
> + * We also need to deal with truncated files, where records from one pcpu may
> + * be present but not from another pcpu due to lack of disk space. The best
> + * thing to do is to find the last "epoch" and essentially truncate the file
> + * to that.
> + */
> +void deactivate_pcpu(struct pcpu_info *p)
> +{
> +    if ( p->current )
> +    {
> +        pcpu_runstate_update(p, p->last_tsc);
> +
> +        fprintf(warn, "%s: setting d%dv%d to state LOST\n",
> +                __func__, p->current->d->did,
> +                p->current->vid);
> +        lose_vcpu(p->current, p->last_tsc);
> +    }
> +    p->active = 0;
> +
> +    record_order_remove(p);
> +
> +    if ( p->pid == P.max_active_pcpu )
> +    {
> +        int i, max_active_pcpu = -1;
> +        for(i=0; i<=P.max_active_pcpu; i++)
> +        {
> +            if(!P.pcpu[i].active)
> +                continue;
> +
> +            max_active_pcpu = i;
> +        }
> +        P.max_active_pcpu = max_active_pcpu;
> +        fprintf(warn, "%s: Setting max_active_pcpu to %d\n",
> +                __func__, max_active_pcpu);
> +    }
> +
> +}
> +
> +/* Helper function to process tsc-related record info */
> +void process_record_tsc(tsc_t order_tsc, struct record_info *ri)
> +{
> +    /* Find the first tsc set */
> +    if(ri->tsc && ri->tsc >= P.f.first_tsc) {
> +        /* We use the order_tsc to account for the second processing of
> +         * a lost record. */
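> +        /* (A lost-record event is seen twice, since process_record() does
> +         *  not advance file_offset for TRC_LOST_RECORDS; hence the separate
> +         *  order_tsc parameter.) */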
> +        tsc_t tsc = order_tsc;
> +
> +        if(P.f.first_tsc == 0) {
> +            P.f.first_tsc = tsc;
> +            if ( opt.interval_mode ) {
> +                P.interval.start_tsc = tsc;
> +            }
> +        } else {
> +            if ( opt.interval_mode ) {
> +                if(P.interval.start_tsc > tsc) {
> +                    fprintf(warn, "FATAL: order_tsc %lld < interval.start_tsc %lld!\n",
> +                            tsc, P.interval.start_tsc);
> +                    error(ERR_FILE, NULL);
> +                } else {
> +                    while ( tsc - P.interval.start_tsc > opt.interval.cycles ) {
> +                        interval_callback();
> +                        P.interval.start_tsc += opt.interval.cycles;
> +                    }
> +                }
> +            }
> +        }
> +
> +        P.f.last_tsc=tsc;
> +
> +        P.f.total_cycles = P.f.last_tsc - P.f.first_tsc;
> +
> +        P.now = tsc;
> +    }
> +}
> +
> +/* Standardized part of dump output */
> +void create_dump_header(struct record_info *ri, struct pcpu_info *p)
> +{
> +    char * c;
> +    int len, r;
> +
> +    len = DUMP_HEADER_MAX;
> +    c = ri->dump_header;
> +
> +    abs_cycles_to_time(ri->tsc, &ri->t);
> +
> +    if ( ri->t.time )
> +    {
> +        r=snprintf(c, len, "%3u.%09u", ri->t.s, ri->t.ns);
> +        c+=r;
> +        len-=r;
> +    }
> +    else
> +    {
> +        r=snprintf(c,
> +                   len,
> +                   " ");
> +        c+=r;
> +        len-=r;
> +    }
> +
> +    r = snprintf(c, len, " %s", pcpu_string(ri->cpu));
> +    c+=r;
> +    len-=r;
> +
> +    if ( p->current )
> +    {
> +        r = snprintf(c, len, " d%dv%d", p->current->d->did, p->current->vid);
> +        c+=r;
> +        len-=r;
> +    }
> +    else
> +    {
> +        r = snprintf(c, len, " d?v?");
> +        c+=r;
> +        len-=r;
> +    }
> +}
> +
> +int find_toplevel_event(struct record_info *ri)
> +{
> +    int toplevel=0, i, count;
> +
> +    for(i=0, count=0; i<TOPLEVEL_MAX; i++)
> +        if(ri->evt.main & (1UL<<i))
> +        {
> +            toplevel=i;
> +            count++;
> +        }
> +
> +    /* Sanity check: One and only one bit should be set */
> +    if(count != 1)
> +    {
> +        fprintf(warn, "FATAL: unexpected number of bits (%d) in evt.main! event %x main %x sub %x minor %x\n",
> +                count,
> +                ri->event,
> +                ri->evt.main, ri->evt.sub, ri->evt.minor);
> +        error(ERR_RECORD, NULL);
> +        return -1;
> +    }
> +
> +    return toplevel;
> +}
> +
> +
> +void process_cpu_change(struct pcpu_info *p) {
> +    struct record_info *ri = &p->ri;
> +    struct cpu_change_data *r = (typeof(r))ri->d;
> +
> +    if(opt.dump_all && verbosity >= 6) {
> +        printf("]%s cpu_change this-cpu %u record-cpu %u window_size %u(0x%08x)\n",
> +               ri->dump_header, p->pid, r->cpu, r->window_size,
> +               r->window_size);
> +    }
> +
> +    /* File sanity check */
> +    if(p->file_offset != p->next_cpu_change_offset) {
> +        fprintf(warn, "Strange, pcpu %d expected offset %llx, actual %llx!\n",
> +                p->pid, (unsigned long long)p->next_cpu_change_offset,
> +                (unsigned long long)p->file_offset);
> +    }
> +
> +    if(r->cpu > MAX_CPUS)
> +    {
> +        fprintf(stderr, "FATAL: cpu %d > MAX_CPUS %d.\n",
> +                r->cpu, MAX_CPUS);
> +        /* Actually file, but takes some work to skip */
> +        error(ERR_ASSERT, NULL);
> +    }
> +
> +    /* Detect beginning of new "epoch" while scanning through file */
> +    if((p->last_cpu_change_pid > r->cpu)
> +       && (p->file_offset > P.last_epoch_offset)) {
> +        P.last_epoch_offset = p->file_offset;
> +    }
> +
> +    /* If that pcpu has never been activated, activate it. */
> +    if(!P.pcpu[r->cpu].active && P.pcpu[r->cpu].file_offset == 0)
> +    {
> +        struct pcpu_info * p2 = P.pcpu + r->cpu;
> +
> +        p2->active = 1;
> +        if(r->cpu > P.max_active_pcpu)
> +            P.max_active_pcpu = r->cpu;
> +
> +        /* Taking this record as the first record should make everything
> +         * run swimmingly. 
> +
> +
> +void process_cpu_change(struct pcpu_info *p) {
> +    struct record_info *ri = &p->ri;
> +    struct cpu_change_data *r = (typeof(r))ri->d;
> +
> +    if(opt.dump_all && verbosity >= 6) {
> +        printf("]%s cpu_change this-cpu %u record-cpu %u window_size %u(0x%08x)\n",
> +               ri->dump_header, p->pid, r->cpu, r->window_size,
> +               r->window_size);
> +    }
> +
> +    /* File sanity check */
> +    if(p->file_offset != p->next_cpu_change_offset) {
> +        fprintf(warn, "Strange, pcpu %d expected offset %llx, actual %llx!\n",
> +                p->pid, (unsigned long long)p->next_cpu_change_offset,
> +                (unsigned long long)p->file_offset);
> +    }
> +
> +    if(r->cpu > MAX_CPUS)
> +    {
> +        fprintf(stderr, "FATAL: cpu %d > MAX_CPUS %d.\n",
> +                r->cpu, MAX_CPUS);
> +        /* Actually file, but takes some work to skip */
> +        error(ERR_ASSERT, NULL);
> +    }
> +
> +    /* Detect beginning of new "epoch" while scanning through file */
> +    if((p->last_cpu_change_pid > r->cpu)
> +       && (p->file_offset > P.last_epoch_offset)) {
> +        P.last_epoch_offset = p->file_offset;
> +    }
> +
> +    /* If that pcpu has never been activated, activate it. */
> +    if(!P.pcpu[r->cpu].active && P.pcpu[r->cpu].file_offset == 0)
> +    {
> +        struct pcpu_info * p2 = P.pcpu + r->cpu;
> +
> +        p2->active = 1;
> +        if(r->cpu > P.max_active_pcpu)
> +            P.max_active_pcpu = r->cpu;
> +
> +        /* Taking this record as the first record should make everything
> +         * run swimmingly. */
> +        p2->ri = *ri;
> +        p2->ri.cpu = r->cpu;
> +        p2->ri.d = p2->ri.rec.u.notsc.data;
> +        p2->file_offset = p->file_offset;
> +        p2->next_cpu_change_offset = p->file_offset;
> +
> +        fprintf(warn, "%s: Activating pcpu %d at offset %lld\n",
> +                __func__, r->cpu, (unsigned long long)p->file_offset);
> +
> +        record_order_insert(p2);
> +
> +        sched_default_vcpu_activate(p2);
> +    }
> +
> +    p->last_cpu_change_pid = r->cpu;
> +
> +    /* If this isn't the cpu we're looking for, skip the whole bunch */
> +    if(p->pid != r->cpu)
> +    {
> +        p->file_offset += ri->size + r->window_size;
> +        p->next_cpu_change_offset = p->file_offset;
> +
> +        if(p->file_offset > G.file_size) {
> +            activate_early_eof();
> +        } else if(P.early_eof && p->file_offset > P.last_epoch_offset) {
> +            fprintf(warn, "%s: early_eof activated, pcpu %d past last_epoch_offset %llx, deactivating.\n",
> +                    __func__, p->pid, (unsigned long long)P.last_epoch_offset);
> +            deactivate_pcpu(p);
> +        }
> +    }
> +    else
> +    {
> +        /* Track information about dom0 scheduling and records */
> +        if(opt.dump_trace_volume_on_lost_record) {
> +            tsc_t cycles;
> +            struct time_struct t;
> +
> +            /* Update dom0 runstates */
> +            cycles = (p->volume.buffer_first_tsc > p->volume.buffer_dom0_runstate_tsc) ?
> +                p->volume.buffer_first_tsc :
> +                p->volume.buffer_dom0_runstate_tsc;
> +            p->volume.buffer_dom0_runstate_cycles[p->volume.buffer_dom0_runstate]
> +                += ri->tsc - cycles;
> +
> +            printf(" - updated p%d dom0_runstate %s to %lld cycles (+%lld)\n",
> +                   p->pid, runstate_name[p->volume.buffer_dom0_runstate],
> +                   p->volume.buffer_dom0_runstate_cycles[p->volume.buffer_dom0_runstate],
> +                   ri->tsc - cycles);
> +
> +            /* print info */
> +            cycles = ri->tsc - p->volume.buffer_first_tsc;
> +            cycles_to_time(cycles, &t);
> +            printf("Buffer time: %u.%09u (%lld cycles)\n",
> +                   t.s, t.ns, cycles);
> +            if(p->volume.buffer_size)
> +                printf("Rate: %lld cycles / byte\n",
> +                       cycles / p->volume.buffer_size);
> +            if(P.buffer_trace_virq_tsc)
> +            {
> +                cycles = ri->tsc - P.buffer_trace_virq_tsc;
> +                cycles_to_time(cycles, &t);
> +                printf("trace_virq latency: %u.%09u (%lld cycles)\n",
> +                       t.s, t.ns, cycles);
> +                P.buffer_trace_virq_tsc = 0;
> +            }
> +            else
> +            {
> +                printf("No trace_virq record found.\n");
> +            }
> +            printf("Dom0 runstates this buffer:\n");
> +            runstate_summary(p->volume.buffer_dom0_runstate_cycles);
> +            volume_summary(&p->volume.last_buffer);
> +
> +            /* reset info */
> +            p->volume.buffer_first_tsc = 0;
> +            p->volume.buffer_size = r->window_size;
> +            runstate_clear(p->volume.buffer_dom0_runstate_cycles);
> +            volume_clear(&p->volume.last_buffer);
> +        }
> +
> +        p->file_offset += ri->size;
> +        p->next_cpu_change_offset = p->file_offset + r->window_size;
> +
> +        if(p->next_cpu_change_offset > G.file_size)
> +            activate_early_eof();
> +        else if(p->pid == P.max_active_pcpu)
> +            scan_for_new_pcpu(p->next_cpu_change_offset);
> +
> +    }
> +}
> +
> +struct tl_assert_mask {
> +    unsigned p_current:1,
> +        not_idle_domain:1;
> +    int vcpu_data_mode;
> +};
> +static struct tl_assert_mask tl_assert_checks[TOPLEVEL_MAX] = {
> +    [TRC_HVM_MAIN]={ .p_current=1, .not_idle_domain=1, .vcpu_data_mode=VCPU_DATA_HVM },
> +    [TRC_SHADOW_MAIN]={ .p_current=1, .not_idle_domain=1, .vcpu_data_mode=VCPU_DATA_HVM },
> +    [TRC_PV_MAIN]={ .p_current=1, .not_idle_domain=1, .vcpu_data_mode=VCPU_DATA_PV },
> +};
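Nice touch making the common preconditions table-driven: if another toplevel class ever needs the same guarantees, it should only take one more designated initializer here (hypothetically, something like [TRC_FOO_MAIN]={ .p_current=1, .not_idle_domain=1, .vcpu_data_mode=VCPU_DATA_PV }) rather than another hand-rolled copy of the checks.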
> +
> +/* There are a lot of common assumptions for the various processing
> + * routines. Check them all in one place, doing something else if
> + * they don't pass. */
> +int toplevel_assert_check(int toplevel, struct pcpu_info *p)
> +{
> +    struct tl_assert_mask mask;
> +
> +    mask = tl_assert_checks[toplevel];
> +
> +    if (mask.p_current && p->current == NULL)
> +    {
> +        fprintf(warn, "WARNING: p->current null! Not processing\n");
> +        goto fail;
> +    }
> +
> +    if( mask.not_idle_domain )
> +    {
> +        /* Can't do this check w/o first doing above check */
> +        assert(mask.p_current);
> +
> +        if ( p->current->d->did == IDLE_DOMAIN) {
> +            fprintf(warn, "WARNING: Unexpected record for idle domain! Not processing\n");
> +            goto fail;
> +        }
> +    }
> +
> +    if ( mask.vcpu_data_mode )
> +    {
> +        struct vcpu_data *v;
> +        assert(mask.p_current);
> +
> +        v = p->current;
> +
> +        if ( ! (v->data_type == VCPU_DATA_NONE
> +                || v->data_type == mask.vcpu_data_mode) )
> +        {
> +            /* This may happen for track_dirty_vram, which causes a
> +             * SHADOW_WRMAP_BF trace from dom0 */
> +            fprintf(warn, "WARNING: Unexpected vcpu data type for d%dv%d on proc %d! Expected %d got %d. Not processing\n",
> +                    v->d->did, v->vid, p->pid,
> +                    mask.vcpu_data_mode,
> +                    v->data_type);
> +            goto fail;
> +        }
> +    }
> +
> +    return 1;
> +
> +fail:
> +    dump_generic(warn, &p->ri);
> +    return 0;
> +}
> +
> +void process_record(struct pcpu_info *p) {
> +    struct record_info *ri = &p->ri;
> +    int toplevel;
> +
> +    /* Process only TRC_TRACE_CPU_CHANGE */
> +    if(ri->event == TRC_TRACE_CPU_CHANGE) {
> +        process_cpu_change(p);
> +        return;
> +    }
> +
> +    if ( opt.dump_no_processing )
> +        goto out;
> +
> +    p->summary = 1;
> +
> +    if( opt.dump_raw_process )
> +        dump_raw("* ", ri);
> +
> +    process_record_tsc(p->order_tsc, ri);
> +
> +    if(opt.dump_all)
> +        create_dump_header(ri, p);
> +
> +
> +    toplevel = find_toplevel_event(ri);
> +    if ( toplevel < 0 )
> +        return;
> +
> +    /* Unify toplevel assertions */
> +    if ( toplevel_assert_check(toplevel, p) )
> +    {
> +        switch(toplevel) {
> +        case TRC_GEN_MAIN:
> +            base_process(p);
> +            break;
> +        case TRC_SCHED_MAIN:
> +            sched_process(p);
> +            break;
> +        case TRC_HVM_MAIN:
> +            hvm_process(p);
> +            break;
> +        case TRC_SHADOW_MAIN:
> +            shadow_process(p);
> +            break;
> +        case TRC_PV_MAIN:
> +            pv_process(p);
> +            break;
> +        case TRC_MEM_MAIN:
> +            mem_process(p);
> +            break;
> +        case TRC_HW_MAIN:
> +            hw_process(p);
> +            break;
> +        case TRC_DOM0OP_MAIN:
> +        default:
> +            process_generic(ri);
> +        }
> +    }
> +
> +    UPDATE_VOLUME(p, toplevel[toplevel], ri->size);
> +
> +    if(!p->volume.buffer_first_tsc)
> +        p->volume.buffer_first_tsc = ri->tsc;
> +
> + out:
> +    /* Lost records get processed twice */
> +    if(ri->event != TRC_LOST_RECORDS)
> +        p->file_offset += ri->size;
> +}
> +
> +static inline ssize_t get_rec_size(struct trace_record *rec) {
> +    ssize_t s;
> +
> +    s = sizeof(uint32_t);
> +
> +    if(rec->cycle_flag)
> +        s += sizeof(tsc_t);
> +
> +    s += rec->extra_words * sizeof(uint32_t);
> +
> +    return s;
> +}
> +
> +#define STDIN 0
> +
> +void progress_child_exec(void) {
> +    fclose(stdin);
> +    dup2(G.progress.pipe[0], STDIN);
> +
> +    execlp("zenity", "zenity", "--progress", "--auto-close", "--title",
> +           "Analyzing", "--text", G.trace_file, "--auto-kill", NULL);
> +}
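For anyone unfamiliar with the zenity trick used here: --progress simply reads integer percentages, one per line, on stdin. A minimal standalone sketch of that protocol, assuming zenity is installed — this is deliberately not how the patch wires it up (the patch uses pipe/fork/execlp so it can disable the bar if the exec fails):

    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        /* popen() keeps the sketch short; the patch manages the pipe itself */
        FILE *z = popen("zenity --progress --auto-close", "w");
        int pct;

        if ( !z )
            return 1;
        for ( pct = 0; pct <= 100; pct += 20 )
        {
            fprintf(z, "%d\n", pct);   /* one percentage per line */
            fflush(z);
            sleep(1);
        }
        return pclose(z);
    }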
> +
> +void progress_init(void) {
> +    int pid;
> +
> +    if (pipe(G.progress.pipe) < 0)
> +        perror("pipe");
> +
> +    if(!(pid = fork())) {
> +        progress_child_exec();
> +
> +        fprintf(stderr, "%s: exec failed (%s), disabling progress bar\n",
> +                __func__, strerror(errno));
> +        opt.progress = 0;
> +        exit(1);
> +    } else if( pid < 0 ) {
> +        fprintf(stderr, "%s: could not fork: %s, disabling progress bar\n",
> +                __func__, strerror(errno));
> +        opt.progress = 0;
> +    }
> +
> +    /* fdopen() returns a FILE pointer, so check for NULL rather than < 0 */
> +    if( (G.progress.out = fdopen(G.progress.pipe[1], "w")) == NULL ) {
> +        fprintf(stderr, "%s: could not fdopen pipe: %s, disabling progress bar\n",
> +                __func__, strerror(errno));
> +        opt.progress = 0;
> +    }
> +
> +}
> +
> +void progress_update(loff_t offset) {
> +    long long p;
> +
> +    p = ( offset * 100 ) / G.file_size;
> +
> +    fprintf(G.progress.out, "%lld\n", p);
> +    fflush(G.progress.out);
> +
> +    p += 1;
> +
> +    G.progress.update_offset = ( G.file_size * p ) / 100;
> +
> +#if 0
> +    fprintf(stderr, "Progress: %lld %% Next update_offset: %lld\n",
> +            p-1,
> +            G.progress.update_offset);
> +#endif
> +}
> +
> +void progress_finish(void) {
> +    int pid;
> +
> +    fprintf(G.progress.out, "100\n");
> +    fflush(G.progress.out);
> +    fclose(G.progress.out);
> +
> +    wait(NULL);
> +
> +    if(!(pid = fork())) {
> +        /* Child */
> +        char text[128];
> +
> +        snprintf(text, 128, "Finished analyzing %s",
> +                 G.trace_file);
> +        execlp("zenity", "zenity", "--info", "--text", text, NULL);
> +    }
> +}
> +
> +ssize_t __read_record(struct trace_record *rec, loff_t offset)
> +{
> +    ssize_t r, rsize;
> +
> +    r=mread64(G.mh, rec, sizeof(*rec), offset);
> +
> +    if(r < 0) {
> +        /* Read error */
> +        perror("read");
> +        fprintf(stderr, "offset %llx\n", (unsigned long long)offset);
> +        return 0;
> +    } else if(r==0) {
> +        /* End-of-file */
> +        return 0;
> +    } else if(r < sizeof(uint32_t)) {
> +        /* Full header not read */
> +        fprintf(stderr, "%s: short read (%zd bytes)\n",
> +                __func__, r);
> +        error(ERR_SYSTEM, NULL);
> +    }
> +
> +    rsize=get_rec_size(rec);
> +
> +    if(r < rsize) {
> +        /* Full record not read */
> +        fprintf(stderr, "%s: short read (%zd, expected %zd)\n",
> +                __func__, r, rsize);
> +        return 0;
> +    }
> +
> +    return rsize;
> +}
> +
> +void __fill_in_record_info(struct pcpu_info *p)
> +{
> +    struct record_info *ri;
> +    tsc_t tsc=0;
> +
> +    ri = &p->ri;
> +
> +    ri->event = ri->rec.event;
> +    ri->extra_words = ri->rec.extra_words;
> +
> +    if(ri->rec.cycle_flag) {
> +        tsc = (((tsc_t)ri->rec.u.tsc.tsc_hi) << 32)
> +            | ri->rec.u.tsc.tsc_lo;
> +
> +        tsc += p->tsc_skew.offset;
> +
> +        ri->tsc = tsc;
> +        ri->d = ri->rec.u.tsc.data;
> +
> +        if(p->first_tsc == 0)
> +            p->first_tsc = tsc;
> +
> +        /* We process lost records twice: once at the first_tsc,
> +           once at the time it was placed in the log */
> +        if(ri->event == TRC_LOST_RECORDS && ri->extra_words == 4) {
> +            struct lost_record_struct *r = (typeof(r))ri->d;
> +            p->order_tsc = r->first_tsc + p->tsc_skew.offset;
> +        } else
> +            p->order_tsc = tsc;
> +
> +        p->last_tsc = tsc;
> +    } else {
> +        ri->tsc = p->last_tsc;
> +        ri->d = ri->rec.u.notsc.data;
> +    }
> +
> +    if ( opt.dump_raw_reads ) {
> +        char s[256];
> +        snprintf(s, 256, "R p%2d o%016llx ",
> +                 p->pid, (unsigned long long)p->file_offset);
> +        dump_raw(s, ri);
> +    }
> +
> +    /* Updated tracing uses CPU_CHANGE. If we hit one of these,
> +     * it will be processed next (since the tsc isn't updated), and
> +     * we'll skip forward appropriately. */
> +    ri->cpu = p->pid;
> +}
> +
> +ssize_t read_record(struct pcpu_info * p) {
> +    loff_t * offset;
> +    struct record_info *ri;
> +
> +    offset = &p->file_offset;
> +    ri = &p->ri;
> +
> +    ri->size = __read_record(&ri->rec, *offset);
> +    if(ri->size)
> +    {
> +        __fill_in_record_info(p);
> +    }
> +    else
> +    {
> +        fprintf(warn, "%s: read returned zero, deactivating pcpu %d\n",
> +                __func__, p->pid);
> +        deactivate_pcpu(p);
> +    }
> +
> +    return ri->size;
> +}
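The hi/lo reassembly plus skew adjustment in __fill_in_record_info() is the one subtle bit of record parsing, so here is a standalone illustration of just that arithmetic (names are mine, not the patch's):

    #include <assert.h>
    #include <stdint.h>

    static uint64_t rec_tsc(uint32_t hi, uint32_t lo, int64_t skew)
    {
        /* two 32-bit trace words -> 64-bit tsc, then per-pcpu skew */
        return ((((uint64_t)hi) << 32) | lo) + (uint64_t)skew;
    }

    int main(void)
    {
        assert(rec_tsc(0x1, 0x2, 0) == 0x100000002ULL);
        assert(rec_tsc(0x1, 0x2, -2) == 0x100000000ULL);
        return 0;
    }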
> +
> +/*
> + * This function gets called for every record when doing a dump. Try to
> + * make it efficient by changing the minimum amount from the last
> + * call. Do this by:
> + * - Keeping track of the last pcpu called, so we can just set that to -
> + * - Keeping track of how many pcpus we've "drawn", and only "drawing" new ones
> + * - Updating the current one
> + *
> + * FIXME: Need to deal with pcpu states changing...
> + *
> + * WARNING not thread-safe
> + */
> +
> +char __pcpu_string[MAX_CPUS+1] = { 0 };
> +void pcpu_string_draw(struct pcpu_info *p)
> +{
> +    char *s = __pcpu_string;
> +    int i=p->pid;
> +
> +    if(p->lost_record.active)
> +        s[i]='l';
> +    else if (!p->current)
> +        s[i]=' ';
> +    else if (p->current->d->did == DEFAULT_DOMAIN)
> +        s[i]='.';
> +    else if (p->current->d->did == IDLE_DOMAIN)
> +    {
> +        if ( opt.dump_show_power_states )
> +            s[i]=p->power_state+'0';
> +        else
> +            s[i]='-';
> +    }
> +    else
> +        s[i]='|';
> +}
> +
> +char * pcpu_string(int pcpu)
> +{
> +    char *s = __pcpu_string;
> +    static int max_active_pcpu=-1, last_pcpu=-1;
> +
> +    assert(P.max_active_pcpu < MAX_CPUS);
> +    assert(pcpu <= P.max_active_pcpu);
> +
> +    if(last_pcpu >= 0)
> +        pcpu_string_draw(P.pcpu+last_pcpu);
> +
> +    if(P.max_active_pcpu > max_active_pcpu)
> +    {
> +        int i;
> +        for(i=max_active_pcpu + 1; i<= P.max_active_pcpu; i++)
> +            pcpu_string_draw(P.pcpu+i);
> +        max_active_pcpu=P.max_active_pcpu;
> +    }
> +
> +    s[pcpu]='x';
> +    last_pcpu = pcpu;
> +
> +    return s;
> +}
> +
> +/* Null terminated */
> +struct pcpu_info *record_order[MAX_CPUS+1] = { 0 };
> +
> +/* In the case of identical tsc values, the old algorithm would favor the
> + * pcpu with the lowest number. By default the new algorithm favors the
> + * pcpu which has been processed most recently.
> + *
> + * I think the second way is better; but it's good to be able to use the
> + * old ordering, at the very least to verify that there are no (other)
> + * ordering differences. Enabling the below flag will cause the
> + * insertion / bubble routines to order by pcpu id as well as tsc,
> + * preserving the old order. */
> +//#define PRESERVE_PCPU_ORDERING
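To make the invariant concrete for other readers: record_order[] is kept sorted by order_tsc, so record_order[0] is always the next record to process, and after a pcpu consumes a record only that one entry needs to be re-sunk into place. A self-contained model of the bubble step below, on plain ints and with my own hypothetical names:

    #include <stdio.h>

    static void bubble_down(int *a, int n)
    {
        int i, v = a[0];   /* only a[0] may be out of order */

        for ( i = 0; i + 1 < n && a[i + 1] < v; i++ )
            a[i] = a[i + 1];
        a[i] = v;
    }

    int main(void)
    {
        int tsc[] = { 90, 20, 50, 70 };
        bubble_down(tsc, 4);
        printf("%d %d %d %d\n", tsc[0], tsc[1], tsc[2], tsc[3]); /* 20 50 70 90 */
        return 0;
    }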
> +
> +/* Steady state:
> + * + Entire list is in order, except (potentially) for the first entry
> + * + last is pointing to the first entry.
> + */
> +void record_order_bubble(struct pcpu_info *last)
> +{
> +    int i;
> +
> +    /* Find the pcpu to "bubble". This is usually the
> +     * first one, but if other pcpus have been activated, it may
> +     * not be. */
> +    for(i=0; record_order[i] && record_order[i]!=last; i++);
> +
> +    assert(record_order[i]);
> +
> +    /* Now bubble it down */
> +    for( ;
> +         record_order[i+1]
> +             && ( record_order[i+1]->order_tsc < last->order_tsc
> +#ifdef PRESERVE_PCPU_ORDERING
> +                  || ( record_order[i+1]->order_tsc == last->order_tsc
> +                       && record_order[i+1]->pid < last->pid )
> +#endif
> +                ) ;
> +         i++)
> +        record_order[i]=record_order[i+1];
> +    record_order[i]=last;
> +}
> +
> +void record_order_insert(struct pcpu_info *new)
> +{
> +    int i;
> +    struct pcpu_info *p=NULL, *t=NULL;
> +
> +    /* Sanity check: Make sure it's not already in there */
> +    for(i=0; record_order[i]; i++)
> +        assert(record_order[i]!=new);
> +
> +    /* Find where to insert it */
> +    for(i=0;
> +        record_order[i]
> +            && ( record_order[i]->order_tsc < new->order_tsc
> +#ifdef PRESERVE_PCPU_ORDERING
> +                 || ( record_order[i]->order_tsc == new->order_tsc
> +                      && record_order[i]->pid < new->pid )
> +#endif
> +               ) ;
> +        i++)
> +        ;
> +
> +    /* And insert it */
> +    for( p=new; p ; i++)
> +    {
> +        t=record_order[i];
> +        record_order[i]=p;
> +        p=t;
> +    }
> +}
> +
> +void record_order_remove(struct pcpu_info *rem)
> +{
> +    int i;
> +
> +    /* Find where the record is */
> +    for(i=0; record_order[i] && record_order[i]!=rem; i++)
> +        ;
> +
> +    /* Sanity check: Make sure it's actually there! */
> +    assert(record_order[i]);
> +
> +    /* And move everyone forward */
> +    for(; (record_order[i]=record_order[i+1]); i++)
> +        ;
> +}
> +
> +struct pcpu_info * choose_next_record(void)
> +{
> +    struct pcpu_info *min_p=NULL;
> +
> +    min_p=record_order[0];
> +
> +    if(opt.progress && min_p && min_p->file_offset >= G.progress.update_offset)
> +        progress_update(min_p->file_offset);
> +
> +    /* If there are active pcpus, make sure we choose one */
> +    assert(min_p || (P.max_active_pcpu==-1));
> +
> +    return min_p;
> +}
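To save the next reader a puzzle: lost records really are meant to be processed twice. __fill_in_record_info() orders them first at the lost area's first_tsc; then the loop below re-queues the same record as TRC_LOST_RECORDS_END at the tsc when the lost record itself was written, without reading a new record in between.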
> +
> +void process_records(void) {
> +    while(1) {
> +        struct pcpu_info *p = NULL;
> +
> +        if(!(p=choose_next_record()))
> +            return;
> +
> +        process_record(p);
> +
> +        /* Lost records get processed twice. */
> +        if(p->ri.event == TRC_LOST_RECORDS) {
> +            p->ri.event = TRC_LOST_RECORDS_END;
> +            if(p->ri.tsc > p->order_tsc)
> +                p->order_tsc = p->ri.tsc;
> +            else {
> +                fprintf(warn, "Strange, lost_record ri->tsc %lld !> p->order_tsc %lld!\n",
> +                        p->ri.tsc, p->order_tsc);
> +                error(ERR_FILE, NULL);
> +            }
> +        }
> +        else
> +            read_record(p);
> +
> +        /* Update this pcpu in the processing order */
> +        if ( p->active )
> +            record_order_bubble(p);
> +    }
> +}
> +
> +void vcpu_summary(struct vcpu_data *v)
> +{
> +    printf("-- v%d --\n", v->vid);
> +    sched_summary_vcpu(v);
> +    switch(v->data_type) {
> +    case VCPU_DATA_HVM:
> +        hvm_summary(&v->hvm);
> +        break;
> +    case VCPU_DATA_PV:
> +        pv_summary(&v->pv);
> +        break;
> +    default:
> +        break;
> +    }
> +}
> +
> +void domain_summary(void)
> +{
> +    struct domain_data * d;
> +    int i;
> +
> +    if(opt.show_default_domain_summary) {
> +        d = &default_domain;
> +        printf("|-- Default domain --|\n");
> +
> +        for( i = 0; i < MAX_CPUS ; i++ )
> +        {
> +            if(d->vcpu[i])
> +                vcpu_summary(d->vcpu[i]);
> +        }
> +    }
> +
> +    for ( d = domain_list ; d ; d=d->next )
> +    {
> +        int i;
> +        printf("|-- Domain %d --|\n", d->did);
> +
> +        sched_summary_domain(d);
> +
> +        mem_summary_domain(d);
> +
> +        for( i = 0; i < MAX_CPUS ; i++ )
> +        {
> +            if(d->vcpu[i])
> +                vcpu_summary(d->vcpu[i]);
> +        }
> +
> +        printf("Emulate eip list\n");
> +        dump_eip(d->emulate_eip_list);
> +
> +        if ( opt.with_interrupt_eip_enumeration )
> +        {
> +            printf("Interrupt eip list (vector %d)\n",
> +                   opt.interrupt_eip_enumeration_vector);
> +            dump_eip(d->interrupt_eip_list);
> +        }
> +
> +        cr3_dump_list(d->cr3_value_head);
> +    }
> +}
> +
> +char * stringify_cpu_hz(long long cpu_hz);
> +
> +void summary(void) {
> +    int i;
> +    printf("Total time: %.2lf seconds (using cpu speed %s)\n",
> +           ((double)(P.f.total_cycles))/opt.cpu_hz,
> +           stringify_cpu_hz(opt.cpu_hz));
> +    printf("--- Log volume summary ---\n");
> +    for(i=0; i<MAX_CPUS; i++)
> +    {
> +        struct pcpu_info *p = P.pcpu+i;
> +        if(!p->summary)
> +            continue;
> +        printf(" - cpu %d -\n", i);
> +        volume_summary(&p->volume.total);
> +    }
> +    domain_summary();
> +}
> +
> +void report_pcpu(void) {
> +    int i, active=0;
> +
> +    for(i=0; i<MAX_CPUS; i++)
> +    {
> +        struct pcpu_info *p = P.pcpu+i;
> +        if(!p->summary)
> +            continue;
> +        printf("pcpu %d\n", i);
> +
> +        print_cycle_summary(&p->time.running, " running");
> +        print_cycle_summary(&p->time.idle, " idle");
> +        print_cycle_summary(&p->time.lost, " lost");
> +
> +        if ( p->time.running.count )
> +            active++;
> +    }
> +    printf("Total active cpus: %d\n", active);
> +
> +}
> +
> +void init_pcpus(void) {
> +    int i=0;
> +    loff_t offset = 0;
> +
> +    for(i=0; i<MAX_CPUS; i++)
> +    {
> +        P.pcpu[i].pid=i;
> +        P.pcpu[i].lost_record.seen_valid_schedule=1;
> +        P.pcpu[i].power_state=CSTATE_INVALID;
> +    }
> +
> +    P.max_active_pcpu = -1;
> +
> +    sched_default_domain_init();
> +
> +    /* Scan through the cpu_change recs until we see a duplicate */
> +    do {
> +        offset = scan_for_new_pcpu(offset);
> +
> +        if(!offset) {
> +            fprintf(warn, "%s: through first trace write, done for now.\n",
> +                    __func__);
> +        }
> +    } while(offset);
> +
> +}
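Worth noting how cheap this initial pass is: scan_for_new_pcpu() only ever reads the cpu_change record at the head of each buffer and then jumps ahead by window_size, so discovering every pcpu touches a handful of bytes per trace buffer rather than the whole file.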
> +
> +enum {
> +    OPT_NULL=0,
> +    /* Dumping info */
> +    OPT_DUMP_RAW_READS,
> +    OPT_DUMP_RAW_PROCESS,
> +    OPT_DUMP_NO_PROCESSING,
> +    OPT_DUMP_IPI_LATENCY,
> +    OPT_DUMP_TRACE_VOLUME_ON_LOST_RECORD,
> +    OPT_DUMP_SHOW_POWER_STATES,
> +    /* Extra tracking functionality */
> +    OPT_WITH_CR3_ENUMERATION,
> +    OPT_WITH_PIO_ENUMERATION,
> +    OPT_WITH_MMIO_ENUMERATION,
> +    OPT_WITH_INTERRUPT_EIP_ENUMERATION,
> +    OPT_SCATTERPLOT_INTERRUPT_EIP,
> +    OPT_SCATTERPLOT_CPI,
> +    OPT_SCATTERPLOT_UNPIN_PROMOTE,
> +    OPT_SCATTERPLOT_CR3_SWITCH,
> +    OPT_SCATTERPLOT_WAKE_TO_HALT,
> +    OPT_SCATTERPLOT_IO,
> +    OPT_SCATTERPLOT_VMEXIT_EIP,
> +    OPT_SCATTERPLOT_RUNSTATE,
> +    OPT_SCATTERPLOT_RUNSTATE_TIME,
> +    OPT_SCATTERPLOT_PCPU,
> +    OPT_SCATTERPLOT_EXTINT_CYCLES,
> +    OPT_SCATTERPLOT_RDTSC,
> +    OPT_SCATTERPLOT_IRQ,
> +    OPT_HISTOGRAM_INTERRUPT_EIP,
> +    /* Interval options */
> +    OPT_INTERVAL_CR3_SCHEDULE_TIME,
> +    OPT_INTERVAL_CR3_SCHEDULE_TIME_ALL,
> +    OPT_INTERVAL_CR3_SCHEDULE_ORDERED,
> +    OPT_INTERVAL_CR3_SHORT_SUMMARY,
> +    OPT_INTERVAL_DOMAIN_TOTAL_TIME,
> +    OPT_INTERVAL_DOMAIN_TOTAL_TIME_ALL,
> +    OPT_INTERVAL_DOMAIN_SHORT_SUMMARY,
> +    OPT_INTERVAL_DOMAIN_GUEST_INTERRUPT,
> +    OPT_INTERVAL_DOMAIN_GRANT_MAPS,
> +    /* Summary info */
> +    OPT_SHOW_DEFAULT_DOMAIN_SUMMARY,
> +    OPT_MMIO_ENUMERATION_SKIP_VGA,
> +    OPT_SAMPLE_SIZE,
> +    OPT_REPORT_PCPU,
> +    /* Guest info */
> +    OPT_DEFAULT_GUEST_PAGING_LEVELS,
> +    OPT_SYMBOL_FILE,
> +    /* Hardware info */
> +    OPT_SVM_MODE,
> +    OPT_CPU_HZ,
> +    /* Misc */
> +    OPT_PROGRESS,
> +    OPT_TOLERANCE,
> +    OPT_TSC_LOOP_FATAL,
> +    /* Specific letters */
> +    OPT_DUMP_ALL='a',
> +    OPT_INTERVAL_LENGTH='i',
> +    OPT_SUMMARY='s',
> +};
> +
> +enum {
> +    OPT_GROUP_SUMMARY=1,
> +    OPT_GROUP_DUMP,
> +    OPT_GROUP_INTERVAL,
> +    OPT_GROUP_EXTRA,
> +    OPT_GROUP_GUEST,
> +    OPT_GROUP_HARDWARE
> +};
> +
> +#define xstr(x) str(x)
> +#define str(x) #x
> +
> +#define GHZ 1000000000LL
> +#define MHZ 1000000LL
> +#define KHZ 1000LL
> +
> +void parse_cpu_hz(char * arg) {
> +    float hz_base;
> +    char * next_ptr;
> +
> +    hz_base=strtof(arg, &next_ptr);
> +    if(next_ptr == arg) {
> +        fprintf(stderr, "Invalid cpu_hz %s\n", arg);
> +        exit(1);
> +    }
> +    switch(*next_ptr) {
> +    case '\0':
> +        opt.cpu_hz=(long long)hz_base;
> +        break;
> +    case 'G':
> +        opt.cpu_hz= hz_base * GHZ;
> +        break;
> +    case 'M':
> +        opt.cpu_hz=hz_base * MHZ;
> +        break;
> +    case 'K':
> +        opt.cpu_hz=hz_base * KHZ;
> +        break;
> +    default:
> +        fprintf(stderr, "Unknown suffix %c\n", *next_ptr);
> +        exit(1);
> +    }
> +    /* Just a convenient pre-calculation */
> +    opt.cpu_qhz = QHZ_FROM_HZ(opt.cpu_hz);
> +}
> +
> +/* WARNING not thread-safe */
> +char * stringify_cpu_hz(long long cpu_hz) {
> +    static char cpu_string[20], suffix;
> +    float hz;
> +
> +    if(cpu_hz > GHZ) {
> +        hz = (float)cpu_hz / GHZ;
> +        suffix = 'G';
> +    } else if(cpu_hz > MHZ) {
> +        hz = (float)cpu_hz / MHZ;
> +        suffix = 'M';
> +    } else if(cpu_hz > KHZ) {
> +        hz = (float)cpu_hz / KHZ;
> +        suffix = 'k';
> +    } else {
> +        hz = cpu_hz;
> +        suffix = ' ';
> +    }
> +
> +    snprintf(cpu_string, 20, "%1.2lf %cHz", hz, suffix);
> +
> +    return cpu_string;
> +}
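Since --cpu-hz strings are a common tripping point, examples of what the suffix parsing above accepts: "2G", "800M", "1000K", or a raw number of Hz. One thing to be aware of (harmless at this scale, but worth knowing): hz_base is a float, so a value like 2.4G is rounded to the nearest representable float multiple. A standalone sketch of the same logic, using my own names rather than the patch's:

    #include <assert.h>
    #include <stdlib.h>

    static long long parse_hz(const char *s)
    {
        char *end;
        float base = strtof(s, &end);

        switch ( *end )
        {
        case 'G': return (long long)(base * 1000000000LL);
        case 'M': return (long long)(base * 1000000LL);
        case 'K': return (long long)(base * 1000LL);
        default:  return (long long)base; /* no suffix: already in Hz */
        }
    }

    int main(void)
    {
        /* "2G" and "800M" are exact in float, so these hold */
        assert(parse_hz("2G") == 2000000000LL);
        assert(parse_hz("800M") == 800000000LL);
        return 0;
    }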
> +
> +int parse_array(char *arg, struct array_struct *a) {
> +    char *p, *q;
> +    int n=1, i;
> +
> +    /* Count the number of commas (and thus the number of elements) */
> +    for(p=arg; *p; p++)
> +        if(*p == ',')
> +            n++;
> +
> +    fprintf(warn, "%s: Found %d elements\n", __func__, n);
> +    fflush(warn);
> +    a->count = n;
> +    a->values = malloc(n * sizeof(unsigned long long));
> +
> +    if(!a->values) {
> +        fprintf(stderr, "Malloc failed!\n");
> +        error(ERR_SYSTEM, NULL);
> +    }
> +
> +    /* Now parse the elements */
> +    p = q = arg;
> +    for(i=0; i<n; i++) {
> +        a->values[i] = strtoull(p, &q, 0);
> +        if(p == q) {
> +            fprintf(stderr, "Bad format: %s\n", q);
> +            return -1;
> +        }
> +        fprintf(warn, "%s: Found element 0x%llx (%lld)\n",
> +                __func__, a->values[i],
> +                a->values[i]);
> +        fflush(warn);
> +        if(*q == ',')
> +            q++;
> +        else if(*q != '\0') {
> +            fprintf(stderr, "Bad format: %s\n", q);
> +            return -1;
> +        }
> +        p=q;
> +    }
> +
> +    return n;
> +}
> +
> +error_t cmd_parser(int key, char *arg, struct argp_state *state)
> +{
> +    switch (key)
> +    {
> +        /* Dump group */
> +    case OPT_DUMP_ALL:
> +        opt.dump_all = 1;
> +        G.output_defined = 1;
> +        break;
> +    case OPT_DUMP_RAW_READS:
> +        opt.dump_raw_reads = 1;
> +        G.output_defined = 1;
> +        break;
> +    case OPT_DUMP_NO_PROCESSING:
> +        opt.dump_no_processing = 1;
> +        opt.dump_raw_reads = 1;
> +        G.output_defined = 1;
> +        break;
> +    case OPT_DUMP_RAW_PROCESS:
> +        opt.dump_raw_process = 1;
> +        G.output_defined = 1;
> +        break;
> +    case OPT_DUMP_IPI_LATENCY:
> +        opt.dump_ipi_latency = 1;
> +        break;
> +    case OPT_DUMP_TRACE_VOLUME_ON_LOST_RECORD:
> +        opt.dump_trace_volume_on_lost_record = 1;
> +        break;
> +    case OPT_DUMP_SHOW_POWER_STATES:
> +        opt.dump_show_power_states = 1;
> +        break;
> +        /* Extra group */
> +    case OPT_WITH_CR3_ENUMERATION:
> +        opt.with_cr3_enumeration=1;
> +        break;
> +    case OPT_WITH_PIO_ENUMERATION:
> +        opt.with_pio_enumeration=1;
> +        break;
> +    case OPT_WITH_MMIO_ENUMERATION:
> +        opt.with_mmio_enumeration=1;
> +        break;
> +    case OPT_SHOW_DEFAULT_DOMAIN_SUMMARY:
> +        opt.show_default_domain_summary=1;
> +        break;
> +    case OPT_SAMPLE_SIZE:
> +    {
> +        char * inval;
> +        opt.sample_size = (int)strtol(arg, &inval, 0);
> +        if( inval == arg )
> +            argp_usage(state);
> +        break;
> +    }
> +    case OPT_MMIO_ENUMERATION_SKIP_VGA:
> +    {
> +        char * inval;
> +        opt.mmio_enumeration_skip_vga = (int)strtol(arg, &inval, 0);
> +        if( inval == arg )
> +            argp_usage(state);
> +        break;
> +    }
> +    case OPT_SCATTERPLOT_INTERRUPT_EIP:
> +    {
> +        char * inval;
> +        G.output_defined = 1;
> +        opt.scatterplot_interrupt_eip=1;
> +        opt.scatterplot_interrupt_vector = (int)strtol(arg, &inval, 0);
> +        if( inval == arg )
> +            argp_usage(state);
> +    }
> +    break;
> +    case OPT_WITH_INTERRUPT_EIP_ENUMERATION:
> +    {
> +        char * inval;
> +        opt.with_interrupt_eip_enumeration=1;
> +        opt.interrupt_eip_enumeration_vector = (int)strtol(arg, &inval, 0);
> +        if( inval == arg )
> +            argp_usage(state);
> +    }
> +    break;
> +    case OPT_SCATTERPLOT_CPI:
> +        G.output_defined = 1;
> +        opt.scatterplot_cpi=1;
> +        break;
> +    case OPT_SCATTERPLOT_UNPIN_PROMOTE:
> +        G.output_defined = 1;
> +        opt.scatterplot_unpin_promote=1;
> +        break;
> +    case OPT_SCATTERPLOT_CR3_SWITCH:
> +        G.output_defined = 1;
> +        opt.scatterplot_cr3_switch=1;
> +        break;
> +    case OPT_SCATTERPLOT_WAKE_TO_HALT:
> +        G.output_defined = 1;
> +        opt.scatterplot_wake_to_halt=1;
> +        break;
> +    case OPT_SCATTERPLOT_VMEXIT_EIP:
> +        G.output_defined = 1;
> +        opt.scatterplot_vmexit_eip=1;
> +        break;
> +    case OPT_SCATTERPLOT_EXTINT_CYCLES:
> +    {
> +        char * inval;
> +        G.output_defined = 1;
> +        opt.scatterplot_extint_cycles=1;
> +        opt.scatterplot_extint_cycles_vector = (int)strtol(arg, &inval, 0);
> +        if( inval == arg )
> +            argp_usage(state);
> +    }
> +    break;
> +    case OPT_SCATTERPLOT_RDTSC:
> +        G.output_defined = 1;
> +        opt.scatterplot_rdtsc=1;
> +        break;
> +    case OPT_SCATTERPLOT_IRQ:
> +        G.output_defined = 1;
> +        opt.scatterplot_irq=1;
> +        break;
> +    case OPT_SCATTERPLOT_IO:
> +    {
> +        char * inval;
> +        G.output_defined = 1;
> +        opt.scatterplot_io=1;
> +        opt.scatterplot_io_port = (int)strtol(arg, &inval, 0);
> +        if( inval == arg )
> +            argp_usage(state);
> +    }
> +    break;
> +    case OPT_SCATTERPLOT_RUNSTATE:
> +        G.output_defined = 1;
> +        opt.scatterplot_runstate=1;
> +        break;
> +    case OPT_SCATTERPLOT_RUNSTATE_TIME:
> +        G.output_defined = 1;
> +        opt.scatterplot_runstate_time=1;
> +        break;
> +    case OPT_SCATTERPLOT_PCPU:
> +        G.output_defined = 1;
> +        opt.scatterplot_pcpu=1;
> +        break;
> +    case OPT_HISTOGRAM_INTERRUPT_EIP:
> +    {
> +        char * inval, *p;
> +
> +        opt.histogram_interrupt_eip=1;
> +        opt.histogram_interrupt_vector = (int)strtol(arg, &inval, 0);
> +
> +        if( inval == arg )
> +            argp_usage(state);
> +
> +        p = inval;
> +
> +        if(*p == ',')
> +            opt.histogram_interrupt_increment = (unsigned long long)strtoull(p+1, &inval, 0);
> +        else
> +            opt.histogram_interrupt_increment = 0x1000000;
> +
> +        printf("Making histogram of eips at interrupt %d, increment %llx\n",
> +               opt.histogram_interrupt_vector,
> +               opt.histogram_interrupt_increment);
> +    }
> +    break;
> +
> +    case OPT_INTERVAL_LENGTH:
> +    {
> +        char * inval;
> +
> +        opt.interval.msec = (unsigned) (strtof(arg, &inval) * 1000);
> +
> +        if ( inval == arg )
> +            argp_usage(state);
> +
> +        break;
> +    }
> +
> +    case OPT_INTERVAL_CR3_SCHEDULE_TIME:
> +    {
> +        if(parse_array(arg, &opt.interval.array) < 0)
> +            goto usage;
> +        interval_table_alloc(opt.interval.array.count);
> +        opt.interval.output = INTERVAL_CR3_SCHEDULE_TIME;
> +        opt.interval.check = INTERVAL_CHECK_CR3;
> +        opt.interval.mode = INTERVAL_MODE_ARRAY;
> +        opt.interval_mode = 1;
> +        opt.summary_info = 1;
> +        opt.with_cr3_enumeration = 1;
> +        G.output_defined = 1;
> +        break;
> +    usage:
> +        fprintf(stderr, "Invalid input for cr3_schedule_time\n");
> +        argp_usage(state);
> +        break;
> +    }
> +
> +    case OPT_INTERVAL_CR3_SCHEDULE_TIME_ALL:
> +        opt.interval.output = INTERVAL_CR3_SCHEDULE_TIME;
> +        opt.interval.check = INTERVAL_CHECK_CR3;
> +        opt.interval.mode = INTERVAL_MODE_LIST;
> +        opt.interval_mode = 1;
> +        opt.summary_info = 1;
> +        opt.with_cr3_enumeration = 1;
> +        G.output_defined = 1;
> +        break;
> +
> +    case OPT_INTERVAL_CR3_SCHEDULE_ORDERED:
> +        opt.interval.output = INTERVAL_CR3_SCHEDULE_ORDERED;
> +        opt.interval.check = INTERVAL_CHECK_CR3;
> +        opt.interval_mode = 1;
> +        opt.summary_info = 1;
> +        opt.with_cr3_enumeration = 1;
> +        G.output_defined = 1;
> +        break;
> +
> +    case OPT_INTERVAL_CR3_SHORT_SUMMARY:
> +    {
> +        if(parse_array(arg, &opt.interval.array) < 0
> +           || opt.interval.array.count != 1)
> +            goto usage;
> +        opt.interval.output = INTERVAL_CR3_SHORT_SUMMARY;
> +        opt.interval.check = INTERVAL_CHECK_CR3;
> +        opt.interval_mode = 1;
> +        opt.summary_info = 1;
> +        opt.with_cr3_enumeration = 1;
> +        G.output_defined = 1;
> +        break;
> +    }
> +
> +    case OPT_INTERVAL_DOMAIN_TOTAL_TIME:
> +    {
> +        if(parse_array(arg, &opt.interval.array) < 0)
> +            goto idtt_usage;
> +        interval_table_alloc(opt.interval.array.count);
> +        opt.interval.output = INTERVAL_DOMAIN_TOTAL_TIME;
> +        opt.interval.check = INTERVAL_CHECK_DOMAIN;
> +        opt.interval.mode = INTERVAL_MODE_ARRAY;
> +        opt.interval_mode = 1;
> +        opt.summary_info = 1;
> +        G.output_defined = 1;
> +        break;
> +    idtt_usage:
> +        fprintf(stderr, "Invalid input for domain_total_time\n");
> +        argp_usage(state);
> +        break;
> +    }
> +
> +    case OPT_INTERVAL_DOMAIN_TOTAL_TIME_ALL:
> +        opt.interval.output = INTERVAL_DOMAIN_TOTAL_TIME;
> +        opt.interval.check = INTERVAL_CHECK_DOMAIN;
> +        opt.interval.mode = INTERVAL_MODE_LIST;
> +        opt.interval_mode = 1;
> +        opt.summary_info = 1;
> +        G.output_defined = 1;
> +        break;
> +
> +    case OPT_INTERVAL_DOMAIN_SHORT_SUMMARY:
> +    {
> +        if((parse_array(arg, &opt.interval.array) < 0)
> +           || opt.interval.array.count != 1)
> +            argp_usage(state);
> +
> +        opt.interval.output = INTERVAL_DOMAIN_SHORT_SUMMARY;
> +        opt.interval.check = INTERVAL_CHECK_DOMAIN;
> +        opt.interval_mode = 1;
> +        opt.summary_info = 1;
> +        G.output_defined = 1;
> +        break;
> +    }
> +
> +    case OPT_INTERVAL_DOMAIN_GUEST_INTERRUPT:
> +    {
> +        if((parse_array(arg, &opt.interval.array) < 0)
> +           || opt.interval.array.count != 1)
> +            argp_usage(state);
> +
> +        opt.interval.output = INTERVAL_DOMAIN_GUEST_INTERRUPT;
> +        opt.interval.check = INTERVAL_CHECK_DOMAIN;
> +        opt.interval_mode = 1;
> +        opt.summary_info = 1;
> +        G.output_defined = 1;
> +        break;
> +    }
> +
> +    case OPT_INTERVAL_DOMAIN_GRANT_MAPS:
> +    {
> +        if((parse_array(arg, &opt.interval.array) < 0)
> +           || opt.interval.array.count != 1)
> +            argp_usage(state);
> +
> +        opt.interval.output = INTERVAL_DOMAIN_GRANT_MAPS;
> +        opt.interval.check = INTERVAL_CHECK_DOMAIN;
> +        opt.interval_mode = 1;
> +        opt.summary_info = 1;
> +        G.output_defined = 1;
> +        break;
> +    }
> +
> +        /* Summary group */
> +    case OPT_SUMMARY:
> +        opt.summary = 1;
> +        opt.summary_info = 1;
> +        G.output_defined = 1;
> +        break;
> +    case OPT_REPORT_PCPU:
> +        opt.report_pcpu = 1;
> +        //opt.summary_info = 1;
> +        G.output_defined = 1;
> +        break;
> +        /* Guest info group */
> +    case OPT_DEFAULT_GUEST_PAGING_LEVELS:
> +    {
> +        char *inval;
> +        opt.default_guest_paging_levels = (int)strtol(arg, &inval, 0);
> +        if ( inval == arg )
> +            argp_usage(state);
> +    }
> +    break;
> +    case OPT_SYMBOL_FILE:
> +        /* FIXME - strcpy */
> +        G.symbol_file = arg;
> +        break;
> +        /* Hardware info group */
> +    case OPT_SVM_MODE:
> +        opt.svm_mode = 1;
> +        break;
> +    case OPT_CPU_HZ:
> +        parse_cpu_hz(arg);
> +        break;
> +
> +    case OPT_TOLERANCE:
> +    {
> +        char * inval;
> +
> +        opt.tolerance = (int)strtol(arg, &inval, 0);
> +
> +        if( inval == arg )
> +            argp_usage(state);
> +
> +        if ( opt.tolerance > ERR_MAX_TOLERABLE )
> +        {
> +            fprintf(stderr, "ERROR: Max tolerable error %d\n",
> +                    ERR_MAX_TOLERABLE);
> +            exit(1);
> +        }
> +
> +        printf("Tolerating errors at or below %d\n",
> +               opt.tolerance);
> +    }
> +    break;
> +
> +    case OPT_PROGRESS:
> +        opt.progress = 1;
> +        break;
> +
> +    case OPT_TSC_LOOP_FATAL:
> +        opt.tsc_loop_fatal = 1;
> +        break;
> +
> +    case ARGP_KEY_ARG:
> +    {
> +        /* FIXME - strcpy */
> +        if (state->arg_num == 0)
> +            G.trace_file = arg;
> +        else
> +            argp_usage(state);
> +    }
> +    break;
> +    case ARGP_KEY_END:
> +    {
> +        if(opt.interval_mode) {
> +            opt.interval.cycles = ( opt.interval.msec * opt.cpu_hz ) / 1000 ;
> +            interval_header();
> +        }
> +
> +        if(!G.output_defined)
> +        {
> +            fprintf(stderr, "No output defined, using summary.\n");
> +            opt.summary = 1;
> +            opt.summary_info = 1;
> +        }
> +        fprintf(stderr, "Using %s hardware-assisted virtualization.\n",
> +                opt.svm_mode?"SVM":"VMX");
> +    }
> +    break;
> +
> +    default:
> +        return ARGP_ERR_UNKNOWN;
> +    }
> +
> +    return 0;
> +}
> +
> +const struct argp_option cmd_opts[] = {
> +    /* Dump group */
> +    { .name = "dump-all",
> +      .key = OPT_DUMP_ALL,
> +      .group = OPT_GROUP_DUMP,
> +      .doc = "Dump all records as they come in.", },
> +
> +    { .name = "dump-raw-reads",
> +      .key = OPT_DUMP_RAW_READS,
> +      .group = OPT_GROUP_DUMP,
> +      .doc = "Dump raw data as it's read from disk. Useful mainly for debugging the analysis tool.", },
> +
> +    { .name = "dump-no-processing",
> +      .key = OPT_DUMP_NO_PROCESSING,
> +      .group = OPT_GROUP_DUMP,
> +      .doc = "Don't do any processing on records other than cpu changes. Implies dump-raw-reads (or you wouldn't get anything).", },
> +
> +    { .name = "dump-raw-process",
> +      .key = OPT_DUMP_RAW_PROCESS,
> +      .group = OPT_GROUP_DUMP,
> +      .doc = "Dump raw data as it's processed. Useful mainly for debugging the analysis tool.", },
> +
> +    { .name = "dump-ipi-latency",
> +      .key = OPT_DUMP_IPI_LATENCY,
> +      .group = OPT_GROUP_DUMP,
> +      .doc = "Dump IPI latency info as IPIs are delivered (vector 0xd1 only).", },
> +
> +    { .name = "dump-trace-volume-on-lost-record",
> +      .key = OPT_DUMP_TRACE_VOLUME_ON_LOST_RECORD,
> +      .group = OPT_GROUP_DUMP,
> +      .doc = "Dump the volume of trace types in the previous cpu buffer when a lost record is created.", },
> +
> +    { .name = "dump-show-power-states",
> +      .key = OPT_DUMP_SHOW_POWER_STATES,
> +      .group = OPT_GROUP_DUMP,
> +      .doc = "Show the power-state of the physical cpu when dumping output.", },
> +
> +    /* Extra processing group */
> +    { .name = "with-cr3-enumeration",
> +      .key = OPT_WITH_CR3_ENUMERATION,
> +      .group = OPT_GROUP_EXTRA,
> +      .doc = "Keep track of per-cr3 values", },
> +
> +    { .name = "with-pio-enumeration",
> +      .key = OPT_WITH_PIO_ENUMERATION,
> +      .group = OPT_GROUP_EXTRA,
> +      .doc = "Report summary info on individual IO addresses", },
> +
> +    { .name = "with-mmio-enumeration",
> +      .key = OPT_WITH_MMIO_ENUMERATION,
> +      .group = OPT_GROUP_EXTRA,
> +      .doc = "Report summary info on individual MMIO addresses.", },
> +
> +    { .name = "with-interrupt-eip-enumeration",
> +      .key = OPT_WITH_INTERRUPT_EIP_ENUMERATION,
> +      .arg = "vector",
> +      .group = OPT_GROUP_EXTRA,
> +      .doc = "Report a summary on eips interrupted by specified vector.", },
> +
> +    { .name = "scatterplot-interrupt-eip",
> +      .key = OPT_SCATTERPLOT_INTERRUPT_EIP,
> +      .arg = "vector",
> +      .group = OPT_GROUP_EXTRA,
> +      .doc = "Output scatterplot of eips as a function of time.", },
> +
> +    { .name = "scatterplot-extint-cycles",
> +      .key = OPT_SCATTERPLOT_EXTINT_CYCLES,
> +      .arg = "vector",
> +      .group = OPT_GROUP_EXTRA,
> +      .doc = "Output a scatterplot of vmexit cycles for external interrupts of the given vector as a function of time.", },
> +
> +    { .name = "scatterplot-cpi",
> +      .key = OPT_SCATTERPLOT_CPI,
> +      .group = OPT_GROUP_EXTRA,
> +      .doc = "Output scatterplot of cpi.", },
> +
> +    { .name = "scatterplot-unpin-promote",
> +      .key = OPT_SCATTERPLOT_UNPIN_PROMOTE,
> +      .group = OPT_GROUP_EXTRA,
> +      .doc = "Output scatterplot of unpins and promotions. If " \
> +      "--with-cr3-enumeration is included, promotions include current cr3.", },
> +
> +    { .name = "scatterplot-cr3-switch",
> +      .key = OPT_SCATTERPLOT_CR3_SWITCH,
> +      .group = OPT_GROUP_EXTRA,
> +      .doc = "Output scatterplot of cr3 switches.", },
> +
> +    { .name = "scatterplot-wake-to-halt",
> +      .key = OPT_SCATTERPLOT_WAKE_TO_HALT,
> +      .group = OPT_GROUP_EXTRA,
> +      .doc = "Output scatterplot of wake-to-halt.", },
> +
> +    { .name = "scatterplot-vmexit-eip",
> +      .key = OPT_SCATTERPLOT_VMEXIT_EIP,
> +      .group = OPT_GROUP_EXTRA,
> +      .doc = "Output scatterplot of vmexit eips.", },
> +
> +    { .name = "scatterplot-io",
> +      .key = OPT_SCATTERPLOT_IO,
> +      .arg = "port",
> +      .group = OPT_GROUP_EXTRA,
> +      .doc = "Output scatterplot of io latencies for given address as a function of time.", },
> +
> +    { .name = "scatterplot-runstate",
> +      .key = OPT_SCATTERPLOT_RUNSTATE,
> +      .group = OPT_GROUP_EXTRA,
> +      .doc = "Output scatterplot of runstate.", },
> +
> +    { .name = "scatterplot-runstate-time",
> +      .key = OPT_SCATTERPLOT_RUNSTATE_TIME,
> +      .group = OPT_GROUP_EXTRA,
> +      .doc = "Output scatterplot of time in a runstate.", },
> +
> +    { .name = "scatterplot-pcpu",
> +      .key = OPT_SCATTERPLOT_PCPU,
> +      .group = OPT_GROUP_EXTRA,
> +      .doc = "Output scatterplot of which pcpu vcpus are run on.", },
> +
> +    { .name = "scatterplot-rdtsc",
> +      .key = OPT_SCATTERPLOT_RDTSC,
> +      .group = OPT_GROUP_EXTRA,
> +      .doc = "Output scatterplot of rdtsc values.", },
> +
> +    { .name = "scatterplot-irq",
> +      .key = OPT_SCATTERPLOT_IRQ,
> +      .group = OPT_GROUP_EXTRA,
> +      .doc = "Output scatterplot of irqs on pcpus.", },
> +
> +    { .name = "histogram-interrupt-eip",
> +      .key = OPT_HISTOGRAM_INTERRUPT_EIP,
> +      .arg = "vector[,increment]",
> +      .group = OPT_GROUP_EXTRA,
> +      .doc = "Output histograms of eips.", },
> +
> +    { .name = "interval",
> +      .key = OPT_INTERVAL_LENGTH,
> +      .arg = "sec",
> +      .group = OPT_GROUP_INTERVAL,
> +      .doc = "Interval length to do time-based graphs, in seconds", },
> +
> +    { .name = "interval-cr3-schedule-time",
> +      .key = OPT_INTERVAL_CR3_SCHEDULE_TIME,
> +      .arg = "gmfn[,gmfn...]",
> +      .group = OPT_GROUP_INTERVAL,
> +      .doc = "Print a csv with the listed cr3 value(s) every interval.", },
> +
> +    { .name = "interval-cr3-schedule-time-all",
> +      .key = OPT_INTERVAL_CR3_SCHEDULE_TIME_ALL,
> +      .group = OPT_GROUP_INTERVAL,
> +      .doc = "Print a csv with all cr3 values every interval.", },
> +
> +    { .name = "interval-cr3-schedule-ordered",
> +      .key = OPT_INTERVAL_CR3_SCHEDULE_ORDERED,
> +      .group = OPT_GROUP_INTERVAL,
> +      .doc = "Print summary with the top 10 cr3 values every interval.", },
> +
> +    { .name = "interval-cr3-short-summary",
> +      .key = OPT_INTERVAL_CR3_SHORT_SUMMARY,
> +      .arg = "gmfn",
> +      .group = OPT_GROUP_INTERVAL,
> +      .doc = "Print a csv with the hvm short summary of cr3 value every interval.", },
> +
> +    { .name = "interval-domain-total-time",
> +      .key = OPT_INTERVAL_DOMAIN_TOTAL_TIME,
> +      .arg = "domain[,domain...]",
> +      .group = OPT_GROUP_INTERVAL,
> +      .doc = "Print a csv with the listed domain(s) total runtime every interval.", },
> +
> +    { .name = "interval-domain-total-time-all",
> +      .key = OPT_INTERVAL_DOMAIN_TOTAL_TIME_ALL,
> +      .group = OPT_GROUP_INTERVAL,
> +      .doc = "Print a csv with all domains every interval.", },
> +
> +    { .name = "interval-domain-short-summary",
> +      .key = OPT_INTERVAL_DOMAIN_SHORT_SUMMARY,
> +      .arg = "domain-id",
> +      .group = OPT_GROUP_INTERVAL,
> +      .doc = "Print a csv with the hvm short summary of given domain every interval.", },
"interval-domain-guest-interrupt", > + .key = OPT_INTERVAL_DOMAIN_GUEST_INTERRUPT, > + .arg = "domain-id", > + .group = OPT_GROUP_INTERVAL, > + .doc = "Print a csv with the guest interrupt count of given domain > every interval.", }, > + > + { .name = "interval-domain-grant-maps", > + .key = OPT_INTERVAL_DOMAIN_GRANT_MAPS, > + .arg = "domain-id", > + .group = OPT_GROUP_INTERVAL, > + .doc = "Print a csv with the grant maps done on behalf of a given > domain every interval.", }, > + > + /* Summary group */ > + { .name = "show-default-domain-summary", > + .key = OPT_SHOW_DEFAULT_DOMAIN_SUMMARY, > + .group = OPT_GROUP_SUMMARY, > + .doc = "Show default domain information on summary", }, > + > + { .name = "mmio-enumeration-skip-vga", > + .key = OPT_MMIO_ENUMERATION_SKIP_VGA, > + .arg = "[0|1]", > + .group = OPT_GROUP_SUMMARY, > + .doc = "Control whether we enumerate MMIO accesses to the VGA area, > which can be extremly high during boot. Default: 0", }, > + > + { .name = "sample-size", > + .key = OPT_SAMPLE_SIZE, > + .arg = "size", > + .group = OPT_GROUP_SUMMARY, > + .doc = "Keep [size] samples for percentile purposes. Enter 0 to " \ > + "disable. Default 10240.", }, > + > + { .name = "summary", > + .key = OPT_SUMMARY, > + .group = OPT_GROUP_SUMMARY, > + .doc = "Output a summary", }, > + > + { .name = "report-pcpu", > + .key = OPT_REPORT_PCPU, > + .group = OPT_GROUP_SUMMARY, > + .doc = "Report utilization for pcpus", }, > + > + /* Guest info */ > + { .name = "default-guest-paging-levels", > + .key = OPT_DEFAULT_GUEST_PAGING_LEVELS, > + .group = OPT_GROUP_GUEST, > + .arg = "L", > + .doc = "Default guest paging levels. Mainly necessary for Rio, as > Miami traces include guest paging levels where appropriate.", }, > + > + { .name = "symbol-file", > + .key = OPT_SYMBOL_FILE, > + .group = OPT_GROUP_GUEST, > + .arg = "filename", > + .doc = "A symbol file for interpreting guest eips.", }, > + > + /* Hardware info */ > + { .name = "cpu-hz", > + .key = OPT_CPU_HZ, > + .group = OPT_GROUP_HARDWARE, > + .arg = "HZ", > + .doc = "Cpu speed of the tracing host, used to convert tsc into > seconds.", }, > + > + { .name = "svm-mode", > + .key = OPT_SVM_MODE, > + .group = OPT_GROUP_HARDWARE, > + .doc = "Assume AMD SVM-style vmexit error codes. (Default is Intel > VMX.)", }, > + > + { .name = "progress", > + .key = OPT_PROGRESS, > + .doc = "Progress dialog. Requires the zenity (GTK+) executable.", }, > + > + { .name = "tsc-loop-fatal", > + .key = OPT_TSC_LOOP_FATAL, > + .doc = "Stop processing and exit if tsc skew tracking detects a > dependency loop.", }, > + > + { .name = "tolerance", > + .key = OPT_TOLERANCE, > + .arg = "errlevel", > + .doc = "Sets tolerance for errors found in the file. Default is 3; > max is 6.", }, > + > + > + { 0 }, > +}; > + > +const struct argp parser_def = { > + .options = cmd_opts, > + .parser = cmd_parser, > + .args_doc = "[trace file]", > + .doc = "", > +}; > + > +const char *argp_program_version = "xenalyze - Open-source xen-unstable > (3.4)"; > +const char *argp_program_bug_address = "George Dunlap > <george.dunlap@xxxxxxxxxxxxx>"; > + > + > +int main(int argc, char *argv[]) { > + /* Start with warn at stderr. 
> +
> +
> +int main(int argc, char *argv[]) {
> +    /* Start with warn at stderr. */
> +    warn = stderr;
> +
> +    argp_parse(&parser_def, argc, argv, 0, NULL, NULL);
> +
> +    if (G.trace_file == NULL)
> +        exit(1);
> +
> +    if ( (G.fd = open(G.trace_file, O_RDONLY|O_LARGEFILE)) < 0) {
> +        perror("open");
> +        error(ERR_SYSTEM, NULL);
> +    } else {
> +        struct stat64 s;
> +        fstat64(G.fd, &s);
> +        G.file_size = s.st_size;
> +    }
> +
> +    if ( (G.mh = mread_init(G.fd)) == NULL )
> +        perror("mread");
> +
> +    if (G.symbol_file != NULL)
> +        parse_symbol_file(G.symbol_file);
> +
> +    if(opt.dump_all)
> +        warn = stdout;
> +
> +    init_pcpus();
> +
> +    if(opt.progress)
> +        progress_init();
> +
> +    process_records();
> +
> +    if(opt.interval_mode)
> +        interval_tail();
> +
> +    if(opt.summary)
> +        summary();
> +
> +    if(opt.report_pcpu)
> +        report_pcpu();
> +
> +    if(opt.progress)
> +        progress_finish();
> +
> +    return 0;
> +}
> +/*
> + * Local variables:
> + * mode: C
> + * c-set-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel