[Xen-devel] [PATCH] xenoprofile x86_64
Attached are patches for xenoprofile on x86_64. These are not "production ready", but they do work on EM64T so far. I have not added support for Opteron just yet (but will very soon). I wanted to get these out ASAP in case anyone wanted to try them. There are not too many changes from Renato's patches, mainly use of KERNEL_MODE instead of RING_1, u64's here and there, and new x86_64 specific files. I have not tested these patches on i386 (some changes needed). These should apply on changeset 6315. -Andrew Signed-off-by: Andrew Theurer <habanero@xxxxxxxxxx> diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/Makefile ./xen/arch/x86/Makefile --- ../xen-unstable.hg-6251/xen/arch/x86/Makefile 2005-08-19 23:46:22 -05:00 +++ ./xen/arch/x86/Makefile 2005-08-18 20:28:44 -05:00 @@ -33,7 +33,10 @@ ifneq ($(crash_debug),y) OBJS := $(patsubst cdb%.o,,$(OBJS)) endif +OBJS += oprofile/oprofile.o + default: $(TARGET) + make -C oprofile $(TARGET): $(TARGET)-syms boot/mkelf32 ./boot/mkelf32 $(TARGET)-syms $(TARGET) 0x100000 @@ -60,6 +63,9 @@ asm-offsets.s: $(TARGET_SUBARCH)/asm-off boot/mkelf32: boot/mkelf32.c $(HOSTCC) $(HOSTCFLAGS) -o $@ $< +oprofile/oprofile.o: + $(MAKE) -C oprofile + clean: rm -f *.o *.s *~ core boot/*.o boot/*~ boot/core boot/mkelf32 rm -f x86_32/*.o x86_32/*~ x86_32/core @@ -68,5 +74,6 @@ clean: rm -f acpi/*.o acpi/*~ acpi/core rm -f genapic/*.o genapic/*~ genapic/core rm -f cpu/*.o cpu/*~ cpu/core + rm -f oprofile/*.o .PHONY: default clean diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/nmi.c ./xen/arch/x86/nmi.c --- ../xen-unstable.hg-6251/xen/arch/x86/nmi.c 2005-08-19 23:46:22 -05:00 +++ ./xen/arch/x86/nmi.c 2005-08-18 20:28:44 -05:00 @@ -5,6 +5,10 @@ * * Started by Ingo Molnar <mingo@xxxxxxxxxx> * + * Modified by Aravind Menon for supporting oprofile + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. + * * Fixes: * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. * Mikael Pettersson : Power Management for local APIC NMI watchdog. @@ -35,6 +39,28 @@ static unsigned int nmi_p4_cccr_val; static struct ac_timer nmi_timer[NR_CPUS]; static unsigned int nmi_timer_ticks[NR_CPUS]; +/* + * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: + * - it may be reserved by some other driver, or not + * - when not reserved by some other driver, it may be used for + * the NMI watchdog, or not + * + * This is maintained separately from nmi_active because the NMI + * watchdog may also be driven from the I/O APIC timer. + */ +static spinlock_t lapic_nmi_owner_lock = SPIN_LOCK_UNLOCKED; +static unsigned int lapic_nmi_owner; +#define LAPIC_NMI_WATCHDOG (1<<0) +#define LAPIC_NMI_RESERVED (1<<1) + +/* nmi_active: + * +1: the lapic NMI watchdog is active, but can be disabled + * 0: the lapic NMI watchdog has not been set up, and cannot + * be enabled + * -1: the lapic NMI watchdog is disabled, but can be enabled + */ +int nmi_active; + #define K7_EVNTSEL_ENABLE (1 << 22) #define K7_EVNTSEL_INT (1 << 20) #define K7_EVNTSEL_OS (1 << 17) @@ -66,8 +92,6 @@ static unsigned int nmi_timer_ticks[NR_C * max threshold. [IA32-Vol3, Section 14.9.9] */ #define MSR_P4_IQ_COUNTER0 0x30C -#define MSR_P4_IQ_CCCR0 0x36C -#define MSR_P4_CRU_ESCR0 0x3B8 /* ESCR no. 4 */ #define P4_NMI_CRU_ESCR0 P4_ESCR_EVENT_SELECT(0x3F) #define P4_NMI_IQ_CCCR0 \ (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ @@ -124,6 +148,70 @@ static inline void nmi_pm_init(void) { } * Original code written by Keith Owens. 
*/ +static void disable_lapic_nmi_watchdog(void) +{ + if (nmi_active <= 0) + return; + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + wrmsr(MSR_K7_EVNTSEL0, 0, 0); + break; + case X86_VENDOR_INTEL: + switch (boot_cpu_data.x86) { + case 6: + wrmsr(MSR_P6_EVNTSEL0, 0, 0); + break; + case 15: + if ( (smp_num_siblings <= 1) || + ( (smp_processor_id() % smp_num_siblings) == 0) ) + { + wrmsr(MSR_P4_IQ_CCCR0, 0, 0); + wrmsr(MSR_P4_CRU_ESCR0, 0, 0); + } else { + wrmsr(MSR_P4_IQ_CCCR1, 0, 0); + } + break; + } + break; + } + nmi_active = -1; + /* tell do_nmi() and others that we're not active any more */ + nmi_watchdog = 0; +} + +static void enable_lapic_nmi_watchdog(void) +{ + if (nmi_active < 0) { + nmi_watchdog = NMI_LOCAL_APIC; + setup_apic_nmi_watchdog(); + } +} + +int reserve_lapic_nmi(void) +{ + unsigned int old_owner; + spin_lock(&lapic_nmi_owner_lock); + old_owner = lapic_nmi_owner; + lapic_nmi_owner |= LAPIC_NMI_RESERVED; + spin_unlock(&lapic_nmi_owner_lock); + if (old_owner & LAPIC_NMI_RESERVED) + return -EBUSY; + if (old_owner & LAPIC_NMI_WATCHDOG) + disable_lapic_nmi_watchdog(); + return 0; +} + +void release_lapic_nmi(void) +{ + unsigned int new_owner; + spin_lock(&lapic_nmi_owner_lock); + new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED; + lapic_nmi_owner = new_owner; + spin_unlock(&lapic_nmi_owner_lock); + if (new_owner & LAPIC_NMI_WATCHDOG) + enable_lapic_nmi_watchdog(); +} + static void __pminit clear_msr_range(unsigned int base, unsigned int n) { unsigned int i; @@ -241,6 +329,9 @@ void __pminit setup_apic_nmi_watchdog(vo init_ac_timer(&nmi_timer[cpu], nmi_timer_fn, NULL, cpu); + lapic_nmi_owner = LAPIC_NMI_WATCHDOG; + nmi_active = 1; + nmi_pm_init(); } @@ -337,3 +428,7 @@ void nmi_watchdog_tick(struct cpu_user_r wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1); } } + +EXPORT_SYMBOL(reserve_lapic_nmi); +EXPORT_SYMBOL(release_lapic_nmi); + diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/Makefile ./xen/arch/x86/oprofile/Makefile --- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/Makefile 1969-12-31 18:00:00 -06:00 +++ ./xen/arch/x86/oprofile/Makefile 2005-08-18 20:28:44 -05:00 @@ -0,0 +1,9 @@ + +include $(BASEDIR)/Rules.mk + +default: $(OBJS) + $(LD) $(LDFLAGS) -r -o oprofile.o $(OBJS) + +%.o: %.c $(HDRS) Makefile + $(CC) $(CFLAGS) -c $< -o $@ + diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/nmi_int.c ./xen/arch/x86/oprofile/nmi_int.c --- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/nmi_int.c 1969-12-31 18:00:00 -06:00 +++ ./xen/arch/x86/oprofile/nmi_int.c 2005-08-19 19:32:01 -05:00 @@ -0,0 +1,444 @@ +/** + * @file nmi_int.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon <levon@xxxxxxxxxxxxxxxxx> + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. 
+ */ + +#include <xen/event.h> +#include <xen/types.h> +#include <xen/errno.h> +#include <xen/init.h> +#include <public/xen.h> +#include <asm/nmi.h> +#include <asm/msr.h> +#include <asm/apic.h> +#include <asm/regs.h> +#include <asm/current.h> +#include <xen/delay.h> + +#include "op_counter.h" +#include "op_x86_model.h" + +static struct op_x86_model_spec const * model; +static struct op_msrs cpu_msrs[NR_CPUS]; +static unsigned long saved_lvtpc[NR_CPUS]; + +#define VIRQ_BITMASK_SIZE (MAX_OPROF_DOMAINS/32 + 1) + +extern int active_domains[MAX_OPROF_DOMAINS]; +extern unsigned int adomains; + +extern struct domain * primary_profiler; +extern struct domain * adomain_ptrs[MAX_OPROF_DOMAINS]; +extern unsigned long virq_ovf_pending[VIRQ_BITMASK_SIZE]; + +extern int is_active(struct domain *d); +extern int active_id(struct domain *d); +extern int is_passive(struct domain *d); +extern int is_profiled(struct domain *d); + + +int nmi_profiling_started = 0; + +int active_virq_count = 0; +int passive_virq_count = 0; +int other_virq_count = 0; +int other_id = -1; +int xen_count = 0; +int dom_count = 0; +int ovf = 0; + +int nmi_callback(struct cpu_user_regs * regs, int cpu) +{ + int xen_mode = 0; + + ovf = model->check_ctrs(cpu, &cpu_msrs[cpu], regs); + xen_mode = RING_0(regs); + if (ovf) { + if (xen_mode) + xen_count++; + else + dom_count++; + + if (is_active(current->domain)) { + /* This is slightly incorrect. If we do not deliver + OVF virtual interrupts in a synchronous + manner, a process switch may happen in the domain + between the point the sample was collected and + the point at which a VIRQ was delivered. However, + it is not safe to call send_guest_virq from this + NMI context, it may lead to a deadlock since NMIs are + unmaskable. One optimization that we can do is + that if the sample occurs while domain code is + runnng, we know that it is safe to call + send_guest_virq, since we know no Xen code + is running at that time. 
+ However, this may distort the sample distribution, + because we may lose more Xen mode samples.*/ + active_virq_count++; + if (!xen_mode) { + send_guest_virq(current, VIRQ_PMC_OVF); + clear_bit(active_id(current->domain), &virq_ovf_pending[0]); + } else + set_bit(active_id(current->domain), &virq_ovf_pending[0]); + primary_profiler->shared_info->active_samples++; + } + else if (is_passive(current->domain)) { + set_bit(active_id(primary_profiler), &virq_ovf_pending[0]); + passive_virq_count++; + primary_profiler->shared_info->passive_samples++; + } + else { + other_virq_count++; + other_id = current->domain->domain_id; + primary_profiler->shared_info->other_samples++; + } + } + return 1; +} + +static void free_msrs(void) +{ + int i; + for (i = 0; i < NR_CPUS; ++i) { + xfree(cpu_msrs[i].counters); + cpu_msrs[i].counters = NULL; + xfree(cpu_msrs[i].controls); + cpu_msrs[i].controls = NULL; + } +} + +static int allocate_msrs(void) +{ + int success = 1; + size_t controls_size = sizeof(struct op_msr) * model->num_controls; + size_t counters_size = sizeof(struct op_msr) * model->num_counters; + + int i; + for (i = 0; i < NR_CPUS; ++i) { + //if (!cpu_online(i)) + if (!test_bit(i, &cpu_online_map)) + continue; + + cpu_msrs[i].counters = xmalloc_bytes(counters_size); + if (!cpu_msrs[i].counters) { + success = 0; + break; + } + cpu_msrs[i].controls = xmalloc_bytes(controls_size); + if (!cpu_msrs[i].controls) { + success = 0; + break; + } + } + if (!success) + free_msrs(); + + return success; +} + +static void nmi_cpu_save_registers(struct op_msrs * msrs) +{ + unsigned int const nr_ctrs = model->num_counters; + unsigned int const nr_ctrls = model->num_controls; + struct op_msr * counters = msrs->counters; + struct op_msr * controls = msrs->controls; + unsigned int i; + + for (i = 0; i < nr_ctrs; ++i) { + rdmsr(counters[i].addr, + counters[i].saved.low, + counters[i].saved.high); + } + + for (i = 0; i < nr_ctrls; ++i) { + rdmsr(controls[i].addr, + controls[i].saved.low, + controls[i].saved.high); + } +} + +static void nmi_save_registers(void * dummy) +{ + int cpu = smp_processor_id(); + struct op_msrs * msrs = &cpu_msrs[cpu]; + model->fill_in_addresses(msrs); + nmi_cpu_save_registers(msrs); +} + +int nmi_reserve_counters(void) +{ + if (!allocate_msrs()) + return -ENOMEM; + + /* We walk a thin line between law and rape here. + * We need to be careful to install our NMI handler + * without actually triggering any NMIs as this will + * break the core code horrifically. 
+ */ + /* Don't we need to do this on all CPUs?*/ + if (reserve_lapic_nmi() < 0) { + free_msrs(); + return -EBUSY; + } + /* We need to serialize save and setup for HT because the subset + * of msrs are distinct for save and setup operations + */ + on_each_cpu(nmi_save_registers, NULL, 0, 1); + return 0; +} + +static void nmi_cpu_setup(void * dummy) +{ + int cpu = smp_processor_id(); + struct op_msrs * msrs = &cpu_msrs[cpu]; + model->setup_ctrs(msrs); +} + +int nmi_setup_events(void) +{ + on_each_cpu(nmi_cpu_setup, NULL, 0, 1); + return 0; +} + +int nmi_enable_virq() +{ + set_nmi_callback(nmi_callback); + return 0; +} + +static void nmi_cpu_start(void * dummy) +{ + int cpu = smp_processor_id(); + struct op_msrs const * msrs = &cpu_msrs[cpu]; + saved_lvtpc[cpu] = apic_read(APIC_LVTPC); + apic_write(APIC_LVTPC, APIC_DM_NMI); + model->start(msrs); +} + +int nmi_start(void) +{ + on_each_cpu(nmi_cpu_start, NULL, 0, 1); + nmi_profiling_started = 1; + return 0; +} + +static void nmi_cpu_stop(void * dummy) +{ + unsigned int v; + int cpu = smp_processor_id(); + struct op_msrs const * msrs = &cpu_msrs[cpu]; + model->stop(msrs); + + /* restoring APIC_LVTPC can trigger an apic error because the delivery + * mode and vector nr combination can be illegal. That's by design: on + * power on apic lvt contain a zero vector nr which are legal only for + * NMI delivery mode. So inhibit apic err before restoring lvtpc + */ + if (!(apic_read(APIC_LVTPC) & APIC_DM_NMI) + || (apic_read(APIC_LVTPC) & APIC_LVT_MASKED)) { + printk("nmi_stop: APIC not good %ul\n", apic_read(APIC_LVTPC)); + mdelay(5000); + } + v = apic_read(APIC_LVTERR); + apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); + apic_write(APIC_LVTPC, saved_lvtpc[cpu]); + apic_write(APIC_LVTERR, v); +} + +void nmi_stop(void) +{ + nmi_profiling_started = 0; + on_each_cpu(nmi_cpu_stop, NULL, 0, 1); + active_virq_count = 0; + passive_virq_count = 0; + other_virq_count = 0; + xen_count = 0; + dom_count = 0; +} + +extern unsigned int read_ctr(struct op_msrs const * const msrs, int ctr); + +void nmi_sanity_check(struct cpu_user_regs *regs, int cpu) +{ + int i; + int masked = 0; + + /* We may have missed some NMI interrupts if we were already + in an NMI context at that time. If this happens, then + the counters are not reset and in the case of P4, the + APIC LVT disable mask is set. In both cases we end up + losing samples. On P4, this condition can be detected + by checking the APIC LVT mask. But in P6, we need to + examine the counters for overflow. 
So, every timer + interrupt, we check that everything is OK */ + + if (apic_read(APIC_LVTPC) & APIC_LVT_MASKED) + masked = 1; + + nmi_callback(regs, cpu); + + if (ovf && masked) { + if (is_active(current->domain)) + current->domain->shared_info->nmi_restarts++; + else if (is_passive(current->domain)) + primary_profiler->shared_info->nmi_restarts++; + } + + /*if (jiffies %1000 == 0) { + printk("cpu %d: sample count %d %d %d at %u\n", cpu, active_virq_count, passive_virq_count, other_virq_count, jiffies); + printk("other task id %d\n", other_id); + printk("%d in xen, %d in domain\n", xen_count, dom_count); + printk("counters %p %p\n", read_ctr(&cpu_msrs[cpu], 0), read_ctr(&cpu_msrs[cpu], 1)); + }*/ + + + for (i = 0; i < adomains; i++) + if (test_and_clear_bit(i, &virq_ovf_pending[0])) { + /* For now we do not support profiling of SMP guests */ + /* virq is delivered to first VCPU */ + send_guest_virq(adomain_ptrs[i]->vcpu[0], VIRQ_PMC_OVF); + } +} + +void nmi_disable_virq(void) +{ + unset_nmi_callback(); +} + +static void nmi_restore_registers(struct op_msrs * msrs) +{ + unsigned int const nr_ctrs = model->num_counters; + unsigned int const nr_ctrls = model->num_controls; + struct op_msr * counters = msrs->counters; + struct op_msr * controls = msrs->controls; + unsigned int i; + + for (i = 0; i < nr_ctrls; ++i) { + wrmsr(controls[i].addr, + controls[i].saved.low, + controls[i].saved.high); + } + + for (i = 0; i < nr_ctrs; ++i) { + wrmsr(counters[i].addr, + counters[i].saved.low, + counters[i].saved.high); + } +} + +static void nmi_cpu_shutdown(void * dummy) +{ + int cpu = smp_processor_id(); + struct op_msrs * msrs = &cpu_msrs[cpu]; + nmi_restore_registers(msrs); +} + +void nmi_release_counters(void) +{ + on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1); + release_lapic_nmi(); + free_msrs(); +} + +struct op_counter_config counter_config[OP_MAX_COUNTER]; + +static int __init p4_init(void) +{ + __u8 cpu_model = current_cpu_data.x86_model; + printk("cpu model: %d\n", cpu_model); + if (cpu_model > 4) + return 0; + +#ifndef CONFIG_SMP + printk("model is op_p4_spec (uniprocessor)\n"); + model = &op_p4_spec; + return 1; +#else + //switch (smp_num_siblings) { + printk("model is op_p4_ht2_spec (SMP)\n"); + if (cpu_has_ht) + { + model = &op_p4_ht2_spec; + return 1; + } + else + { + printk("model is op_p4_spec (SMP)\n"); + model = &op_p4_spec; + return 1; + } +#endif + return 0; +} + + +static int __init ppro_init(void) +{ + __u8 cpu_model = current_cpu_data.x86_model; + + if (cpu_model > 0xd) + return 0; + + model = &op_ppro_spec; + return 1; +} + +int nmi_init(int *num_events, int *is_primary) +{ + __u8 vendor = current_cpu_data.x86_vendor; + __u8 family = current_cpu_data.x86; + int prim = 0; + + if (!cpu_has_apic) { + printk("(XEN) cpu has no APIC\n"); + return -ENODEV; + } + + if (primary_profiler == NULL) { + primary_profiler = current->domain; + prim = 1; + } + + if (primary_profiler != current->domain) + goto out; + + printk("cpu vendor: %d\n", vendor); + printk("cpu family: %d\n", family); + + switch (vendor) { + case X86_VENDOR_INTEL: + switch (family) { + /* Pentium IV */ + case 0xf: + if (!p4_init()) + return -ENODEV; + break; + /* A P6-class processor */ + case 6: + if (!ppro_init()) + return -ENODEV; + break; + default: + return -ENODEV; + } + break; + default: + return -ENODEV; + } +out: + if (copy_to_user((void *)num_events, (void *)&model->num_counters, sizeof(int))) + return -EFAULT; + if (copy_to_user((void *)is_primary, (void *)&prim, sizeof(int))) + return -EFAULT; + + return 0; +} + 
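For illustration only (not part of the patch): a minimal sketch, in C, of the deferred-delivery scheme that the comment in nmi_callback() above describes. Samples that interrupt Xen itself only set a per-domain bit in virq_ovf_pending from NMI context; the bits are flushed to VIRQ_PMC_OVF later from a safe, maskable context (nmi_sanity_check() in the patch). All symbols are the patch's own except flush_pending_virqs(), which is a hypothetical name for the flush step.

/* Deferred VIRQ delivery sketch; assumes the patch's declarations of
 * virq_ovf_pending[], adomains, adomain_ptrs[] and active_id(). */
static void mark_overflow_sample(struct vcpu *v, int xen_mode)
{
    if (!xen_mode) {
        /* No Xen code was interrupted, so it is safe to notify now. */
        send_guest_virq(v, VIRQ_PMC_OVF);
        clear_bit(active_id(v->domain), &virq_ovf_pending[0]);
    } else {
        /* NMI landed in Xen: notifying here could deadlock, so defer. */
        set_bit(active_id(v->domain), &virq_ovf_pending[0]);
    }
}

/* flush_pending_virqs() is hypothetical; the patch does this work at the
 * end of nmi_sanity_check(), which runs outside NMI context. */
static void flush_pending_virqs(void)
{
    int i;
    for (i = 0; i < adomains; i++)
        if (test_and_clear_bit(i, &virq_ovf_pending[0]))
            send_guest_virq(adomain_ptrs[i]->vcpu[0], VIRQ_PMC_OVF);
}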
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_counter.h ./xen/arch/x86/oprofile/op_counter.h --- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_counter.h 1969-12-31 18:00:00 -06:00 +++ ./xen/arch/x86/oprofile/op_counter.h 2005-08-18 20:28:44 -05:00 @@ -0,0 +1,33 @@ +/** + * @file op_counter.h + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. + */ + +#ifndef OP_COUNTER_H +#define OP_COUNTER_H + +#define OP_MAX_COUNTER 8 + +/* Per-perfctr configuration as set via + * oprofilefs. + */ +struct op_counter_config { + unsigned long count; + unsigned long enabled; + unsigned long event; + unsigned long kernel; + unsigned long user; + unsigned long unit_mask; +}; + +extern struct op_counter_config counter_config[]; + +#endif /* OP_COUNTER_H */ diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_model_p4.c ./xen/arch/x86/oprofile/op_model_p4.c --- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_model_p4.c 1969-12-31 18:00:00 -06:00 +++ ./xen/arch/x86/oprofile/op_model_p4.c 2005-08-19 22:25:07 -05:00 @@ -0,0 +1,748 @@ +/** + * @file op_model_p4.c + * P4 model-specific MSR operations + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Graydon Hoare + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. + */ + +#include <xen/types.h> +#include <asm/msr.h> +#include <asm/io.h> +#include <asm/apic.h> +#include <asm/processor.h> +#include <xen/sched.h> +#include <asm/regs.h> +#include <asm/current.h> + +#include "op_x86_model.h" +#include "op_counter.h" + +#define NUM_EVENTS 39 + +#define NUM_COUNTERS_NON_HT 8 +#define NUM_ESCRS_NON_HT 45 +#define NUM_CCCRS_NON_HT 18 +#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT) + +#define NUM_COUNTERS_HT2 4 +#define NUM_ESCRS_HT2 23 +#define NUM_CCCRS_HT2 9 +#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) + +static unsigned int num_counters = NUM_COUNTERS_NON_HT; + + +/* this has to be checked dynamically since the + hyper-threadedness of a chip is discovered at + kernel boot-time. */ +static inline void setup_num_counters(void) +{ +#ifdef CONFIG_SMP + if (cpu_has_ht) + num_counters = NUM_COUNTERS_HT2; +#endif +} + +static int inline addr_increment(void) +{ +#ifdef CONFIG_SMP + return cpu_has_ht ? 2 : 1; +#else + return 1; +#endif +} + + +/* tables to simulate simplified hardware view of p4 registers */ +struct p4_counter_binding { + int virt_counter; + int counter_address; + int cccr_address; +}; + +struct p4_event_binding { + int escr_select; /* value to put in CCCR */ + int event_select; /* value to put in ESCR */ + struct { + int virt_counter; /* for this counter... */ + int escr_address; /* use this ESCR */ + } bindings[2]; +}; + +/* nb: these CTR_* defines are a duplicate of defines in + event/i386.p4*events. 
*/ + + +#define CTR_BPU_0 (1 << 0) +#define CTR_MS_0 (1 << 1) +#define CTR_FLAME_0 (1 << 2) +#define CTR_IQ_4 (1 << 3) +#define CTR_BPU_2 (1 << 4) +#define CTR_MS_2 (1 << 5) +#define CTR_FLAME_2 (1 << 6) +#define CTR_IQ_5 (1 << 7) + +static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = { + { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 }, + { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 }, + { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 }, + { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 }, + { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 }, + { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 }, + { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 }, + { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 } +}; + +#define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT + +/* All cccr we don't use. */ +static int p4_unused_cccr[NUM_UNUSED_CCCRS] = { + MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3, + MSR_P4_MS_CCCR1, MSR_P4_MS_CCCR3, + MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3, + MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR1, + MSR_P4_IQ_CCCR2, MSR_P4_IQ_CCCR3 +}; + +/* p4 event codes in libop/op_event.h are indices into this table. */ + +static struct p4_event_binding p4_events[NUM_EVENTS] = { + + { /* BRANCH_RETIRED */ + 0x05, 0x06, + { {CTR_IQ_4, MSR_P4_CRU_ESCR2}, + {CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* MISPRED_BRANCH_RETIRED */ + 0x04, 0x03, + { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, + { CTR_IQ_5, MSR_P4_CRU_ESCR1} } + }, + + { /* TC_DELIVER_MODE */ + 0x01, 0x01, + { { CTR_MS_0, MSR_P4_TC_ESCR0}, + { CTR_MS_2, MSR_P4_TC_ESCR1} } + }, + + { /* BPU_FETCH_REQUEST */ + 0x00, 0x03, + { { CTR_BPU_0, MSR_P4_BPU_ESCR0}, + { CTR_BPU_2, MSR_P4_BPU_ESCR1} } + }, + + { /* ITLB_REFERENCE */ + 0x03, 0x18, + { { CTR_BPU_0, MSR_P4_ITLB_ESCR0}, + { CTR_BPU_2, MSR_P4_ITLB_ESCR1} } + }, + + { /* MEMORY_CANCEL */ + 0x05, 0x02, + { { CTR_FLAME_0, MSR_P4_DAC_ESCR0}, + { CTR_FLAME_2, MSR_P4_DAC_ESCR1} } + }, + + { /* MEMORY_COMPLETE */ + 0x02, 0x08, + { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, + { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } + }, + + { /* LOAD_PORT_REPLAY */ + 0x02, 0x04, + { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, + { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } + }, + + { /* STORE_PORT_REPLAY */ + 0x02, 0x05, + { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, + { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } + }, + + { /* MOB_LOAD_REPLAY */ + 0x02, 0x03, + { { CTR_BPU_0, MSR_P4_MOB_ESCR0}, + { CTR_BPU_2, MSR_P4_MOB_ESCR1} } + }, + + { /* PAGE_WALK_TYPE */ + 0x04, 0x01, + { { CTR_BPU_0, MSR_P4_PMH_ESCR0}, + { CTR_BPU_2, MSR_P4_PMH_ESCR1} } + }, + + { /* BSQ_CACHE_REFERENCE */ + 0x07, 0x0c, + { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, + { CTR_BPU_2, MSR_P4_BSU_ESCR1} } + }, + + { /* IOQ_ALLOCATION */ + 0x06, 0x03, + { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, + { 0, 0 } } + }, + + { /* IOQ_ACTIVE_ENTRIES */ + 0x06, 0x1a, + { { CTR_BPU_2, MSR_P4_FSB_ESCR1}, + { 0, 0 } } + }, + + { /* FSB_DATA_ACTIVITY */ + 0x06, 0x17, + { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, + { CTR_BPU_2, MSR_P4_FSB_ESCR1} } + }, + + { /* BSQ_ALLOCATION */ + 0x07, 0x05, + { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, + { 0, 0 } } + }, + + { /* BSQ_ACTIVE_ENTRIES */ + 0x07, 0x06, + { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */}, + { 0, 0 } } + }, + + { /* X87_ASSIST */ + 0x05, 0x03, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* SSE_INPUT_ASSIST */ + 0x01, 0x34, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* PACKED_SP_UOP */ + 0x01, 0x08, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { 
/* PACKED_DP_UOP */ + 0x01, 0x0c, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* SCALAR_SP_UOP */ + 0x01, 0x0a, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* SCALAR_DP_UOP */ + 0x01, 0x0e, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* 64BIT_MMX_UOP */ + 0x01, 0x02, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* 128BIT_MMX_UOP */ + 0x01, 0x1a, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* X87_FP_UOP */ + 0x01, 0x04, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* X87_SIMD_MOVES_UOP */ + 0x01, 0x2e, + { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, + { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } + }, + + { /* MACHINE_CLEAR */ + 0x05, 0x02, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* GLOBAL_POWER_EVENTS */ + 0x06, 0x13 /* older manual says 0x05, newer 0x13 */, + { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, + { CTR_BPU_2, MSR_P4_FSB_ESCR1} } + }, + + { /* TC_MS_XFER */ + 0x00, 0x05, + { { CTR_MS_0, MSR_P4_MS_ESCR0}, + { CTR_MS_2, MSR_P4_MS_ESCR1} } + }, + + { /* UOP_QUEUE_WRITES */ + 0x00, 0x09, + { { CTR_MS_0, MSR_P4_MS_ESCR0}, + { CTR_MS_2, MSR_P4_MS_ESCR1} } + }, + + { /* FRONT_END_EVENT */ + 0x05, 0x08, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* EXECUTION_EVENT */ + 0x05, 0x0c, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* REPLAY_EVENT */ + 0x05, 0x09, + { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, + { CTR_IQ_5, MSR_P4_CRU_ESCR3} } + }, + + { /* INSTR_RETIRED */ + 0x04, 0x02, + { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, + { CTR_IQ_5, MSR_P4_CRU_ESCR1} } + }, + + { /* UOPS_RETIRED */ + 0x04, 0x01, + { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, + { CTR_IQ_5, MSR_P4_CRU_ESCR1} } + }, + + { /* UOP_TYPE */ + 0x02, 0x02, + { { CTR_IQ_4, MSR_P4_RAT_ESCR0}, + { CTR_IQ_5, MSR_P4_RAT_ESCR1} } + }, + + { /* RETIRED_MISPRED_BRANCH_TYPE */ + 0x02, 0x05, + { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, + { CTR_MS_2, MSR_P4_TBPU_ESCR1} } + }, + + { /* RETIRED_BRANCH_TYPE */ + 0x02, 0x04, + { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, + { CTR_MS_2, MSR_P4_TBPU_ESCR1} } + } +}; + + +#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7) + +#define ESCR_RESERVED_BITS 0x80000003 +#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS) +#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2)) +#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3)) +#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1))) +#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) +#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) +#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) +#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) +#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) + +#define CCCR_RESERVED_BITS 0x38030FFF +#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) +#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000) +#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13)) +#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26)) +#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) +#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) +#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) +#define CCCR_READ(low, high, i) do 
{rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) +#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) +#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) +#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) + +#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0) +#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0) +#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) + + +/* this assigns a "stagger" to the current CPU, which is used throughout + the code in this module as an extra array offset, to select the "even" + or "odd" part of all the divided resources. */ +static unsigned int get_stagger(void) +{ +#ifdef CONFIG_SMP + /*int cpu = smp_processor_id(); + return (cpu != first_cpu(cpu_sibling_map[cpu]));*/ + /* We want the two logical cpus of a physical cpu to use + disjoint set of counters. The following code is wrong. */ + return 0; +#endif + return 0; +} + + +/* finally, mediate access to a real hardware counter + by passing a "virtual" counter numer to this macro, + along with your stagger setting. */ +#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger))) + +static unsigned long reset_value[NUM_COUNTERS_NON_HT]; + + +static void p4_fill_in_addresses(struct op_msrs * const msrs) +{ + unsigned int i; + unsigned int addr, stag; + + setup_num_counters(); + stag = get_stagger(); + + /* the counter registers we pay attention to */ + for (i = 0; i < num_counters; ++i) { + msrs->counters[i].addr = + p4_counters[VIRT_CTR(stag, i)].counter_address; + } + + /* FIXME: bad feeling, we don't save the 10 counters we don't use. */ + + /* 18 CCCR registers */ + for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag; + addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + + /* 43 ESCR registers in three or four discontiguous group */ + for (addr = MSR_P4_BSU_ESCR0 + stag; + addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + + /* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1 + * to avoid special case in nmi_{save|restore}_registers() */ + if (boot_cpu_data.x86_model >= 0x3) { + for (addr = MSR_P4_BSU_ESCR0 + stag; + addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + } else { + for (addr = MSR_P4_IQ_ESCR0 + stag; + addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + } + + for (addr = MSR_P4_RAT_ESCR0 + stag; + addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + + for (addr = MSR_P4_MS_ESCR0 + stag; + addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + + for (addr = MSR_P4_IX_ESCR0 + stag; + addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { + msrs->controls[i].addr = addr; + } + + /* there are 2 remaining non-contiguously located ESCRs */ + + if (num_counters == NUM_COUNTERS_NON_HT) { + /* standard non-HT CPUs handle both remaining ESCRs*/ + msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; + msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; + + } else if (stag == 0) { + /* HT CPUs give the first remainder to the even thread, as + the 32nd control register */ + msrs->controls[i++].addr = MSR_P4_CRU_ESCR4; + + } else { + /* and two copies of the second to the odd thread, + for the 22st and 23nd control registers */ + msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; + 
msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; + } +} + + +static void pmc_setup_one_p4_counter(unsigned int ctr) +{ + int i; + int const maxbind = 2; + unsigned int cccr = 0; + unsigned int escr = 0; + unsigned int high = 0; + unsigned int counter_bit; + struct p4_event_binding *ev = NULL; + unsigned int stag; + + stag = get_stagger(); + + /* convert from counter *number* to counter *bit* */ + counter_bit = 1 << VIRT_CTR(stag, ctr); + + /* find our event binding structure. */ + if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) { + printk(KERN_ERR + "oprofile: P4 event code 0x%lx out of range\n", + counter_config[ctr].event); + return; + } + + ev = &(p4_events[counter_config[ctr].event - 1]); + + for (i = 0; i < maxbind; i++) { + if (ev->bindings[i].virt_counter & counter_bit) { + + /* modify ESCR */ + ESCR_READ(escr, high, ev, i); + ESCR_CLEAR(escr); + if (stag == 0) { + ESCR_SET_USR_0(escr, counter_config[ctr].user); + ESCR_SET_OS_0(escr, counter_config[ctr].kernel); + } else { + ESCR_SET_USR_1(escr, counter_config[ctr].user); + ESCR_SET_OS_1(escr, counter_config[ctr].kernel); + } + ESCR_SET_EVENT_SELECT(escr, ev->event_select); + ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); + ESCR_WRITE(escr, high, ev, i); + + /* modify CCCR */ + CCCR_READ(cccr, high, VIRT_CTR(stag, ctr)); + CCCR_CLEAR(cccr); + CCCR_SET_REQUIRED_BITS(cccr); + CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); + if (stag == 0) { + CCCR_SET_PMI_OVF_0(cccr); + } else { + CCCR_SET_PMI_OVF_1(cccr); + } + CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr)); + return; + } + } + + printk(KERN_ERR + "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n", + counter_config[ctr].event, stag, ctr); +} + + +static void p4_setup_ctrs(struct op_msrs const * const msrs) +{ + unsigned int i; + unsigned int low, high; + unsigned int addr; + unsigned int stag; + + stag = get_stagger(); + + rdmsr(MSR_IA32_MISC_ENABLE, low, high); + if (! 
MISC_PMC_ENABLED_P(low)) { + printk(KERN_ERR "oprofile: P4 PMC not available\n"); + return; + } + + /* clear the cccrs we will use */ + for (i = 0 ; i < num_counters ; i++) { + rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); + CCCR_CLEAR(low); + CCCR_SET_REQUIRED_BITS(low); + wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); + } + + /* clear cccrs outside our concern */ + for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) { + rdmsr(p4_unused_cccr[i], low, high); + CCCR_CLEAR(low); + CCCR_SET_REQUIRED_BITS(low); + wrmsr(p4_unused_cccr[i], low, high); + } + + /* clear all escrs (including those outside our concern) */ + for (addr = MSR_P4_BSU_ESCR0 + stag; + addr < MSR_P4_IQ_ESCR0; addr += addr_increment()) { + wrmsr(addr, 0, 0); + } + + /* On older models clear also MSR_P4_IQ_ESCR0/1 */ + if (boot_cpu_data.x86_model < 0x3) { + wrmsr(MSR_P4_IQ_ESCR0, 0, 0); + wrmsr(MSR_P4_IQ_ESCR1, 0, 0); + } + + for (addr = MSR_P4_RAT_ESCR0 + stag; + addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { + wrmsr(addr, 0, 0); + } + + for (addr = MSR_P4_MS_ESCR0 + stag; + addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){ + wrmsr(addr, 0, 0); + } + + for (addr = MSR_P4_IX_ESCR0 + stag; + addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){ + wrmsr(addr, 0, 0); + } + + if (num_counters == NUM_COUNTERS_NON_HT) { + wrmsr(MSR_P4_CRU_ESCR4, 0, 0); + wrmsr(MSR_P4_CRU_ESCR5, 0, 0); + } else if (stag == 0) { + wrmsr(MSR_P4_CRU_ESCR4, 0, 0); + } else { + wrmsr(MSR_P4_CRU_ESCR5, 0, 0); + } + + /* setup all counters */ + for (i = 0 ; i < num_counters ; ++i) { + if (counter_config[i].enabled) { + reset_value[i] = counter_config[i].count; + pmc_setup_one_p4_counter(i); + CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); + } else { + reset_value[i] = 0; + } + } +} + + +extern void pmc_log_event(struct domain *d, u64 eip, int mode, int event); +extern int is_profiled(struct domain * d); +extern struct domain * primary_profiler; + +static int p4_check_ctrs(unsigned int const cpu, + struct op_msrs const * const msrs, + struct cpu_user_regs * const regs) +{ + unsigned long ctr, low, high, stag, real; + int i, ovf = 0; + u64 eip = regs->eip; + int mode = 0; + struct vcpu *v = current; + + //if (RING_1(regs)) + if (KERNEL_MODE(v, regs)) + mode = 1; + else if (RING_0(regs)) + mode = 2; + + stag = get_stagger(); + + for (i = 0; i < num_counters; ++i) { + if (!reset_value[i]) + continue; + + /* + * there is some eccentricity in the hardware which + * requires that we perform 2 extra corrections: + * + * - check both the CCCR:OVF flag for overflow and the + * counter high bit for un-flagged overflows. + * + * - write the counter back twice to ensure it gets + * updated properly. + * + * the former seems to be related to extra NMIs happening + * during the current NMI; the latter is reported as errata + * N15 in intel doc 249199-029, pentium 4 specification + * update, though their suggested work-around does not + * appear to solve the problem. 
+ */ + + real = VIRT_CTR(stag, i); + + CCCR_READ(low, high, real); + CTR_READ(ctr, high, real); + if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) { + pmc_log_event(current->domain, eip, mode, i); + CTR_WRITE(reset_value[i], real); + CCCR_CLEAR_OVF(low); + CCCR_WRITE(low, high, real); + CTR_WRITE(reset_value[i], real); + ovf = 1; + } + } + + /* P4 quirk: you have to re-unmask the apic vector */ + apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); + + /* See op_model_ppro.c */ + return ovf; +} + + +static void p4_start(struct op_msrs const * const msrs) +{ + unsigned int low, high, stag; + int i; + + stag = get_stagger(); + + for (i = 0; i < num_counters; ++i) { + if (!reset_value[i]) + continue; + CCCR_READ(low, high, VIRT_CTR(stag, i)); + CCCR_SET_ENABLE(low); + CCCR_WRITE(low, high, VIRT_CTR(stag, i)); + } +} + + +static void p4_stop(struct op_msrs const * const msrs) +{ + unsigned int low, high, stag; + int i; + + stag = get_stagger(); + + for (i = 0; i < num_counters; ++i) { + CCCR_READ(low, high, VIRT_CTR(stag, i)); + CCCR_SET_DISABLE(low); + CCCR_WRITE(low, high, VIRT_CTR(stag, i)); + } +} + + +#ifdef CONFIG_SMP +struct op_x86_model_spec const op_p4_ht2_spec = { + .num_counters = NUM_COUNTERS_HT2, + .num_controls = NUM_CONTROLS_HT2, + .fill_in_addresses = &p4_fill_in_addresses, + .setup_ctrs = &p4_setup_ctrs, + .check_ctrs = &p4_check_ctrs, + .start = &p4_start, + .stop = &p4_stop +}; +#endif + +struct op_x86_model_spec const op_p4_spec = { + .num_counters = NUM_COUNTERS_NON_HT, + .num_controls = NUM_CONTROLS_NON_HT, + .fill_in_addresses = &p4_fill_in_addresses, + .setup_ctrs = &p4_setup_ctrs, + .check_ctrs = &p4_check_ctrs, + .start = &p4_start, + .stop = &p4_stop +}; diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_model_ppro.c ./xen/arch/x86/oprofile/op_model_ppro.c --- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_model_ppro.c 1969-12-31 18:00:00 -06:00 +++ ./xen/arch/x86/oprofile/op_model_ppro.c 2005-08-19 20:36:40 -05:00 @@ -0,0 +1,168 @@ +/** + * @file op_model_ppro.h + * pentium pro / P6 model-specific MSR operations + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + * @author Philippe Elie + * @author Graydon Hoare + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. 
+ */ + +#include <xen/types.h> +#include <asm/msr.h> +#include <asm/io.h> +#include <asm/apic.h> +#include <asm/processor.h> +#include <xen/sched.h> +#include <asm/regs.h> +#include <asm/current.h> + +#include "op_x86_model.h" +#include "op_counter.h" + +#define NUM_COUNTERS 2 +#define NUM_CONTROLS 2 + +#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) +#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0) +#define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) + +#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0) +#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0) +#define CTRL_SET_ACTIVE(n) (n |= (1<<22)) +#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) +#define CTRL_CLEAR(x) (x &= (1<<21)) +#define CTRL_SET_ENABLE(val) (val |= 1<<20) +#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16)) +#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17)) +#define CTRL_SET_UM(val, m) (val |= (m << 8)) +#define CTRL_SET_EVENT(val, e) (val |= e) + +static unsigned long reset_value[NUM_COUNTERS]; + +static void ppro_fill_in_addresses(struct op_msrs * const msrs) +{ + msrs->counters[0].addr = MSR_P6_PERFCTR0; + msrs->counters[1].addr = MSR_P6_PERFCTR1; + + msrs->controls[0].addr = MSR_P6_EVNTSEL0; + msrs->controls[1].addr = MSR_P6_EVNTSEL1; +} + + +static void ppro_setup_ctrs(struct op_msrs const * const msrs) +{ + unsigned int low, high; + int i; + + /* clear all counters */ + for (i = 0 ; i < NUM_CONTROLS; ++i) { + CTRL_READ(low, high, msrs, i); + CTRL_CLEAR(low); + CTRL_WRITE(low, high, msrs, i); + } + + /* avoid a false detection of ctr overflows in NMI handler */ + for (i = 0; i < NUM_COUNTERS; ++i) { + CTR_WRITE(1, msrs, i); + } + + /* enable active counters */ + for (i = 0; i < NUM_COUNTERS; ++i) { + if (counter_config[i].enabled) { + reset_value[i] = counter_config[i].count; + + CTR_WRITE(counter_config[i].count, msrs, i); + + CTRL_READ(low, high, msrs, i); + CTRL_CLEAR(low); + CTRL_SET_ENABLE(low); + CTRL_SET_USR(low, counter_config[i].user); + CTRL_SET_KERN(low, counter_config[i].kernel); + CTRL_SET_UM(low, counter_config[i].unit_mask); + CTRL_SET_EVENT(low, counter_config[i].event); + CTRL_WRITE(low, high, msrs, i); + } + } +} + +extern void pmc_log_event(struct domain *d, u64 eip, int mode, int event); +extern int is_profiled(struct domain * d); +extern struct domain * primary_profiler; + +static int ppro_check_ctrs(unsigned int const cpu, + struct op_msrs const * const msrs, + struct cpu_user_regs * const regs) +{ + unsigned int low, high; + int i, ovf = 0; + u64 eip = regs->eip; + int mode = 0; + + if (RING_1(regs)) + mode = 1; + else if (RING_0(regs)) + mode = 2; + + for (i = 0 ; i < NUM_COUNTERS; ++i) { + CTR_READ(low, high, msrs, i); + if (CTR_OVERFLOWED(low)) { + pmc_log_event(current->domain, eip, mode, i); + CTR_WRITE(reset_value[i], msrs, i); + ovf = 1; + } + } + + /* Only P6 based Pentium M need to re-unmask the apic vector but it + * doesn't hurt other P6 variant */ + apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); + + /* We can't work out if we really handled an interrupt. We + * might have caught a *second* counter just after overflowing + * the interrupt for this counter then arrives + * and we don't find a counter that's overflowed, so we + * would return 0 and get dazed + confused. Instead we always + * assume we found an overflow. This sucks. 
+ */ + return ovf; +} + + +static void ppro_start(struct op_msrs const * const msrs) +{ + unsigned int low,high; + CTRL_READ(low, high, msrs, 0); + CTRL_SET_ACTIVE(low); + CTRL_WRITE(low, high, msrs, 0); +} + +static void ppro_stop(struct op_msrs const * const msrs) +{ + unsigned int low,high; + CTRL_READ(low, high, msrs, 0); + CTRL_SET_INACTIVE(low); + CTRL_WRITE(low, high, msrs, 0); +} + +unsigned int read_ctr(struct op_msrs const * const msrs, int i) +{ + unsigned int low, high; + CTR_READ(low, high, msrs, i); + return low; +} + +struct op_x86_model_spec const op_ppro_spec = { + .num_counters = NUM_COUNTERS, + .num_controls = NUM_CONTROLS, + .fill_in_addresses = &ppro_fill_in_addresses, + .setup_ctrs = &ppro_setup_ctrs, + .check_ctrs = &ppro_check_ctrs, + .start = &ppro_start, + .stop = &ppro_stop +}; diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_x86_model.h ./xen/arch/x86/oprofile/op_x86_model.h --- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_x86_model.h 1969-12-31 18:00:00 -06:00 +++ ./xen/arch/x86/oprofile/op_x86_model.h 2005-08-18 20:28:44 -05:00 @@ -0,0 +1,55 @@ +/** + * @file op_x86_model.h + * interface to x86 model-specific MSR operations + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author Graydon Hoare + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. + */ + +#ifndef OP_X86_MODEL_H +#define OP_X86_MODEL_H + +struct op_saved_msr { + unsigned int high; + unsigned int low; +}; + +struct op_msr { + unsigned long addr; + struct op_saved_msr saved; +}; + +struct op_msrs { + struct op_msr * counters; + struct op_msr * controls; +}; + +struct pt_regs; + +/* The model vtable abstracts the differences between + * various x86 CPU model's perfctr support. + */ +struct op_x86_model_spec { + unsigned int const num_counters; + unsigned int const num_controls; + void (*fill_in_addresses)(struct op_msrs * const msrs); + void (*setup_ctrs)(struct op_msrs const * const msrs); + int (*check_ctrs)(unsigned int const cpu, + struct op_msrs const * const msrs, + struct cpu_user_regs * const regs); + void (*start)(struct op_msrs const * const msrs); + void (*stop)(struct op_msrs const * const msrs); +}; + +extern struct op_x86_model_spec const op_ppro_spec; +extern struct op_x86_model_spec const op_p4_spec; +extern struct op_x86_model_spec const op_p4_ht2_spec; +extern struct op_x86_model_spec const op_athlon_spec; + +#endif /* OP_X86_MODEL_H */ diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/pmc.c ./xen/arch/x86/oprofile/pmc.c --- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/pmc.c 1969-12-31 18:00:00 -06:00 +++ ./xen/arch/x86/oprofile/pmc.c 2005-08-19 20:34:32 -05:00 @@ -0,0 +1,308 @@ +/* + * Copyright (C) 2005 Hewlett-Packard Co. 
+ * written by Aravind Menon, email: xenoprof@xxxxxxxxxxxxx + */ + +#include <xen/sched.h> +#include <asm/current.h> + +#include "op_counter.h" + +int active_domains[MAX_OPROF_DOMAINS]; +int passive_domains[MAX_OPROF_DOMAINS]; +unsigned int adomains = 0; +unsigned int pdomains = 0; +unsigned int activated = 0; + +#define VIRQ_BITMASK_SIZE (MAX_OPROF_DOMAINS/32 + 1) + +struct domain * primary_profiler = NULL; +struct domain * adomain_ptrs[MAX_OPROF_DOMAINS]; +unsigned int virq_ovf_pending[VIRQ_BITMASK_SIZE]; + +int is_active(struct domain *d) +{ + int i; + for (i = 0; i < adomains; i++) + if (d->domain_id == active_domains[i]) + return 1; + return 0; +} + +int active_id(struct domain *d) +{ + int i; + for (i = 0; i < adomains; i++) + if (d == adomain_ptrs[i]) + return i; + return -1; +} + +void free_adomain_ptrs() +{ + int i; + int num = adomains; + + adomains = 0; + for (i = 0; i < VIRQ_BITMASK_SIZE; i++) + virq_ovf_pending[i] = 0; + + for (i = 0; i < num; i++) { + put_domain(adomain_ptrs[i]); + adomain_ptrs[i] = NULL; + } +} + +int set_adomain_ptrs(int num) +{ + int i; + struct domain *d; + + for (i = 0; i < VIRQ_BITMASK_SIZE; i++) + virq_ovf_pending[i] = 0; + + for (i = 0; i < num; i++) { + d = find_domain_by_id(active_domains[i]); + if (!d) { + free_adomain_ptrs(); + return -EFAULT; + } + adomain_ptrs[i] = d; + adomains++; + } + return 0; +} + +int set_active(struct domain *d) +{ + if (is_active(d)) + return 0; + /* hack if we run out of space */ + if (adomains >= MAX_OPROF_DOMAINS) { + adomains--; + put_domain(adomain_ptrs[adomains]); + } + active_domains[adomains] = d->domain_id; + if (get_domain(d)) + adomain_ptrs[adomains++] = d; + else { + free_adomain_ptrs(); + return -EFAULT; + } + return 0; +} + +int is_passive(struct domain *d) +{ + int i; + for (i = 0; i < pdomains; i++) + if (d->domain_id == passive_domains[i]) + return 1; + return 0; +} + +int is_profiled(struct domain *d) +{ + if (is_active(d) || is_passive(d)) + return 1; + return 0; +} + +void pmc_log_event(struct domain *d, u64 eip, int mode, int event) +{ + shared_info_t *s = NULL; + struct domain *dest = d; + int head; + int tail; + + if (!is_profiled(d)) + return; + + if (!is_passive(d)) { + s = dest->shared_info; + head = s->event_head; + tail = s->event_tail; + if ((head == tail - 1) || + (head == MAX_OPROF_EVENTS - 1 && tail == 0)) { + s->losing_samples = 1; + s->samples_lost++; + } + else { + s->event_log[head].eip = eip; + s->event_log[head].mode = mode; + s->event_log[head].event = event; + head++; + if (head >= MAX_OPROF_EVENTS) + head = 0; + s->event_head = head; + } + } + /* passive domains */ + else { + dest = primary_profiler; + s = dest->shared_info; + head = s->event_head; + tail = s->event_tail; + + /* We use the following inefficient format for logging + events from other domains. We put a special record + indicating that the next record is for another domain. 
+ This is done for each sample from another domain */ + + head = s->event_head; + if (head >= MAX_OPROF_EVENTS) + head = 0; + /* for passive domains we need to have at least two + entries empty in the buffer */ + if ((head == tail - 1) || + (head == tail - 2) || + (head == MAX_OPROF_EVENTS - 1 && tail <= 1) || + (head == MAX_OPROF_EVENTS - 2 && tail == 0) ) { + s->losing_samples = 1; + s->samples_lost++; + } + else { + s->event_log[head].eip = ~1; + s->event_log[head].mode = ~0; + s->event_log[head].event = d->domain_id; + head++; + if (head >= MAX_OPROF_EVENTS) + head = 0; + s->event_log[head].eip = eip; + s->event_log[head].mode = mode; + s->event_log[head].event = event; + head++; + if (head >= MAX_OPROF_EVENTS) + head = 0; + s->event_head = head; + } + } +} + +static void pmc_event_init(struct domain *d) +{ + shared_info_t *s = d->shared_info; + s->event_head = 0; + s->event_tail = 0; + s->losing_samples = 0; + s->samples_lost = 0; + s->nmi_restarts = 0; + s->active_samples = 0; + s->passive_samples = 0; + s->other_samples = 0; +} + +extern int nmi_init(int *num_events, int *is_primary); +extern int nmi_reserve_counters(void); +extern int nmi_setup_events(void); +extern int nmi_enable_virq(void); +extern int nmi_start(void); +extern void nmi_stop(void); +extern void nmi_disable_virq(void); +extern void nmi_release_counters(void); + +#define PRIV_OP(op) ((op == PMC_SET_ACTIVE) || (op == PMC_SET_PASSIVE) || (op == PMC_RESERVE_COUNTERS) \ + || (op == PMC_SETUP_EVENTS) || (op == PMC_START) || (op == PMC_STOP) \ + || (op == PMC_RELEASE_COUNTERS) || (op == PMC_SHUTDOWN)) + +int do_pmc_op(int op, u64 arg1, u64 arg2) +{ + int ret = 0; + + if (PRIV_OP(op) && current->domain != primary_profiler) + return -EPERM; + + switch (op) { + case PMC_INIT: + printk("PMC_INIT]\n"); + ret = nmi_init((int *)arg1, (int *)arg2); + printk("nmi_init returned %d\n", ret); + break; + + case PMC_SET_ACTIVE: + printk("PMC_SETACTIVE]\n"); + if (adomains != 0) + return -EPERM; + if (copy_from_user((void *)&active_domains, + (void *)arg1, arg2*sizeof(int))) + return -EFAULT; + if (set_adomain_ptrs(arg2)) + return -EFAULT; + if (set_active(current->domain)) + return -EFAULT; + break; + + case PMC_SET_PASSIVE: + printk("PMC_SETPASSIVE\n"); + if (pdomains != 0) + return -EPERM; + if (copy_from_user((void *)&passive_domains, + (void *)arg1, arg2*sizeof(int))) + return -EFAULT; + pdomains = arg2; + break; + + case PMC_RESERVE_COUNTERS: + printk("PMC_RESERVE_COUNTERS\n"); + ret = nmi_reserve_counters(); + break; + + case PMC_SETUP_EVENTS: + printk("PMV_SETUP_EVENTS\n"); + if (copy_from_user((void *)&counter_config, + (void *)arg1, arg2*sizeof(struct op_counter_config))) + return -EFAULT; + ret = nmi_setup_events(); + break; + + case PMC_ENABLE_VIRQ: + printk("PMC_ENABLE_VIRQ\n"); + if (!is_active(current->domain)) { + if (current->domain != primary_profiler) + return -EPERM; + else + set_active(current->domain); + } + ret = nmi_enable_virq(); + pmc_event_init(current->domain); + activated++; + break; + + case PMC_START: + printk("PMC_START\n"); + if (activated < adomains) + return -EPERM; + ret = nmi_start(); + break; + + case PMC_STOP: + printk("PMC_STOP\n"); + nmi_stop(); + break; + + case PMC_DISABLE_VIRQ: + printk("PMC_DISBALE_VIRQ\n"); + if (!is_active(current->domain)) + return -EPERM; + nmi_disable_virq(); + activated--; + break; + + case PMC_RELEASE_COUNTERS: + printk("PMC_RELEASE_COUNTERS\n"); + nmi_release_counters(); + break; + + case PMC_SHUTDOWN: + printk("PMC_SHUTDOWN\n"); + free_adomain_ptrs(); + pdomains = 0; 
+ activated = 0; + primary_profiler = NULL; + break; + + default: + ret = -EINVAL; + } + return ret; +} diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/traps.c ./xen/arch/x86/traps.c --- ../xen-unstable.hg-6251/xen/arch/x86/traps.c 2005-08-19 23:46:22 -05:00 +++ ./xen/arch/x86/traps.c 2005-08-18 20:28:44 -05:00 @@ -2,6 +2,10 @@ * arch/x86/traps.c * * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser + * + * Modified by Aravind Menon for supporting oprofile + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -54,6 +58,7 @@ #include <asm/debugger.h> #include <asm/msr.h> #include <asm/x86_emulate.h> +#include <asm/nmi.h> /* * opt_nmi: one of 'ignore', 'dom0', or 'fatal'. @@ -1040,7 +1045,7 @@ static void unknown_nmi_error(unsigned c printk("Do you have a strange power saving mode enabled?\n"); } -asmlinkage void do_nmi(struct cpu_user_regs *regs, unsigned long reason) +static void default_do_nmi(struct cpu_user_regs * regs, unsigned long reason) { ++nmi_count(smp_processor_id()); @@ -1055,6 +1060,35 @@ asmlinkage void do_nmi(struct cpu_user_r unknown_nmi_error((unsigned char)(reason&0xff)); } +static int dummy_nmi_callback(struct cpu_user_regs * regs, int cpu) +{ + return 0; +} + +static nmi_callback_t nmi_callback = dummy_nmi_callback; + +asmlinkage void do_nmi(struct cpu_user_regs * regs, unsigned long reason) +{ + int cpu; + cpu = smp_processor_id(); + + if (!nmi_callback(regs, cpu)) + default_do_nmi(regs, reason); +} + +void set_nmi_callback(nmi_callback_t callback) +{ + nmi_callback = callback; +} + +void unset_nmi_callback(void) +{ + nmi_callback = dummy_nmi_callback; +} + +EXPORT_SYMBOL(set_nmi_callback); +EXPORT_SYMBOL(unset_nmi_callback); + asmlinkage int math_state_restore(struct cpu_user_regs *regs) { /* Prevent recursion. 
*/ diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/x86_32/entry.S ./xen/arch/x86/x86_32/entry.S --- ../xen-unstable.hg-6251/xen/arch/x86/x86_32/entry.S 2005-08-19 23:46:22 -05:00 +++ ./xen/arch/x86/x86_32/entry.S 2005-08-18 20:28:44 -05:00 @@ -763,7 +763,8 @@ ENTRY(hypercall_table) .long do_boot_vcpu .long do_ni_hypercall /* 25 */ .long do_mmuext_op - .long do_acm_op /* 27 */ + .long do_acm_op + .long do_pmc_op /* 28 */ .rept NR_hypercalls-((.-hypercall_table)/4) .long do_ni_hypercall .endr diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/x86_64/entry.S ./xen/arch/x86/x86_64/entry.S --- ../xen-unstable.hg-6251/xen/arch/x86/x86_64/entry.S 2005-08-19 23:46:22 -05:00 +++ ./xen/arch/x86/x86_64/entry.S 2005-08-18 20:37:21 -05:00 @@ -593,6 +593,7 @@ ENTRY(hypercall_table) .quad do_set_segment_base /* 25 */ .quad do_mmuext_op .quad do_acm_op + .quad do_pmc_op .rept NR_hypercalls-((.-hypercall_table)/4) .quad do_ni_hypercall .endr diff -Naurp ../xen-unstable.hg-6251/xen/include/asm-x86/msr.h ./xen/include/asm-x86/msr.h --- ../xen-unstable.hg-6251/xen/include/asm-x86/msr.h 2005-08-19 23:46:23 -05:00 +++ ./xen/include/asm-x86/msr.h 2005-08-18 20:28:44 -05:00 @@ -195,6 +195,89 @@ #define MSR_P6_EVNTSEL0 0x186 #define MSR_P6_EVNTSEL1 0x187 +/* Pentium IV performance counter MSRs */ +#define MSR_P4_BPU_PERFCTR0 0x300 +#define MSR_P4_BPU_PERFCTR1 0x301 +#define MSR_P4_BPU_PERFCTR2 0x302 +#define MSR_P4_BPU_PERFCTR3 0x303 +#define MSR_P4_MS_PERFCTR0 0x304 +#define MSR_P4_MS_PERFCTR1 0x305 +#define MSR_P4_MS_PERFCTR2 0x306 +#define MSR_P4_MS_PERFCTR3 0x307 +#define MSR_P4_FLAME_PERFCTR0 0x308 +#define MSR_P4_FLAME_PERFCTR1 0x309 +#define MSR_P4_FLAME_PERFCTR2 0x30a +#define MSR_P4_FLAME_PERFCTR3 0x30b +#define MSR_P4_IQ_PERFCTR0 0x30c +#define MSR_P4_IQ_PERFCTR1 0x30d +#define MSR_P4_IQ_PERFCTR2 0x30e +#define MSR_P4_IQ_PERFCTR3 0x30f +#define MSR_P4_IQ_PERFCTR4 0x310 +#define MSR_P4_IQ_PERFCTR5 0x311 +#define MSR_P4_BPU_CCCR0 0x360 +#define MSR_P4_BPU_CCCR1 0x361 +#define MSR_P4_BPU_CCCR2 0x362 +#define MSR_P4_BPU_CCCR3 0x363 +#define MSR_P4_MS_CCCR0 0x364 +#define MSR_P4_MS_CCCR1 0x365 +#define MSR_P4_MS_CCCR2 0x366 +#define MSR_P4_MS_CCCR3 0x367 +#define MSR_P4_FLAME_CCCR0 0x368 +#define MSR_P4_FLAME_CCCR1 0x369 +#define MSR_P4_FLAME_CCCR2 0x36a +#define MSR_P4_FLAME_CCCR3 0x36b +#define MSR_P4_IQ_CCCR0 0x36c +#define MSR_P4_IQ_CCCR1 0x36d +#define MSR_P4_IQ_CCCR2 0x36e +#define MSR_P4_IQ_CCCR3 0x36f +#define MSR_P4_IQ_CCCR4 0x370 +#define MSR_P4_IQ_CCCR5 0x371 +#define MSR_P4_ALF_ESCR0 0x3ca +#define MSR_P4_ALF_ESCR1 0x3cb +#define MSR_P4_BPU_ESCR0 0x3b2 +#define MSR_P4_BPU_ESCR1 0x3b3 +#define MSR_P4_BSU_ESCR0 0x3a0 +#define MSR_P4_BSU_ESCR1 0x3a1 +#define MSR_P4_CRU_ESCR0 0x3b8 +#define MSR_P4_CRU_ESCR1 0x3b9 +#define MSR_P4_CRU_ESCR2 0x3cc +#define MSR_P4_CRU_ESCR3 0x3cd +#define MSR_P4_CRU_ESCR4 0x3e0 +#define MSR_P4_CRU_ESCR5 0x3e1 +#define MSR_P4_DAC_ESCR0 0x3a8 +#define MSR_P4_DAC_ESCR1 0x3a9 +#define MSR_P4_FIRM_ESCR0 0x3a4 +#define MSR_P4_FIRM_ESCR1 0x3a5 +#define MSR_P4_FLAME_ESCR0 0x3a6 +#define MSR_P4_FLAME_ESCR1 0x3a7 +#define MSR_P4_FSB_ESCR0 0x3a2 +#define MSR_P4_FSB_ESCR1 0x3a3 +#define MSR_P4_IQ_ESCR0 0x3ba +#define MSR_P4_IQ_ESCR1 0x3bb +#define MSR_P4_IS_ESCR0 0x3b4 +#define MSR_P4_IS_ESCR1 0x3b5 +#define MSR_P4_ITLB_ESCR0 0x3b6 +#define MSR_P4_ITLB_ESCR1 0x3b7 +#define MSR_P4_IX_ESCR0 0x3c8 +#define MSR_P4_IX_ESCR1 0x3c9 +#define MSR_P4_MOB_ESCR0 0x3aa +#define MSR_P4_MOB_ESCR1 0x3ab +#define MSR_P4_MS_ESCR0 0x3c0 +#define MSR_P4_MS_ESCR1 0x3c1 +#define MSR_P4_PMH_ESCR0 0x3ac 
+#define MSR_P4_PMH_ESCR1 0x3ad +#define MSR_P4_RAT_ESCR0 0x3bc +#define MSR_P4_RAT_ESCR1 0x3bd +#define MSR_P4_SAAT_ESCR0 0x3ae +#define MSR_P4_SAAT_ESCR1 0x3af +#define MSR_P4_SSU_ESCR0 0x3be +#define MSR_P4_SSU_ESCR1 0x3bf /* guess: not defined in manual */ +#define MSR_P4_TBPU_ESCR0 0x3c2 +#define MSR_P4_TBPU_ESCR1 0x3c3 +#define MSR_P4_TC_ESCR0 0x3c4 +#define MSR_P4_TC_ESCR1 0x3c5 +#define MSR_P4_U2L_ESCR0 0x3b0 +#define MSR_P4_U2L_ESCR1 0x3b1 /* K7/K8 MSRs. Not complete. See the architecture manual for a more complete list. */ #define MSR_K7_EVNTSEL0 0xC0010000 diff -Naurp ../xen-unstable.hg-6251/xen/include/asm-x86/nmi.h ./xen/include/asm-x86/nmi.h --- ../xen-unstable.hg-6251/xen/include/asm-x86/nmi.h 1969-12-31 18:00:00 -06:00 +++ ./xen/include/asm-x86/nmi.h 2005-08-18 20:28:44 -05:00 @@ -0,0 +1,26 @@ +/* + * linux/include/asm-i386/nmi.h + */ +#ifndef ASM_NMI_H +#define ASM_NMI_H + +struct cpu_user_regs; + +typedef int (*nmi_callback_t)(struct cpu_user_regs * regs, int cpu); + +/** + * set_nmi_callback + * + * Set a handler for an NMI. Only one handler may be + * set. Return 1 if the NMI was handled. + */ +void set_nmi_callback(nmi_callback_t callback); + +/** + * unset_nmi_callback + * + * Remove the handler previously set. + */ +void unset_nmi_callback(void); + +#endif /* ASM_NMI_H */ diff -Naurp ../xen-unstable.hg-6251/xen/include/public/xen.h ./xen/include/public/xen.h --- ../xen-unstable.hg-6251/xen/include/public/xen.h 2005-08-19 23:46:23 -05:00 +++ ./xen/include/public/xen.h 2005-08-19 20:34:10 -05:00 @@ -4,6 +4,10 @@ * Guest OS interface to Xen. * * Copyright (c) 2004, K A Fraser + * + * Modified by Aravind Menon for supporting oprofile + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. */ #ifndef __XEN_PUBLIC_XEN_H__ @@ -59,6 +63,7 @@ #define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ #define __HYPERVISOR_mmuext_op 26 #define __HYPERVISOR_acm_op 27 +#define __HYPERVISOR_pmc_op 28 /* * VIRTUAL INTERRUPTS @@ -72,7 +77,8 @@ #define VIRQ_PARITY_ERR 4 /* (DOM0) NMI parity error. */ #define VIRQ_IO_ERR 5 /* (DOM0) NMI I/O error. */ #define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */ -#define NR_VIRQS 7 +#define VIRQ_PMC_OVF 7 /* PMC Overflow */ +#define NR_VIRQS 8 /* * MMU-UPDATE REQUESTS @@ -239,6 +245,21 @@ struct mmuext_op { #define VMASST_TYPE_writable_pagetables 2 #define MAX_VMASST_TYPE 2 +/* + * Commands to HYPERVISOR_pmc_op(). + */ +#define PMC_INIT 0 +#define PMC_SET_ACTIVE 1 +#define PMC_SET_PASSIVE 2 +#define PMC_RESERVE_COUNTERS 3 +#define PMC_SETUP_EVENTS 4 +#define PMC_ENABLE_VIRQ 5 +#define PMC_START 6 +#define PMC_STOP 7 +#define PMC_DISABLE_VIRQ 8 +#define PMC_RELEASE_COUNTERS 9 +#define PMC_SHUTDOWN 10 + #ifndef __ASSEMBLY__ typedef u16 domid_t; @@ -291,6 +312,8 @@ typedef struct /* Event channel endpoints per domain. */ #define NR_EVENT_CHANNELS 1024 +#define MAX_OPROF_EVENTS 32 +#define MAX_OPROF_DOMAINS 25 /* * Per-VCPU information goes here. This will be cleaned up more when Xen * actually supports multi-VCPU guests. @@ -406,6 +429,21 @@ typedef struct shared_info { u32 wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. 
*/ arch_shared_info_t arch; + + /* Oprofile structures */ + u8 event_head; + u8 event_tail; + struct { + u64 eip; + u8 mode; + u8 event; + } event_log[MAX_OPROF_EVENTS]; + u8 losing_samples; + u64 samples_lost; + u32 nmi_restarts; + u64 active_samples; + u64 passive_samples; + u64 other_samples; } shared_info_t; diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/Kconfig xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/Kconfig --- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/Kconfig 2005-08-22 19:43:15 -05:00 +++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/Kconfig 2005-08-22 20:17:51 -05:00 @@ -200,4 +200,6 @@ source "crypto/Kconfig" source "lib/Kconfig" +source "arch/xen/oprofile/Kconfig" + source "arch/xen/Kconfig.debug" diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/Makefile xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/Makefile --- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/Makefile 2005-08-22 19:43:15 -05:00 +++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/Makefile 2005-08-22 20:17:51 -05:00 @@ -32,6 +32,8 @@ ifneq ($(KBUILD_SRC),) $(Q)ln -fsn ../include/asm-$(XENARCH) include2/asm endif +drivers-$(CONFIG_OPROFILE) += arch/xen/oprofile/ + include/.asm-ignore: include/asm @rm -f include/.asm-ignore @mv include/asm include/.asm-ignore diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/configs/xen0_defconfig_x86_32 xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/configs/xen0_defconfig_x86_32 --- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/configs/xen0_defconfig_x86_32 2005-08-22 19:43:15 -05:00 +++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/configs/xen0_defconfig_x86_32 2005-08-22 20:17:51 -05:00 @@ -79,6 +79,12 @@ CONFIG_OBSOLETE_MODPARM=y CONFIG_KMOD=y # +# OProfile options +# +CONFIG_PROFILING=y +CONFIG_OPROFILE=m + +# # X86 Processor Configuration # CONFIG_XENARCH="i386" diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/configs/xenU_defconfig_x86_32 xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/configs/xenU_defconfig_x86_32 --- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/configs/xenU_defconfig_x86_32 2005-08-22 19:43:15 -05:00 +++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/configs/xenU_defconfig_x86_32 2005-08-22 20:17:51 -05:00 @@ -76,6 +76,12 @@ CONFIG_KMOD=y CONFIG_STOP_MACHINE=y # +# OProfile options +# +CONFIG_PROFILING=y +CONFIG_OPROFILE=m + +# # X86 Processor Configuration # CONFIG_XENARCH="i386" diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/i386/Makefile xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/i386/Makefile --- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/i386/Makefile 2005-08-22 19:43:15 -05:00 +++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/i386/Makefile 2005-08-22 20:17:51 -05:00 @@ -84,7 +84,6 @@ core-y += arch/xen/i386/kernel/ \ drivers-$(CONFIG_MATH_EMULATION) += arch/i386/math-emu/ drivers-$(CONFIG_PCI) += arch/xen/i386/pci/ # must be linked after kernel/ -drivers-$(CONFIG_OPROFILE) += arch/i386/oprofile/ drivers-$(CONFIG_PM) += arch/i386/power/ # for clean diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/kernel/evtchn.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/kernel/evtchn.c --- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/kernel/evtchn.c 2005-08-22 19:43:15 -05:00 +++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/kernel/evtchn.c 2005-08-22 
20:17:51 -05:00 @@ -44,11 +44,16 @@ #include <asm-xen/hypervisor.h> #include <asm-xen/evtchn.h> +int virq_to_phys(int virq); + #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) EXPORT_SYMBOL(force_evtchn_callback); EXPORT_SYMBOL(evtchn_do_upcall); EXPORT_SYMBOL(bind_evtchn_to_irq); EXPORT_SYMBOL(unbind_evtchn_from_irq); +EXPORT_SYMBOL(virq_to_phys); +EXPORT_SYMBOL(bind_virq_to_irq); +EXPORT_SYMBOL(unbind_virq_from_irq); #endif /* @@ -178,6 +183,15 @@ static int find_unbound_irq(void) panic("No available IRQ to bind to: increase NR_IRQS!\n"); return irq; +} + +int virq_to_phys(int virq) +{ + int cpu = smp_processor_id(); + + if (virq >= NR_VIRQS) + return -1; + return per_cpu(virq_to_irq,cpu)[virq]; } int bind_virq_to_irq(int virq) diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/Kconfig xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/Kconfig --- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/Kconfig 1969-12-31 18:00:00 -06:00 +++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/Kconfig 2005-08-22 20:17:51 -05:00 @@ -0,0 +1,23 @@ + +menu "Profiling support" + depends on EXPERIMENTAL + +config PROFILING + bool "Profiling support (EXPERIMENTAL)" + help + Say Y here to enable the extended profiling support mechanisms used + by profilers such as OProfile. + + +config OPROFILE + tristate "OProfile system profiling (EXPERIMENTAL)" + depends on PROFILING + help + OProfile is a profiling system capable of profiling the + whole system, include the kernel, kernel modules, libraries, + and applications. + + If unsure, say N. + +endmenu + diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/Makefile xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/Makefile --- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/Makefile 1969-12-31 18:00:00 -06:00 +++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/Makefile 2005-08-22 20:17:51 -05:00 @@ -0,0 +1,9 @@ +obj-$(CONFIG_OPROFILE) += oprofile.o + +DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ + oprof.o cpu_buffer.o buffer_sync.o \ + event_buffer.o oprofile_files.o \ + oprofilefs.o oprofile_stats.o \ + timer_int.o ) + +oprofile-y := $(DRIVER_OBJS) pmc.o diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/op_counter.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/op_counter.h --- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/op_counter.h 1969-12-31 18:00:00 -06:00 +++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/op_counter.h 2005-08-22 20:17:51 -05:00 @@ -0,0 +1,29 @@ +/** + * @file op_counter.h + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon + */ + +#ifndef OP_COUNTER_H +#define OP_COUNTER_H + +#define OP_MAX_COUNTER 8 + +/* Per-perfctr configuration as set via + * oprofilefs. 
+ */ +struct op_counter_config { + unsigned long count; + unsigned long enabled; + unsigned long event; + unsigned long kernel; + unsigned long user; + unsigned long unit_mask; +}; + +extern struct op_counter_config counter_config[]; + +#endif /* OP_COUNTER_H */ diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/pmc.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/pmc.c --- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/pmc.c 1969-12-31 18:00:00 -06:00 +++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/pmc.c 2005-08-22 20:17:51 -05:00 @@ -0,0 +1,323 @@ +/** + * @file nmi_int.c + * + * @remark Copyright 2002 OProfile authors + * @remark Read the file COPYING + * + * @author John Levon <levon@xxxxxxxxxxxxxxxxx> + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. + */ + +#include <linux/init.h> +#include <linux/notifier.h> +#include <linux/smp.h> +#include <linux/oprofile.h> +#include <linux/sysdev.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <asm/nmi.h> +#include <asm/msr.h> +#include <asm/apic.h> + +#include "op_counter.h" + +static int pmc_start(void); +static void pmc_stop(void); + +/* 0 == registered but off, 1 == registered and on */ +static int pmc_enabled = 0; +static int num_events = 0; +static int is_primary = 0; + +#ifdef CONFIG_PM + +static int pmc_suspend(struct sys_device *dev, u32 state) +{ + if (pmc_enabled == 1) + pmc_stop(); + return 0; +} + + +static int pmc_resume(struct sys_device *dev) +{ + if (pmc_enabled == 1) + pmc_start(); + return 0; +} + + +static struct sysdev_class oprofile_sysclass = { + set_kset_name("oprofile"), + .resume = pmc_resume, + .suspend = pmc_suspend, +}; + + +static struct sys_device device_oprofile = { + .id = 0, + .cls = &oprofile_sysclass, +}; + + +static int __init init_driverfs(void) +{ + int error; + if (!(error = sysdev_class_register(&oprofile_sysclass))) + error = sysdev_register(&device_oprofile); + return error; +} + + +static void __exit exit_driverfs(void) +{ + sysdev_unregister(&device_oprofile); + sysdev_class_unregister(&oprofile_sysclass); +} + +#else +#define init_driverfs() do { } while (0) +#define exit_driverfs() do { } while (0) +#endif /* CONFIG_PM */ + +unsigned long long oprofile_samples = 0; + +static irqreturn_t pmc_ovf_interrupt (int irq, void *dev_id, struct pt_regs *regs) +{ + int head, tail; + shared_info_t *s = HYPERVISOR_shared_info; + + head = s->event_head; + tail = s->event_tail; + + /* oprofile_add_sample will also handle samples from other domains */ + + if (tail > head) { + while (tail < MAX_OPROF_EVENTS) { + oprofile_add_sample_xen(s->event_log[tail].eip, + s->event_log[tail].mode, + s->event_log[tail].event); + /*printk(KERN_INFO "pmc_sample: %p, %d, %d\n", + s->event_log[tail].eip, s->event_log[tail].mode, + s->event_log[tail].event);*/ + oprofile_samples++; + tail++; + } + tail = 0; + } + while (tail < head) { + oprofile_add_sample_xen(s->event_log[tail].eip, + s->event_log[tail].mode, s->event_log[tail].event); + /*printk(KERN_INFO "pmc_sample: %p, %d, %d\n", + s->event_log[tail].eip, s->event_log[tail].mode, + s->event_log[tail].event);*/ + oprofile_samples++; + tail++; + } + + s->event_tail = tail; + s->losing_samples = 0; + + return IRQ_HANDLED; +} + +extern int virq_to_phys(int virq); + +static int pmc_setup(void) +{ + int ret; + + if ((ret = request_irq(bind_virq_to_irq(VIRQ_PMC_OVF), + pmc_ovf_interrupt, SA_INTERRUPT, "pmc_ovf", NULL))) 
+ goto release_irq; + + if (is_primary) { + ret = HYPERVISOR_pmc_op(PMC_RESERVE_COUNTERS, (u64)NULL, (u64)NULL); + //printk(KERN_INFO "pmc_setup: reserve_counters: ret %d\n", ret); + + ret = HYPERVISOR_pmc_op(PMC_SETUP_EVENTS, (u64)&counter_config, (u64)num_events); + //printk(KERN_INFO "pmc_setup: setup_events: ret %d\n", ret); + } + + ret = HYPERVISOR_pmc_op(PMC_ENABLE_VIRQ, (u64)NULL, (u64)NULL); + //printk(KERN_INFO "pmc_setup: enable_virq: ret %d\n", ret); + + pmc_enabled = 1; + return 0; + +release_irq: + free_irq(virq_to_phys(VIRQ_PMC_OVF), NULL); + unbind_virq_from_irq(VIRQ_PMC_OVF); + + return ret; +} + +static void pmc_shutdown(void) +{ + int ret; + pmc_enabled = 0; + + ret = HYPERVISOR_pmc_op(PMC_DISABLE_VIRQ, (u64)NULL, (u64)NULL); + //printk(KERN_INFO "pmc_shutdown: disable_virq: ret %d\n", ret); + + if (is_primary) { + ret = HYPERVISOR_pmc_op(PMC_RELEASE_COUNTERS, (u64)NULL, (u64)NULL); + //printk(KERN_INFO "pmc_shutdown: release_counters: ret %d\n", ret); + } + + free_irq(virq_to_phys(VIRQ_PMC_OVF), NULL); + unbind_virq_from_irq(VIRQ_PMC_OVF); +} + +static int pmc_start(void) +{ + int ret = 0; + if (is_primary) + ret = HYPERVISOR_pmc_op(PMC_START, (u64)NULL, (u64)NULL); + //printk(KERN_INFO "pmc_start: ret %d\n", ret); + return ret; +} + +static void pmc_stop(void) +{ + int ret = 0; + if (is_primary) + ret = HYPERVISOR_pmc_op(PMC_STOP, (u64)NULL, (u64)NULL); + //printk(KERN_INFO "pmc_stop: ret %d\n", ret); + printk(KERN_INFO "pmc: oprofile samples %llu, active %llu, passive %llu, other %llu, buffering losses %llu, NMI restarted %d\n", + oprofile_samples, HYPERVISOR_shared_info->active_samples, HYPERVISOR_shared_info->passive_samples, + HYPERVISOR_shared_info->other_samples, HYPERVISOR_shared_info->samples_lost, HYPERVISOR_shared_info->nmi_restarts); +} + +static int pmc_set_active(int *active_domains, unsigned int adomains) +{ + int ret = 0; + if (is_primary) + ret = HYPERVISOR_pmc_op(PMC_SET_ACTIVE, + (u64)active_domains, (u64)adomains); + return ret; +} + +static int pmc_set_passive(int *passive_domains, unsigned int pdomains) +{ + int ret = 0; + if (is_primary) + ret = HYPERVISOR_pmc_op(PMC_SET_PASSIVE, + (u64)passive_domains, (u64)pdomains); + return ret; +} + +struct op_counter_config counter_config[OP_MAX_COUNTER]; + +static int pmc_create_files(struct super_block * sb, struct dentry * root) +{ + unsigned int i; + + for (i = 0; i < num_events; ++i) { + struct dentry * dir; + char buf[2]; + + snprintf(buf, 2, "%d", i); + dir = oprofilefs_mkdir(sb, root, buf); + oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); + oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); + oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count); + oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); + oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); + oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); + } + + //printk(KERN_INFO "pmc_create_files\n"); + return 0; +} + + +struct oprofile_operations pmc_ops = { + .create_files = pmc_create_files, + .set_active = pmc_set_active, + .set_passive = pmc_set_passive, + .setup = pmc_setup, + .shutdown = pmc_shutdown, + .start = pmc_start, + .stop = pmc_stop +}; + + +static void __init p4_init(void) +{ + __u8 cpu_model = current_cpu_data.x86_model; + + if (cpu_model > 3) + pmc_ops.cpu_type = "type_unknown"; + + /* We always use a non-HT system because that goves us more events */ + pmc_ops.cpu_type = "i386/p4"; +} + + +static void __init 
ppro_init(void) +{ + __u8 cpu_model = current_cpu_data.x86_model; + + if (cpu_model > 0xd) + pmc_ops.cpu_type = "type_unknown"; + + if (cpu_model == 9) { + pmc_ops.cpu_type = "i386/p6_mobile"; + } else if (cpu_model > 5) { + pmc_ops.cpu_type = "i386/piii"; + } else if (cpu_model > 2) { + pmc_ops.cpu_type = "i386/pii"; + } else { + pmc_ops.cpu_type = "i386/ppro"; + } +} + +/* in order to get driverfs right */ +static int using_pmc; + +int __init oprofile_arch_init(struct oprofile_operations * ops) +{ + printk (KERN_INFO "oprofile_arch_init"); + int ret = HYPERVISOR_pmc_op(PMC_INIT, (u64)&num_events, (u64)&is_primary); + + if (!ret) { + __u8 vendor = current_cpu_data.x86_vendor; + __u8 family = current_cpu_data.x86; + + if (vendor == X86_VENDOR_INTEL) { + switch (family) { + /* Pentium IV */ + case 0xf: + p4_init(); + break; + /* A P6-class processor */ + case 6: + ppro_init(); + break; + default: + pmc_ops.cpu_type = "type_unknown"; + } + } else pmc_ops.cpu_type = "type_unknown"; + + init_driverfs(); + using_pmc = 1; + *ops = pmc_ops; + } + printk (KERN_INFO "oprofile_arch_init: ret %d, events %d, is_primary %d\n", ret, num_events, is_primary); + return ret; +} + + +void __exit oprofile_arch_exit(void) +{ + if (using_pmc) + exit_driverfs(); + + if (is_primary) + HYPERVISOR_pmc_op(PMC_SHUTDOWN, (u64)NULL, (u64)NULL); + +} diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/x86_64/Makefile xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/x86_64/Makefile --- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/x86_64/Makefile 2005-08-22 19:43:15 -05:00 +++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/x86_64/Makefile 2005-08-22 20:17:51 -05:00 @@ -69,7 +69,6 @@ libs-y += arch/x86_64/lib/ core-y += arch/xen/x86_64/kernel/ arch/xen/x86_64/mm/ core-$(CONFIG_IA32_EMULATION) += arch/xen/x86_64/ia32/ drivers-$(CONFIG_PCI) += arch/xen/x86_64/pci/ -drivers-$(CONFIG_OPROFILE) += arch/x86_64/oprofile/ # for clean obj- += kernel/ mm/ pci/ diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/buffer_sync.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/buffer_sync.c --- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/buffer_sync.c 2005-06-17 14:48:29 -05:00 +++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/buffer_sync.c 2005-08-22 20:17:51 -05:00 @@ -6,6 +6,10 @@ * * @author John Levon <levon@xxxxxxxxxxxxxxxxx> * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. + * * This is the core of the buffer management. Each * CPU buffer is processed and entered into the * global event buffer. 
Such processing is necessary @@ -265,13 +269,30 @@ static void add_cpu_switch(int i) last_cookie = ~0UL; } -static void add_kernel_ctx_switch(unsigned int in_kernel) +static void add_cpu_mode_switch(unsigned int cpu_mode) { add_event_entry(ESCAPE_CODE); - if (in_kernel) - add_event_entry(KERNEL_ENTER_SWITCH_CODE); - else - add_event_entry(KERNEL_EXIT_SWITCH_CODE); + switch (cpu_mode) + { + case CPU_MODE_USER: + add_event_entry(USER_ENTER_SWITCH_CODE); + break; + case CPU_MODE_KERNEL: + add_event_entry(KERNEL_ENTER_SWITCH_CODE); + break; + case CPU_MODE_XEN: + add_event_entry(XEN_ENTER_SWITCH_CODE); + break; + default: + break; + } +} + +static void add_dom_switch(int domain_id) +{ + add_event_entry(ESCAPE_CODE); + add_event_entry(DOMAIN_SWITCH_CODE); + add_event_entry(domain_id); } static void @@ -337,10 +358,9 @@ static int add_us_sample(struct mm_struc * sample is converted into a persistent dentry/offset pair * for later lookup from userspace. */ -static int -add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel) +static int add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode) { - if (in_kernel) { + if (cpu_mode >= CPU_MODE_KERNEL) { add_sample_entry(s->eip, s->event); return 1; } else if (mm) { @@ -374,6 +394,11 @@ static inline int is_code(unsigned long { return val == ESCAPE_CODE; } + +static inline int is_dom_switch(unsigned long val) +{ + return val == DOMAIN_SWITCH_ESCAPE_CODE; +} /* "acquire" as many cpu buffer slots as we can */ @@ -489,10 +514,11 @@ void sync_buffer(int cpu) struct mm_struct *mm = NULL; struct task_struct * new; unsigned long cookie = 0; - int in_kernel = 1; + int cpu_mode = 1; unsigned int i; sync_buffer_state state = sb_buffer_start; unsigned long available; + int domain_switch = 0; down(&buffer_sem); @@ -506,12 +532,12 @@ void sync_buffer(int cpu) struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos]; if (is_code(s->eip)) { - if (s->event <= CPU_IS_KERNEL) { + if (s->event <= CPU_MODE_MAX) { /* kernel/userspace switch */ - in_kernel = s->event; + cpu_mode = s->event; if (state == sb_buffer_start) state = sb_sample_start; - add_kernel_ctx_switch(s->event); + add_cpu_mode_switch(s->event); } else if (s->event == CPU_TRACE_BEGIN) { state = sb_bt_start; add_trace_begin(); @@ -528,11 +554,23 @@ void sync_buffer(int cpu) add_user_ctx_switch(new, cookie); } } else { - if (state >= sb_bt_start && - !add_sample(mm, s, in_kernel)) { - if (state == sb_bt_start) { - state = sb_bt_ignore; - atomic_inc(&oprofile_stats.bt_lost_no_mapping); + if (is_dom_switch(s->eip)) { + add_dom_switch((int)(s->event)); + domain_switch = 1; + } + else { + if (domain_switch) { + add_sample_entry (s->eip, s->event); + domain_switch = 0; + } + else { + if (state >= sb_bt_start && + !add_sample(mm, s, cpu_mode)) { + if (state == sb_bt_start) { + state = sb_bt_ignore; + atomic_inc(&oprofile_stats.bt_lost_no_mapping); + } + } } } } diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.c --- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.c 2005-06-17 14:48:29 -05:00 +++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.c 2005-08-22 20:17:51 -05:00 @@ -6,6 +6,10 @@ * * @author John Levon <levon@xxxxxxxxxxxxxxxxx> * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. + * * Each CPU has a local buffer that stores PC value/event * pairs. 
We also log context switches when we notice them. * Eventually each CPU's buffer is processed into the global @@ -58,7 +62,7 @@ int alloc_cpu_buffers(void) goto fail; b->last_task = NULL; - b->last_is_kernel = -1; + b->last_cpu_mode = -1; b->tracing = 0; b->buffer_size = buffer_size; b->tail_pos = 0; @@ -117,7 +121,7 @@ void cpu_buffer_reset(struct oprofile_cp * collected will populate the buffer with proper * values to initialize the buffer */ - cpu_buf->last_is_kernel = -1; + cpu_buf->last_cpu_mode = -1; cpu_buf->last_task = NULL; } @@ -180,7 +184,7 @@ add_code(struct oprofile_cpu_buffer * bu * events whenever is_kernel changes */ static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc, - int is_kernel, unsigned long event) + int cpu_mode, unsigned long event) { struct task_struct * task; @@ -191,24 +195,39 @@ static int log_sample(struct oprofile_cp return 0; } - is_kernel = !!is_kernel; + // Ensure a valid cpu mode + if (cpu_mode > CPU_MODE_XEN) + return 0; task = current; - /* notice a switch from user->kernel or vice versa */ - if (cpu_buf->last_is_kernel != is_kernel) { - cpu_buf->last_is_kernel = is_kernel; - add_code(cpu_buf, is_kernel); - } - /* notice a task switch */ - if (cpu_buf->last_task != task) { - cpu_buf->last_task = task; - add_code(cpu_buf, (unsigned long)task); + /* We treat samples from other domains in a special manner: + each sample is preceded by a record with eip equal to ~1UL. + This record is non-sticky i.e. it holds only for the following + sample. The event field of this record stores the domain id.*/ + if (pc == DOMAIN_SWITCH_ESCAPE_CODE) { + add_sample(cpu_buf, pc, event); + return 1; + } else { + /* notice a switch from user->kernel or vice versa */ + if (cpu_buf->last_cpu_mode != cpu_mode) { + cpu_buf->last_cpu_mode = cpu_mode; + add_code(cpu_buf, cpu_mode); + } + + /* notice a task switch */ + if (cpu_buf->last_task != task) { + cpu_buf->last_task = task; + add_code(cpu_buf, (unsigned long)task); + } + + /* Note: at this point, we lose the cpu_mode of a sample + if it is from another domain */ + + add_sample(cpu_buf, pc, event); + return 1; } - - add_sample(cpu_buf, pc, event); - return 1; } static int oprofile_begin_trace(struct oprofile_cpu_buffer * cpu_buf) @@ -229,6 +248,14 @@ static void oprofile_end_trace(struct op cpu_buf->tracing = 0; } +void oprofile_add_sample_xen(unsigned long eip, unsigned int cpu_mode, + unsigned long event) +{ + struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()]; + log_sample(cpu_buf, eip, cpu_mode, event); + + +} void oprofile_add_sample(struct pt_regs * const regs, unsigned long event) { diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.h --- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.h 2005-06-17 14:48:29 -05:00 +++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.h 2005-08-22 20:17:51 -05:00 @@ -36,7 +36,7 @@ struct oprofile_cpu_buffer { volatile unsigned long tail_pos; unsigned long buffer_size; struct task_struct * last_task; - int last_is_kernel; + int last_cpu_mode; int tracing; struct op_sample * buffer; unsigned long sample_received; @@ -51,7 +51,14 @@ extern struct oprofile_cpu_buffer cpu_bu void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf); /* transient events for the CPU buffer -> event buffer */ -#define CPU_IS_KERNEL 1 -#define CPU_TRACE_BEGIN 2 +#define CPU_MODE_USER 0 +#define 
CPU_MODE_KERNEL 1 +#define CPU_MODE_XEN 2 +#define CPU_MODE_MAX 2 +#define CPU_TRACE_BEGIN 3 +/* special escape code for indicating next sample in the CPU */ +/* buffer is from another Xen domain */ +#define DOMAIN_SWITCH_ESCAPE_CODE ~1UL + #endif /* OPROFILE_CPU_BUFFER_H */ diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/event_buffer.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/event_buffer.c --- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/event_buffer.c 2005-06-17 14:48:29 -05:00 +++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/event_buffer.c 2005-08-22 20:17:51 -05:00 @@ -56,6 +56,7 @@ void add_event_entry(unsigned long value /* Wake up the waiting process if any. This happens * on "echo 0 >/dev/oprofile/enable" so the daemon * processes the data remaining in the event buffer. + * also called on echo 1 > /dev/oprofile/dump */ void wake_up_buffer_waiter(void) { diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/event_buffer.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/event_buffer.h --- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/event_buffer.h 2005-06-17 14:48:29 -05:00 +++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/event_buffer.h 2005-08-22 20:17:51 -05:00 @@ -5,6 +5,10 @@ * @remark Read the file COPYING * * @author John Levon <levon@xxxxxxxxxxxxxxxxx> + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. */ #ifndef EVENT_BUFFER_H @@ -29,11 +33,13 @@ void wake_up_buffer_waiter(void); #define CPU_SWITCH_CODE 2 #define COOKIE_SWITCH_CODE 3 #define KERNEL_ENTER_SWITCH_CODE 4 -#define KERNEL_EXIT_SWITCH_CODE 5 +#define USER_ENTER_SWITCH_CODE 5 #define MODULE_LOADED_CODE 6 #define CTX_TGID_CODE 7 #define TRACE_BEGIN_CODE 8 #define TRACE_END_CODE 9 +#define XEN_ENTER_SWITCH_CODE 10 +#define DOMAIN_SWITCH_CODE 11 /* add data to the event buffer */ void add_event_entry(unsigned long data); diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/oprof.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/oprof.c --- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/oprof.c 2005-06-17 14:48:29 -05:00 +++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/oprof.c 2005-08-22 20:17:51 -05:00 @@ -5,6 +5,10 @@ * @remark Read the file COPYING * * @author John Levon <levon@xxxxxxxxxxxxxxxxx> + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. 
*/ #include <linux/kernel.h> @@ -32,6 +36,25 @@ static DECLARE_MUTEX(start_sem); 1 - use the timer int mechanism regardless */ static int timer = 0; + +extern unsigned int adomains, pdomains; +extern int active_domains[MAX_OPROF_DOMAINS], passive_domains[MAX_OPROF_DOMAINS]; + +int oprofile_set_active(void) +{ + if (oprofile_ops.set_active) + return oprofile_ops.set_active(active_domains, adomains); + + return -EINVAL; +} + +int oprofile_set_passive(void) +{ + if (oprofile_ops.set_passive) + return oprofile_ops.set_passive(passive_domains, pdomains); + + return -EINVAL; +} int oprofile_setup(void) { diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/oprofile_files.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/oprofile_files.c --- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/oprofile_files.c 2005-06-17 14:48:29 -05:00 +++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/oprofile_files.c 2005-08-22 20:17:51 -05:00 @@ -5,10 +5,16 @@ * @remark Read the file COPYING * * @author John Levon <levon@xxxxxxxxxxxxxxxxx> + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. */ #include <linux/fs.h> #include <linux/oprofile.h> +#include <linux/pagemap.h> +#include <linux/ctype.h> #include "event_buffer.h" #include "oprofile_stats.h" @@ -117,11 +123,140 @@ static ssize_t dump_write(struct file * static struct file_operations dump_fops = { .write = dump_write, }; - + +#define TMPBUFSIZE 50 + +unsigned int adomains = 0; +long active_domains[MAX_OPROF_DOMAINS]; + +extern int oprofile_set_active(void); + +static ssize_t adomain_write(struct file *file, char const __user *buf, size_t count, loff_t * offset) +{ + char tmpbuf[TMPBUFSIZE]; + char *startp = tmpbuf; + char *endp = tmpbuf; + int i; + unsigned long val; + + if (*offset) + return -EINVAL; + if (!count) + return 0; + if (count > TMPBUFSIZE - 1) + return -EINVAL; + + memset(tmpbuf, 0x0, TMPBUFSIZE); + + if (copy_from_user(tmpbuf, buf, count)) + return -EFAULT; + + for (i = 0; i < MAX_OPROF_DOMAINS; i++) + active_domains[i] = -1; + adomains = 0; + + while (1) { + val = simple_strtol(startp, &endp, 0); + if (endp == startp) + break; + while (ispunct(*endp)) + endp++; + active_domains[adomains++] = val; + if (adomains >= MAX_OPROF_DOMAINS) + break; + startp = endp; + } + if (oprofile_set_active()) + return -EINVAL; + return count; +} + +static ssize_t adomain_read(struct file *file, char __user * buf, size_t count, loff_t * offset) +{ + char tmpbuf[TMPBUFSIZE]; + size_t len = 0; + int i; + /* This is all screwed up if we run out of space */ + for (i = 0; i < adomains; i++) + len += snprintf(tmpbuf + len, TMPBUFSIZE - len, "%u ", (unsigned int)active_domains[i]); + len += snprintf(tmpbuf + len, TMPBUFSIZE - len, "\n"); + return simple_read_from_buffer((void __user *)buf, count, offset, tmpbuf, len); +} + + +static struct file_operations active_domain_ops = { + .read = adomain_read, + .write = adomain_write, +}; + +unsigned int pdomains = 0; +long passive_domains[MAX_OPROF_DOMAINS]; + +extern int oprofile_set_passive(void); + +static ssize_t pdomain_write(struct file *file, char const __user *buf, size_t count, loff_t * offset) +{ + char tmpbuf[TMPBUFSIZE]; + char *startp = tmpbuf; + char *endp = tmpbuf; + int i; + unsigned long val; + + if (*offset) + return -EINVAL; + if (!count) + return 0; + if (count > TMPBUFSIZE - 1) + return -EINVAL; + + memset(tmpbuf, 0x0, TMPBUFSIZE); + + if (copy_from_user(tmpbuf, buf, 
count)) + return -EFAULT; + + for (i = 0; i < MAX_OPROF_DOMAINS; i++) + passive_domains[i] = -1; + pdomains = 0; + + while (1) { + val = simple_strtol(startp, &endp, 0); + if (endp == startp) + break; + while (ispunct(*endp)) + endp++; + passive_domains[pdomains++] = val; + if (pdomains >= MAX_OPROF_DOMAINS) + break; + startp = endp; + } + if (oprofile_set_passive()) + return -EINVAL; + return count; +} + +static ssize_t pdomain_read(struct file *file, char __user * buf, size_t count, loff_t * offset) +{ + char tmpbuf[TMPBUFSIZE]; + size_t len = 0; + int i; + /* This is all screwed up if we run out of space */ + for (i = 0; i < pdomains; i++) + len += snprintf(tmpbuf + len, TMPBUFSIZE - len, "%u ", (unsigned int)passive_domains[i]); + len += snprintf (tmpbuf + len, TMPBUFSIZE - len, "\n"); + return simple_read_from_buffer((void __user *)buf, count, offset, tmpbuf, len); +} + +static struct file_operations passive_domain_ops = { + .read = pdomain_read, + .write = pdomain_write, +}; + void oprofile_create_files(struct super_block * sb, struct dentry * root) { oprofilefs_create_file(sb, root, "enable", &enable_fops); oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666); + oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops); + oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops); oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops); oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size); oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed); diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/asm-i386/hypercall.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/asm-i386/hypercall.h --- xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/asm-i386/hypercall.h 2005-08-22 19:43:16 -05:00 +++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/asm-i386/hypercall.h 2005-08-22 20:20:01 -05:00 @@ -576,4 +576,21 @@ HYPERVISOR_vcpu_pickle( return ret; } + +static inline int +HYPERVISOR_pmc_op( + int op, unsigned int arg1, unsigned int arg2) +{ + int ret; + unsigned long ign1, ign2, ign3; + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a"(ret), "=b"(ign1), "=c"(ign2), "=d"(ign3) + : "0"(__HYPERVISOR_pmc_op), "1"(op), "2"(arg1), "3"(arg2) + : "memory" ); + + return ret; +} + #endif /* __HYPERCALL_H__ */ diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/asm-x86_64/hypercall.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/asm-x86_64/hypercall.h --- xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/asm-x86_64/hypercall.h 2005-08-22 19:43:16 -05:00 +++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/asm-x86_64/hypercall.h 2005-08-22 20:17:51 -05:00 @@ -519,4 +519,19 @@ HYPERVISOR_vcpu_pickle( return ret; } +static inline int +HYPERVISOR_pmc_op( + int op, u64 arg1, u64 arg2) +{ + int ret; + + __asm__ __volatile__ ( + TRAP_INSTR + : "=a"(ret) + : "0"(__HYPERVISOR_pmc_op), "D"(op), "S"(arg1), "d"(arg2) + : __syscall_clobber ); + + return ret; +} + #endif /* __HYPERCALL_H__ */ diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/xen-public/xen.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/xen-public/xen.h --- xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/xen-public/xen.h 2005-08-22 19:43:14 -05:00 +++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/xen-public/xen.h 2005-08-22 20:17:51 -05:00 @@ -4,6 +4,10 @@ * Guest OS 
interface to Xen. * * Copyright (c) 2004, K A Fraser + * + * Modified by Aravind Menon for supporting oprofile + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. */ #ifndef __XEN_PUBLIC_XEN_H__ @@ -59,6 +63,7 @@ #define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ #define __HYPERVISOR_mmuext_op 26 #define __HYPERVISOR_acm_op 27 +#define __HYPERVISOR_pmc_op 28 /* * VIRTUAL INTERRUPTS @@ -72,7 +77,8 @@ #define VIRQ_PARITY_ERR 4 /* (DOM0) NMI parity error. */ #define VIRQ_IO_ERR 5 /* (DOM0) NMI I/O error. */ #define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */ -#define NR_VIRQS 7 +#define VIRQ_PMC_OVF 7 /* PMC Overflow */ +#define NR_VIRQS 8 /* * MMU-UPDATE REQUESTS @@ -240,6 +246,21 @@ struct mmuext_op { #define VMASST_TYPE_writable_pagetables 2 #define MAX_VMASST_TYPE 2 +/* + * Commands to HYPERVISOR_pmc_op(). + */ +#define PMC_INIT 0 +#define PMC_SET_ACTIVE 1 +#define PMC_SET_PASSIVE 2 +#define PMC_RESERVE_COUNTERS 3 +#define PMC_SETUP_EVENTS 4 +#define PMC_ENABLE_VIRQ 5 +#define PMC_START 6 +#define PMC_STOP 7 +#define PMC_DISABLE_VIRQ 8 +#define PMC_RELEASE_COUNTERS 9 +#define PMC_SHUTDOWN 10 + #ifndef __ASSEMBLY__ typedef u16 domid_t; @@ -292,6 +313,8 @@ typedef struct /* Event channel endpoints per domain. */ #define NR_EVENT_CHANNELS 1024 +#define MAX_OPROF_EVENTS 32 +#define MAX_OPROF_DOMAINS 25 /* * Per-VCPU information goes here. This will be cleaned up more when Xen * actually supports multi-VCPU guests. @@ -407,6 +430,21 @@ typedef struct shared_info { u32 wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */ arch_shared_info_t arch; + + /* Oprofile structures */ + u8 event_head; + u8 event_tail; + struct { + u64 eip; + u8 mode; + u8 event; + } event_log[MAX_OPROF_EVENTS]; + u8 losing_samples; + u64 samples_lost; + u32 nmi_restarts; + u64 active_samples; + u64 passive_samples; + u64 other_samples; } shared_info_t; diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/include/linux/oprofile.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/linux/oprofile.h --- xen-unstable.hg-20050822/linux-2.6.12-xen0/include/linux/oprofile.h 2005-06-17 14:48:29 -05:00 +++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/linux/oprofile.h 2005-08-22 20:17:51 -05:00 @@ -8,6 +8,10 @@ * @remark Read the file COPYING * * @author John Levon <levon@xxxxxxxxxxxxxxxxx> + * + * Modified by Aravind Menon for Xen + * These modifications are: + * Copyright (C) 2005 Hewlett-Packard Co. */ #ifndef OPROFILE_H @@ -27,6 +31,10 @@ struct oprofile_operations { /* create any necessary configuration files in the oprofile fs. * Optional. */ int (*create_files)(struct super_block * sb, struct dentry * root); + /* setup active domains with Xen */ + int (*set_active)(int *active_domains, unsigned int adomains); + /* setup passive domains with Xen */ + int (*set_passive)(int *passive_domains, unsigned int pdomains); /* Do any necessary interrupt setup. Optional. */ int (*setup)(void); /* Do any necessary interrupt shutdown. Optional. */ @@ -60,6 +68,15 @@ void oprofile_arch_exit(void); * smp_processor_id() as cpu. */ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event); + +/** + * alternative function to Add a sample for Xen. 
+ * It would be better to combine both functions into only one but this would + * require getting parameter cpu_mode(old is_kernel) back to + * oprofile_add_sample() m(Xen is the best location to determine cpu_mode) + */ +extern void oprofile_add_sample_xen(unsigned long eip, unsigned int cpu_mode, + unsigned long event); /* Use this instead when the PC value is not from the regs. Doesn't * backtrace. */ _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
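The hypervisor side of the sample path (the new xen/arch/x86/oprofile files) is not included in this excerpt, but the shared_info fields added above (event_head, event_tail, event_log[], losing_samples and samples_lost), together with the pmc_ovf_interrupt() handler in pmc.c, imply a simple single-producer/single-consumer ring between Xen and the profiling domain. The sketch below illustrates only the assumed producer side of that protocol; it is not the actual Xen code from the patch, and the names xenoprof_log_event and xenoprof_shared_t are invented for illustration.

#include <stdint.h>

typedef uint8_t  u8;
typedef uint64_t u64;

#define MAX_OPROF_EVENTS 32

/* Minimal stand-in for the oprofile fields the patch adds to shared_info_t. */
typedef struct {
    u8 event_head, event_tail;
    struct { u64 eip; u8 mode; u8 event; } event_log[MAX_OPROF_EVENTS];
    u8  losing_samples;
    u64 samples_lost;
} xenoprof_shared_t;

/* Assumed behaviour of the Xen-side NMI handler when a counter overflows:
 * append one record at event_head, or account a lost sample if the ring
 * is full, then (not shown) raise VIRQ_PMC_OVF toward the profiling domain. */
void xenoprof_log_event(xenoprof_shared_t *s, u64 eip, u8 mode, u8 event)
{
    u8 head = s->event_head;
    u8 next = (head + 1) % MAX_OPROF_EVENTS;

    if (next == s->event_tail) {
        /* Ring full: dom0 has not drained it yet.  Drop the sample and
         * record the loss so pmc_stop() can report it later. */
        s->losing_samples = 1;
        s->samples_lost++;
        return;
    }

    s->event_log[head].eip   = eip;
    s->event_log[head].mode  = mode;   /* user / kernel / Xen */
    s->event_log[head].event = event;  /* counter id, or domain id for a
                                          passive-domain escape record */
    s->event_head = next;
}

Since the consumer in pmc_ovf_interrupt() drains event_tail up to (but not including) event_head, a producer along these lines has to leave one slot unused rather than letting event_head catch up to event_tail; otherwise a full ring would be indistinguishable from an empty one.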