[Xen-changelog] Merged.
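This merge reworks the mini-os hypercall stubs to use the Linux-style generic _hypercallN macros and folds the old HYPERVISOR_yield/block/shutdown/suspend helpers into a single HYPERVISOR_sched_op(cmd, arg) entry point, with callers in kernel.c and time.c updated accordingly. As a minimal illustrative sketch (not part of the patch below), the i386 pattern looks like this; it assumes TRAP_INSTR, __HYPERVISOR_sched_op and the SCHEDOP_* constants from the Xen/mini-os headers touched in the changeset, and the block_domain_example() wrapper name is purely hypothetical:

/*
 * Two-argument hypercall: the "0"/"1"/"2" matching constraints pin the
 * hypercall number and arguments to EAX/EBX/ECX before TRAP_INSTR is
 * executed; the result comes back in EAX and is cast to the stub's
 * return type.  TRAP_INSTR and the __HYPERVISOR_*/SCHEDOP_* constants
 * come from the Xen public and mini-os headers, and the trap only does
 * anything meaningful inside an i386 paravirtualised guest.
 */
#define _hypercall2(type, name, a1, a2)                         \
({                                                              \
    long __res, __ign1, __ign2;                                 \
    asm volatile (                                              \
        TRAP_INSTR                                              \
        : "=a" (__res), "=b" (__ign1), "=c" (__ign2)            \
        : "0" (__HYPERVISOR_##name), "1" ((long)(a1)),          \
          "2" ((long)(a2))                                      \
        : "memory" );                                           \
    (type)__res;                                                \
})

static inline int HYPERVISOR_sched_op(int cmd, unsigned long arg)
{
    return _hypercall2(int, sched_op, cmd, arg);
}

/* Hypothetical caller: scheduler sub-ops are now named explicitly,
 * as in the extras/mini-os/time.c hunk below. */
static inline void block_domain_example(void)
{
    HYPERVISOR_sched_op(SCHEDOP_block, 0);   /* was HYPERVISOR_block() */
}
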
# HG changeset patch # User emellor@xxxxxxxxxxxxxxxxxxxxxx # Node ID bcccadcc56e5bd460fcffdb4931652ebac928276 # Parent dc36edf1102fd8dc3638f1a2f0bd7f38cb97e8c3 # Parent b5903c9aeda56523b6676163e06b7db5cf4cd93d Merged. diff -r dc36edf1102f -r bcccadcc56e5 extras/mini-os/include/events.h --- a/extras/mini-os/include/events.h Sun Oct 30 12:52:38 2005 +++ b/extras/mini-os/include/events.h Sun Oct 30 13:00:35 2005 @@ -47,7 +47,7 @@ { evtchn_op_t op; op.cmd = EVTCHNOP_send; - op.u.send.local_port = port; + op.u.send.port = port; return HYPERVISOR_event_channel_op(&op); } diff -r dc36edf1102f -r bcccadcc56e5 extras/mini-os/include/hypervisor.h --- a/extras/mini-os/include/hypervisor.h Sun Oct 30 12:52:38 2005 +++ b/extras/mini-os/include/hypervisor.h Sun Oct 30 13:00:35 2005 @@ -14,6 +14,7 @@ #include <types.h> #include <xen/xen.h> +#include <xen/dom0_ops.h> /* * a placeholder for the start of day information passed up from the hypervisor @@ -37,548 +38,281 @@ * Assembler stubs for hyper-calls. */ #if defined(__i386__) +/* Taken from Linux */ + +#ifndef __HYPERCALL_H__ +#define __HYPERCALL_H__ + +#include <xen/sched.h> + +#define _hypercall0(type, name) \ +({ \ + long __res; \ + asm volatile ( \ + TRAP_INSTR \ + : "=a" (__res) \ + : "0" (__HYPERVISOR_##name) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall1(type, name, a1) \ +({ \ + long __res, __ign1; \ + asm volatile ( \ + TRAP_INSTR \ + : "=a" (__res), "=b" (__ign1) \ + : "0" (__HYPERVISOR_##name), "1" ((long)(a1)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + long __res, __ign1, __ign2; \ + asm volatile ( \ + TRAP_INSTR \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2) \ + : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \ + "2" ((long)(a2)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + asm volatile ( \ + TRAP_INSTR \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3) \ + : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \ + "2" ((long)(a2)), "3" ((long)(a3)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4; \ + asm volatile ( \ + TRAP_INSTR \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" (__ign4) \ + : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \ + "2" ((long)(a2)), "3" ((long)(a3)), \ + "4" ((long)(a4)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4, __ign5; \ + asm volatile ( \ + TRAP_INSTR \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" (__ign4), "=D" (__ign5) \ + : "0" (__HYPERVISOR_##name), "1" ((long)(a1)), \ + "2" ((long)(a2)), "3" ((long)(a3)), \ + "4" ((long)(a4)), "5" ((long)(a5)) \ + : "memory" ); \ + (type)__res; \ +}) + static inline int HYPERVISOR_set_trap_table( - trap_info_t *table) -{ - int ret; - unsigned long ignore; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ignore) - : "0" (__HYPERVISOR_set_trap_table), "1" (table) - : "memory" ); - - return ret; + trap_info_t *table) +{ + return _hypercall1(int, set_trap_table, table); } static inline int HYPERVISOR_mmu_update( - mmu_update_t *req, int count, int *success_count, domid_t domid) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) - : "0" 
(__HYPERVISOR_mmu_update), "1" (req), "2" (count), - "3" (success_count), "4" (domid) - : "memory" ); - - return ret; + mmu_update_t *req, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmu_update, req, count, success_count, domid); } static inline int HYPERVISOR_mmuext_op( - struct mmuext_op *op, int count, int *success_count, domid_t domid) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) - : "0" (__HYPERVISOR_mmuext_op), "1" (op), "2" (count), - "3" (success_count), "4" (domid) - : "memory" ); - - return ret; + struct mmuext_op *op, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmuext_op, op, count, success_count, domid); } static inline int HYPERVISOR_set_gdt( - unsigned long *frame_list, int entries) -{ - int ret; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_set_gdt), "1" (frame_list), "2" (entries) - : "memory" ); - - - return ret; + unsigned long *frame_list, int entries) +{ + return _hypercall2(int, set_gdt, frame_list, entries); } static inline int HYPERVISOR_stack_switch( - unsigned long ss, unsigned long esp) -{ - int ret; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_stack_switch), "1" (ss), "2" (esp) - : "memory" ); - - return ret; + unsigned long ss, unsigned long esp) +{ + return _hypercall2(int, stack_switch, ss, esp); } static inline int HYPERVISOR_set_callbacks( - unsigned long event_selector, unsigned long event_address, - unsigned long failsafe_selector, unsigned long failsafe_address) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) - : "0" (__HYPERVISOR_set_callbacks), "1" (event_selector), - "2" (event_address), "3" (failsafe_selector), "4" (failsafe_address) - : "memory" ); - - return ret; + unsigned long event_selector, unsigned long event_address, + unsigned long failsafe_selector, unsigned long failsafe_address) +{ + return _hypercall4(int, set_callbacks, + event_selector, event_address, + failsafe_selector, failsafe_address); } static inline int HYPERVISOR_fpu_taskswitch( - int set) -{ - int ret; - unsigned long ign; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign) - : "0" (__HYPERVISOR_fpu_taskswitch), "1" (set) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_yield( - void) -{ - int ret; - unsigned long ign; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign) - : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_yield) - : "memory", "ecx" ); - - return ret; -} - -static inline int -HYPERVISOR_block( - void) -{ - int ret; - unsigned long ign1; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_block) - : "memory", "ecx" ); - - return ret; -} - -static inline int -HYPERVISOR_shutdown( - void) -{ - int ret; - unsigned long ign1; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift)) - : "memory", "ecx" ); - - return ret; -} - -static inline int -HYPERVISOR_reboot( - void) -{ - int ret; - unsigned long ign1; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), 
- "1" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift)) - : "memory", "ecx" ); - - return ret; -} - -static inline int -HYPERVISOR_suspend( - unsigned long srec) -{ - int ret; - unsigned long ign1, ign2; - - /* NB. On suspend, control software expects a suspend record in %esi. */ - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=S" (ign2) - : "0" (__HYPERVISOR_sched_op), - "b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)), - "S" (srec) : "memory", "ecx"); - - return ret; -} - -static inline int -HYPERVISOR_crash( - void) -{ - int ret; - unsigned long ign1; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_shutdown | (SHUTDOWN_crash << SCHEDOP_reasonshift)) - : "memory", "ecx" ); - - return ret; + int set) +{ + return _hypercall1(int, fpu_taskswitch, set); +} + +static inline int +HYPERVISOR_sched_op( + int cmd, unsigned long arg) +{ + return _hypercall2(int, sched_op, cmd, arg); } static inline long HYPERVISOR_set_timer_op( - u64 timeout) -{ - int ret; - unsigned long timeout_hi = (unsigned long)(timeout>>32); - unsigned long timeout_lo = (unsigned long)timeout; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_set_timer_op), "b" (timeout_lo), "c" (timeout_hi) - : "memory"); - - return ret; -} - -#if 0 + u64 timeout) +{ + unsigned long timeout_hi = (unsigned long)(timeout>>32); + unsigned long timeout_lo = (unsigned long)timeout; + return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); +} + static inline int HYPERVISOR_dom0_op( - dom0_op_t *dom0_op) -{ - int ret; - unsigned long ign1; - - dom0_op->interface_version = DOM0_INTERFACE_VERSION; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_dom0_op), "1" (dom0_op) - : "memory"); - - return ret; -} -#endif + dom0_op_t *dom0_op) +{ + dom0_op->interface_version = DOM0_INTERFACE_VERSION; + return _hypercall1(int, dom0_op, dom0_op); +} static inline int HYPERVISOR_set_debugreg( - int reg, unsigned long value) -{ - int ret; - unsigned long ign1, ign2; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_set_debugreg), "1" (reg), "2" (value) - : "memory" ); - - return ret; + int reg, unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); } static inline unsigned long HYPERVISOR_get_debugreg( - int reg) -{ - unsigned long ret; - unsigned long ign; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign) - : "0" (__HYPERVISOR_get_debugreg), "1" (reg) - : "memory" ); - - return ret; + int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); } static inline int HYPERVISOR_update_descriptor( - u64 ma, u64 desc) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) - : "0" (__HYPERVISOR_update_descriptor), - "1" ((unsigned long)ma), "2" ((unsigned long)(ma>>32)), - "3" ((unsigned long)desc), "4" ((unsigned long)(desc>>32)) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_dom_mem_op( - unsigned int op, unsigned long *extent_list, - unsigned long nr_extents, unsigned int extent_order) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4, ign5; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4), - "=D" (ign5) - : "0" (__HYPERVISOR_dom_mem_op), "1" (op), 
"2" (extent_list), - "3" (nr_extents), "4" (extent_order), "5" (DOMID_SELF) - : "memory" ); - - return ret; + u64 ma, u64 desc) +{ + return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); +} + +static inline int +HYPERVISOR_memory_op( + unsigned int cmd, void *arg) +{ + return _hypercall2(int, memory_op, cmd, arg); } static inline int HYPERVISOR_multicall( - void *call_list, int nr_calls) -{ - int ret; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_multicall), "1" (call_list), "2" (nr_calls) - : "memory" ); - - return ret; + void *call_list, int nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); } static inline int HYPERVISOR_update_va_mapping( - unsigned long va, pte_t new_val, unsigned long flags) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), "=S" (ign4) - : "0" (__HYPERVISOR_update_va_mapping), - "1" (va), "2" ((new_val).pte_low), + unsigned long va, pte_t new_val, unsigned long flags) +{ + unsigned long pte_hi = 0; #ifdef CONFIG_X86_PAE - "3" ((new_val).pte_high), -#else - "3" (0), + pte_hi = new_val.pte_high; #endif - "4" (flags) - : "memory" ); - - return ret; + return _hypercall4(int, update_va_mapping, va, + new_val.pte_low, pte_hi, flags); } static inline int HYPERVISOR_event_channel_op( - void *op) -{ - int ret; - unsigned long ignore; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ignore) - : "0" (__HYPERVISOR_event_channel_op), "1" (op) - : "memory" ); - - return ret; + void *op) +{ + return _hypercall1(int, event_channel_op, op); } static inline int HYPERVISOR_xen_version( - int cmd, void *arg) -{ - int ret; - unsigned long ignore, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ignore), "=c" (ign2) - : "0" (__HYPERVISOR_xen_version), "1" (cmd), "2" (arg) - : "memory" ); - - return ret; + int cmd, void *arg) +{ + return _hypercall2(int, xen_version, cmd, arg); } static inline int HYPERVISOR_console_io( - int cmd, int count, char *str) -{ - int ret; - unsigned long ign1, ign2, ign3; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3) - : "0" (__HYPERVISOR_console_io), "1" (cmd), "2" (count), "3" (str) - : "memory" ); - - return ret; + int cmd, int count, char *str) +{ + return _hypercall3(int, console_io, cmd, count, str); } static inline int HYPERVISOR_physdev_op( - void *physdev_op) -{ - int ret; - unsigned long ign; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign) - : "0" (__HYPERVISOR_physdev_op), "1" (physdev_op) - : "memory" ); - - return ret; + void *physdev_op) +{ + return _hypercall1(int, physdev_op, physdev_op); } static inline int HYPERVISOR_grant_table_op( - unsigned int cmd, void *uop, unsigned int count) -{ - int ret; - unsigned long ign1, ign2, ign3; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3) - : "0" (__HYPERVISOR_grant_table_op), "1" (cmd), "2" (uop), "3" (count) - : "memory" ); - - return ret; + unsigned int cmd, void *uop, unsigned int count) +{ + return _hypercall3(int, grant_table_op, cmd, uop, count); } static inline int HYPERVISOR_update_va_mapping_otherdomain( - unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) -{ - int ret; - unsigned long ign1, ign2, ign3, ign4, ign5; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3), - "=S" 
(ign4), "=D" (ign5) - : "0" (__HYPERVISOR_update_va_mapping_otherdomain), - "1" (va), "2" ((new_val).pte_low), + unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) +{ + unsigned long pte_hi = 0; #ifdef CONFIG_X86_PAE - "3" ((new_val).pte_high), -#else - "3" (0), + pte_hi = new_val.pte_high; #endif - "4" (flags), "5" (domid) : - "memory" ); - - return ret; + return _hypercall5(int, update_va_mapping_otherdomain, va, + new_val.pte_low, pte_hi, flags, domid); } static inline int HYPERVISOR_vm_assist( - unsigned int cmd, unsigned int type) -{ - int ret; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_vm_assist), "1" (cmd), "2" (type) - : "memory" ); - - return ret; -} - -static inline int -HYPERVISOR_boot_vcpu( - unsigned long vcpu, vcpu_guest_context_t *ctxt) -{ - int ret; - unsigned long ign1, ign2; - - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_boot_vcpu), "1" (vcpu), "2" (ctxt) - : "memory"); - - return ret; -} - -static inline int -HYPERVISOR_vcpu_down( - int vcpu) -{ - int ret; - unsigned long ign1; - /* Yes, I really do want to clobber edx here: when we resume a - vcpu after unpickling a multi-processor domain, it returns - here, but clobbers all of the call clobbered registers. */ - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_vcpu_down | (vcpu << SCHEDOP_vcpushift)) - : "memory", "ecx", "edx" ); - - return ret; -} - -static inline int -HYPERVISOR_vcpu_up( - int vcpu) -{ - int ret; - unsigned long ign1; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_vcpu_up | (vcpu << SCHEDOP_vcpushift)) - : "memory", "ecx" ); - - return ret; -} - -static inline int -HYPERVISOR_vcpu_pickle( - int vcpu, vcpu_guest_context_t *ctxt) -{ - int ret; - unsigned long ign1, ign2; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret), "=b" (ign1), "=c" (ign2) - : "0" (__HYPERVISOR_sched_op), - "1" (SCHEDOP_vcpu_pickle | (vcpu << SCHEDOP_vcpushift)), - "2" (ctxt) - : "memory" ); - - return ret; -} + unsigned int cmd, unsigned int type) +{ + return _hypercall2(int, vm_assist, cmd, type); +} + +static inline int +HYPERVISOR_vcpu_op( + int cmd, int vcpuid, void *extra_args) +{ + return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); +} + +static inline int +HYPERVISOR_suspend( + unsigned long srec) +{ + return _hypercall3(int, sched_op, SCHEDOP_shutdown, + SHUTDOWN_suspend, srec); +} + +#endif /* __HYPERCALL_H__ */ #elif defined(__x86_64__) #define __syscall_clobber "r11","rcx","memory" @@ -792,106 +526,4 @@ } #endif - -static __inline__ int HYPERVISOR_dom0_op(void *dom0_op) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_dom0_op), - _a1 (dom0_op) : "memory" ); - - return ret; -} - -static __inline__ int HYPERVISOR_set_debugreg(int reg, unsigned long value) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_set_debugreg), - _a1 (reg), _a2 (value) : "memory" ); - - return ret; -} - -static __inline__ unsigned long HYPERVISOR_get_debugreg(int reg) -{ - unsigned long ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_get_debugreg), - _a1 (reg) : "memory" ); - - return ret; -} - -static __inline__ int HYPERVISOR_update_descriptor( - unsigned long pa, unsigned long word1, unsigned long word2) -{ - int ret; - __asm__ __volatile__ ( - 
TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_update_descriptor), - _a1 (pa), _a2 (word1), _a3 (word2) : "memory" ); - - return ret; -} - -static __inline__ int HYPERVISOR_dom_mem_op(void *dom_mem_op) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_memory_op), - _a1 (dom_mem_op) : "memory" ); - - return ret; -} - -static __inline__ int HYPERVISOR_multicall(void *call_list, int nr_calls) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_multicall), - _a1 (call_list), _a2 (nr_calls) : "memory" ); - - return ret; -} - -static __inline__ int HYPERVISOR_update_va_mapping( - unsigned long page_nr, unsigned long new_val, unsigned long flags) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_update_va_mapping), - _a1 (page_nr), _a2 (new_val), _a3 (flags) : "memory" ); - - return ret; -} - -static __inline__ int HYPERVISOR_xen_version(int cmd) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_xen_version), - _a1 (cmd) : "memory" ); - - return ret; -} - -static __inline__ int HYPERVISOR_console_io(int cmd, int count, char *str) -{ - int ret; - __asm__ __volatile__ ( - TRAP_INSTR - : "=a" (ret) : "0" (__HYPERVISOR_console_io), - _a1 (cmd), _a2 (count), _a3 (str) : "memory" ); - - return ret; -} - #endif /* __HYPERVISOR_H__ */ diff -r dc36edf1102f -r bcccadcc56e5 extras/mini-os/include/os.h --- a/extras/mini-os/include/os.h Sun Oct 30 12:52:38 2005 +++ b/extras/mini-os/include/os.h Sun Oct 30 13:00:35 2005 @@ -24,7 +24,7 @@ #include <xen/xen.h> -#define force_evtchn_callback() ((void)HYPERVISOR_xen_version(0)) +#define force_evtchn_callback() ((void)HYPERVISOR_xen_version(0, 0)) #define __KERNEL_CS FLAT_KERNEL_CS #define __KERNEL_DS FLAT_KERNEL_DS @@ -55,6 +55,8 @@ /* Everything below this point is not included by assembler (.S) files. 
*/ #ifndef __ASSEMBLY__ +extern shared_info_t *HYPERVISOR_shared_info; + void trap_init(void); /* diff -r dc36edf1102f -r bcccadcc56e5 extras/mini-os/include/types.h --- a/extras/mini-os/include/types.h Sun Oct 30 12:52:38 2005 +++ b/extras/mini-os/include/types.h Sun Oct 30 13:00:35 2005 @@ -54,7 +54,14 @@ typedef struct { unsigned long pte; } pte_t; #endif - +typedef u8 uint8_t; +typedef s8 int8_t; +typedef u16 uint16_t; +typedef s16 int16_t; +typedef u32 uint32_t; +typedef s32 int32_t; +typedef u64 uint64_t; +typedef s64 int64_t; #define INT_MAX ((int)(~0U>>1)) diff -r dc36edf1102f -r bcccadcc56e5 extras/mini-os/kernel.c --- a/extras/mini-os/kernel.c Sun Oct 30 12:52:38 2005 +++ b/extras/mini-os/kernel.c Sun Oct 30 13:00:35 2005 @@ -61,7 +61,7 @@ extern char shared_info[PAGE_SIZE]; -#define __pte(x) ((pte_t) { (0) } ) +#define __pte(x) ((pte_t) { (x) } ) static shared_info_t *map_shared_info(unsigned long pa) { @@ -150,5 +150,5 @@ void do_exit(void) { printk("do_exit called!\n"); - for ( ;; ) HYPERVISOR_shutdown(); + for ( ;; ) HYPERVISOR_sched_op(SCHEDOP_shutdown, SHUTDOWN_crash); } diff -r dc36edf1102f -r bcccadcc56e5 extras/mini-os/time.c --- a/extras/mini-os/time.c Sun Oct 30 12:52:38 2005 +++ b/extras/mini-os/time.c Sun Oct 30 13:00:35 2005 @@ -208,7 +208,7 @@ struct timeval tv; gettimeofday(&tv); HYPERVISOR_set_timer_op(monotonic_clock() + 1000000LL * (s64) millisecs); - HYPERVISOR_block(); + HYPERVISOR_sched_op(SCHEDOP_block, 0); } diff -r dc36edf1102f -r bcccadcc56e5 extras/mini-os/xenbus/xenbus_xs.c --- a/extras/mini-os/xenbus/xenbus_xs.c Sun Oct 30 12:52:38 2005 +++ b/extras/mini-os/xenbus/xenbus_xs.c Sun Oct 30 13:00:35 2005 @@ -39,7 +39,7 @@ #include <wait.h> #include <sched.h> #include <semaphore.h> -#include "xenstored.h" +#include <xen/io/xs_wire.h> #include "xenbus_comms.h" #define streq(a, b) (strcmp((a), (b)) == 0) @@ -408,7 +408,12 @@ static int xs_acknowledge_watch(const char *token) { +#if 0 return xs_error(xs_single(XS_WATCH_ACK, token, NULL)); +#else + /* XS_WATCH_ACK is no longer available */ + return 0; +#endif } static int xs_unwatch(const char *path, const char *token) diff -r dc36edf1102f -r bcccadcc56e5 linux-2.6-xen-sparse/arch/ia64/xen/drivers/evtchn_ia64.c --- a/linux-2.6-xen-sparse/arch/ia64/xen/drivers/evtchn_ia64.c Sun Oct 30 12:52:38 2005 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/drivers/evtchn_ia64.c Sun Oct 30 13:00:35 2005 @@ -29,7 +29,7 @@ return op.u.bind_virq.port; } -int bind_virq_to_irq(int virq) +int bind_virq_to_irq(int virq, int cpu) { printk("bind_virq_to_irq called... 
FIXME??\n"); while(1); @@ -66,7 +66,11 @@ evtchns[evtchn].handler = handler; evtchns[evtchn].dev_id = dev_id; unmask_evtchn(evtchn); - return 0; + //return 0; + /* On ia64, there's only one irq vector allocated for all event channels, + * so let's just return evtchn as handle for later communication + */ + return evtchn; } void unbind_evtchn_from_irqhandler(unsigned int evtchn, void *dev_id) diff -r dc36edf1102f -r bcccadcc56e5 linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S Sun Oct 30 12:52:38 2005 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S Sun Oct 30 13:00:35 2005 @@ -46,8 +46,6 @@ .prologue alloc r16=ar.pfs,1,0,0,0 #endif - .prologue - alloc r16=ar.pfs,1,0,0,0 DO_SAVE_SWITCH_STACK .body diff -r dc36edf1102f -r bcccadcc56e5 linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S Sun Oct 30 12:52:38 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S Sun Oct 30 13:00:35 2005 @@ -653,7 +653,7 @@ ENTRY(device_not_available) pushl $-1 # mark this as an int SAVE_ALL - preempt_stop + #preempt_stop /* This is already an interrupt gate on Xen. */ call math_state_restore jmp ret_from_exception diff -r dc36edf1102f -r bcccadcc56e5 linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Sun Oct 30 12:52:38 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c Sun Oct 30 13:00:35 2005 @@ -483,6 +483,9 @@ mcl->args[0] = 1; mcl++; } +#if 0 /* lazy fpu sanity check */ + else BUG_ON(!(read_cr0() & 8)); +#endif /* * Reload esp0, LDT and the page table pointer: diff -r dc36edf1102f -r bcccadcc56e5 linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Sun Oct 30 12:52:38 2005 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c Sun Oct 30 13:00:35 2005 @@ -647,6 +647,12 @@ do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL); } #endif + +static inline void conditional_sti(struct pt_regs *regs) +{ + if ((uint8_t)(regs->xcs >> 16) == 0) + local_irq_enable(); +} /* * Our handling of the processor debug registers is non-trivial. @@ -680,11 +686,9 @@ if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, SIGTRAP) == NOTIFY_STOP) return; -#if 0 + /* It's safe to allow irq's after DR6 has been saved */ - if (regs->eflags & X86_EFLAGS_IF) - local_irq_enable(); -#endif + conditional_sti(regs); /* Mask out spurious debug traps due to lazy DR7 setting */ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { @@ -967,15 +971,18 @@ #endif -/* NB. All these are "trap gates" (i.e. events_mask isn't cleared). */ +/* + * NB. All these are "trap gates" (i.e. events_mask isn't cleared) except + * for those that specify <dpl>|4 in the second field. 
+ */ static trap_info_t trap_table[] = { { 0, 0, __KERNEL_CS, (unsigned long)divide_error }, - { 1, 0, __KERNEL_CS, (unsigned long)debug }, - { 3, 3, __KERNEL_CS, (unsigned long)int3 }, + { 1, 0|4, __KERNEL_CS, (unsigned long)debug }, + { 3, 3|4, __KERNEL_CS, (unsigned long)int3 }, { 4, 3, __KERNEL_CS, (unsigned long)overflow }, { 5, 3, __KERNEL_CS, (unsigned long)bounds }, { 6, 0, __KERNEL_CS, (unsigned long)invalid_op }, - { 7, 0, __KERNEL_CS, (unsigned long)device_not_available }, + { 7, 0|4, __KERNEL_CS, (unsigned long)device_not_available }, { 9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun }, { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS }, { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present }, diff -r dc36edf1102f -r bcccadcc56e5 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Sun Oct 30 12:52:38 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c Sun Oct 30 13:00:35 2005 @@ -733,6 +733,7 @@ #ifdef CONFIG_XEN { int i, j, k, fpp; + unsigned long va; /* Make sure we have a large enough P->M table. */ phys_to_machine_mapping = alloc_bootmem( @@ -746,9 +747,21 @@ __pa(xen_start_info->mfn_list), PFN_PHYS(PFN_UP(xen_start_info->nr_pages * sizeof(unsigned long)))); - make_pages_readonly((void *)xen_start_info->mfn_list, - PFN_UP(xen_start_info->nr_pages * - sizeof(unsigned long))); + + /* 'Initial mapping' of old p2m table must be destroyed. */ + for (va = xen_start_info->mfn_list; + va < (xen_start_info->mfn_list + + (xen_start_info->nr_pages*sizeof(unsigned long))); + va += PAGE_SIZE) { + HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0); + } + + /* 'Initial mapping' of initrd must be destroyed. */ + for (va = xen_start_info->mod_start; + va < (xen_start_info->mod_start+xen_start_info->mod_len); + va += PAGE_SIZE) { + HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0); + } /* * Initialise the list of the frames that specify the list of diff -r dc36edf1102f -r bcccadcc56e5 linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S Sun Oct 30 12:52:38 2005 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S Sun Oct 30 13:00:35 2005 @@ -5,7 +5,7 @@ #define evtchn_upcall_pending 0 #define evtchn_upcall_mask 1 -#define sizeof_vcpu_shift 3 +#define sizeof_vcpu_shift 4 #ifdef CONFIG_SMP //#define preempt_disable(reg) incl threadinfo_preempt_count(reg) diff -r dc36edf1102f -r bcccadcc56e5 linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c --- a/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Sun Oct 30 12:52:38 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c Sun Oct 30 13:00:35 2005 @@ -191,12 +191,13 @@ rc = HYPERVISOR_memory_op( XENMEM_increase_reservation, &reservation); if (rc < nr_pages) { + int ret; /* We hit the Xen hard limit: reprobe. */ reservation.extent_start = mfn_list; reservation.nr_extents = rc; - BUG_ON(HYPERVISOR_memory_op( - XENMEM_decrease_reservation, - &reservation) != rc); + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &reservation); + BUG_ON(ret != rc); hard_limit = current_pages + rc - driver_pages; goto out; } @@ -213,11 +214,14 @@ xen_machphys_update(mfn_list[i], pfn); /* Link back into the page tables if not highmem. 
*/ - if (pfn < max_low_pfn) - BUG_ON(HYPERVISOR_update_va_mapping( + if (pfn < max_low_pfn) { + int ret; + ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), pfn_pte_ma(mfn_list[i], PAGE_KERNEL), - 0)); + 0); + BUG_ON(ret); + } /* Relinquish the page back to the allocator. */ ClearPageReserved(page); @@ -242,6 +246,7 @@ struct page *page; void *v; int need_sleep = 0; + int ret; struct xen_memory_reservation reservation = { .address_bits = 0, .extent_order = 0, @@ -268,8 +273,9 @@ if (!PageHighMem(page)) { v = phys_to_virt(pfn << PAGE_SHIFT); scrub_pages(v, 1); - BUG_ON(HYPERVISOR_update_va_mapping( - (unsigned long)v, __pte_ma(0), 0)); + ret = HYPERVISOR_update_va_mapping( + (unsigned long)v, __pte_ma(0), 0); + BUG_ON(ret); } #ifdef CONFIG_XEN_SCRUB_PAGES else { @@ -295,8 +301,8 @@ reservation.extent_start = mfn_list; reservation.nr_extents = nr_pages; - BUG_ON(HYPERVISOR_memory_op( - XENMEM_decrease_reservation, &reservation) != nr_pages); + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); + BUG_ON(ret != nr_pages); current_pages -= nr_pages; totalram_pages = current_pages; @@ -501,6 +507,7 @@ pte_t *pte, struct page *pte_page, unsigned long addr, void *data) { unsigned long mfn = pte_mfn(*pte); + int ret; struct xen_memory_reservation reservation = { .extent_start = &mfn, .nr_extents = 1, @@ -510,8 +517,8 @@ set_pte_at(&init_mm, addr, pte, __pte_ma(0)); phys_to_machine_mapping[__pa(addr) >> PAGE_SHIFT] = INVALID_P2M_ENTRY; - BUG_ON(HYPERVISOR_memory_op( - XENMEM_decrease_reservation, &reservation) != 1); + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); + BUG_ON(ret != 1); return 0; } @@ -519,6 +526,7 @@ { unsigned long vstart, flags; unsigned int order = get_order(nr_pages * PAGE_SIZE); + int ret; vstart = __get_free_pages(GFP_KERNEL, order); if (vstart == 0) @@ -527,8 +535,9 @@ scrub_pages(vstart, 1 << order); balloon_lock(flags); - BUG_ON(generic_page_range( - &init_mm, vstart, PAGE_SIZE << order, dealloc_pte_fn, NULL)); + ret = generic_page_range( + &init_mm, vstart, PAGE_SIZE << order, dealloc_pte_fn, NULL); + BUG_ON(ret); current_pages -= 1UL << order; balloon_unlock(flags); diff -r dc36edf1102f -r bcccadcc56e5 linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Sun Oct 30 12:52:38 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c Sun Oct 30 13:00:35 2005 @@ -108,6 +108,7 @@ struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int i, invcount = 0; u16 handle; + int ret; for (i = 0; i < nr_pages; i++) { handle = pending_handle(idx, i); @@ -120,8 +121,9 @@ invcount++; } - BUG_ON(HYPERVISOR_grant_table_op( - GNTTABOP_unmap_grant_ref, unmap, invcount)); + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, unmap, invcount); + BUG_ON(ret); } @@ -338,6 +340,7 @@ struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; int nbio = 0; request_queue_t *q; + int ret; /* Check that number of segments is sane. 
*/ nseg = req->nr_segments; @@ -367,8 +370,8 @@ map[i].flags |= GNTMAP_readonly; } - BUG_ON(HYPERVISOR_grant_table_op( - GNTTABOP_map_grant_ref, map, nseg)); + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); + BUG_ON(ret); for (i = 0; i < nseg; i++) { if (unlikely(map[i].handle < 0)) { @@ -493,6 +496,7 @@ { int i; struct page *page; + int ret; blkif_interface_init(); @@ -509,7 +513,8 @@ spin_lock_init(&blkio_schedule_list_lock); INIT_LIST_HEAD(&blkio_schedule_list); - BUG_ON(kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0); + ret = kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES); + BUG_ON(ret < 0); blkif_xenbus_init(); diff -r dc36edf1102f -r bcccadcc56e5 linux-2.6-xen-sparse/drivers/xen/blkback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Sun Oct 30 12:52:38 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/interface.c Sun Oct 30 13:00:35 2005 @@ -31,6 +31,7 @@ static int map_frontend_page(blkif_t *blkif, unsigned long shared_page) { struct gnttab_map_grant_ref op; + int ret; op.host_addr = (unsigned long)blkif->blk_ring_area->addr; op.flags = GNTMAP_host_map; @@ -38,8 +39,9 @@ op.dom = blkif->domid; lock_vm_area(blkif->blk_ring_area); - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)); + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); unlock_vm_area(blkif->blk_ring_area); + BUG_ON(ret); if (op.handle < 0) { DPRINTK(" Grant table operation failure !\n"); @@ -55,14 +57,16 @@ static void unmap_frontend_page(blkif_t *blkif) { struct gnttab_unmap_grant_ref op; + int ret; op.host_addr = (unsigned long)blkif->blk_ring_area->addr; op.handle = blkif->shmem_handle; op.dev_bus_addr = 0; lock_vm_area(blkif->blk_ring_area); - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); unlock_vm_area(blkif->blk_ring_area); + BUG_ON(ret); } int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) diff -r dc36edf1102f -r bcccadcc56e5 linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c --- a/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Sun Oct 30 12:52:38 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c Sun Oct 30 13:00:35 2005 @@ -305,6 +305,7 @@ for (i = info->ring.rsp_cons; i != rp; i++) { unsigned long id; + int ret; bret = RING_GET_RESPONSE(&info->ring, i); id = bret->id; @@ -321,9 +322,10 @@ DPRINTK("Bad return from blkdev data " "request: %x\n", bret->status); - BUG_ON(end_that_request_first( + ret = end_that_request_first( req, (bret->status == BLKIF_RSP_OKAY), - req->hard_nr_sectors)); + req->hard_nr_sectors); + BUG_ON(ret); end_that_request_last(req); break; default: diff -r dc36edf1102f -r bcccadcc56e5 linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Sun Oct 30 12:52:38 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Sun Oct 30 13:00:35 2005 @@ -413,6 +413,7 @@ unsigned int i, op = 0; struct grant_handle_pair *handle; unsigned long ptep; + int ret; for ( i = 0; i < nr_pages; i++) { @@ -440,8 +441,8 @@ BLKTAP_INVALIDATE_HANDLE(handle); } - BUG_ON(HYPERVISOR_grant_table_op( - GNTTABOP_unmap_grant_ref, unmap, op)); + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, op); + BUG_ON(ret); if (blktap_vma != NULL) zap_page_range(blktap_vma, @@ -673,6 +674,7 @@ struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2]; int op, ret; unsigned int nseg; + int retval; /* Check that number of 
segments is sane. */ nseg = req->nr_segments; @@ -740,8 +742,8 @@ op++; } - BUG_ON(HYPERVISOR_grant_table_op( - GNTTABOP_map_grant_ref, map, op)); + retval = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, op); + BUG_ON(retval); op = 0; for (i = 0; i < (req->nr_segments*2); i += 2) { @@ -877,7 +879,8 @@ spin_lock_init(&blkio_schedule_list_lock); INIT_LIST_HEAD(&blkio_schedule_list); - BUG_ON(kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0); + i = kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES); + BUG_ON(i<0); blkif_xenbus_init(); diff -r dc36edf1102f -r bcccadcc56e5 linux-2.6-xen-sparse/drivers/xen/blktap/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c Sun Oct 30 12:52:38 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/interface.c Sun Oct 30 13:00:35 2005 @@ -31,6 +31,7 @@ static int map_frontend_page(blkif_t *blkif, unsigned long shared_page) { struct gnttab_map_grant_ref op; + int ret; op.host_addr = (unsigned long)blkif->blk_ring_area->addr; op.flags = GNTMAP_host_map; @@ -38,8 +39,9 @@ op.dom = blkif->domid; lock_vm_area(blkif->blk_ring_area); - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)); + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); unlock_vm_area(blkif->blk_ring_area); + BUG_ON(ret); if (op.handle < 0) { DPRINTK(" Grant table operation failure !\n"); @@ -55,14 +57,16 @@ static void unmap_frontend_page(blkif_t *blkif) { struct gnttab_unmap_grant_ref op; + int ret; op.host_addr = (unsigned long)blkif->blk_ring_area->addr; op.handle = blkif->shmem_handle; op.dev_bus_addr = 0; lock_vm_area(blkif->blk_ring_area); - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); unlock_vm_area(blkif->blk_ring_area); + BUG_ON(ret); } int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) diff -r dc36edf1102f -r bcccadcc56e5 linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c --- a/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c Sun Oct 30 12:52:38 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c Sun Oct 30 13:00:35 2005 @@ -282,6 +282,7 @@ case IOCTL_EVTCHN_UNBIND: { struct ioctl_evtchn_unbind unbind; + int ret; rc = -EFAULT; if (copy_from_user(&unbind, (void *)arg, sizeof(unbind))) @@ -306,7 +307,8 @@ op.cmd = EVTCHNOP_close; op.u.close.port = unbind.port; - BUG_ON(HYPERVISOR_event_channel_op(&op)); + ret = HYPERVISOR_event_channel_op(&op); + BUG_ON(ret); rc = 0; break; @@ -399,6 +401,7 @@ for (i = 0; i < NR_EVENT_CHANNELS; i++) { + int ret; if (port_user[i] != u) continue; @@ -407,7 +410,8 @@ op.cmd = EVTCHNOP_close; op.u.close.port = i; - BUG_ON(HYPERVISOR_event_channel_op(&op)); + ret = HYPERVISOR_event_channel_op(&op); + BUG_ON(ret); } spin_unlock_irq(&port_user_lock); diff -r dc36edf1102f -r bcccadcc56e5 linux-2.6-xen-sparse/drivers/xen/netback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Sun Oct 30 12:52:38 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Sun Oct 30 13:00:35 2005 @@ -115,6 +115,7 @@ netif_t *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref) { struct gnttab_map_grant_ref op; + int ret; op.host_addr = (unsigned long)netif->comms_area->addr; op.flags = GNTMAP_host_map; @@ -122,8 +123,9 @@ op.dom = netif->domid; lock_vm_area(netif->comms_area); - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)); + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); unlock_vm_area(netif->comms_area); + 
BUG_ON(ret); if (op.handle < 0) { DPRINTK(" Gnttab failure mapping tx_ring_ref!\n"); @@ -139,8 +141,9 @@ op.dom = netif->domid; lock_vm_area(netif->comms_area); - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)); + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); unlock_vm_area(netif->comms_area); + BUG_ON(ret); if (op.handle < 0) { DPRINTK(" Gnttab failure mapping rx_ring_ref!\n"); @@ -156,22 +159,25 @@ static void unmap_frontend_pages(netif_t *netif) { struct gnttab_unmap_grant_ref op; + int ret; op.host_addr = (unsigned long)netif->comms_area->addr; op.handle = netif->tx_shmem_handle; op.dev_bus_addr = 0; lock_vm_area(netif->comms_area); - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); unlock_vm_area(netif->comms_area); + BUG_ON(ret); op.host_addr = (unsigned long)netif->comms_area->addr + PAGE_SIZE; op.handle = netif->rx_shmem_handle; op.dev_bus_addr = 0; lock_vm_area(netif->comms_area); - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); unlock_vm_area(netif->comms_area); + BUG_ON(ret); } int netif_map(netif_t *netif, unsigned long tx_ring_ref, diff -r dc36edf1102f -r bcccadcc56e5 linux-2.6-xen-sparse/drivers/xen/netback/netback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Sun Oct 30 12:52:38 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Sun Oct 30 13:00:35 2005 @@ -112,9 +112,12 @@ spin_lock_irqsave(&mfn_lock, flags); if ( alloc_index != MAX_MFN_ALLOC ) mfn_list[alloc_index++] = mfn; - else - BUG_ON(HYPERVISOR_memory_op(XENMEM_decrease_reservation, - &reservation) != 1); + else { + int ret; + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &reservation); + BUG_ON(ret != 1); + } spin_unlock_irqrestore(&mfn_lock, flags); } #endif @@ -159,13 +162,15 @@ */ if (skb_shared(skb) || skb_cloned(skb) || !is_xen_skb(skb)) { int hlen = skb->data - skb->head; + int ret; struct sk_buff *nskb = dev_alloc_skb(hlen + skb->len); if ( unlikely(nskb == NULL) ) goto drop; skb_reserve(nskb, hlen); __skb_put(nskb, skb->len); - BUG_ON(skb_copy_bits(skb, -hlen, nskb->data - hlen, - skb->len + hlen)); + ret = skb_copy_bits(skb, -hlen, nskb->data - hlen, + skb->len + hlen); + BUG_ON(ret); nskb->dev = skb->dev; nskb->proto_csum_valid = skb->proto_csum_valid; dev_kfree_skb(skb); @@ -218,6 +223,7 @@ struct sk_buff *skb; u16 notify_list[NETIF_RX_RING_SIZE]; int notify_nr = 0; + int ret; skb_queue_head_init(&rxq); @@ -279,7 +285,8 @@ mcl++; mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; - BUG_ON(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0); + ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl); + BUG_ON(ret != 0); mcl = rx_mcl; if( HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, @@ -421,6 +428,7 @@ u16 pending_idx; PEND_RING_IDX dc, dp; netif_t *netif; + int ret; dc = dealloc_cons; dp = dealloc_prod; @@ -436,8 +444,9 @@ gop->handle = grant_tx_ref[pending_idx]; gop++; } - BUG_ON(HYPERVISOR_grant_table_op( - GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops)); + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops); + BUG_ON(ret); while (dealloc_cons != dp) { pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)]; @@ -477,6 +486,7 @@ NETIF_RING_IDX i; gnttab_map_grant_ref_t *mop; unsigned int data_len; + int ret; if (dealloc_cons != dealloc_prod) net_tx_action_dealloc(); @@ -599,8 +609,9 @@ 
if (mop == tx_map_ops) return; - BUG_ON(HYPERVISOR_grant_table_op( - GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops)); + ret = HYPERVISOR_grant_table_op( + GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops); + BUG_ON(ret); mop = tx_map_ops; while ((skb = __skb_dequeue(&tx_queue)) != NULL) { diff -r dc36edf1102f -r bcccadcc56e5 linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c --- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Sun Oct 30 12:52:38 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Sun Oct 30 13:00:35 2005 @@ -25,8 +25,8 @@ #include <asm/pgtable.h> #include <asm/uaccess.h> #include <asm/tlb.h> +#include <asm/hypervisor.h> #include <asm-xen/linux-public/privcmd.h> -#include <asm/hypervisor.h> #include <asm-xen/xen-public/xen.h> #include <asm-xen/xen-public/dom0_ops.h> #include <asm-xen/xen_proc.h> diff -r dc36edf1102f -r bcccadcc56e5 linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c Sun Oct 30 12:52:38 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/interface.c Sun Oct 30 13:00:35 2005 @@ -78,6 +78,7 @@ static int map_frontend_page(tpmif_t *tpmif, unsigned long shared_page) { + int ret; struct gnttab_map_grant_ref op = { .host_addr = (unsigned long)tpmif->tx_area->addr, .flags = GNTMAP_host_map, @@ -86,8 +87,9 @@ }; lock_vm_area(tpmif->tx_area); - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)); + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); unlock_vm_area(tpmif->tx_area); + BUG_ON(ret); if (op.handle < 0) { DPRINTK(" Grant table operation failure !\n"); @@ -104,14 +106,16 @@ unmap_frontend_page(tpmif_t *tpmif) { struct gnttab_unmap_grant_ref op; + int ret; op.host_addr = (unsigned long)tpmif->tx_area->addr; op.handle = tpmif->shmem_handle; op.dev_bus_addr = 0; lock_vm_area(tpmif->tx_area); - BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); unlock_vm_area(tpmif->tx_area); + BUG_ON(ret); } int diff -r dc36edf1102f -r bcccadcc56e5 linux-2.6-xen-sparse/drivers/xen/util.c --- a/linux-2.6-xen-sparse/drivers/xen/util.c Sun Oct 30 12:52:38 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/util.c Sun Oct 30 13:00:35 2005 @@ -34,7 +34,9 @@ void free_vm_area(struct vm_struct *area) { - BUG_ON(remove_vm_area(area->addr) != area); + struct vm_struct *ret; + ret = remove_vm_area(area->addr); + BUG_ON(ret != area); kfree(area); } diff -r dc36edf1102f -r bcccadcc56e5 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Sun Oct 30 12:52:38 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Sun Oct 30 13:00:35 2005 @@ -714,11 +714,7 @@ static int __init xenbus_probe_init(void) { - int err = 0; - /* - ** Domain0 doesn't have a store_evtchn or store_mfn yet. - */ - int dom0 = (xen_start_info->store_evtchn == 0); + int err = 0, dom0; printk("xenbus_probe_init\n"); @@ -733,10 +729,16 @@ device_register(&xenbus_frontend.dev); device_register(&xenbus_backend.dev); + /* + ** Domain0 doesn't have a store_evtchn or store_mfn yet. + */ + dom0 = (xen_start_info->store_evtchn == 0); + if (dom0) { unsigned long page; evtchn_op_t op = { 0 }; + int ret; /* Allocate page. 
*/ @@ -757,7 +759,8 @@ op.u.alloc_unbound.dom = DOMID_SELF; op.u.alloc_unbound.remote_dom = 0; - BUG_ON(HYPERVISOR_event_channel_op(&op)); + ret = HYPERVISOR_event_channel_op(&op); + BUG_ON(ret); xen_start_info->store_evtchn = op.u.alloc_unbound.port; /* And finally publish the above info in /proc/xen */ diff -r dc36edf1102f -r bcccadcc56e5 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Sun Oct 30 12:52:38 2005 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Sun Oct 30 13:00:35 2005 @@ -28,6 +28,7 @@ * IN THE SOFTWARE. */ +#include <linux/unistd.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/uio.h> diff -r dc36edf1102f -r bcccadcc56e5 tools/examples/Makefile --- a/tools/examples/Makefile Sun Oct 30 12:52:38 2005 +++ b/tools/examples/Makefile Sun Oct 30 13:00:35 2005 @@ -17,6 +17,7 @@ XEN_CONFIGS += xmexample1 XEN_CONFIGS += xmexample2 XEN_CONFIGS += xmexample.vmx +XEN_CONFIGS += xmexample.vti # Xen script dir and scripts to go there. XEN_SCRIPT_DIR = /etc/xen/scripts diff -r dc36edf1102f -r bcccadcc56e5 tools/ioemu/hw/ide.c --- a/tools/ioemu/hw/ide.c Sun Oct 30 12:52:38 2005 +++ b/tools/ioemu/hw/ide.c Sun Oct 30 13:00:35 2005 @@ -22,6 +22,7 @@ * THE SOFTWARE. */ #include "vl.h" +#include <pthread.h> /* debug IDE devices */ //#define DEBUG_IDE @@ -359,6 +360,48 @@ IDEState ide_if[4]; BMDMAState bmdma[2]; } PCIIDEState; + +#define DMA_MULTI_THREAD + +#ifdef DMA_MULTI_THREAD + +static int file_pipes[2]; + +static void ide_dma_loop(BMDMAState *bm); +static void dma_thread_loop(BMDMAState *bm); + +static void *dma_thread_func(void* opaque) +{ + BMDMAState* req; + + while (read(file_pipes[0], &req, sizeof(req))) { + dma_thread_loop(req); + } + + return NULL; +} + +static void dma_create_thread() +{ + pthread_t tid; + int rt; + + if (pipe(file_pipes) != 0){ + fprintf(stderr, "create pipe failed\n"); + exit(1); + } + + if ( (rt = pthread_create(&tid, NULL, dma_thread_func, NULL)) ) { + fprintf(stderr, "Oops, dma thread creation failed, errno=%d\n", rt); + exit(1); + } + + if ( (rt = pthread_detach(tid)) ) { + fprintf(stderr, "Oops, dma thread detachment failed, errno=%d\n", rt); + exit(1); + } +} +#endif //DMA_MULTI_THREAD static void ide_dma_start(IDEState *s, IDEDMAFunc *dma_cb); @@ -1978,7 +2021,15 @@ /* XXX: full callback usage to prepare non blocking I/Os support - error handling */ +#ifdef DMA_MULTI_THREAD static void ide_dma_loop(BMDMAState *bm) +{ + write(file_pipes[1], &bm, sizeof(bm)); +} +static void dma_thread_loop(BMDMAState *bm) +#else +static void ide_dma_loop(BMDMAState *bm) +#endif //DMA_MULTI_THREAD { struct { uint32_t addr; @@ -2166,6 +2217,9 @@ d->ide_if[i].pci_dev = (PCIDevice *)d; ide_init2(&d->ide_if[0], 16, hd_table[0], hd_table[1]); ide_init2(&d->ide_if[2], 16, hd_table[2], hd_table[3]); +#ifdef DMA_MULTI_THREAD + dma_create_thread(); +#endif //DMA_MULTI_THREAD } /* hd_table must contain 4 block drivers */ @@ -2196,6 +2250,9 @@ ide_init2(&d->ide_if[2], 15, hd_table[2], hd_table[3]); ide_init_ioport(&d->ide_if[0], 0x1f0, 0x3f6); ide_init_ioport(&d->ide_if[2], 0x170, 0x376); +#ifdef DMA_MULTI_THREAD + dma_create_thread(); +#endif //DMA_MULTI_THREAD } /***********************************************************/ diff -r dc36edf1102f -r bcccadcc56e5 tools/libxc/xc_elf.h --- a/tools/libxc/xc_elf.h Sun Oct 30 12:52:38 2005 +++ b/tools/libxc/xc_elf.h Sun Oct 30 13:00:35 2005 @@ -24,26 +24,26 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -typedef u_int8_t Elf_Byte; - -typedef u_int32_t Elf32_Addr; /* Unsigned program address */ -typedef u_int32_t Elf32_Off; /* Unsigned file offset */ +typedef uint8_t Elf_Byte; + +typedef uint32_t Elf32_Addr; /* Unsigned program address */ +typedef uint32_t Elf32_Off; /* Unsigned file offset */ typedef int32_t Elf32_Sword; /* Signed large integer */ -typedef u_int32_t Elf32_Word; /* Unsigned large integer */ -typedef u_int16_t Elf32_Half; /* Unsigned medium integer */ - -typedef u_int64_t Elf64_Addr; -typedef u_int64_t Elf64_Off; +typedef uint32_t Elf32_Word; /* Unsigned large integer */ +typedef uint16_t Elf32_Half; /* Unsigned medium integer */ + +typedef uint64_t Elf64_Addr; +typedef uint64_t Elf64_Off; typedef int32_t Elf64_Shalf; typedef int32_t Elf64_Sword; -typedef u_int32_t Elf64_Word; +typedef uint32_t Elf64_Word; typedef int64_t Elf64_Sxword; -typedef u_int64_t Elf64_Xword; - -typedef u_int32_t Elf64_Half; -typedef u_int16_t Elf64_Quarter; +typedef uint64_t Elf64_Xword; + +typedef uint32_t Elf64_Half; +typedef uint16_t Elf64_Quarter; /* * e_ident[] identification indexes diff -r dc36edf1102f -r bcccadcc56e5 tools/libxc/xc_ia64_stubs.c --- a/tools/libxc/xc_ia64_stubs.c Sun Oct 30 12:52:38 2005 +++ b/tools/libxc/xc_ia64_stubs.c Sun Oct 30 13:00:35 2005 @@ -1,5 +1,11 @@ #include "xg_private.h" #include "xenguest.h" +#include "xc_private.h" +#include "xc_elf.h" +#include <stdlib.h> +#include <zlib.h> +#include "xen/arch-ia64.h" +#include <xen/io/ioreq.h> int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, uint32_t max_factor, uint32_t flags) @@ -13,22 +19,6 @@ unsigned int console_evtchn, unsigned long *console_mfn) { PERROR("xc_linux_restore not implemented\n"); - return -1; -} - -int xc_vmx_build(int xc_handle, - uint32_t domid, - int memsize, - const char *image_name, - struct mem_map *mem_mapp, - const char *ramdisk_name, - const char *cmdline, - unsigned int control_evtchn, - unsigned int vcpus, - unsigned int store_evtchn, - unsigned long *store_mfn) -{ - PERROR("xc_vmx_build not implemented\n"); return -1; } @@ -40,6 +30,653 @@ unsigned int control_evtchn, unsigned long flags) { PERROR("xc_plan9_build not implemented\n"); + return -1; +} + +int xc_ia64_get_pfn_list(int xc_handle, + uint32_t domid, + unsigned long *pfn_buf, + unsigned int start_page, + unsigned int nr_pages) +{ + dom0_op_t op; + int ret; + unsigned long max_pfns = ((unsigned long)start_page << 32) | nr_pages; + + op.cmd = DOM0_GETMEMLIST; + op.u.getmemlist.domain = (domid_t)domid; + op.u.getmemlist.max_pfns = max_pfns; + op.u.getmemlist.buffer = pfn_buf; + + if ( (max_pfns != -1UL) + && mlock(pfn_buf, nr_pages * sizeof(unsigned long)) != 0 ) + { + PERROR("Could not lock pfn list buffer"); + return -1; + } + + ret = do_dom0_op(xc_handle, &op); + + if (max_pfns != -1UL) + (void)munlock(pfn_buf, nr_pages * sizeof(unsigned long)); + + return (ret < 0) ? -1 : op.u.getmemlist.num_pfns; +} + +long xc_get_max_pages(int xc_handle, uint32_t domid) +{ + dom0_op_t op; + op.cmd = DOM0_GETDOMAININFO; + op.u.getdomaininfo.domain = (domid_t)domid; + return (do_dom0_op(xc_handle, &op) < 0) ? + -1 : op.u.getdomaininfo.max_pages; +} + +int xc_ia64_copy_to_domain_pages(int xc_handle, uint32_t domid, + void* src_page, unsigned long dst_pfn, int nr_pages) +{ + // N.B. 
gva should be page aligned + + unsigned long *page_array=NULL; + int i; + + if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL ){ + PERROR("Could not allocate memory"); + goto error_out; + } + if ( xc_ia64_get_pfn_list(xc_handle, domid, page_array, + dst_pfn>>PAGE_SHIFT, nr_pages) != nr_pages ){ + PERROR("Could not get the page frame list"); + goto error_out; + } + + for ( i=0; i< nr_pages; i++ ){ + if (xc_copy_to_domain_page(xc_handle, domid, page_array[i], + src_page + (i << PAGE_SHIFT))) + goto error_out; + } + free(page_array); + return 0; + +error_out: + if (page_array) + free(page_array); + return -1; +} + + +#define HOB_SIGNATURE 0x3436474953424f48 // "HOBSIG64" +#define GFW_HOB_START ((4UL<<30)-(14UL<<20)) //4G -14M +#define GFW_HOB_SIZE (1UL<<20) //1M +#define MEM_G (1UL << 30) +#define MEM_M (1UL << 20) + +typedef struct { + unsigned long signature; + unsigned int type; + unsigned int length; +} HOB_GENERIC_HEADER; + +/* + * INFO HOB is the first data data in one HOB list + * it contains the control information of the HOB list + */ +typedef struct { + HOB_GENERIC_HEADER header; + unsigned long length; // current length of hob + unsigned long cur_pos; // current poisiton of hob + unsigned long buf_size; // size of hob buffer +}HOB_INFO; + +typedef struct{ + unsigned long start; + unsigned long size; +}hob_mem_t; + +typedef enum { + HOB_TYPE_INFO=0, + HOB_TYPE_TERMINAL, + HOB_TYPE_MEM, + HOB_TYPE_PAL_BUS_GET_FEATURES_DATA, + HOB_TYPE_PAL_CACHE_SUMMARY, + HOB_TYPE_PAL_MEM_ATTRIB, + HOB_TYPE_PAL_CACHE_INFO, + HOB_TYPE_PAL_CACHE_PROT_INFO, + HOB_TYPE_PAL_DEBUG_INFO, + HOB_TYPE_PAL_FIXED_ADDR, + HOB_TYPE_PAL_FREQ_BASE, + HOB_TYPE_PAL_FREQ_RATIOS, + HOB_TYPE_PAL_HALT_INFO, + HOB_TYPE_PAL_PERF_MON_INFO, + HOB_TYPE_PAL_PROC_GET_FEATURES, + HOB_TYPE_PAL_PTCE_INFO, + HOB_TYPE_PAL_REGISTER_INFO, + HOB_TYPE_PAL_RSE_INFO, + HOB_TYPE_PAL_TEST_INFO, + HOB_TYPE_PAL_VM_SUMMARY, + HOB_TYPE_PAL_VM_INFO, + HOB_TYPE_PAL_VM_PAGE_SIZE, + HOB_TYPE_MAX +}hob_type_t; + +static int hob_init( void *buffer ,unsigned long buf_size); +static int add_pal_hob(void* hob_buf); +static int add_mem_hob(void* hob_buf, unsigned long dom_mem_size); +static int build_hob (void* hob_buf, unsigned long hob_buf_size, + unsigned long dom_mem_size); +static int load_hob(int xc_handle,uint32_t dom, void *hob_buf); + +int xc_ia64_build_hob(int xc_handle, uint32_t dom, unsigned long memsize){ + + char hob_buf[GFW_HOB_SIZE]; + + if ( build_hob( hob_buf, GFW_HOB_SIZE, memsize<<20) < 0){ + PERROR("Could not build hob"); + return -1; + } + + if ( load_hob( xc_handle, dom, hob_buf) <0){ + PERROR("Could not load hob"); + return -1; + } + + return 0; + +} +static int +hob_init( void *buffer ,unsigned long buf_size) +{ + HOB_INFO *phit; + HOB_GENERIC_HEADER *terminal; + + if (sizeof(HOB_INFO) + sizeof(HOB_GENERIC_HEADER) > buf_size){ + // buffer too small + return -1; + } + + phit = (HOB_INFO*)buffer; + phit->header.signature = HOB_SIGNATURE; + phit->header.type = HOB_TYPE_INFO; + phit->header.length = sizeof(HOB_INFO); + phit->length = sizeof(HOB_INFO) + sizeof(HOB_GENERIC_HEADER); + phit->cur_pos = 0; + phit->buf_size = buf_size; + + terminal = (HOB_GENERIC_HEADER*) (buffer + sizeof(HOB_INFO)); + terminal->signature= HOB_SIGNATURE; + terminal->type = HOB_TYPE_TERMINAL; + terminal->length = sizeof(HOB_GENERIC_HEADER); + + return 0; +} + +/* + * Add a new HOB to the HOB List. 
+ * + * hob_start - start address of hob buffer + * type - type of the hob to be added + * data - data of the hob to be added + * data_size - size of the data + */ +static int +hob_add( + void* hob_start, + int type, + void* data, + int data_size +) +{ + HOB_INFO *phit; + HOB_GENERIC_HEADER *newhob,*tail; + + phit = (HOB_INFO*)hob_start; + + if (phit->length + data_size > phit->buf_size){ + // no space for new hob + return -1; + } + + //append new HOB + newhob = (HOB_GENERIC_HEADER*) + (hob_start + phit->length - sizeof(HOB_GENERIC_HEADER)); + newhob->signature = HOB_SIGNATURE; + newhob->type = type; + newhob->length = data_size + sizeof(HOB_GENERIC_HEADER); + memcpy((void*)newhob + sizeof(HOB_GENERIC_HEADER), data, data_size); + + // append terminal HOB + tail = (HOB_GENERIC_HEADER*) ( hob_start + phit->length + data_size); + tail->signature = HOB_SIGNATURE; + tail->type = HOB_TYPE_TERMINAL; + tail->length = sizeof(HOB_GENERIC_HEADER); + + // adjust HOB list length + phit->length += sizeof(HOB_GENERIC_HEADER)+ data_size; + + return 0; + +} + +int get_hob_size(void* hob_buf){ + + HOB_INFO *phit = (HOB_INFO*)hob_buf; + + if (phit->header.signature != HOB_SIGNATURE){ + PERROR("xc_get_hob_size:Incorrect signature"); + return -1; + } + return phit->length; +} + +int build_hob (void* hob_buf, unsigned long hob_buf_size, + unsigned long dom_mem_size) +{ + //Init HOB List + if (hob_init (hob_buf, hob_buf_size)<0){ + PERROR("buffer too small"); + goto err_out; + } + + if ( add_mem_hob( hob_buf,dom_mem_size) < 0){ + PERROR("Add memory hob failed, buffer too small"); + goto err_out; + } + + if ( add_pal_hob( hob_buf ) < 0 ){ + PERROR("Add PAL hob failed, buffer too small"); + goto err_out; + } + + return 0; + +err_out: + return -1; +} + +static int +load_hob(int xc_handle, uint32_t dom, void *hob_buf) +{ + // hob_buf should be page aligned + int hob_size; + int nr_pages; + + if ((hob_size = get_hob_size(hob_buf)) < 0){ + PERROR("Invalid hob data"); + return -1; + } + + if (hob_size > GFW_HOB_SIZE){ + PERROR("No enough memory for hob data"); + return -1; + } + + nr_pages = (hob_size + PAGE_SIZE -1) >> PAGE_SHIFT; + + return xc_ia64_copy_to_domain_pages(xc_handle, dom, + hob_buf, GFW_HOB_START, nr_pages ); +} + +#define MIN(x, y) ((x) < (y)) ? 
(x) : (y) +static int +add_mem_hob(void* hob_buf, unsigned long dom_mem_size){ + hob_mem_t memhob; + + // less than 3G + memhob.start = 0; + memhob.size = MIN(dom_mem_size, 0xC0000000); + + if (hob_add(hob_buf, HOB_TYPE_MEM, &memhob, sizeof(memhob)) < 0){ + return -1; + } + + if (dom_mem_size > 0xC0000000) { + // 4G ~ 4G+remain + memhob.start = 0x100000000; //4G + memhob.size = dom_mem_size - 0xC0000000; + if (hob_add(hob_buf, HOB_TYPE_MEM, &memhob, sizeof(memhob)) < 0) + return -1; + } + return 0; +} + +unsigned char config_pal_bus_get_features_data[24] = { + 0, 0, 0, 32, 0, 0, 240, 189, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; +unsigned char config_pal_cache_summary[16] = { + 3, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0}; +unsigned char config_pal_mem_attrib[8] = { + 241, 0, 0, 0, 0, 0, 0, 0 +}; +unsigned char config_pal_cache_info[152] = { + 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 6, 4, 6, 7, 255, 1, 0, 1, 0, 64, 0, 0, 12, 12, + 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 6, 7, 0, 1, + 0, 1, 0, 64, 0, 0, 12, 12, 49, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 6, 8, 7, 7, 255, 7, 0, 11, 0, 0, 16, 0, + 12, 17, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 8, 7, + 7, 7, 5, 9, 11, 0, 0, 4, 0, 12, 15, 49, 0, 254, 255, + 255, 255, 255, 255, 255, 255, 2, 8, 7, 7, 7, 5, 9, + 11, 0, 0, 4, 0, 12, 15, 49, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 12, 7, 7, 7, 14, 1, 3, 0, 0, 192, 0, 12, 20, 49, 0 +}; +unsigned char config_pal_cache_prot_info[200] = { + 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 45, 0, 16, 8, 0, 76, 12, 64, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 8, 0, 16, 4, 0, 76, 44, 68, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, + 0, 16, 8, 0, 81, 44, 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, + 112, 12, 0, 79, 124, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 254, 255, 255, 255, 255, 255, 255, 255, + 32, 0, 112, 12, 0, 79, 124, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 0, 160, + 12, 0, 84, 124, 76, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0 +}; +unsigned char config_pal_debug_info[16] = { + 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0 +}; +unsigned char config_pal_fixed_addr[8] = { + 0, 0, 0, 0, 0, 0, 0, 0 +}; +unsigned char config_pal_freq_base[8] = { + 109, 219, 182, 13, 0, 0, 0, 0 +}; +unsigned char config_pal_freq_ratios[24] = { + 11, 1, 0, 0, 77, 7, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 4, + 0, 0, 0, 7, 0, 0, 0 +}; +unsigned char config_pal_halt_info[64] = { + 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; +unsigned char config_pal_perf_mon_info[136] = { + 12, 47, 18, 8, 0, 0, 0, 0, 241, 255, 0, 0, 255, 7, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 241, 255, 0, 0, 223, 0, 255, 255, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 240, 255, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 240, 255, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 +}; +unsigned char config_pal_proc_get_features[104] = { + 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 64, 6, 64, 49, 0, 0, 0, 0, 64, 6, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, + 231, 0, 0, 0, 0, 0, 0, 0, 228, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 17, 
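/*
 * For illustration: the byte arrays in this block are canned PAL
 * information blobs; the hob_batch[] table and add_pal_hob() further
 * below wrap each of them in a HOB_TYPE_PAL_* record.  Guest RAM itself
 * is described by add_mem_hob() above, which works around the 3G-4G MMIO
 * hole: a 4 GiB guest, for example, is reported as [0, 3G) plus [4G, 5G).
 */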
0, 0, 0, 0, 0, 0, 0, + 63, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 +}; +unsigned char config_pal_ptce_info[24] = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 +}; +unsigned char config_pal_register_info[64] = { + 255, 0, 47, 127, 17, 17, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 208, 128, 238, 238, 0, 0, 248, 255, 255, 255, 255, 255, 0, 0, 7, 3, + 251, 3, 0, 0, 0, 0, 255, 7, 3, 0, 0, 0, 0, 0, 248, 252, 4, + 252, 255, 255, 255, 255, 2, 248, 252, 255, 255, 255, 255, 255 +}; +unsigned char config_pal_rse_info[16] = { + 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; +unsigned char config_pal_test_info[48] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; +unsigned char config_pal_vm_summary[16] = { + 101, 18, 15, 2, 7, 7, 4, 2, 59, 18, 0, 0, 0, 0, 0, 0 +}; +unsigned char config_pal_vm_info[104] = { + 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 32, 32, 0, 0, 0, 0, 0, 0, 112, 85, 21, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 32, 32, 0, 0, 0, 0, 0, 0, 112, 85, + 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 128, 128, 0, + 4, 0, 0, 0, 0, 112, 85, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 128, 128, 0, 4, 0, 0, 0, 0, 112, 85, 0, 0, 0, 0, 0 +}; +unsigned char config_pal_vm_page_size[16] = { + 0, 112, 85, 21, 0, 0, 0, 0, 0, 112, 85, 21, 0, 0, 0, 0 +}; + +typedef struct{ + hob_type_t type; + void* data; + unsigned long size; +}hob_batch_t; + +hob_batch_t hob_batch[]={ + { HOB_TYPE_PAL_BUS_GET_FEATURES_DATA, + &config_pal_bus_get_features_data, + sizeof(config_pal_bus_get_features_data) + }, + { HOB_TYPE_PAL_CACHE_SUMMARY, + &config_pal_cache_summary, + sizeof(config_pal_cache_summary) + }, + { HOB_TYPE_PAL_MEM_ATTRIB, + &config_pal_mem_attrib, + sizeof(config_pal_mem_attrib) + }, + { HOB_TYPE_PAL_CACHE_INFO, + &config_pal_cache_info, + sizeof(config_pal_cache_info) + }, + { HOB_TYPE_PAL_CACHE_PROT_INFO, + &config_pal_cache_prot_info, + sizeof(config_pal_cache_prot_info) + }, + { HOB_TYPE_PAL_DEBUG_INFO, + &config_pal_debug_info, + sizeof(config_pal_debug_info) + }, + { HOB_TYPE_PAL_FIXED_ADDR, + &config_pal_fixed_addr, + sizeof(config_pal_fixed_addr) + }, + { HOB_TYPE_PAL_FREQ_BASE, + &config_pal_freq_base, + sizeof(config_pal_freq_base) + }, + { HOB_TYPE_PAL_FREQ_RATIOS, + &config_pal_freq_ratios, + sizeof(config_pal_freq_ratios) + }, + { HOB_TYPE_PAL_HALT_INFO, + &config_pal_halt_info, + sizeof(config_pal_halt_info) + }, + { HOB_TYPE_PAL_PERF_MON_INFO, + &config_pal_perf_mon_info, + sizeof(config_pal_perf_mon_info) + }, + { HOB_TYPE_PAL_PROC_GET_FEATURES, + &config_pal_proc_get_features, + sizeof(config_pal_proc_get_features) + }, + { HOB_TYPE_PAL_PTCE_INFO, + &config_pal_ptce_info, + sizeof(config_pal_ptce_info) + }, + { HOB_TYPE_PAL_REGISTER_INFO, + &config_pal_register_info, + sizeof(config_pal_register_info) + }, + { HOB_TYPE_PAL_RSE_INFO, + &config_pal_rse_info, + sizeof(config_pal_rse_info) + }, + { HOB_TYPE_PAL_TEST_INFO, + &config_pal_test_info, + sizeof(config_pal_test_info) + }, + { HOB_TYPE_PAL_VM_SUMMARY, + &config_pal_vm_summary, + sizeof(config_pal_vm_summary) + }, + { HOB_TYPE_PAL_VM_INFO, + &config_pal_vm_info, + sizeof(config_pal_vm_info) + }, + { HOB_TYPE_PAL_VM_PAGE_SIZE, + &config_pal_vm_page_size, + sizeof(config_pal_vm_page_size) + }, +}; + +static int add_pal_hob(void* hob_buf){ + int i; + for (i=0; i<sizeof(hob_batch)/sizeof(hob_batch_t); i++){ + if (hob_add(hob_buf, hob_batch[i].type, + 
hob_batch[i].data, + hob_batch[i].size)<0) + return -1; + } + return 0; +} + +static int setup_guest( int xc_handle, + uint32_t dom, unsigned long memsize, + char *image, unsigned long image_size, + unsigned int control_evtchn, + unsigned int store_evtchn, + unsigned long *store_mfn) +{ + unsigned long page_array[2]; + shared_iopage_t *sp; + // FIXME: initialize pfn list for a temp hack + if (xc_ia64_get_pfn_list(xc_handle, dom, NULL, -1, -1) == -1) { + PERROR("Could not allocate continuous memory"); + goto error_out; + } + + if ((image_size > 12 * MEM_M) || (image_size & (PAGE_SIZE - 1))) { + PERROR("Guest firmware size is incorrect [%ld]?", image_size); + return -1; + } + + /* Load guest firmware */ + if( xc_ia64_copy_to_domain_pages( xc_handle, dom, + image, 4*MEM_G-image_size, image_size>>PAGE_SHIFT)) { + PERROR("Could not load guest firmware into domain"); + goto error_out; + } + + /* Hand-off state passed to guest firmware */ + if (xc_ia64_build_hob(xc_handle, dom, memsize) < 0){ + PERROR("Could not build hob\n"); + goto error_out; + } + + /* Retrieve special pages like io, xenstore, etc. */ + if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array, IO_PAGE_START>>PAGE_SHIFT, 2) != 2 ) + { + PERROR("Could not get the page frame list"); + goto error_out; + } + + *store_mfn = page_array[1]; + if ((sp = (shared_iopage_t *) xc_map_foreign_range( + xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, + page_array[0])) == 0) + goto error_out; + memset(sp, 0, PAGE_SIZE); + sp->sp_global.eport = control_evtchn; + munmap(sp, PAGE_SIZE); + + return 0; + + error_out: + return -1; +} + +int xc_vmx_build(int xc_handle, + uint32_t domid, + int memsize, + const char *image_name, + unsigned int control_evtchn, + unsigned int vcpus, + unsigned int store_evtchn, + unsigned long *store_mfn) +{ + dom0_op_t launch_op, op; + int rc ; + vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt; + char *image = NULL; + unsigned long image_size; + unsigned long nr_pages; + + if ( (nr_pages = xc_get_max_pages(xc_handle, domid)) < 0 ) + { + PERROR("Could not find total pages for domain"); + goto error_out; + } + + if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL ){ + PERROR("Could not read guest firmware image %s",image_name); + goto error_out; + } + + image_size = (image_size + PAGE_SIZE - 1) & PAGE_MASK; + + if ( mlock(&st_ctxt, sizeof(st_ctxt) ) ){ + PERROR("Unable to mlock ctxt"); + return 1; + } + + op.cmd = DOM0_GETDOMAININFO; + op.u.getdomaininfo.domain = (domid_t)domid; + if ( (do_dom0_op(xc_handle, &op) < 0) || + ((uint16_t)op.u.getdomaininfo.domain != domid) ) { + PERROR("Could not get info on domain"); + goto error_out; + } + + if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) ){ + PERROR("Could not get vcpu context"); + goto error_out; + } + + if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ) { + ERROR("Domain is already constructed"); + goto error_out; + } + + if ( setup_guest(xc_handle, domid, (unsigned long)memsize, image, image_size, + control_evtchn, store_evtchn, store_mfn ) < 0 ){ + ERROR("Error constructing guest OS"); + goto error_out; + } + + if ( image != NULL ) + free(image); + + ctxt->flags = VGCF_VMX_GUEST; + ctxt->regs.cr_iip = 0x80000000ffffffb0UL; + ctxt->vcpu.privregs = 0; + + memset( &launch_op, 0, sizeof(launch_op) ); + + launch_op.u.setdomaininfo.domain = (domid_t)domid; + launch_op.u.setdomaininfo.vcpu = 0; + launch_op.u.setdomaininfo.ctxt = ctxt; + + launch_op.cmd = DOM0_SETDOMAININFO; + rc = do_dom0_op(xc_handle, &launch_op); + return rc; + + error_out: + if ( 
image != NULL ) + free(image); + return -1; } diff -r dc36edf1102f -r bcccadcc56e5 tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Sun Oct 30 12:52:38 2005 +++ b/tools/libxc/xc_linux_build.c Sun Oct 30 13:00:35 2005 @@ -350,6 +350,8 @@ start_info = xc_map_foreign_range( xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, page_array[0]); memset(start_info, 0, sizeof(*start_info)); + rc = xc_version(xc_handle, XENVER_version, NULL); + sprintf(start_info->magic, "Xen-%i.%i", rc >> 16, rc & (0xFFFF)); start_info->flags = flags; start_info->store_mfn = nr_pages - 2; start_info->store_evtchn = store_evtchn; @@ -624,6 +626,8 @@ xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, page_array[(vstartinfo_start-dsi.v_start)>>PAGE_SHIFT]); memset(start_info, 0, sizeof(*start_info)); + rc = xc_version(xc_handle, XENVER_version, NULL); + sprintf(start_info->magic, "Xen-%i.%i", rc >> 16, rc & (0xFFFF)); start_info->nr_pages = nr_pages; start_info->shared_info = shared_info_frame << PAGE_SHIFT; start_info->flags = flags; diff -r dc36edf1102f -r bcccadcc56e5 tools/libxc/xc_private.c --- a/tools/libxc/xc_private.c Sun Oct 30 12:52:38 2005 +++ b/tools/libxc/xc_private.c Sun Oct 30 13:00:35 2005 @@ -313,46 +313,6 @@ return (ret < 0) ? -1 : op.u.getmemlist.num_pfns; } -#ifdef __ia64__ -int xc_ia64_get_pfn_list(int xc_handle, - uint32_t domid, - unsigned long *pfn_buf, - unsigned int start_page, - unsigned int nr_pages) -{ - dom0_op_t op; - int ret; - - op.cmd = DOM0_GETMEMLIST; - op.u.getmemlist.domain = (domid_t)domid; - op.u.getmemlist.max_pfns = ((unsigned long)start_page << 32) | nr_pages; - op.u.getmemlist.buffer = pfn_buf; - - if ( mlock(pfn_buf, nr_pages * sizeof(unsigned long)) != 0 ) - { - PERROR("Could not lock pfn list buffer"); - return -1; - } - - /* XXX Hack to put pages in TLB, hypervisor should be able to handle this */ - memset(pfn_buf, 0, nr_pages * sizeof(unsigned long)); - ret = do_dom0_op(xc_handle, &op); - - (void)munlock(pfn_buf, nr_pages * sizeof(unsigned long)); - - return (ret < 0) ? -1 : op.u.getmemlist.num_pfns; -} - -long xc_get_max_pages(int xc_handle, uint32_t domid) -{ - dom0_op_t op; - op.cmd = DOM0_GETDOMAININFO; - op.u.getdomaininfo.domain = (domid_t)domid; - return (do_dom0_op(xc_handle, &op) < 0) ? 
- -1 : op.u.getdomaininfo.max_pages; -} -#endif - long xc_get_tot_pages(int xc_handle, uint32_t domid) { dom0_op_t op; diff -r dc36edf1102f -r bcccadcc56e5 tools/libxc/xc_vmx_build.c --- a/tools/libxc/xc_vmx_build.c Sun Oct 30 12:52:38 2005 +++ b/tools/libxc/xc_vmx_build.c Sun Oct 30 13:00:35 2005 @@ -279,6 +279,7 @@ vcpu_guest_context_t *ctxt, unsigned long shared_info_frame, unsigned int control_evtchn, + unsigned int lapic, unsigned int vcpus, unsigned int store_evtchn, unsigned long *store_mfn) @@ -554,7 +555,7 @@ ctxt->user_regs.eax = 0; ctxt->user_regs.esp = 0; ctxt->user_regs.ebx = 0; /* startup_32 expects this to be 0 to signal boot cpu */ - ctxt->user_regs.ecx = 0; + ctxt->user_regs.ecx = lapic; ctxt->user_regs.esi = 0; ctxt->user_regs.edi = 0; ctxt->user_regs.ebp = 0; @@ -597,6 +598,7 @@ int memsize, const char *image_name, unsigned int control_evtchn, + unsigned int lapic, unsigned int vcpus, unsigned int store_evtchn, unsigned long *store_mfn) @@ -651,9 +653,9 @@ goto error_out; } - if ( setup_guest(xc_handle, domid, memsize, image, image_size, - nr_pages, ctxt, op.u.getdomaininfo.shared_info_frame, - control_evtchn, vcpus, store_evtchn, store_mfn) < 0) + if ( setup_guest(xc_handle, domid, memsize, image, image_size, nr_pages, + ctxt, op.u.getdomaininfo.shared_info_frame, control_evtchn, + lapic, vcpus, store_evtchn, store_mfn) < 0) { ERROR("Error constructing guest OS"); goto error_out; diff -r dc36edf1102f -r bcccadcc56e5 tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Sun Oct 30 12:52:38 2005 +++ b/tools/libxc/xenctrl.h Sun Oct 30 13:00:35 2005 @@ -414,6 +414,12 @@ unsigned long *pfn_buf, unsigned int start_page, unsigned int nr_pages); +int xc_copy_to_domain_page(int xc_handle, uint32_t domid, + unsigned long dst_pfn, void *src_page); + +int xc_ia64_copy_to_domain_pages(int xc_handle, uint32_t domid, + void* src_page, unsigned long dst_pfn, int nr_pages); + long xc_get_max_pages(int xc_handle, uint32_t domid); int xc_mmuext_op(int xc_handle, struct mmuext_op *op, unsigned int nr_ops, diff -r dc36edf1102f -r bcccadcc56e5 tools/libxc/xenguest.h --- a/tools/libxc/xenguest.h Sun Oct 30 12:52:38 2005 +++ b/tools/libxc/xenguest.h Sun Oct 30 13:00:35 2005 @@ -56,6 +56,7 @@ int memsize, const char *image_name, unsigned int control_evtchn, + unsigned int lapic, unsigned int vcpus, unsigned int store_evtchn, unsigned long *store_mfn); diff -r dc36edf1102f -r bcccadcc56e5 tools/misc/cpuperf/cpuperf.c --- a/tools/misc/cpuperf/cpuperf.c Sun Oct 30 12:52:38 2005 +++ b/tools/misc/cpuperf/cpuperf.c Sun Oct 30 13:00:35 2005 @@ -16,7 +16,6 @@ #include <sys/types.h> #include <sched.h> -#include <error.h> #include <stdio.h> #include <unistd.h> #include <stdlib.h> diff -r dc36edf1102f -r bcccadcc56e5 tools/misc/miniterm/miniterm.c --- a/tools/misc/miniterm/miniterm.c Sun Oct 30 12:52:38 2005 +++ b/tools/misc/miniterm/miniterm.c Sun Oct 30 13:00:35 2005 @@ -29,7 +29,7 @@ #include <stdlib.h> #include <unistd.h> #include <fcntl.h> -#include <sys/signal.h> +#include <signal.h> #include <sys/types.h> #include <sys/wait.h> diff -r dc36edf1102f -r bcccadcc56e5 tools/python/xen/lowlevel/xc/xc.c --- a/tools/python/xen/lowlevel/xc/xc.c Sun Oct 30 12:52:38 2005 +++ b/tools/python/xen/lowlevel/xc/xc.c Sun Oct 30 13:00:35 2005 @@ -438,19 +438,20 @@ char *image; int control_evtchn, store_evtchn; int vcpus = 1; + int lapic = 0; int memsize; unsigned long store_mfn = 0; static char *kwd_list[] = { "dom", "control_evtchn", "store_evtchn", - "memsize", "image", "vcpus", NULL }; - - if ( 
!PyArg_ParseTupleAndKeywords(args, kwds, "iiiisi", kwd_list, + "memsize", "image", "lapic", "vcpus", NULL }; + + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiisii", kwd_list, &dom, &control_evtchn, &store_evtchn, - &memsize, &image, &vcpus) ) + &memsize, &image, &lapic, &vcpus) ) return NULL; if ( xc_vmx_build(xc->xc_handle, dom, memsize, image, control_evtchn, - vcpus, store_evtchn, &store_mfn) != 0 ) + lapic, vcpus, store_evtchn, &store_mfn) != 0 ) return PyErr_SetFromErrno(xc_error); return Py_BuildValue("{s:i}", "store_mfn", store_mfn); diff -r dc36edf1102f -r bcccadcc56e5 tools/python/xen/web/tcp.py --- a/tools/python/xen/web/tcp.py Sun Oct 30 12:52:38 2005 +++ b/tools/python/xen/web/tcp.py Sun Oct 30 13:00:35 2005 @@ -99,7 +99,7 @@ return l def SetCloExec(SocketListener): - SocketListener.SetCloExec() + SocketListener.setCloExec() def connectTCP(host, port, factory, timeout=None, bindAddress=None): c = TCPConnector(host, port, factory, timeout=timeout, bindAddress=bindAddress) diff -r dc36edf1102f -r bcccadcc56e5 tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Sun Oct 30 12:52:38 2005 +++ b/tools/python/xen/xend/image.py Sun Oct 30 13:00:35 2005 @@ -203,6 +203,10 @@ self.dmargs += self.configVNC(imageConfig) + self.lapic = 0 + lapic = sxp.child_value(imageConfig, 'lapic') + if not lapic is None: + self.lapic = int(lapic) def buildDomain(self): # Create an event channel @@ -217,6 +221,7 @@ log.debug("control_evtchn = %d", self.device_channel) log.debug("store_evtchn = %d", store_evtchn) log.debug("memsize = %d", self.vm.getMemoryTarget() / 1024) + log.debug("lapic = %d", self.lapic) log.debug("vcpus = %d", self.vm.getVCpuCount()) return xc.vmx_build(dom = self.vm.getDomid(), @@ -224,6 +229,7 @@ control_evtchn = self.device_channel, store_evtchn = store_evtchn, memsize = self.vm.getMemoryTarget() / 1024, + lapic = self.lapic, vcpus = self.vm.getVCpuCount()) @@ -342,12 +348,15 @@ def getDomainMemory(self, mem): """@see ImageHandler.getDomainMemory""" + page_kb = 4 + if os.uname()[4] == 'ia64': + page_kb = 16 # for ioreq_t and xenstore static_pages = 2 - return mem + self.getPageTableSize(mem / 1024) + 4 * static_pages + return mem + (self.getPageTableSize(mem / 1024) + static_pages) * page_kb def getPageTableSize(self, mem_mb): - """Return the size of memory needed for 1:1 page tables for physical + """Return the pages of memory needed for 1:1 page tables for physical mode. @param mem_mb: size in MB @@ -355,13 +364,13 @@ """ # 1 page for the PGD + 1 pte page for 4MB of memory (rounded) if os.uname()[4] == 'x86_64': - return (5 + ((mem_mb + 1) >> 1)) * 4 + return 5 + ((mem_mb + 1) >> 1) elif os.uname()[4] == 'ia64': - # XEN/IA64 has p2m table allocated on demand, so only return - # guest firmware size here. - return 16 * 1024 + # 1:1 pgtable is allocated on demand ia64, so just return rom size + # for guest firmware + return 1024 else: - return (1 + ((mem_mb + 3) >> 2)) * 4 + return 1 + ((mem_mb + 3) >> 2) """Table of image handler classes for virtual machine images. 
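For illustration, taking the revised sizing above and assuming mem is passed in KiB (mem / 1024 is treated as MiB): a 512 MiB domain on 32-bit x86 needs 1 + ((512 + 3) >> 2) = 129 page-table pages, so getDomainMemory adds (129 + 2) * 4 KiB = 524 KiB on top of mem; on ia64 the same call adds (1024 + 2) * 16 KiB, roughly 16 MiB, which is essentially the guest firmware allowance rather than page tables.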
Indexed by diff -r dc36edf1102f -r bcccadcc56e5 tools/python/xen/xend/server/event.py --- a/tools/python/xen/xend/server/event.py Sun Oct 30 12:52:38 2005 +++ b/tools/python/xen/xend/server/event.py Sun Oct 30 13:00:35 2005 @@ -192,4 +192,5 @@ if xroot.get_xend_http_server(): port = xroot.get_xend_event_port() interface = xroot.get_xend_address() - tcp.listenTCP(port, factory, interface=interface) + l = tcp.listenTCP(port, factory, interface=interface) + l.setCloExec() diff -r dc36edf1102f -r bcccadcc56e5 tools/python/xen/xm/create.py --- a/tools/python/xen/xm/create.py Sun Oct 30 12:52:38 2005 +++ b/tools/python/xen/xm/create.py Sun Oct 30 13:00:35 2005 @@ -157,6 +157,10 @@ fn=set_int, default=None, use="CPU to run the domain on.") +gopts.var('lapic', val='LAPIC', + fn=set_int, default=0, + use="Disable or enable local APIC of VMX domain.") + gopts.var('vcpus', val='VCPUS', fn=set_int, default=1, use="# of Virtual CPUS in domain.") @@ -314,10 +318,6 @@ gopts.var('nfs_root', val="PATH", fn=set_value, default=None, use="Set the path of the root NFS directory.") - -gopts.var('memmap', val='FILE', - fn=set_value, default='', - use="Path to memap SXP file.") gopts.var('device_model', val='FILE', fn=set_value, default='', @@ -556,9 +556,9 @@ def configure_vmx(config_image, vals): """Create the config for VMX devices. """ - args = [ 'memmap', 'device_model', 'vcpus', 'cdrom', - 'boot', 'fda', 'fdb', 'localtime', 'serial', 'macaddr', 'stdvga', - 'isa', 'nographic', 'vnc', 'vncviewer', 'sdl', 'display', 'ne2000'] + args = [ 'device_model', 'vcpus', 'cdrom', 'boot', 'fda', 'fdb', + 'localtime', 'serial', 'macaddr', 'stdvga', 'isa', 'nographic', + 'vnc', 'vncviewer', 'sdl', 'display', 'ne2000', 'lapic'] for a in args: if (vals.__dict__[a]): config_image.append([a, vals.__dict__[a]]) diff -r dc36edf1102f -r bcccadcc56e5 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Sun Oct 30 12:52:38 2005 +++ b/tools/python/xen/xm/main.py Sun Oct 30 13:00:35 2005 @@ -61,6 +61,8 @@ top monitor system and domains in real-time unpause <DomId> unpause a paused domain +<DomName> can be substituted for <DomId> in xm subcommands. + For a complete list of subcommands run 'xm help --long' For more help on xm see the xm(1) man page For more help on xm create, see the xmdomain.cfg(5) man page""" @@ -118,6 +120,8 @@ vnet-list [-l|--long] list vnets vnet-create <config> create a vnet from a config file vnet-delete <vnetid> delete a vnet + +<DomName> can be substituted for <DomId> in xm subcommands. For a short list of subcommands run 'xm help' For more help on xm see the xm(1) man page diff -r dc36edf1102f -r bcccadcc56e5 tools/security/getlabel.sh --- a/tools/security/getlabel.sh Sun Oct 30 12:52:38 2005 +++ b/tools/security/getlabel.sh Sun Oct 30 13:00:35 2005 @@ -36,18 +36,21 @@ usage () { - echo "Usage: $0 -sid <ssidref> [<policy name>] or" - echo " $0 -dom <domid> [<policy name>] " - echo "" - echo "policy name : the name of the policy, i.e. 'chwall'" - echo " If the policy name is omitted, the grub.conf" - echo " entry of the running system is tried to be read" - echo " and the policy name determined from there." - echo "ssidref : an ssidref in hex or decimal format, i.e., '0x00010002'" - echo " or '65538'" - echo "domid : id of the domain, i.e., '1'; Use numbers from the 2nd" - echo " column shown when invoking 'xm list'" - echo "" +echo "Use this tool to display the label of a domain or the label that is +corresponding to an ssidref given the name of the running policy. 
+ +Usage: $0 -sid <ssidref> [<policy name>] or + $0 -dom <domid> [<policy name>] + +policy name : the name of the policy, i.e. 'chwall' + If the policy name is omitted, the grub.conf + entry of the running system is tried to be read + and the policy name determined from there. +ssidref : an ssidref in hex or decimal format, i.e., '0x00010002' + or '65538' +domid : id of the domain, i.e., '1'; Use numbers from the 2nd + column shown when invoking 'xm list' +" } diff -r dc36edf1102f -r bcccadcc56e5 tools/security/setlabel.sh --- a/tools/security/setlabel.sh Sun Oct 30 12:52:38 2005 +++ b/tools/security/setlabel.sh Sun Oct 30 13:00:35 2005 @@ -39,21 +39,27 @@ usage () { - echo "Usage: $0 [Option] <vmfile> <label> [<policy name>]" - echo " or $0 -l [<policy name>]" - echo "" - echo "Valid options are:" - echo "-r : to relabel a file without being prompted" - echo "" - echo "vmfile : XEN vm configuration file" - echo "label : the label to map to an ssidref" - echo "policy name : the name of the policy, i.e. 'chwall'" - echo " If the policy name is omitted, it is attempted" - echo " to find the current policy's name in grub.conf." - echo "" - echo "-l [<policy name>] is used to show valid labels in the map file of" - echo " the given or current policy." - echo "" +echo "Use this tool to put the ssidref corresponding to a label of a policy into +the VM configuration file, or use it to display all labels of a policy. + +Usage: $0 [Option] <vmfile> <label> [<policy name>] + or $0 -l [<policy name>] + +Valid options are: +-r : to relabel a file without being prompted + +vmfile : XEN vm configuration file; give complete path +label : the label to map to an ssidref +policy name : the name of the policy, i.e. 'chwall' + If the policy name is omitted, it is attempted + to find the current policy's name in grub.conf. + +-l [<policy name>] is used to show valid labels in the map file of + the given or current policy. If the policy name + is omitted, it will be tried to determine the + current policy from grub.conf (/boot/grub/grub.conf) + +" } @@ -83,7 +89,7 @@ exit -1; fi else - policy=$3; + policy=$1; fi @@ -92,7 +98,7 @@ if [ "$res" != "0" ]; then showLabels $mapfile else - echo "Could not find map file for policy '$1'." + echo "Could not find map file for policy '$policy'." fi elif [ "$mode" == "usage" ]; then usage diff -r dc36edf1102f -r bcccadcc56e5 tools/security/updategrub.sh --- a/tools/security/updategrub.sh Sun Oct 30 12:52:38 2005 +++ b/tools/security/updategrub.sh Sun Oct 30 13:00:35 2005 @@ -26,11 +26,16 @@ # Show usage of this program usage () { - echo "Usage: $0 <policy name> <root of xen repository>" - echo "" - echo "<policy name> : The name of the policy, i.e. xen_null" - echo "<root of xen repository> : The root of the XEN repositrory." - echo "" +echo "Use this tool to add the binary policy to the Xen grub entry and +have Xen automatically enforce the policy when starting. + +Usage: $0 <policy name> <root of xen repository> + +<policy name> : The name of the policy, i.e. xen_null +<root of xen repository> : The root of the XEN repository. Give + complete path. 
+ +" } # This function sets the global variable 'linux' @@ -43,11 +48,24 @@ for f in $path/linux-*-xen0 ; do versionfile=$f/include/linux/version.h if [ -r $versionfile ]; then - lnx=`cat $versionfile | \ - grep UTS_RELEASE | \ - awk '{ \ - len=length($3); \ - print substr($3,2,len-2) }'` + lnx=`cat $versionfile | \ + grep UTS_RELEASE | \ + awk '{ \ + len=length($3); \ + version=substr($3,2,len-2); \ + split(version,numbers,"."); \ + if (numbers[4]=="") { \ + printf("%s.%s.%s", \ + numbers[1], \ + numbers[2], \ + numbers[3]); \ + } else { \ + printf("%s.%s.%s[.0-9]*-xen0",\ + numbers[1], \ + numbers[2], \ + numbers[3]); \ + } \ + }'` fi if [ "$lnx" != "" ]; then linux="[./0-9a-zA-z]*$lnx" @@ -143,10 +161,19 @@ echo "Could not create temporary file! Aborting." exit -1 fi - mv -f $tmpfile $grubconf + diff $tmpfile $grubconf > /dev/null + RES=$? + if [ "$RES" == "0" ]; then + echo "No changes were made to $grubconf." + else + echo "Successfully updated $grubconf." + mv -f $tmpfile $grubconf + fi } if [ "$1" == "" -o "$2" == "" ]; then + echo "Error: Not enough command line parameters." + echo "" usage exit -1 fi diff -r dc36edf1102f -r bcccadcc56e5 tools/xenstore/Makefile --- a/tools/xenstore/Makefile Sun Oct 30 12:52:38 2005 +++ b/tools/xenstore/Makefile Sun Oct 30 13:00:35 2005 @@ -77,7 +77,7 @@ clean: testsuite-clean rm -f *.o *.opic *.so rm -f xenstored xs_random xs_stress xs_crashme - rm -f xs_test xenstored_test + rm -f xs_test xenstored_test xs_tdb_dump $(RM) $(PROG_DEP) print-dir: diff -r dc36edf1102f -r bcccadcc56e5 xen/acm/acm_simple_type_enforcement_hooks.c --- a/xen/acm/acm_simple_type_enforcement_hooks.c Sun Oct 30 12:52:38 2005 +++ b/xen/acm/acm_simple_type_enforcement_hooks.c Sun Oct 30 13:00:35 2005 @@ -392,8 +392,11 @@ int i; printkd("checking cache: %x --> %x.\n", dom->domain_id, rdom); + + if (dom->ssid == NULL) + return 0; ste_ssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, - (struct acm_ssid_domain *)(dom)->ssid); + (struct acm_ssid_domain *)(dom->ssid)); for(i=0; i< ACM_TE_CACHE_SIZE; i++) { if ((ste_ssid->ste_cache[i].valid == VALID) && @@ -412,6 +415,8 @@ struct ste_ssid *ste_ssid; int i; printkd("caching from doms: %x --> %x.\n", subj->domain_id, obj->domain_id); + if (subj->ssid == NULL) + return; ste_ssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, (struct acm_ssid_domain *)(subj)->ssid); for(i=0; i< ACM_TE_CACHE_SIZE; i++) @@ -431,26 +436,34 @@ struct ste_ssid *ste_ssid; int i; struct domain **pd; + struct acm_ssid_domain *ssid; printkd("deleting cache for dom %x.\n", id); - read_lock(&domlist_lock); /* look through caches of all domains */ pd = &domain_list; for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) { - ste_ssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, - (struct acm_ssid_domain *)(*pd)->ssid); + ssid = (struct acm_ssid_domain *)((*pd)->ssid); + + if (ssid == NULL) + continue; /* hanging domain structure, no ssid any more ... 
*/ + ste_ssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY, ssid); + if (!ste_ssid) { + printk("%s: deleting ID from cache ERROR (no ste_ssid)!\n", + __func__); + goto out; + } for (i=0; i<ACM_TE_CACHE_SIZE; i++) if ((ste_ssid->ste_cache[i].valid == VALID) && - (ste_ssid->ste_cache[i].id = id)) + (ste_ssid->ste_cache[i].id == id)) ste_ssid->ste_cache[i].valid = FREE; } + out: read_unlock(&domlist_lock); } /*************************** * Authorization functions **************************/ - static int ste_pre_domain_create(void *subject_ssid, ssidref_t ssidref) { @@ -484,19 +497,29 @@ /* -------- EVENTCHANNEL OPERATIONS -----------*/ static int -ste_pre_eventchannel_unbound(domid_t id) { +ste_pre_eventchannel_unbound(domid_t id1, domid_t id2) { struct domain *subj, *obj; int ret; - traceprintk("%s: dom%x-->dom%x.\n", - __func__, current->domain->domain_id, id); - - if (check_cache(current->domain, id)) { + traceprintk("%s: dom%x-->dom%x.\n", __func__, + (id1 == DOMID_SELF) ? current->domain->domain_id : id1, + (id2 == DOMID_SELF) ? current->domain->domain_id : id2); + + if (id1 == DOMID_SELF) id1 = current->domain->domain_id; + if (id2 == DOMID_SELF) id2 = current->domain->domain_id; + + subj = find_domain_by_id(id1); + obj = find_domain_by_id(id2); + if ((subj == NULL) || (obj == NULL)) { + ret = ACM_ACCESS_DENIED; + goto out; + } + /* cache check late */ + if (check_cache(subj, obj->domain_id)) { atomic_inc(&ste_bin_pol.ec_cachehit_count); - return ACM_ACCESS_PERMITTED; + ret = ACM_ACCESS_PERMITTED; + goto out; } atomic_inc(&ste_bin_pol.ec_eval_count); - subj = current->domain; - obj = find_domain_by_id(id); if (share_common_type(subj, obj)) { cache_result(subj, obj); @@ -505,38 +528,43 @@ atomic_inc(&ste_bin_pol.ec_denied_count); ret = ACM_ACCESS_DENIED; } + out: if (obj != NULL) put_domain(obj); + if (subj != NULL) + put_domain(subj); return ret; } static int -ste_pre_eventchannel_interdomain(domid_t id1, domid_t id2) -{ - struct domain *subj, *obj; +ste_pre_eventchannel_interdomain(domid_t id) +{ + struct domain *subj=NULL, *obj=NULL; int ret; + traceprintk("%s: dom%x-->dom%x.\n", __func__, - (id1 == DOMID_SELF) ? current->domain->domain_id : id1, - (id2 == DOMID_SELF) ? current->domain->domain_id : id2); + current->domain->domain_id, + (id == DOMID_SELF) ? 
current->domain->domain_id : id); /* following is a bit longer but ensures that we * "put" only domains that we where "find"-ing */ - if (id1 == DOMID_SELF) id1 = current->domain->domain_id; - if (id2 == DOMID_SELF) id2 = current->domain->domain_id; - - subj = find_domain_by_id(id1); - obj = find_domain_by_id(id2); - if ((subj == NULL) || (obj == NULL)) { + if (id == DOMID_SELF) id = current->domain->domain_id; + + subj = current->domain; + obj = find_domain_by_id(id); + if (obj == NULL) { ret = ACM_ACCESS_DENIED; goto out; } + /* cache check late, but evtchn is not on performance critical path */ if (check_cache(subj, obj->domain_id)) { atomic_inc(&ste_bin_pol.ec_cachehit_count); ret = ACM_ACCESS_PERMITTED; goto out; } + atomic_inc(&ste_bin_pol.ec_eval_count); if (share_common_type(subj, obj)) { @@ -549,8 +577,6 @@ out: if (obj != NULL) put_domain(obj); - if (subj != NULL) - put_domain(subj); return ret; } diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/asm-offsets.c --- a/xen/arch/ia64/asm-offsets.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/asm-offsets.c Sun Oct 30 13:00:35 2005 @@ -59,6 +59,8 @@ DEFINE(XSI_BANKNUM_OFS, offsetof(mapped_regs_t, banknum)); DEFINE(XSI_BANK0_OFS, offsetof(mapped_regs_t, bank0_regs[0])); DEFINE(XSI_BANK1_OFS, offsetof(mapped_regs_t, bank1_regs[0])); + DEFINE(XSI_B0NATS_OFS, offsetof(mapped_regs_t, vbnat)); + DEFINE(XSI_B1NATS_OFS, offsetof(mapped_regs_t, vnat)); DEFINE(XSI_RR0_OFS, offsetof(mapped_regs_t, rrs[0])); DEFINE(XSI_METAPHYS_OFS, offsetof(mapped_regs_t, metaphysical_mode)); DEFINE(XSI_PRECOVER_IFS_OFS, offsetof(mapped_regs_t, precover_ifs)); @@ -79,13 +81,17 @@ //DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand)); //DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal)); //DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid)); + DEFINE(IA64_PGD, offsetof(struct domain, arch.mm)); DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct vcpu, arch._thread.ksp)); DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct vcpu, arch._thread.on_ustack)); + DEFINE(IA64_VCPU_DOMAIN_OFFSET, offsetof (struct vcpu, domain)); DEFINE(IA64_VCPU_META_RR0_OFFSET, offsetof (struct vcpu, arch.metaphysical_rr0)); DEFINE(IA64_VCPU_META_SAVED_RR0_OFFSET, offsetof (struct vcpu, arch.metaphysical_saved_rr0)); DEFINE(IA64_VCPU_BREAKIMM_OFFSET, offsetof (struct vcpu, arch.breakimm)); DEFINE(IA64_VCPU_IVA_OFFSET, offsetof (struct vcpu, arch.iva)); + DEFINE(IA64_VCPU_DTLB_PTE_OFFSET, offsetof (struct vcpu, arch.dtlb_pte)); + DEFINE(IA64_VCPU_ITLB_PTE_OFFSET, offsetof (struct vcpu, arch.itlb_pte)); DEFINE(IA64_VCPU_IRR0_OFFSET, offsetof (struct vcpu, arch.irr[0])); DEFINE(IA64_VCPU_IRR3_OFFSET, offsetof (struct vcpu, arch.irr[3])); DEFINE(IA64_VCPU_INSVC3_OFFSET, offsetof (struct vcpu, arch.insvc[3])); diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/linux-xen/entry.S --- a/xen/arch/ia64/linux-xen/entry.S Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/linux-xen/entry.S Sun Oct 30 13:00:35 2005 @@ -900,10 +900,17 @@ adds r7 = PT(EML_UNAT)+16,r12 ;; ld8 r7 = [r7] + ;; +#if 0 +leave_kernel_self: + cmp.ne p8,p0 = r0, r7 +(p8) br.sptk.few leave_kernel_self + ;; +#endif (p6) br.call.sptk.many b0=deliver_pending_interrupt ;; mov ar.pfs=loc0 - mov ar.unat=r7 /* load eml_unat */ + mov ar.unat=r7 /* load eml_unat */ mov r31=r0 diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/linux-xen/head.S --- a/xen/arch/ia64/linux-xen/head.S Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/linux-xen/head.S Sun Oct 30 13:00:35 2005 @@ -324,6 +324,9 @@ 
mov r16=-1 (isBP) br.cond.dpnt .load_current // BP stack is on region 5 --- no need to map it +#ifndef XEN + // XEN: stack is allocated in xenheap, which is currently always + // mapped. // load mapping for stack (virtaddr in r2, physaddr in r3) rsm psr.ic movl r17=PAGE_KERNEL @@ -353,7 +356,8 @@ ssm psr.ic srlz.d ;; - +#endif + .load_current: // load the "current" pointer (r13) and ar.k6 with the current task #if defined(XEN) && defined(VALIDATE_VT) diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/linux-xen/irq_ia64.c --- a/xen/arch/ia64/linux-xen/irq_ia64.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/linux-xen/irq_ia64.c Sun Oct 30 13:00:35 2005 @@ -281,5 +281,8 @@ ipi_data = (delivery_mode << 8) | (vector & 0xff); ipi_addr = ipi_base_addr + ((phys_cpu_id << 4) | ((redirect & 1) << 3)); +#ifdef XEN + printf ("send_ipi to %d (%x)\n", cpu, phys_cpu_id); +#endif writeq(ipi_data, ipi_addr); } diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/linux-xen/mm_contig.c --- a/xen/arch/ia64/linux-xen/mm_contig.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/linux-xen/mm_contig.c Sun Oct 30 13:00:35 2005 @@ -193,8 +193,8 @@ */ if (smp_processor_id() == 0) { #ifdef XEN - cpu_data = alloc_xenheap_pages(PERCPU_PAGE_SHIFT - - PAGE_SHIFT + get_order(NR_CPUS)); + cpu_data = alloc_xenheap_pages(get_order(NR_CPUS + * PERCPU_PAGE_SIZE)); #else cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS, PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/linux-xen/setup.c --- a/xen/arch/ia64/linux-xen/setup.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/linux-xen/setup.c Sun Oct 30 13:00:35 2005 @@ -366,6 +366,7 @@ } #endif +void __init #ifdef XEN early_setup_arch (char **cmdline_p) #else @@ -377,14 +378,12 @@ ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist); *cmdline_p = __va(ia64_boot_param->command_line); -#ifdef XEN - efi_init(); -#else +#ifndef XEN strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE); +#endif efi_init(); io_port_init(); -#endif #ifdef CONFIG_IA64_GENERIC { @@ -414,11 +413,17 @@ #ifdef XEN early_cmdline_parse(cmdline_p); cmdline_parse(*cmdline_p); -#undef CONFIG_ACPI_BOOT #endif if (early_console_setup(*cmdline_p) == 0) mark_bsp_online(); +#ifdef XEN +} + +void __init +late_setup_arch (char **cmdline_p) +{ +#endif #ifdef CONFIG_ACPI_BOOT /* Initialize the ACPI boot-time table parser */ acpi_table_init(); @@ -433,20 +438,16 @@ #ifndef XEN find_memory(); -#else - io_port_init(); -} - -void __init -late_setup_arch (char **cmdline_p) -{ -#undef CONFIG_ACPI_BOOT - acpi_table_init(); -#endif +#endif + /* process SAL system table: */ ia64_sal_init(efi.sal_systab); #ifdef CONFIG_SMP +#ifdef XEN + init_smp_config (); +#endif + cpu_physical_id(0) = hard_smp_processor_id(); cpu_set(0, cpu_sibling_map[0]); @@ -768,6 +769,11 @@ cpu_data = per_cpu_init(); +#ifdef XEN + printf ("cpu_init: current=%p, current->domain->arch.mm=%p\n", + current, current->domain->arch.mm); +#endif + /* * We set ar.k3 so that assembly code in MCA handler can compute * physical addresses of per cpu variables with a simple: @@ -887,6 +893,16 @@ #ifndef XEN pm_idle = default_idle; #endif + +#ifdef XEN + /* surrender usage of kernel registers to domain, use percpu area instead */ + __get_cpu_var(cpu_kr)._kr[IA64_KR_IO_BASE] = ia64_get_kr(IA64_KR_IO_BASE); + __get_cpu_var(cpu_kr)._kr[IA64_KR_PER_CPU_DATA] = ia64_get_kr(IA64_KR_PER_CPU_DATA); + __get_cpu_var(cpu_kr)._kr[IA64_KR_CURRENT_STACK] = ia64_get_kr(IA64_KR_CURRENT_STACK); + 
__get_cpu_var(cpu_kr)._kr[IA64_KR_FPU_OWNER] = ia64_get_kr(IA64_KR_FPU_OWNER); + __get_cpu_var(cpu_kr)._kr[IA64_KR_CURRENT] = ia64_get_kr(IA64_KR_CURRENT); + __get_cpu_var(cpu_kr)._kr[IA64_KR_PT_BASE] = ia64_get_kr(IA64_KR_PT_BASE); +#endif } void diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/linux-xen/smp.c --- a/xen/arch/ia64/linux-xen/smp.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/linux-xen/smp.c Sun Oct 30 13:00:35 2005 @@ -63,9 +63,18 @@ //Huh? This seems to be used on ia64 even if !CONFIG_SMP void smp_send_event_check_mask(cpumask_t mask) { - printf("smp_send_event_check_mask called\n"); - //dummy(); - //send_IPI_mask(cpu_mask, EVENT_CHECK_VECTOR); + int cpu; + + /* Not for me. */ + cpu_clear(smp_processor_id(), mask); + if (cpus_empty(mask)) + return; + + printf("smp_send_event_check_mask called\n"); + + for (cpu = 0; cpu < NR_CPUS; ++cpu) + if (cpu_isset(cpu, mask)) + platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0); } @@ -249,6 +258,7 @@ send_IPI_single(smp_processor_id(), op); } +#ifndef XEN /* * Called with preeemption disabled. */ @@ -257,6 +267,7 @@ { platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0); } +#endif void smp_flush_tlb_all (void) @@ -395,15 +406,14 @@ if (wait) atomic_set(&data.finished, 0); - printk("smp_call_function: about to spin_lock \n"); spin_lock(&call_lock); - printk("smp_call_function: done with spin_lock \n"); +#if 0 //def XEN + printk("smp_call_function: %d lock\n", smp_processor_id ()); +#endif call_data = &data; mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC */ - printk("smp_call_function: about to send_IPI \n"); send_IPI_allbutself(IPI_CALL_FUNC); - printk("smp_call_function: done with send_IPI \n"); /* Wait for response */ while (atomic_read(&data.started) != cpus) @@ -414,9 +424,10 @@ cpu_relax(); call_data = NULL; - printk("smp_call_function: about to spin_unlock \n"); spin_unlock(&call_lock); +#if 0 //def XEN printk("smp_call_function: DONE WITH spin_unlock, returning \n"); +#endif return 0; } EXPORT_SYMBOL(smp_call_function); diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/linux-xen/smpboot.c --- a/xen/arch/ia64/linux-xen/smpboot.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/linux-xen/smpboot.c Sun Oct 30 13:00:35 2005 @@ -477,6 +477,22 @@ do_rest: task_for_booting_cpu = c_idle.idle; +#else + struct domain *idle; + struct vcpu *v; + void *stack; + + if ( (idle = do_createdomain(IDLE_DOMAIN_ID, cpu)) == NULL ) + panic("failed 'createdomain' for CPU %d", cpu); + set_bit(_DOMF_idle_domain, &idle->domain_flags); + v = idle->vcpu[0]; + + printf ("do_boot_cpu: cpu=%d, domain=%p, vcpu=%p\n", cpu, idle, v); + + task_for_booting_cpu = v; + + /* Set cpu number. 
*/ + get_thread_info(v)->cpu = cpu; #endif Dprintk("Sending wakeup vector %lu to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid); diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/vmx/mm.c --- a/xen/arch/ia64/vmx/mm.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/vmx/mm.c Sun Oct 30 13:00:35 2005 @@ -87,7 +87,7 @@ */ #include <xen/config.h> -#include <public/xen.h> +//#include <public/xen.h> #include <xen/init.h> #include <xen/lib.h> #include <xen/mm.h> diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/vmx/vmx_entry.S --- a/xen/arch/ia64/vmx/vmx_entry.S Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/vmx/vmx_entry.S Sun Oct 30 13:00:35 2005 @@ -720,11 +720,11 @@ // re-pin mappings for guest_vhpt - mov r24=IA64_TR_VHPT + mov r24=IA64_TR_PERVP_VHPT movl r25=PAGE_KERNEL ;; or loc5 = r25,loc5 // construct PA | page properties - mov r23 = VCPU_TLB_SHIFT<<2 + mov r23 = IA64_GRANULE_SHIFT <<2 ;; ptr.d in3,r23 ;; diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/vmx/vmx_hypercall.c --- a/xen/arch/ia64/vmx/vmx_hypercall.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/vmx/vmx_hypercall.c Sun Oct 30 13:00:35 2005 @@ -22,7 +22,7 @@ #include <xen/config.h> #include <xen/errno.h> #include <asm/vmx_vcpu.h> -#include <public/xen.h> +//#include <public/xen.h> #include <public/event_channel.h> #include <asm/vmmu.h> #include <asm/tlb.h> diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/vmx/vmx_init.c --- a/xen/arch/ia64/vmx/vmx_init.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/vmx/vmx_init.c Sun Oct 30 13:00:35 2005 @@ -47,6 +47,7 @@ #include <asm/processor.h> #include <asm/vmx.h> #include <xen/mm.h> +#include <public/arch-ia64.h> /* Global flag to identify whether Intel vmx feature is on */ u32 vmx_enabled = 0; @@ -134,39 +135,6 @@ /* Init stub for rr7 switch */ vmx_init_double_mapping_stub(); #endif -} - -void vmx_setup_platform(struct vcpu *v, struct vcpu_guest_context *c) -{ - struct domain *d = v->domain; - shared_iopage_t *sp; - - ASSERT(d != dom0); /* only for non-privileged vti domain */ - d->arch.vmx_platform.shared_page_va = __va(c->share_io_pg); - sp = get_sp(d); - memset((char *)sp,0,PAGE_SIZE); - /* FIXME: temp due to old CP */ - sp->sp_global.eport = 2; -#ifdef V_IOSAPIC_READY - sp->vcpu_number = 1; -#endif - /* TEMP */ - d->arch.vmx_platform.pib_base = 0xfee00000UL; - - /* One more step to enable interrupt assist */ - set_bit(ARCH_VMX_INTR_ASSIST, &v->arch.arch_vmx.flags); - /* Only open one port for I/O and interrupt emulation */ - if (v == d->vcpu[0]) { - memset(&d->shared_info->evtchn_mask[0], 0xff, - sizeof(d->shared_info->evtchn_mask)); - clear_bit(iopacket_port(d), &d->shared_info->evtchn_mask[0]); - } - - /* FIXME: only support PMT table continuously by far */ -// d->arch.pmt = __va(c->pt_base); - - - vmx_final_setup_domain(d); } typedef union { @@ -376,40 +344,6 @@ /* Other vmx specific initialization work */ } -/* - * Following stuff should really move to domain builder. However currently - * XEN/IA64 doesn't export physical -> machine page table to domain builder, - * instead only the copy. Also there's no hypercall to notify hypervisor - * IO ranges by far. Let's enhance it later. 
- */ - -#define MEM_G (1UL << 30) -#define MEM_M (1UL << 20) - -#define MMIO_START (3 * MEM_G) -#define MMIO_SIZE (512 * MEM_M) - -#define VGA_IO_START 0xA0000UL -#define VGA_IO_SIZE 0x20000 - -#define LEGACY_IO_START (MMIO_START + MMIO_SIZE) -#define LEGACY_IO_SIZE (64*MEM_M) - -#define IO_PAGE_START (LEGACY_IO_START + LEGACY_IO_SIZE) -#define IO_PAGE_SIZE PAGE_SIZE - -#define STORE_PAGE_START (IO_PAGE_START + IO_PAGE_SIZE) -#define STORE_PAGE_SIZE PAGE_SIZE - -#define IO_SAPIC_START 0xfec00000UL -#define IO_SAPIC_SIZE 0x100000 - -#define PIB_START 0xfee00000UL -#define PIB_SIZE 0x100000 - -#define GFW_START (4*MEM_G -16*MEM_M) -#define GFW_SIZE (16*MEM_M) - typedef struct io_range { unsigned long start; unsigned long size; @@ -424,17 +358,25 @@ {PIB_START, PIB_SIZE, GPFN_PIB}, }; -#define VMX_SYS_PAGES (2 + GFW_SIZE >> PAGE_SHIFT) +#define VMX_SYS_PAGES (2 + (GFW_SIZE >> PAGE_SHIFT)) #define VMX_CONFIG_PAGES(d) ((d)->max_pages - VMX_SYS_PAGES) int vmx_alloc_contig_pages(struct domain *d) { - unsigned int order, i, j; - unsigned long start, end, pgnr, conf_nr; + unsigned int order; + unsigned long i, j, start, end, pgnr, conf_nr; struct pfn_info *page; struct vcpu *v = d->vcpu[0]; ASSERT(!test_bit(ARCH_VMX_CONTIG_MEM, &v->arch.arch_vmx.flags)); + + /* Mark I/O ranges */ + for (i = 0; i < (sizeof(io_ranges) / sizeof(io_range_t)); i++) { + for (j = io_ranges[i].start; + j < io_ranges[i].start + io_ranges[i].size; + j += PAGE_SIZE) + map_domain_page(d, j, io_ranges[i].type); + } conf_nr = VMX_CONFIG_PAGES(d); order = get_order_from_pages(conf_nr); @@ -462,10 +404,20 @@ d->arch.max_pfn = end >> PAGE_SHIFT; - order = get_order_from_pages(VMX_SYS_PAGES); + order = get_order_from_pages(GFW_SIZE >> PAGE_SHIFT); if (unlikely((page = alloc_domheap_pages(d, order, 0)) == NULL)) { printk("Could not allocate order=%d pages for vmx contig alloc\n", order); + return -1; + } + + /* Map guest firmware */ + pgnr = page_to_pfn(page); + for (i = GFW_START; i < GFW_START + GFW_SIZE; i += PAGE_SIZE, pgnr++) + map_domain_page(d, i, pgnr << PAGE_SHIFT); + + if (unlikely((page = alloc_domheap_pages(d, 1, 0)) == NULL)) { + printk("Could not allocate order=1 pages for vmx contig alloc\n"); return -1; } @@ -474,20 +426,42 @@ map_domain_page(d, IO_PAGE_START, pgnr << PAGE_SHIFT); pgnr++; map_domain_page(d, STORE_PAGE_START, pgnr << PAGE_SHIFT); - pgnr++; - - /* Map guest firmware */ - for (i = GFW_START; i < GFW_START + GFW_SIZE; i += PAGE_SIZE, pgnr++) - map_domain_page(d, i, pgnr << PAGE_SHIFT); - - /* Mark I/O ranges */ - for (i = 0; i < (sizeof(io_ranges) / sizeof(io_range_t)); i++) { - for (j = io_ranges[i].start; - j < io_ranges[i].start + io_ranges[i].size; - j += PAGE_SIZE) - map_domain_page(d, j, io_ranges[i].type); - } set_bit(ARCH_VMX_CONTIG_MEM, &v->arch.arch_vmx.flags); return 0; } + +void vmx_setup_platform(struct vcpu *v, struct vcpu_guest_context *c) +{ + struct domain *d = v->domain; + shared_iopage_t *sp; + + ASSERT(d != dom0); /* only for non-privileged vti domain */ + d->arch.vmx_platform.shared_page_va = + __va(__gpa_to_mpa(d, IO_PAGE_START)); + sp = get_sp(d); + //memset((char *)sp,0,PAGE_SIZE); + //sp->sp_global.eport = 2; +#ifdef V_IOSAPIC_READY + sp->vcpu_number = 1; +#endif + /* TEMP */ + d->arch.vmx_platform.pib_base = 0xfee00000UL; + + /* One more step to enable interrupt assist */ + set_bit(ARCH_VMX_INTR_ASSIST, &v->arch.arch_vmx.flags); + /* Only open one port for I/O and interrupt emulation */ + if (v == d->vcpu[0]) { + memset(&d->shared_info->evtchn_mask[0], 0xff, + 
sizeof(d->shared_info->evtchn_mask)); + clear_bit(iopacket_port(d), &d->shared_info->evtchn_mask[0]); + } + + /* FIXME: only support PMT table continuously by far */ +// d->arch.pmt = __va(c->pt_base); + + + vmx_final_setup_domain(d); +} + + diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/vmx/vmx_irq_ia64.c --- a/xen/arch/ia64/vmx/vmx_irq_ia64.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/vmx/vmx_irq_ia64.c Sun Oct 30 13:00:35 2005 @@ -101,7 +101,10 @@ if (vector != IA64_TIMER_VECTOR) { /* FIXME: Leave IRQ re-route later */ - vmx_vcpu_pend_interrupt(dom0->vcpu[0],vector); + if (!VMX_DOMAIN(dom0->vcpu[0])) + vcpu_pend_interrupt(dom0->vcpu[0],vector); + else + vmx_vcpu_pend_interrupt(dom0->vcpu[0],vector); wake_dom0 = 1; } else { // FIXME: Handle Timer only now diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/vmx/vmx_phy_mode.c --- a/xen/arch/ia64/vmx/vmx_phy_mode.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/vmx/vmx_phy_mode.c Sun Oct 30 13:00:35 2005 @@ -157,13 +157,6 @@ #endif void -physical_itlb_miss(VCPU *vcpu, u64 vadr) -{ - physical_itlb_miss_dom0(vcpu, vadr); -} - - -void physical_itlb_miss_dom0(VCPU *vcpu, u64 vadr) { u64 psr; @@ -183,6 +176,13 @@ ia64_set_psr(psr); ia64_srlz_i(); return; +} + + +void +physical_itlb_miss(VCPU *vcpu, u64 vadr) +{ + physical_itlb_miss_dom0(vcpu, vadr); } diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/vmx/vmx_process.c --- a/xen/arch/ia64/vmx/vmx_process.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/vmx/vmx_process.c Sun Oct 30 13:00:35 2005 @@ -56,6 +56,38 @@ extern struct ia64_sal_retval sal_emulator(UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64,UINT64); extern void rnat_consumption (VCPU *vcpu); #define DOMN_PAL_REQUEST 0x110000 + +static UINT64 vec2off[68] = {0x0,0x400,0x800,0xc00,0x1000, 0x1400,0x1800, + 0x1c00,0x2000,0x2400,0x2800,0x2c00,0x3000,0x3400,0x3800,0x3c00,0x4000, + 0x4400,0x4800,0x4c00,0x5000,0x5100,0x5200,0x5300,0x5400,0x5500,0x5600, + 0x5700,0x5800,0x5900,0x5a00,0x5b00,0x5c00,0x5d00,0x5e00,0x5f00,0x6000, + 0x6100,0x6200,0x6300,0x6400,0x6500,0x6600,0x6700,0x6800,0x6900,0x6a00, + 0x6b00,0x6c00,0x6d00,0x6e00,0x6f00,0x7000,0x7100,0x7200,0x7300,0x7400, + 0x7500,0x7600,0x7700,0x7800,0x7900,0x7a00,0x7b00,0x7c00,0x7d00,0x7e00, + 0x7f00, +}; + + + +void vmx_reflect_interruption(UINT64 ifa,UINT64 isr,UINT64 iim, + UINT64 vector,REGS *regs) +{ + VCPU *vcpu = current; + UINT64 viha,vpsr = vmx_vcpu_get_psr(vcpu); + if(!(vpsr&IA64_PSR_IC)&&(vector!=5)){ + panic("Guest nested fault!"); + } + VCPU(vcpu,isr)=isr; + VCPU(vcpu,iipa) = regs->cr_iip; + vector=vec2off[vector]; + if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR) + VCPU(vcpu,iim) = iim; + else { + set_ifa_itir_iha(vcpu,ifa,1,1,1); + } + inject_guest_interruption(vcpu, vector); +} + IA64FAULT vmx_ia64_handle_break (unsigned long ifa, struct pt_regs *regs, unsigned long isr, unsigned long iim) { @@ -157,37 +189,6 @@ vmx_reflect_interruption(ifa,isr,iim,11,regs); } -static UINT64 vec2off[68] = {0x0,0x400,0x800,0xc00,0x1000, 0x1400,0x1800, - 0x1c00,0x2000,0x2400,0x2800,0x2c00,0x3000,0x3400,0x3800,0x3c00,0x4000, - 0x4400,0x4800,0x4c00,0x5000,0x5100,0x5200,0x5300,0x5400,0x5500,0x5600, - 0x5700,0x5800,0x5900,0x5a00,0x5b00,0x5c00,0x5d00,0x5e00,0x5f00,0x6000, - 0x6100,0x6200,0x6300,0x6400,0x6500,0x6600,0x6700,0x6800,0x6900,0x6a00, - 0x6b00,0x6c00,0x6d00,0x6e00,0x6f00,0x7000,0x7100,0x7200,0x7300,0x7400, - 0x7500,0x7600,0x7700,0x7800,0x7900,0x7a00,0x7b00,0x7c00,0x7d00,0x7e00, - 0x7f00, -}; - - - -void vmx_reflect_interruption(UINT64 ifa,UINT64 isr,UINT64 iim, - UINT64 
vector,REGS *regs) -{ - VCPU *vcpu = current; - UINT64 viha,vpsr = vmx_vcpu_get_psr(vcpu); - if(!(vpsr&IA64_PSR_IC)&&(vector!=5)){ - panic("Guest nested fault!"); - } - VCPU(vcpu,isr)=isr; - VCPU(vcpu,iipa) = regs->cr_iip; - vector=vec2off[vector]; - if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR) - VCPU(vcpu,iim) = iim; - else { - set_ifa_itir_iha(vcpu,ifa,1,1,1); - } - inject_guest_interruption(vcpu, vector); -} - void save_banked_regs_to_vpd(VCPU *v, REGS *regs) { @@ -271,10 +272,10 @@ { IA64_PSR vpsr; CACHE_LINE_TYPE type; - u64 vhpt_adr; + u64 vhpt_adr, gppa; ISR misr; ia64_rr vrr; - REGS *regs; +// REGS *regs; thash_cb_t *vtlb, *vhpt; thash_data_t *data, me; VCPU *v = current; @@ -314,9 +315,9 @@ // prepare_if_physical_mode(v); if(data=vtlb_lookup_ex(vtlb, vrr.rid, vadr,type)){ - if(v->domain!=dom0&&type==DSIDE_TLB && __gpfn_is_io(v->domain,data->ppn>>(PAGE_SHIFT-12))){ - vadr=(vadr&((1UL<<data->ps)-1))+(data->ppn>>(data->ps-12)<<data->ps); - emulate_io_inst(v, vadr, data->ma); + gppa = (vadr&((1UL<<data->ps)-1))+(data->ppn>>(data->ps-12)<<data->ps); + if(v->domain!=dom0&&type==DSIDE_TLB && __gpfn_is_io(v->domain,gppa>>PAGE_SHIFT)){ + emulate_io_inst(v, gppa, data->ma); return IA64_FAULT; } diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/vmx/vmx_support.c --- a/xen/arch/ia64/vmx/vmx_support.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/vmx/vmx_support.c Sun Oct 30 13:00:35 2005 @@ -158,7 +158,7 @@ #ifdef V_IOSAPIC_READY vlapic_update_ext_irq(v); #else - panic("IOSAPIC model is missed in qemu\n"); + //panic("IOSAPIC model is missed in qemu\n"); #endif return; } diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/vmx/vtlb.c --- a/xen/arch/ia64/vmx/vtlb.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/vmx/vtlb.c Sun Oct 30 13:00:35 2005 @@ -387,6 +387,15 @@ thash_insert(hcb->ts->vhpt, entry, va); return; } + +#if 1 + vrr=vmx_vcpu_rr(current, va); + if (vrr.ps != entry->ps) { + printk("not preferred ps with va: 0x%lx\n", va); + return; + } +#endif + flag = 1; gppn = (POFFSET(va,entry->ps)|PAGEALIGN((entry->ppn<<12),entry->ps))>>PAGE_SHIFT; ppns = PAGEALIGN((entry->ppn<<12),entry->ps); diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/xen/acpi.c --- a/xen/arch/ia64/xen/acpi.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/xen/acpi.c Sun Oct 30 13:00:35 2005 @@ -121,6 +121,7 @@ #ifdef CONFIG_ACPI_BOOT #define ACPI_MAX_PLATFORM_INTERRUPTS 256 +#define NR_IOSAPICS 4 #if 0 /* Array to record platform interrupt vectors for generic interrupt routing. */ @@ -162,7 +163,6 @@ struct acpi_table_madt * acpi_madt __initdata; static u8 has_8259; -#if 0 static int __init acpi_parse_lapic_addr_ovr ( acpi_table_entry_header *header, const unsigned long end) @@ -247,11 +247,12 @@ acpi_table_print_madt_entry(header); +#if 0 iosapic_init(iosapic->address, iosapic->global_irq_base); - - return 0; -} - +#endif + + return 0; +} static int __init acpi_parse_plat_int_src ( @@ -267,6 +268,7 @@ acpi_table_print_madt_entry(header); +#if 0 /* * Get vector assignment for this interrupt, set attributes, * and program the IOSAPIC routing table. @@ -280,6 +282,7 @@ (plintsrc->flags.trigger == 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL); platform_intr_list[plintsrc->type] = vector; +#endif return 0; } @@ -297,12 +300,13 @@ acpi_table_print_madt_entry(header); +#if 0 iosapic_override_isa_irq(p->bus_irq, p->global_irq, (p->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW, (p->flags.trigger == 1) ? 
IOSAPIC_EDGE : IOSAPIC_LEVEL); - return 0; -} - +#endif + return 0; +} static int __init acpi_parse_nmi_src (acpi_table_entry_header *header, const unsigned long end) @@ -331,8 +335,10 @@ */ sal_platform_features |= IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT; +#if 0 /*Start cyclone clock*/ cyclone_setup(0); +#endif } } @@ -350,7 +356,9 @@ #else has_8259 = acpi_madt->flags.pcat_compat; #endif +#if 0 iosapic_system_init(has_8259); +#endif /* Get base address of IPI Message Block */ @@ -364,7 +372,6 @@ return 0; } -#endif #ifdef CONFIG_ACPI_NUMA @@ -529,6 +536,7 @@ return acpi_register_irq(gsi, polarity, trigger); } EXPORT_SYMBOL(acpi_register_gsi); +#endif static int __init acpi_parse_fadt (unsigned long phys_addr, unsigned long size) { @@ -550,10 +558,11 @@ if (fadt->iapc_boot_arch & BAF_LEGACY_DEVICES) acpi_legacy_devices = 1; +#if 0 acpi_register_gsi(fadt->sci_int, ACPI_ACTIVE_LOW, ACPI_LEVEL_SENSITIVE); - return 0; -} -#endif +#endif + return 0; +} unsigned long __init acpi_find_rsdp (void) @@ -567,7 +576,6 @@ return rsdp_phys; } -#if 0 int __init acpi_boot_init (void) { @@ -646,6 +654,7 @@ printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, total_cpus); return 0; } +#if 0 int acpi_gsi_to_irq (u32 gsi, unsigned int *irq) { diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/xen/dom0_ops.c --- a/xen/arch/ia64/xen/dom0_ops.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/xen/dom0_ops.c Sun Oct 30 13:00:35 2005 @@ -177,13 +177,8 @@ for ( i = start_page; i < (start_page + nr_pages); i++ ) { - page = map_new_domain_page(d, i << PAGE_SHIFT); - if ( page == NULL ) - { - ret = -ENOMEM; - break; - } - pfn = page_to_pfn(page); + pfn = __gpfn_to_mfn_foreign(d, i); + if ( put_user(pfn, buffer) ) { ret = -EFAULT; diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/xen/dom_fw.c --- a/xen/arch/ia64/xen/dom_fw.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/xen/dom_fw.c Sun Oct 30 13:00:35 2005 @@ -301,7 +301,7 @@ // pal code must be mapped by a TR when pal is called, however // calls are rare enough that we will map it lazily rather than // at every context switch - efi_map_pal_code(); + //efi_map_pal_code(); switch (index) { case PAL_MEM_ATTRIB: status = ia64_pal_mem_attrib(&r9); diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/xen/domain.c --- a/xen/arch/ia64/xen/domain.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/xen/domain.c Sun Oct 30 13:00:35 2005 @@ -23,11 +23,13 @@ #include <asm/io.h> #include <asm/processor.h> #include <asm/desc.h> +#include <asm/hw_irq.h> //#include <asm/mpspec.h> #include <xen/irq.h> #include <xen/event.h> //#include <xen/shadow.h> #include <xen/console.h> +#include <xen/compile.h> #include <xen/elf.h> //#include <asm/page.h> @@ -58,6 +60,7 @@ // initialized by arch/ia64/setup.c:find_initrd() unsigned long initrd_start = 0, initrd_end = 0; +extern unsigned long running_on_sim; #define IS_XEN_ADDRESS(d,a) ((a >= d->xen_vastart) && (a <= d->xen_vaend)) @@ -75,35 +78,21 @@ //free_page((unsigned long)d->mm.perdomain_pt); } -int hlt_counter; - -void disable_hlt(void) -{ - hlt_counter++; -} - -void enable_hlt(void) -{ - hlt_counter--; -} - static void default_idle(void) { - if ( hlt_counter == 0 ) - { + int cpu = smp_processor_id(); local_irq_disable(); - if ( !softirq_pending(smp_processor_id()) ) + if ( !softirq_pending(cpu)) safe_halt(); - //else - local_irq_enable(); - } -} - -void continue_cpu_idle_loop(void) + local_irq_enable(); +} + +static void continue_cpu_idle_loop(void) { int cpu = smp_processor_id(); for ( ; ; ) { + printf ("idle%dD\n", cpu); #ifdef IA64 // 
__IRQ_STAT(cpu, idle_timestamp) = jiffies #else @@ -111,23 +100,32 @@ #endif while ( !softirq_pending(cpu) ) default_idle(); + add_preempt_count(SOFTIRQ_OFFSET); raise_softirq(SCHEDULE_SOFTIRQ); do_softirq(); + sub_preempt_count(SOFTIRQ_OFFSET); } } void startup_cpu_idle_loop(void) { + int cpu = smp_processor_id (); /* Just some sanity to ensure that the scheduler is set up okay. */ ASSERT(current->domain == IDLE_DOMAIN_ID); + printf ("idle%dA\n", cpu); raise_softirq(SCHEDULE_SOFTIRQ); +#if 0 /* All this work is done within continue_cpu_idle_loop */ + printf ("idle%dB\n", cpu); + asm volatile ("mov ar.k2=r0"); do_softirq(); + printf ("idle%dC\n", cpu); /* * Declares CPU setup done to the boot processor. * Therefore memory barrier to ensure state is visible. */ smp_mb(); +#endif #if 0 //do we have to ensure the idle task has a shared page so that, for example, //region registers can be loaded from it. Apparently not... @@ -204,6 +202,15 @@ while (1); } memset(d->shared_info, 0, PAGE_SIZE); + if (v == d->vcpu[0]) + memset(&d->shared_info->evtchn_mask[0], 0xff, + sizeof(d->shared_info->evtchn_mask)); +#if 0 + d->vcpu[0].arch.privregs = + alloc_xenheap_pages(get_order(sizeof(mapped_regs_t))); + printf("arch_vcpu_info=%p\n", d->vcpu[0].arch.privregs); + memset(d->vcpu.arch.privregs, 0, PAGE_SIZE); +#endif v->vcpu_info = &(d->shared_info->vcpu_data[0]); d->max_pages = (128UL*1024*1024)/PAGE_SIZE; // 128MB default // FIXME @@ -233,17 +240,21 @@ v->arch.breakimm = d->arch.breakimm; d->arch.sys_pgnr = 0; - d->arch.mm = xmalloc(struct mm_struct); - if (unlikely(!d->arch.mm)) { - printk("Can't allocate mm_struct for domain %d\n",d->domain_id); - return -ENOMEM; - } - memset(d->arch.mm, 0, sizeof(*d->arch.mm)); - d->arch.mm->pgd = pgd_alloc(d->arch.mm); - if (unlikely(!d->arch.mm->pgd)) { - printk("Can't allocate pgd for domain %d\n",d->domain_id); - return -ENOMEM; - } + if (d->domain_id != IDLE_DOMAIN_ID) { + d->arch.mm = xmalloc(struct mm_struct); + if (unlikely(!d->arch.mm)) { + printk("Can't allocate mm_struct for domain %d\n",d->domain_id); + return -ENOMEM; + } + memset(d->arch.mm, 0, sizeof(*d->arch.mm)); + d->arch.mm->pgd = pgd_alloc(d->arch.mm); + if (unlikely(!d->arch.mm->pgd)) { + printk("Can't allocate pgd for domain %d\n",d->domain_id); + return -ENOMEM; + } + } else + d->arch.mm = NULL; + printf ("arch_do_create_domain: domain=%p\n", d); } void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c) @@ -267,6 +278,14 @@ printf("arch_set_info_guest\n"); if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) ) return 0; + + /* Sync d/i cache conservatively */ + if (!running_on_sim) { + ret = ia64_pal_cache_flush(4, 0, &progress, NULL); + if (ret != PAL_STATUS_SUCCESS) + panic("PAL CACHE FLUSH failed for domain.\n"); + printk("Sync i/d cache for dom0 image SUCC\n"); + } if (c->flags & VGCF_VMX_GUEST) { if (!vmx_enabled) { @@ -527,7 +546,8 @@ if (pte_present(*pte)) { //printk("lookup_domain_page: found mapping for %lx, pte=%lx\n",mpaddr,pte_val(*pte)); return *(unsigned long *)pte; - } + } else if (VMX_DOMAIN(d->vcpu[0])) + return GPFN_INV_MASK; } } } @@ -779,7 +799,6 @@ set_bit(_DOMF_physdev_access, &d->domain_flags); } -extern unsigned long running_on_sim; unsigned int vmx_dom0 = 0; int construct_dom0(struct domain *d, unsigned long image_start, unsigned long image_len, @@ -930,6 +949,7 @@ si = (start_info_t *)alloc_xenheap_page(); memset(si, 0, PAGE_SIZE); d->shared_info->arch.start_info_pfn = __pa(si) >> PAGE_SHIFT; + sprintf(si->magic, "Xen-%i.%i", XEN_VERSION, 
XEN_SUBVERSION); #if 0 si->nr_pages = d->tot_pages; diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/xen/hyperprivop.S --- a/xen/arch/ia64/xen/hyperprivop.S Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/xen/hyperprivop.S Sun Oct 30 13:00:35 2005 @@ -13,6 +13,10 @@ #include <asm/processor.h> #include <asm/system.h> #include <public/arch-ia64.h> + +#define _PAGE_PPN_MASK 0x0003fffffffff000 //asm/pgtable.h doesn't do assembly +#define PAGE_PHYS 0x0010000000000761 //__pgprot(__DIRTY_BITS|_PAGE_PL_2|_PAGE_AR_RWX) +#define _PAGE_PL_2 (2<<7) #if 1 // change to 0 to turn off all fast paths #define FAST_HYPERPRIVOPS @@ -24,6 +28,7 @@ #define FAST_RFI #define FAST_SSM_I #define FAST_PTC_GA +#undef FAST_ITC // working but default off for now #undef RFI_TO_INTERRUPT // not working yet #endif @@ -802,8 +807,11 @@ // OK, now all set to go except for switch to virtual bank1 mov r22=1;; st4 [r20]=r22; mov r30=r2; mov r29=r3;; + adds r16=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18 adds r2=XSI_BANK1_OFS-XSI_PSR_IC_OFS,r18; adds r3=(XSI_BANK1_OFS+8)-XSI_PSR_IC_OFS,r18;; + ld8 r16=[r16];; + mov ar.unat=r16;; bsw.1;; // FIXME?: ar.unat is not really handled correctly, // but may not matter if the OS is NaT-clean @@ -1663,10 +1671,159 @@ ;; END(hyper_ptc_ga) +// Registers at entry +// r17 = break immediate (XEN_HYPER_ITC_D or I) +// r18 == XSI_PSR_IC_OFS +// r31 == pr +GLOBAL_ENTRY(hyper_itc) +ENTRY(hyper_itc_i) + // fall through, hyper_itc_d handles both i and d ENTRY(hyper_itc_d) +#ifndef FAST_ITC br.spnt.many dispatch_break_fault ;; +#endif + adds r23=XSI_ITIR_OFS-XSI_PSR_IC_OFS,r18 ;; + ld8 r23=[r23];; + extr.u r24=r23,2,6;; // r24==logps + cmp.gt p7,p0=PAGE_SHIFT,r24 +(p7) br.spnt.many dispatch_break_fault ;; + // translate_domain_pte(r8=pteval,PSCB(ifa)=address,r24=itir) + mov r19=1;; + shl r20=r19,r24;; + adds r20=-1,r20;; // r20 == mask + movl r19=_PAGE_PPN_MASK;; + and r22=r8,r19;; // r22 == pteval & _PAGE_PPN_MASK + andcm r19=r22,r20;; + adds r21=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;; + ld8 r21=[r21];; + and r20=r21,r20;; + or r19=r19,r20;; // r19 == mpaddr + movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; + ld8 r27=[r27];; + adds r27=IA64_VCPU_DOMAIN_OFFSET,r27;; + ld8 r27=[r27];; +// FIXME: is the global var dom0 always pinned? 
assume so for now + movl r28=dom0;; + ld8 r28=[r28];; +// FIXME: for now, only handle dom0 (see lookup_domain_mpa below) + cmp.ne p7,p0=r27,r28 +(p7) br.spnt.many dispatch_break_fault ;; + // if region 6, go slow way +#ifdef FAST_HYPERPRIVOP_CNT + cmp.eq p6,p7=XEN_HYPER_ITC_D,r17;; +(p6) movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_ITC_D);; +(p7) movl r20=fast_hyperpriv_cnt+(8*XEN_HYPER_ITC_I);; + ld8 r21=[r20];; + adds r21=1,r21;; + st8 [r20]=r21;; +#endif +// FIXME: for now, just do domain0 and skip mpaddr range checks + dep r20=r0,r19,0,PAGE_SHIFT + movl r21=PAGE_PHYS ;; + or r20=r20,r21 ;; // r20==return value from lookup_domain_mpa + // r8=pteval,r20=pteval2 + movl r19=_PAGE_PPN_MASK + movl r21=_PAGE_PL_2;; + andcm r25=r8,r19;; // r25==pteval & ~_PAGE_PPN_MASK + and r22=r20,r19;; + or r22=r22,r21;; + or r22=r22,r25;; // r22==return value from translate_domain_pte + // done with translate_domain_pte + // now do vcpu_itc_no_srlz(vcpu,IorD,ifa,r22=pte,r8=mppte,r24=logps) +// FIXME: for now, just domain0 and skip range check + // psr.ic already cleared + // NOTE: r24 still contains ps (from above) + shladd r24=r24,2,r0;; + mov cr.itir=r24;; + adds r23=XSI_IFA_OFS-XSI_PSR_IC_OFS,r18 ;; + ld8 r23=[r23];; + mov cr.ifa=r23;; + cmp.eq p6,p7=XEN_HYPER_ITC_D,r17;; +(p6) itc.d r22;; +(p7) itc.i r22;; + dv_serialize_data + // FIXME: how do I make assembler warnings go away here? + // vhpt_insert(r23=vaddr,r22=pte,r24=logps<<2) + thash r28=r23 + or r26=1,r22;; + ttag r21=r23 + adds r25=8,r28 + mov r19=r28;; + st8 [r25]=r24 + adds r20=16,r28;; + st8 [r19]=r26 + st8 [r20]=r21;; + // vcpu_set_tr_entry(trp,r22=pte|1,r24=itir,r23=ifa) + // TR_ENTRY = {page_flags,itir,addr,rid} + cmp.eq p6,p7=XEN_HYPER_ITC_D,r17 + movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; + ld8 r27=[r27];; + adds r28=IA64_VCPU_STARTING_RID_OFFSET,r27 +(p6) adds r27=IA64_VCPU_DTLB_OFFSET,r27 +(p7) adds r27=IA64_VCPU_ITLB_OFFSET,r27;; + st8 [r27]=r22,8;; // page_flags: already has pl >= 2 and p==1 + st8 [r27]=r24,8;; // itir + mov r19=-4096;; + and r23=r23,r19;; + st8 [r27]=r23,8;; // ifa & ~0xfff +// ?? is virtualize_rid(v,get_rr(ifa))==vcpu_get_rr(ifa)?? YES!! 
+ adds r29 = XSI_RR0_OFS-XSI_PSR_IC_OFS,r18 + extr.u r25=r23,61,3;; + shladd r29=r25,3,r29;; + ld8 r29=[r29];; + movl r20=0xffffff00;; + and r29=r29,r20;; + st8 [r27]=r29,-8;; // rid + //if ps > 12 + cmp.eq p7,p0=12<<2,r24 +(p7) br.cond.sptk.many 1f;; + // if (ps > 12) { + // trp->ppn &= ~((1UL<<(ps-12))-1); trp->vadr &= ~((1UL<<ps)-1); } + extr.u r29=r24,2,6 + mov r28=1;; + shl r26=r28,r29;; + adds r29=-12,r29;; + shl r25=r28,r29;; + mov r29=-1 + adds r26=-1,r26 + adds r25=-1,r25;; + andcm r26=r29,r26 // ~((1UL<<ps)-1) + andcm r25=r29,r25;; // ~((1UL<<(ps-12))-1) + ld8 r29=[r27];; + and r29=r29,r26;; + st8 [r27]=r29,-16;; + ld8 r29=[r27];; + extr.u r28=r29,12,38;; + movl r26=0xfffc000000000fff;; + and r29=r29,r26 + and r28=r28,r25;; + shl r28=r28,12;; + or r29=r29,r28;; + st8 [r27]=r29;; +1: // done with vcpu_set_tr_entry + //PSCBX(vcpu,i/dtlb_pte) = mp_pte + movl r27=THIS_CPU(cpu_kr)+IA64_KR_CURRENT_OFFSET;; + ld8 r27=[r27];; + cmp.eq p6,p7=XEN_HYPER_ITC_D,r17;; +(p6) adds r27=IA64_VCPU_DTLB_PTE_OFFSET,r27 +(p7) adds r27=IA64_VCPU_ITLB_PTE_OFFSET,r27;; + st8 [r27]=r8;; + // done with vcpu_itc_no_srlz + + // done, increment to point to next instruction + mov r29=cr.ipsr + mov r30=cr.iip;; + extr.u r26=r29,41,2 ;; + cmp.eq p6,p7=2,r26 ;; +(p6) mov r26=0 +(p6) adds r30=16,r30 +(p7) adds r26=1,r26 + ;; + dep r29=r26,r29,41,2 + ;; + mov cr.ipsr=r29 + mov cr.iip=r30 + mov pr=r31,-1 ;; + rfi + ;; END(hyper_itc_d) - -ENTRY(hyper_itc_i) - br.spnt.many dispatch_break_fault ;; -END(hyper_itc_i) diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/xen/ivt.S --- a/xen/arch/ia64/xen/ivt.S Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/xen/ivt.S Sun Oct 30 13:00:35 2005 @@ -484,6 +484,7 @@ ///////////////////////////////////////////////////////////////////////////////////////// // 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) ENTRY(nested_dtlb_miss) + DBG_FAULT(5) /* * In the absence of kernel bugs, we get here when the virtually mapped linear * page table is accessed non-speculatively (e.g., in the Dirty-bit, Instruction @@ -552,10 +553,10 @@ ///////////////////////////////////////////////////////////////////////////////////////// // 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) ENTRY(ikey_miss) + DBG_FAULT(6) #ifdef XEN REFLECT(6) #endif - DBG_FAULT(6) FAULT(6) END(ikey_miss) @@ -597,10 +598,10 @@ ///////////////////////////////////////////////////////////////////////////////////////// // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) ENTRY(dkey_miss) + DBG_FAULT(7) #ifdef XEN REFLECT(7) #endif - DBG_FAULT(7) FAULT(7) END(dkey_miss) @@ -608,10 +609,10 @@ ///////////////////////////////////////////////////////////////////////////////////////// // 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) ENTRY(dirty_bit) + DBG_FAULT(8) #ifdef XEN REFLECT(8) #endif - DBG_FAULT(8) /* * What we do here is to simply turn on the dirty bit in the PTE. We need to * update both the page-table and the TLB entry. 
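The fast hyper_itc path added above is fairly dense IA-64 assembly; the pte arithmetic it performs is easier to check in C. Below is a minimal sketch of just the translate_domain_pte() math and the conditions that still branch to dispatch_break_fault. The constants are copied from the #defines in the hunk, a PAGE_SHIFT of 14 (16KB pages) is an assumption, and the actual itc.d/itc.i, VHPT insert and TR-entry bookkeeping are deliberately omitted:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT       14                      /* assumption: 16KB pages */
    #define PAGE_PPN_MASK    0x0003fffffffff000UL    /* _PAGE_PPN_MASK from the hunk */
    #define PAGE_FLAGS_PHYS  0x0010000000000761UL    /* PAGE_PHYS from the hunk */
    #define PAGE_PL_2        (2UL << 7)

    /* Rough C rendition of the arithmetic in the fast path above; the real code
     * then inserts the result with itc.d/itc.i and updates the VHPT and the
     * vcpu's one-entry i/d TLB. */
    static int fast_itc_sketch(uint64_t pteval, uint64_t ifa, uint64_t itir,
                               int is_dom0, uint64_t *out_pte)
    {
        uint64_t logps = (itir >> 2) & 0x3f;
        if (logps < PAGE_SHIFT || !is_dom0)
            return 0;                                 /* bounce to the slow path */

        /* translate_domain_pte(): splice the low bits of the faulting address
         * into the guest-supplied ppn to get the metaphysical address. */
        uint64_t mask   = (1UL << logps) - 1;
        uint64_t mpaddr = (pteval & PAGE_PPN_MASK & ~mask) | (ifa & mask);

        /* For dom0 the fast path skips lookup_domain_mpa(): metaphysical is
         * treated as machine, which is why other domains are not handled here. */
        uint64_t mpte = (mpaddr & ~((1UL << PAGE_SHIFT) - 1)) | PAGE_FLAGS_PHYS;

        *out_pte = (pteval & ~PAGE_PPN_MASK) | (mpte & PAGE_PPN_MASK) | PAGE_PL_2;
        return 1;                                     /* caller inserts this pte */
    }

    int main(void)
    {
        uint64_t pte;
        if (fast_itc_sketch(0x0000000010000661UL, 0xe000000000012345UL,
                            14UL << 2, 1, &pte))
            printf("inserted pte %016llx\n", (unsigned long long)pte);
        return 0;
    }

The dom0-only check is the key restriction: for any other domain the metaphysical address would still need a real lookup_domain_mpa() walk, so the handler falls back to the C slow path.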
To efficiently access the PTE, @@ -673,6 +674,7 @@ ///////////////////////////////////////////////////////////////////////////////////////// // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) ENTRY(iaccess_bit) + DBG_FAULT(9) #ifdef XEN mov r31=pr; mov r16=cr.isr @@ -681,7 +683,6 @@ movl r20=0x2400 br.sptk.many fast_access_reflect;; #endif - DBG_FAULT(9) // Like Entry 8, except for instruction access mov r16=cr.ifa // get the address that caused the fault movl r30=1f // load continuation point in case of nested fault @@ -746,6 +747,7 @@ ///////////////////////////////////////////////////////////////////////////////////////// // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) ENTRY(daccess_bit) + DBG_FAULT(10) #ifdef XEN mov r31=pr; mov r16=cr.isr @@ -754,7 +756,6 @@ movl r20=0x2800 br.sptk.many fast_access_reflect;; #endif - DBG_FAULT(10) // Like Entry 8, except for data access mov r16=cr.ifa // get the address that caused the fault movl r30=1f // load continuation point in case of nested fault @@ -971,8 +972,10 @@ mov out0=cr.ivr // pass cr.ivr as first arg #endif add out1=16,sp // pass pointer to pt_regs as second arg +#ifndef XEN ;; srlz.d // make sure we see the effect of cr.ivr +#endif movl r14=ia64_leave_kernel ;; mov rp=r14 @@ -1363,10 +1366,10 @@ ///////////////////////////////////////////////////////////////////////////////////////// // 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49) ENTRY(page_not_present) + DBG_FAULT(20) #ifdef XEN REFLECT(20) #endif - DBG_FAULT(20) mov r16=cr.ifa rsm psr.dt /* @@ -1386,10 +1389,10 @@ ///////////////////////////////////////////////////////////////////////////////////////// // 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52) ENTRY(key_permission) + DBG_FAULT(21) #ifdef XEN REFLECT(21) #endif - DBG_FAULT(21) mov r16=cr.ifa rsm psr.dt mov r31=pr @@ -1402,10 +1405,10 @@ ///////////////////////////////////////////////////////////////////////////////////////// // 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) ENTRY(iaccess_rights) + DBG_FAULT(22) #ifdef XEN REFLECT(22) #endif - DBG_FAULT(22) mov r16=cr.ifa rsm psr.dt mov r31=pr @@ -1418,6 +1421,7 @@ ///////////////////////////////////////////////////////////////////////////////////////// // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) ENTRY(daccess_rights) + DBG_FAULT(23) #ifdef XEN mov r31=pr; mov r16=cr.isr @@ -1426,7 +1430,6 @@ movl r20=0x5300 br.sptk.many fast_access_reflect;; #endif - DBG_FAULT(23) mov r16=cr.ifa rsm psr.dt mov r31=pr @@ -1459,10 +1462,31 @@ ///////////////////////////////////////////////////////////////////////////////////////// // 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) ENTRY(disabled_fp_reg) -#ifdef XEN + DBG_FAULT(25) +#ifdef XEN +#if 0 + mov r20=pr + movl r16=0x2000000000000000 + movl r17=0x2000000000176b60 + mov r18=cr.iip + mov r19=rr[r16] + movl r22=0xe95d0439 + ;; + mov pr=r0,-1 + ;; + cmp.eq p6,p7=r22,r19 + ;; + (p6) cmp.eq p8,p9=r17,r18 + (p8) br.sptk.few floating_panic + ;; + mov pr=r20,-1 + ;; +#endif REFLECT(25) -#endif - DBG_FAULT(25) +//floating_panic: +// br.sptk.many floating_panic + ;; +#endif rsm psr.dfh // ensure we can access fph ;; srlz.d @@ -1475,10 +1499,10 @@ ///////////////////////////////////////////////////////////////////////////////////////// // 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) ENTRY(nat_consumption) + DBG_FAULT(26) #ifdef XEN REFLECT(26) #endif - DBG_FAULT(26) FAULT(26) END(nat_consumption) @@ -1486,11 +1510,11 @@ 
///////////////////////////////////////////////////////////////////////////////////////// // 0x5700 Entry 27 (size 16 bundles) Speculation (40) ENTRY(speculation_vector) + DBG_FAULT(27) #ifdef XEN // this probably need not reflect... REFLECT(27) #endif - DBG_FAULT(27) /* * A [f]chk.[as] instruction needs to take the branch to the recovery code but * this part of the architecture is not implemented in hardware on some CPUs, such @@ -1533,10 +1557,10 @@ ///////////////////////////////////////////////////////////////////////////////////////// // 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) ENTRY(debug_vector) + DBG_FAULT(29) #ifdef XEN REFLECT(29) #endif - DBG_FAULT(29) FAULT(29) END(debug_vector) @@ -1544,10 +1568,10 @@ ///////////////////////////////////////////////////////////////////////////////////////// // 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) ENTRY(unaligned_access) + DBG_FAULT(30) #ifdef XEN REFLECT(30) #endif - DBG_FAULT(30) mov r16=cr.ipsr mov r31=pr // prepare to save predicates ;; @@ -1558,10 +1582,10 @@ ///////////////////////////////////////////////////////////////////////////////////////// // 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) ENTRY(unsupported_data_reference) + DBG_FAULT(31) #ifdef XEN REFLECT(31) #endif - DBG_FAULT(31) FAULT(31) END(unsupported_data_reference) @@ -1569,10 +1593,10 @@ ///////////////////////////////////////////////////////////////////////////////////////// // 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64) ENTRY(floating_point_fault) + DBG_FAULT(32) #ifdef XEN REFLECT(32) #endif - DBG_FAULT(32) FAULT(32) END(floating_point_fault) @@ -1580,10 +1604,10 @@ ///////////////////////////////////////////////////////////////////////////////////////// // 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) ENTRY(floating_point_trap) + DBG_FAULT(33) #ifdef XEN REFLECT(33) #endif - DBG_FAULT(33) FAULT(33) END(floating_point_trap) @@ -1591,10 +1615,10 @@ ///////////////////////////////////////////////////////////////////////////////////////// // 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) ENTRY(lower_privilege_trap) + DBG_FAULT(34) #ifdef XEN REFLECT(34) #endif - DBG_FAULT(34) FAULT(34) END(lower_privilege_trap) @@ -1602,10 +1626,10 @@ ///////////////////////////////////////////////////////////////////////////////////////// // 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) ENTRY(taken_branch_trap) + DBG_FAULT(35) #ifdef XEN REFLECT(35) #endif - DBG_FAULT(35) FAULT(35) END(taken_branch_trap) @@ -1613,10 +1637,10 @@ ///////////////////////////////////////////////////////////////////////////////////////// // 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) ENTRY(single_step_trap) + DBG_FAULT(36) #ifdef XEN REFLECT(36) #endif - DBG_FAULT(36) FAULT(36) END(single_step_trap) @@ -1672,10 +1696,10 @@ ///////////////////////////////////////////////////////////////////////////////////////// // 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) ENTRY(ia32_exception) + DBG_FAULT(45) #ifdef XEN REFLECT(45) #endif - DBG_FAULT(45) FAULT(45) END(ia32_exception) @@ -1683,10 +1707,10 @@ ///////////////////////////////////////////////////////////////////////////////////////// // 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) ENTRY(ia32_intercept) + DBG_FAULT(46) #ifdef XEN REFLECT(46) #endif - DBG_FAULT(46) #ifdef CONFIG_IA32_SUPPORT mov r31=pr mov r16=cr.isr @@ -1716,10 +1740,10 @@ 
///////////////////////////////////////////////////////////////////////////////////////// // 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74) ENTRY(ia32_interrupt) + DBG_FAULT(47) #ifdef XEN REFLECT(47) #endif - DBG_FAULT(47) #ifdef CONFIG_IA32_SUPPORT mov r31=pr br.sptk.many dispatch_to_ia32_handler diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/xen/process.c --- a/xen/arch/ia64/xen/process.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/xen/process.c Sun Oct 30 13:00:35 2005 @@ -62,11 +62,23 @@ return 0; } +#include <xen/sched-if.h> + +extern struct schedule_data schedule_data[NR_CPUS]; + void schedule_tail(struct vcpu *next) { unsigned long rr7; //printk("current=%lx,shared_info=%lx\n",current,current->vcpu_info); //printk("next=%lx,shared_info=%lx\n",next,next->vcpu_info); + + // TG: Real HACK FIXME. + // This is currently necessary because when a new domain is started, + // the context_switch function of xen/common/schedule.c(__enter_scheduler) + // never returns. Therefore, the lock must be released. + // schedule_tail is only called when a domain is started. + spin_unlock_irq(&schedule_data[current->processor].schedule_lock); + /* rr7 will be postponed to last point when resuming back to guest */ if(VMX_DOMAIN(current)){ vmx_load_all_rr(current); @@ -733,6 +745,8 @@ case 26: printf("*** NaT fault... attempting to handle as privop\n"); printf("isr=%p, ifa=%p,iip=%p,ipsr=%p\n",isr,ifa,regs->cr_iip,psr); + regs->eml_unat = 0; + return; vector = priv_emulate(v,regs,isr); if (vector == IA64_NO_FAULT) { printf("*** Handled privop masquerading as NaT fault\n"); diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/xen/regionreg.c --- a/xen/arch/ia64/xen/regionreg.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/xen/regionreg.c Sun Oct 30 13:00:35 2005 @@ -15,7 +15,8 @@ #include <asm/regionreg.h> #include <asm/vhpt.h> #include <asm/vcpu.h> -extern void ia64_new_rr7(unsigned long rid,void *shared_info, void *shared_arch_info); +extern void ia64_new_rr7(unsigned long rid,void *shared_info, void *shared_arch_info, unsigned long p_vhpt, unsigned long v_pal); +extern void *pal_vaddr; #define IA64_MIN_IMPL_RID_BITS (IA64_MIN_IMPL_RID_MSB+1) @@ -66,9 +67,12 @@ { ia64_rr rrv; + rrv.rrval = 0; // Or else may see reserved bit fault rrv.rid = allocate_reserved_rid(); rrv.ps = PAGE_SHIFT; rrv.ve = 0; + /* Mangle metaphysical rid */ + rrv.rrval = vmMangleRID(rrv.rrval); return rrv.rrval; } @@ -213,6 +217,7 @@ unsigned long rreg = REGION_NUMBER(rr); ia64_rr rrv, newrrv, memrrv; unsigned long newrid; + extern unsigned long vhpt_paddr; if (val == -1) return 1; @@ -250,9 +255,10 @@ newrrv.rid = newrid; newrrv.ve = 1; // VHPT now enabled for region 7!! 
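The "Real HACK FIXME" comment in schedule_tail() above describes a lock hand-off: on ia64 the switch to a freshly created vcpu never returns into __enter_scheduler(), so the per-CPU schedule_lock taken there has to be dropped by the new context instead. A toy sketch of that hand-off, with a plain boolean standing in for the real spinlock and the scheduler reduced to two paths:

    #include <stdbool.h>
    #include <stdio.h>

    /* Toy per-CPU "runqueue lock"; stands in for schedule_data[cpu].schedule_lock. */
    static bool sched_lock_held;
    static void sched_lock(void)   { sched_lock_held = true;  }
    static void sched_unlock(void) { sched_lock_held = false; }

    static void context_switch_returning(void) { /* switch to an existing vcpu */ }

    /* First-run path: the switch to a new vcpu does not come back, so the new
     * context's entry hook has to drop the lock -- the spin_unlock_irq() above. */
    static void schedule_tail_sketch(void)
    {
        sched_unlock();
        printf("new vcpu running, lock held=%d\n", sched_lock_held);
    }

    static void enter_scheduler_sketch(bool first_run)
    {
        sched_lock();
        if (first_run) {
            schedule_tail_sketch();   /* in the real code control never returns
                                       * to the unlock below */
            return;
        }
        context_switch_returning();
        sched_unlock();
    }

    int main(void)
    {
        enter_scheduler_sketch(true);
        enter_scheduler_sketch(false);
        return 0;
    }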
newrrv.ps = PAGE_SHIFT; - if (rreg == 0) v->arch.metaphysical_saved_rr0 = newrrv.rrval; + if (rreg == 0) v->arch.metaphysical_saved_rr0 = + vmMangleRID(newrrv.rrval); if (rreg == 7) ia64_new_rr7(vmMangleRID(newrrv.rrval),v->vcpu_info, - v->arch.privregs); + v->arch.privregs, vhpt_paddr, pal_vaddr); else set_rr(rr,newrrv.rrval); #endif return 1; @@ -265,7 +271,8 @@ ia64_rr rrv; // rrv.ve = 1; FIXME: TURN ME BACK ON WHEN VHPT IS WORKING - set_rr(0,v->arch.metaphysical_rr0); + ia64_set_rr(0,v->arch.metaphysical_rr0); + ia64_srlz_d(); } // validates/changes region registers 0-6 in the currently executing domain @@ -290,7 +297,7 @@ ia64_rr rrv; rrv.rrval = 0; - rrv.rrval = v->domain->arch.metaphysical_rr0; + //rrv.rrval = v->domain->arch.metaphysical_rr0; rrv.ps = PAGE_SHIFT; rrv.ve = 1; if (!v->vcpu_info) { printf("Stopping in init_all_rr\n"); dummy(); } @@ -343,12 +350,16 @@ if (VCPU(v,metaphysical_mode)) { ia64_rr rrv; +#if 0 rrv.rrval = 0; rrv.rid = v->domain->arch.metaphysical_rr0; rrv.ps = PAGE_SHIFT; rrv.ve = 1; rr0 = rrv.rrval; set_rr_no_srlz(0x0000000000000000L, rr0); +#endif + rr0 = v->domain->arch.metaphysical_rr0; + ia64_set_rr(0x0000000000000000L, rr0); ia64_srlz_d(); } else { diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/xen/vcpu.c --- a/xen/arch/ia64/xen/vcpu.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/xen/vcpu.c Sun Oct 30 13:00:35 2005 @@ -66,8 +66,16 @@ unsigned long vcpu_verbose = 0; #define verbose(a...) do {if (vcpu_verbose) printf(a);} while(0) -extern TR_ENTRY *match_tr(VCPU *vcpu, unsigned long ifa); -extern TR_ENTRY *match_dtlb(VCPU *vcpu, unsigned long ifa); +//#define vcpu_quick_region_check(_tr_regions,_ifa) 1 +#define vcpu_quick_region_check(_tr_regions,_ifa) \ + (_tr_regions & (1 << ((unsigned long)_ifa >> 61))) +#define vcpu_quick_region_set(_tr_regions,_ifa) \ + do {_tr_regions |= (1 << ((unsigned long)_ifa >> 61)); } while (0) + +// FIXME: also need to check && (!trp->key || vcpu_pkr_match(trp->key)) +#define vcpu_match_tr_entry(_trp,_ifa,_rid) \ + ((_trp->p && (_trp->rid==_rid) && (_ifa >= _trp->vadr) && \ + (_ifa < (_trp->vadr + (1L<< _trp->ps)) - 1))) /************************************************************************** VCPU general register access routines @@ -620,7 +628,7 @@ return; } if ( VMX_DOMAIN(vcpu) ) { - set_bit(vector,VCPU(vcpu,irr)); + set_bit(vector,VCPU(vcpu,irr)); } else { /* if (!test_bit(vector,PSCB(vcpu,delivery_mask))) return; */ @@ -630,16 +638,6 @@ set_bit(vector,PSCBX(vcpu,irr)); PSCB(vcpu,pending_interruption) = 1; } - -#if 0 - /* Keir: I think you should unblock when an interrupt is pending. */ - { - int running = test_bit(_VCPUF_running, &vcpu->vcpu_flags); - vcpu_unblock(vcpu); - if ( running ) - smp_send_event_check_cpu(vcpu->processor); - } -#endif } void early_tick(VCPU *vcpu) @@ -710,14 +708,6 @@ } //printf("returned to caller\n"); -#if 0 -if (vector == (PSCB(vcpu,itv) & 0xff)) { - UINT64 now = ia64_get_itc(); - UINT64 itm = PSCBX(vcpu,domain_itm); - if (now < itm) early_tick(vcpu); - -} -#endif return vector; } @@ -775,6 +765,7 @@ } #ifdef HEARTBEAT_FREQ if (domid >= N_DOMS) domid = N_DOMS-1; +#if 0 if (vector == (PSCB(vcpu,itv) & 0xff)) { if (!(++count[domid] & ((HEARTBEAT_FREQ*1024)-1))) { printf("Dom%d heartbeat... ticks=%lx,nonticks=%lx\n", @@ -783,6 +774,7 @@ //dump_runq(); } } +#endif else nonclockcount[domid]++; #endif // now have an unmasked, pending, deliverable vector! @@ -1068,23 +1060,6 @@ /* gloss over the wraparound problem for now... 
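The vcpu_quick_region_check()/vcpu_quick_region_set() macros introduced above (together with the itr_regions/dtr_regions fields this patch adds to the arch vcpu) are a one-byte filter over the eight IA-64 regions: bits 63..61 of a virtual address select the region, and a set bit means "this region has at least one TR entry worth scanning". A minimal standalone illustration:

    #include <stdint.h>
    #include <stdio.h>

    /* Stands in for v->arch.itr_regions / v->arch.dtr_regions. */
    static unsigned int tr_regions;

    static void region_set(uint64_t va)   { tr_regions |= 1u << (va >> 61); }
    static int  region_check(uint64_t va) { return tr_regions & (1u << (va >> 61)); }

    int main(void)
    {
        region_set(0xe000000000000000UL);            /* a TR inserted in region 7 */
        printf("region 7 hit: %d\n", !!region_check(0xe000000000123456UL));
        printf("region 5 hit: %d\n", !!region_check(0xa000000000000000UL));
        /* vcpu_translate() only walks the itrs[]/dtrs[] arrays when the bit for
         * the faulting region is set, skipping the scan in the common case. */
        return 0;
    }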
we know it exists * but it doesn't matter right now */ -#if 0 - /* ensure at least next SP tick is in the future */ - if (!interval) PSCBX(vcpu,xen_itm) = now + -#if 0 - (running_on_sim() ? SIM_DEFAULT_CLOCK_RATE : - DEFAULT_CLOCK_RATE); -#else - 3000000; -//printf("vcpu_set_next_timer: HACK!\n"); -#endif -#if 0 - if (PSCBX(vcpu,xen_itm) < now) - while (PSCBX(vcpu,xen_itm) < now + (interval>>1)) - PSCBX(vcpu,xen_itm) += interval; -#endif -#endif - if (is_idle_task(vcpu->domain)) { // printf("****** vcpu_set_next_timer called during idle!!\n"); vcpu_safe_set_itm(s); @@ -1175,14 +1150,6 @@ // don't deliver another return; } -#if 0 - // attempt to flag "timer tick before its due" source - { - UINT64 itm = PSCBX(vcpu,domain_itm); - UINT64 now = ia64_get_itc(); - if (now < itm) printf("******* vcpu_pend_timer: pending before due!\n"); - } -#endif vcpu_pend_interrupt(vcpu, itv); } @@ -1196,33 +1163,6 @@ if (!itm) return 0; return (vcpu_deliverable_timer(vcpu) && (now < itm)); } - -//FIXME: This is a hack because everything dies if a timer tick is lost -void vcpu_poke_timer(VCPU *vcpu) -{ - UINT64 itv = PSCB(vcpu,itv) & 0xff; - UINT64 now = ia64_get_itc(); - UINT64 itm = PSCBX(vcpu,domain_itm); - UINT64 irr; - - if (vcpu_timer_disabled(vcpu)) return; - if (!itm) return; - if (itv != 0xefL) { - printf("vcpu_poke_timer: unimplemented itv=%lx!\n",itv); - while(1); - } - // using 0xef instead of itv so can get real irr - if (now > itm && !test_bit(0xefL, PSCBX(vcpu,insvc))) { - if (!test_bit(0xefL,PSCBX(vcpu,irr))) { - irr = ia64_getreg(_IA64_REG_CR_IRR3); - if (irr & (1L<<(0xef-0xc0))) return; -if (now-itm>0x800000) -printf("*** poking timer: now=%lx,vitm=%lx,xitm=%lx,itm=%lx\n",now,itm,local_cpu_data->itm_next,ia64_get_itm()); - vcpu_pend_timer(vcpu); - } - } -} - /************************************************************************** Privileged operation emulation routines @@ -1316,13 +1256,6 @@ UINT64 VHPT_addr = VHPT_addr1 | ((VHPT_addr2a | VHPT_addr2b) << 15) | VHPT_addr3; -#if 0 - if (VHPT_addr1 == 0xe000000000000000L) { - printf("vcpu_thash: thash unsupported with rr7 @%lx\n", - PSCB(vcpu,iip)); - return (IA64_ILLOP_FAULT); - } -#endif //verbose("vcpu_thash: vadr=%p, VHPT_addr=%p\n",vadr,VHPT_addr); *pval = VHPT_addr; return (IA64_NO_FAULT); @@ -1341,9 +1274,9 @@ IA64FAULT vcpu_translate(VCPU *vcpu, UINT64 address, BOOLEAN is_data, UINT64 *pteval, UINT64 *itir, UINT64 *iha) { - unsigned long pta, pta_mask, pte, ps; + unsigned long pta, pte, rid, rr; + int i; TR_ENTRY *trp; - ia64_rr rr; if (!(address >> 61)) { if (!PSCB(vcpu,metaphysical_mode)) { @@ -1361,67 +1294,80 @@ return IA64_NO_FAULT; } - /* check translation registers */ - if ((trp = match_tr(vcpu,address))) { - tr_translate_count++; - *pteval = trp->page_flags; - *itir = trp->itir; - return IA64_NO_FAULT; + rr = PSCB(vcpu,rrs)[address>>61]; + rid = rr & RR_RID_MASK; + if (is_data) { + if (vcpu_quick_region_check(vcpu->arch.dtr_regions,address)) { + for (trp = vcpu->arch.dtrs, i = NDTRS; i; i--, trp++) { + if (vcpu_match_tr_entry(trp,address,rid)) { + *pteval = trp->page_flags; + *itir = trp->itir; + tr_translate_count++; + return IA64_NO_FAULT; + } + } + } + } + // FIXME?: check itr's for data accesses too, else bad things happen? 
+ /* else */ { + if (vcpu_quick_region_check(vcpu->arch.itr_regions,address)) { + for (trp = vcpu->arch.itrs, i = NITRS; i; i--, trp++) { + if (vcpu_match_tr_entry(trp,address,rid)) { + *pteval = trp->page_flags; + *itir = trp->itir; + tr_translate_count++; + return IA64_NO_FAULT; + } + } + } } /* check 1-entry TLB */ - if ((trp = match_dtlb(vcpu,address))) { - dtlb_translate_count++; + // FIXME?: check dtlb for inst accesses too, else bad things happen? + trp = &vcpu->arch.dtlb; + if (/* is_data && */ vcpu_match_tr_entry(trp,address,rid)) { if (vcpu->domain==dom0 && !in_tpa) *pteval = trp->page_flags; else *pteval = vcpu->arch.dtlb_pte; -// printf("DTLB MATCH... NEW, DOM%s, %s\n", vcpu->domain==dom0? -// "0":"U", in_tpa?"vcpu_tpa":"ia64_do_page_fault"); *itir = trp->itir; + dtlb_translate_count++; return IA64_NO_FAULT; } /* check guest VHPT */ pta = PSCB(vcpu,pta); - rr.rrval = PSCB(vcpu,rrs)[address>>61]; - if (!rr.ve || !(pta & IA64_PTA_VE)) { -// FIXME? does iha get set for alt faults? does xenlinux depend on it? - vcpu_thash(vcpu, address, iha); -// FIXME?: does itir get set for alt faults? - *itir = vcpu_get_itir_on_fault(vcpu,address); - return (is_data ? IA64_ALT_DATA_TLB_VECTOR : - IA64_ALT_INST_TLB_VECTOR); - } if (pta & IA64_PTA_VF) { /* long format VHPT - not implemented */ - // thash won't work right? panic_domain(vcpu_regs(vcpu),"can't do long format VHPT\n"); //return (is_data ? IA64_DATA_TLB_VECTOR:IA64_INST_TLB_VECTOR); } + *itir = rr & (RR_RID_MASK | RR_PS_MASK); + // note: architecturally, iha is optionally set for alt faults but + // xenlinux depends on it so should document it as part of PV interface + vcpu_thash(vcpu, address, iha); + if (!(rr & RR_VE_MASK) || !(pta & IA64_PTA_VE)) + return (is_data ? IA64_ALT_DATA_TLB_VECTOR : IA64_ALT_INST_TLB_VECTOR); + /* avoid recursively walking (short format) VHPT */ - pta_mask = (itir_mask(pta) << 3) >> 3; - if (((address ^ pta) & pta_mask) == 0) + if (((address ^ pta) & ((itir_mask(pta) << 3) >> 3)) == 0) return (is_data ? IA64_DATA_TLB_VECTOR : IA64_INST_TLB_VECTOR); - vcpu_thash(vcpu, address, iha); - if (__copy_from_user(&pte, (void *)(*iha), sizeof(pte)) != 0) { -// FIXME?: does itir get set for vhpt faults? - *itir = vcpu_get_itir_on_fault(vcpu,*iha); + if (__copy_from_user(&pte, (void *)(*iha), sizeof(pte)) != 0) + // virtual VHPT walker "missed" in TLB return IA64_VHPT_FAULT; - } /* - * Optimisation: this VHPT walker aborts on not-present pages - * instead of inserting a not-present translation, this allows - * vectoring directly to the miss handler. - */ - if (pte & _PAGE_P) { - *pteval = pte; - *itir = vcpu_get_itir_on_fault(vcpu,address); - vhpt_translate_count++; - return IA64_NO_FAULT; - } - *itir = vcpu_get_itir_on_fault(vcpu,address); - return (is_data ? IA64_DATA_TLB_VECTOR : IA64_INST_TLB_VECTOR); + * Optimisation: this VHPT walker aborts on not-present pages + * instead of inserting a not-present translation, this allows + * vectoring directly to the miss handler. + */ + if (!(pte & _PAGE_P)) + return (is_data ? IA64_DATA_TLB_VECTOR : IA64_INST_TLB_VECTOR); + + /* found mapping in guest VHPT! 
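Once the TR arrays and the one-entry TLB have missed, the rewritten vcpu_translate() above chooses between four outcomes, and itir/iha are now filled in before that choice is made. A compact sketch of the decision ladder, with the bit tests (RR_VE_MASK, IA64_PTA_VE, the ((address ^ pta) & mask) recursion guard, _PAGE_P) reduced to booleans:

    #include <stdbool.h>
    #include <stdio.h>

    /* Which fault the walker ends up raising; purely illustrative names. */
    enum outcome { USE_VHPT_PTE, ALT_TLB_FAULT, TLB_FAULT, VHPT_FAULT };

    static enum outcome vhpt_walk_sketch(bool rr_ve, bool pta_ve,
                                         bool address_inside_vhpt,
                                         bool vhpt_read_ok, bool pte_present)
    {
        if (!rr_ve || !pta_ve)
            return ALT_TLB_FAULT;        /* VHPT disabled for this region */
        if (address_inside_vhpt)
            return TLB_FAULT;            /* never walk the VHPT recursively */
        if (!vhpt_read_ok)
            return VHPT_FAULT;           /* the virtual walker "missed" in the TLB */
        if (!pte_present)
            return TLB_FAULT;            /* vector straight to the miss handler */
        return USE_VHPT_PTE;             /* translation found in the guest VHPT */
    }

    int main(void)
    {
        printf("outcome: %d\n", vhpt_walk_sketch(true, true, false, true, true));
        return 0;
    }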
*/ + *itir = rr & RR_PS_MASK; + *pteval = pte; + vhpt_translate_count++; + return IA64_NO_FAULT; } IA64FAULT vcpu_tpa(VCPU *vcpu, UINT64 vadr, UINT64 *padr) @@ -1736,33 +1682,6 @@ } } -TR_ENTRY *vcpu_match_tr_entry(VCPU *vcpu, TR_ENTRY *trp, UINT64 ifa, int count) -{ - unsigned long rid = (get_rr(ifa) & RR_RID_MASK); - int i; - - for (i = 0; i < count; i++, trp++) { - if (!trp->p) continue; - if (physicalize_rid(vcpu,trp->rid) != rid) continue; - if (ifa < trp->vadr) continue; - if (ifa >= (trp->vadr + (1L << trp->ps)) - 1) continue; - //if (trp->key && !match_pkr(vcpu,trp->key)) continue; - return trp; - } - return 0; -} - -TR_ENTRY *match_tr(VCPU *vcpu, unsigned long ifa) -{ - TR_ENTRY *trp; - - trp = vcpu_match_tr_entry(vcpu,vcpu->arch.dtrs,ifa,NDTRS); - if (trp) return trp; - trp = vcpu_match_tr_entry(vcpu,vcpu->arch.itrs,ifa,NITRS); - if (trp) return trp; - return 0; -} - IA64FAULT vcpu_itr_d(VCPU *vcpu, UINT64 slot, UINT64 pte, UINT64 itir, UINT64 ifa) { @@ -1772,6 +1691,7 @@ trp = &PSCBX(vcpu,dtrs[slot]); //printf("***** itr.d: setting slot %d: ifa=%p\n",slot,ifa); vcpu_set_tr_entry(trp,pte,itir,ifa); + vcpu_quick_region_set(PSCBX(vcpu,dtr_regions),ifa); return IA64_NO_FAULT; } @@ -1784,6 +1704,7 @@ trp = &PSCBX(vcpu,itrs[slot]); //printf("***** itr.i: setting slot %d: ifa=%p\n",slot,ifa); vcpu_set_tr_entry(trp,pte,itir,ifa); + vcpu_quick_region_set(PSCBX(vcpu,itr_regions),ifa); return IA64_NO_FAULT; } @@ -1835,17 +1756,6 @@ } } -// NOTE: returns a physical pte, NOT a "metaphysical" pte, so do not check -// the physical address contained for correctness -TR_ENTRY *match_dtlb(VCPU *vcpu, unsigned long ifa) -{ - TR_ENTRY *trp; - - if (trp = vcpu_match_tr_entry(vcpu,&vcpu->arch.dtlb,ifa,1)) - return (&vcpu->arch.dtlb); - return 0UL; -} - IA64FAULT vcpu_itc_d(VCPU *vcpu, UINT64 pte, UINT64 itir, UINT64 ifa) { unsigned long pteval, logps = (itir >> 2) & 0x3f; @@ -1952,12 +1862,14 @@ IA64FAULT vcpu_ptr_d(VCPU *vcpu,UINT64 vadr,UINT64 addr_range) { printf("vcpu_ptr_d: Purging TLB is unsupported\n"); + // don't forget to recompute dtr_regions return (IA64_ILLOP_FAULT); } IA64FAULT vcpu_ptr_i(VCPU *vcpu,UINT64 vadr,UINT64 addr_range) { printf("vcpu_ptr_i: Purging TLB is unsupported\n"); + // don't forget to recompute itr_regions return (IA64_ILLOP_FAULT); } diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/xen/xenasm.S --- a/xen/arch/ia64/xen/xenasm.S Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/xen/xenasm.S Sun Oct 30 13:00:35 2005 @@ -48,11 +48,11 @@ // FIXME? Note that this turns off the DB bit (debug) #define PSR_BITS_TO_SET IA64_PSR_BN -//extern void ia64_new_rr7(unsigned long rid,void *shared_info, void *shared_arch_info); +//extern void ia64_new_rr7(unsigned long rid,void *shared_info, void *shared_arch_info, unsigned long p_vhpt, unsigned long v_pal); GLOBAL_ENTRY(ia64_new_rr7) // not sure this unwind statement is correct... .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(1) - alloc loc1 = ar.pfs, 3, 8, 0, 0 + alloc loc1 = ar.pfs, 5, 9, 0, 0 1: { mov r28 = in0 // copy procedure index mov r8 = ip // save ip to compute branch @@ -63,10 +63,12 @@ ;; tpa loc2=loc2 // grab this BEFORE changing rr7 ;; + dep loc8=0,in4,60,4 + ;; #if VHPT_ENABLED - movl loc6=VHPT_ADDR - ;; - tpa loc6=loc6 // grab this BEFORE changing rr7 + mov loc6=in3 + ;; + //tpa loc6=loc6 // grab this BEFORE changing rr7 ;; #endif mov loc5=in1 @@ -229,6 +231,21 @@ mov r25=IA64_TR_ARCH_INFO ;; itr.d dtr[r25]=r23 // wire in new mapping... 
+ ;; + + //Purge/insert PAL TR + mov r24=IA64_TR_PALCODE + movl r25=PAGE_KERNEL + ;; + or loc8=r25,loc8 + mov r23=IA64_GRANULE_SHIFT<<2 + ;; + ptr.i in4,r23 + ;; + mov cr.itir=r23 + mov cr.ifa=in4 + ;; + itr.i itr[r24]=loc8 ;; // done, switch back to virtual and return diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/xen/xenirq.c --- a/xen/arch/ia64/xen/xenirq.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/xen/xenirq.c Sun Oct 30 13:00:35 2005 @@ -35,7 +35,7 @@ int xen_do_IRQ(ia64_vector vector) { - if (vector != 0xef) { + if (vector != IA64_TIMER_VECTOR && vector != IA64_IPI_VECTOR) { extern void vcpu_pend_interrupt(void *, int); #if 0 if (firsttime[vector]) { @@ -57,22 +57,18 @@ return(0); } -/* From linux/kernel/softirq.c */ -#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED -# define invoke_softirq() __do_softirq() -#else -# define invoke_softirq() do_softirq() -#endif - /* * Exit an interrupt context. Process softirqs if needed and possible: */ void irq_exit(void) { //account_system_vtime(current); - //sub_preempt_count(IRQ_EXIT_OFFSET); - if (!in_interrupt() && local_softirq_pending()) - invoke_softirq(); + sub_preempt_count(IRQ_EXIT_OFFSET); + if (!in_interrupt() && local_softirq_pending()) { + add_preempt_count(SOFTIRQ_OFFSET); + do_softirq(); + sub_preempt_count(SOFTIRQ_OFFSET); + } //preempt_enable_no_resched(); } /* end from linux/kernel/softirq.c */ diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/xen/xenmisc.c --- a/xen/arch/ia64/xen/xenmisc.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/xen/xenmisc.c Sun Oct 30 13:00:35 2005 @@ -17,6 +17,7 @@ #include <asm/io.h> #include <xen/softirq.h> #include <public/sched.h> +#include <asm/vhpt.h> efi_memory_desc_t ia64_efi_io_md; EXPORT_SYMBOL(ia64_efi_io_md); @@ -280,6 +281,8 @@ unsigned long context_switch_count = 0; +#include <asm/vcpu.h> + void context_switch(struct vcpu *prev, struct vcpu *next) { //printk("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n"); @@ -287,7 +290,8 @@ //prev->domain->domain_id,(long)prev&0xffffff,next->domain->domain_id,(long)next&0xffffff); //if (prev->domain->domain_id == 1 && next->domain->domain_id == 0) cs10foo(); //if (prev->domain->domain_id == 0 && next->domain->domain_id == 1) cs01foo(); -//printk("@@sw %d->%d\n",prev->domain->domain_id,next->domain->domain_id); +printk("@@sw%d/%x %d->%d\n",smp_processor_id(), hard_smp_processor_id (), + prev->domain->domain_id,next->domain->domain_id); if(VMX_DOMAIN(prev)){ vtm_domain_out(prev); } @@ -307,9 +311,13 @@ if (!i--) { printk("+",id); i = 1000000; } } - if (VMX_DOMAIN(current)){ + if (VMX_DOMAIN(current)){ vmx_load_all_rr(current); }else{ + extern char ia64_ivt; + ia64_set_iva(&ia64_ivt); + ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | + VHPT_ENABLED); if (!is_idle_task(current->domain)) { load_region_regs(current); if (vcpu_timer_expired(current)) vcpu_pend_timer(current); diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/xen/xensetup.c --- a/xen/arch/ia64/xen/xensetup.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/xen/xensetup.c Sun Oct 30 13:00:35 2005 @@ -253,11 +253,11 @@ printk("About to call scheduler_init()\n"); scheduler_init(); local_irq_disable(); + init_IRQ (); printk("About to call init_xen_time()\n"); init_xen_time(); /* initialise the time */ printk("About to call ac_timer_init()\n"); ac_timer_init(); -// init_xen_time(); ??? #ifdef CONFIG_SMP if ( opt_nosmp ) @@ -275,6 +275,9 @@ cpu_set(i, cpu_present_map); //BUG_ON(!local_irq_is_enabled()); + + /* Enable IRQ to receive IPI (needed for ITC sync). 
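The irq_exit() rework above gives the softirq run at interrupt exit the same preempt-count bracketing that the idle-loop hunk earlier in this patch added around do_softirq(). A toy model of that bookkeeping (the offsets and the in_interrupt() test are simplified; the real code uses IRQ_EXIT_OFFSET and SOFTIRQ_OFFSET):

    #include <stdio.h>

    /* Toy preempt counter: one field counts hardirq nesting, one softirq. */
    #define SOFT_OFF   0x100
    #define HARD_OFF   0x10000
    static unsigned int preempt_count;
    static int softirqs_pending = 1;

    static int in_interrupt(void) { return preempt_count != 0; }
    static void do_softirq_sketch(void) { softirqs_pending = 0; }

    static void irq_exit_sketch(void)
    {
        preempt_count -= HARD_OFF;                /* leaving hardirq context */
        if (!in_interrupt() && softirqs_pending) {
            preempt_count += SOFT_OFF;            /* formally enter softirq context */
            do_softirq_sketch();
            preempt_count -= SOFT_OFF;
        }
    }

    int main(void)
    {
        preempt_count += HARD_OFF;                /* irq_enter() equivalent */
        irq_exit_sketch();
        printf("pending=%d count=%x\n", softirqs_pending, preempt_count);
        return 0;
    }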
*/ + local_irq_enable(); printk("num_online_cpus=%d, max_cpus=%d\n",num_online_cpus(),max_cpus); for_each_present_cpu ( i ) @@ -287,24 +290,16 @@ } } + local_irq_disable(); + printk("Brought up %ld CPUs\n", (long)num_online_cpus()); smp_cpus_done(max_cpus); #endif - - // FIXME: Should the following be swapped and moved later? - schedulers_start(); do_initcalls(); printk("About to call sort_main_extable()\n"); sort_main_extable(); - /* surrender usage of kernel registers to domain, use percpu area instead */ - __get_cpu_var(cpu_kr)._kr[IA64_KR_IO_BASE] = ia64_get_kr(IA64_KR_IO_BASE); - __get_cpu_var(cpu_kr)._kr[IA64_KR_PER_CPU_DATA] = ia64_get_kr(IA64_KR_PER_CPU_DATA); - __get_cpu_var(cpu_kr)._kr[IA64_KR_CURRENT_STACK] = ia64_get_kr(IA64_KR_CURRENT_STACK); - __get_cpu_var(cpu_kr)._kr[IA64_KR_FPU_OWNER] = ia64_get_kr(IA64_KR_FPU_OWNER); - __get_cpu_var(cpu_kr)._kr[IA64_KR_CURRENT] = ia64_get_kr(IA64_KR_CURRENT); - __get_cpu_var(cpu_kr)._kr[IA64_KR_PT_BASE] = ia64_get_kr(IA64_KR_PT_BASE); /* Create initial domain 0. */ printk("About to call do_createdomain()\n"); @@ -342,6 +337,11 @@ 0, 0) != 0) panic("Could not set up DOM0 guest OS\n"); + + /* PIN domain0 on CPU 0. */ + dom0->vcpu[0]->cpumap=1; + set_bit(_VCPUF_cpu_pinned, &dom0->vcpu[0]->vcpu_flags); + #ifdef CLONE_DOMAIN0 { int i; @@ -379,9 +379,16 @@ domain_unpause_by_systemcontroller(clones[i]); } #endif + domain0_ready = 1; + + local_irq_enable(); + + printf("About to call schedulers_start dom0=%p, idle0_dom=%p\n", + dom0, &idle0_domain); + schedulers_start(); + domain_unpause_by_systemcontroller(dom0); - domain0_ready = 1; - local_irq_enable(); + printk("About to call startup_cpu_idle_loop()\n"); startup_cpu_idle_loop(); } diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/ia64/xen/xentime.c --- a/xen/arch/ia64/xen/xentime.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/ia64/xen/xentime.c Sun Oct 30 13:00:35 2005 @@ -38,20 +38,6 @@ unsigned long itc_scale, ns_scale; unsigned long itc_at_irq; -static inline u64 get_time_delta(void) -{ - s64 delta_itc; - u64 cur_itc; - - cur_itc = ia64_get_itc(); - - delta_itc = (s64)(cur_itc - itc_at_irq); - - /* Ensure that the returned system time is monotonically increasing. */ - if ( unlikely(delta_itc < 0) ) delta_itc = 0; - return cycle_to_ns(delta_itc); -} - /* We don't expect an absolute cycle value here, since then no way * to prevent overflow for large norminator. Normally this conversion * is used for relative offset. @@ -66,6 +52,21 @@ return (ns * ns_scale) >> 32; } +static inline u64 get_time_delta(void) +{ + s64 delta_itc; + u64 cur_itc; + + cur_itc = ia64_get_itc(); + + delta_itc = (s64)(cur_itc - itc_at_irq); + + /* Ensure that the returned system time is monotonically increasing. */ + if ( unlikely(delta_itc < 0) ) delta_itc = 0; + return cycle_to_ns(delta_itc); +} + + s_time_t get_s_time(void) { s_time_t now; @@ -99,16 +100,18 @@ { unsigned long new_itm, old_itc; +#if 0 #define HEARTBEAT_FREQ 16 // period in seconds #ifdef HEARTBEAT_FREQ static long count = 0; if (!(++count & ((HEARTBEAT_FREQ*1024)-1))) { - printf("Heartbeat... iip=%p,psr.i=%d,pend=%d\n", - regs->cr_iip, + printf("Heartbeat... iip=%p\n", /*",psr.i=%d,pend=%d\n", */ + regs->cr_iip /*, VCPU(current,interrupt_delivery_enabled), - VCPU(current,pending_interruption)); + VCPU(current,pending_interruption) */); count = 0; } +#endif #endif if (current->domain == dom0) { // FIXME: there's gotta be a better way of doing this... 
@@ -117,12 +120,14 @@ //domain0_ready = 1; // moved to xensetup.c VCPU(current,pending_interruption) = 1; } - if (domain0_ready && vcpu_timer_expired(dom0->vcpu[0])) { - vcpu_pend_timer(dom0->vcpu[0]); - //vcpu_set_next_timer(dom0->vcpu[0]); - vcpu_wake(dom0->vcpu[0]); - } - if (!is_idle_task(current->domain) && current->domain != dom0) { + if (domain0_ready && current->domain != dom0) { + if(vcpu_timer_expired(dom0->vcpu[0])) { + vcpu_pend_timer(dom0->vcpu[0]); + //vcpu_set_next_timer(dom0->vcpu[0]); + vcpu_wake(dom0->vcpu[0]); + } + } + if (!is_idle_task(current->domain)) { if (vcpu_timer_expired(current)) { vcpu_pend_timer(current); // ensure another timer interrupt happens even if domain doesn't @@ -132,8 +137,11 @@ } new_itm = local_cpu_data->itm_next; - if (!time_after(ia64_get_itc(), new_itm)) + if (!VMX_DOMAIN(current) && !time_after(ia64_get_itc(), new_itm)) return; + + if (VMX_DOMAIN(current)) + vcpu_wake(current); while (1) { new_itm += local_cpu_data->itm_delta; @@ -233,7 +241,7 @@ s_time_t expire; unsigned long seq, cur_itc, itm_next; - if (!domain0_ready) return 1; + if (!domain0_ready || timeout == 0) return 1; do { seq = read_seqbegin(&xtime_lock); diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/x86/apic.c --- a/xen/arch/x86/apic.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/x86/apic.c Sun Oct 30 13:00:35 2005 @@ -815,6 +815,10 @@ return result; } +unsigned int get_apic_bus_scale(void) +{ + return bus_scale; +} static unsigned int calibration_result; diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/x86/dm/i8259.c --- a/xen/arch/x86/dm/i8259.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/x86/dm/i8259.c Sun Oct 30 13:00:35 2005 @@ -32,8 +32,8 @@ #include <public/io/ioreq.h> #include <asm/vmx.h> #include <public/io/vmx_vpic.h> -#include <public/io/vmx_vlapic.h> #include <asm/current.h> +#include <asm/vmx_vlapic.h> /* set irq level. If an edge is detected, then the IRR is set to 1 */ static inline void pic_set_irq1(PicState *s, int irq, int level) @@ -135,7 +135,6 @@ { s->pics[1].irr |= (uint8_t)(irqs >> 8); s->pics[0].irr |= (uint8_t) irqs; - /* TODO for alt_irq_func */ pic_update_irq(s); } @@ -505,14 +504,22 @@ { int intno; struct vmx_virpic *s = &v->domain->arch.vmx_platform.vmx_pic; - + struct vmx_platform *plat = &v->domain->arch.vmx_platform; + + if ( !vlapic_accept_pic_intr(v) ) + return -1; + + if ( !plat->interrupt_request ) + return -1; + /* read the irq from the PIC */ intno = pic_read_irq(s); *type = VLAPIC_DELIV_MODE_EXT; + plat->interrupt_request = 0; return intno; } -int is_pit_irq(struct vcpu *v, int irq) +int is_pit_irq(struct vcpu *v, int irq, int type) { int pit_vec = v->domain->arch.vmx_platform.vmx_pic.pics[0].irq_base; diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/x86/domain_build.c Sun Oct 30 13:00:35 2005 @@ -15,6 +15,7 @@ #include <xen/elf.h> #include <xen/kernel.h> #include <xen/domain.h> +#include <xen/compile.h> #include <asm/regs.h> #include <asm/system.h> #include <asm/io.h> @@ -582,26 +583,23 @@ _initrd_start, (_initrd_start+initrd_len+PAGE_SIZE-1) & PAGE_MASK); } - d->next_io_page = max_page; - /* Set up start info area. 
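In the i8259.c hunk above, cpu_get_pic_interrupt() now refuses to hand out a vector unless the virtual LAPIC will accept an ExtINT and the platform actually has a request pending, and it consumes interrupt_request itself. A stub-level sketch of that gating (the state and vector below are stand-ins, not the real vmx_platform fields):

    #include <stdio.h>

    static int lapic_accepts_extint = 1;
    static int pic_request_pending  = 1;
    static int pic_read_irq_stub(void) { return 0x20; }   /* arbitrary vector */

    static int get_pic_interrupt_sketch(void)
    {
        if (!lapic_accepts_extint)
            return -1;
        if (!pic_request_pending)
            return -1;
        pic_request_pending = 0;       /* consume the request */
        return pic_read_irq_stub();
    }

    int main(void)
    {
        printf("first: %d, second: %d\n",
               get_pic_interrupt_sketch(), get_pic_interrupt_sketch());
        return 0;
    }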
*/ si = (start_info_t *)vstartinfo_start; memset(si, 0, PAGE_SIZE); si->nr_pages = nr_pages; + si->shared_info = virt_to_phys(d->shared_info); if ( opt_dom0_translate ) { - si->shared_info = d->next_io_page << PAGE_SHIFT; - set_pfn_from_mfn(virt_to_phys(d->shared_info) >> PAGE_SHIFT, d->next_io_page); - d->next_io_page++; - } - else - si->shared_info = virt_to_phys(d->shared_info); + si->shared_info = max_page << PAGE_SHIFT; + set_pfn_from_mfn(virt_to_phys(d->shared_info) >> PAGE_SHIFT, max_page); + } si->flags = SIF_PRIVILEGED | SIF_INITDOMAIN; si->pt_base = vpt_start; si->nr_pt_frames = nr_pt_pages; si->mfn_list = vphysmap_start; + sprintf(si->magic, "Xen-%i.%i", XEN_VERSION, XEN_SUBVERSION); /* Write the phys->machine and machine->phys table entries. */ for ( pfn = 0; pfn < d->tot_pages; pfn++ ) diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/x86/mm.c --- a/xen/arch/x86/mm.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/x86/mm.c Sun Oct 30 13:00:35 2005 @@ -1164,6 +1164,7 @@ { l3_pgentry_t ol3e; unsigned long vaddr; + int okay; if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) ) { @@ -1218,7 +1219,9 @@ return 0; } - BUG_ON(!create_pae_xen_mappings(pl3e)); + okay = create_pae_xen_mappings(pl3e); + BUG_ON(!okay); + put_page_from_l3e(ol3e, pfn); return 1; } diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/x86/time.c --- a/xen/arch/x86/time.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/x86/time.c Sun Oct 30 13:00:35 2005 @@ -323,7 +323,7 @@ return pit_counter64 + (u16)(pit_stamp - pit_read_counter()); } -static int init_pit(void) +static void init_pit(void) { read_platform_count = read_pit_count; @@ -333,8 +333,6 @@ printk("Platform timer is %s PIT\n", freq_string(CLOCK_TICK_RATE)); using_pit = 1; - - return 1; } /************************************************************ @@ -563,7 +561,7 @@ static void init_platform_timer(void) { if ( !init_cyclone() && !init_hpet() ) - BUG_ON(!init_pit()); + init_pit(); } diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/x86/traps.c Sun Oct 30 13:00:35 2005 @@ -1147,6 +1147,9 @@ asmlinkage int math_state_restore(struct cpu_user_regs *regs) { + struct trap_bounce *tb; + trap_info_t *ti; + /* Prevent recursion. 
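The x86 time.c change above makes the PIT the unconditional fallback: init_pit() can no longer fail, so the BUG_ON() around it is gone. The probe order, with probe_cyclone()/probe_hpet()/setup_pit() as stand-ins for init_cyclone(), init_hpet() and init_pit():

    #include <stdio.h>

    static int probe_cyclone(void) { return 0; }   /* 0 = not present */
    static int probe_hpet(void)    { return 0; }
    static void setup_pit(void)    { printf("Platform timer is PIT\n"); }

    static void init_platform_timer_sketch(void)
    {
        if (!probe_cyclone() && !probe_hpet())
            setup_pit();                           /* always succeeds */
    }

    int main(void) { init_platform_timer_sketch(); return 0; }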
*/ clts(); @@ -1154,10 +1157,15 @@ if ( current->arch.guest_context.ctrlreg[0] & X86_CR0_TS ) { - struct trap_bounce *tb = ¤t->arch.trap_bounce; + tb = ¤t->arch.trap_bounce; + ti = ¤t->arch.guest_context.trap_ctxt[TRAP_no_device]; + tb->flags = TBF_EXCEPTION; - tb->cs = current->arch.guest_context.trap_ctxt[7].cs; - tb->eip = current->arch.guest_context.trap_ctxt[7].address; + tb->cs = ti->cs; + tb->eip = ti->address; + if ( TI_GET_IF(ti) ) + tb->flags |= TBF_INTERRUPT; + current->arch.guest_context.ctrlreg[0] &= ~X86_CR0_TS; } @@ -1169,6 +1177,7 @@ unsigned long condition; struct vcpu *v = current; struct trap_bounce *tb = &v->arch.trap_bounce; + trap_info_t *ti; __asm__ __volatile__("mov %%db6,%0" : "=r" (condition)); @@ -1198,9 +1207,12 @@ /* Save debug status register where guest OS can peek at it */ v->arch.guest_context.debugreg[6] = condition; + ti = &v->arch.guest_context.trap_ctxt[TRAP_debug]; tb->flags = TBF_EXCEPTION; - tb->cs = v->arch.guest_context.trap_ctxt[TRAP_debug].cs; - tb->eip = v->arch.guest_context.trap_ctxt[TRAP_debug].address; + tb->cs = ti->cs; + tb->eip = ti->address; + if ( TI_GET_IF(ti) ) + tb->flags |= TBF_INTERRUPT; out: return EXCRET_not_a_fault; diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/x86/vmx.c --- a/xen/arch/x86/vmx.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/x86/vmx.c Sun Oct 30 13:00:35 2005 @@ -65,6 +65,11 @@ if ( v == v->domain->vcpu[0] ) { + v->domain->arch.vmx_platform.lapic_enable = + v->arch.guest_context.user_regs.ecx; + v->arch.guest_context.user_regs.ecx = 0; + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "lapic enable is %d.\n", + v->domain->arch.vmx_platform.lapic_enable); /* * Required to do this once per domain * XXX todo: add a seperate function to do these. @@ -96,6 +101,10 @@ destroy_vmcs(&v->arch.arch_vmx); free_monitor_pagetable(v); rem_ac_timer(&v->domain->arch.vmx_platform.vmx_pit.pit_timer); + if ( vmx_apic_support(v->domain) ) { + rem_ac_timer( &(VLAPIC(v)->vlapic_timer) ); + xfree( VLAPIC(v) ); + } } #ifdef __x86_64__ @@ -442,7 +451,9 @@ /* Use 1:1 page table to identify MMIO address space */ if ( mmio_space(gpa) ){ - if (gpa >= 0xFEE00000) { /* workaround for local APIC */ + struct vcpu *v = current; + /* No support for APIC */ + if (!vmx_apic_support(v->domain) && gpa >= 0xFEC00000) { u32 inst_len; __vmread(VM_EXIT_INSTRUCTION_LEN, &(inst_len)); __update_guest_eip(inst_len); @@ -487,6 +498,7 @@ { unsigned int eax, ebx, ecx, edx; unsigned long eip; + struct vcpu *v = current; __vmread(GUEST_RIP, &eip); @@ -500,6 +512,9 @@ cpuid(input, &eax, &ebx, &ecx, &edx); if (input == 1) { + if ( vmx_apic_support(v->domain) && + !vlapic_global_enabled((VLAPIC(v))) ) + clear_bit(X86_FEATURE_APIC, &edx); #ifdef __i386__ clear_bit(X86_FEATURE_PSE, &edx); clear_bit(X86_FEATURE_PAE, &edx); @@ -1441,6 +1456,7 @@ static inline void vmx_do_msr_read(struct cpu_user_regs *regs) { u64 msr_content = 0; + struct vcpu *v = current; VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read: ecx=%lx, eax=%lx, edx=%lx", (unsigned long)regs->ecx, (unsigned long)regs->eax, @@ -1455,6 +1471,9 @@ case MSR_IA32_SYSENTER_EIP: __vmread(GUEST_SYSENTER_EIP, &msr_content); break; + case MSR_IA32_APICBASE: + msr_content = VLAPIC(v) ? 
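The traps.c hunks above make the #NM and #DB bounce paths honour the guest's "interrupt gate" bit: if TI_GET_IF() is set on the registered handler, TBF_INTERRUPT is OR-ed into the bounce flags so event delivery is masked on entry to the guest handler. A self-contained sketch of that pattern (structure layouts and flag values here are simplified stand-ins, not the real Xen definitions):

    #include <stdint.h>
    #include <stdio.h>

    #define TBF_EXCEPTION  (1 << 0)     /* illustrative values only */
    #define TBF_INTERRUPT  (1 << 1)

    struct trap_info_sketch   { uint16_t cs; unsigned long address; int disable_events; };
    struct trap_bounce_sketch { unsigned int flags; uint16_t cs; unsigned long eip; };

    static void bounce_trap(const struct trap_info_sketch *ti,
                            struct trap_bounce_sketch *tb)
    {
        tb->flags = TBF_EXCEPTION;
        tb->cs    = ti->cs;
        tb->eip   = ti->address;
        if (ti->disable_events)        /* TI_GET_IF(ti) in the real code */
            tb->flags |= TBF_INTERRUPT;
    }

    int main(void)
    {
        struct trap_info_sketch ti = { 0xe019, 0xc0100000UL, 1 };
        struct trap_bounce_sketch tb;
        bounce_trap(&ti, &tb);
        printf("flags=%x cs=%x eip=%lx\n", tb.flags, (unsigned)tb.cs, tb.eip);
        return 0;
    }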
VLAPIC(v)->apic_base_msr : 0; + break; default: if(long_mode_do_msr_read(regs)) return; @@ -1474,6 +1493,7 @@ static inline void vmx_do_msr_write(struct cpu_user_regs *regs) { u64 msr_content; + struct vcpu *v = current; VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_write: ecx=%lx, eax=%lx, edx=%lx", (unsigned long)regs->ecx, (unsigned long)regs->eax, @@ -1490,6 +1510,9 @@ break; case MSR_IA32_SYSENTER_EIP: __vmwrite(GUEST_SYSENTER_EIP, msr_content); + break; + case MSR_IA32_APICBASE: + vlapic_msr_set(VLAPIC(v), msr_content); break; default: long_mode_do_msr_write(regs); diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/x86/vmx_intercept.c --- a/xen/arch/x86/vmx_intercept.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/x86/vmx_intercept.c Sun Oct 30 13:00:35 2005 @@ -23,6 +23,7 @@ #include <asm/vmx_platform.h> #include <asm/vmx_virpit.h> #include <asm/vmx_intercept.h> +#include <asm/vmx_vlapic.h> #include <public/io/ioreq.h> #include <xen/lib.h> #include <xen/sched.h> @@ -31,6 +32,123 @@ #include <xen/event.h> #ifdef CONFIG_VMX + +struct vmx_mmio_handler vmx_mmio_handers[VMX_MMIO_HANDLER_NR] = +{ + { + .check_handler = vlapic_range, + .read_handler = vlapic_read, + .write_handler = vlapic_write + } +}; + +static inline void vmx_mmio_access(struct vcpu *v, + ioreq_t *p, + vmx_mmio_read_t read_handler, + vmx_mmio_write_t write_handler) +{ + ioreq_t *req; + vcpu_iodata_t *vio = get_vio(v->domain, v->vcpu_id); + unsigned int tmp1, tmp2; + unsigned long data; + + if (vio == NULL) { + printk("vlapic_access: bad shared page\n"); + domain_crash_synchronous(); + } + + req = &vio->vp_ioreq; + + switch (req->type) { + case IOREQ_TYPE_COPY: + { + int sign = (req->df) ? -1 : 1, i; + + if (!req->pdata_valid) { + if (req->dir == IOREQ_READ){ + req->u.data = read_handler(v, req->addr, req->size); + } else { /* req->dir != IOREQ_READ */ + write_handler(v, req->addr, req->size, req->u.data); + } + } else { /* !req->pdata_valid */ + if (req->dir == IOREQ_READ) { + for (i = 0; i < req->count; i++) { + data = read_handler(v, + req->addr + (sign * i * req->size), + req->size); + vmx_copy(&data, + (unsigned long)p->u.pdata + (sign * i * req->size), + p->size, + VMX_COPY_OUT); + } + } else { /* !req->dir == IOREQ_READ */ + for (i = 0; i < req->count; i++) { + vmx_copy(&data, + (unsigned long)p->u.pdata + (sign * i * req->size), + p->size, + VMX_COPY_IN); + write_handler(v, + req->addr + (sign * i * req->size), + req->size, data); + } + } + } + break; + } + + case IOREQ_TYPE_AND: + tmp1 = read_handler(v, req->addr, req->size); + if (req->dir == IOREQ_WRITE) { + tmp2 = tmp1 & (unsigned long) req->u.data; + write_handler(v, req->addr, req->size, tmp2); + } + req->u.data = tmp1; + break; + + case IOREQ_TYPE_OR: + tmp1 = read_handler(v, req->addr, req->size); + if (req->dir == IOREQ_WRITE) { + tmp2 = tmp1 | (unsigned long) req->u.data; + write_handler(v, req->addr, req->size, tmp2); + } + req->u.data = tmp1; + break; + + case IOREQ_TYPE_XOR: + tmp1 = read_handler(v, req->addr, req->size); + if (req->dir == IOREQ_WRITE) { + tmp2 = tmp1 ^ (unsigned long) req->u.data; + write_handler(v, req->addr, req->size, tmp2); + } + req->u.data = tmp1; + break; + + default: + printk("error ioreq type for local APIC %x\n", req->type); + domain_crash_synchronous(); + break; + } +} + +int vmx_mmio_intercept(ioreq_t *p) +{ + struct vcpu *v = current; + int i; + struct vmx_mmio_handler *handler = vmx_mmio_handers; + + /* XXX currently only APIC use intercept */ + if ( !vmx_apic_support(v->domain) ) + return 0; + + for ( i = 0; i < VMX_MMIO_HANDLER_NR; i++ 
) { + if ( handler[i].check_handler(v, p->addr) ) { + vmx_mmio_access(v, p, + handler[i].read_handler, handler[i].write_handler); + return 1; + } + } + return 0; +} /* * Check if the request is handled inside xen diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/x86/vmx_io.c --- a/xen/arch/x86/vmx_io.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/x86/vmx_io.c Sun Oct 30 13:00:35 2005 @@ -36,9 +36,9 @@ #include <asm/apic.h> #include <asm/shadow.h> +#include <asm/vmx_vlapic.h> #include <public/io/ioreq.h> #include <public/io/vmx_vpic.h> -#include <public/io/vmx_vlapic.h> #ifdef CONFIG_VMX #if defined (__i386__) @@ -732,48 +732,6 @@ } while(1); } -#if defined(__i386__) || defined(__x86_64__) -static inline int __fls(u32 word) -{ - int bit; - - __asm__("bsrl %1,%0" - :"=r" (bit) - :"rm" (word)); - return word ? bit : -1; -} -#else -#define __fls(x) generic_fls(x) -static __inline__ int generic_fls(u32 x) -{ - int r = 31; - - if (!x) - return -1; - if (!(x & 0xffff0000u)) { - x <<= 16; - r -= 16; - } - if (!(x & 0xff000000u)) { - x <<= 8; - r -= 8; - } - if (!(x & 0xf0000000u)) { - x <<= 4; - r -= 4; - } - if (!(x & 0xc0000000u)) { - x <<= 2; - r -= 2; - } - if (!(x & 0x80000000u)) { - x <<= 1; - r -= 1; - } - return r; -} -#endif - /* Simple minded Local APIC priority implementation. Fix later */ static __inline__ int find_highest_irq(u32 *pintr) { @@ -801,31 +759,31 @@ struct vmx_virpit *vpit = &(v->domain->arch.vmx_platform.vmx_pit); u64 drift; + if ( is_pit_irq(v, vector, type) ) { + if ( !vpit->first_injected ) { + vpit->first_injected = 1; + vpit->pending_intr_nr = 0; + } else { + vpit->pending_intr_nr--; + } + vpit->inject_point = NOW(); + drift = vpit->period_cycles * vpit->pending_intr_nr; + drift = v->arch.arch_vmx.tsc_offset - drift; + __vmwrite(TSC_OFFSET, drift); + +#if defined (__i386__) + __vmwrite(TSC_OFFSET_HIGH, (drift >> 32)); +#endif + + } + switch(type) { case VLAPIC_DELIV_MODE_EXT: - if ( is_pit_irq(v, vector) ) { - if ( !vpit->first_injected ) { - vpit->first_injected = 1; - vpit->pending_intr_nr = 0; - } - else { - vpit->pending_intr_nr--; - } - vpit->inject_point = NOW(); - drift = vpit->period_cycles * vpit->pending_intr_nr; - drift = v->arch.arch_vmx.tsc_offset - drift; - __vmwrite(TSC_OFFSET, drift); - -#if defined (__i386__) - __vmwrite(TSC_OFFSET_HIGH, (drift >> 32)); -#endif - - } break; default: - printk("Not support interrupt type\n"); + vlapic_post_injection(v, vector, type); break; } } @@ -885,6 +843,24 @@ } +int cpu_get_interrupt(struct vcpu *v, int *type) +{ + int intno; + struct vmx_virpic *s = &v->domain->arch.vmx_platform.vmx_pic; + + if ( (intno = cpu_get_apic_interrupt(v, type)) != -1 ) { + /* set irq request if a PIC irq is still pending */ + /* XXX: improve that */ + pic_update_irq(s); + return intno; + } + /* read the irq from the PIC */ + if ( (intno = cpu_get_pic_interrupt(v, type)) != -1 ) + return intno; + + return -1; +} + asmlinkage void vmx_intr_assist(void) { int intr_type = 0; @@ -902,11 +878,6 @@ pic_set_irq(pic, 0, 1); } - if ( !plat->interrupt_request ) { - disable_irq_window(cpu_exec_control); - return; - } - __vmread(VM_ENTRY_INTR_INFO_FIELD, &intr_fields); if (intr_fields & INTR_INFO_VALID_MASK) { @@ -928,16 +899,21 @@ enable_irq_window(cpu_exec_control); return; } - plat->interrupt_request = 0; - highest_vector = cpu_get_pic_interrupt(v, &intr_type); + + highest_vector = cpu_get_interrupt(v, &intr_type); + + if (highest_vector == -1) { + disable_irq_window(cpu_exec_control); + return; + } switch (intr_type) { case VLAPIC_DELIV_MODE_EXT: + case 
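The new vmx_mmio_handers[] table and vmx_mmio_intercept() above follow a classic check/read/write dispatch-table shape: each handler claims an address range, and the first handler whose range check matches services the request instead of forwarding it to the device model. A self-contained sketch of that shape (the types, the LAPIC window and the handlers below are stand-ins, not the real Xen structures):

    #include <stdint.h>
    #include <stdio.h>

    typedef int      (*check_fn)(uint64_t addr);
    typedef uint64_t (*read_fn)(uint64_t addr, int size);
    typedef void     (*write_fn)(uint64_t addr, int size, uint64_t val);

    struct mmio_handler { check_fn check; read_fn read; write_fn write; };

    static int      lapic_check(uint64_t a) { return a >= 0xfee00000 && a < 0xfee01000; }
    static uint64_t lapic_read(uint64_t a, int s) { (void)a; (void)s; return 0; }
    static void     lapic_write(uint64_t a, int s, uint64_t v) { (void)a; (void)s; (void)v; }

    static const struct mmio_handler handlers[] = {
        { lapic_check, lapic_read, lapic_write },
    };

    static int mmio_intercept_sketch(uint64_t addr, int size, uint64_t *out)
    {
        for (unsigned i = 0; i < sizeof(handlers) / sizeof(handlers[0]); i++)
            if (handlers[i].check(addr)) {
                *out = handlers[i].read(addr, size);
                return 1;                      /* handled inside the hypervisor */
            }
        return 0;                              /* not claimed by this table */
    }

    int main(void)
    {
        uint64_t v;
        printf("lapic: %d, other: %d\n",
               mmio_intercept_sketch(0xfee00030, 4, &v),
               mmio_intercept_sketch(0xa0000, 4, &v));
        return 0;
    }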
VLAPIC_DELIV_MODE_FIXED: + case VLAPIC_DELIV_MODE_LPRI: vmx_inject_extint(v, highest_vector, VMX_INVALID_ERROR_CODE); TRACE_3D(TRC_VMX_INT, v->domain->domain_id, highest_vector, 0); break; - case VLAPIC_DELIV_MODE_FIXED: - case VLAPIC_DELIV_MODE_LPRI: case VLAPIC_DELIV_MODE_SMI: case VLAPIC_DELIV_MODE_NMI: case VLAPIC_DELIV_MODE_INIT: diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/x86/vmx_vmcs.c --- a/xen/arch/x86/vmx_vmcs.c Sun Oct 30 12:52:38 2005 +++ b/xen/arch/x86/vmx_vmcs.c Sun Oct 30 13:00:35 2005 @@ -252,6 +252,10 @@ pic_init(&platform->vmx_pic, pic_irq_request, &platform->interrupt_request); register_pic_io_hook(); + + if ( vmx_apic_support(d) ) { + spin_lock_init(&d->arch.vmx_platform.round_robin_lock); + } } static void vmx_set_host_env(struct vcpu *v) @@ -312,6 +316,9 @@ error |= __vmwrite(CR4_READ_SHADOW, cr4); vmx_stts(); + + if(vmx_apic_support(v->domain)) + vlapic_init(v); vmx_set_host_env(v); diff -r dc36edf1102f -r bcccadcc56e5 xen/common/acm_ops.c --- a/xen/common/acm_ops.c Sun Oct 30 12:52:38 2005 +++ b/xen/common/acm_ops.c Sun Oct 30 13:00:35 2005 @@ -133,7 +133,10 @@ struct domain *subj = find_domain_by_id(op->u.getssid.id.domainid); if (!subj) return -ESRCH; /* domain not found */ - + if (subj->ssid == NULL) { + put_domain(subj); + return -ESRCH; + } ssidref = ((struct acm_ssid_domain *)(subj->ssid))->ssidref; put_domain(subj); } else @@ -167,6 +170,10 @@ ret = -ESRCH; /* domain not found */ goto out; } + if (subj->ssid == NULL) { + put_domain(subj); + ret = -ESRCH; + } ssidref1 = ((struct acm_ssid_domain *)(subj->ssid))->ssidref; put_domain(subj); } else { @@ -181,6 +188,10 @@ if (!subj) { ret = -ESRCH; /* domain not found */ goto out; + } + if (subj->ssid == NULL) { + put_domain(subj); + return -ESRCH; } ssidref2 = ((struct acm_ssid_domain *)(subj->ssid))->ssidref; put_domain(subj); diff -r dc36edf1102f -r bcccadcc56e5 xen/common/schedule.c --- a/xen/common/schedule.c Sun Oct 30 12:52:38 2005 +++ b/xen/common/schedule.c Sun Oct 30 13:00:35 2005 @@ -514,7 +514,7 @@ /* Initialise the data structures. 
*/ void __init scheduler_init(void) { - int i; + int i, rc; open_softirq(SCHEDULE_SOFTIRQ, __enter_scheduler); @@ -540,7 +540,9 @@ printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name); - BUG_ON(SCHED_OP(alloc_task, idle_task[0]) < 0); + rc = SCHED_OP(alloc_task, idle_task[0]); + BUG_ON(rc < 0); + sched_add_domain(idle_task[0]); } diff -r dc36edf1102f -r bcccadcc56e5 xen/include/acm/acm_hooks.h --- a/xen/include/acm/acm_hooks.h Sun Oct 30 12:52:38 2005 +++ b/xen/include/acm/acm_hooks.h Sun Oct 30 13:00:35 2005 @@ -100,10 +100,10 @@ void (*fail_domain_create) (void *subject_ssid, ssidref_t ssidref); void (*post_domain_destroy) (void *object_ssid, domid_t id); /* event channel control hooks (can be NULL) */ - int (*pre_eventchannel_unbound) (domid_t id); - void (*fail_eventchannel_unbound) (domid_t id); - int (*pre_eventchannel_interdomain) (domid_t id1, domid_t id2); - int (*fail_eventchannel_interdomain) (domid_t id1, domid_t id2); + int (*pre_eventchannel_unbound) (domid_t id1, domid_t id2); + void (*fail_eventchannel_unbound) (domid_t id1, domid_t id2); + int (*pre_eventchannel_interdomain) (domid_t id); + void (*fail_eventchannel_interdomain) (domid_t id); /* grant table control hooks (can be NULL) */ int (*pre_grant_map_ref) (domid_t id); void (*fail_grant_map_ref) (domid_t id); @@ -193,31 +193,31 @@ return; } -static inline int acm_pre_eventchannel_unbound(domid_t id) +static inline int acm_pre_eventchannel_unbound(domid_t id1, domid_t id2) { if ((acm_primary_ops->pre_eventchannel_unbound != NULL) && - acm_primary_ops->pre_eventchannel_unbound(id)) + acm_primary_ops->pre_eventchannel_unbound(id1, id2)) return ACM_ACCESS_DENIED; else if ((acm_secondary_ops->pre_eventchannel_unbound != NULL) && - acm_secondary_ops->pre_eventchannel_unbound(id)) { + acm_secondary_ops->pre_eventchannel_unbound(id1, id2)) { /* roll-back primary */ if (acm_primary_ops->fail_eventchannel_unbound != NULL) - acm_primary_ops->fail_eventchannel_unbound(id); + acm_primary_ops->fail_eventchannel_unbound(id1, id2); return ACM_ACCESS_DENIED; } else return ACM_ACCESS_PERMITTED; } -static inline int acm_pre_eventchannel_interdomain(domid_t id1, domid_t id2) +static inline int acm_pre_eventchannel_interdomain(domid_t id) { if ((acm_primary_ops->pre_eventchannel_interdomain != NULL) && - acm_primary_ops->pre_eventchannel_interdomain(id1, id2)) + acm_primary_ops->pre_eventchannel_interdomain(id)) return ACM_ACCESS_DENIED; else if ((acm_secondary_ops->pre_eventchannel_interdomain != NULL) && - acm_secondary_ops->pre_eventchannel_interdomain(id1, id2)) { + acm_secondary_ops->pre_eventchannel_interdomain(id)) { /* roll-back primary */ if (acm_primary_ops->fail_eventchannel_interdomain != NULL) - acm_primary_ops->fail_eventchannel_interdomain(id1, id2); + acm_primary_ops->fail_eventchannel_interdomain(id); return ACM_ACCESS_DENIED; } else return ACM_ACCESS_PERMITTED; @@ -234,10 +234,22 @@ current->domain->ssid, op->u.createdomain.ssidref); break; case DOM0_DESTROYDOMAIN: + if (*ssid != NULL) { + printkd("%s: Warning. Overlapping destruction.\n", + __func__); + return -EACCES; + } d = find_domain_by_id(op->u.destroydomain.domain); if (d != NULL) { *ssid = d->ssid; /* save for post destroy when d is gone */ - /* no policy-specific hook */ + if (*ssid == NULL) { + printk("%s: Warning. 
Destroying domain without ssid pointer.\n", + __func__); + put_domain(d); + return -EACCES; + } + d->ssid = NULL; /* make sure it's not used any more */ + /* no policy-specific hook */ put_domain(d); ret = 0; } @@ -248,7 +260,7 @@ return ret; } -static inline void acm_post_dom0_op(dom0_op_t *op, void *ssid) +static inline void acm_post_dom0_op(dom0_op_t *op, void **ssid) { switch(op->cmd) { case DOM0_CREATEDOMAIN: @@ -261,7 +273,8 @@ case DOM0_DESTROYDOMAIN: acm_post_domain_destroy(ssid, op->u.destroydomain.domain); /* free security ssid for the destroyed domain (also if null policy */ - acm_free_domain_ssid((struct acm_ssid_domain *)ssid); + acm_free_domain_ssid((struct acm_ssid_domain *)(*ssid)); + *ssid = NULL; break; } } @@ -282,12 +295,13 @@ switch(op->cmd) { case EVTCHNOP_alloc_unbound: - ret = acm_pre_eventchannel_unbound(op->u.alloc_unbound.dom); + ret = acm_pre_eventchannel_unbound( + op->u.alloc_unbound.dom, + op->u.alloc_unbound.remote_dom); break; case EVTCHNOP_bind_interdomain: ret = acm_pre_eventchannel_interdomain( - current->domain->domain_id, - op->u.bind_interdomain.remote_dom); + op->u.bind_interdomain.remote_dom); break; default: ret = 0; /* ok */ diff -r dc36edf1102f -r bcccadcc56e5 xen/include/asm-ia64/config.h --- a/xen/include/asm-ia64/config.h Sun Oct 30 12:52:38 2005 +++ b/xen/include/asm-ia64/config.h Sun Oct 30 13:00:35 2005 @@ -28,8 +28,8 @@ #ifdef CONFIG_XEN_SMP #define CONFIG_SMP 1 -#define NR_CPUS 2 -#define CONFIG_NR_CPUS 2 +#define NR_CPUS 8 +#define CONFIG_NR_CPUS 8 #else #undef CONFIG_SMP #define NR_CPUS 1 @@ -102,7 +102,7 @@ #endif // xen/include/asm/config.h -#define HZ 100 +//#define HZ 1000 // FIXME SMP: leave SMP for a later time #define barrier() __asm__ __volatile__("": : :"memory") @@ -123,8 +123,7 @@ #ifdef CONFIG_SMP #warning "Lots of things to fix to enable CONFIG_SMP!" #endif -// FIXME SMP -#define get_cpu() 0 +#define get_cpu() smp_processor_id() #define put_cpu() do {} while(0) // needed for common/dom0_ops.c until hyperthreading is supported @@ -140,6 +139,7 @@ // function calls; see decl in xen/include/xen/sched.h #undef free_task_struct #undef alloc_task_struct +#define get_thread_info(v) alloc_thread_info(v) // initial task has a different name in Xen //#define idle0_task init_task @@ -299,7 +299,11 @@ #endif /* __XEN_IA64_CONFIG_H__ */ // needed for include/xen/smp.h +#ifdef CONFIG_SMP +#define __smp_processor_id() current_thread_info()->cpu +#else #define __smp_processor_id() 0 +#endif // FOLLOWING ADDED FOR XEN POST-NGIO and/or LINUX 2.6.7 diff -r dc36edf1102f -r bcccadcc56e5 xen/include/asm-ia64/domain.h --- a/xen/include/asm-ia64/domain.h Sun Oct 30 12:52:38 2005 +++ b/xen/include/asm-ia64/domain.h Sun Oct 30 13:00:35 2005 @@ -49,6 +49,8 @@ TR_ENTRY dtrs[NDTRS]; TR_ENTRY itlb; TR_ENTRY dtlb; + unsigned int itr_regions; + unsigned int dtr_regions; unsigned long itlb_pte; unsigned long dtlb_pte; unsigned long irr[4]; diff -r dc36edf1102f -r bcccadcc56e5 xen/include/asm-ia64/event.h --- a/xen/include/asm-ia64/event.h Sun Oct 30 12:52:38 2005 +++ b/xen/include/asm-ia64/event.h Sun Oct 30 13:00:35 2005 @@ -14,6 +14,21 @@ static inline void evtchn_notify(struct vcpu *v) { + /* + * NB1. 'vcpu_flags' and 'processor' must be checked /after/ update of + * pending flag. These values may fluctuate (after all, we hold no + * locks) but the key insight is that each change will cause + * evtchn_upcall_pending to be polled. + * + * NB2. We save VCPUF_running across the unblock to avoid a needless + * IPI for domains that we IPI'd to unblock. 
+ */ + int running = test_bit(_VCPUF_running, &v->vcpu_flags); + vcpu_unblock(v); + if ( running ) + smp_send_event_check_cpu(v->processor); + + if(!VMX_DOMAIN(v)) vcpu_pend_interrupt(v, v->vcpu_info->arch.evtchn_vector); } diff -r dc36edf1102f -r bcccadcc56e5 xen/include/asm-ia64/linux-xen/asm/spinlock.h --- a/xen/include/asm-ia64/linux-xen/asm/spinlock.h Sun Oct 30 12:52:38 2005 +++ b/xen/include/asm-ia64/linux-xen/asm/spinlock.h Sun Oct 30 13:00:35 2005 @@ -17,10 +17,15 @@ #include <asm/intrinsics.h> #include <asm/system.h> +#define DEBUG_SPINLOCK + typedef struct { volatile unsigned int lock; #ifdef CONFIG_PREEMPT unsigned int break_lock; +#endif +#ifdef DEBUG_SPINLOCK + void *locker; #endif #ifdef XEN unsigned char recurse_cpu; @@ -95,6 +100,10 @@ "(p14) brl.call.spnt.many b6=ia64_spinlock_contention;;" : "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS); # endif /* CONFIG_MCKINLEY */ +#endif + +#ifdef DEBUG_SPINLOCK + asm volatile ("mov %0=ip" : "=r" (lock->locker)); #endif } #define _raw_spin_lock(lock) _raw_spin_lock_flags(lock, 0) diff -r dc36edf1102f -r bcccadcc56e5 xen/include/asm-ia64/linux-xen/linux/hardirq.h --- a/xen/include/asm-ia64/linux-xen/linux/hardirq.h Sun Oct 30 12:52:38 2005 +++ b/xen/include/asm-ia64/linux-xen/linux/hardirq.h Sun Oct 30 13:00:35 2005 @@ -67,11 +67,7 @@ */ #define in_irq() (hardirq_count()) #define in_softirq() (softirq_count()) -#ifdef XEN -#define in_interrupt() 0 // FIXME SMP LATER -#else #define in_interrupt() (irq_count()) -#endif #if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_BKL) # define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != kernel_locked()) diff -r dc36edf1102f -r bcccadcc56e5 xen/include/asm-ia64/linux-xen/linux/interrupt.h --- a/xen/include/asm-ia64/linux-xen/linux/interrupt.h Sun Oct 30 12:52:38 2005 +++ b/xen/include/asm-ia64/linux-xen/linux/interrupt.h Sun Oct 30 13:00:35 2005 @@ -88,6 +88,7 @@ #define save_and_cli(x) save_and_cli(&x) #endif /* CONFIG_SMP */ +#ifndef XEN /* SoftIRQ primitives. */ #define local_bh_disable() \ do { add_preempt_count(SOFTIRQ_OFFSET); barrier(); } while (0) @@ -95,6 +96,7 @@ do { barrier(); sub_preempt_count(SOFTIRQ_OFFSET); } while (0) extern void local_bh_enable(void); +#endif /* PLEASE, avoid to allocate new softirqs, if you need not _really_ high frequency threaded job scheduling. For almost all the purposes diff -r dc36edf1102f -r bcccadcc56e5 xen/include/asm-ia64/mm.h --- a/xen/include/asm-ia64/mm.h Sun Oct 30 12:52:38 2005 +++ b/xen/include/asm-ia64/mm.h Sun Oct 30 13:00:35 2005 @@ -405,6 +405,7 @@ extern int nr_swap_pages; extern unsigned long *mpt_table; +extern unsigned long lookup_domain_mpa(struct domain *d, unsigned long mpaddr); #undef machine_to_phys_mapping #define machine_to_phys_mapping mpt_table @@ -433,10 +434,10 @@ #define __gpfn_is_mem(_d, gpfn) \ (__gpfn_valid(_d, gpfn) ? 
\ - (lookup_domain_mpa((_d), ((gpfn)<<PAGE_SHIFT) & GPFN_IO_MASK) == GPFN_MEM) : 0) - - -//#define __gpa_to_mpa(_d, gpa) \ -// ((__gpfn_to_mfn((_d),(gpa)>>PAGE_SHIFT)<<PAGE_SHIFT)|((gpa)&~PAGE_MASK)) + ((lookup_domain_mpa((_d), ((gpfn)<<PAGE_SHIFT)) & GPFN_IO_MASK) == GPFN_MEM) : 0) + + +#define __gpa_to_mpa(_d, gpa) \ + ((__gpfn_to_mfn((_d),(gpa)>>PAGE_SHIFT)<<PAGE_SHIFT)|((gpa)&~PAGE_MASK)) #endif /* __ASM_IA64_MM_H__ */ diff -r dc36edf1102f -r bcccadcc56e5 xen/include/asm-ia64/vhpt.h --- a/xen/include/asm-ia64/vhpt.h Sun Oct 30 12:52:38 2005 +++ b/xen/include/asm-ia64/vhpt.h Sun Oct 30 13:00:35 2005 @@ -5,27 +5,16 @@ #define VHPT_ENABLED_REGION_0_TO_6 1 #define VHPT_ENABLED_REGION_7 0 - -#if 0 +/* Size of the VHPT. */ +#define VHPT_SIZE_LOG2 24 + +/* Number of entries in the VHPT. The size of an entry is 4*8B == 32B */ +#define VHPT_NUM_ENTRIES (1 << (VHPT_SIZE_LOG2 - 5)) + +#define VHPT_CACHE_MASK (VHPT_NUM_ENTRIES - 1) #define VHPT_CACHE_ENTRY_SIZE 64 -#define VHPT_CACHE_MASK 2097151 -#define VHPT_CACHE_NUM_ENTRIES 32768 -#define VHPT_NUM_ENTRIES 2097152 -#define VHPT_CACHE_ENTRY_SIZE_LOG2 6 -#define VHPT_SIZE_LOG2 26 //???? -#define VHPT_PAGE_SHIFT 26 //???? -#else -//#define VHPT_CACHE_NUM_ENTRIES 2048 -//#define VHPT_NUM_ENTRIES 131072 -//#define VHPT_CACHE_MASK 131071 -//#define VHPT_SIZE_LOG2 22 //???? -#define VHPT_CACHE_ENTRY_SIZE 64 -#define VHPT_CACHE_NUM_ENTRIES 8192 -#define VHPT_NUM_ENTRIES 524288 -#define VHPT_CACHE_MASK 524287 -#define VHPT_SIZE_LOG2 24 //???? -#define VHPT_PAGE_SHIFT 24 //???? -#endif + +#define VHPT_PAGE_SHIFT VHPT_SIZE_LOG2 // FIXME: These should be automatically generated @@ -52,7 +41,7 @@ // VHPT collison chain entry (part of the "V-Cache") // DO NOT CHANGE THE SIZE OF THIS STRUCTURE (see vhpt.S banked regs calculations) // -typedef struct vcache_entry { +struct vcache_entry { union { struct { unsigned long tag : 63; // 0-62 @@ -123,12 +112,21 @@ #define INVALID_TI_TAG 0x8000000000000000L +extern void vhpt_init (void); +extern void zero_vhpt_stats(void); +extern int dump_vhpt_stats(char *buf); +extern void vhpt_flush_address(unsigned long vadr, unsigned long addr_range); +extern void vhpt_multiple_insert(unsigned long vaddr, unsigned long pte, + unsigned long logps); +extern void vhpt_insert (unsigned long vadr, unsigned long ptr, + unsigned logps); +extern void vhpt_flush(void); #endif /* !__ASSEMBLY */ #if !VHPT_ENABLED #define VHPT_CCHAIN_LOOKUP(Name, i_or_d) #else -#ifdef CONFIG_SMP +#if 0 /* One VHPT per cpu! def CONFIG_SMP */ #warning "FIXME SMP: VHPT_CCHAIN_LOOKUP needs a semaphore on the VHPT!" 
#endif diff -r dc36edf1102f -r bcccadcc56e5 xen/include/asm-ia64/vmx.h --- a/xen/include/asm-ia64/vmx.h Sun Oct 30 12:52:38 2005 +++ b/xen/include/asm-ia64/vmx.h Sun Oct 30 13:00:35 2005 @@ -24,6 +24,7 @@ #define RR7_SWITCH_SHIFT 12 /* 4k enough */ #include <public/io/ioreq.h> + extern void identify_vmx_feature(void); extern unsigned int vmx_enabled; diff -r dc36edf1102f -r bcccadcc56e5 xen/include/asm-ia64/xenkregs.h --- a/xen/include/asm-ia64/xenkregs.h Sun Oct 30 12:52:38 2005 +++ b/xen/include/asm-ia64/xenkregs.h Sun Oct 30 13:00:35 2005 @@ -6,7 +6,8 @@ */ #define IA64_TR_SHARED_INFO 3 /* dtr3: page shared with domain */ #define IA64_TR_VHPT 4 /* dtr4: vhpt */ -#define IA64_TR_ARCH_INFO 5 +#define IA64_TR_ARCH_INFO 5 +#define IA64_TR_PERVP_VHPT 6 /* Processor status register bits: */ #define IA64_PSR_VM_BIT 46 diff -r dc36edf1102f -r bcccadcc56e5 xen/include/asm-x86/vmx_intercept.h --- a/xen/include/asm-x86/vmx_intercept.h Sun Oct 30 12:52:38 2005 +++ b/xen/include/asm-x86/vmx_intercept.h Sun Oct 30 13:00:35 2005 @@ -14,6 +14,16 @@ #define VMX_MMIO 1 typedef int (*intercept_action_t)(ioreq_t *); +typedef unsigned long (*vmx_mmio_read_t)(struct vcpu *v, + unsigned long addr, + unsigned long length); + +typedef unsigned long (*vmx_mmio_write_t)(struct vcpu *v, + unsigned long addr, + unsigned long length, + unsigned long val); + +typedef int (*vmx_mmio_check_t)(struct vcpu *v, unsigned long addr); struct io_handler { int type; @@ -27,6 +37,16 @@ struct io_handler hdl_list[MAX_IO_HANDLER]; }; +struct vmx_mmio_handler { + vmx_mmio_check_t check_handler; + vmx_mmio_read_t read_handler; + vmx_mmio_write_t write_handler; +}; + +#define VMX_MMIO_HANDLER_NR 1 + +extern struct vmx_mmio_handler vmx_mmio_handers[VMX_MMIO_HANDLER_NR]; + /* global io interception point in HV */ extern int vmx_io_intercept(ioreq_t *p, int type); extern int register_io_handler(unsigned long addr, unsigned long size, @@ -37,10 +57,7 @@ return vmx_io_intercept(p, VMX_PORTIO); } -static inline int vmx_mmio_intercept(ioreq_t *p) -{ - return vmx_io_intercept(p, VMX_MMIO); -} +int vmx_mmio_intercept(ioreq_t *p); static inline int register_portio_handler(unsigned long addr, unsigned long size, @@ -49,11 +66,4 @@ return register_io_handler(addr, size, action, VMX_PORTIO); } -static inline int register_mmio_handler(unsigned long addr, - unsigned long size, - intercept_action_t action) -{ - return register_io_handler(addr, size, action, VMX_MMIO); -} - #endif /* _VMX_INTERCEPT_H */ diff -r dc36edf1102f -r bcccadcc56e5 xen/include/asm-x86/vmx_platform.h --- a/xen/include/asm-x86/vmx_platform.h Sun Oct 30 12:52:38 2005 +++ b/xen/include/asm-x86/vmx_platform.h Sun Oct 30 13:00:35 2005 @@ -80,10 +80,13 @@ struct vmx_platform { unsigned long shared_page_va; unsigned int nr_vcpu; + unsigned int lapic_enable; struct vmx_virpit vmx_pit; struct vmx_io_handler vmx_io_handler; struct vmx_virpic vmx_pic; + unsigned char round_info[256]; + spinlock_t round_robin_lock; int interrupt_request; }; diff -r dc36edf1102f -r bcccadcc56e5 xen/include/asm-x86/vmx_vmcs.h --- a/xen/include/asm-x86/vmx_vmcs.h Sun Oct 30 12:52:38 2005 +++ b/xen/include/asm-x86/vmx_vmcs.h Sun Oct 30 13:00:35 2005 @@ -22,6 +22,7 @@ #include <asm/config.h> #include <asm/vmx_cpu.h> #include <asm/vmx_platform.h> +#include <asm/vmx_vlapic.h> #include <public/vmx_assist.h> extern int start_vmx(void); @@ -96,6 +97,7 @@ struct msr_state msr_content; struct mmio_op mmio_op; /* MMIO */ void *io_bitmap_a, *io_bitmap_b; + struct vlapic *vlapic; u64 tsc_offset; }; @@ -272,18 +274,21 
@@ #define VMX_DEBUG 1 #if VMX_DEBUG -#define DBG_LEVEL_0 (1 << 0) -#define DBG_LEVEL_1 (1 << 1) -#define DBG_LEVEL_2 (1 << 2) -#define DBG_LEVEL_3 (1 << 3) -#define DBG_LEVEL_IO (1 << 4) -#define DBG_LEVEL_VMMU (1 << 5) +#define DBG_LEVEL_0 (1 << 0) +#define DBG_LEVEL_1 (1 << 1) +#define DBG_LEVEL_2 (1 << 2) +#define DBG_LEVEL_3 (1 << 3) +#define DBG_LEVEL_IO (1 << 4) +#define DBG_LEVEL_VMMU (1 << 5) +#define DBG_LEVEL_VLAPIC (1 << 6) +#define DBG_LEVEL_VLAPIC_TIMER (1 << 7) +#define DBG_LEVEL_VLAPIC_INTERRUPT (1 << 7) extern unsigned int opt_vmx_debug_level; #define VMX_DBG_LOG(level, _f, _a...) \ if ((level) & opt_vmx_debug_level) \ printk("[VMX:%d.%d] " _f "\n", \ - current->domain->domain_id, current->vcpu_id, ## _a) + current->domain->domain_id, current->vcpu_id, ## _a) #else #define VMX_DBG_LOG(level, _f, _a...) #endif diff -r dc36edf1102f -r bcccadcc56e5 xen/include/public/arch-ia64.h --- a/xen/include/public/arch-ia64.h Sun Oct 30 12:52:38 2005 +++ b/xen/include/public/arch-ia64.h Sun Oct 30 13:00:35 2005 @@ -37,6 +37,33 @@ #define GPFN_INV_MASK (31UL << 59) /* Guest pfn is invalid */ #define INVALID_MFN (~0UL) + +#define MEM_G (1UL << 30) +#define MEM_M (1UL << 20) + +#define MMIO_START (3 * MEM_G) +#define MMIO_SIZE (512 * MEM_M) + +#define VGA_IO_START 0xA0000UL +#define VGA_IO_SIZE 0x20000 + +#define LEGACY_IO_START (MMIO_START + MMIO_SIZE) +#define LEGACY_IO_SIZE (64*MEM_M) + +#define IO_PAGE_START (LEGACY_IO_START + LEGACY_IO_SIZE) +#define IO_PAGE_SIZE PAGE_SIZE + +#define STORE_PAGE_START (IO_PAGE_START + IO_PAGE_SIZE) +#define STORE_PAGE_SIZE PAGE_SIZE + +#define IO_SAPIC_START 0xfec00000UL +#define IO_SAPIC_SIZE 0x100000 + +#define PIB_START 0xfee00000UL +#define PIB_SIZE 0x100000 + +#define GFW_START (4*MEM_G -16*MEM_M) +#define GFW_SIZE (16*MEM_M) /* * NB. This may become a 64-bit count with no shift. 
If this happens then the diff -r dc36edf1102f -r bcccadcc56e5 xen/include/public/io/ioreq.h --- a/xen/include/public/io/ioreq.h Sun Oct 30 12:52:38 2005 +++ b/xen/include/public/io/ioreq.h Sun Oct 30 13:00:35 2005 @@ -29,11 +29,11 @@ #define STATE_IORESP_READY 3 #define STATE_IORESP_HOOK 4 -#define IOREQ_TYPE_PIO 0 /* pio */ -#define IOREQ_TYPE_COPY 1 /* mmio ops */ -#define IOREQ_TYPE_AND 2 -#define IOREQ_TYPE_OR 3 -#define IOREQ_TYPE_XOR 4 +#define IOREQ_TYPE_PIO 0 /* pio */ +#define IOREQ_TYPE_COPY 1 /* mmio ops */ +#define IOREQ_TYPE_AND 2 +#define IOREQ_TYPE_OR 3 +#define IOREQ_TYPE_XOR 4 /* * VMExit dispatcher should cooperate with instruction decoder to @@ -55,9 +55,10 @@ uint8_t type; /* I/O type */ } ioreq_t; -#define MAX_VECTOR 256 +#define MAX_VECTOR 256 #define BITS_PER_BYTE 8 #define INTR_LEN (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint64_t))) +#define INTR_LEN_32 (MAX_VECTOR/(BITS_PER_BYTE * sizeof(uint32_t))) typedef struct { uint16_t pic_elcr; diff -r dc36edf1102f -r bcccadcc56e5 xen/include/public/io/vmx_vpic.h --- a/xen/include/public/io/vmx_vpic.h Sun Oct 30 12:52:38 2005 +++ b/xen/include/public/io/vmx_vpic.h Sun Oct 30 13:00:35 2005 @@ -76,7 +76,7 @@ uint32_t pic_intack_read(struct vmx_virpic *s); void register_pic_io_hook (void); int cpu_get_pic_interrupt(struct vcpu *v, int *type); -int is_pit_irq(struct vcpu *v, int irq); +int is_pit_irq(struct vcpu *v, int irq, int type); void do_pic_irqs (struct vmx_virpic *s, uint16_t irqs); void do_pic_irqs_clear (struct vmx_virpic *s, uint16_t irqs); diff -r dc36edf1102f -r bcccadcc56e5 xen/include/public/xen.h --- a/xen/include/public/xen.h Sun Oct 30 12:52:38 2005 +++ b/xen/include/public/xen.h Sun Oct 30 13:00:35 2005 @@ -410,6 +410,7 @@ #define MAX_GUEST_CMDLINE 1024 typedef struct start_info { /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */ + char magic[32]; /* "Xen-<version>.<subversion>". */ unsigned long nr_pages; /* Total pages allocated to this domain. */ unsigned long shared_info; /* MACHINE address of shared info struct. */ uint32_t flags; /* SIF_xxx flags. */ diff -r dc36edf1102f -r bcccadcc56e5 xen/include/xen/sched.h --- a/xen/include/xen/sched.h Sun Oct 30 12:52:38 2005 +++ b/xen/include/xen/sched.h Sun Oct 30 13:00:35 2005 @@ -101,7 +101,6 @@ struct list_head xenpage_list; /* linked list, of size xenheap_pages */ unsigned int tot_pages; /* number of pages currently possesed */ unsigned int max_pages; /* maximum value for tot_pages */ - unsigned int next_io_page; /* next io pfn to give to domain */ unsigned int xenheap_pages; /* # pages allocated from Xen heap */ /* Scheduling. */ diff -r dc36edf1102f -r bcccadcc56e5 tools/examples/xmexample.vti --- /dev/null Sun Oct 30 12:52:38 2005 +++ b/tools/examples/xmexample.vti Sun Oct 30 13:00:35 2005 @@ -0,0 +1,100 @@ +# -*- mode: python; -*- +#============================================================================ +# Python configuration setup for 'xm create'. +# This script sets the parameters used when a domain is created using 'xm create'. +# You use a separate script for each domain you want to create, or +# you can set the parameters for the domain on the xm command line. +#============================================================================ + +import os, re +arch = os.uname()[4] +arch_libdir = 'lib' + +#---------------------------------------------------------------------------- +# Kernel image file. +kernel = "/boot/Flash.fd" + +# The domain build function. VMX domain uses 'vmx'. 
+builder='vmx' + +# Initial memory allocation (in megabytes) for the new domain. +memory = 256 + +# A name for your domain. All domains must have different names. +name = "ExampleVMXDomain" + +# Which CPU to start domain on? +#cpu = -1 # leave to Xen to pick + +# Optionally define mac and/or bridge for the network interfaces. +# Random MACs are assigned if not given. +#vif = [ 'mac=aa:00:00:00:00:11, bridge=xen-br0' ] + +#---------------------------------------------------------------------------- +# Define the disk devices you want the domain to have access to, and +# what you want them accessible as. +# Each disk entry is of the form phy:UNAME,DEV,MODE +# where UNAME is the device, DEV is the device name the domain will see, +# and MODE is r for read-only, w for read-write. + +#disk = [ 'phy:hda1,hda1,r' ] +disk = [ 'file:/var/images/xenia64.img,ioemu:hda,w' ] + +#---------------------------------------------------------------------------- +# Set according to whether you want the domain restarted when it exits. +# The default is 'onreboot', which restarts the domain when it shuts down +# with exit code reboot. +# Other values are 'always', and 'never'. + +#restart = 'onreboot' + +#============================================================================ + +# New stuff +device_model = '/usr/' + arch_libdir + '/xen/bin/qemu-dm.debug' + +# Advanced users only. Don't touch if you don't know what you're doing +memmap = '/usr/lib/xen/boot/mem-map.sxp' + +#----------------------------------------------------------------------------- +# Disk image for +#cdrom= + +#----------------------------------------------------------------------------- +# boot on floppy (a), hard disk (c) or CD-ROM (d) +#boot=[a|c|d] +#----------------------------------------------------------------------------- +# write to temporary files instead of disk image files +#snapshot=1 + +#---------------------------------------------------------------------------- +# enable SDL library for graphics, default = 0 +sdl=1 + +stdvga=1 +#---------------------------------------------------------------------------- +# enable VNC library for graphics, default = 1 +vnc=0 + +#---------------------------------------------------------------------------- +# enable spawning vncviewer(only valid when vnc=1), default = 1 +vncviewer=0 + +#---------------------------------------------------------------------------- +# no graphics, use serial port +#nographic=0 + + +#----------------------------------------------------------------------------- +# enable audio support +#enable-audio=1 + + +#----------------------------------------------------------------------------- +# set the real time clock to local time [default=0 i.e. set to utc] +#localtime=1 + + +#----------------------------------------------------------------------------- +# start in full screen +#full-screen=1 diff -r 42cab8724273 tools/libxc/xc_ia64_stubs.c diff -r dc36edf1102f -r bcccadcc56e5 xen/arch/x86/vmx_vlapic.c --- /dev/null Sun Oct 30 12:52:38 2005 +++ b/xen/arch/x86/vmx_vlapic.c Sun Oct 30 13:00:35 2005 @@ -0,0 +1,997 @@ +/* + * vmx_vlapic.c: virtualize LAPIC for VMX vcpus. + * Copyright (c) 2004, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include <xen/config.h> +#include <xen/types.h> +#include <xen/mm.h> +#include <xen/xmalloc.h> +#include <asm/shadow.h> +#include <asm/page.h> +#include <xen/event.h> +#include <xen/trace.h> +#include <asm/vmx.h> +#include <asm/vmx_platform.h> +#include <asm/vmx_vlapic.h> + +#include <xen/lib.h> +#include <xen/sched.h> +#include <asm/current.h> +#include <public/io/ioreq.h> + +#ifdef CONFIG_VMX + +/* XXX remove this definition after GFW enabled */ +#define VLAPIC_NO_BIOS + +extern unsigned int get_apic_bus_scale(void); + +static unsigned int vlapic_lvt_mask[VLAPIC_LVT_NUM] = +{ + 0x310ff, 0x117ff, 0x117ff, 0x1f7ff, 0x1f7ff, 0x117ff +}; + +int vlapic_find_highest_irr(struct vlapic *vlapic) +{ + int result; + + result = find_highest_bit((uint32_t *)&vlapic->irr[0], INTR_LEN_32); + + if (result != -1 && result < 16) { + printk("VLAPIC: irr on reserved bits %d\n ", result); + domain_crash_synchronous(); + } + + return result; +} + +inline int vmx_apic_support(struct domain *d) +{ + return d->arch.vmx_platform.lapic_enable; +} + +int vlapic_find_highest_isr(struct vlapic *vlapic) +{ + int result; + + result = find_highest_bit((uint32_t *)&vlapic->isr[0], INTR_LEN_32); + + if (result != -1 && result < 16) { + int i = 0; + printk("VLAPIC: isr on reserved bits %d, isr is\n ", result); + for (i = 0; i < INTR_LEN_32; i += 2) + printk("%d: 0x%08x%08x\n", i, vlapic->isr[i], vlapic->isr[i+1]); + return -1; + } + + return result; +} + +uint32_t vlapic_update_ppr(struct vlapic *vlapic) +{ + uint32_t tpr, isrv, ppr; + int isr; + + tpr = (vlapic->task_priority >> 4) & 0xf; /* we want 7:4 */ + + isr = vlapic_find_highest_isr(vlapic); + if (isr != -1) + isrv = (isr >> 4) & 0xf; /* ditto */ + else + isrv = 0; + + if (tpr >= isrv) + ppr = vlapic->task_priority & 0xff; + else + ppr = isrv << 4; /* low 4 bits of PPR have to be cleared */ + + vlapic->processor_priority = ppr; + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC_INTERRUPT, + "vlapic_update_ppr: vlapic %p ppr %x isr %x isrv %x", + vlapic, ppr, isr, isrv); + + return ppr; +} + +/* This only for fixed delivery mode */ +int vlapic_match_dest(struct vlapic *target, struct vlapic *source, + int short_hand, int dest, int dest_mode, + int delivery_mode) +{ + int result = 0; + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_match_dest: " + "target %p source %p dest %x dest_mode %x short_hand %x " + "delivery_mode %x", + target, source, dest, dest_mode, short_hand, delivery_mode); + + switch (short_hand) { + case VLAPIC_NO_SHORTHAND: + if (!dest_mode) { /* Physical */ + result = (target->id == dest); + } else { /* Logical */ + if (((target->dest_format >> 28) & 0xf) == 0xf) { /* Flat mode */ + result = (target->logical_dest >> 24) & dest; + } else { + if ((delivery_mode == VLAPIC_DELIV_MODE_LPRI) && + (dest == 0xff)) { + /* What shall we do now? */ + printk("Broadcast IPI with lowest priority " + "delivery mode\n"); + domain_crash_synchronous(); + } + result = (target->logical_dest == (dest & 0xf)) ? 
+ ((target->logical_dest >> 4) & (dest >> 4)) : 0; + } + } + break; + + case VLAPIC_SHORTHAND_SELF: + if (target == source) + result = 1; + break; + + case VLAPIC_SHORTHAND_INCLUDE_SELF: + result = 1; + break; + + case VLAPIC_SHORTHAND_EXCLUDE_SELF: + if (target != source) + result = 1; + break; + + default: + break; + } + + return result; +} + +/* + * Add a pending IRQ into lapic. + * Return 1 if successfully added and 0 if discarded. + */ +int vlapic_accept_irq(struct vlapic *vlapic, int delivery_mode, + int vector, int level, int trig_mode) +{ + int result = 1; + + switch (delivery_mode) { + case VLAPIC_DELIV_MODE_FIXED: + case VLAPIC_DELIV_MODE_LPRI: + /* FIXME add logic for vcpu on reset */ + if (!vlapic->vcpu || !vlapic_enabled(vlapic)) + return 0; + + if (test_and_set_bit(vector, &vlapic->irr[0])) { + printk("<vlapic_accept_irq>" + "level trig mode repeatedly for vector %d\n", vector); + result = 0; + } else { + if (level) { + printk("<vlapic_accept_irq> level trig mode for vector %d\n", vector); + set_bit(vector, &vlapic->tmr[0]); + } + } + evtchn_set_pending(vlapic->vcpu, iopacket_port(vlapic->domain)); + break; + + case VLAPIC_DELIV_MODE_RESERVED: + printk("Ignore deliver mode 3 in vlapic_accept_irq\n"); + break; + + case VLAPIC_DELIV_MODE_SMI: + case VLAPIC_DELIV_MODE_NMI: + /* Fixme */ + printk("TODO: for guest SMI/NMI\n"); + break; + + case VLAPIC_DELIV_MODE_INIT: + if (!level && trig_mode == 1) { //Deassert + printk("This vmx_vlapic is for P4, no work for De-assert init\n"); + } else { + /* FIXME How to check the situation after vcpu reset? */ + vlapic->init_sipi_sipi_state = VLAPIC_INIT_SIPI_SIPI_STATE_WAIT_SIPI; + if (vlapic->vcpu) { + vcpu_pause(vlapic->vcpu); + } + } + break; + + case VLAPIC_DELIV_MODE_STARTUP: + if (vlapic->init_sipi_sipi_state != VLAPIC_INIT_SIPI_SIPI_STATE_WAIT_SIPI) + break; + vlapic->init_sipi_sipi_state = VLAPIC_INIT_SIPI_SIPI_STATE_NORM; + if (!vlapic->vcpu) { + /* XXX Call vmx_bringup_ap here */ + result = 0; + }else{ + //vmx_vcpu_reset(vlapic->vcpu); + } + break; + + default: + printk("TODO: not support interrup type %x\n", delivery_mode); + domain_crash_synchronous(); + break; + } + + return result; +} +/* + This function is used by both ioapic and local APIC + The bitmap is for vcpu_id + */ +struct vlapic* apic_round_robin(struct domain *d, + uint8_t dest_mode, + uint8_t vector, + uint32_t bitmap) +{ + int next, old; + struct vlapic* target = NULL; + + if (dest_mode == 0) { //Physical mode + printk("<apic_round_robin> lowest priority for physical mode\n"); + return NULL; + } + + if (!bitmap) { + printk("<apic_round_robin> no bit on bitmap\n"); + return NULL; + } + + spin_lock(&d->arch.vmx_platform.round_robin_lock); + + old = next = d->arch.vmx_platform.round_info[vector]; + + next++; + if (next == MAX_VIRT_CPUS || !d->vcpu[next]) + next = 0; + + do { + /* the vcpu array is arranged according to vcpu_id */ + if (test_bit(next, &bitmap)) { + target = d->vcpu[next]->arch.arch_vmx.vlapic; + if (!vlapic_enabled(target)) { + printk("warning: targe round robin local apic disabled\n"); + /* XXX should we domain crash?? 
Or should we return NULL */ + } + break; + } + + next ++; + if (next == MAX_VIRT_CPUS || !d->vcpu[next]) + next = 0; + }while(next != old); + + d->arch.vmx_platform.round_info[vector] = next; + spin_unlock(&d->arch.vmx_platform.round_robin_lock); + return target; +} + +void +vlapic_EOI_set(struct vlapic *vlapic) +{ + int vector = vlapic_find_highest_isr(vlapic); + + /* Not every write EOI will has correpsoning ISR, + one example is when Kernel check timer on setup_IO_APIC */ + if (vector == -1) { + return ; + } + + vlapic_clear_isr(vlapic, vector); + vlapic_update_ppr(vlapic); +} + +int vlapic_check_vector(struct vlapic *vlapic, + unsigned char dm, int vector) +{ + if ((dm == VLAPIC_DELIV_MODE_FIXED) && (vector < 16)) { + vlapic->err_status |= 0x40; + vlapic_accept_irq(vlapic, VLAPIC_DELIV_MODE_FIXED, + vlapic_lvt_vector(vlapic, VLAPIC_LVT_ERROR), 0, 0); + printk("<vlapic_check_vector>: check fail\n"); + return 0; + } + return 1; +} + + +void vlapic_ipi(struct vlapic *vlapic) +{ + unsigned int dest = (vlapic->icr_high >> 24) & 0xff; + unsigned int short_hand = (vlapic->icr_low >> 18) & 3; + unsigned int trig_mode = (vlapic->icr_low >> 15) & 1; + unsigned int level = (vlapic->icr_low >> 14) & 1; + unsigned int dest_mode = (vlapic->icr_low >> 11) & 1; + unsigned int delivery_mode = (vlapic->icr_low >> 8) & 7; + unsigned int vector = (vlapic->icr_low & 0xff); + + struct vlapic *target; + struct vcpu *v = NULL; + int result = 0; + uint32_t lpr_map; + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_ipi: " + "icr_high %x icr_low %x " + "short_hand %x dest %x trig_mode %x level %x " + "dest_mode %x delivery_mode %x vector %x", + vlapic->icr_high, vlapic->icr_low, + short_hand, dest, trig_mode, level, dest_mode, + delivery_mode, vector); + + for_each_vcpu ( vlapic->domain, v ) { + target = VLAPIC(v); + if (vlapic_match_dest(target, vlapic, short_hand, + dest, dest_mode, delivery_mode)) { + if (delivery_mode == VLAPIC_DELIV_MODE_LPRI) { + set_bit(v->vcpu_id, &lpr_map); + }else + result = vlapic_accept_irq(target, delivery_mode, + vector, level, trig_mode); + } + } + + if (delivery_mode == VLAPIC_DELIV_MODE_LPRI) { + extern struct vlapic* + apic_round_robin(struct domain *d, + uint8_t dest_mode, uint8_t vector, uint32_t bitmap); + + v = vlapic->vcpu; + target = apic_round_robin(v->domain, dest_mode, vector, lpr_map); + + if (target) + vlapic_accept_irq(target, delivery_mode, + vector, level, trig_mode); + } +} + +void vlapic_begin_timer(struct vlapic *vlapic) +{ + s_time_t cur = NOW(), offset; + + offset = vlapic->timer_current * + (262144 / get_apic_bus_scale()) * vlapic->timer_divide_counter; + vlapic->vlapic_timer.expires = cur + offset; + + set_ac_timer(&(vlapic->vlapic_timer), vlapic->vlapic_timer.expires ); + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_begin_timer: " + "bus_scale %x now %08x%08x expire %08x%08x " + "offset %08x%08x current %x", + get_apic_bus_scale(), (uint32_t)(cur >> 32), (uint32_t)cur, + (uint32_t)(vlapic->vlapic_timer.expires >> 32), + (uint32_t) vlapic->vlapic_timer.expires, + (uint32_t)(offset >> 32), (uint32_t)offset, + vlapic->timer_current); +} + +void vlapic_read_aligned(struct vlapic *vlapic, unsigned int offset, + unsigned int len, unsigned int *result) +{ + if (len != 4) { + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, + "local apic read with len=%d (should be 4)", len); + } + + *result = 0; + + switch (offset) { + case APIC_ID: + *result = (vlapic->id) << 24; + break; + + case APIC_LVR: + *result = vlapic->version; + break; + + case APIC_TASKPRI: + *result = vlapic->task_priority; + break; 
+ + case APIC_ARBPRI: + printk("Access local APIC ARBPRI register which is for P6\n"); + break; + + case APIC_PROCPRI: + *result = vlapic->processor_priority; + break; + + case APIC_EOI: /* EOI is write only */ + break; + + case APIC_LDR: + *result = vlapic->logical_dest; + break; + + case APIC_DFR: + *result = vlapic->dest_format; + break; + + case APIC_SPIV: + *result = vlapic->spurious_vec; + break; + + case APIC_ISR: + case 0x110: + case 0x120: + case 0x130: + case 0x140: + case 0x150: + case 0x160: + case 0x170: + *result = vlapic->isr[(offset - APIC_ISR) >> 4]; + break; + + case APIC_TMR: + case 0x190: + case 0x1a0: + case 0x1b0: + case 0x1c0: + case 0x1d0: + case 0x1e0: + case 0x1f0: + *result = vlapic->tmr[(offset - APIC_TMR) >> 4]; + break; + + case APIC_IRR: + case 0x210: + case 0x220: + case 0x230: + case 0x240: + case 0x250: + case 0x260: + case 0x270: + *result = vlapic->irr[(offset - APIC_IRR) >> 4]; + break; + + case APIC_ESR: + if (vlapic->err_write_count) + *result = vlapic->err_status; + break; + + case APIC_ICR: + *result = vlapic->icr_low; + break; + + case APIC_ICR2: + *result = vlapic->icr_high; + break; + + case APIC_LVTT: /* LVT Timer Reg */ + case APIC_LVTTHMR: /* LVT Thermal Monitor */ + case APIC_LVTPC: /* LVT Performance Counter */ + case APIC_LVT0: /* LVT LINT0 Reg */ + case APIC_LVT1: /* LVT Lint1 Reg */ + case APIC_LVTERR: /* LVT Error Reg */ + *result = vlapic->lvt[(offset - APIC_LVTT) >> 4]; + break; + + case APIC_TMICT: + *result = vlapic->timer_initial; + break; + + case APIC_TMCCT: //Timer CCR + { + uint32_t counter; + s_time_t passed, cur = NOW(); + + if (cur <= vlapic->timer_current_update) { + passed = ~0x0LL - vlapic->timer_current_update + cur; + VMX_DBG_LOG(DBG_LEVEL_VLAPIC,"time elapsed"); + }else + passed = cur - vlapic->timer_current_update; + + counter = (passed * get_apic_bus_scale()) / (262144* vlapic->timer_divide_counter); + if (vlapic->timer_current > counter) + *result = vlapic->timer_current - counter; + else { + if (!vlapic_lvt_timer_period(vlapic)) + *result = 0; + //FIXME should we add interrupt here? 
+ else + //*result = counter % vlapic->timer_initial; + *result = vlapic->timer_initial - (counter - vlapic->timer_current); + } + vlapic->timer_current = *result; + vlapic->timer_current_update = NOW(); + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER, + "initial %x timer current %x " + "update %08x%08x cur %08x%08x offset %d", + vlapic->timer_initial, vlapic->timer_current, + (uint32_t)(vlapic->timer_current_update >> 32), + (uint32_t)vlapic->timer_current_update , + (uint32_t)(cur >> 32), (uint32_t)cur, counter); + } + break; + + case APIC_TDCR: + *result = vlapic->timer_divconf; + break; + + default: + printk("Read local APIC address %x not implemented\n",offset); + *result = 0; + break; + } +} + +unsigned long vlapic_read(struct vcpu *v, unsigned long address, + unsigned long len) +{ + unsigned int alignment; + unsigned int tmp; + unsigned long result; + struct vlapic *vlapic = VLAPIC(v); + unsigned int offset = address - vlapic->base_address; + + if ( len != 4) { + /* some bugs on kernel cause read this with byte*/ + printk("Local APIC read with len = %lx, should be 4 instead\n", len); + } + + alignment = offset & 0x3; + + vlapic_read_aligned(vlapic, offset & ~0x3, 4, &tmp); + switch (len) { + case 1: + result = *((unsigned char *)&tmp + alignment); + break; + + case 2: + result = *(unsigned short *)((unsigned char *)&tmp + alignment); + break; + + case 4: + result = *(unsigned int *)((unsigned char *)&tmp + alignment); + break; + + default: + printk("Local APIC read with len = %lx, should be 4 instead\n", len); + domain_crash_synchronous(); + break; + } + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, + "vlapic_read offset %x with length %lx and the result is %lx", + offset, len, result); + return result; +} + +unsigned long vlapic_write(struct vcpu *v, unsigned long address, + unsigned long len, unsigned long val) +{ + struct vlapic *vlapic = VLAPIC(v); + unsigned int offset = address - vlapic->base_address; + + if (offset != 0xb0) + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, + "vlapic_write offset %x with length %lx source %lx", + offset, len, val); + + /* + * According to IA 32 Manual, all resgiters should be accessed with + * 32 bits alignment. + */ + if (len != 4) { + unsigned int tmp; + unsigned char alignment; + + /* Some kernel do will access with byte/word alignment*/ + printk("Notice: Local APIC write with len = %lx\n",len); + alignment = offset & 0x3; + tmp = vlapic_read(v, offset & (~0x3), 4); + switch (len) { + case 1: + /* XXX the saddr is a tmp variable from caller, so should be ok + But we should still change the following ref to val to + local variable later */ + val = (tmp & ~(0xff << alignment)) | + ((val & 0xff) << alignment); + break; + + case 2: + if (alignment != 0x0 && alignment != 0x2) { + printk("alignment error for vlapic with len == 2\n"); + domain_crash_synchronous(); + } + + val = (tmp & ~(0xffff << alignment)) | + ((val & 0xffff) << alignment); + break; + + case 3: + /* will it happen? 
*/ + printk("vlapic_write with len = 3 !!!\n"); + domain_crash_synchronous(); + break; + + default: + printk("Local APIC write with len = %lx, should be 4 instead\n", len); + domain_crash_synchronous(); + break; + } + } + + offset &= 0xff0; + + switch (offset) { + case APIC_ID: /* Local APIC ID */ + vlapic->id = ((val) >> 24) & VAPIC_ID_MASK; + break; + + case APIC_TASKPRI: + vlapic->task_priority = val & 0xff; + vlapic_update_ppr(vlapic); + break; + + case APIC_EOI: + vlapic_EOI_set(vlapic); + break; + + case APIC_LDR: + vlapic->logical_dest = val & VAPIC_LDR_MASK; + break; + + case APIC_DFR: + vlapic->dest_format = val ; + break; + + case APIC_SPIV: + vlapic->spurious_vec = val & 0x1ff; + if (!(vlapic->spurious_vec & 0x100)) { + int i = 0; + for (i=0; i < VLAPIC_LVT_NUM; i++) + vlapic->lvt[i] |= 0x10000; + vlapic->status |= VLAPIC_SOFTWARE_DISABLE_MASK; + } + else + vlapic->status &= ~VLAPIC_SOFTWARE_DISABLE_MASK; + break; + + case APIC_ESR: + vlapic->err_write_count = !vlapic->err_write_count; + if (!vlapic->err_write_count) + vlapic->err_status = 0; + break; + + case APIC_ICR: + /* No delay here, so we always clear the pending bit*/ + vlapic->icr_low = val & ~(1 << 12); + vlapic_ipi(vlapic); + break; + + case APIC_ICR2: + vlapic->icr_high = val & 0xff000000; + break; + + case APIC_LVTT: // LVT Timer Reg + case APIC_LVTTHMR: // LVT Thermal Monitor + case APIC_LVTPC: // LVT Performance Counter + case APIC_LVT0: // LVT LINT0 Reg + case APIC_LVT1: // LVT Lint1 Reg + case APIC_LVTERR: // LVT Error Reg + { + int vt = (offset - APIC_LVTT) >> 4; + + vlapic->lvt[vt] = val & vlapic_lvt_mask[vt]; + if (vlapic->status & VLAPIC_SOFTWARE_DISABLE_MASK) + vlapic->lvt[vt] |= VLAPIC_LVT_BIT_MASK; + + /* On hardware, when write vector less than 0x20 will error */ + vlapic_check_vector(vlapic, vlapic_lvt_dm(vlapic->lvt[vt]), + vlapic_lvt_vector(vlapic, vt)); + + if (!vlapic->vcpu_id && (offset == APIC_LVT0)) { + if ((vlapic->lvt[VLAPIC_LVT_LINT0] & VLAPIC_LVT_BIT_DELIMOD) + == 0x700) { + if (!(vlapic->lvt[VLAPIC_LVT_LINT0] & VLAPIC_LVT_BIT_MASK)) { + set_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status); + }else + clear_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status); + } + else + clear_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status); + } + + } + break; + + case APIC_TMICT: + if (vlapic_timer_active(vlapic)) + rem_ac_timer(&(vlapic->vlapic_timer)); + + vlapic->timer_initial = val; + vlapic->timer_current = val; + vlapic->timer_current_update = NOW(); + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, + "timer_init %x timer_current %x timer_current_update %08x%08x", + vlapic->timer_initial, vlapic->timer_current, (uint32_t)(vlapic->timer_current_update>>32), (uint32_t)vlapic->timer_current_update); + vlapic_begin_timer(vlapic); + break; + + case APIC_TDCR: + { + //FIXME clean this code + unsigned char tmp1,tmp2; + tmp1 = (val & 0xf); + tmp2 = ((tmp1 & 0x3 )|((tmp1 & 0x8) >>1)) + 1; + vlapic->timer_divide_counter = 0x1<<tmp2; + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER, + "timer divider is 0x%x", + vlapic->timer_divide_counter); + } + break; + + default: + printk("Local APIC Write to read-only register\n"); + break; + } + return 1; +} + +int vlapic_range(struct vcpu *v, unsigned long addr) +{ + struct vlapic *vlapic = VLAPIC(v); + + if (vlapic_global_enabled(vlapic) && + (addr >= vlapic->base_address) && + (addr <= (vlapic->base_address + VLOCAL_APIC_MEM_LENGTH))) + return 1; + + return 0; +} + +void vlapic_msr_set(struct vlapic *vlapic, uint64_t value) +{ + /* When apic disabled */ + if (!vlapic) + return; + + if (vlapic->vcpu_id) + value 
&= ~MSR_IA32_APICBASE_BSP; + + vlapic->apic_base_msr = value; + vlapic->base_address = vlapic_get_base_address(vlapic); + + if (!(value & 0x800)) + set_bit(_VLAPIC_GLOB_DISABLE, &vlapic->status ); + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, + "apic base msr = 0x%08x%08x,\nbase address = 0x%lx", + (uint32_t)(vlapic->apic_base_msr >> 32), + (uint32_t)vlapic->apic_base_msr, + vlapic->base_address); +} + +static inline int vlapic_get_init_id(struct vcpu *v) +{ + return v->vcpu_id; +} + +void vlapic_timer_fn(void *data) +{ + struct vlapic *vlapic; + + vlapic = data; + if (!vlapic_enabled(vlapic)) return; + + vlapic->timer_current_update = NOW(); + + if (vlapic_lvt_timer_enabled(vlapic)) { + if (!vlapic_irr_status(vlapic, + vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER))) { + test_and_set_bit(vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER), + &vlapic->irr[0]); + } + else + vlapic->intr_pending_count[vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER)]++; + } + + vlapic->timer_current_update = NOW(); + if (vlapic_lvt_timer_period(vlapic)) { + s_time_t offset; + + vlapic->timer_current = vlapic->timer_initial; + offset = vlapic->timer_current * (262144/get_apic_bus_scale()) * vlapic->timer_divide_counter; + vlapic->vlapic_timer.expires = NOW() + offset; + set_ac_timer(&(vlapic->vlapic_timer), vlapic->vlapic_timer.expires); + }else { + vlapic->timer_current = 0; + } + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC_TIMER, + "vlapic_timer_fn: now: %08x%08x expire %08x%08x init %x current %x", + (uint32_t)(NOW() >> 32),(uint32_t)NOW(), + (uint32_t)(vlapic->vlapic_timer.expires >> 32), + (uint32_t)vlapic->vlapic_timer.expires, + vlapic->timer_initial,vlapic->timer_current); +} + +#if 0 +static int +vlapic_check_direct_intr(struct vcpu *v, int * mode) +{ + struct vlapic *vlapic = VLAPIC(v); + int type; + + type = __fls(vlapic->direct_intr.deliver_mode); + if (type == -1) + return -1; + + *mode = type; + return 0; +} +#endif + +int +vlapic_accept_pic_intr(struct vcpu *v) +{ + struct vlapic *vlapic = VLAPIC(v); + + return vlapic ? 
test_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status) : 1; +} + +int cpu_get_apic_interrupt(struct vcpu* v, int *mode) +{ + struct vlapic *vlapic = VLAPIC(v); + + if (vlapic && vlapic_enabled(vlapic)) { + int highest_irr = vlapic_find_highest_irr(vlapic); + + if (highest_irr != -1 && highest_irr >= vlapic->processor_priority) { + if (highest_irr < 0x10) { + vlapic->err_status |= 0x20; + /* XXX What will happen if this vector illegal stil */ + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, + "vmx_intr_assist: illegal vector number %x err_status %x", + highest_irr, vlapic_lvt_vector(vlapic, VLAPIC_LVT_ERROR)); + + set_bit(vlapic_lvt_vector(vlapic, VLAPIC_LVT_ERROR), &vlapic->irr[0]); + highest_irr = vlapic_lvt_vector(vlapic, VLAPIC_LVT_ERROR); + } + + *mode = VLAPIC_DELIV_MODE_FIXED; + return highest_irr; + } + } + return -1; +} + +void vlapic_post_injection(struct vcpu *v, int vector, int deliver_mode) { + struct vlapic *vlapic = VLAPIC(v); + + if (!vlapic) + return; + + switch (deliver_mode) { + case VLAPIC_DELIV_MODE_FIXED: + case VLAPIC_DELIV_MODE_LPRI: + vlapic_set_isr(vlapic, vector); + vlapic_clear_irr(vlapic, vector); + vlapic_update_ppr(vlapic); + + if (vector == vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER)) { + vlapic->intr_pending_count[vector]--; + if (vlapic->intr_pending_count[vector] > 0) + test_and_set_bit(vlapic_lvt_vector(vlapic, VLAPIC_LVT_TIMER), + &vlapic->irr[0]); + } + + break; + /*XXX deal with these later */ + + case VLAPIC_DELIV_MODE_RESERVED: + printk("Ignore deliver mode 3 in vlapic_post_injection\n"); + break; + + case VLAPIC_DELIV_MODE_SMI: + case VLAPIC_DELIV_MODE_NMI: + case VLAPIC_DELIV_MODE_INIT: + case VLAPIC_DELIV_MODE_STARTUP: + vlapic->direct_intr.deliver_mode &= ~(1 << deliver_mode); + break; + + default: + printk("<vlapic_post_injection> error deliver mode\n"); + break; + } +} + +static int vlapic_reset(struct vlapic *vlapic) +{ + struct vcpu *v = vlapic->vcpu; + int apic_id = v->vcpu_id, i; + + if (!v || !vlapic) + return 0; + + memset(vlapic, 0,sizeof(struct vlapic)); + + v->arch.arch_vmx.vlapic = vlapic; + + vlapic->domain = v->domain; + + vlapic->id = apic_id; + + vlapic->version = VLAPIC_VERSION; + + vlapic->apic_base_msr = VLAPIC_BASE_MSR_INIT_VALUE; + + if (apic_id == 0) + vlapic->apic_base_msr |= MSR_IA32_APICBASE_BSP; + vlapic->base_address = vlapic_get_base_address(vlapic); + + for (i = 0; i < VLAPIC_LVT_NUM; i++) + vlapic->lvt[i] = VLAPIC_LVT_BIT_MASK; + + vlapic->dest_format = 0xffffffffU; + + vlapic->spurious_vec = 0xff; + + + init_ac_timer(&vlapic->vlapic_timer, + vlapic_timer_fn, vlapic, v->processor); + +#ifdef VLAPIC_NO_BIOS + /* + * XXX According to mp sepcific, BIOS will enable LVT0/1, + * remove it after BIOS enabled + */ + if (!v->vcpu_id) { + vlapic->lvt[VLAPIC_LVT_LINT0] = 0x700; + vlapic->lvt[VLAPIC_LVT_LINT1] = 0x500; + set_bit(_VLAPIC_BSP_ACCEPT_PIC, &vlapic->status); + } +#endif + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_reset: " + "vcpu=%p id=%d vlapic_apic_base_msr=%08x%08x " + "vlapic_base_address=%0lx", + v, vlapic->id, (uint32_t)(vlapic->apic_base_msr >> 32), + (uint32_t)vlapic->apic_base_msr, vlapic->base_address); + + return 1; +} + +int vlapic_init(struct vcpu *v) +{ + struct vlapic *vlapic = NULL; + + VMX_DBG_LOG(DBG_LEVEL_VLAPIC, "vlapic_init %d", v->vcpu_id); + + vlapic = xmalloc_bytes(sizeof(struct vlapic)); + + if (!vlapic) { + printk("malloc vlapic error for vcpu %x\n", v->vcpu_id); + return -ENOMEM; + } + + vlapic->vcpu = v; + + vlapic_reset(vlapic); + + return 0; +} + +#endif /* CONFIG_VMX */ diff -r dc36edf1102f -r bcccadcc56e5 
xen/include/asm-x86/vmx_vlapic.h --- /dev/null Sun Oct 30 12:52:38 2005 +++ b/xen/include/asm-x86/vmx_vlapic.h Sun Oct 30 13:00:35 2005 @@ -0,0 +1,245 @@ +/* + * vmx_vlapic.h: virtualize LAPIC definitions. + * Copyright (c) 2004, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#ifndef VMX_VLAPIC_H +#define VMX_VLAPIC_H + +#include <asm/msr.h> +#include <public/io/ioreq.h> + +#if defined(__i386__) || defined(__x86_64__) +static inline int __fls(uint32_t word) +{ + int bit; + + __asm__("bsrl %1,%0" + :"=r" (bit) + :"rm" (word)); + return word ? bit : -1; +} +#else +#define __fls(x) generic_fls(x) +static __inline__ int generic_fls(uint32_t x) +{ + int r = 31; + + if (!x) + return -1; + if (!(x & 0xffff0000u)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xff000000u)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xf0000000u)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xc0000000u)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000u)) { + x <<= 1; + r -= 1; + } + return r; +} +#endif + +static __inline__ int find_highest_bit(uint32_t *data, int length) +{ + while(length && !data[--length]); + return __fls(data[length]) + 32 * length; +} + +#define VLAPIC(v) (v->arch.arch_vmx.vlapic) + +#define VAPIC_ID_MASK 0xff +#define VAPIC_LDR_MASK (VAPIC_ID_MASK << 24) +#define VLAPIC_VERSION 0x00050014 + +#define VLAPIC_BASE_MSR_MASK 0x00000000fffff900ULL +#define VLAPIC_BASE_MSR_INIT_BASE_ADDR 0xfee00000U +#define VLAPIC_BASE_MSR_BASE_ADDR_MASK 0xfffff000U +#define VLAPIC_BASE_MSR_INIT_VALUE (VLAPIC_BASE_MSR_INIT_BASE_ADDR | \ + MSR_IA32_APICBASE_ENABLE) +#define VLOCAL_APIC_MEM_LENGTH (1 << 12) + +#define VLAPIC_LVT_TIMER 0 +#define VLAPIC_LVT_THERMAL 1 +#define VLAPIC_LVT_PERFORM 2 +#define VLAPIC_LVT_LINT0 3 +#define VLAPIC_LVT_LINT1 4 +#define VLAPIC_LVT_ERROR 5 +#define VLAPIC_LVT_NUM 6 + +#define VLAPIC_LVT_BIT_MASK (1 << 16) +#define VLAPIC_LVT_BIT_VECTOR 0xff +#define VLAPIC_LVT_BIT_DELIMOD (0x7 << 8) +#define VLAPIC_LVT_BIT_DELISTATUS (1 << 12) +#define VLAPIC_LVT_BIT_POLARITY (1 << 13) +#define VLAPIC_LVT_BIT_IRR (1 << 14) +#define VLAPIC_LVT_BIT_TRIG (1 << 15) +#define VLAPIC_LVT_TIMERMODE (1 << 17) + +#define VLAPIC_DELIV_MODE_FIXED 0x0 +#define VLAPIC_DELIV_MODE_LPRI 0x1 +#define VLAPIC_DELIV_MODE_SMI 0x2 +#define VLAPIC_DELIV_MODE_RESERVED 0x3 +#define VLAPIC_DELIV_MODE_NMI 0x4 +#define VLAPIC_DELIV_MODE_INIT 0x5 +#define VLAPIC_DELIV_MODE_STARTUP 0x6 +#define VLAPIC_DELIV_MODE_EXT 0x7 + + + +#define VLAPIC_NO_SHORTHAND 0x0 +#define VLAPIC_SHORTHAND_SELF 0x1 +#define VLAPIC_SHORTHAND_INCLUDE_SELF 0x2 +#define VLAPIC_SHORTHAND_EXCLUDE_SELF 0x3 + +#define vlapic_lvt_timer_enabled(vlapic) \ + (!(vlapic->lvt[VLAPIC_LVT_TIMER] & VLAPIC_LVT_BIT_MASK)) + +#define vlapic_lvt_vector(vlapic, type) \ + (vlapic->lvt[type] & VLAPIC_LVT_BIT_VECTOR) + +#define vlapic_lvt_dm(value) ((value >> 8) && 7) +#define vlapic_lvt_timer_period(vlapic) \ + 
(vlapic->lvt[VLAPIC_LVT_TIMER] & VLAPIC_LVT_TIMERMODE) + +#define vlapic_isr_status(vlapic,vector) \ + test_bit(vector, &vlapic->isr[0]) + +#define vlapic_irr_status(vlapic,vector) \ + test_bit(vector, &vlapic->irr[0]) + +#define vlapic_set_isr(vlapic,vector) \ + test_and_set_bit(vector, &vlapic->isr[0]) + +#define vlapic_set_irr(vlapic,vector) \ + test_and_set_bit(vector, &vlapic->irr[0]) + +#define vlapic_clear_irr(vlapic,vector) \ + clear_bit(vector, &vlapic->irr[0]) +#define vlapic_clear_isr(vlapic,vector) \ + clear_bit(vector, &vlapic->isr[0]) + +#define vlapic_enabled(vlapic) \ + (!(vlapic->status & \ + (VLAPIC_GLOB_DISABLE_MASK | VLAPIC_SOFTWARE_DISABLE_MASK))) + +#define vlapic_global_enabled(vlapic) \ + !(test_bit(_VLAPIC_GLOB_DISABLE, &(vlapic)->status)) + +typedef struct direct_intr_info { + int deliver_mode; + int source[6]; +} direct_intr_info_t; + +#define VLAPIC_INIT_SIPI_SIPI_STATE_NORM 0 +#define VLAPIC_INIT_SIPI_SIPI_STATE_WAIT_SIPI 1 + +struct vlapic +{ + //FIXME check what would be 64 bit on EM64T + uint32_t version; +#define _VLAPIC_GLOB_DISABLE 0x0 +#define VLAPIC_GLOB_DISABLE_MASK 0x1 +#define VLAPIC_SOFTWARE_DISABLE_MASK 0x2 +#define _VLAPIC_BSP_ACCEPT_PIC 0x3 + uint32_t status; + uint32_t id; + uint32_t vcpu_id; + unsigned long base_address; + uint32_t isr[8]; + uint32_t irr[INTR_LEN_32]; + uint32_t tmr[INTR_LEN_32]; + uint32_t task_priority; + uint32_t processor_priority; + uint32_t logical_dest; + uint32_t dest_format; + uint32_t spurious_vec; + uint32_t lvt[6]; + uint32_t timer_initial; + uint32_t timer_current; + uint32_t timer_divconf; + uint32_t timer_divide_counter; + struct ac_timer vlapic_timer; + int intr_pending_count[MAX_VECTOR]; + s_time_t timer_current_update; + uint32_t icr_high; + uint32_t icr_low; + direct_intr_info_t direct_intr; + uint32_t err_status; + unsigned long init_ticks; + uint32_t err_write_count; + uint64_t apic_base_msr; + uint32_t init_sipi_sipi_state; + struct vcpu *vcpu; + struct domain *domain; +}; + +static inline int vlapic_timer_active(struct vlapic *vlapic) +{ + return active_ac_timer(&(vlapic->vlapic_timer)); +} + +int vlapic_find_highest_irr(struct vlapic *vlapic); + +int vlapic_find_highest_isr(struct vlapic *vlapic); + +static uint32_t inline vlapic_get_base_address(struct vlapic *vlapic) +{ + return (vlapic->apic_base_msr & VLAPIC_BASE_MSR_BASE_ADDR_MASK); +} + +void vlapic_post_injection(struct vcpu* v, int vector, int deliver_mode); + +int cpu_get_apic_interrupt(struct vcpu* v, int *mode); + +extern uint32_t vlapic_update_ppr(struct vlapic *vlapic); + +int vlapic_update(struct vcpu *v); + +extern int vlapic_init(struct vcpu *vc); + +extern void vlapic_msr_set(struct vlapic *vlapic, uint64_t value); + +int vlapic_range(struct vcpu *v, unsigned long addr); + +unsigned long vlapic_write(struct vcpu *v, unsigned long address, + unsigned long len, unsigned long val); + +unsigned long vlapic_read(struct vcpu *v, unsigned long address, + unsigned long len); + +int vlapic_accept_pic_intr(struct vcpu *v); + +struct vlapic* apic_round_robin(struct domain *d, + uint8_t dest_mode, + uint8_t vector, + uint32_t bitmap); + +int vmx_apic_support(struct domain *d); + +#endif /* VMX_VLAPIC_H */ _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog