[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [XenPPC] [xenppc-unstable] [ppc-merge] null merge
# HG changeset patch # User Jimi Xenidis <jimix@xxxxxxxxxxxxxx> # Node ID e6c7667c97eda1ba63ca749f102f87328e8375a1 # Parent 17f7a426b2cddbc9b347157a49edfa921cf58867 # Parent 06001ce66e1cdc12e1f4d2b2dbd5100b457ad0c1 [ppc-merge] null merge --- linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-post | 2 linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre | 12 Makefile | 1 buildconfigs/Rules.mk | 7 buildconfigs/linux-defconfig_xen0_ia64 | 1 buildconfigs/linux-defconfig_xenU_ia64 | 1 buildconfigs/linux-defconfig_xen_ia64 | 1 buildconfigs/mk.linux-2.6-xen | 8 config/ppc64.mk | 1 docs/src/user.tex | 502 +++++++++++++++- extras/mini-os/console/xencons_ring.c | 5 extras/mini-os/events.c | 84 +- extras/mini-os/gnttab.c | 158 +++++ extras/mini-os/include/events.h | 24 extras/mini-os/include/gnttab.h | 14 extras/mini-os/include/lib.h | 13 extras/mini-os/include/mm.h | 8 extras/mini-os/include/os.h | 57 + extras/mini-os/include/xenbus.h | 29 extras/mini-os/kernel.c | 9 extras/mini-os/lib/string.c | 10 extras/mini-os/mm.c | 125 ++++ extras/mini-os/time.c | 6 extras/mini-os/xenbus/xenbus.c | 106 ++- linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c | 289 +++++++++ linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c | 259 ++++---- linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S | 4 linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S | 20 linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S | 2 linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S | 21 linux-2.6-xen-sparse/drivers/xen/console/console.c | 35 - linux-2.6-xen-sparse/drivers/xen/netback/netback.c | 44 - linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c | 5 linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c | 11 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c | 71 +- linux-2.6-xen-sparse/include/asm-ia64/page.h | 2 linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h | 37 + tools/examples/xmexample.vti | 25 tools/ioemu/hw/piix4acpi.c | 2 tools/libxc/Makefile | 4 tools/libxc/xc_hvm_build.c | 9 tools/libxc/xc_ia64_stubs.c | 2 
tools/libxc/xc_linux_build.c | 25 tools/libxc/xc_load_elf.c | 15 tools/libxc/xc_ppc_linux_build.c | 10 tools/python/xen/xend/XendDomainInfo.py | 3 tools/xenmon/xenmon.py | 26 tools/xenstat/xentop/xentop.c | 10 tools/xentrace/Makefile | 4 tools/xentrace/xenctx.c | 62 ++ xen/arch/ia64/Makefile | 15 xen/arch/ia64/Rules.mk | 13 xen/arch/ia64/asm-xsi-offsets.c | 86 -- xen/arch/ia64/linux-xen/iosapic.c | 1 xen/arch/ia64/linux-xen/irq_ia64.c | 8 xen/arch/ia64/vmx/vmmu.c | 32 - xen/arch/ia64/vmx/vmx_entry.S | 34 - xen/arch/ia64/vmx/vmx_hypercall.c | 6 xen/arch/ia64/vmx/vmx_ivt.S | 6 xen/arch/ia64/vmx/vmx_phy_mode.c | 21 xen/arch/ia64/vmx/vmx_process.c | 3 xen/arch/ia64/vmx/vtlb.c | 28 xen/arch/ia64/xen/acpi.c | 6 xen/arch/ia64/xen/dom0_ops.c | 2 xen/arch/ia64/xen/dom_fw.c | 195 +++--- xen/arch/ia64/xen/domain.c | 51 + xen/arch/ia64/xen/faults.c | 33 - xen/arch/ia64/xen/fw_emul.c | 45 + xen/arch/ia64/xen/hypercall.c | 3 xen/arch/ia64/xen/hyperprivop.S | 118 ++- xen/arch/ia64/xen/irq.c | 163 ----- xen/arch/ia64/xen/ivt.S | 9 xen/arch/ia64/xen/mm.c | 513 +++++++++++++---- xen/arch/ia64/xen/pcdp.c | 142 ++++ xen/arch/ia64/xen/regionreg.c | 19 xen/arch/ia64/xen/vcpu.c | 124 ++-- xen/arch/ia64/xen/vhpt.c | 53 - xen/arch/ia64/xen/xenasm.S | 31 - xen/arch/ia64/xen/xenmem.c | 14 xen/arch/ia64/xen/xenmisc.c | 12 xen/arch/ia64/xen/xensetup.c | 77 +- xen/arch/ppc/setup.c | 16 xen/arch/x86/domain.c | 2 xen/arch/x86/hvm/vmx/vmcs.c | 216 +++---- xen/arch/x86/hvm/vmx/vmx.c | 174 +++-- xen/arch/x86/setup.c | 11 xen/arch/x86/shadow.c | 12 xen/arch/x86/traps.c | 7 xen/arch/x86/x86_emulate.c | 2 xen/common/dom0_ops.c | 100 +-- xen/common/domain.c | 98 +-- xen/common/elf.c | 21 xen/common/event_channel.c | 7 xen/common/keyhandler.c | 2 xen/common/memory.c | 2 xen/common/sched_credit.c | 7 xen/common/sched_sedf.c | 2 xen/drivers/char/console.c | 2 xen/include/asm-ia64/dom_fw.h | 7 xen/include/asm-ia64/domain.h | 32 - xen/include/asm-ia64/linux-xen/asm/iosapic.h | 1 
xen/include/asm-ia64/linux-xen/asm/pgtable.h | 15 xen/include/asm-ia64/mm.h | 16 xen/include/asm-ia64/offsets.h | 3 xen/include/asm-ia64/regionreg.h | 1 xen/include/asm-ia64/vmx.h | 3 xen/include/asm-ia64/vmx_phy_mode.h | 1 xen/include/asm-ia64/xenkregs.h | 2 xen/include/asm-ia64/xensystem.h | 18 xen/include/asm-x86/hvm/vmx/vmcs.h | 15 xen/include/asm-x86/hvm/vmx/vmx.h | 5 xen/include/public/arch-ia64.h | 21 xen/include/public/io/netif.h | 13 xen/include/xen/sched.h | 25 114 files changed, 3508 insertions(+), 1395 deletions(-) diff -r 17f7a426b2cd -r e6c7667c97ed Makefile --- a/Makefile Thu Jul 06 06:44:19 2006 -0400 +++ b/Makefile Thu Jul 06 06:47:16 2006 -0400 @@ -130,6 +130,7 @@ distclean: rm -rf dist patches/tmp for i in $(ALLKERNELS) ; do $(MAKE) $$i-delete ; done for i in $(ALLSPARSETREES) ; do $(MAKE) $$i-mrproper ; done + rm -rf patches/*/.makedep # Linux name for GNU distclean .PHONY: mrproper diff -r 17f7a426b2cd -r e6c7667c97ed buildconfigs/Rules.mk --- a/buildconfigs/Rules.mk Thu Jul 06 06:44:19 2006 -0400 +++ b/buildconfigs/Rules.mk Thu Jul 06 06:47:16 2006 -0400 @@ -59,10 +59,6 @@ ifneq ($(PATCHDIRS),) $(patsubst patches/%,patches/%/.makedep,$(PATCHDIRS)): patches/%/.makedep: @echo 'ref-$*/.valid-ref: $$(wildcard patches/$*/*.patch)' >$@ -.PHONY: clean -clean:: - rm -f patches/*/.makedep - ref-%/.valid-ref: pristine-%/.valid-pristine set -e rm -rf $(@D) @@ -111,7 +107,8 @@ linux-2.6-xen.patch: ref-linux-$(LINUX_V rm -rf tmp-$@ %-mrproper: - rm -rf pristine-$(*)* ref-$(*)* $*.tar.bz2 + $(MAKE) -f buildconfigs/mk.$*-xen mrpropper + rm -rf pristine-$(*)* ref-$(*)* rm -rf $*-xen.patch .PHONY: config-update-pae diff -r 17f7a426b2cd -r e6c7667c97ed buildconfigs/linux-defconfig_xen0_ia64 --- a/buildconfigs/linux-defconfig_xen0_ia64 Thu Jul 06 06:44:19 2006 -0400 +++ b/buildconfigs/linux-defconfig_xen0_ia64 Thu Jul 06 06:47:16 2006 -0400 @@ -91,7 +91,6 @@ CONFIG_EFI=y CONFIG_EFI=y CONFIG_GENERIC_IOMAP=y CONFIG_XEN=y -CONFIG_ARCH_XEN=y 
CONFIG_XEN_IA64_DOM0_VP=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_DMA_IS_DMA32=y diff -r 17f7a426b2cd -r e6c7667c97ed buildconfigs/linux-defconfig_xenU_ia64 --- a/buildconfigs/linux-defconfig_xenU_ia64 Thu Jul 06 06:44:19 2006 -0400 +++ b/buildconfigs/linux-defconfig_xenU_ia64 Thu Jul 06 06:47:16 2006 -0400 @@ -88,7 +88,6 @@ CONFIG_EFI=y CONFIG_EFI=y CONFIG_GENERIC_IOMAP=y CONFIG_XEN=y -CONFIG_ARCH_XEN=y CONFIG_XEN_IA64_DOM0_VP=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_DMA_IS_DMA32=y diff -r 17f7a426b2cd -r e6c7667c97ed buildconfigs/linux-defconfig_xen_ia64 --- a/buildconfigs/linux-defconfig_xen_ia64 Thu Jul 06 06:44:19 2006 -0400 +++ b/buildconfigs/linux-defconfig_xen_ia64 Thu Jul 06 06:47:16 2006 -0400 @@ -91,7 +91,6 @@ CONFIG_EFI=y CONFIG_EFI=y CONFIG_GENERIC_IOMAP=y CONFIG_XEN=y -CONFIG_ARCH_XEN=y CONFIG_XEN_IA64_DOM0_VP=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_DMA_IS_DMA32=y diff -r 17f7a426b2cd -r e6c7667c97ed buildconfigs/mk.linux-2.6-xen --- a/buildconfigs/mk.linux-2.6-xen Thu Jul 06 06:44:19 2006 -0400 +++ b/buildconfigs/mk.linux-2.6-xen Thu Jul 06 06:47:16 2006 -0400 @@ -47,8 +47,14 @@ config: $(LINUX_DIR)/include/linux/autoc .PHONY: clean clean:: - $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) clean + [ ! 
-d $(LINUX_DIR) ] || \ + $(MAKE) -C $(LINUX_DIR) ARCH=$(LINUX_ARCH) clean + .PHONY: delete delete: rm -rf tmp-linux-$(LINUX_VER) $(LINUX_DIR) + +.PHONY: mrpropper +mrpropper: + rm -f linux-$(LINUX_VER).tar.bz2 diff -r 17f7a426b2cd -r e6c7667c97ed config/ppc64.mk --- a/config/ppc64.mk Thu Jul 06 06:44:19 2006 -0400 +++ b/config/ppc64.mk Thu Jul 06 06:47:16 2006 -0400 @@ -1,3 +1,4 @@ CONFIG_POWERPC := y CONFIG_POWERPC := y +CFLAGS += -DELFSIZE=64 LIBDIR := lib diff -r 17f7a426b2cd -r e6c7667c97ed docs/src/user.tex --- a/docs/src/user.tex Thu Jul 06 06:44:19 2006 -0400 +++ b/docs/src/user.tex Thu Jul 06 06:47:16 2006 -0400 @@ -2203,6 +2203,33 @@ ne2000 & Enable ne2000, default = serial & Enable redirection of VMX serial output to pty device\\ +\end{tabular} + +\begin{tabular}{lp{10cm}} + +usb & Enable USB support without defining a specific USB device. +This option defaults to 0 (disabled) unless the option usbdevice is +specified in which case this option then defaults to 1 (enabled).\\ + +usbdevice & Enable USB support and also enable support for the given +device. Devices that can be specified are {\small {\tt mouse}} (a PS/2 style +mouse), {\small {\tt tablet}} (an absolute pointing device) and +{\small {\tt host:id1:id2}} (a physical USB device on the host machine whose +ids are {\small {\tt id1}} and {\small {\tt id2}}). The advantage +of {\small {\tt tablet}} is that Windows guests will automatically recognize +and support this device so specifying the config line + +{\small +\begin{verbatim} + usbdevice='tablet' +\end{verbatim} +} + +will create a mouse that works transparently with Windows guests under VNC. +Linux doesn't recognize the USB tablet yet so Linux guests under VNC will +still need the Summagraphics emulation. +Details about mouse emulation are provided in section \textbf{A.4.3}.\\ + localtime & Set the real time clock to local time [default=0, that is, set to UTC].\\ enable-audio & Enable audio support. 
This is under development.\\ @@ -2317,29 +2344,458 @@ Simply follow the usual method of creati In the default configuration, VNC is on and SDL is off. Therefore VNC windows will open when VMX guests are created. If you want to use SDL to create VMX guests, set {\small {\tt sdl=1}} in your VMX configuration file. You can also turn off VNC by setting {\small {\tt vnc=0}}. -\subsection{Use mouse in VNC window} -The default PS/2 mouse will not work properly in VMX by a VNC window. Summagraphics mouse emulation does work in this environment. A Summagraphics mouse can be enabled by reconfiguring 2 services: - -{\small {\tt 1. General Purpose Mouse (GPM). The GPM daemon is configured in different ways in different Linux distributions. On a Redhat distribution, this is accomplished by changing the file `/etc/sysconfig/mouse' to have the following:\\ -MOUSETYPE="summa"\\ -XMOUSETYPE="SUMMA"\\ -DEVICE=/dev/ttyS0\\ -\\ -2. X11. For all Linux distributions, change the Mouse0 stanza in `/etc/X11/xorg.conf' to:\\ -Section "InputDevice"\\ -Identifier "Mouse0"\\ -Driver "summa"\\ -Option "Device" "/dev/ttyS0"\\ -Option "InputFashion" "Tablet"\\ -Option "Mode" "Absolute"\\ -Option "Name" "EasyPen"\\ -Option "Compatible" "True"\\ -Option "Protocol" "Auto"\\ -Option "SendCoreEvents" "on"\\ -Option "Vendor" "GENIUS"\\ -EndSection}} - -If the Summagraphics mouse isn't the default mouse, you can manually kill 'gpm' and restart it with the command "gpm -m /dev/ttyS0 -t summa". Note that Summagraphics mouse makes no sense in an SDL window and is therefore not available in this environment. +\subsection{Mouse issues, especially under VNC} +Mouse handling when using VNC is a little problematic. +The problem is that the VNC viewer provides a virtual pointer which is +located at an absolute location in the VNC window and only absolute +coordinates are provided. 
+The VMX device model converts these absolute mouse coordinates +into the relative motion deltas that are expected by the PS/2 +mouse driver running in the guest. +Unfortunately, +it is impossible to keep these generated mouse deltas +accurate enough for the guest cursor to exactly match +the VNC pointer. +This can lead to situations where the guest's cursor +is in the center of the screen and there's no way to +move that cursor to the left +(it can happen that the VNC pointer is at the left +edge of the screen and, +therefore, +there are no longer any left mouse deltas that +can be provided by the device model emulation code.) + +To deal with these mouse issues there are 4 different +mouse emulations available from the VMX device model: + +\begin{description} +\item[PS/2 mouse over the PS/2 port.] +This is the default mouse +that works perfectly well under SDL. +Under VNC the guest cursor will get +out of sync with the VNC pointer. +When this happens you can re-synchronize +the guest cursor to the VNC pointer by +holding down the +\textbf{left-ctl} +and +\textbf{left-alt} +keys together. +While these keys are down VNC pointer motions +will not be reported to the guest so +that the VNC pointer can be moved +to a place where it is possible +to move the guest cursor again. + +\item[Summagraphics mouse over the serial port.] +The device model also provides emulation +for a Summagraphics tablet, +an absolute pointer device. +This emulation is provided over the second +serial port, +\textbf{/dev/ttyS1} +for Linux guests and +\textbf{COM2} +for Windows guests. +Unfortunately, +neither Linux nor Windows provides +default support for the Summagraphics +tablet so the guest will have to be +manually configured for this mouse. + +\textbf{Linux configuration.} + +First, +configure the GPM service to use the Summagraphics tablet. 
+This can vary between distributions but, +typically, +all that needs to be done is modify the file +\path{/etc/sysconfig/mouse} to contain the lines: + +{\small +\begin{verbatim} + MOUSETYPE="summa" + XMOUSETYPE="SUMMA" + DEVICE=/dev/ttyS1 +\end{verbatim} +} + +and then restart the GPM daemon. + +Next, +modify the X11 config +\path{/etc/X11/xorg.conf} +to support the Summagraphics tablet by replacing +the input device stanza with the following: + +{\small +\begin{verbatim} + Section "InputDevice" + Identifier "Mouse0" + Driver "summa" + Option "Device" "/dev/ttyS1" + Option "InputFashion" "Tablet" + Option "Mode" "Absolute" + Option "Name" "EasyPen" + Option "Compatible" "True" + Option "Protocol" "Auto" + Option "SendCoreEvents" "on" + Option "Vendor" "GENIUS" + EndSection +\end{verbatim} +} + +Restart X and the X cursor should now properly +track the VNC pointer. + + +\textbf{Windows configuration.} + +Get the file +\path{http://www.cad-plan.de/files/download/tw2k.exe} +and execute that file on the guest, +answering the questions as follows: + +\begin{enumerate} +\item When the program asks for \textbf{model}, +scroll down and select \textbf{SummaSketch (MM Compatible)}. + +\item When the program asks for \textbf{COM Port} specify \textbf{com2}. + +\item When the program asks for a \textbf{Cursor Type} specify +\textbf{4 button cursor/puck}. + +\item The guest system will then reboot and, +when it comes back up, +the guest cursor will now properly track +the VNC pointer. +\end{enumerate} + +\item[PS/2 mouse over USB port.] +This is just the same PS/2 emulation except it is +provided over a USB port. +This emulation is enabled by the configuration flag: +{\small +\begin{verbatim} + usbdevice='mouse' +\end{verbatim} +} + +\item[USB tablet over USB port.] +The USB tablet is an absolute pointing device +that has the advantage that it is automatically +supported under Windows guests, +although Linux guests still require some +manual configuration. 
+This mouse emulation is enabled by the +configuration flag: +{\small +\begin{verbatim} + usbdevice='tablet' +\end{verbatim} +} + +\textbf{Linux configuration.} + +Unfortunately, +there is no GPM support for the +USB tablet at this point in time. +If you intend to use a GPM pointing +device under VNC you should +configure the guest for Summagraphics +emulation. + +Support for X11 is available by following +the instructions at\\ +\verb+http://stz-softwaretechnik.com/~ke/touchscreen/evtouch.html+\\ +with one minor change. +The +\path{xorg.conf} +given in those instructions +uses the wrong values for the X \& Y minimums and maximums, +use the following config stanza instead: + +{\small +\begin{verbatim} + Section "InputDevice" + Identifier "Tablet" + Driver "evtouch" + Option "Device" "/dev/input/event2" + Option "DeviceName" "touchscreen" + Option "MinX" "0" + Option "MinY" "0" + Option "MaxX" "32256" + Option "MaxY" "32256" + Option "ReportingMode" "Raw" + Option "Emulate3Buttons" + Option "Emulate3Timeout" "50" + Option "SendCoreEvents" "On" + EndSection +\end{verbatim} +} + +\textbf{Windows configuration.} + +Just enabling the USB tablet in the +guest's configuration file is sufficient, +Windows will automatically recognize and +configure device drivers for this +pointing device. + +\end{description} + +\subsection{USB Support} +There is support for an emulated USB mouse, +an emulated USB tablet +and physical low speed USB devices +(support for high speed USB 2.0 devices is +still under development). + +\begin{description} +\item[USB PS/2 style mouse.] +Details on the USB mouse emulation are +given in sections +\textbf{A.2} +and +\textbf{A.4.3}. +Enabling USB PS/2 style mouse emulation +is just a matter of adding the line + +{\small +\begin{verbatim} + usbdevice='mouse' +\end{verbatim} +} + +to the configuration file. +\item[USB tablet.] +Details on the USB tablet emulation are +given in sections +\textbf{A.2} +and +\textbf{A.4.3}. 
+Enabling USB tablet emulation +is just a matter of adding the line + +{\small +\begin{verbatim} + usbdevice='tablet' +\end{verbatim} +} + +to the configuration file. +\item[USB physical devices.] +Access to a physical (low speed) USB device +is enabled by adding a line of the form + +{\small +\begin{verbatim} + usbdevice='host:vid:pid' +\end{verbatim} +} + +into the configuration file.\footnote{ +There is an alternate +way of specifying a USB device that +uses the syntax +\textbf{host:bus.addr} +but this syntax suffers from +a major problem that makes +it effectively useless. +The problem is that the +\textbf{addr} +portion of this address +changes every time the USB device +is plugged into the system. +For this reason this addressing +scheme is not recommended and +will not be documented further. +} +\textbf{vid} +and +\textbf{pid} +are a +vendor id and +product id +that uniquely identify +the USB device. +These ids can be identified +in two ways: + +\begin{enumerate} +\item Through the control window. +As described in section +\textbf{A.4.6} +the control window +is activated by pressing +\textbf{ctl-alt-2} +in the guest VGA window. +As long as USB support is +enabled in the guest by including +the config file line +{\small +\begin{verbatim} + usb=1 +\end{verbatim} +} +then executing the command +{\small +\begin{verbatim} + info usbhost +\end{verbatim} +} +in the control window +will display a list of all +usb devices and their ids. +For example, +this output: +{\small +\begin{verbatim} + Device 1.3, speed 1.5 Mb/s + Class 00: USB device 04b3:310b +\end{verbatim} +} +was created from a USB mouse with +vendor id +\textbf{04b3} +and product id +\textbf{310b}. +This device could be made available +to the VMX guest by including the +config file entry +{\small +\begin{verbatim} + usbdevice='host:04b3:310b' +\end{verbatim} +} + +It is also possible to +enable access to a USB +device dynamically through +the control window. 
+The control window command +{\small +\begin{verbatim} + usb_add host:vid:pid +\end{verbatim} +} +will also allow access to a +USB device with vendor id +\textbf{vid} +and product id +\textbf{pid}. +\item Through the +\path{/proc} file system. +The contents of the pseudo file +\path{/proc/bus/usb/devices} +can also be used to identify +vendor and product ids. +Looking at this file, +the line starting with +\textbf{P:} +has a field +\textbf{Vendor} +giving the vendor id and +another field +\textbf{ProdID} +giving the product id. +The contents of +\path{/proc/bus/usb/devices} +for the example mouse is as +follows: +{\small +\begin{verbatim} +T: Bus=01 Lev=01 Prnt=01 Port=01 Cnt=02 Dev#= 3 Spd=1.5 MxCh= 0 +D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 8 #Cfgs= 1 +P: Vendor=04b3 ProdID=310b Rev= 1.60 +C:* #Ifs= 1 Cfg#= 1 Atr=a0 MxPwr=100mA +I: If#= 0 Alt= 0 #EPs= 1 Cls=03(HID ) Sub=01 Prot=02 Driver=(none) +E: Ad=81(I) Atr=03(Int.) MxPS= 4 Ivl=10ms +\end{verbatim} +} +Note that the +\textbf{P:} +line correctly identifies the +vendor id and product id +for this mouse as +\textbf{04b3:310b}. +\end{enumerate} +There is one other issue to +be aware of when accessing a +physical USB device from the guest. +The Dom0 kernel must not have +a device driver loaded for +the device that the guest wishes +to access. +This means that the Dom0 +kernel must not have that +device driver compiled into +the kernel or, +if using modules, +that driver module must +not be loaded. +Note that this is the device +specific USB driver that must +not be loaded, +either the +\textbf{UHCI} +or +\textbf{OHCI} +USB controller driver must +still be loaded. + +Going back to the USB mouse +as an example, +if \textbf{lsmod} +gives the output: + +{\small +\begin{verbatim} +Module Size Used by +usbmouse 4128 0 +usbhid 28996 0 +uhci_hcd 35409 0 +\end{verbatim} +} + +then the USB mouse is being +used by the Dom0 kernel and is +not available to the guest. 
+Executing the command +\textbf{rmmod usbhid}\footnote{ +Turns out the +\textbf{usbhid} +driver is the significant +one for the USB mouse, +the presence or absence of +the module +\textbf{usbmouse} +has no effect on whether or +not the guest can see a USB mouse.} +will remove the USB mouse +driver from the Dom0 kernel +and the mouse will now be +accessible by the VMX guest. + +Be aware that the Linux USB +hotplug system will reload +the drivers if a USB device +is removed and plugged back +in. +This means that just unloading +the driver module might not +be sufficient if the USB device +is removed and added back. +A more reliable technique is +to first +\textbf{rmmod} +the driver and then rename the +driver file in the +\path{/lib/modules} +directory, +just to make sure it doesn't get +reloaded. +\end{description} \subsection{Destroy VMX guests} VMX guests can be destroyed in the same way as can paravirtualized guests. We recommend that you type the command diff -r 17f7a426b2cd -r e6c7667c97ed extras/mini-os/console/xencons_ring.c --- a/extras/mini-os/console/xencons_ring.c Thu Jul 06 06:44:19 2006 -0400 +++ b/extras/mini-os/console/xencons_ring.c Thu Jul 06 06:47:16 2006 -0400 @@ -53,7 +53,7 @@ int xencons_ring_send(const char *data, -static void handle_input(int port, struct pt_regs *regs) +static void handle_input(int port, struct pt_regs *regs, void *ign) { struct xencons_interface *intf = xencons_interface(); XENCONS_RING_IDX cons, prod; @@ -83,7 +83,8 @@ int xencons_ring_init(void) if (!start_info.console_evtchn) return 0; - err = bind_evtchn(start_info.console_evtchn, handle_input); + err = bind_evtchn(start_info.console_evtchn, handle_input, + NULL); if (err <= 0) { printk("XEN console request chn bind failed %i\n", err); return err; diff -r 17f7a426b2cd -r e6c7667c97ed extras/mini-os/events.c --- a/extras/mini-os/events.c Thu Jul 06 06:44:19 2006 -0400 +++ b/extras/mini-os/events.c Thu Jul 06 06:47:16 2006 -0400 @@ -22,9 +22,18 @@ #include <events.h> 
#include <lib.h> +#define NR_EVS 1024 + +/* this represents a event handler. Chaining or sharing is not allowed */ +typedef struct _ev_action_t { + void (*handler)(int, struct pt_regs *, void *); + void *data; + u32 count; +} ev_action_t; + static ev_action_t ev_actions[NR_EVS]; -void default_handler(int port, struct pt_regs *regs); +void default_handler(int port, struct pt_regs *regs, void *data); /* @@ -35,42 +44,33 @@ int do_event(u32 port, struct pt_regs *r ev_action_t *action; if (port >= NR_EVS) { printk("Port number too large: %d\n", port); - goto out; + goto out; } action = &ev_actions[port]; action->count++; - if (!action->handler) - { - printk("Spurious event on port %d\n", port); - goto out; - } - - if (action->status & EVS_DISABLED) - { - printk("Event on port %d disabled\n", port); - goto out; - } - /* call the handler */ - action->handler(port, regs); - + action->handler(port, regs, action->data); + out: clear_evtchn(port); + return 1; } -int bind_evtchn( u32 port, void (*handler)(int, struct pt_regs *) ) +int bind_evtchn( u32 port, void (*handler)(int, struct pt_regs *, void *), + void *data ) { if(ev_actions[port].handler != default_handler) printk("WARN: Handler for port %d already registered, replacing\n", port); + ev_actions[port].data = data; + wmb(); ev_actions[port].handler = handler; - ev_actions[port].status &= ~EVS_DISABLED; - + /* Finally unmask the port */ unmask_evtchn(port); @@ -82,13 +82,14 @@ void unbind_evtchn( u32 port ) if (ev_actions[port].handler == default_handler) printk("WARN: No handler for port %d when unbinding\n", port); ev_actions[port].handler = default_handler; - ev_actions[port].status |= EVS_DISABLED; + wmb(); + ev_actions[port].data = NULL; } -int bind_virq( u32 virq, void (*handler)(int, struct pt_regs *) ) +int bind_virq( u32 virq, void (*handler)(int, struct pt_regs *, void *data), + void *data) { evtchn_op_t op; - int ret = 0; /* Try to bind the virq to a port */ op.cmd = EVTCHNOP_bind_virq; @@ -97,13 +98,11 @@ 
int bind_virq( u32 virq, void (*handler) if ( HYPERVISOR_event_channel_op(&op) != 0 ) { - ret = 1; printk("Failed to bind virtual IRQ %d\n", virq); - goto out; + return 1; } - bind_evtchn(op.u.bind_virq.port, handler); -out: - return ret; + bind_evtchn(op.u.bind_virq.port, handler, data); + return 0; } void unbind_virq( u32 port ) @@ -137,13 +136,38 @@ void init_events(void) #endif /* inintialise event handler */ for ( i = 0; i < NR_EVS; i++ ) - { - ev_actions[i].status = EVS_DISABLED; + { ev_actions[i].handler = default_handler; mask_evtchn(i); } } -void default_handler(int port, struct pt_regs *regs) { +void default_handler(int port, struct pt_regs *regs, void *ignore) +{ printk("[Port %d] - event received\n", port); } + +/* Unfortunate confusion of terminology: the port is unbound as far + as Xen is concerned, but we automatically bind a handler to it + from inside mini-os. */ +int evtchn_alloc_unbound(void (*handler)(int, struct pt_regs *regs, + void *data), + void *data) +{ + u32 port; + evtchn_op_t op; + int err; + + op.cmd = EVTCHNOP_alloc_unbound; + op.u.alloc_unbound.dom = DOMID_SELF; + op.u.alloc_unbound.remote_dom = 0; + + err = HYPERVISOR_event_channel_op(&op); + if (err) { + printk("Failed to alloc unbound evtchn: %d.\n", err); + return -1; + } + port = op.u.alloc_unbound.port; + bind_evtchn(port, handler, data); + return port; +} diff -r 17f7a426b2cd -r e6c7667c97ed extras/mini-os/include/events.h --- a/extras/mini-os/include/events.h Thu Jul 06 06:44:19 2006 -0400 +++ b/extras/mini-os/include/events.h Thu Jul 06 06:47:16 2006 -0400 @@ -22,28 +22,18 @@ #include<traps.h> #include <xen/event_channel.h> -#define NR_EVS 1024 - -/* ev handler status */ -#define EVS_INPROGRESS 1 /* Event handler active - do not enter! */ -#define EVS_DISABLED 2 /* Event disabled - do not enter! */ -#define EVS_PENDING 4 /* Event pending - replay on enable */ -#define EVS_REPLAY 8 /* Event has been replayed but not acked yet */ - -/* this represents a event handler. 
Chaining or sharing is not allowed */ -typedef struct _ev_action_t { - void (*handler)(int, struct pt_regs *); - unsigned int status; /* IRQ status */ - u32 count; -} ev_action_t; - /* prototypes */ int do_event(u32 port, struct pt_regs *regs); -int bind_virq( u32 virq, void (*handler)(int, struct pt_regs *) ); -int bind_evtchn( u32 virq, void (*handler)(int, struct pt_regs *) ); +int bind_virq( u32 virq, void (*handler)(int, struct pt_regs *, void *data), + void *data); +int bind_evtchn( u32 virq, void (*handler)(int, struct pt_regs *, void *data), + void *data ); void unbind_evtchn( u32 port ); void init_events(void); void unbind_virq( u32 port ); +int evtchn_alloc_unbound(void (*handler)(int, struct pt_regs *regs, + void *data), + void *data); static inline int notify_remote_via_evtchn(int port) { diff -r 17f7a426b2cd -r e6c7667c97ed extras/mini-os/include/lib.h --- a/extras/mini-os/include/lib.h Thu Jul 06 06:44:19 2006 -0400 +++ b/extras/mini-os/include/lib.h Thu Jul 06 06:47:16 2006 -0400 @@ -89,6 +89,7 @@ char *strchr(const char *s, int c); char *strchr(const char *s, int c); char *strstr(const char *s1, const char *s2); char * strcat(char * dest, const char * src); +char *strdup(const char *s); #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) @@ -98,6 +99,18 @@ struct kvec { size_t iov_len; }; +#define ASSERT(x) \ +do { \ + if (!(x)) { \ + printk("ASSERTION FAILED: %s at %s:%d.\n", \ + # x , \ + __FILE__, \ + __LINE__); \ + BUG(); \ + } \ +} while(0) +/* Consistency check as much as possible. 
*/ +void sanity_check(void); #endif /* _LIB_H_ */ diff -r 17f7a426b2cd -r e6c7667c97ed extras/mini-os/include/mm.h --- a/extras/mini-os/include/mm.h Thu Jul 06 06:44:19 2006 -0400 +++ b/extras/mini-os/include/mm.h Thu Jul 06 06:47:16 2006 -0400 @@ -196,9 +196,11 @@ static __inline__ paddr_t machine_to_phy #define to_virt(x) ((void *)((unsigned long)(x)+VIRT_START)) #define virt_to_pfn(_virt) (PFN_DOWN(to_phys(_virt))) +#define virt_to_mfn(_virt) (pfn_to_mfn(virt_to_pfn(_virt))) #define mach_to_virt(_mach) (to_virt(machine_to_phys(_mach))) +#define virt_to_mach(_virt) (phys_to_machine(to_phys(_virt))) #define mfn_to_virt(_mfn) (to_virt(mfn_to_pfn(_mfn) << PAGE_SHIFT)) -#define pfn_to_virt(_pfn) (to_virt(_pfn << PAGE_SHIFT)) +#define pfn_to_virt(_pfn) (to_virt((_pfn) << PAGE_SHIFT)) /* Pagetable walking. */ #define pte_to_mfn(_pte) (((_pte) & (PADDR_MASK&PAGE_MASK)) >> L1_PAGETABLE_SHIFT) @@ -206,7 +208,7 @@ static __inline__ paddr_t machine_to_phy void init_mm(void); unsigned long alloc_pages(int order); -#define alloc_page() alloc_pages(0); +#define alloc_page() alloc_pages(0) void free_pages(void *pointer, int order); static __inline__ int get_order(unsigned long size) @@ -219,4 +221,6 @@ static __inline__ int get_order(unsigned } +void *map_frames(unsigned long *f, unsigned long n); + #endif /* _MM_H_ */ diff -r 17f7a426b2cd -r e6c7667c97ed extras/mini-os/include/os.h --- a/extras/mini-os/include/os.h Thu Jul 06 06:44:19 2006 -0400 +++ b/extras/mini-os/include/os.h Thu Jul 06 06:47:16 2006 -0400 @@ -445,7 +445,62 @@ static __inline__ unsigned long __ffs(un /********************* common i386 and x86_64 ****************************/ - +struct __synch_xchg_dummy { unsigned long a[100]; }; +#define __synch_xg(x) ((struct __synch_xchg_dummy *)(x)) + +#define synch_cmpxchg(ptr, old, new) \ +((__typeof__(*(ptr)))__synch_cmpxchg((ptr),\ + (unsigned long)(old), \ + (unsigned long)(new), \ + sizeof(*(ptr)))) + +static inline unsigned long __synch_cmpxchg(volatile void 
*ptr, + unsigned long old, + unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__("lock; cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__("lock; cmpxchgw %w1,%2" + : "=a"(prev) + : "r"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; +#ifdef __x86_64__ + case 4: + __asm__ __volatile__("lock; cmpxchgl %k1,%2" + : "=a"(prev) + : "r"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; + case 8: + __asm__ __volatile__("lock; cmpxchgq %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; +#else + case 4: + __asm__ __volatile__("lock; cmpxchgl %1,%2" + : "=a"(prev) + : "r"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; +#endif + } + return old; +} static __inline__ void synch_set_bit(int nr, volatile void * addr) diff -r 17f7a426b2cd -r e6c7667c97ed extras/mini-os/include/xenbus.h --- a/extras/mini-os/include/xenbus.h Thu Jul 06 06:44:19 2006 -0400 +++ b/extras/mini-os/include/xenbus.h Thu Jul 06 06:47:16 2006 -0400 @@ -1,5 +1,8 @@ #ifndef XENBUS_H__ #define XENBUS_H__ + +typedef unsigned long xenbus_transaction_t; +#define XBT_NIL ((xenbus_transaction_t)0) /* Initialize the XenBus system. */ void init_xenbus(void); @@ -7,28 +10,42 @@ void init_xenbus(void); /* Read the value associated with a path. Returns a malloc'd error string on failure and sets *value to NULL. On success, *value is set to a malloc'd copy of the value. */ -char *xenbus_read(const char *path, char **value); +char *xenbus_read(xenbus_transaction_t xbt, const char *path, char **value); /* Associates a value with a path. Returns a malloc'd error string on failure. */ -char *xenbus_write(const char *path, const char *value); +char *xenbus_write(xenbus_transaction_t xbt, const char *path, const char *value); /* Removes the value associated with a path. 
Returns a malloc'd error string on failure. */ -char *xenbus_rm(const char *path); +char *xenbus_rm(xenbus_transaction_t xbt, const char *path); /* List the contents of a directory. Returns a malloc'd error string on failure and sets *contents to NULL. On success, *contents is set to a malloc'd array of pointers to malloc'd strings. The array is NULL terminated. May block. */ -char *xenbus_ls(const char *prefix, char ***contents); +char *xenbus_ls(xenbus_transaction_t xbt, const char *prefix, char ***contents); /* Reads permissions associated with a path. Returns a malloc'd error string on failure and sets *value to NULL. On success, *value is set to a malloc'd copy of the value. */ -char *xenbus_get_perms(const char *path, char **value); +char *xenbus_get_perms(xenbus_transaction_t xbt, const char *path, char **value); /* Sets the permissions associated with a path. Returns a malloc'd error string on failure. */ -char *xenbus_set_perms(const char *path, domid_t dom, char perm); +char *xenbus_set_perms(xenbus_transaction_t xbt, const char *path, domid_t dom, char perm); + +/* Start a xenbus transaction. Returns the transaction in xbt on + success or a malloc'd error string otherwise. */ +char *xenbus_transaction_start(xenbus_transaction_t *xbt); + +/* End a xenbus transaction. Returns a malloc'd error string if it + fails. abort says whether the transaction should be aborted. + Returns 1 in *retry iff the transaction should be retried. */ +char *xenbus_transaction_end(xenbus_transaction_t, int abort, + int *retry); + +/* Read path and parse it as an integer. Returns -1 on error. 
*/ +int xenbus_read_integer(char *path); + #endif /* XENBUS_H__ */ diff -r 17f7a426b2cd -r e6c7667c97ed extras/mini-os/kernel.c --- a/extras/mini-os/kernel.c Thu Jul 06 06:44:19 2006 -0400 +++ b/extras/mini-os/kernel.c Thu Jul 06 06:47:16 2006 -0400 @@ -35,6 +35,7 @@ #include <lib.h> #include <sched.h> #include <xenbus.h> +#include <gnttab.h> #include <xen/features.h> #include <xen/version.h> @@ -104,7 +105,8 @@ void test_xenbus(void); void xenbus_tester(void *p) { - test_xenbus(); + printk("Xenbus tests disabled, because of a Xend bug.\n"); + /* test_xenbus(); */ } /* This should be overridden by the application we are linked against. */ @@ -176,7 +178,10 @@ void start_kernel(start_info_t *si) /* Init the console driver. */ init_console(); - + + /* Init grant tables */ + init_gnttab(); + /* Init scheduler. */ init_sched(); diff -r 17f7a426b2cd -r e6c7667c97ed extras/mini-os/lib/string.c --- a/extras/mini-os/lib/string.c Thu Jul 06 06:44:19 2006 -0400 +++ b/extras/mini-os/lib/string.c Thu Jul 06 06:47:16 2006 -0400 @@ -23,6 +23,7 @@ #include <os.h> #include <types.h> #include <lib.h> +#include <xmalloc.h> int memcmp(const void * cs,const void * ct,size_t count) { @@ -156,4 +157,13 @@ char * strstr(const char * s1,const char return NULL; } +char *strdup(const char *x) +{ + int l = strlen(x); + char *res = malloc(l + 1); + if (!res) return NULL; + memcpy(res, x, l + 1); + return res; +} + #endif diff -r 17f7a426b2cd -r e6c7667c97ed extras/mini-os/mm.c --- a/extras/mini-os/mm.c Thu Jul 06 06:44:19 2006 -0400 +++ b/extras/mini-os/mm.c Thu Jul 06 06:47:16 2006 -0400 @@ -343,7 +343,7 @@ void free_pages(void *pointer, int order break; /* Merge with successor */ - freed_ct = (chunk_tail_t *)((char *)to_merge_ch + mask); + freed_ct = (chunk_tail_t *)((char *)to_merge_ch + mask) - 1; } /* We are commited to merging, unlink the chunk */ @@ -612,6 +612,107 @@ void mem_test(unsigned long *start_add, } +static pgentry_t *demand_map_pgt; +static void *demand_map_area_start; + 
+static void init_demand_mapping_area(unsigned long max_pfn) +{ + unsigned long mfn; + pgentry_t *tab; + unsigned long start_addr; + unsigned long pt_pfn; + unsigned offset; + + /* Round up to four megs. + 1024 rather than + 1023 since we want + to be sure we don't end up in the same place we started. */ + max_pfn = (max_pfn + L1_PAGETABLE_ENTRIES) & ~(L1_PAGETABLE_ENTRIES - 1); + if (max_pfn == 0 || + (unsigned long)pfn_to_virt(max_pfn + L1_PAGETABLE_ENTRIES) >= + HYPERVISOR_VIRT_START) { + printk("Too much memory; no room for demand map hole.\n"); + do_exit(); + } + + demand_map_area_start = pfn_to_virt(max_pfn); + printk("Demand map pfns start at %lx (%p).\n", max_pfn, + demand_map_area_start); + start_addr = (unsigned long)demand_map_area_start; + + tab = (pgentry_t *)start_info.pt_base; + mfn = virt_to_mfn(start_info.pt_base); + pt_pfn = virt_to_pfn(alloc_page()); + +#if defined(__x86_64__) + offset = l4_table_offset(start_addr); + if (!(tab[offset] & _PAGE_PRESENT)) { + new_pt_frame(&pt_pfn, mfn, offset, L3_FRAME); + pt_pfn = virt_to_pfn(alloc_page()); + } + ASSERT(tab[offset] & _PAGE_PRESENT); + mfn = pte_to_mfn(tab[offset]); + tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT); +#endif +#if defined(__x86_64__) || defined(CONFIG_X86_PAE) + offset = l3_table_offset(start_addr); + if (!(tab[offset] & _PAGE_PRESENT)) { + new_pt_frame(&pt_pfn, mfn, offset, L2_FRAME); + pt_pfn = virt_to_pfn(alloc_page()); + } + ASSERT(tab[offset] & _PAGE_PRESENT); + mfn = pte_to_mfn(tab[offset]); + tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT); +#endif + offset = l2_table_offset(start_addr); + if (tab[offset] & _PAGE_PRESENT) { + printk("Demand map area already has a page table covering it?\n"); + BUG(); + } + demand_map_pgt = pfn_to_virt(pt_pfn); + new_pt_frame(&pt_pfn, mfn, offset, L1_FRAME); + ASSERT(tab[offset] & _PAGE_PRESENT); +} + +void *map_frames(unsigned long *f, unsigned long n) +{ + unsigned long x; + unsigned long y = 0; + mmu_update_t mmu_updates[16]; + int rc; + + if (n > 
16) { + printk("Tried to map too many (%ld) frames at once.\n", n); + return NULL; + } + + /* Find a run of n contiguous frames */ + for (x = 0; x <= 1024 - n; x += y + 1) { + for (y = 0; y < n; y++) + if (demand_map_pgt[y] & _PAGE_PRESENT) + break; + if (y == n) + break; + } + if (y != n) { + printk("Failed to map %ld frames!\n", n); + return NULL; + } + + /* Found it at x. Map it in. */ + for (y = 0; y < n; y++) { + mmu_updates[y].ptr = virt_to_mach(&demand_map_pgt[x + y]); + mmu_updates[y].val = (f[y] << PAGE_SHIFT) | L1_PROT; + } + + rc = HYPERVISOR_mmu_update(mmu_updates, n, NULL, DOMID_SELF); + if (rc < 0) { + printk("Map %ld failed: %d.\n", n, rc); + return NULL; + } else { + return (void *)(unsigned long)((unsigned long)demand_map_area_start + + x * PAGE_SIZE); + } +} + void init_mm(void) { @@ -643,4 +744,24 @@ void init_mm(void) (u_long)to_virt(PFN_PHYS(max_pfn)), PFN_PHYS(max_pfn)); init_page_allocator(PFN_PHYS(start_pfn), PFN_PHYS(max_pfn)); printk("MM: done\n"); -} + + init_demand_mapping_area(max_pfn); + printk("Initialised demand area.\n"); +} + +void sanity_check(void) +{ + int x; + chunk_head_t *head; + + for (x = 0; x < FREELIST_SIZE; x++) { + for (head = free_head[x]; !FREELIST_EMPTY(head); head = head->next) { + ASSERT(!allocated_in_map(virt_to_pfn(head))); + if (head->next) + ASSERT(head->next->pprev == &head->next); + } + if (free_head[x]) { + ASSERT(free_head[x]->pprev == &free_head[x]); + } + } +} diff -r 17f7a426b2cd -r e6c7667c97ed extras/mini-os/time.c --- a/extras/mini-os/time.c Thu Jul 06 06:44:19 2006 -0400 +++ b/extras/mini-os/time.c Thu Jul 06 06:47:16 2006 -0400 @@ -215,7 +215,7 @@ void block_domain(u32 millisecs) /* * Just a dummy */ -static void timer_handler(int ev, struct pt_regs *regs) +static void timer_handler(int ev, struct pt_regs *regs, void *ign) { static int i; @@ -233,5 +233,5 @@ void init_time(void) void init_time(void) { printk("Initialising timer interface\n"); - bind_virq(VIRQ_TIMER, &timer_handler); -} + 
bind_virq(VIRQ_TIMER, &timer_handler, NULL); +} diff -r 17f7a426b2cd -r e6c7667c97ed extras/mini-os/xenbus/xenbus.c --- a/extras/mini-os/xenbus/xenbus.c Thu Jul 06 06:44:19 2006 -0400 +++ b/extras/mini-os/xenbus/xenbus.c Thu Jul 06 06:47:16 2006 -0400 @@ -112,7 +112,7 @@ static void xenbus_thread_func(void *ign } } -static void xenbus_evtchn_handler(int port, struct pt_regs *regs) +static void xenbus_evtchn_handler(int port, struct pt_regs *regs, void *ign) { wake_up(&xb_waitq); } @@ -174,7 +174,8 @@ void init_xenbus(void) create_thread("xenstore", xenbus_thread_func, NULL); DEBUG("buf at %p.\n", xenstore_buf); err = bind_evtchn(start_info.store_evtchn, - xenbus_evtchn_handler); + xenbus_evtchn_handler, + NULL); DEBUG("xenbus on irq %d\n", err); } @@ -187,8 +188,8 @@ struct write_req { by xenbus as if sent atomically. The header is added automatically, using type %type, req_id %req_id, and trans_id %trans_id. */ -static void xb_write(int type, int req_id, int trans_id, - const struct write_req *req, int nr_reqs) +static void xb_write(int type, int req_id, xenbus_transaction_t trans_id, + const struct write_req *req, int nr_reqs) { XENSTORE_RING_IDX prod; int r; @@ -266,9 +267,9 @@ static void xb_write(int type, int req_i freed by the caller. */ static struct xsd_sockmsg * xenbus_msg_reply(int type, - int trans, - struct write_req *io, - int nr_reqs) + xenbus_transaction_t trans, + struct write_req *io, + int nr_reqs) { int id; DEFINE_WAIT(w); @@ -322,14 +323,14 @@ static void xenbus_debug_msg(const char /* List the contents of a directory. Returns a malloc()ed array of pointers to malloc()ed strings. The array is NULL terminated. May block. 
*/ -char *xenbus_ls(const char *pre, char ***contents) +char *xenbus_ls(xenbus_transaction_t xbt, const char *pre, char ***contents) { struct xsd_sockmsg *reply, *repmsg; struct write_req req[] = { { pre, strlen(pre)+1 } }; int nr_elems, x, i; char **res; - repmsg = xenbus_msg_reply(XS_DIRECTORY, 0, req, ARRAY_SIZE(req)); + repmsg = xenbus_msg_reply(XS_DIRECTORY, xbt, req, ARRAY_SIZE(req)); char *msg = errmsg(repmsg); if (msg) { *contents = NULL; @@ -351,12 +352,12 @@ char *xenbus_ls(const char *pre, char ** return NULL; } -char *xenbus_read(const char *path, char **value) +char *xenbus_read(xenbus_transaction_t xbt, const char *path, char **value) { struct write_req req[] = { {path, strlen(path) + 1} }; struct xsd_sockmsg *rep; char *res; - rep = xenbus_msg_reply(XS_READ, 0, req, ARRAY_SIZE(req)); + rep = xenbus_msg_reply(XS_READ, xbt, req, ARRAY_SIZE(req)); char *msg = errmsg(rep); if (msg) { *value = NULL; @@ -370,14 +371,14 @@ char *xenbus_read(const char *path, char return NULL; } -char *xenbus_write(const char *path, const char *value) +char *xenbus_write(xenbus_transaction_t xbt, const char *path, const char *value) { struct write_req req[] = { {path, strlen(path) + 1}, {value, strlen(value) + 1}, }; struct xsd_sockmsg *rep; - rep = xenbus_msg_reply(XS_WRITE, 0, req, ARRAY_SIZE(req)); + rep = xenbus_msg_reply(XS_WRITE, xbt, req, ARRAY_SIZE(req)); char *msg = errmsg(rep); if (msg) return msg; @@ -385,11 +386,11 @@ char *xenbus_write(const char *path, con return NULL; } -char *xenbus_rm(const char *path) +char *xenbus_rm(xenbus_transaction_t xbt, const char *path) { struct write_req req[] = { {path, strlen(path) + 1} }; struct xsd_sockmsg *rep; - rep = xenbus_msg_reply(XS_RM, 0, req, ARRAY_SIZE(req)); + rep = xenbus_msg_reply(XS_RM, xbt, req, ARRAY_SIZE(req)); char *msg = errmsg(rep); if (msg) return msg; @@ -397,12 +398,12 @@ char *xenbus_rm(const char *path) return NULL; } -char *xenbus_get_perms(const char *path, char **value) +char 
*xenbus_get_perms(xenbus_transaction_t xbt, const char *path, char **value) { struct write_req req[] = { {path, strlen(path) + 1} }; struct xsd_sockmsg *rep; char *res; - rep = xenbus_msg_reply(XS_GET_PERMS, 0, req, ARRAY_SIZE(req)); + rep = xenbus_msg_reply(XS_GET_PERMS, xbt, req, ARRAY_SIZE(req)); char *msg = errmsg(rep); if (msg) { *value = NULL; @@ -417,7 +418,7 @@ char *xenbus_get_perms(const char *path, } #define PERM_MAX_SIZE 32 -char *xenbus_set_perms(const char *path, domid_t dom, char perm) +char *xenbus_set_perms(xenbus_transaction_t xbt, const char *path, domid_t dom, char perm) { char value[PERM_MAX_SIZE]; snprintf(value, PERM_MAX_SIZE, "%c%hu", perm, dom); @@ -426,7 +427,7 @@ char *xenbus_set_perms(const char *path, {value, strlen(value) + 1}, }; struct xsd_sockmsg *rep; - rep = xenbus_msg_reply(XS_SET_PERMS, 0, req, ARRAY_SIZE(req)); + rep = xenbus_msg_reply(XS_SET_PERMS, xbt, req, ARRAY_SIZE(req)); char *msg = errmsg(rep); if (msg) return msg; @@ -434,13 +435,72 @@ char *xenbus_set_perms(const char *path, return NULL; } +char *xenbus_transaction_start(xenbus_transaction_t *xbt) +{ + /* xenstored becomes angry if you send a length 0 message, so just + shove a nul terminator on the end */ + struct write_req req = { "", 1}; + struct xsd_sockmsg *rep; + char *err; + + rep = xenbus_msg_reply(XS_TRANSACTION_START, 0, &req, 1); + err = errmsg(rep); + if (err) + return err; + sscanf((char *)(rep + 1), "%u", xbt); + free(rep); + return NULL; +} + +char * +xenbus_transaction_end(xenbus_transaction_t t, int abort, int *retry) +{ + struct xsd_sockmsg *rep; + struct write_req req; + char *err; + + *retry = 0; + + req.data = abort ? 
"F" : "T"; + req.len = 2; + rep = xenbus_msg_reply(XS_TRANSACTION_END, t, &req, 1); + err = errmsg(rep); + if (err) { + if (!strcmp(err, "EAGAIN")) { + *retry = 1; + free(err); + return NULL; + } else { + return err; + } + } + free(rep); + return NULL; +} + +int xenbus_read_integer(char *path) +{ + char *res, *buf; + int t; + + res = xenbus_read(XBT_NIL, path, &buf); + if (res) { + printk("Failed to read %s.\n", path); + free(res); + return -1; + } + sscanf(buf, "%d", &t); + free(buf); + return t; +} + static void do_ls_test(const char *pre) { char **dirs; int x; DEBUG("ls %s...\n", pre); - char *msg = xenbus_ls(pre, &dirs); + char *msg = xenbus_ls(XBT_NIL, pre, &dirs); if (msg) { DEBUG("Error in xenbus ls: %s\n", msg); free(msg); @@ -458,7 +518,7 @@ static void do_read_test(const char *pat { char *res; DEBUG("Read %s...\n", path); - char *msg = xenbus_read(path, &res); + char *msg = xenbus_read(XBT_NIL, path, &res); if (msg) { DEBUG("Error in xenbus read: %s\n", msg); free(msg); @@ -471,7 +531,7 @@ static void do_write_test(const char *pa static void do_write_test(const char *path, const char *val) { DEBUG("Write %s to %s...\n", val, path); - char *msg = xenbus_write(path, val); + char *msg = xenbus_write(XBT_NIL, path, val); if (msg) { DEBUG("Result %s\n", msg); free(msg); @@ -483,7 +543,7 @@ static void do_rm_test(const char *path) static void do_rm_test(const char *path) { DEBUG("rm %s...\n", path); - char *msg = xenbus_rm(path); + char *msg = xenbus_rm(XBT_NIL, path); if (msg) { DEBUG("Result %s\n", msg); free(msg); diff -r 17f7a426b2cd -r e6c7667c97ed linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c --- a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c Thu Jul 06 06:44:19 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c Thu Jul 06 06:47:16 2006 -0400 @@ -161,43 +161,13 @@ static void contiguous_bitmap_clear( } } -static unsigned long -HYPERVISOR_populate_physmap(unsigned long gpfn, unsigned int extent_order, - unsigned int address_bits) -{ - 
unsigned long ret; - struct xen_memory_reservation reservation = { - .nr_extents = 1, - .address_bits = address_bits, - .extent_order = extent_order, - .domid = DOMID_SELF - }; - set_xen_guest_handle(reservation.extent_start, &gpfn); - ret = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); - // it may fail on non-privileged domain with extent_order > 0. - BUG_ON(ret != 1 && - !(ret == 0 && !(xen_start_info->flags & SIF_PRIVILEGED) && - extent_order > 0)); - if (ret != 1) - return -EINVAL;//XXX - return 0; -} - -static unsigned long -HYPERVISOR_remove_physmap(unsigned long gpfn, unsigned int extent_order) -{ - unsigned long ret; - struct xen_memory_reservation reservation = { - .nr_extents = 1, - .address_bits = 0, - .extent_order = extent_order, - .domid = DOMID_SELF - }; - set_xen_guest_handle(reservation.extent_start, &gpfn); - ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); - BUG_ON(ret != 1); - return 0; -} +// __xen_create_contiguous_region(), __xen_destroy_contiguous_region() +// are based on i386 xen_create_contiguous_region(), +// xen_destroy_contiguous_region() + +/* Protected by balloon_lock. */ +#define MAX_CONTIG_ORDER 7 +static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER]; /* Ensure multi-page extents are contiguous in machine memory. 
*/ int @@ -211,57 +181,92 @@ __xen_create_contiguous_region(unsigned unsigned long i; unsigned long flags; + unsigned long *in_frames = discontig_frames, out_frame; + int success; + struct xen_memory_exchange exchange = { + .in = { + .nr_extents = num_gpfn, + .extent_order = 0, + .domid = DOMID_SELF + }, + .out = { + .nr_extents = 1, + .extent_order = order, + .address_bits = address_bits, + .domid = DOMID_SELF + }, + .nr_exchanged = 0 + }; + + if (order > MAX_CONTIG_ORDER) + return -ENOMEM; + + set_xen_guest_handle(exchange.in.extent_start, in_frames); + set_xen_guest_handle(exchange.out.extent_start, &out_frame); + scrub_pages(vstart, num_gpfn); balloon_lock(flags); - error = HYPERVISOR_remove_physmap(start_gpfn, order); - if (error) { - goto fail; - } - - error = HYPERVISOR_populate_physmap(start_gpfn, order, address_bits); - if (error) { - goto fail; - } - contiguous_bitmap_set(start_gpfn, num_gpfn); + /* Get a new contiguous memory extent. */ + for (i = 0; i < num_gpfn; i++) { + in_frames[i] = start_gpfn + i; + } + out_frame = start_gpfn; + error = HYPERVISOR_memory_op(XENMEM_exchange, &exchange); + success = (exchange.nr_exchanged == num_gpfn); + BUG_ON(!success && ((exchange.nr_exchanged != 0) || (error == 0))); + BUG_ON(success && (error != 0)); + if (unlikely(error == -ENOSYS)) { + /* Compatibility when XENMEM_exchange is unsupported. */ + error = HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &exchange.in); + BUG_ON(error != num_gpfn); + error = HYPERVISOR_memory_op(XENMEM_populate_physmap, + &exchange.out); + if (error != 1) { + /* Couldn't get special memory: fall back to normal. 
*/ + for (i = 0; i < num_gpfn; i++) { + in_frames[i] = start_gpfn + i; + } + error = HYPERVISOR_memory_op(XENMEM_populate_physmap, + &exchange.in); + BUG_ON(error != num_gpfn); + success = 0; + } else + success = 1; + } + if (success) + contiguous_bitmap_set(start_gpfn, num_gpfn); #if 0 - { - unsigned long mfn; - unsigned long mfn_prev = ~0UL; - for (i = 0; i < num_gpfn; i++) { - mfn = pfn_to_mfn_for_dma(start_gpfn + i); - if (mfn_prev != ~0UL && mfn != mfn_prev + 1) { - xprintk("\n"); - xprintk("%s:%d order %d " - "start 0x%lx bus 0x%lx machine 0x%lx\n", - __func__, __LINE__, order, - vstart, virt_to_bus((void*)vstart), - phys_to_machine_for_dma(gphys)); - xprintk("mfn: "); - for (i = 0; i < num_gpfn; i++) { - mfn = pfn_to_mfn_for_dma(start_gpfn + i); - xprintk("0x%lx ", mfn); + if (success) { + unsigned long mfn; + unsigned long mfn_prev = ~0UL; + for (i = 0; i < num_gpfn; i++) { + mfn = pfn_to_mfn_for_dma(start_gpfn + i); + if (mfn_prev != ~0UL && mfn != mfn_prev + 1) { + xprintk("\n"); + xprintk("%s:%d order %d " + "start 0x%lx bus 0x%lx " + "machine 0x%lx\n", + __func__, __LINE__, order, + vstart, virt_to_bus((void*)vstart), + phys_to_machine_for_dma(gphys)); + xprintk("mfn: "); + for (i = 0; i < num_gpfn; i++) { + mfn = pfn_to_mfn_for_dma( + start_gpfn + i); + xprintk("0x%lx ", mfn); + } + xprintk("\n"); + break; } - xprintk("\n"); - goto out; - } - mfn_prev = mfn; - } + mfn_prev = mfn; + } } #endif -out: balloon_unlock(flags); - return error; - -fail: - for (i = 0; i < num_gpfn; i++) { - error = HYPERVISOR_populate_physmap(start_gpfn + i, 0, 0); - if (error) { - BUG();//XXX - } - } - goto out; + return success? 
0: -ENOMEM; } void @@ -271,55 +276,61 @@ __xen_destroy_contiguous_region(unsigned unsigned long error = 0; unsigned long start_gpfn = __pa(vstart) >> PAGE_SHIFT; unsigned long num_gpfn = 1UL << order; - unsigned long* gpfns; - struct xen_memory_reservation reservation; unsigned long i; - gpfns = kmalloc(sizeof(gpfns[0]) * num_gpfn, - GFP_KERNEL | __GFP_NOFAIL); + unsigned long *out_frames = discontig_frames, in_frame; + int success; + struct xen_memory_exchange exchange = { + .in = { + .nr_extents = 1, + .extent_order = order, + .domid = DOMID_SELF + }, + .out = { + .nr_extents = num_gpfn, + .extent_order = 0, + .address_bits = 0, + .domid = DOMID_SELF + }, + .nr_exchanged = 0 + }; + + + if (!test_bit(start_gpfn, contiguous_bitmap)) + return; + + if (order > MAX_CONTIG_ORDER) + return; + + set_xen_guest_handle(exchange.in.extent_start, &in_frame); + set_xen_guest_handle(exchange.out.extent_start, out_frames); + + scrub_pages(vstart, num_gpfn); + + balloon_lock(flags); + + contiguous_bitmap_clear(start_gpfn, num_gpfn); + + /* Do the exchange for non-contiguous MFNs. 
*/ + in_frame = start_gpfn; for (i = 0; i < num_gpfn; i++) { - gpfns[i] = start_gpfn + i; - } - - scrub_pages(vstart, num_gpfn); - - balloon_lock(flags); - - contiguous_bitmap_clear(start_gpfn, num_gpfn); - error = HYPERVISOR_remove_physmap(start_gpfn, order); - if (error) { - goto fail; - } - - set_xen_guest_handle(reservation.extent_start, gpfns); - reservation.nr_extents = num_gpfn; - reservation.address_bits = 0; - reservation.extent_order = 0; - reservation.domid = DOMID_SELF; - error = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); - if (error != num_gpfn) { - error = -EFAULT;//XXX - goto fail; - } - error = 0; -out: + out_frames[i] = start_gpfn + i; + } + error = HYPERVISOR_memory_op(XENMEM_exchange, &exchange); + success = (exchange.nr_exchanged == 1); + BUG_ON(!success && ((exchange.nr_exchanged != 0) || (error == 0))); + BUG_ON(success && (error != 0)); + if (unlikely(error == -ENOSYS)) { + /* Compatibility when XENMEM_exchange is unsupported. */ + error = HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &exchange.in); + BUG_ON(error != 1); + + error = HYPERVISOR_memory_op(XENMEM_populate_physmap, + &exchange.out); + BUG_ON(error != num_gpfn); + } balloon_unlock(flags); - kfree(gpfns); - if (error) { - // error can't be returned. - BUG();//XXX - } - return; - -fail: - for (i = 0; i < num_gpfn; i++) { - int tmp_error;// don't overwrite error. 
- tmp_error = HYPERVISOR_populate_physmap(start_gpfn + i, 0, 0); - if (tmp_error) { - BUG();//XXX - } - } - goto out; } diff -r 17f7a426b2cd -r e6c7667c97ed linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S Thu Jul 06 06:44:19 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenentry.S Thu Jul 06 06:47:16 2006 -0400 @@ -765,7 +765,7 @@ skip_rbs_switch: #ifdef CONFIG_XEN movl r25=XSI_IPSR ;; - st8[r25]=r29,XSI_IFS-XSI_IPSR + st8[r25]=r29,XSI_IFS_OFS-XSI_IPSR_OFS ;; #else mov cr.ipsr=r29 // M2 @@ -776,7 +776,7 @@ skip_rbs_switch: #ifdef CONFIG_XEN (p9) st8 [r25]=r30 ;; - adds r25=XSI_IIP-XSI_IFS,r25 + adds r25=XSI_IIP_OFS-XSI_IFS_OFS,r25 ;; #else (p9) mov cr.ifs=r30 // M2 diff -r 17f7a426b2cd -r e6c7667c97ed linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S Thu Jul 06 06:44:19 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenivt.S Thu Jul 06 06:47:16 2006 -0400 @@ -446,7 +446,7 @@ ENTRY(alt_itlb_miss) #ifdef CONFIG_XEN movl r31=XSI_IPSR ;; - ld8 r21=[r31],XSI_IFA-XSI_IPSR // get ipsr, point to ifa + ld8 r21=[r31],XSI_IFA_OFS-XSI_IPSR_OFS // get ipsr, point to ifa movl r17=PAGE_KERNEL ;; ld8 r16=[r31] // get ifa @@ -508,10 +508,10 @@ ENTRY(alt_dtlb_miss) #ifdef CONFIG_XEN movl r31=XSI_IPSR ;; - ld8 r21=[r31],XSI_ISR-XSI_IPSR // get ipsr, point to isr + ld8 r21=[r31],XSI_ISR_OFS-XSI_IPSR_OFS // get ipsr, point to isr movl r17=PAGE_KERNEL ;; - ld8 r20=[r31],XSI_IFA-XSI_ISR // get isr, point to ifa + ld8 r20=[r31],XSI_IFA_OFS-XSI_ISR_OFS // get isr, point to ifa movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) ;; ld8 r16=[r31] // get ifa @@ -688,12 +688,12 @@ ENTRY(page_fault) #ifdef CONFIG_XEN movl r3=XSI_ISR ;; - ld8 out1=[r3],XSI_IFA-XSI_ISR // get vcr.isr, point to ifa + ld8 out1=[r3],XSI_IFA_OFS-XSI_ISR_OFS // get vcr.isr, point to ifa ;; ld8 out0=[r3] // get vcr.ifa mov r14=1 ;; - add r3=XSI_PSR_IC-XSI_IFA, r3 // point to vpsr.ic + add 
r3=XSI_PSR_IC_OFS-XSI_IFA_OFS, r3 // point to vpsr.ic ;; st4 [r3]=r14 // vpsr.ic = 1 adds r3=8,r2 // set up second base pointer @@ -737,7 +737,7 @@ xen_page_fault: ;; (p15) ld8 r3=[r3] ;; -(p15) st1 [r3]=r0,XSI_PEND-XSI_PSR_I_ADDR // if (p15) vpsr.i = 1 +(p15) st1 [r3]=r0,XSI_PEND_OFS-XSI_PSR_I_ADDR_OFS // if (p15) vpsr.i = 1 mov r14=r0 ;; (p15) ld4 r14=[r3] // if (pending_interrupts) @@ -1047,7 +1047,7 @@ ENTRY(break_fault) #ifdef CONFIG_XEN movl r22=XSI_IPSR ;; - ld8 r29=[r22],XSI_IIM-XSI_IPSR // get ipsr, point to iip + ld8 r29=[r22],XSI_IIM_OFS-XSI_IPSR_OFS // get ipsr, point to iip #else mov r29=cr.ipsr // M2 (12 cyc) #endif @@ -1055,7 +1055,7 @@ ENTRY(break_fault) #ifdef CONFIG_XEN ;; - ld8 r17=[r22],XSI_IIP-XSI_IIM + ld8 r17=[r22],XSI_IIP_OFS-XSI_IIM_OFS #else mov r17=cr.iim // M2 (2 cyc) #endif @@ -1156,7 +1156,7 @@ 1: movl r16=XSI_PSR_IC mov r3=1 ;; - st4 [r16]=r3,XSI_PSR_I_ADDR-XSI_PSR_IC // vpsr.ic = 1 + st4 [r16]=r3,XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS // vpsr.ic = 1 #else ssm psr.ic | PSR_DEFAULT_BITS // M2 now it's safe to re-enable intr.-collection #endif @@ -1170,7 +1170,7 @@ 1: #ifdef CONFIG_XEN (p15) ld8 r16=[r16] // vpsr.i ;; -(p15) st1 [r16]=r0,XSI_PEND-XSI_PSR_I_ADDR // if (p15) vpsr.i = 1 +(p15) st1 [r16]=r0,XSI_PEND_OFS-XSI_PSR_I_ADDR_OFS // if (p15) vpsr.i = 1 mov r2=r0 ;; (p15) ld4 r2=[r16] // if (pending_interrupts) diff -r 17f7a426b2cd -r e6c7667c97ed linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S --- a/linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S Thu Jul 06 06:44:19 2006 -0400 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xenpal.S Thu Jul 06 06:47:16 2006 -0400 @@ -45,7 +45,7 @@ 1: { ;; (p6) st4 [r31]=r0 ;; -(p7) adds r31=XSI_PSR_I_ADDR-XSI_PSR_IC,r31 +(p7) adds r31=XSI_PSR_I_ADDR_OFS-XSI_PSR_IC_OFS,r31 (p7) mov r22=1 ;; (p7) ld8 r31=[r31] diff -r 17f7a426b2cd -r e6c7667c97ed linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S --- a/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S Thu Jul 06 06:44:19 2006 -0400 +++ 
b/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S Thu Jul 06 06:47:16 2006 -0400 @@ -15,10 +15,21 @@ GLOBAL_ENTRY(early_xen_setup) mov r8=ar.rsc // Initialized in head.S (isBP) movl r9=running_on_xen;; extr.u r8=r8,2,2;; // Extract pl fields - cmp.ne p7,p0=r8,r0;; // p7: running on xen -(p7) mov r8=1 // booleanize. -(p7) movl r10=xen_ivt;; + cmp.eq p7,p0=r8,r0 // p7: !running on xen + mov r8=1 // booleanize. +(p7) br.ret.sptk.many rp;; (isBP) st4 [r9]=r8 -(p7) mov cr.iva=r10 - br.ret.sptk.many rp;; + movl r10=xen_ivt;; + + mov cr.iva=r10 + +#if XSI_BASE != 0xf100000000000000UL + /* Backward compatibility. */ +(isBP) mov r2=0x600 +(isBP) movl r28=XSI_BASE;; +(isBP) break 0x1000;; +#endif + + br.ret.sptk.many rp + ;; END(early_xen_setup) diff -r 17f7a426b2cd -r e6c7667c97ed linux-2.6-xen-sparse/drivers/xen/console/console.c --- a/linux-2.6-xen-sparse/drivers/xen/console/console.c Thu Jul 06 06:44:19 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/console/console.c Thu Jul 06 06:47:16 2006 -0400 @@ -267,7 +267,8 @@ void xencons_force_flush(void) /******************** User-space console driver (/dev/console) ************/ #define DRV(_d) (_d) -#define TTY_INDEX(_tty) ((_tty)->index) +#define DUMMY_TTY(_tty) ((xc_mode != XC_SERIAL) && \ + ((_tty)->index != (xc_num - 1))) static struct termios *xencons_termios[MAX_NR_CONSOLES]; static struct termios *xencons_termios_locked[MAX_NR_CONSOLES]; @@ -391,7 +392,7 @@ static void xencons_send_xchar(struct tt { unsigned long flags; - if (TTY_INDEX(tty) != 0) + if (DUMMY_TTY(tty)) return; spin_lock_irqsave(&xencons_lock, flags); @@ -402,7 +403,7 @@ static void xencons_send_xchar(struct tt static void xencons_throttle(struct tty_struct *tty) { - if (TTY_INDEX(tty) != 0) + if (DUMMY_TTY(tty)) return; if (I_IXOFF(tty)) @@ -411,7 +412,7 @@ static void xencons_throttle(struct tty_ static void xencons_unthrottle(struct tty_struct *tty) { - if (TTY_INDEX(tty) != 0) + if (DUMMY_TTY(tty)) return; if (I_IXOFF(tty)) { @@ -426,7 +427,7 @@ 
static void xencons_flush_buffer(struct { unsigned long flags; - if (TTY_INDEX(tty) != 0) + if (DUMMY_TTY(tty)) return; spin_lock_irqsave(&xencons_lock, flags); @@ -451,7 +452,7 @@ static int xencons_write( int i; unsigned long flags; - if (TTY_INDEX(tty) != 0) + if (DUMMY_TTY(tty)) return count; spin_lock_irqsave(&xencons_lock, flags); @@ -472,7 +473,7 @@ static void xencons_put_char(struct tty_ { unsigned long flags; - if (TTY_INDEX(tty) != 0) + if (DUMMY_TTY(tty)) return; spin_lock_irqsave(&xencons_lock, flags); @@ -484,7 +485,7 @@ static void xencons_flush_chars(struct t { unsigned long flags; - if (TTY_INDEX(tty) != 0) + if (DUMMY_TTY(tty)) return; spin_lock_irqsave(&xencons_lock, flags); @@ -496,7 +497,7 @@ static void xencons_wait_until_sent(stru { unsigned long orig_jiffies = jiffies; - if (TTY_INDEX(tty) != 0) + if (DUMMY_TTY(tty)) return; while (DRV(tty->driver)->chars_in_buffer(tty)) { @@ -515,7 +516,7 @@ static int xencons_open(struct tty_struc { unsigned long flags; - if (TTY_INDEX(tty) != 0) + if (DUMMY_TTY(tty)) return 0; spin_lock_irqsave(&xencons_lock, flags); @@ -532,7 +533,7 @@ static void xencons_close(struct tty_str { unsigned long flags; - if (TTY_INDEX(tty) != 0) + if (DUMMY_TTY(tty)) return; if (tty->count == 1) { @@ -588,8 +589,7 @@ static int __init xencons_init(void) DRV(xencons_driver)->init_termios = tty_std_termios; DRV(xencons_driver)->flags = TTY_DRIVER_REAL_RAW | - TTY_DRIVER_RESET_TERMIOS | - TTY_DRIVER_NO_DEVFS; + TTY_DRIVER_RESET_TERMIOS; DRV(xencons_driver)->termios = xencons_termios; DRV(xencons_driver)->termios_locked = xencons_termios_locked; @@ -599,8 +599,8 @@ static int __init xencons_init(void) DRV(xencons_driver)->name_base = 0 + xc_num; } else { DRV(xencons_driver)->name = "tty"; - DRV(xencons_driver)->minor_start = xc_num; - DRV(xencons_driver)->name_base = xc_num; + DRV(xencons_driver)->minor_start = 1; + DRV(xencons_driver)->name_base = 1; } tty_set_operations(xencons_driver, &xencons_ops); @@ -614,8 +614,6 @@ static 
int __init xencons_init(void) xencons_driver = NULL; return rc; } - - tty_register_device(xencons_driver, 0, NULL); if (xen_start_info->flags & SIF_INITDOMAIN) { xencons_priv_irq = bind_virq_to_irqhandler( @@ -629,8 +627,7 @@ static int __init xencons_init(void) } printk("Xen virtual console successfully installed as %s%d\n", - DRV(xencons_driver)->name, - DRV(xencons_driver)->name_base ); + DRV(xencons_driver)->name, xc_num); return 0; } diff -r 17f7a426b2cd -r e6c7667c97ed linux-2.6-xen-sparse/drivers/xen/netback/netback.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Thu Jul 06 06:44:19 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Thu Jul 06 06:47:16 2006 -0400 @@ -496,9 +496,9 @@ static void netbk_tx_err(netif_t *netif, do { make_tx_response(netif, txp, NETIF_RSP_ERROR); - if (++cons >= end) + if (cons >= end) break; - txp = RING_GET_REQUEST(&netif->tx, cons); + txp = RING_GET_REQUEST(&netif->tx, cons++); } while (1); netif->tx.req_cons = cons; netif_schedule_work(netif); @@ -691,6 +691,29 @@ int netbk_get_extras(netif_t *netif, str return work_to_do; } +static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso) +{ + if (!gso->u.gso.size) { + DPRINTK("GSO size must not be zero.\n"); + return -EINVAL; + } + + /* Currently only TCPv4 S.O. is supported. */ + if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) { + DPRINTK("Bad GSO type %d.\n", gso->u.gso.type); + return -EINVAL; + } + + skb_shinfo(skb)->gso_size = gso->u.gso.size; + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + + /* Header must be checked, and gso_segs computed. 
*/ + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb)->gso_segs = 0; + + return 0; +} + /* Called after netfront has transmitted */ static void net_tx_action(unsigned long unused) { @@ -764,11 +787,11 @@ static void net_tx_action(unsigned long if (txreq.flags & NETTXF_extra_info) { work_to_do = netbk_get_extras(netif, extras, work_to_do); + i = netif->tx.req_cons; if (unlikely(work_to_do < 0)) { - netbk_tx_err(netif, &txreq, 0); + netbk_tx_err(netif, &txreq, i); continue; } - i = netif->tx.req_cons; } ret = netbk_count_requests(netif, &txreq, work_to_do); @@ -819,20 +842,11 @@ static void net_tx_action(unsigned long struct netif_extra_info *gso; gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; - /* Currently on TCPv4 S.O. is supported. */ - if (gso->u.gso.type != XEN_NETIF_GSO_TCPV4) { - DPRINTK("Bad GSO type %d.\n", gso->u.gso.type); + if (netbk_set_skb_gso(skb, gso)) { kfree_skb(skb); netbk_tx_err(netif, &txreq, i); - break; + continue; } - - skb_shinfo(skb)->gso_size = gso->u.gso.size; - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; - - /* Header must be checked, and gso_segs computed. */ - skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; - skb_shinfo(skb)->gso_segs = 0; } gnttab_set_map_op(mop, MMAP_VADDR(pending_idx), diff -r 17f7a426b2cd -r e6c7667c97ed linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Thu Jul 06 06:44:19 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Thu Jul 06 06:47:16 2006 -0400 @@ -102,9 +102,10 @@ static int netback_probe(struct xenbus_d } #if 0 /* KAF: After the protocol is finalised. 
*/ - err = xenbus_printf(xbt, dev->nodename, "feature-tso", "%d", 1); + err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", + "%d", 1); if (err) { - message = "writing feature-tso"; + message = "writing feature-gso-tcpv4"; goto abort_transaction; } #endif diff -r 17f7a426b2cd -r e6c7667c97ed linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Thu Jul 06 06:44:19 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Thu Jul 06 06:47:16 2006 -0400 @@ -787,7 +787,7 @@ static int network_start_xmit(struct sk_ tx->flags |= NETTXF_extra_info; gso->u.gso.size = skb_shinfo(skb)->gso_size; - gso->u.gso.type = XEN_NETIF_GSO_TCPV4; + gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; gso->type = XEN_NETIF_EXTRA_TYPE_GSO; gso->flags = 0; @@ -1098,8 +1098,8 @@ static int xennet_set_tso(struct net_dev struct netfront_info *np = netdev_priv(dev); int val; - if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-tso", - "%d", &val) < 0) + if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, + "feature-gso-tcpv4", "%d", &val) < 0) val = 0; #if 0 /* KAF: After the protocol is finalised. */ if (!val) @@ -1112,6 +1112,11 @@ static int xennet_set_tso(struct net_dev static void xennet_set_features(struct net_device *dev) { + /* Turn off all GSO bits except ROBUST. 
*/ + dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1; + dev->features |= NETIF_F_GSO_ROBUST; + xennet_set_sg(dev, 0); + if (!xennet_set_sg(dev, 1)) xennet_set_tso(dev, 1); } diff -r 17f7a426b2cd -r e6c7667c97ed linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Thu Jul 06 06:44:19 2006 -0400 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Thu Jul 06 06:47:16 2006 -0400 @@ -886,29 +886,6 @@ EXPORT_SYMBOL_GPL(unregister_xenstore_no EXPORT_SYMBOL_GPL(unregister_xenstore_notifier); -static int all_devices_ready_(struct device *dev, void *data) -{ - struct xenbus_device *xendev = to_xenbus_device(dev); - int *result = data; - - if (xendev->state != XenbusStateConnected) { - *result = 0; - return 1; - } - - return 0; -} - - -static int all_devices_ready(void) -{ - int ready = 1; - bus_for_each_dev(&xenbus_frontend.bus, NULL, &ready, - all_devices_ready_); - return ready; -} - - void xenbus_probe(void *unused) { BUG_ON((xenstored_ready <= 0)); @@ -1060,6 +1037,43 @@ postcore_initcall(xenbus_probe_init); postcore_initcall(xenbus_probe_init); +static int is_disconnected_device(struct device *dev, void *data) +{ + struct xenbus_device *xendev = to_xenbus_device(dev); + + /* + * A device with no driver will never connect. We care only about + * devices which should currently be in the process of connecting. + */ + if (!dev->driver) + return 0; + + return (xendev->state != XenbusStateConnected); +} + +static int exists_disconnected_device(void) +{ + return bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, + is_disconnected_device); +} + +static int print_device_status(struct device *dev, void *data) +{ + struct xenbus_device *xendev = to_xenbus_device(dev); + + if (!dev->driver) { + /* Information only: is this too noisy? 
*/ + printk(KERN_INFO "XENBUS: Device with no driver: %s\n", + xendev->nodename); + } else if (xendev->state != XenbusStateConnected) { + printk(KERN_WARNING "XENBUS: Timeout connecting " + "to device: %s (state %d)\n", + xendev->nodename, xendev->state); + } + + return 0; +} + /* * On a 10 second timeout, wait for all devices currently configured. We need * to do this to guarantee that the filesystems and / or network devices @@ -1081,13 +1095,12 @@ static int __init wait_for_devices(void) if (!is_running_on_xen()) return -ENODEV; - while (time_before(jiffies, timeout)) { - if (all_devices_ready()) - return 0; + while (time_before(jiffies, timeout) && exists_disconnected_device()) schedule_timeout_interruptible(HZ/10); - } - - printk(KERN_WARNING "XENBUS: Timeout connecting to devices!\n"); + + bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, + print_device_status); + return 0; } diff -r 17f7a426b2cd -r e6c7667c97ed linux-2.6-xen-sparse/include/asm-ia64/page.h --- a/linux-2.6-xen-sparse/include/asm-ia64/page.h Thu Jul 06 06:44:19 2006 -0400 +++ b/linux-2.6-xen-sparse/include/asm-ia64/page.h Thu Jul 06 06:47:16 2006 -0400 @@ -331,7 +331,7 @@ mfn_to_local_pfn(unsigned long mfn) mfn_to_local_pfn(unsigned long mfn) { extern unsigned long max_mapnr; - unsigned long pfn = mfn_to_pfn(mfn); + unsigned long pfn = mfn_to_pfn_for_dma(mfn); if (!pfn_valid(pfn)) return INVALID_P2M_ENTRY; return pfn; diff -r 17f7a426b2cd -r e6c7667c97ed linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h --- a/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h Thu Jul 06 06:44:19 2006 -0400 +++ b/linux-2.6-xen-sparse/include/asm-ia64/xen/privop.h Thu Jul 06 06:47:16 2006 -0400 @@ -10,10 +10,19 @@ */ -#include <asm/xen/asm-xsi-offsets.h> #include <xen/interface/arch-ia64.h> #define IA64_PARAVIRTUALIZED + +#if 0 +#undef XSI_BASE +/* At 1 MB, before per-cpu space but still addressable using addl instead + of movl. */ +#define XSI_BASE 0xfffffffffff00000 +#endif + +/* Address of mapped regs. 
*/ +#define XMAPPEDREGS_BASE (XSI_BASE + XSI_SIZE) #ifdef __ASSEMBLY__ #define XEN_HYPER_RFI break HYPERPRIVOP_RFI @@ -39,6 +48,21 @@ #define XEN_HYPER_GET_PMD break HYPERPRIVOP_GET_PMD #define XEN_HYPER_GET_EFLAG break HYPERPRIVOP_GET_EFLAG #define XEN_HYPER_SET_EFLAG break HYPERPRIVOP_SET_EFLAG + +#define XSI_IFS (XSI_BASE + XSI_IFS_OFS) +#define XSI_PRECOVER_IFS (XSI_BASE + XSI_PRECOVER_IFS_OFS) +#define XSI_INCOMPL_REGFR (XSI_BASE + XSI_INCOMPL_REGFR_OFS) +#define XSI_IFA (XSI_BASE + XSI_IFA_OFS) +#define XSI_ISR (XSI_BASE + XSI_ISR_OFS) +#define XSI_IIM (XSI_BASE + XSI_IIM_OFS) +#define XSI_ITIR (XSI_BASE + XSI_ITIR_OFS) +#define XSI_PSR_I_ADDR (XSI_BASE + XSI_PSR_I_ADDR_OFS) +#define XSI_PSR_IC (XSI_BASE + XSI_PSR_IC_OFS) +#define XSI_IPSR (XSI_BASE + XSI_IPSR_OFS) +#define XSI_IIP (XSI_BASE + XSI_IIP_OFS) +#define XSI_BANK1_R16 (XSI_BASE + XSI_BANK1_R16_OFS) +#define XSI_BANKNUM (XSI_BASE + XSI_BANKNUM_OFS) +#define XSI_IHA (XSI_BASE + XSI_IHA_OFS) #endif #ifndef __ASSEMBLY__ @@ -81,15 +105,16 @@ extern void xen_set_eflag(unsigned long) * Others, like "pend", are abstractions based on privileged registers. * "Pend" is guaranteed to be set if reading cr.ivr would return a * (non-spurious) interrupt. */ +#define XEN_MAPPEDREGS ((struct mapped_regs *)XMAPPEDREGS_BASE) #define XSI_PSR_I \ - (*(uint64_t *)(XSI_PSR_I_ADDR)) + (*XEN_MAPPEDREGS->interrupt_mask_addr) #define xen_get_virtual_psr_i() \ - (!(*(uint8_t *)(XSI_PSR_I))) + (!XSI_PSR_I) #define xen_set_virtual_psr_i(_val) \ - ({ *(uint8_t *)(XSI_PSR_I) = (uint8_t)(_val) ? 0:1; }) + ({ XSI_PSR_I = (uint8_t)(_val) ? 0 : 1; }) #define xen_set_virtual_psr_ic(_val) \ - ({ *(int *)(XSI_PSR_IC) = _val ? 1:0; }) -#define xen_get_virtual_pend() (*(int *)(XSI_PEND)) + ({ XEN_MAPPEDREGS->interrupt_collection_enabled = _val ? 1 : 0; }) +#define xen_get_virtual_pend() (XEN_MAPPEDREGS->pending_interruption) /* Hyperprivops are "break" instructions with a well-defined API. 
* In particular, the virtual psr.ic bit must be off; in this way diff -r 17f7a426b2cd -r e6c7667c97ed tools/examples/xmexample.vti --- a/tools/examples/xmexample.vti Thu Jul 06 06:44:19 2006 -0400 +++ b/tools/examples/xmexample.vti Thu Jul 06 06:47:16 2006 -0400 @@ -12,7 +12,7 @@ arch_libdir = 'lib' #---------------------------------------------------------------------------- # Kernel image file. -kernel = "/boot/Flash.fd" +kernel = "/usr/lib/xen/boot/guest_firmware.bin" # The domain build function. VTI domain uses 'hvm'. builder='hvm' @@ -26,6 +26,9 @@ memory = 256 # A name for your domain. All domains must have different names. name = "ExampleVTIDomain" + +# the number of cpus guest platform has, default=1 +#vcpus=1 # List of which CPUS this domain is allowed to use, default Xen picks #cpus = "" # leave to Xen to pick @@ -81,7 +84,6 @@ memmap = '/usr/lib/xen/boot/mem-map.sxp' # enable SDL library for graphics, default = 0 sdl=1 -stdvga=1 #---------------------------------------------------------------------------- # enable VNC library for graphics, default = 1 vnc=0 @@ -94,6 +96,14 @@ vncviewer=0 # no graphics, use serial port #nographic=0 +#---------------------------------------------------------------------------- +# enable stdvga, default = 0 (use cirrus logic device model) +stdvga=0 + +#----------------------------------------------------------------------------- +# serial port re-direct to pty device, /dev/pts/n +# then xm console or minicom can connect +serial='pty' #----------------------------------------------------------------------------- # enable audio support @@ -108,3 +118,14 @@ vncviewer=0 #----------------------------------------------------------------------------- # start in full screen #full-screen=1 diff -r 42cab8724273 tools/libxc/xc_ia64_stubs.c + +#----------------------------------------------------------------------------- +# Enable USB support (specific devices specified at runtime through the +# monitor window) +#usb=1 + +# Enable USB 
mouse support (only enable one of the following, `mouse' for +# PS/2 protocol relative mouse, `tablet' for +# absolute mouse) +#usbdevice='mouse' +#usbdevice='tablet' diff -r 17f7a426b2cd -r e6c7667c97ed tools/ioemu/hw/piix4acpi.c --- a/tools/ioemu/hw/piix4acpi.c Thu Jul 06 06:44:19 2006 -0400 +++ b/tools/ioemu/hw/piix4acpi.c Thu Jul 06 06:47:16 2006 -0400 @@ -476,6 +476,6 @@ void pci_piix4_acpi_init(PCIBus *bus) pci_register_io_region((PCIDevice *)d, 4, 0x10, PCI_ADDRESS_SPACE_IO, acpi_map); - pmtimer_state = pmtimer_init(); + /*pmtimer_state = pmtimer_init();*/ acpi_reset (d); } diff -r 17f7a426b2cd -r e6c7667c97ed tools/libxc/Makefile --- a/tools/libxc/Makefile Thu Jul 06 06:44:19 2006 -0400 +++ b/tools/libxc/Makefile Thu Jul 06 06:47:16 2006 -0400 @@ -39,10 +39,6 @@ CFLAGS += -Werror CFLAGS += -Werror CFLAGS += -fno-strict-aliasing CFLAGS += $(INCLUDES) -I. - -ELFSIZE64-y = -DELFSIZE=64 -CFLAGS += $(ELFSIZE64-$(CONFIG_POWERPC)) - # Define this to make it possible to run valgrind on code linked with these # libraries. 
diff -r 17f7a426b2cd -r e6c7667c97ed tools/libxc/xc_hvm_build.c --- a/tools/libxc/xc_hvm_build.c Thu Jul 06 06:44:19 2006 -0400 +++ b/tools/libxc/xc_hvm_build.c Thu Jul 06 06:47:16 2006 -0400 @@ -4,6 +4,7 @@ #define ELFSIZE 32 #include <stddef.h> +#include <inttypes.h> #include "xg_private.h" #include "xc_elf.h" #include <stdlib.h> @@ -188,7 +189,7 @@ static int setup_guest(int xc_handle, unsigned char e820_map_nr; struct domain_setup_info dsi; - unsigned long long v_end; + uint64_t v_end; unsigned long shared_page_frame = 0; shared_iopage_t *sp; @@ -208,11 +209,11 @@ static int setup_guest(int xc_handle, v_end = (unsigned long long)memsize << 20; IPRINTF("VIRTUAL MEMORY ARRANGEMENT:\n" - " Loaded HVM loader: %08lx->%08lx\n" - " TOTAL: %08lx->%016llx\n", + " Loaded HVM loader: %016"PRIx64"->%016"PRIx64"\n" + " TOTAL: %016"PRIx64"->%016"PRIx64"\n", dsi.v_kernstart, dsi.v_kernend, dsi.v_start, v_end); - IPRINTF(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry); + IPRINTF(" ENTRY ADDRESS: %016"PRIx64"\n", dsi.v_kernentry); if ( (v_end - dsi.v_start) > ((unsigned long long)nr_pages << PAGE_SHIFT) ) { diff -r 17f7a426b2cd -r e6c7667c97ed tools/libxc/xc_ia64_stubs.c --- a/tools/libxc/xc_ia64_stubs.c Thu Jul 06 06:44:19 2006 -0400 +++ b/tools/libxc/xc_ia64_stubs.c Thu Jul 06 06:47:16 2006 -0400 @@ -727,7 +727,7 @@ int xc_hvm_build(int xc_handle, free(image); ctxt->flags = VGCF_VMX_GUEST; - ctxt->regs.cr_iip = 0x80000000ffffffb0UL; + ctxt->user_regs.cr_iip = 0x80000000ffffffb0UL; ctxt->privregs = 0; memset( &launch_op, 0, sizeof(launch_op) ); diff -r 17f7a426b2cd -r e6c7667c97ed tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Thu Jul 06 06:44:19 2006 -0400 +++ b/tools/libxc/xc_linux_build.c Thu Jul 06 06:47:16 2006 -0400 @@ -11,6 +11,9 @@ #include <unistd.h> #include <inttypes.h> #include <zlib.h> + +/* Handy for printing out '0' prepended values at native pointer size */ +#define _p(a) ((void *) ((ulong)a)) #if defined(__i386__) #define L1_PROT 
(_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) @@ -502,8 +505,6 @@ static int setup_guest(int xc_handle, goto error_out; } -#define _p(a) ((void *) (a)) - IPRINTF("VIRTUAL MEMORY ARRANGEMENT:\n" " Loaded kernel: %p->%p\n" " Init. ramdisk: %p->%p\n" @@ -766,9 +767,9 @@ static int setup_guest(int xc_handle, goto error_out; } -#define NR(_l,_h,_s) \ - (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \ - ((_l) & ~((1UL<<(_s))-1))) >> (_s)) +#define NR(_l,_h,_s) \ + (((((unsigned long)(_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \ + ((unsigned long)(_l) & ~((1UL<<(_s))-1))) >> (_s)) #if defined(__i386__) if ( dsi.pae_kernel != PAEKERN_no ) { @@ -796,8 +797,6 @@ static int setup_guest(int xc_handle, break; #endif } - -#define _p(a) ((void *) (a)) IPRINTF("VIRTUAL MEMORY ARRANGEMENT:\n"); IPRINTF(" Loaded kernel: %p->%p\n", _p(dsi.v_kernstart), @@ -819,8 +818,8 @@ static int setup_guest(int xc_handle, if ( ((v_end - dsi.v_start)>>PAGE_SHIFT) > nr_pages ) { PERROR("Initial guest OS requires too much space\n" - "(%luMB is greater than %luMB limit)\n", - (v_end-dsi.v_start)>>20, nr_pages>>(20-PAGE_SHIFT)); + "(%pMB is greater than %luMB limit)\n", + _p((v_end-dsi.v_start)>>20), nr_pages>>(20-PAGE_SHIFT)); goto error_out; } @@ -1152,10 +1151,10 @@ static int xc_linux_build_internal(int x ctxt->flags = 0; ctxt->shared.flags = flags; ctxt->shared.start_info_pfn = nr_pages - 3; /* metaphysical */ - ctxt->regs.cr_ipsr = 0; /* all necessary bits filled by hypervisor */ - ctxt->regs.cr_iip = vkern_entry; - ctxt->regs.cr_ifs = 1UL << 63; - ctxt->regs.ar_fpsr = xc_ia64_fpsr_default(); + ctxt->user_regs.cr_ipsr = 0; /* all necessary bits filled by hypervisor */ + ctxt->user_regs.cr_iip = vkern_entry; + ctxt->user_regs.cr_ifs = 1UL << 63; + ctxt->user_regs.ar_fpsr = xc_ia64_fpsr_default(); /* currently done by hypervisor, should move here */ /* ctxt->regs.r28 = dom_fw_setup(); */ ctxt->privregs = 0; diff -r 17f7a426b2cd -r e6c7667c97ed tools/libxc/xc_load_elf.c --- 
a/tools/libxc/xc_load_elf.c Thu Jul 06 06:44:19 2006 -0400 +++ b/tools/libxc/xc_load_elf.c Thu Jul 06 06:47:16 2006 -0400 @@ -173,8 +173,10 @@ static int parseelfimage(const char *ima elf_pa_off_defined = (p != NULL); elf_pa_off = elf_pa_off_defined ? strtoull(p+17, &p, 0) : virt_base; - if ( elf_pa_off_defined && !virt_base_defined ) { - ERROR("no PA or VA base defined in ELF image"); + if ( elf_pa_off_defined && !virt_base_defined ) + { + ERROR("Neither ELF_PADDR_OFFSET nor VIRT_BASE found in __xen_guest" + " section."); return -EINVAL; } @@ -184,9 +186,9 @@ static int parseelfimage(const char *ima if ( !is_loadable_phdr(phdr) ) continue; vaddr = phdr->p_paddr - elf_pa_off + virt_base; - if ( (vaddr + phdr->p_memsz) < vaddr ) { - /* XXX Are we checking for vaddr to wrap here? */ - ERROR("ELF program header too large."); + if ( (vaddr + phdr->p_memsz) < vaddr ) + { + ERROR("ELF program header %d is too large.", h); return -EINVAL; } @@ -211,7 +213,8 @@ static int parseelfimage(const char *ima if ( (kernstart > kernend) || (dsi->v_kernentry < kernstart) || (dsi->v_kernentry > kernend) || - (dsi->v_start > kernstart) ) { + (dsi->v_start > kernstart) ) + { ERROR("ELF start or entries are out of bounds."); return -EINVAL; } diff -r 17f7a426b2cd -r e6c7667c97ed tools/libxc/xc_ppc_linux_build.c --- a/tools/libxc/xc_ppc_linux_build.c Thu Jul 06 06:44:19 2006 -0400 +++ b/tools/libxc/xc_ppc_linux_build.c Thu Jul 06 06:47:16 2006 -0400 @@ -257,11 +257,11 @@ static int load_kernel( (load_funcs.loadimage)(kernel_img, kernel_size, xc_handle, domid, page_array, dsi); - VERBOSE(printf(" v_start %llx\n", dsi->v_start)); - VERBOSE(printf(" v_end %llx\n", dsi->v_end)); - VERBOSE(printf(" v_kernstart %llx\n", dsi->v_kernstart)); - VERBOSE(printf(" v_kernend %llx\n", dsi->v_kernend)); - VERBOSE(printf(" v_kernentry %llx\n", dsi->v_kernentry)); + VERBOSE(printf(" v_start %016"PRIx64"\n", dsi->v_start)); + VERBOSE(printf(" v_end %016"PRIx64"\n", dsi->v_end)); + VERBOSE(printf(" 
v_kernstart %016"PRIx64"\n", dsi->v_kernstart)); + VERBOSE(printf(" v_kernend %016"PRIx64"\n", dsi->v_kernend)); + VERBOSE(printf(" v_kernentry %016"PRIx64"\n", dsi->v_kernentry)); out: free(kernel_img); diff -r 17f7a426b2cd -r e6c7667c97ed tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Thu Jul 06 06:44:19 2006 -0400 +++ b/tools/python/xen/xend/XendDomainInfo.py Thu Jul 06 06:47:16 2006 -0400 @@ -1283,7 +1283,8 @@ class XendDomainInfo: init_reservation = self.info['memory'] * 1024 if os.uname()[4] in ('ia64', 'ppc64'): - # Workaround until ia64 properly supports ballooning. + # Workaround for architectures that don't yet support + # ballooning. init_reservation = m xc.domain_memory_increase_reservation(self.domid, init_reservation, diff -r 17f7a426b2cd -r e6c7667c97ed tools/xenmon/xenmon.py --- a/tools/xenmon/xenmon.py Thu Jul 06 06:44:19 2006 -0400 +++ b/tools/xenmon/xenmon.py Thu Jul 06 06:47:16 2006 -0400 @@ -36,6 +36,7 @@ import sys # constants NSAMPLES = 100 NDOMAINS = 32 +IDLE_DOMAIN = 31 # idle domain's ID # the struct strings for qos_info ST_DOM_INFO = "6Q4i32s" @@ -253,6 +254,14 @@ def display(scr, row, col, str, attr=0): sys.exit(1) +# display domain id +def display_domain_id(scr, row, col, dom): + if dom == IDLE_DOMAIN: + display(scr, row, col-1, "Idle") + else: + display(scr, row, col, "%d" % dom) + + # the live monitoring code def show_livestats(cpu): ncpu = 1 # number of cpu's on this platform @@ -361,7 +370,7 @@ def show_livestats(cpu): # display gotten row += 1 col = 2 - display(stdscr, row, col, "%d" % dom) + display_domain_id(stdscr, row, col, dom) col += 4 display(stdscr, row, col, "%s" % time_scale(h2[dom][0][0])) col += 12 @@ -386,7 +395,7 @@ def show_livestats(cpu): if options.allocated: row += 1 col = 2 - display(stdscr, row, col, "%d" % dom) + display_domain_id(stdscr, row, col, dom) col += 28 display(stdscr, row, col, "%s/ex" % time_scale(h2[dom][1])) col += 42 @@ -398,7 +407,7 @@ def 
show_livestats(cpu): if options.blocked: row += 1 col = 2 - display(stdscr, row, col, "%d" % dom) + display_domain_id(stdscr, row, col, dom) col += 4 display(stdscr, row, col, "%s" % time_scale(h2[dom][2][0])) col += 12 @@ -418,7 +427,7 @@ def show_livestats(cpu): if options.waited: row += 1 col = 2 - display(stdscr, row, col, "%d" % dom) + display_domain_id(stdscr, row, col, dom) col += 4 display(stdscr, row, col, "%s" % time_scale(h2[dom][3][0])) col += 12 @@ -438,7 +447,7 @@ def show_livestats(cpu): if options.excount: row += 1 col = 2 - display(stdscr, row, col, "%d" % dom) + display_domain_id(stdscr, row, col, dom) col += 28 display(stdscr, row, col, "%d/s" % h2[dom][4]) @@ -451,7 +460,7 @@ def show_livestats(cpu): if options.iocount: row += 1 col = 2 - display(stdscr, row, col, "%d" % dom) + display_domain_id(stdscr, row, col, dom) col += 4 display(stdscr, row, col, "%d/s" % h2[dom][5][0]) col += 24 @@ -558,7 +567,10 @@ def writelog(): curr = last = time.time() outfiles = {} for dom in range(0, NDOMAINS): - outfiles[dom] = Delayed("%s-dom%d.log" % (options.prefix, dom), 'w') + if dom == IDLE_DOMAIN: + outfiles[dom] = Delayed("%s-idle.log" % options.prefix, 'w') + else: + outfiles[dom] = Delayed("%s-dom%d.log" % (options.prefix, dom), 'w') outfiles[dom].delayed_write("# passed cpu dom cpu(tot) cpu(%) cpu/ex allocated/ex blocked(tot) blocked(%) blocked/io waited(tot) waited(%) waited/ex ex/s io(tot) io/ex\n") while options.duration == 0 or interval < (options.duration * 1000): diff -r 17f7a426b2cd -r e6c7667c97ed tools/xenstat/xentop/xentop.c --- a/tools/xenstat/xentop/xentop.c Thu Jul 06 06:44:19 2006 -0400 +++ b/tools/xenstat/xentop/xentop.c Thu Jul 06 06:47:16 2006 -0400 @@ -156,11 +156,11 @@ field fields[] = { { FIELD_NETS, "NETS", 4, compare_nets, print_nets }, { FIELD_NET_TX, "NETTX(k)", 8, compare_net_tx, print_net_tx }, { FIELD_NET_RX, "NETRX(k)", 8, compare_net_rx, print_net_rx }, - { FIELD_NET_RX, "VBDS", 8, compare_vbds, print_vbds }, - { 
FIELD_NET_RX, "VBD_OO", 8, compare_vbd_oo, print_vbd_oo }, - { FIELD_NET_RX, "VBD_RD", 8, compare_vbd_rd, print_vbd_rd }, - { FIELD_NET_RX, "VBD_WR", 8, compare_vbd_wr, print_vbd_wr }, - { FIELD_SSID, "SSID", 4, compare_ssid, print_ssid } + { FIELD_VBDS, "VBDS", 4, compare_vbds, print_vbds }, + { FIELD_VBD_OO, "VBD_OO", 8, compare_vbd_oo, print_vbd_oo }, + { FIELD_VBD_RD, "VBD_RD", 8, compare_vbd_rd, print_vbd_rd }, + { FIELD_VBD_WR, "VBD_WR", 8, compare_vbd_wr, print_vbd_wr }, + { FIELD_SSID, "SSID", 4, compare_ssid, print_ssid } }; const unsigned int NUM_FIELDS = sizeof(fields)/sizeof(field); diff -r 17f7a426b2cd -r e6c7667c97ed tools/xentrace/Makefile --- a/tools/xentrace/Makefile Thu Jul 06 06:44:19 2006 -0400 +++ b/tools/xentrace/Makefile Thu Jul 06 06:47:16 2006 -0400 @@ -28,6 +28,10 @@ LIBBIN += xenctx LIBBIN += xenctx endif +ifeq ($(XEN_TARGET_ARCH),ia64) +LIBBIN += xenctx +endif + .PHONY: all all: build diff -r 17f7a426b2cd -r e6c7667c97ed tools/xentrace/xenctx.c --- a/tools/xentrace/xenctx.c Thu Jul 06 06:44:19 2006 -0400 +++ b/tools/xentrace/xenctx.c Thu Jul 06 06:47:16 2006 -0400 @@ -44,6 +44,15 @@ int stack_trace = 0; #define INSTR_POINTER(regs) (regs->rip) #define STACK_ROWS 4 #define STACK_COLS 4 +#elif defined (__ia64__) +#define FMT_SIZE_T "%016lx" +#define STACK_POINTER(regs) (regs->r12) +#define FRAME_POINTER(regs) 0 +#define INSTR_POINTER(regs) (regs->cr_iip) +#define STACK_ROWS 4 +#define STACK_COLS 4 +/* On ia64, we can't translate virtual address to physical address. 
*/ +#define NO_TRANSLATION #endif struct symbol { @@ -63,6 +72,9 @@ int is_kernel_text(size_t addr) #elif defined (__x86_64__) if (symbol_table == NULL) return (addr > 0xffffffff80000000UL); +#elif defined (__ia64__) + if (symbol_table == NULL) + return (addr > 0xa000000000000000UL); #endif if (addr >= kernel_stext && @@ -255,8 +267,53 @@ void print_ctx(vcpu_guest_context_t *ctx printf(" gs: %08x\n", regs->gs); } +#elif defined(__ia64__) +void print_ctx(vcpu_guest_context_t *ctx1) +{ + struct cpu_user_regs *regs = &ctx1->user_regs; + + printf("iip: %016lx ", regs->cr_iip); + print_symbol(regs->cr_iip); + printf("\n"); + printf("psr: %016lu ", regs->cr_ipsr); + printf(" b0: %016lx\n", regs->b0); + + printf(" r1: %016lx\n", regs->r1); + printf(" r2: %016lx ", regs->r2); + printf(" r3: %016lx\n", regs->r3); + printf(" r4: %016lx ", regs->r4); + printf(" r5: %016lx\n", regs->r5); + printf(" r6: %016lx ", regs->r6); + printf(" r7: %016lx\n", regs->r7); + printf(" r8: %016lx ", regs->r8); + printf(" r9: %016lx\n", regs->r9); + printf(" r10: %016lx ", regs->r10); + printf(" r11: %016lx\n", regs->r11); + printf(" sp: %016lx ", regs->r12); + printf(" tp: %016lx\n", regs->r13); + printf(" r14: %016lx ", regs->r14); + printf(" r15: %016lx\n", regs->r15); + printf(" r16: %016lx ", regs->r16); + printf(" r17: %016lx\n", regs->r17); + printf(" r18: %016lx ", regs->r18); + printf(" r19: %016lx\n", regs->r19); + printf(" r20: %016lx ", regs->r20); + printf(" r21: %016lx\n", regs->r21); + printf(" r22: %016lx ", regs->r22); + printf(" r23: %016lx\n", regs->r23); + printf(" r24: %016lx ", regs->r24); + printf(" r25: %016lx\n", regs->r25); + printf(" r26: %016lx ", regs->r26); + printf(" r27: %016lx\n", regs->r27); + printf(" r28: %016lx ", regs->r28); + printf(" r29: %016lx\n", regs->r29); + printf(" r30: %016lx ", regs->r30); + printf(" r31: %016lx\n", regs->r31); + +} #endif +#ifndef NO_TRANSLATION void *map_page(vcpu_guest_context_t *ctx, int vcpu, size_t virt) { static unsigned 
long previous_mfn = 0; @@ -371,6 +428,9 @@ void print_stack(vcpu_guest_context_t *c } } } +#else +#define print_stack(ctx, vcpu) +#endif void dump_ctx(int vcpu) { @@ -393,7 +453,7 @@ void dump_ctx(int vcpu) } print_ctx(&ctx); - if (is_kernel_text(ctx.user_regs.eip)) + if (is_kernel_text(INSTR_POINTER((&ctx.user_regs)))) print_stack(&ctx, vcpu); ret = xc_domain_unpause(xc_handle, domid); diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/Makefile --- a/xen/arch/ia64/Makefile Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/Makefile Thu Jul 06 06:47:16 2006 -0400 @@ -21,9 +21,12 @@ subdir-y += linux-xen $(NM) -n $(TARGET)-syms | grep -v '\( [aUw] \)\|\(__crc_\)\|\( \$[adt]\)'\ > $(BASEDIR)/System.map +# Headers do not depend on auto-generated header, but object files do. HDRS := $(subst $(BASEDIR)/include/asm-ia64/asm-xsi-offsets.h,,$(HDRS)) -asm-offsets.s: asm-offsets.c $(BASEDIR)/include/asm-ia64/.offsets.h.stamp $(BASEDIR)/include/asm-ia64/asm-xsi-offsets.h - $(CC) $(CFLAGS) -S -o $@ $< +$(ALL_OBJS): $(BASEDIR)/include/asm-ia64/asm-xsi-offsets.h + +asm-offsets.s: asm-offsets.c $(BASEDIR)/include/asm-ia64/.offsets.h.stamp + $(CC) $(CFLAGS) -DGENERATE_ASM_OFFSETS -DIA64_TASK_SIZE=0 -S -o $@ $< asm-xsi-offsets.s: asm-xsi-offsets.c $(HDRS) $(CC) $(CFLAGS) -S -o $@ $< @@ -63,14 +66,6 @@ asm-xsi-offsets.s: asm-xsi-offsets.c $(H || ln -s ../../../include/asm-x86/hvm/vioapic.h $(BASEDIR)/include/asm-ia64/hvm/vioapic.h [ -e $(BASEDIR)/arch/ia64/vmx/hvm_vioapic.c ] \ || ln -s ../../../arch/x86/hvm/vioapic.c $(BASEDIR)/arch/ia64/vmx/hvm_vioapic.c -# Solve circular reference on asm-offsets.h - [ -f $(BASEDIR)/include/asm-ia64/asm-offsets.h ] \ - || echo "#define IA64_TASK_SIZE 0" > $(BASEDIR)/include/asm-ia64/asm-offsets.h - [ -f $(BASEDIR)/include/asm-ia64/asm-xsi-offsets.h ] \ - || touch $(BASEDIR)/include/asm-ia64/asm-xsi-offsets.h -#Bad hack. 
Force asm-offsets.h out-of-date - sleep 1 - touch $@ # I'm sure a Makefile wizard would know a better way to do this xen.lds.s: xen/xen.lds.S diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/Rules.mk --- a/xen/arch/ia64/Rules.mk Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/Rules.mk Thu Jul 06 06:47:16 2006 -0400 @@ -44,3 +44,16 @@ endif endif LDFLAGS := -g + +# Additional IA64 include dirs. +HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-null/asm/*.h) +HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-null/asm/sn/*.h) +HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-null/linux/*.h) +HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-xen/asm/*.h) +HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-xen/asm/sn/*.h) +HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-xen/linux/*.h) +HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/*.h) +HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/asm-generic/*.h) +HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/asm/*.h) +HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/byteorder/*.h) +HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/hvm/*.h) diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/asm-xsi-offsets.c --- a/xen/arch/ia64/asm-xsi-offsets.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/asm-xsi-offsets.c Thu Jul 06 06:47:16 2006 -0400 @@ -42,66 +42,34 @@ #define BLANK() asm volatile("\n->" : : ) -#define OFFSET(_sym, _str, _mem) \ - DEFINE(_sym, offsetof(_str, _mem)); +#define DEFINE_MAPPED_REG_OFS(sym, field) \ + DEFINE(sym, (XMAPPEDREGS_OFS + offsetof(mapped_regs_t, field))) void foo(void) { - /* First is shared info page, and then arch specific vcpu context */ - DEFINE(XSI_BASE, SHAREDINFO_ADDR); - - DEFINE(XSI_PSR_I_ADDR_OFS, (XSI_OFS + offsetof(mapped_regs_t, interrupt_mask_addr))); - DEFINE(XSI_PSR_I_ADDR, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, interrupt_mask_addr))); - DEFINE(XSI_IPSR, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, ipsr))); - DEFINE(XSI_IPSR_OFS, 
(XSI_OFS + offsetof(mapped_regs_t, ipsr))); - DEFINE(XSI_IIP_OFS, (XSI_OFS + offsetof(mapped_regs_t, iip))); - DEFINE(XSI_IIP, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, iip))); - DEFINE(XSI_IFS_OFS, (XSI_OFS + offsetof(mapped_regs_t, ifs))); - DEFINE(XSI_IFS, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, ifs))); - DEFINE(XSI_PRECOVER_IFS_OFS, (XSI_OFS + offsetof(mapped_regs_t, precover_ifs))); - DEFINE(XSI_PRECOVER_IFS, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, precover_ifs))); - DEFINE(XSI_ISR_OFS, (XSI_OFS + offsetof(mapped_regs_t, isr))); - DEFINE(XSI_ISR, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, isr))); - DEFINE(XSI_IFA_OFS, (XSI_OFS + offsetof(mapped_regs_t, ifa))); - DEFINE(XSI_IFA, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, ifa))); - DEFINE(XSI_IIPA_OFS, (XSI_OFS + offsetof(mapped_regs_t, iipa))); - DEFINE(XSI_IIPA, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, iipa))); - DEFINE(XSI_IIM_OFS, (XSI_OFS + offsetof(mapped_regs_t, iim))); - DEFINE(XSI_IIM, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, iim))); - DEFINE(XSI_TPR_OFS, (XSI_OFS + offsetof(mapped_regs_t, tpr))); - DEFINE(XSI_TPR, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, tpr))); - DEFINE(XSI_IHA_OFS, (XSI_OFS + offsetof(mapped_regs_t, iha))); - DEFINE(XSI_IHA, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, iha))); - DEFINE(XSI_ITIR_OFS, (XSI_OFS + offsetof(mapped_regs_t, itir))); - DEFINE(XSI_ITIR, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, itir))); - DEFINE(XSI_ITV_OFS, (XSI_OFS + offsetof(mapped_regs_t, itv))); - DEFINE(XSI_ITV, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, itv))); - DEFINE(XSI_PTA_OFS, (XSI_OFS + offsetof(mapped_regs_t, pta))); - DEFINE(XSI_PTA, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, pta))); - DEFINE(XSI_PSR_IC_OFS, (XSI_OFS + offsetof(mapped_regs_t, interrupt_collection_enabled))); - DEFINE(XSI_PSR_IC, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, interrupt_collection_enabled))); - DEFINE(XSI_PEND_OFS, (XSI_OFS + offsetof(mapped_regs_t, 
pending_interruption))); - DEFINE(XSI_PEND, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, pending_interruption))); - DEFINE(XSI_INCOMPL_REGFR_OFS, (XSI_OFS + offsetof(mapped_regs_t, incomplete_regframe))); - DEFINE(XSI_INCOMPL_REGFR, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, incomplete_regframe))); - DEFINE(XSI_METAPHYS_OFS, (XSI_OFS + offsetof(mapped_regs_t, metaphysical_mode))); - DEFINE(XSI_METAPHYS, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, metaphysical_mode))); - - DEFINE(XSI_BANKNUM_OFS, (XSI_OFS + offsetof(mapped_regs_t, banknum))); - DEFINE(XSI_BANKNUM, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, banknum))); - - DEFINE(XSI_BANK0_R16_OFS, (XSI_OFS + offsetof(mapped_regs_t, bank0_regs[0]))); - DEFINE(XSI_BANK0_R16, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, bank0_regs[0]))); - DEFINE(XSI_BANK1_R16_OFS, (XSI_OFS + offsetof(mapped_regs_t, bank1_regs[0]))); - DEFINE(XSI_BANK1_R16, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, bank1_regs[0]))); - DEFINE(XSI_B0NATS_OFS, (XSI_OFS + offsetof(mapped_regs_t, vbnat))); - DEFINE(XSI_B1NATS_OFS, (XSI_OFS + offsetof(mapped_regs_t, vnat))); - DEFINE(XSI_RR0_OFS, (XSI_OFS + offsetof(mapped_regs_t, rrs[0]))); - DEFINE(XSI_RR0, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, rrs[0]))); - DEFINE(XSI_KR0_OFS, (XSI_OFS + offsetof(mapped_regs_t, krs[0]))); - DEFINE(XSI_KR0, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, krs[0]))); - DEFINE(XSI_PKR0_OFS, (XSI_OFS + offsetof(mapped_regs_t, pkrs[0]))); - DEFINE(XSI_PKR0, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, pkrs[0]))); - DEFINE(XSI_TMP0_OFS, (XSI_OFS + offsetof(mapped_regs_t, tmp[0]))); - DEFINE(XSI_TMP0, (SHARED_ARCHINFO_ADDR+offsetof(mapped_regs_t, tmp[0]))); + DEFINE_MAPPED_REG_OFS(XSI_PSR_I_ADDR_OFS, interrupt_mask_addr); + DEFINE_MAPPED_REG_OFS(XSI_IPSR_OFS, ipsr); + DEFINE_MAPPED_REG_OFS(XSI_IIP_OFS, iip); + DEFINE_MAPPED_REG_OFS(XSI_IFS_OFS, ifs); + DEFINE_MAPPED_REG_OFS(XSI_PRECOVER_IFS_OFS, precover_ifs); + DEFINE_MAPPED_REG_OFS(XSI_ISR_OFS, isr); + 
DEFINE_MAPPED_REG_OFS(XSI_IFA_OFS, ifa); + DEFINE_MAPPED_REG_OFS(XSI_IIPA_OFS, iipa); + DEFINE_MAPPED_REG_OFS(XSI_IIM_OFS, iim); + DEFINE_MAPPED_REG_OFS(XSI_TPR_OFS, tpr); + DEFINE_MAPPED_REG_OFS(XSI_IHA_OFS, iha); + DEFINE_MAPPED_REG_OFS(XSI_ITIR_OFS, itir); + DEFINE_MAPPED_REG_OFS(XSI_ITV_OFS, itv); + DEFINE_MAPPED_REG_OFS(XSI_PTA_OFS, pta); + DEFINE_MAPPED_REG_OFS(XSI_PSR_IC_OFS, interrupt_collection_enabled); + DEFINE_MAPPED_REG_OFS(XSI_PEND_OFS, pending_interruption); + DEFINE_MAPPED_REG_OFS(XSI_INCOMPL_REGFR_OFS, incomplete_regframe); + DEFINE_MAPPED_REG_OFS(XSI_METAPHYS_OFS, metaphysical_mode); + DEFINE_MAPPED_REG_OFS(XSI_BANKNUM_OFS, banknum); + DEFINE_MAPPED_REG_OFS(XSI_BANK0_R16_OFS, bank0_regs[0]); + DEFINE_MAPPED_REG_OFS(XSI_BANK1_R16_OFS, bank1_regs[0]); + DEFINE_MAPPED_REG_OFS(XSI_B0NATS_OFS, vbnat); + DEFINE_MAPPED_REG_OFS(XSI_B1NATS_OFS, vnat); + DEFINE_MAPPED_REG_OFS(XSI_RR0_OFS, rrs[0]); + DEFINE_MAPPED_REG_OFS(XSI_KR0_OFS, krs[0]); } diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/linux-xen/iosapic.c --- a/xen/arch/ia64/linux-xen/iosapic.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/linux-xen/iosapic.c Thu Jul 06 06:47:16 2006 -0400 @@ -1185,6 +1185,7 @@ int iosapic_guest_write(unsigned long ph /* Sanity check. 
Vector should be allocated before this update */ if ((rte_index > ios->num_rte) || + test_bit(vec, ia64_xen_vector) || ((vec > IA64_FIRST_DEVICE_VECTOR) && (vec < IA64_LAST_DEVICE_VECTOR) && (!test_bit(vec - IA64_FIRST_DEVICE_VECTOR, ia64_vector_mask)))) diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/linux-xen/irq_ia64.c --- a/xen/arch/ia64/linux-xen/irq_ia64.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/linux-xen/irq_ia64.c Thu Jul 06 06:47:16 2006 -0400 @@ -233,6 +233,10 @@ static struct irqaction ipi_irqaction = }; #endif +#ifdef XEN +extern void setup_vector (unsigned int vec, struct irqaction *action); +#endif + void register_percpu_irq (ia64_vector vec, struct irqaction *action) { @@ -245,7 +249,11 @@ register_percpu_irq (ia64_vector vec, st desc->status |= IRQ_PER_CPU; desc->handler = &irq_type_ia64_lsapic; if (action) +#ifdef XEN + setup_vector(irq, action); +#else setup_irq(irq, action); +#endif } } diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/vmx/vmmu.c --- a/xen/arch/ia64/vmx/vmmu.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/vmx/vmmu.c Thu Jul 06 06:47:16 2006 -0400 @@ -313,7 +313,9 @@ fetch_code(VCPU *vcpu, u64 gip, u64 *cod u64 *vpa; thash_data_t *tlb; u64 mfn; - + struct page_info* page; + + again: if ( !(VCPU(vcpu, vpsr) & IA64_PSR_IT) ) { // I-side physical mode gpip = gip; } @@ -327,15 +329,27 @@ fetch_code(VCPU *vcpu, u64 gip, u64 *cod if( gpip){ mfn = gmfn_to_mfn(vcpu->domain, gpip >>PAGE_SHIFT); if( mfn == INVALID_MFN ) panic_domain(vcpu_regs(vcpu),"fetch_code: invalid memory\n"); - vpa =(u64 *)__va( (gip & (PAGE_SIZE-1)) | (mfn<<PAGE_SHIFT)); }else{ tlb = vhpt_lookup(gip); if( tlb == NULL) panic_domain(vcpu_regs(vcpu),"No entry found in ITLB and DTLB\n"); - vpa =(u64 *)__va((tlb->ppn>>(PAGE_SHIFT-ARCH_PAGE_SHIFT)<<PAGE_SHIFT)|(gip&(PAGE_SIZE-1))); - } + mfn = tlb->ppn >> (PAGE_SHIFT - ARCH_PAGE_SHIFT); + } + + page = mfn_to_page(mfn); + if (get_page(page, vcpu->domain) == 0) { + if (page_get_owner(page) != vcpu->domain) 
{ + // This page might be a page granted by another domain. + panic_domain(NULL, "domain tries to execute foreign domain " + "page which might be mapped by grant table.\n"); + } + goto again; + } + vpa = (u64 *)__va((mfn << PAGE_SHIFT) | (gip & (PAGE_SIZE - 1))); + *code1 = *vpa++; *code2 = *vpa; + put_page(page); return 1; } @@ -503,7 +517,7 @@ struct ptc_ga_args { static void ptc_ga_remote_func (void *varg) { - u64 oldrid, moldrid; + u64 oldrid, moldrid, mpta; struct ptc_ga_args *args = (struct ptc_ga_args *)varg; VCPU *v = args->vcpu; @@ -511,10 +525,13 @@ static void ptc_ga_remote_func (void *va VMX(v, vrr[0]) = args->rid; moldrid = ia64_get_rr(0x0); ia64_set_rr(0x0,vrrtomrr(v,args->rid)); + mpta = ia64_get_pta(); + ia64_set_pta(v->arch.arch_vmx.mpta&(~1)); ia64_srlz_d(); vmx_vcpu_ptc_l(v, args->vadr, args->ps); VMX(v, vrr[0]) = oldrid; ia64_set_rr(0x0,moldrid); + ia64_set_pta(mpta); ia64_dv_serialize_data(); } @@ -530,6 +547,9 @@ IA64FAULT vmx_vcpu_ptc_ga(VCPU *vcpu,UIN vcpu_get_rr(vcpu, va, &args.rid); args.ps = ps; for_each_vcpu (d, v) { + if (!test_bit(_VCPUF_initialised, &v->vcpu_flags)) + continue; + args.vcpu = v; if (v->processor != vcpu->processor) { int proc; @@ -541,6 +561,8 @@ IA64FAULT vmx_vcpu_ptc_ga(VCPU *vcpu,UIN /* Try again if VCPU has migrated. */ } while (proc != v->processor); } + else if(v == vcpu) + vmx_vcpu_ptc_l(v, va, ps); else ptc_ga_remote_func(&args); } diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/vmx/vmx_entry.S --- a/xen/arch/ia64/vmx/vmx_entry.S Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/vmx/vmx_entry.S Thu Jul 06 06:47:16 2006 -0400 @@ -407,6 +407,7 @@ GLOBAL_ENTRY(ia64_vmm_entry) ;; (p1) add r29=PAL_VPS_RESUME_NORMAL,r20 (p2) add r29=PAL_VPS_RESUME_HANDLER,r20 + (p2) ld8 r26=[r25] ;; ia64_vmm_entry_out: mov pr=r23,-2 @@ -675,39 +676,6 @@ 1: itr.d dtr[r24]=loc2 // wire in new mapping... 
;; - -#if 0 - // re-pin mappings for shared_info - - mov r24=IA64_TR_SHARED_INFO - movl r25=__pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RW) - ;; - or loc3 = r25,loc3 // construct PA | page properties - mov r23 = PAGE_SHIFT<<2 - ;; - ptr.d in1,r23 - ;; - mov cr.itir=r23 - mov cr.ifa=in1 - ;; - itr.d dtr[r24]=loc3 // wire in new mapping... - ;; - // re-pin mappings for shared_arch_info - - mov r24=IA64_TR_ARCH_INFO - or loc4 = r25,loc4 // construct PA | page properties - mov r23 = PAGE_SHIFT<<2 - ;; - ptr.d in2,r23 - ;; - mov cr.itir=r23 - mov cr.ifa=in2 - ;; - itr.d dtr[r24]=loc4 // wire in new mapping... - ;; -#endif - - // re-pin mappings for guest_vhpt mov r24=IA64_TR_PERVP_VHPT diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/vmx/vmx_hypercall.c --- a/xen/arch/ia64/vmx/vmx_hypercall.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/vmx/vmx_hypercall.c Thu Jul 06 06:47:16 2006 -0400 @@ -160,10 +160,15 @@ static int do_set_shared_page(VCPU *vcpu u64 o_info; struct domain *d = vcpu->domain; struct vcpu *v; + struct page_info *page; if(vcpu->domain!=dom0) return -EPERM; o_info = (u64)vcpu->domain->shared_info; + again: d->shared_info= (shared_info_t *)domain_mpa_to_imva(vcpu->domain, gpa); + page = virt_to_page(d->shared_info); + if (get_page(page, d) == 0) + goto again; /* Copy existing shared info into new page */ if (o_info) { @@ -178,6 +183,7 @@ static int do_set_shared_page(VCPU *vcpu free_xenheap_page((void *)o_info); } else memset(d->shared_info, 0, PAGE_SIZE); + put_page(page); return 0; } diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/vmx/vmx_ivt.S --- a/xen/arch/ia64/vmx/vmx_ivt.S Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/vmx/vmx_ivt.S Thu Jul 06 06:47:16 2006 -0400 @@ -88,11 +88,13 @@ #include "vmx_minstate.h" +#define MINSTATE_VIRT /* needed by minstate.h */ +#include "minstate.h" #define VMX_FAULT(n) \ vmx_fault_##n:; \ - br.sptk vmx_fault_##n; \ + br.sptk.many dispatch_to_fault_handler; \ ;; \ @@ -508,7 +510,7 @@ 
ENTRY(vmx_interrupt) ;; .mem.offset 0,0; st8.spill [r16]=r13,16 .mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */ - mov r13=r21 /* establish `current' */ + MINSTATE_GET_CURRENT(r13) ;; .mem.offset 0,0; st8.spill [r16]=r15,16 .mem.offset 8,0; st8.spill [r17]=r14,16 diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/vmx/vmx_phy_mode.c --- a/xen/arch/ia64/vmx/vmx_phy_mode.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/vmx/vmx_phy_mode.c Thu Jul 06 06:47:16 2006 -0400 @@ -137,13 +137,14 @@ vmx_init_all_rr(VCPU *vcpu) #endif } +extern void * pal_vaddr; + void vmx_load_all_rr(VCPU *vcpu) { unsigned long psr; ia64_rr phy_rr; - extern void * pal_vaddr; local_irq_save(psr); @@ -204,6 +205,24 @@ vmx_load_all_rr(VCPU *vcpu) } void +vmx_load_rr7_and_pta(VCPU *vcpu) +{ + unsigned long psr; + + local_irq_save(psr); + + vmx_switch_rr7(vrrtomrr(vcpu,VMX(vcpu, vrr[VRN7])), + (void *)vcpu->domain->shared_info, + (void *)vcpu->arch.privregs, + (void *)vcpu->arch.vhpt.hash, pal_vaddr ); + ia64_set_pta(vcpu->arch.arch_vmx.mpta); + + ia64_srlz_d(); + local_irq_restore(psr); + ia64_srlz_i(); +} + +void switch_to_physical_rid(VCPU *vcpu) { UINT64 psr; diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/vmx/vmx_process.c --- a/xen/arch/ia64/vmx/vmx_process.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/vmx/vmx_process.c Thu Jul 06 06:47:16 2006 -0400 @@ -58,7 +58,6 @@ extern void die_if_kernel(char *str, struct pt_regs *regs, long err); extern void rnat_consumption (VCPU *vcpu); -extern unsigned long translate_domain_mpaddr(unsigned long mpaddr); extern void alt_itlb (VCPU *vcpu, u64 vadr); extern void itlb_fault (VCPU *vcpu, u64 vadr); extern void ivhpt_fault (VCPU *vcpu, u64 vadr); @@ -126,7 +125,7 @@ vmx_ia64_handle_break (unsigned long ifa #endif { if (iim == 0) - die_if_kernel("bug check", regs, iim); + vmx_die_if_kernel("Break 0 in Hypervisor.", regs, iim); if (!user_mode(regs)) { /* Allow hypercalls only when cpl = 0. 
*/ diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/vmx/vtlb.c --- a/xen/arch/ia64/vmx/vtlb.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/vmx/vtlb.c Thu Jul 06 06:47:16 2006 -0400 @@ -28,8 +28,10 @@ #include <asm/gcc_intrin.h> #include <linux/interrupt.h> #include <asm/vmx_vcpu.h> +#include <asm/vmx_phy_mode.h> #include <asm/vmmu.h> #include <asm/tlbflush.h> +#include <asm/regionreg.h> #define MAX_CCH_LENGTH 40 thash_data_t *__alloc_chain(thash_cb_t *); @@ -229,12 +231,13 @@ u64 guest_vhpt_lookup(u64 iha, u64 *pte) * purge software guest tlb */ -static void vtlb_purge(thash_cb_t *hcb, u64 va, u64 ps) -{ +static void vtlb_purge(VCPU *v, u64 va, u64 ps) +{ + thash_cb_t *hcb = &v->arch.vtlb; thash_data_t *hash_table, *prev, *next; u64 start, end, size, tag, rid, def_size; ia64_rr vrr; - vcpu_get_rr(current, va, &vrr.rrval); + vcpu_get_rr(v, va, &vrr.rrval); rid = vrr.rid; size = PSIZE(ps); start = va & (-size); @@ -263,11 +266,14 @@ static void vtlb_purge(thash_cb_t *hcb, } // machine_tlb_purge(va, ps); } + + /* * purge VHPT and machine TLB */ -static void vhpt_purge(thash_cb_t *hcb, u64 va, u64 ps) -{ +static void vhpt_purge(VCPU *v, u64 va, u64 ps) +{ + //thash_cb_t *hcb = &v->arch.vhpt; thash_data_t *hash_table, *prev, *next; u64 start, end, size, tag; size = PSIZE(ps); @@ -413,8 +419,8 @@ void thash_purge_entries(VCPU *v, u64 va void thash_purge_entries(VCPU *v, u64 va, u64 ps) { if(vcpu_quick_region_check(v->arch.tc_regions,va)) - vtlb_purge(&v->arch.vtlb, va, ps); - vhpt_purge(&v->arch.vhpt, va, ps); + vtlb_purge(v, va, ps); + vhpt_purge(v, va, ps); } u64 translate_phy_pte(VCPU *v, u64 *pte, u64 itir, u64 va) @@ -425,7 +431,7 @@ u64 translate_phy_pte(VCPU *v, u64 *pte, phy_pte.val = *pte; addr = *pte; addr = ((addr & _PAGE_PPN_MASK)>>ps<<ps)|(va&((1UL<<ps)-1)); - addr = lookup_domain_mpa(v->domain, addr); + addr = lookup_domain_mpa(v->domain, addr, NULL); if(addr & GPFN_IO_MASK){ *pte |= VTLB_PTE_IO; return -1; @@ -456,17 +462,17 @@ void 
thash_purge_and_insert(VCPU *v, u64 phy_pte = translate_phy_pte(v, &pte, itir, ifa); if(ps==PAGE_SHIFT){ if(!(pte&VTLB_PTE_IO)){ - vhpt_purge(&v->arch.vhpt, ifa, ps); + vhpt_purge(v, ifa, ps); vmx_vhpt_insert(&v->arch.vhpt, phy_pte, itir, ifa); } else{ - vhpt_purge(&v->arch.vhpt, ifa, ps); + vhpt_purge(v, ifa, ps); vtlb_insert(&v->arch.vtlb, pte, itir, ifa); vcpu_quick_region_set(PSCBX(v,tc_regions),ifa); } } else{ - vhpt_purge(&v->arch.vhpt, ifa, ps); + vhpt_purge(v, ifa, ps); vtlb_insert(&v->arch.vtlb, pte, itir, ifa); vcpu_quick_region_set(PSCBX(v,tc_regions),ifa); if(!(pte&VTLB_PTE_IO)){ diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/xen/acpi.c --- a/xen/arch/ia64/xen/acpi.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/xen/acpi.c Thu Jul 06 06:47:16 2006 -0400 @@ -50,7 +50,7 @@ #include <asm/system.h> #include <asm/numa.h> #include <asm/sal.h> -//#include <asm/cyclone.h> +#include <asm/hw_irq.h> #define BAD_MADT_ENTRY(entry, end) ( \ (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ @@ -650,7 +650,7 @@ acpi_boot_init (void) printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, total_cpus); return 0; } -#if 0 + int acpi_gsi_to_irq (u32 gsi, unsigned int *irq) { @@ -667,7 +667,7 @@ acpi_gsi_to_irq (u32 gsi, unsigned int * } return 0; } - +#if 0 int acpi_register_irq (u32 gsi, u32 polarity, u32 trigger) { diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/xen/dom0_ops.c --- a/xen/arch/ia64/xen/dom0_ops.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/xen/dom0_ops.c Thu Jul 06 06:47:16 2006 -0400 @@ -256,7 +256,7 @@ do_dom0vp_op(unsigned long cmd, } break; case IA64_DOM0VP_machtophys: - if (max_page <= arg0) { + if (!mfn_valid(arg0)) { ret = INVALID_M2P_ENTRY; break; } diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/xen/dom_fw.c --- a/xen/arch/ia64/xen/dom_fw.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/xen/dom_fw.c Thu Jul 06 06:47:16 2006 -0400 @@ -30,9 +30,30 @@ extern unsigned long dom0_start; extern unsigned 
long running_on_sim; - unsigned long dom_fw_base_mpa = -1; unsigned long imva_fw_base = -1; + +#define FW_VENDOR "X\0e\0n\0/\0i\0a\0\066\0\064\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + +#define MAKE_MD(typ, attr, start, end, abs) \ + do { \ + md = efi_memmap + i++; \ + md->type = typ; \ + md->pad = 0; \ + md->phys_addr = abs ? start : start_mpaddr + start; \ + md->virt_addr = 0; \ + md->num_pages = (end - start) >> EFI_PAGE_SHIFT; \ + md->attribute = attr; \ + } while (0) + +#define EFI_HYPERCALL_PATCH(tgt, call) \ + do { \ + dom_efi_hypercall_patch(d, FW_HYPERCALL_##call##_PADDR, \ + FW_HYPERCALL_##call); \ + tgt = dom_pa((unsigned long) pfn); \ + *pfn++ = FW_HYPERCALL_##call##_PADDR + start_mpaddr; \ + *pfn++ = 0; \ + } while (0) // return domain (meta)physical address for a given imva // this function is a call-back from dom_fw_init @@ -139,14 +160,13 @@ unsigned long dom_fw_setup(struct domain #define NFUNCPTRS 20 -static void print_md(efi_memory_desc_t *md) -{ -#if 1 +static inline void +print_md(efi_memory_desc_t *md) +{ printk("domain mem: type=%2u, attr=0x%016lx, range=[0x%016lx-0x%016lx) (%luMB)\n", md->type, md->attribute, md->phys_addr, md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), md->num_pages >> (20 - EFI_PAGE_SHIFT)); -#endif } static u32 lsapic_nbr; @@ -424,7 +444,6 @@ dom_fw_dom0_passthrough(efi_memory_desc_ arg->md->virt_addr = 0; arg->md->num_pages = md->num_pages; arg->md->attribute = md->attribute; - print_md(arg->md); (*arg->i)++; arg->md++; @@ -440,15 +459,23 @@ dom_fw_dom0_lowmem(efi_memory_desc_t *md dom_fw_dom0_lowmem(efi_memory_desc_t *md, void *arg__) { struct dom0_passthrough_arg* arg = (struct dom0_passthrough_arg*)arg__; - u64 end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); + u64 end = min(HYPERCALL_START, + md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)); BUG_ON(md->type != EFI_CONVENTIONAL_MEMORY); - if (md->phys_addr >= 1*MB) + /* avoid hypercall area */ + if (md->phys_addr >= HYPERCALL_START) return 0; - if (end > 1*MB) 
- end = 1*MB; + /* avoid firmware base area */ + if (md->phys_addr < dom_pa(imva_fw_base)) + end = min(end, dom_pa(imva_fw_base)); + else if (md->phys_addr < dom_pa(imva_fw_base + PAGE_SIZE)) { + if (end < dom_pa(imva_fw_base + PAGE_SIZE)) + return 0; + md->phys_addr = dom_pa(imva_fw_base + PAGE_SIZE); + } arg->md->type = md->type; arg->md->pad = 0; @@ -456,10 +483,26 @@ dom_fw_dom0_lowmem(efi_memory_desc_t *md arg->md->virt_addr = 0; arg->md->num_pages = (end - md->phys_addr) >> EFI_PAGE_SHIFT; arg->md->attribute = md->attribute; - print_md(arg->md); (*arg->i)++; arg->md++; + + /* if firmware area spliced the md, add the upper part here */ + if (end == dom_pa(imva_fw_base)) { + end = min(HYPERCALL_START, + md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)); + if (end > dom_pa(imva_fw_base + PAGE_SIZE)) { + arg->md->type = md->type; + arg->md->pad = 0; + arg->md->phys_addr = dom_pa(imva_fw_base + PAGE_SIZE); + arg->md->virt_addr = 0; + arg->md->num_pages = (end - arg->md->phys_addr) >> EFI_PAGE_SHIFT; + arg->md->attribute = md->attribute; + + (*arg->i)++; + arg->md++; + } + } return 0; } @@ -497,25 +540,13 @@ dom_fw_init (struct domain *d, const cha unsigned long *pfn; unsigned char checksum = 0; char *cp, *cmd_line, *fw_vendor; - int i = 0; + int num_mds, j, i = 0; unsigned long maxmem = (d->max_pages - d->arch.sys_pgnr) * PAGE_SIZE; #ifdef CONFIG_XEN_IA64_DOM0_VP const unsigned long start_mpaddr = 0; #else const unsigned long start_mpaddr = ((d==dom0)?dom0_start:0); #endif - -# define MAKE_MD(typ, attr, start, end, abs) \ - do { \ - md = efi_memmap + i++; \ - md->type = typ; \ - md->pad = 0; \ - md->phys_addr = abs ? start : start_mpaddr + start; \ - md->virt_addr = 0; \ - md->num_pages = (end - start) >> 12; \ - md->attribute = attr; \ - print_md(md); \ - } while (0) /* FIXME: should check size but for now we have a whole MB to play with. And if stealing code from fw-emu.c, watch out for new fw_vendor on the end! 
@@ -557,7 +588,6 @@ dom_fw_init (struct domain *d, const cha efi_systab->hdr.revision = EFI_SYSTEM_TABLE_REVISION; efi_systab->hdr.headersize = sizeof(efi_systab->hdr); cp = fw_vendor = &cmd_line[arglen] + (2-(arglen&1)); // round to 16-bit boundary -#define FW_VENDOR "X\0e\0n\0/\0i\0a\0\066\0\064\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" cp += sizeof(FW_VENDOR) + (8-((unsigned long)cp & 7)); // round to 64-bit boundary memcpy(fw_vendor,FW_VENDOR,sizeof(FW_VENDOR)); @@ -571,12 +601,6 @@ dom_fw_init (struct domain *d, const cha efi_runtime->hdr.signature = EFI_RUNTIME_SERVICES_SIGNATURE; efi_runtime->hdr.revision = EFI_RUNTIME_SERVICES_REVISION; efi_runtime->hdr.headersize = sizeof(efi_runtime->hdr); -#define EFI_HYPERCALL_PATCH(tgt,call) do { \ - dom_efi_hypercall_patch(d,FW_HYPERCALL_##call##_PADDR,FW_HYPERCALL_##call); \ - tgt = dom_pa((unsigned long) pfn); \ - *pfn++ = FW_HYPERCALL_##call##_PADDR + start_mpaddr; \ - *pfn++ = 0; \ - } while (0) EFI_HYPERCALL_PATCH(efi_runtime->get_time,EFI_GET_TIME); EFI_HYPERCALL_PATCH(efi_runtime->set_time,EFI_SET_TIME); @@ -690,7 +714,13 @@ dom_fw_init (struct domain *d, const cha dom_fpswa_hypercall_patch(d); fpswa_inf->fpswa = (void *) FW_HYPERCALL_FPSWA_ENTRY_PADDR + start_mpaddr; - i = 0; + i = 0; /* Used by MAKE_MD */ + + /* Create dom0/domu md entry for fw_mem area */ + MAKE_MD(EFI_ACPI_RECLAIM_MEMORY, EFI_MEMORY_WB | EFI_MEMORY_RUNTIME, + dom_pa((unsigned long)fw_mem), + dom_pa((unsigned long)fw_mem + fw_mem_size), 1); + if (d == dom0) { #ifndef CONFIG_XEN_IA64_DOM0_VP /* @@ -708,9 +738,6 @@ dom_fw_init (struct domain *d, const cha /* simulate 1MB free memory at physical address zero */ MAKE_MD(EFI_LOADER_DATA,EFI_MEMORY_WB,0*MB,1*MB, 0);//XXX -#else - int num_mds; - int j; #endif /* hypercall patches live here, masquerade as reserved PAL memory */ MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB|EFI_MEMORY_RUNTIME,HYPERCALL_START,HYPERCALL_END, 0); @@ -755,59 +782,13 @@ dom_fw_init (struct domain *d, const cha dom_fw_dom0_lowmem, &arg); } 
else MAKE_MD(EFI_RESERVED_TYPE,0,0,0,0); - -#ifdef CONFIG_XEN_IA64_DOM0_VP - // simple - // MAKE_MD(EFI_CONVENTIONAL_MEMORY, EFI_MEMORY_WB, - // HYPERCALL_END, maxmem, 0); - // is not good. Check overlap. - sort(efi_memmap, i, sizeof(efi_memory_desc_t), - efi_mdt_cmp, NULL); - - // find gap and fill it with conventional memory - num_mds = i; - for (j = 0; j < num_mds; j++) { - unsigned long end; - unsigned long next_start; - - md = &efi_memmap[j]; - end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); - - next_start = maxmem; - if (j + 1 < num_mds) { - efi_memory_desc_t* next_md = &efi_memmap[j + 1]; - next_start = next_md->phys_addr; - BUG_ON(end > next_start); - if (end == next_md->phys_addr) - continue; - } - - // clip the range and align to PAGE_SIZE - // Avoid "legacy" low memory addresses and the - // HYPERCALL patch area. - if (end < HYPERCALL_END) - end = HYPERCALL_END; - if (next_start > maxmem) - next_start = maxmem; - end = PAGE_ALIGN(end); - next_start = next_start & PAGE_MASK; - if (end >= next_start) - continue; - - MAKE_MD(EFI_CONVENTIONAL_MEMORY, EFI_MEMORY_WB, - end, next_start, 0); - if (next_start >= maxmem) - break; - } -#endif - } - else { + } else { #ifndef CONFIG_XEN_IA64_DOM0_VP MAKE_MD(EFI_LOADER_DATA,EFI_MEMORY_WB,0*MB,1*MB, 1); + MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,HYPERCALL_END,maxmem, 1); #endif /* hypercall patches live here, masquerade as reserved PAL memory */ MAKE_MD(EFI_PAL_CODE,EFI_MEMORY_WB|EFI_MEMORY_RUNTIME,HYPERCALL_START,HYPERCALL_END, 1); - MAKE_MD(EFI_CONVENTIONAL_MEMORY,EFI_MEMORY_WB,HYPERCALL_END,maxmem, 1); /* Create a dummy entry for IO ports, so that IO accesses are trapped by Xen. */ MAKE_MD(EFI_MEMORY_MAPPED_IO_PORT_SPACE,EFI_MEMORY_UC, @@ -815,6 +796,50 @@ dom_fw_init (struct domain *d, const cha MAKE_MD(EFI_RESERVED_TYPE,0,0,0,0); } +#ifdef CONFIG_XEN_IA64_DOM0_VP + // simple + // MAKE_MD(EFI_CONVENTIONAL_MEMORY, EFI_MEMORY_WB, + // HYPERCALL_END, maxmem, 0); + // is not good. Check overlap. 
+ sort(efi_memmap, i, sizeof(efi_memory_desc_t), + efi_mdt_cmp, NULL); + + // find gap and fill it with conventional memory + num_mds = i; + for (j = 0; j < num_mds; j++) { + unsigned long end; + unsigned long next_start; + + md = &efi_memmap[j]; + end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); + + next_start = maxmem; + if (j + 1 < num_mds) { + efi_memory_desc_t* next_md = &efi_memmap[j + 1]; + next_start = next_md->phys_addr; + BUG_ON(end > next_start); + if (end == next_md->phys_addr) + continue; + } + + // clip the range and align to PAGE_SIZE + // Avoid "legacy" low memory addresses and the + // HYPERCALL patch area. + if (end < HYPERCALL_END) + end = HYPERCALL_END; + if (next_start > maxmem) + next_start = maxmem; + end = PAGE_ALIGN(end); + next_start = next_start & PAGE_MASK; + if (end >= next_start) + continue; + + MAKE_MD(EFI_CONVENTIONAL_MEMORY, EFI_MEMORY_WB, + end, next_start, 0); + if (next_start >= maxmem) + break; + } +#endif sort(efi_memmap, i, sizeof(efi_memory_desc_t), efi_mdt_cmp, NULL); bp->efi_systab = dom_pa((unsigned long) fw_mem); @@ -880,6 +905,10 @@ dom_fw_init (struct domain *d, const cha bp->initrd_start = d->arch.initrd_start; bp->initrd_size = d->arch.initrd_len; } + for (i = 0 ; i < bp->efi_memmap_size/sizeof(efi_memory_desc_t) ; i++) { + md = efi_memmap + i; + print_md(md); + } printf(" initrd start 0x%lx", bp->initrd_start); printf(" initrd size 0x%lx\n", bp->initrd_size); return bp; diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/xen/domain.c --- a/xen/arch/ia64/xen/domain.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/xen/domain.c Thu Jul 06 06:47:16 2006 -0400 @@ -24,6 +24,7 @@ #include <xen/softirq.h> #include <xen/mm.h> #include <xen/iocap.h> +#include <asm/asm-xsi-offsets.h> #include <asm/ptrace.h> #include <asm/system.h> #include <asm/io.h> @@ -88,6 +89,7 @@ extern struct vcpu *ia64_switch_to (stru /* Address of vpsr.i (in fact evtchn_upcall_mask) of current vcpu. This is a Xen virtual address. 
*/ DEFINE_PER_CPU(uint8_t *, current_psr_i_addr); +DEFINE_PER_CPU(int *, current_psr_ic_addr); #include <xen/sched-if.h> @@ -106,6 +108,8 @@ void schedule_tail(struct vcpu *prev) vcpu_load_kernel_regs(current); __ia64_per_cpu_var(current_psr_i_addr) = &current->domain-> shared_info->vcpu_info[current->vcpu_id].evtchn_upcall_mask; + __ia64_per_cpu_var(current_psr_ic_addr) = (int *) + (current->domain->arch.shared_info_va + XSI_PSR_IC_OFS); } } @@ -159,6 +163,8 @@ if (!i--) { i = 1000000; printk("+"); } vcpu_pend_timer(current); __ia64_per_cpu_var(current_psr_i_addr) = &nd->shared_info-> vcpu_info[current->vcpu_id].evtchn_upcall_mask; + __ia64_per_cpu_var(current_psr_ic_addr) = + (int *)(nd->arch.shared_info_va + XSI_PSR_IC_OFS); } else { /* When switching to idle domain, only need to disable vhpt * walker. Then all accesses happen within idle context will @@ -167,6 +173,7 @@ if (!i--) { i = 1000000; printk("+"); } pta = ia64_get_pta(); ia64_set_pta(pta & ~VHPT_ENABLED); __ia64_per_cpu_var(current_psr_i_addr) = NULL; + __ia64_per_cpu_var(current_psr_ic_addr) = NULL; } } local_irq_restore(spsr); @@ -304,9 +311,8 @@ int arch_domain_create(struct domain *d) int arch_domain_create(struct domain *d) { // the following will eventually need to be negotiated dynamically - d->arch.shared_info_va = SHAREDINFO_ADDR; + d->arch.shared_info_va = DEFAULT_SHAREDINFO_ADDR; d->arch.breakimm = 0x1000; - seqlock_init(&d->arch.vtlb_lock); if (is_idle_domain(d)) return 0; @@ -354,7 +360,7 @@ void arch_domain_destroy(struct domain * void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c) { - c->regs = *vcpu_regs (v); + c->user_regs = *vcpu_regs (v); c->shared = v->domain->shared_info->arch; } @@ -379,7 +385,7 @@ int arch_set_info_guest(struct vcpu *v, } else if (!d->arch.physmap_built) build_physmap_table(d); - *regs = c->regs; + *regs = c->user_regs; cmdline_addr = 0; if (v == d->vcpu[0]) { /* Only for first vcpu. 
*/ @@ -514,6 +520,41 @@ void build_physmap_table(struct domain * } d->arch.physmap_built = 1; } + +unsigned long +domain_set_shared_info_va (unsigned long va) +{ + struct vcpu *v = current; + struct domain *d = v->domain; + struct vcpu *v1; + + /* Check virtual address: + must belong to region 7, + must be 64Kb aligned, + must not be within Xen virtual space. */ + if ((va >> 61) != 7 + || (va & 0xffffUL) != 0 + || (va >= HYPERVISOR_VIRT_START && va < HYPERVISOR_VIRT_END)) + panic_domain (NULL, "%s: bad va (0x%016lx)\n", __func__, va); + + /* Note: this doesn't work well if other cpus are already running. + However this is part of the spec :-) */ + printf ("Domain set shared_info_va to 0x%016lx\n", va); + d->arch.shared_info_va = va; + + for_each_vcpu (d, v1) { + VCPU(v1, interrupt_mask_addr) = + (unsigned char *)va + INT_ENABLE_OFFSET(v1); + } + + __ia64_per_cpu_var(current_psr_ic_addr) = (int *)(va + XSI_PSR_IC_OFS); + + /* Remap the shared pages. */ + set_one_rr (7UL << 61, PSCB(v,rrs[7])); + + return 0; +} + // remove following line if not privifying in memory //#define HAVE_PRIVIFY_MEMORY @@ -889,6 +930,7 @@ int construct_dom0(struct domain *d, void machine_restart(char * __unused) { + console_start_sync(); if (running_on_sim) printf ("machine_restart called. spinning...\n"); else @@ -898,6 +940,7 @@ void machine_restart(char * __unused) void machine_halt(void) { + console_start_sync(); if (running_on_sim) printf ("machine_halt called. spinning...\n"); else diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/xen/faults.c --- a/xen/arch/ia64/xen/faults.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/xen/faults.c Thu Jul 06 06:47:16 2006 -0400 @@ -26,6 +26,7 @@ #include <asm/vhpt.h> #include <asm/debugger.h> #include <asm/fpswa.h> +#include <asm/asm-xsi-offsets.h> extern void die_if_kernel(char *str, struct pt_regs *regs, long err); /* FIXME: where these declarations shold be there ? 
*/ @@ -118,7 +119,7 @@ void reflect_interruption(unsigned long regs->cr_iip = ((unsigned long) PSCBX(v,iva) + vector) & ~0xffUL; regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET; - regs->r31 = XSI_IPSR; + regs->r31 = current->domain->arch.shared_info_va + XSI_IPSR_OFS; v->vcpu_info->evtchn_upcall_mask = 1; PSCB(v,interrupt_collection_enabled) = 0; @@ -172,7 +173,7 @@ void reflect_event(struct pt_regs *regs) regs->cr_iip = v->arch.event_callback_ip; regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET; - regs->r31 = XSI_IPSR; + regs->r31 = current->domain->arch.shared_info_va + XSI_IPSR_OFS; v->vcpu_info->evtchn_upcall_mask = 1; PSCB(v,interrupt_collection_enabled) = 0; @@ -214,11 +215,10 @@ void ia64_do_page_fault (unsigned long a // FIXME should validate address here unsigned long pteval; unsigned long is_data = !((isr >> IA64_ISR_X_BIT) & 1UL); - seqlock_t* vtlb_lock = &current->domain->arch.vtlb_lock; - unsigned long seq; IA64FAULT fault; - - if ((isr & IA64_ISR_IR) && handle_lazy_cover(current, regs)) return; + int is_ptc_l_needed = 0; + u64 logps; + if ((isr & IA64_ISR_SP) || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) { @@ -232,20 +232,29 @@ void ia64_do_page_fault (unsigned long a } again: - seq = read_seqbegin(vtlb_lock); fault = vcpu_translate(current,address,is_data,&pteval,&itir,&iha); if (fault == IA64_NO_FAULT || fault == IA64_USE_TLB) { - u64 logps; - pteval = translate_domain_pte(pteval, address, itir, &logps); + struct p2m_entry entry; + pteval = translate_domain_pte(pteval, address, itir, &logps, &entry); vcpu_itc_no_srlz(current,is_data?2:1,address,pteval,-1UL,logps); - if (read_seqretry(vtlb_lock, seq)) { + if ((fault == IA64_USE_TLB && !current->arch.dtlb.pte.p) || + p2m_entry_retry(&entry)) { + /* dtlb has been purged in-between. This dtlb was + matching. Undo the work. 
*/ vcpu_flush_tlb_vhpt_range(address & ((1 << logps) - 1), logps); + + // the stale entry which we inserted above + // may remains in tlb cache. + // we don't purge it now hoping next itc purges it. + is_ptc_l_needed = 1; goto again; } return; } + if (is_ptc_l_needed) + vcpu_ptc_l(current, address, logps); if (!user_mode (regs)) { /* The fault occurs inside Xen. */ if (!ia64_done_with_exception(regs)) { @@ -260,6 +269,10 @@ void ia64_do_page_fault (unsigned long a } return; } + + if ((isr & IA64_ISR_IR) && handle_lazy_cover(current, regs)) + return; + if (!PSCB(current,interrupt_collection_enabled)) { check_bad_nested_interruption(isr,regs,fault); //printf("Delivering NESTED DATA TLB fault\n"); diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/xen/fw_emul.c --- a/xen/arch/ia64/xen/fw_emul.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/xen/fw_emul.c Thu Jul 06 06:47:16 2006 -0400 @@ -359,18 +359,32 @@ xen_pal_emulator(unsigned long index, u6 // given a current domain (virtual or metaphysical) address, return the virtual address static unsigned long -efi_translate_domain_addr(unsigned long domain_addr, IA64FAULT *fault) +efi_translate_domain_addr(unsigned long domain_addr, IA64FAULT *fault, + struct page_info** page) { struct vcpu *v = current; unsigned long mpaddr = domain_addr; + unsigned long virt; *fault = IA64_NO_FAULT; +again: if (v->domain->arch.efi_virt_mode) { *fault = vcpu_tpa(v, domain_addr, &mpaddr); if (*fault != IA64_NO_FAULT) return 0; } - return ((unsigned long) __va(translate_domain_mpaddr(mpaddr))); + virt = (unsigned long)domain_mpa_to_imva(v->domain, mpaddr); + *page = virt_to_page(virt); + if (get_page(*page, current->domain) == 0) { + if (page_get_owner(*page) != current->domain) { + // which code is appropriate? 
+ *fault = IA64_FAULT; + return 0; + } + goto again; + } + + return virt; } static efi_status_t @@ -379,18 +393,27 @@ efi_emulate_get_time( IA64FAULT *fault) { unsigned long tv = 0, tc = 0; + struct page_info *tv_page = NULL; + struct page_info *tc_page = NULL; efi_status_t status; //printf("efi_get_time(%016lx,%016lx) called\n", tv_addr, tc_addr); - tv = efi_translate_domain_addr(tv_addr, fault); - if (*fault != IA64_NO_FAULT) return 0; + tv = efi_translate_domain_addr(tv_addr, fault, &tv_page); + if (*fault != IA64_NO_FAULT) + return 0; if (tc_addr) { - tc = efi_translate_domain_addr(tc_addr, fault); - if (*fault != IA64_NO_FAULT) return 0; + tc = efi_translate_domain_addr(tc_addr, fault, &tc_page); + if (*fault != IA64_NO_FAULT) { + put_page(tv_page); + return 0; + } } //printf("efi_get_time(%016lx,%016lx) translated to xen virtual address\n", tv, tc); status = (*efi.get_time)((efi_time_t *) tv, (efi_time_cap_t *) tc); //printf("efi_get_time returns %lx\n", status); + if (tc_page != NULL) + put_page(tc_page); + put_page(tv_page); return status; } @@ -549,7 +572,7 @@ do_ssc(unsigned long ssc, struct pt_regs case SSC_WAIT_COMPLETION: if (arg0) { // metaphysical address - arg0 = translate_domain_mpaddr(arg0); + arg0 = translate_domain_mpaddr(arg0, NULL); /**/ stat = (struct ssc_disk_stat *)__va(arg0); ///**/ if (stat->fd == last_fd) stat->count = last_count; /**/ stat->count = last_count; @@ -564,7 +587,7 @@ do_ssc(unsigned long ssc, struct pt_regs arg1 = vcpu_get_gr(current,33); // access rights if (!running_on_sim) { printf("SSC_OPEN, not implemented on hardware. 
(ignoring...)\n"); arg0 = 0; } if (arg0) { // metaphysical address - arg0 = translate_domain_mpaddr(arg0); + arg0 = translate_domain_mpaddr(arg0, NULL); retval = ia64_ssc(arg0,arg1,0,0,ssc); } else retval = -1L; @@ -581,7 +604,7 @@ if (!running_on_sim) { printf("SSC_OPEN, unsigned long mpaddr; long len; - arg2 = translate_domain_mpaddr(arg2); + arg2 = translate_domain_mpaddr(arg2, NULL); req = (struct ssc_disk_req *) __va(arg2); req->len &= 0xffffffffL; // avoid strange bug len = req->len; @@ -592,7 +615,7 @@ if (!running_on_sim) { printf("SSC_OPEN, retval = 0; if ((mpaddr & PAGE_MASK) != ((mpaddr+len-1) & PAGE_MASK)) { // do partial page first - req->addr = translate_domain_mpaddr(mpaddr); + req->addr = translate_domain_mpaddr(mpaddr, NULL); req->len = PAGE_SIZE - (req->addr & ~PAGE_MASK); len -= req->len; mpaddr += req->len; retval = ia64_ssc(arg0,arg1,arg2,arg3,ssc); @@ -602,7 +625,7 @@ if (!running_on_sim) { printf("SSC_OPEN, //if (last_count >= PAGE_SIZE) printf("ssc(%p,%lx)[part]=%x ",req->addr,req->len,retval); } if (retval >= 0) while (len > 0) { - req->addr = translate_domain_mpaddr(mpaddr); + req->addr = translate_domain_mpaddr(mpaddr, NULL); req->len = (len > PAGE_SIZE) ? 
PAGE_SIZE : len; len -= PAGE_SIZE; mpaddr += PAGE_SIZE; retval = ia64_ssc(arg0,arg1,arg2,arg3,ssc); diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/xen/hypercall.c --- a/xen/arch/ia64/xen/hypercall.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/xen/hypercall.c Thu Jul 06 06:47:16 2006 -0400 @@ -267,6 +267,9 @@ fw_hypercall (struct pt_regs *regs) case FW_HYPERCALL_IPI: fw_hypercall_ipi (regs); break; + case FW_HYPERCALL_SET_SHARED_INFO_VA: + regs->r8 = domain_set_shared_info_va (regs->r28); + break; case FW_HYPERCALL_FPSWA: fpswa_ret = fw_hypercall_fpswa (v); regs->r8 = fpswa_ret.status; diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/xen/hyperprivop.S --- a/xen/arch/ia64/xen/hyperprivop.S Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/xen/hyperprivop.S Thu Jul 06 06:47:16 2006 -0400 @@ -13,6 +13,7 @@ #include <asm/processor.h> #include <asm/system.h> #include <asm/debugger.h> +#include <asm/asm-xsi-offsets.h> #include <public/arch-ia64.h> @@ -304,9 +305,13 @@ ENTRY(hyper_ssm_i) add r24=r24,r23;; mov cr.iip=r24;; // OK, now all set to go except for switch to virtual bank0 - mov r30=r2; mov r29=r3;; + mov r30=r2 + mov r29=r3 + mov r28=r4 + ;; adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18; adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18;; + adds r4=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 bsw.1;; // FIXME?: ar.unat is not really handled correctly, // but may not matter if the OS is NaT-clean @@ -326,9 +331,11 @@ ENTRY(hyper_ssm_i) .mem.offset 8,0; st8.spill [r3]=r29,16 ;; .mem.offset 0,0; st8.spill [r2]=r30,16; .mem.offset 8,0; st8.spill [r3]=r31,16 ;; - movl r31=XSI_IPSR;; + mov r31=r4 bsw.0 ;; - mov r2=r30; mov r3=r29;; + mov r2=r30 + mov r3=r29 + mov r4=r28 adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;; st4 [r20]=r0 ;; mov pr=r31,-1 ;; @@ -372,7 +379,10 @@ GLOBAL_ENTRY(fast_tick_reflect) st8 [r20]=r21;; #endif // vcpu_pend_timer(current) - movl r18=XSI_PSR_IC;; + movl r18=THIS_CPU(current_psr_ic_addr) + ;; + ld8 r18=[r18] + ;; adds 
r20=XSI_ITV_OFS-XSI_PSR_IC_OFS,r18 ;; ld8 r20=[r20];; cmp.eq p6,p0=r20,r0 // if cr.itv==0 done @@ -481,12 +491,17 @@ GLOBAL_ENTRY(fast_tick_reflect) add r24=r24,r23;; mov cr.iip=r24;; // OK, now all set to go except for switch to virtual bank0 - mov r30=r2; mov r29=r3;; + mov r30=r2 + mov r29=r3 + mov r27=r4 #ifdef HANDLE_AR_UNAT mov r28=ar.unat; #endif - adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18; - adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18;; + ;; + adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18 + adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18 + adds r4=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 + ;; bsw.1;; .mem.offset 0,0; st8.spill [r2]=r16,16; .mem.offset 8,0; st8.spill [r3]=r17,16 ;; @@ -506,28 +521,32 @@ GLOBAL_ENTRY(fast_tick_reflect) .mem.offset 8,0; st8.spill [r3]=r31,16 ;; #ifdef HANDLE_AR_UNAT // r16~r23 are preserved regsin bank0 regs, we need to restore them, - // r24~r31 are scratch regs, we don't need to handle NaT bit, - // because OS handler must assign it before access it - ld8 r16=[r2],16; - ld8 r17=[r3],16;; - ld8 r18=[r2],16; - ld8 r19=[r3],16;; - ld8 r20=[r2],16; - ld8 r21=[r3],16;; - ld8 r22=[r2],16; - ld8 r23=[r3],16;; -#endif - movl r31=XSI_IPSR;; - bsw.0 ;; - mov r24=ar.unat; - mov r2=r30; mov r3=r29;; + // r24~r31 are scratch regs, we don't need to handle NaT bit, + // because OS handler must assign it before access it + ld8 r16=[r2],16; + ld8 r17=[r3],16;; + ld8 r18=[r2],16; + ld8 r19=[r3],16;; + ld8 r20=[r2],16; + ld8 r21=[r3],16;; + ld8 r22=[r2],16; + ld8 r23=[r3],16;; +#endif + mov r31=r4 + ;; + bsw.0 ;; + mov r24=ar.unat; + mov r2=r30 + mov r3=r29 + mov r4=r27 #ifdef HANDLE_AR_UNAT - mov ar.unat=r28; -#endif - adds r25=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18 ; - adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;; - st8 [r25]=r24; - st4 [r20]=r0 ;; + mov ar.unat=r28; +#endif + ;; + adds r25=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18 ; + adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;; + st8 [r25]=r24; + st4 [r20]=r0 ;; fast_tick_reflect_done: mov pr=r31,-1 
;; rfi @@ -659,12 +678,16 @@ ENTRY(fast_reflect) add r20=r20,r23;; mov cr.iip=r20;; // OK, now all set to go except for switch to virtual bank0 - mov r30=r2; mov r29=r3;; + mov r30=r2 + mov r29=r3 #ifdef HANDLE_AR_UNAT mov r28=ar.unat; #endif + mov r27=r4 adds r2=XSI_BANK1_R16_OFS-XSI_PSR_IC_OFS,r18; - adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18;; + adds r3=(XSI_BANK1_R16_OFS+8)-XSI_PSR_IC_OFS,r18 + adds r4=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 + ;; bsw.1;; .mem.offset 0,0; st8.spill [r2]=r16,16; .mem.offset 8,0; st8.spill [r3]=r17,16 ;; @@ -687,24 +710,28 @@ ENTRY(fast_reflect) // r24~r31 are scratch regs, we don't need to handle NaT bit, // because OS handler must assign it before access it ld8 r16=[r2],16; - ld8 r17=[r3],16;; - ld8 r18=[r2],16; - ld8 r19=[r3],16;; + ld8 r17=[r3],16;; + ld8 r18=[r2],16; + ld8 r19=[r3],16;; ld8 r20=[r2],16; - ld8 r21=[r3],16;; - ld8 r22=[r2],16; - ld8 r23=[r3],16;; -#endif - movl r31=XSI_IPSR;; + ld8 r21=[r3],16;; + ld8 r22=[r2],16; + ld8 r23=[r3],16;; +#endif + mov r31=r4 + ;; bsw.0 ;; - mov r24=ar.unat; - mov r2=r30; mov r3=r29;; + mov r24=ar.unat; + mov r2=r30 + mov r3=r29 #ifdef HANDLE_AR_UNAT mov ar.unat=r28; #endif - adds r25=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18 ; + mov r4=r27 + ;; + adds r25=XSI_B1NATS_OFS-XSI_PSR_IC_OFS,r18 ; adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;; - st8 [r25]=r24; + st8 [r25]=r24; st4 [r20]=r0 ;; mov pr=r31,-1 ;; rfi @@ -732,7 +759,8 @@ GLOBAL_ENTRY(fast_access_reflect) extr.u r21=r30,IA64_PSR_CPL0_BIT,2 ;; cmp.eq p7,p0=r21,r0 (p7) br.spnt.few dispatch_reflection ;; - movl r18=XSI_PSR_IC;; + movl r18=THIS_CPU(current_psr_ic_addr);; + ld8 r18=[r18];; ld4 r21=[r18];; cmp.eq p7,p0=r0,r21 (p7) br.spnt.few dispatch_reflection ;; @@ -1043,8 +1071,8 @@ 1: // validate vcr.iip, if in Xen range, do it the slow way adds r20=XSI_IIP_OFS-XSI_PSR_IC_OFS,r18 ;; ld8 r22=[r20];; - movl r23=XEN_VIRT_SPACE_LOW - movl r24=XEN_VIRT_SPACE_HIGH ;; + movl r23=HYPERVISOR_VIRT_START + movl r24=HYPERVISOR_VIRT_END;; cmp.ltu 
p0,p7=r22,r23 ;; // if !(iip<low) && (p7) cmp.geu p0,p7=r22,r24 ;; // !(iip>=high) (p7) br.spnt.few dispatch_break_fault ;; diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/xen/irq.c --- a/xen/arch/ia64/xen/irq.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/xen/irq.c Thu Jul 06 06:47:16 2006 -0400 @@ -124,17 +124,6 @@ inline void synchronize_irq(unsigned int inline void synchronize_irq(unsigned int irq) {} EXPORT_SYMBOL(synchronize_irq); #endif - -static int noirqdebug; - -static int __init noirqdebug_setup(char *str) -{ - noirqdebug = 1; - printk("IRQ lockup detection disabled\n"); - return 1; -} - -__setup("noirqdebug", noirqdebug_setup); /* * Generic enable/disable code: this just calls @@ -232,122 +221,6 @@ out: return 1; } -/** - * request_irq - allocate an interrupt line - * @irq: Interrupt line to allocate - * @handler: Function to be called when the IRQ occurs - * @irqflags: Interrupt type flags - * @devname: An ascii name for the claiming device - * @dev_id: A cookie passed back to the handler function - * - * This call allocates interrupt resources and enables the - * interrupt line and IRQ handling. From the point this - * call is made your handler function may be invoked. Since - * your handler function must clear any interrupt the board - * raises, you must take care both to initialise your hardware - * and to set up the interrupt handler in the right order. - * - * Dev_id must be globally unique. Normally the address of the - * device data structure is used as the cookie. Since the handler - * receives this value it makes sense to use it. - * - * If your interrupt is shared you must pass a non NULL dev_id - * as this is required when freeing the interrupt. 
- * - * Flags: - * - * SA_SHIRQ Interrupt is shared - * - * SA_INTERRUPT Disable local interrupts while processing - * - * SA_SAMPLE_RANDOM The interrupt can be used for entropy - * - */ - -int request_irq(unsigned int irq, - irqreturn_t (*handler)(int, void *, struct pt_regs *), - unsigned long irqflags, - const char * devname, - void *dev_id) -{ - int retval; - struct irqaction * action; - - /* - * Sanity-check: shared interrupts should REALLY pass in - * a real dev-ID, otherwise we'll have trouble later trying - * to figure out which interrupt is which (messes up the - * interrupt freeing logic etc). - */ - if (irqflags & SA_SHIRQ) { - if (!dev_id) - printk(KERN_ERR "Bad boy: %s called us without a dev_id!\n", devname); - } - - if (irq >= NR_IRQS) - return -EINVAL; - if (!handler) - return -EINVAL; - - action = xmalloc(struct irqaction); - if (!action) - return -ENOMEM; - - action->handler = (void *) handler; - action->name = devname; - action->dev_id = dev_id; - - retval = setup_irq(irq, action); - if (retval) - xfree(action); - return retval; -} - -EXPORT_SYMBOL(request_irq); - -/** - * free_irq - free an interrupt - * @irq: Interrupt line to free - * @dev_id: Device identity to free - * - * Remove an interrupt handler. The handler is removed and if the - * interrupt line is no longer in use by any driver it is disabled. - * On a shared IRQ the caller must ensure the interrupt is disabled - * on the card it drives before calling this function. The function - * does not return until any executing interrupts for this IRQ - * have completed. - * - * This function must not be called from interrupt context. 
- */ - -void free_irq(unsigned int irq) -{ - irq_desc_t *desc; - unsigned long flags; - - if (irq >= NR_IRQS) - return; - - desc = irq_descp(irq); - spin_lock_irqsave(&desc->lock,flags); - if (desc->action) { - struct irqaction * action = desc->action; - desc->action = NULL; - desc->status |= IRQ_DISABLED; - desc->handler->shutdown(irq); - spin_unlock_irqrestore(&desc->lock,flags); - - /* Wait to make sure it's not being used on another CPU */ - synchronize_irq(irq); - xfree(action); - return; - } - printk(KERN_ERR "Trying to free free IRQ%d\n",irq); - spin_unlock_irqrestore(&desc->lock,flags); -} - -EXPORT_SYMBOL(free_irq); - /* * IRQ autodetection code.. * @@ -357,11 +230,14 @@ EXPORT_SYMBOL(free_irq); * disabled. */ -int setup_irq(unsigned int irq, struct irqaction * new) +int setup_vector(unsigned int irq, struct irqaction * new) { unsigned long flags; struct irqaction *old, **p; irq_desc_t *desc = irq_descp(irq); + + printf ("setup_vector(%d): handler=%p, flags=%x\n", + irq, desc->handler, desc->status); /* * The following block of code has to be executed atomically @@ -378,9 +254,28 @@ int setup_irq(unsigned int irq, struct i desc->depth = 0; desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_GUEST); desc->handler->startup(irq); + desc->handler->enable(irq); spin_unlock_irqrestore(&desc->lock,flags); return 0; +} + +/* Vectors reserved by xen (and thus not sharable with domains). */ +unsigned long ia64_xen_vector[BITS_TO_LONGS(NR_IRQS)]; + +int setup_irq(unsigned int irq, struct irqaction * new) +{ + unsigned int vec; + int res; + + /* Get vector for IRQ. */ + if (acpi_gsi_to_irq (irq, &vec) < 0) + return -ENOSYS; + /* Reserve the vector (and thus the irq). 
*/ + if (test_and_set_bit(vec, ia64_xen_vector)) + return -EBUSY; + res = setup_vector (vec, new); + return res; } /* @@ -622,15 +517,3 @@ void guest_forward_keyboard_input(int ir { vcpu_pend_interrupt(dom0->vcpu[0],irq); } - -void serial_input_init(void) -{ - int retval; - int irq = 0x30; // FIXME - - retval = request_irq(irq,guest_forward_keyboard_input,SA_INTERRUPT,"siminput",NULL); - if (retval) { - printk("serial_input_init: broken request_irq call\n"); - while(1); - } -} diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/xen/ivt.S --- a/xen/arch/ia64/xen/ivt.S Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/xen/ivt.S Thu Jul 06 06:47:16 2006 -0400 @@ -508,10 +508,9 @@ late_alt_dtlb_miss: movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) mov r21=cr.ipsr ;; -#else #endif #ifdef CONFIG_DISABLE_VHPT - shr.u r22=r16,61 // get the region number into r21 + shr.u r22=r16,61 // get the region into r22 ;; cmp.gt p8,p0=6,r22 // access to region 0-5 ;; @@ -992,7 +991,9 @@ ENTRY(break_fault) cmp.eq p7,p0=r17,r18 ;; (p7) br.spnt.few dispatch_break_fault ;; #endif - movl r18=XSI_PSR_IC + movl r18=THIS_CPU(current_psr_ic_addr) + ;; + ld8 r18=[r18] ;; ld4 r19=[r18] ;; @@ -1493,7 +1494,7 @@ END(dispatch_unaligned_handler) * suitable spot... */ -ENTRY(dispatch_to_fault_handler) +GLOBAL_ENTRY(dispatch_to_fault_handler) /* * Input: * psr.ic: off diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/xen/mm.c --- a/xen/arch/ia64/xen/mm.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/xen/mm.c Thu Jul 06 06:47:16 2006 -0400 @@ -7,6 +7,159 @@ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp> * VA Linux Systems Japan K.K. * dom0 vp model support + */ + +/* + * NOTES on SMP + * + * * shared structures + * There are some structures which are accessed by CPUs concurrently. + * Here is the list of shared structures and operations on them which + * read/write the structures. + * + * - struct page_info + * This is a xen global resource. 
This structure is accessed by + * any CPUs. + * + * operations on this structure: + * - get_page() and its variant + * - put_page() and its variant + * + * - vTLB + * vcpu->arch.{d, i}tlb: Software tlb cache. These are per VCPU data. + * DEFINE_PER_CPU (unsigned long, vhpt_paddr): VHPT table per physical CPU. + * + * domain_flush_vtlb_range() and domain_flush_vtlb_all() + * write vcpu->arch.{d, i}tlb and VHPT table of vcpu which isn't current. + * So there are potential races to read/write VHPT and vcpu->arch.{d, i}tlb. + * Please note that reading VHPT is done by hardware page table walker. + * + * operations on this structure: + * - global tlb purge + * vcpu_ptc_g(), vcpu_ptc_ga() and domain_page_flush() + * I.e. callers of domain_flush_vtlb_range() and domain_flush_vtlb_all() + * These functions invalidate VHPT entry and vcpu->arch.{i, d}tlb + * + * - tlb insert and fc + * vcpu_itc_i() + * vcpu_itc_d() + * ia64_do_page_fault() + * vcpu_fc() + * These functions set VHPT entry and vcpu->arch.{i, d}tlb. + * Actually vcpu_itc_no_srlz() does. + * + * - the P2M table + * domain->mm and pgd, pud, pmd, pte table page. + * This structure is used to convert domain pseudo physical address + * to machine address. This is per domain resource. + * + * operations on this structure: + * - populate the P2M table tree + * lookup_alloc_domain_pte() and its variants. + * - set p2m entry + * assign_new_domain_page() and its variants. + * assign_domain_page() and its variants. + * - xchg p2m entry + * assign_domain_page_replace() + * - cmpxchg p2m entry + * assign_domain_page_cmpxchg_rel() + * destroy_grant_host_mapping() + * steal_page() + * zap_domain_page_one() + * - read p2m entry + * lookup_alloc_domain_pte() and its variants. + * + * - the M2P table + * mpt_table (or machine_to_phys_mapping) + * This is a table which converts from machine address to pseudo physical + * address. This is a global structure. 
+ * + * operations on this structure: + * - set m2p entry + * set_gpfn_from_mfn() + * - zap m2p entry + * set_gpfn_from_mfn(INVALID_P2M_ENTRY) + * - get m2p entry + * get_gpfn_from_mfn() + * + * + * * avoiding races + * The resources which are shared by CPUs must be accessed carefully + * to avoid race. + * IA64 has weak memory ordering so that attention must be paid + * to access shared structures. [SDM vol2 PartII chap. 2] + * + * - struct page_info memory ordering + * get_page() has acquire semantics. + * put_page() has release semantics. + * + * - populating the p2m table + * pgd, pud, pmd are append only. + * + * - races when updating the P2M tables and the M2P table + * The P2M entry are shared by more than one vcpu. + * So they are accessed atomic operations. + * I.e. xchg or cmpxchg must be used to update the p2m entry. + * NOTE: When creating/destructing a domain, we don't need to take care of + * this race. + * + * The M2P table is inverse of the P2M table. + * I.e. P2M(M2P(p)) = p and M2P(P2M(m)) = m + * The M2P table and P2M table must be updated consistently. + * Here is the update sequence + * + * xchg or cmpxchg case + * - set_gpfn_from_mfn(new_mfn, gpfn) + * - memory barrier + * - atomic update of the p2m entry (xchg or cmpxchg the p2m entry) + * get old_mfn entry as a result. + * - memory barrier + * - set_gpfn_from_mfn(old_mfn, INVALID_P2M_ENTRY) + * + * Here memory barrier can be achieved by release semantics. + * + * - races between global tlb purge and tlb insert + * This is a race between reading/writing vcpu->arch.{d, i}tlb or VHPT entry. + * When a vcpu is about to insert tlb, another vcpu may purge tlb + * cache globally. Inserting tlb (vcpu_itc_no_srlz()) or global tlb purge + * (domain_flush_vtlb_range() and domain_flush_vtlb_all()) can't update + * cpu->arch.{d, i}tlb, VHPT and mTLB. So there is a race here. + * + * Here check vcpu->arch.{d, i}tlb.p bit + * After inserting tlb entry, check the p bit and retry to insert. 
+ * This means that when global tlb purge and tlb insert are issued + * simultaneously, always global tlb purge happens after tlb insert. + * + * - races between p2m entry update and tlb insert + * This is a race between reading/writing the p2m entry. + * reader: vcpu_itc_i(), vcpu_itc_d(), ia64_do_page_fault(), vcpu_fc() + * writer: assign_domain_page_cmpxchg_rel(), destroy_grant_host_mapping(), + * steal_page(), zap_domain_page_one() + * + * For example, vcpu_itc_i() is about to insert tlb by calling + * vcpu_itc_no_srlz() after reading the p2m entry. + * At the same time, the p2m entry is replaced by xchg or cmpxchg and + * tlb cache of the page is flushed. + * There is a possibility that the p2m entry doesn't already point to the + * old page, but tlb cache still points to the old page. + * This can be detected similar to sequence lock using the p2m entry itself. + * reader remember the read value of the p2m entry, and insert tlb. + * Then read the p2m entry again. If the new p2m entry value is different + * from the used p2m entry value, the retry. + * + * - races between referencing page and p2m entry update + * This is a race between reading/writing the p2m entry. + * reader: vcpu_get_domain_bundle(), vmx_get_domain_bundle(), + * efi_emulate_get_time() + * writer: assign_domain_page_cmpxchg_rel(), destroy_grant_host_mapping(), + * steal_page(), zap_domain_page_one() + * + * A page which assigned to a domain can be de-assigned by another vcpu. + * So before read/write to a domain page, the page's reference count + * must be incremented. + * vcpu_get_domain_bundle(), vmx_get_domain_bundle() and + * efi_emulate_get_time() + * */ #include <xen/config.h> @@ -245,7 +398,7 @@ gmfn_to_mfn_foreign(struct domain *d, un if (d == dom0) return(gpfn); #endif - pte = lookup_domain_mpa(d,gpfn << PAGE_SHIFT); + pte = lookup_domain_mpa(d,gpfn << PAGE_SHIFT, NULL); if (!pte) { panic("gmfn_to_mfn_foreign: bad gpfn. 
spinning...\n"); } @@ -256,7 +409,8 @@ gmfn_to_mfn_foreign(struct domain *d, un // address, convert the pte for a physical address for (possibly different) // Xen PAGE_SIZE and return modified pte. (NOTE: TLB insert should use // PAGE_SIZE!) -u64 translate_domain_pte(u64 pteval, u64 address, u64 itir__, u64* logps) +u64 translate_domain_pte(u64 pteval, u64 address, u64 itir__, u64* logps, + struct p2m_entry* entry) { struct domain *d = current->domain; ia64_itir_t itir = {.itir = itir__}; @@ -298,7 +452,7 @@ u64 translate_domain_pte(u64 pteval, u64 address, pteval, itir.itir); } #endif - pteval2 = lookup_domain_mpa(d,mpaddr); + pteval2 = lookup_domain_mpa(d, mpaddr, entry); arflags = pteval & _PAGE_AR_MASK; arflags2 = pteval2 & _PAGE_AR_MASK; if (arflags != _PAGE_AR_R && arflags2 == _PAGE_AR_R) { @@ -311,17 +465,26 @@ u64 translate_domain_pte(u64 pteval, u64 pteval2, arflags2, mpaddr); #endif pteval = (pteval & ~_PAGE_AR_MASK) | _PAGE_AR_R; -} + } pteval2 &= _PAGE_PPN_MASK; // ignore non-addr bits pteval2 |= (pteval & _PAGE_ED); pteval2 |= _PAGE_PL_2; // force PL0->2 (PL3 is unaffected) pteval2 = (pteval & ~_PAGE_PPN_MASK) | pteval2; + /* + * Don't let non-dom0 domains map uncached addresses. This can + * happen when domU tries to touch i/o port space. Also prevents + * possible address aliasing issues. + */ + if (d != dom0) + pteval2 &= ~_PAGE_MA_MASK; + return pteval2; } // given a current domain metaphysical address, return the physical address -unsigned long translate_domain_mpaddr(unsigned long mpaddr) +unsigned long translate_domain_mpaddr(unsigned long mpaddr, + struct p2m_entry* entry) { unsigned long pteval; @@ -333,15 +496,47 @@ unsigned long translate_domain_mpaddr(un } } #endif - pteval = lookup_domain_mpa(current->domain,mpaddr); + pteval = lookup_domain_mpa(current->domain, mpaddr, entry); return ((pteval & _PAGE_PPN_MASK) | (mpaddr & ~PAGE_MASK)); +} + +//XXX !xxx_present() should be used instread of !xxx_none()? 
+// __assign_new_domain_page(), assign_new_domain_page() and +// assign_new_domain0_page() are used only when domain creation. +// their accesses aren't racy so that returned pte_t doesn't need +// volatile qualifier +static pte_t* +__lookup_alloc_domain_pte(struct domain* d, unsigned long mpaddr) +{ + struct mm_struct *mm = &d->arch.mm; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + BUG_ON(mm->pgd == NULL); + pgd = pgd_offset(mm, mpaddr); + if (pgd_none(*pgd)) { + pgd_populate(mm, pgd, pud_alloc_one(mm,mpaddr)); + } + + pud = pud_offset(pgd, mpaddr); + if (pud_none(*pud)) { + pud_populate(mm, pud, pmd_alloc_one(mm,mpaddr)); + } + + pmd = pmd_offset(pud, mpaddr); + if (pmd_none(*pmd)) { + pmd_populate_kernel(mm, pmd, pte_alloc_one_kernel(mm, mpaddr)); + } + + return pte_offset_map(pmd, mpaddr); } //XXX !xxx_present() should be used instread of !xxx_none()? // pud, pmd, pte page is zero cleared when they are allocated. // Their area must be visible before population so that // cmpxchg must have release semantics. -static pte_t* +static volatile pte_t* lookup_alloc_domain_pte(struct domain* d, unsigned long mpaddr) { struct mm_struct *mm = &d->arch.mm; @@ -384,11 +579,11 @@ lookup_alloc_domain_pte(struct domain* d } } - return pte_offset_map(pmd, mpaddr); + return (volatile pte_t*)pte_offset_map(pmd, mpaddr); } //XXX xxx_none() should be used instread of !xxx_present()? 
-static pte_t* +static volatile pte_t* lookup_noalloc_domain_pte(struct domain* d, unsigned long mpaddr) { struct mm_struct *mm = &d->arch.mm; @@ -409,11 +604,11 @@ lookup_noalloc_domain_pte(struct domain* if (unlikely(!pmd_present(*pmd))) return NULL; - return pte_offset_map(pmd, mpaddr); + return (volatile pte_t*)pte_offset_map(pmd, mpaddr); } #ifdef CONFIG_XEN_IA64_DOM0_VP -static pte_t* +static volatile pte_t* lookup_noalloc_domain_pte_none(struct domain* d, unsigned long mpaddr) { struct mm_struct *mm = &d->arch.mm; @@ -434,13 +629,13 @@ lookup_noalloc_domain_pte_none(struct do if (unlikely(pmd_none(*pmd))) return NULL; - return pte_offset_map(pmd, mpaddr); + return (volatile pte_t*)pte_offset_map(pmd, mpaddr); } unsigned long ____lookup_domain_mpa(struct domain *d, unsigned long mpaddr) { - pte_t *pte; + volatile pte_t *pte; pte = lookup_noalloc_domain_pte(d, mpaddr); if (pte == NULL) @@ -452,25 +647,12 @@ ____lookup_domain_mpa(struct domain *d, return GPFN_INV_MASK; return INVALID_MFN; } - -unsigned long -__lookup_domain_mpa(struct domain *d, unsigned long mpaddr) -{ - unsigned long machine = ____lookup_domain_mpa(d, mpaddr); - if (machine != INVALID_MFN) - return machine; - - printk("%s: d 0x%p id %d current 0x%p id %d\n", - __func__, d, d->domain_id, current, current->vcpu_id); - printk("%s: bad mpa 0x%lx (max_pages 0x%lx)\n", - __func__, mpaddr, (unsigned long)d->max_pages << PAGE_SHIFT); - return INVALID_MFN; -} -#endif - -unsigned long lookup_domain_mpa(struct domain *d, unsigned long mpaddr) -{ - pte_t *pte; +#endif + +unsigned long lookup_domain_mpa(struct domain *d, unsigned long mpaddr, + struct p2m_entry* entry) +{ + volatile pte_t *pte; #ifdef CONFIG_DOMAIN0_CONTIGUOUS if (d == dom0) { @@ -486,9 +668,12 @@ unsigned long lookup_domain_mpa(struct d #endif pte = lookup_noalloc_domain_pte(d, mpaddr); if (pte != NULL) { - if (pte_present(*pte)) { + pte_t tmp_pte = *pte;// pte is volatile. copy the value. 
+ if (pte_present(tmp_pte)) { //printk("lookup_domain_page: found mapping for %lx, pte=%lx\n",mpaddr,pte_val(*pte)); - return pte_val(*pte); + if (entry != NULL) + p2m_entry_set(entry, pte, tmp_pte); + return pte_val(tmp_pte); } else if (VMX_DOMAIN(d->vcpu[0])) return GPFN_INV_MASK; } @@ -502,6 +687,8 @@ unsigned long lookup_domain_mpa(struct d printk("%s: bad mpa 0x%lx (=> 0x%lx)\n", __func__, mpaddr, (unsigned long)d->max_pages << PAGE_SHIFT); + if (entry != NULL) + p2m_entry_set(entry, NULL, __pte(0)); //XXX This is a work around until the emulation memory access to a region // where memory or device are attached is implemented. return pte_val(pfn_pte(0, __pgprot(__DIRTY_BITS | _PAGE_PL_2 | _PAGE_AR_RWX))); @@ -511,7 +698,7 @@ unsigned long lookup_domain_mpa(struct d #if 1 void *domain_mpa_to_imva(struct domain *d, unsigned long mpaddr) { - unsigned long pte = lookup_domain_mpa(d,mpaddr); + unsigned long pte = lookup_domain_mpa(d, mpaddr, NULL); unsigned long imva; pte &= _PAGE_PPN_MASK; @@ -530,7 +717,7 @@ void *domain_mpa_to_imva(struct domain * /* Allocate a new page for domain and map it to the specified metaphysical address. 
*/ -struct page_info * +static struct page_info * __assign_new_domain_page(struct domain *d, unsigned long mpaddr, pte_t* pte) { struct page_info *p = NULL; @@ -595,7 +782,7 @@ assign_new_domain_page(struct domain *d, struct page_info *p = NULL; pte_t *pte; - pte = lookup_alloc_domain_pte(d, mpaddr); + pte = __lookup_alloc_domain_pte(d, mpaddr); if (pte_none(*pte)) p = __assign_new_domain_page(d, mpaddr, pte); @@ -610,7 +797,7 @@ assign_new_domain0_page(struct domain *d pte_t *pte; BUG_ON(d != dom0); - pte = lookup_alloc_domain_pte(d, mpaddr); + pte = __lookup_alloc_domain_pte(d, mpaddr); if (pte_none(*pte)) { struct page_info *p = __assign_new_domain_page(d, mpaddr, pte); if (p == NULL) { @@ -622,21 +809,27 @@ assign_new_domain0_page(struct domain *d /* map a physical address to the specified metaphysical addr */ // flags: currently only ASSIGN_readonly +// This is called by assign_domain_mmio_page(). +// So accessing to pte is racy. void __assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr, unsigned long flags) { - pte_t *pte; + volatile pte_t *pte; + pte_t old_pte; + pte_t new_pte; + pte_t ret_pte; unsigned long arflags = (flags & ASSIGN_readonly)? _PAGE_AR_R: _PAGE_AR_RWX; pte = lookup_alloc_domain_pte(d, mpaddr); - if (pte_none(*pte)) { - set_pte_rel(pte, - pfn_pte(physaddr >> PAGE_SHIFT, - __pgprot(__DIRTY_BITS | _PAGE_PL_2 | arflags))); + + old_pte = __pte(0); + new_pte = pfn_pte(physaddr >> PAGE_SHIFT, + __pgprot(__DIRTY_BITS | _PAGE_PL_2 | arflags)); + ret_pte = ptep_cmpxchg_rel(&d->arch.mm, mpaddr, pte, old_pte, new_pte); + if (pte_val(ret_pte) == pte_val(old_pte)) smp_mb(); - } } /* get_page() and map a physical address to the specified metaphysical addr */ @@ -755,7 +948,7 @@ assign_domain_page_replace(struct domain unsigned long mfn, unsigned long flags) { struct mm_struct *mm = &d->arch.mm; - pte_t* pte; + volatile pte_t* pte; pte_t old_pte; pte_t npte; unsigned long arflags = (flags & ASSIGN_readonly)? 
_PAGE_AR_R: _PAGE_AR_RWX; @@ -776,7 +969,8 @@ assign_domain_page_replace(struct domain if (mfn != old_mfn) { struct page_info* old_page = mfn_to_page(old_mfn); - if (page_get_owner(old_page) == d) { + if (page_get_owner(old_page) == d || + page_get_owner(old_page) == NULL) { BUG_ON(get_gpfn_from_mfn(old_mfn) != (mpaddr >> PAGE_SHIFT)); set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY); } @@ -798,7 +992,7 @@ assign_domain_page_cmpxchg_rel(struct do unsigned long flags) { struct mm_struct *mm = &d->arch.mm; - pte_t* pte; + volatile pte_t* pte; unsigned long old_mfn; unsigned long old_arflags; pte_t old_pte; @@ -810,9 +1004,14 @@ assign_domain_page_cmpxchg_rel(struct do pte = lookup_alloc_domain_pte(d, mpaddr); again: - old_arflags = pte_val(*pte) & ~_PAGE_PPN_MASK;//XXX + old_arflags = pte_val(*pte) & ~_PAGE_PPN_MASK; old_mfn = page_to_mfn(old_page); old_pte = pfn_pte(old_mfn, __pgprot(old_arflags)); + if (!pte_present(old_pte)) { + DPRINTK("%s: old_pte 0x%lx old_arflags 0x%lx old_mfn 0x%lx\n", + __func__, pte_val(old_pte), old_arflags, old_mfn); + return -EINVAL; + } new_arflags = (flags & ASSIGN_readonly)? 
_PAGE_AR_R: _PAGE_AR_RWX; new_mfn = page_to_mfn(new_page); @@ -847,12 +1046,11 @@ assign_domain_page_cmpxchg_rel(struct do } static void -zap_domain_page_one(struct domain *d, unsigned long mpaddr) +zap_domain_page_one(struct domain *d, unsigned long mpaddr, unsigned long mfn) { struct mm_struct *mm = &d->arch.mm; - pte_t *pte; + volatile pte_t *pte; pte_t old_pte; - unsigned long mfn; struct page_info *page; pte = lookup_noalloc_domain_pte_none(d, mpaddr); @@ -861,20 +1059,60 @@ zap_domain_page_one(struct domain *d, un if (pte_none(*pte)) return; - // update pte - old_pte = ptep_get_and_clear(mm, mpaddr, pte); - mfn = pte_pfn(old_pte); + if (mfn == INVALID_MFN) { + // clear pte + old_pte = ptep_get_and_clear(mm, mpaddr, pte); + mfn = pte_pfn(old_pte); + } else { + unsigned long old_arflags; + pte_t new_pte; + pte_t ret_pte; + + again: + // memory_exchange() calls guest_physmap_remove_page() with + // a stealed page. i.e. page owner = NULL. + BUG_ON(page_get_owner(mfn_to_page(mfn)) != d && + page_get_owner(mfn_to_page(mfn)) != NULL); + old_arflags = pte_val(*pte) & ~_PAGE_PPN_MASK; + old_pte = pfn_pte(mfn, __pgprot(old_arflags)); + new_pte = __pte(0); + + // update pte + ret_pte = ptep_cmpxchg_rel(mm, mpaddr, pte, old_pte, new_pte); + if (unlikely(pte_val(old_pte) != pte_val(ret_pte))) { + if (pte_pfn(old_pte) == pte_pfn(ret_pte)) { + goto again; + } + + DPRINTK("%s: old_pte 0x%lx old_arflags 0x%lx mfn 0x%lx " + "ret_pte 0x%lx ret_mfn 0x%lx\n", + __func__, + pte_val(old_pte), old_arflags, mfn, + pte_val(ret_pte), pte_pfn(ret_pte)); + return; + } + BUG_ON(mfn != pte_pfn(ret_pte)); + } + page = mfn_to_page(mfn); BUG_ON((page->count_info & PGC_count_mask) == 0); - if (page_get_owner(page) == d) { + if (page_get_owner(page) == d || + page_get_owner(page) == NULL) { + // exchange_memory() calls + // steal_page() + // page owner is set to NULL + // guest_physmap_remove_page() + // zap_domain_page_one() BUG_ON(get_gpfn_from_mfn(mfn) != (mpaddr >> PAGE_SHIFT)); 
set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); } domain_page_flush(d, mpaddr, mfn, INVALID_MFN); - try_to_clear_PGC_allocate(d, page); + if (page_get_owner(page) != NULL) { + try_to_clear_PGC_allocate(d, page); + } put_page(page); } @@ -887,7 +1125,7 @@ dom0vp_zap_physmap(struct domain *d, uns return -ENOSYS; } - zap_domain_page_one(d, gpfn << PAGE_SHIFT); + zap_domain_page_one(d, gpfn << PAGE_SHIFT, INVALID_MFN); return 0; } @@ -970,10 +1208,12 @@ destroy_grant_host_mapping(unsigned long unsigned long mfn, unsigned int flags) { struct domain* d = current->domain; - pte_t* pte; + volatile pte_t* pte; + unsigned long cur_arflags; + pte_t cur_pte; + pte_t new_pte; pte_t old_pte; - unsigned long old_mfn = INVALID_MFN; - struct page_info* old_page; + struct page_info* page; if (flags & (GNTMAP_application_map | GNTMAP_contains_pte)) { DPRINTK("%s: flags 0x%x\n", __func__, flags); @@ -981,26 +1221,52 @@ destroy_grant_host_mapping(unsigned long } pte = lookup_noalloc_domain_pte(d, gpaddr); - if (pte == NULL || !pte_present(*pte) || pte_pfn(*pte) != mfn) + if (pte == NULL) { + DPRINTK("%s: gpaddr 0x%lx mfn 0x%lx\n", __func__, gpaddr, mfn); return GNTST_general_error; - - // update pte - old_pte = ptep_get_and_clear(&d->arch.mm, gpaddr, pte); - if (pte_present(old_pte)) { - old_mfn = pte_pfn(old_pte); - } else { + } + + again: + cur_arflags = pte_val(*pte) & ~_PAGE_PPN_MASK; + cur_pte = pfn_pte(mfn, __pgprot(cur_arflags)); + if (!pte_present(cur_pte)) { + DPRINTK("%s: gpaddr 0x%lx mfn 0x%lx cur_pte 0x%lx\n", + __func__, gpaddr, mfn, pte_val(cur_pte)); return GNTST_general_error; } - domain_page_flush(d, gpaddr, old_mfn, INVALID_MFN); - - old_page = mfn_to_page(old_mfn); - BUG_ON(page_get_owner(old_page) == d);//try_to_clear_PGC_allocate(d, page) is not needed. 
- put_page(old_page); + new_pte = __pte(0); + + old_pte = ptep_cmpxchg_rel(&d->arch.mm, gpaddr, pte, cur_pte, new_pte); + if (unlikely(!pte_present(old_pte))) { + DPRINTK("%s: gpaddr 0x%lx mfn 0x%lx cur_pte 0x%lx old_pte 0x%lx\n", + __func__, gpaddr, mfn, pte_val(cur_pte), pte_val(old_pte)); + return GNTST_general_error; + } + if (unlikely(pte_val(cur_pte) != pte_val(old_pte))) { + if (pte_pfn(old_pte) == mfn) { + goto again; + } + DPRINTK("%s gpaddr 0x%lx mfn 0x%lx cur_pte 0x%lx old_pte 0x%lx\n", + __func__, gpaddr, mfn, pte_val(cur_pte), pte_val(old_pte)); + return GNTST_general_error; + } + BUG_ON(pte_pfn(old_pte) != mfn); + + domain_page_flush(d, gpaddr, mfn, INVALID_MFN); + + page = mfn_to_page(mfn); + BUG_ON(page_get_owner(page) == d);//try_to_clear_PGC_allocate(d, page) is not needed. + put_page(page); return GNTST_okay; } // heavily depends on the struct page layout. +// gnttab_transfer() calls steal_page() with memflags = 0 +// For grant table transfer, we must fill the page. +// memory_exchange() calls steal_page() with memflags = MEMF_no_refcount +// For memory exchange, we don't have to fill the page because +// memory_exchange() does it. int steal_page(struct domain *d, struct page_info *page, unsigned int memflags) { @@ -1009,40 +1275,49 @@ steal_page(struct domain *d, struct page #endif u32 _d, _nd; u64 x, nx, y; - unsigned long gpfn; - struct page_info *new; - unsigned long new_mfn; - int ret; - new = alloc_domheap_page(d); - if (new == NULL) { - DPRINTK("alloc_domheap_page() failed\n"); + + if (page_get_owner(page) != d) { + DPRINTK("%s d 0x%p owner 0x%p\n", __func__, d, page_get_owner(page)); return -1; } - // zero out pages for security reasons - clear_page(page_to_virt(new)); - // assign_domain_page_cmpxchg_rel() has release semantics - // so smp_mb() isn't needed. 
- - ret = get_page(new, d); - BUG_ON(ret == 0); - - gpfn = get_gpfn_from_mfn(page_to_mfn(page)); - if (gpfn == INVALID_M2P_ENTRY) { - free_domheap_page(new); - return -1; - } - new_mfn = page_to_mfn(new); - set_gpfn_from_mfn(new_mfn, gpfn); - // smp_mb() isn't needed because assign_domain_pge_cmpxchg_rel() - // has release semantics. - - ret = assign_domain_page_cmpxchg_rel(d, gpfn << PAGE_SHIFT, page, new, - ASSIGN_writable); - if (ret < 0) { - DPRINTK("assign_domain_page_cmpxchg_rel failed %d\n", ret); - set_gpfn_from_mfn(new_mfn, INVALID_M2P_ENTRY); - free_domheap_page(new); - return -1; + + if (!(memflags & MEMF_no_refcount)) { + unsigned long gpfn; + struct page_info *new; + unsigned long new_mfn; + int ret; + + new = alloc_domheap_page(d); + if (new == NULL) { + DPRINTK("alloc_domheap_page() failed\n"); + return -1; + } + // zero out pages for security reasons + clear_page(page_to_virt(new)); + // assign_domain_page_cmpxchg_rel() has release semantics + // so smp_mb() isn't needed. + + ret = get_page(new, d); + BUG_ON(ret == 0); + + gpfn = get_gpfn_from_mfn(page_to_mfn(page)); + if (gpfn == INVALID_M2P_ENTRY) { + free_domheap_page(new); + return -1; + } + new_mfn = page_to_mfn(new); + set_gpfn_from_mfn(new_mfn, gpfn); + // smp_mb() isn't needed because assign_domain_pge_cmpxchg_rel() + // has release semantics. 
+ + ret = assign_domain_page_cmpxchg_rel(d, gpfn << PAGE_SHIFT, page, new, + ASSIGN_writable); + if (ret < 0) { + DPRINTK("assign_domain_page_cmpxchg_rel failed %d\n", ret); + set_gpfn_from_mfn(new_mfn, INVALID_M2P_ENTRY); + free_domheap_page(new); + return -1; + } } spin_lock(&d->page_alloc_lock); @@ -1061,28 +1336,40 @@ steal_page(struct domain *d, struct page // page->u.inused._domain = 0; _nd = x >> 32; - if (unlikely((x & (PGC_count_mask | PGC_allocated)) != - (1 | PGC_allocated)) || + if (unlikely(!(memflags & MEMF_no_refcount) && + ((x & (PGC_count_mask | PGC_allocated)) != + (1 | PGC_allocated))) || + + // when MEMF_no_refcount, page isn't de-assigned from + // this domain yet. So count_info = 2 + unlikely((memflags & MEMF_no_refcount) && + ((x & (PGC_count_mask | PGC_allocated)) != + (2 | PGC_allocated))) || + unlikely(_nd != _d)) { struct domain* nd = unpickle_domptr(_nd); if (nd == NULL) { DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u) 0x%x, " "sd=%p 0x%x," - " caf=%016lx, taf=%" PRtype_info "\n", + " caf=%016lx, taf=%" PRtype_info + " memflags 0x%x\n", (void *) page_to_mfn(page), d, d->domain_id, _d, nd, _nd, x, - page->u.inuse.type_info); + page->u.inuse.type_info, + memflags); } else { DPRINTK("gnttab_transfer: Bad page %p: ed=%p(%u) 0x%x, " "sd=%p(%u) 0x%x," - " caf=%016lx, taf=%" PRtype_info "\n", + " caf=%016lx, taf=%" PRtype_info + " memflags 0x%x\n", (void *) page_to_mfn(page), d, d->domain_id, _d, nd, nd->domain_id, _nd, x, - page->u.inuse.type_info); + page->u.inuse.type_info, + memflags); } spin_unlock(&d->page_alloc_lock); return -1; @@ -1112,8 +1399,6 @@ guest_physmap_add_page(struct domain *d, BUG_ON(!mfn_valid(mfn)); ret = get_page(mfn_to_page(mfn), d); BUG_ON(ret == 0); - BUG_ON(page_get_owner(mfn_to_page(mfn)) == d && - get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY); set_gpfn_from_mfn(mfn, gpfn); smp_mb(); assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, ASSIGN_writable); @@ -1126,7 +1411,7 @@ guest_physmap_remove_page(struct 
domain unsigned long mfn) { BUG_ON(mfn == 0);//XXX - zap_domain_page_one(d, gpfn << PAGE_SHIFT); + zap_domain_page_one(d, gpfn << PAGE_SHIFT, mfn); } //XXX sledgehammer. @@ -1141,7 +1426,7 @@ int int domain_page_mapped(struct domain* d, unsigned long mpaddr) { - pte_t * pte; + volatile pte_t * pte; pte = lookup_noalloc_domain_pte(d, mpaddr); if(pte != NULL && !pte_none(*pte)) diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/xen/pcdp.c --- a/xen/arch/ia64/xen/pcdp.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/xen/pcdp.c Thu Jul 06 06:47:16 2006 -0400 @@ -16,21 +16,142 @@ #include <linux/efi.h> #include <linux/serial.h> #ifdef XEN +#include <linux/efi.h> #include <linux/errno.h> +#include <asm/iosapic.h> +#include <asm/system.h> +#include <acpi/acpi.h> #endif #include "pcdp.h" -static int __init -setup_serial_console(struct pcdp_uart *uart) -{ #ifdef XEN - extern struct ns16550_defaults ns16550_com1; +extern struct ns16550_defaults ns16550_com1; +extern unsigned int ns16550_com1_gsi; +extern unsigned int ns16550_com1_polarity; +extern unsigned int ns16550_com1_trigger; + +/* + * This is kind of ugly, but older rev HCDP tables don't provide interrupt + * polarity and trigger information. Linux/ia64 discovers these properties + * later via ACPI names, but we don't have that luxury in Xen/ia64. Since + * all future platforms should have newer PCDP tables, this should be a + * fixed list of boxes in the field, so we can hardcode based on the model. + */ +static void __init +pcdp_hp_irq_fixup(struct pcdp *pcdp, struct pcdp_uart *uart) +{ + efi_system_table_t *systab; + efi_config_table_t *tables; + struct acpi20_table_rsdp *rsdp = NULL; + struct acpi_table_xsdt *xsdt; + struct acpi_table_header *hdr; + int i; + + if (pcdp->rev >= 3 || strcmp((char *)pcdp->oemid, "HP")) + return; + + /* + * Manually walk firmware provided tables to get to the XSDT. + * The OEM table ID on the XSDT is the platform model string. 
+ * We only care about ACPI 2.0 tables as that's all HP provides. + */ + systab = __va(ia64_boot_param->efi_systab); + + if (!systab || systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) + return; + + tables = __va(systab->tables); + + for (i = 0 ; i < (int)systab->nr_tables && !rsdp ; i++) { + if (efi_guidcmp(tables[i].guid, ACPI_20_TABLE_GUID) == 0) + rsdp = + (struct acpi20_table_rsdp *)__va(tables[i].table); + } + + if (!rsdp || strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) + return; + + xsdt = (struct acpi_table_xsdt *)__va(rsdp->xsdt_address); + hdr = &xsdt->header; + + if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) + return; + + /* Sanity check; are we still looking at HP firmware tables? */ + if (strcmp(hdr->oem_id, "HP")) + return; + + if (!strcmp(hdr->oem_table_id, "zx2000") || + !strcmp(hdr->oem_table_id, "zx6000") || + !strcmp(hdr->oem_table_id, "rx2600") || + !strcmp(hdr->oem_table_id, "cx2600")) { + + ns16550_com1.irq = ns16550_com1_gsi = uart->gsi; + ns16550_com1_polarity = IOSAPIC_POL_HIGH; + ns16550_com1_trigger = IOSAPIC_EDGE; + + } else if (!strcmp(hdr->oem_table_id, "rx2620") || + !strcmp(hdr->oem_table_id, "cx2620") || + !strcmp(hdr->oem_table_id, "rx1600") || + !strcmp(hdr->oem_table_id, "rx1620")) { + + ns16550_com1.irq = ns16550_com1_gsi = uart->gsi; + ns16550_com1_polarity = IOSAPIC_POL_LOW; + ns16550_com1_trigger = IOSAPIC_LEVEL; + } +} + +static void __init +setup_pcdp_irq(struct pcdp *pcdp, struct pcdp_uart *uart) +{ + /* PCDP provides full interrupt info */ + if (pcdp->rev >= 3) { + if (uart->flags & PCDP_UART_IRQ) { + ns16550_com1.irq = ns16550_com1_gsi = uart->gsi, + ns16550_com1_polarity = + uart->flags & PCDP_UART_ACTIVE_LOW ? + IOSAPIC_POL_LOW : IOSAPIC_POL_HIGH; + ns16550_com1_trigger = + uart->flags & PCDP_UART_EDGE_SENSITIVE ? 
+ IOSAPIC_EDGE : IOSAPIC_LEVEL; + } + return; + } + + /* HCDP support */ + if (uart->pci_func & PCDP_UART_IRQ) { + /* + * HCDP tables don't provide interrupt polarity/trigger + * info. If the UART is a PCI device, we know to program + * it as low/level. Otherwise rely on platform hacks or + * default to polling (irq = 0). + */ + if (uart->pci_func & PCDP_UART_PCI) { + ns16550_com1.irq = ns16550_com1_gsi = uart->gsi; + ns16550_com1_polarity = IOSAPIC_POL_LOW; + ns16550_com1_trigger = IOSAPIC_LEVEL; + } else if (!strcmp((char *)pcdp->oemid, "HP")) + pcdp_hp_irq_fixup(pcdp, uart); + } +} + +static int __init +setup_serial_console(struct pcdp *pcdp, struct pcdp_uart *uart) +{ + ns16550_com1.baud = uart->baud; ns16550_com1.io_base = uart->addr.address; if (uart->bits) ns16550_com1.data_bits = uart->bits; + + setup_pcdp_irq(pcdp, uart); + return 0; -#else +} +#else +static int __init +setup_serial_console(struct pcdp_uart *uart) +{ #ifdef CONFIG_SERIAL_8250_CONSOLE int mmio; static char options[64]; @@ -44,10 +165,8 @@ setup_serial_console(struct pcdp_uart *u #else return -ENODEV; #endif -#endif -} - -#ifndef XEN +} + static int __init setup_vga_console(struct pcdp_vga *vga) { @@ -100,7 +219,12 @@ efi_setup_pcdp_console(char *cmdline) for (i = 0, uart = pcdp->uart; i < pcdp->num_uarts; i++, uart++) { if (uart->flags & PCDP_UART_PRIMARY_CONSOLE || serial) { if (uart->type == PCDP_CONSOLE_UART) { +#ifndef XEN return setup_serial_console(uart); +#else + return setup_serial_console(pcdp, uart); +#endif + } } } diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/xen/regionreg.c --- a/xen/arch/ia64/xen/regionreg.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/xen/regionreg.c Thu Jul 06 06:47:16 2006 -0400 @@ -342,3 +342,22 @@ void load_region_regs(struct vcpu *v) panic_domain(0,"load_region_regs: can't set! 
bad=%lx\n",bad); } } + +void load_region_reg7_and_pta(struct vcpu *v) +{ + unsigned long rr7, pta; + + if (!is_idle_domain(v->domain)) { + ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | + VHPT_ENABLED); + + // TODO: These probably should be validated + rr7 = VCPU(v,rrs[7]); + if (!set_one_rr(0xe000000000000000L, rr7)) + panic_domain(0, "%s: can't set!\n", __func__); + } + else { + pta = ia64_get_pta(); + ia64_set_pta(pta & ~VHPT_ENABLED); + } +} diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/xen/vcpu.c --- a/xen/arch/ia64/xen/vcpu.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/xen/vcpu.c Thu Jul 06 06:47:16 2006 -0400 @@ -29,7 +29,6 @@ extern void setfpreg (unsigned long regn extern void setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs); extern void panic_domain(struct pt_regs *, const char *, ...); -extern unsigned long translate_domain_mpaddr(unsigned long); extern IA64_BUNDLE __get_domain_bundle(UINT64); typedef union { @@ -175,7 +174,8 @@ void vcpu_init_regs (struct vcpu *v) VCPU(v, banknum) = 1; VCPU(v, metaphysical_mode) = 1; VCPU(v, interrupt_mask_addr) = - (uint64_t)SHAREDINFO_ADDR + INT_ENABLE_OFFSET(v); + (unsigned char *)v->domain->arch.shared_info_va + + INT_ENABLE_OFFSET(v); VCPU(v, itv) = (1 << 16); /* timer vector masked */ } @@ -272,8 +272,7 @@ IA64FAULT vcpu_reset_psr_sm(VCPU *vcpu, if (imm.pp) { ipsr->pp = 1; psr.pp = 1; // priv perf ctrs always enabled -// FIXME: need new field in mapped_regs_t for virtual psr.pp (psr.be too?) - PSCB(vcpu,tmp[8]) = 0; // but fool the domain if it gets psr + PSCB(vcpu,vpsr_pp) = 0; // but fool the domain if it gets psr } if (imm.up) { ipsr->up = 0; psr.up = 0; } if (imm.sp) { ipsr->sp = 0; psr.sp = 0; } @@ -316,9 +315,9 @@ IA64FAULT vcpu_set_psr_sm(VCPU *vcpu, UI if (imm.dfh) ipsr->dfh = 1; if (imm.dfl) ipsr->dfl = 1; if (imm.pp) { - ipsr->pp = 1; psr.pp = 1; -// FIXME: need new field in mapped_regs_t for virtual psr.pp (psr.be too?) 
- PSCB(vcpu,tmp[8]) = 1; + ipsr->pp = 1; + psr.pp = 1; + PSCB(vcpu,vpsr_pp) = 1; } if (imm.sp) { ipsr->sp = 1; psr.sp = 1; } if (imm.i) { @@ -363,12 +362,11 @@ IA64FAULT vcpu_set_psr_l(VCPU *vcpu, UIN if (newpsr.dfl) ipsr->dfl = 1; if (newpsr.pp) { ipsr->pp = 1; psr.pp = 1; -// FIXME: need new field in mapped_regs_t for virtual psr.pp (psr.be too?) - PSCB(vcpu,tmp[8]) = 1; + PSCB(vcpu,vpsr_pp) = 1; } else { ipsr->pp = 1; psr.pp = 1; - PSCB(vcpu,tmp[8]) = 0; + PSCB(vcpu,vpsr_pp) = 0; } if (newpsr.up) { ipsr->up = 1; psr.up = 1; } if (newpsr.sp) { ipsr->sp = 1; psr.sp = 1; } @@ -407,8 +405,7 @@ IA64FAULT vcpu_get_psr(VCPU *vcpu, UINT6 else newpsr.ic = 0; if (PSCB(vcpu,metaphysical_mode)) newpsr.dt = 0; else newpsr.dt = 1; -// FIXME: need new field in mapped_regs_t for virtual psr.pp (psr.be too?) - if (PSCB(vcpu,tmp[8])) newpsr.pp = 1; + if (PSCB(vcpu,vpsr_pp)) newpsr.pp = 1; else newpsr.pp = 0; *pval = *(unsigned long *)&newpsr; return IA64_NO_FAULT; @@ -1348,6 +1345,21 @@ static inline int range_overlap (u64 b1, return (b1 <= e2) && (e1 >= b2); } +/* Crash domain if [base, base + page_size] and Xen virtual space overlaps. + Note: LSBs of base inside page_size are ignored. */ +static inline void +check_xen_space_overlap (const char *func, u64 base, u64 page_size) +{ + /* Mask LSBs of base. */ + base &= ~(page_size - 1); + + /* FIXME: ideally an MCA should be generated... 
*/ + if (range_overlap (HYPERVISOR_VIRT_START, HYPERVISOR_VIRT_END, + base, base + page_size)) + panic_domain (NULL, "%s on Xen virtual space (%lx)\n", + func, base); +} + // FIXME: also need to check && (!trp->key || vcpu_pkr_match(trp->key)) static inline int vcpu_match_tr_entry_no_p(TR_ENTRY *trp, UINT64 ifa, UINT64 rid) { @@ -1409,7 +1421,10 @@ vcpu_get_domain_bundle(VCPU* vcpu, REGS* vcpu_get_domain_bundle(VCPU* vcpu, REGS* regs, UINT64 gip, IA64_BUNDLE* bundle) { UINT64 gpip;// guest pseudo phyiscal ip - + unsigned long vaddr; + struct page_info* page; + +again: #if 0 // Currently xen doesn't track psr.it bits. // it assumes always psr.it = 1. @@ -1472,8 +1487,22 @@ vcpu_get_domain_bundle(VCPU* vcpu, REGS* gpip = ((tr.pte.ppn >> (tr.ps - 12)) << tr.ps) | (gip & ((1 << tr.ps) - 1)); } - - *bundle = *((IA64_BUNDLE*)__va(__gpa_to_mpa(vcpu->domain, gpip))); + + vaddr = (unsigned long)domain_mpa_to_imva(vcpu->domain, gpip); + page = virt_to_page(vaddr); + if (get_page(page, vcpu->domain) == 0) { + if (page_get_owner(page) != vcpu->domain) { + // This page might be a page granted by another + // domain. + panic_domain(regs, + "domain tries to execute foreign domain " + "page which might be mapped by grant " + "table.\n"); + } + goto again; + } + *bundle = *((IA64_BUNDLE*)vaddr); + put_page(page); return 1; } @@ -1937,13 +1966,13 @@ void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 unsigned long psr; unsigned long ps = (vcpu->domain==dom0) ? logps : PAGE_SHIFT; - // FIXME: validate ifa here (not in Xen space), COULD MACHINE CHECK! + check_xen_space_overlap ("itc", vaddr, 1UL << logps); + // FIXME, must be inlined or potential for nested fault here! 
- if ((vcpu->domain==dom0) && (logps < PAGE_SHIFT)) { - printf("vcpu_itc_no_srlz: domain0 use of smaller page size!\n"); - //FIXME: kill domain here - while(1); - } + if ((vcpu->domain==dom0) && (logps < PAGE_SHIFT)) + panic_domain (NULL, "vcpu_itc_no_srlz: domain trying to use " + "smaller page size!\n"); + #ifdef CONFIG_XEN_IA64_DOM0_VP BUG_ON(logps > PAGE_SHIFT); #endif @@ -1978,18 +2007,23 @@ IA64FAULT vcpu_itc_d(VCPU *vcpu, UINT64 { unsigned long pteval, logps = itir_ps(itir); BOOLEAN swap_rr0 = (!(ifa>>61) && PSCB(vcpu,metaphysical_mode)); - - if (logps < PAGE_SHIFT) { - printf("vcpu_itc_d: domain trying to use smaller page size!\n"); - //FIXME: kill domain here - while(1); - } + struct p2m_entry entry; + + if (logps < PAGE_SHIFT) + panic_domain (NULL, "vcpu_itc_d: domain trying to use " + "smaller page size!\n"); + +again: //itir = (itir & ~0xfc) | (PAGE_SHIFT<<2); // ignore domain's pagesize - pteval = translate_domain_pte(pte, ifa, itir, &logps); + pteval = translate_domain_pte(pte, ifa, itir, &logps, &entry); if (!pteval) return IA64_ILLOP_FAULT; if (swap_rr0) set_one_rr(0x0,PSCB(vcpu,rrs[0])); vcpu_itc_no_srlz(vcpu,2,ifa,pteval,pte,logps); if (swap_rr0) set_metaphysical_rr0(); + if (p2m_entry_retry(&entry)) { + vcpu_flush_tlb_vhpt_range(ifa & ((1 << logps) - 1), logps); + goto again; + } return IA64_NO_FAULT; } @@ -1997,26 +2031,30 @@ IA64FAULT vcpu_itc_i(VCPU *vcpu, UINT64 { unsigned long pteval, logps = itir_ps(itir); BOOLEAN swap_rr0 = (!(ifa>>61) && PSCB(vcpu,metaphysical_mode)); - - // FIXME: validate ifa here (not in Xen space), COULD MACHINE CHECK! 
- if (logps < PAGE_SHIFT) { - printf("vcpu_itc_i: domain trying to use smaller page size!\n"); - //FIXME: kill domain here - while(1); - } + struct p2m_entry entry; + + if (logps < PAGE_SHIFT) + panic_domain (NULL, "vcpu_itc_i: domain trying to use " + "smaller page size!\n"); +again: //itir = (itir & ~0xfc) | (PAGE_SHIFT<<2); // ignore domain's pagesize - pteval = translate_domain_pte(pte, ifa, itir, &logps); - // FIXME: what to do if bad physical address? (machine check?) + pteval = translate_domain_pte(pte, ifa, itir, &logps, &entry); if (!pteval) return IA64_ILLOP_FAULT; if (swap_rr0) set_one_rr(0x0,PSCB(vcpu,rrs[0])); vcpu_itc_no_srlz(vcpu, 1,ifa,pteval,pte,logps); if (swap_rr0) set_metaphysical_rr0(); + if (p2m_entry_retry(&entry)) { + vcpu_flush_tlb_vhpt_range(ifa & ((1 << logps) - 1), logps); + goto again; + } return IA64_NO_FAULT; } IA64FAULT vcpu_ptc_l(VCPU *vcpu, UINT64 vadr, UINT64 log_range) { BUG_ON(vcpu != current); + + check_xen_space_overlap ("ptc_l", vadr, 1UL << log_range); /* Purge TC */ vcpu_purge_tr_entry(&PSCBX(vcpu,dtlb)); @@ -2040,10 +2078,14 @@ IA64FAULT vcpu_fc(VCPU *vcpu, UINT64 vad UINT64 mpaddr, paddr; IA64FAULT fault; +again: fault = vcpu_tpa(vcpu, vadr, &mpaddr); if (fault == IA64_NO_FAULT) { - paddr = translate_domain_mpaddr(mpaddr); + struct p2m_entry entry; + paddr = translate_domain_mpaddr(mpaddr, &entry); ia64_fc(__va(paddr)); + if (p2m_entry_retry(&entry)) + goto again; } return fault; } @@ -2071,6 +2113,8 @@ IA64FAULT vcpu_ptc_ga(VCPU *vcpu,UINT64 // if (Xen address) return(IA64_ILLOP_FAULT); // FIXME: ??breaks if domain PAGE_SIZE < Xen PAGE_SIZE //printf("######## vcpu_ptc_ga(%p,%p) ##############\n",vadr,addr_range); + + check_xen_space_overlap ("ptc_ga", vadr, addr_range); domain_flush_vtlb_range (vcpu->domain, vadr, addr_range); @@ -2084,7 +2128,9 @@ IA64FAULT vcpu_ptr_d(VCPU *vcpu,UINT64 v unsigned long rid, rr; int i; TR_ENTRY *trp; + BUG_ON(vcpu != current); + check_xen_space_overlap ("ptr_d", vadr, 1UL << log_range); 
rr = PSCB(vcpu,rrs)[region]; rid = rr & RR_RID_MASK; @@ -2113,7 +2159,9 @@ IA64FAULT vcpu_ptr_i(VCPU *vcpu,UINT64 v unsigned long rid, rr; int i; TR_ENTRY *trp; + BUG_ON(vcpu != current); + check_xen_space_overlap ("ptr_i", vadr, 1UL << log_range); rr = PSCB(vcpu,rrs)[region]; rid = rr & RR_RID_MASK; diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/xen/vhpt.c --- a/xen/arch/ia64/xen/vhpt.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/xen/vhpt.c Thu Jul 06 06:47:16 2006 -0400 @@ -152,10 +152,11 @@ void domain_flush_vtlb_all (void) { int cpu = smp_processor_id (); struct vcpu *v; - seqlock_t* vtlb_lock = &current->domain->arch.vtlb_lock; - - write_seqlock(vtlb_lock); - for_each_vcpu (current->domain, v) + + for_each_vcpu (current->domain, v) { + if (!test_bit(_VCPUF_initialised, &v->vcpu_flags)) + continue; + if (v->processor == cpu) vcpu_flush_vtlb_all (); else @@ -163,7 +164,7 @@ void domain_flush_vtlb_all (void) (v->processor, (void(*)(void *))vcpu_flush_vtlb_all, NULL,1,1); - write_sequnlock(vtlb_lock); + } } static void cpu_flush_vhpt_range (int cpu, u64 vadr, u64 addr_range) @@ -190,7 +191,6 @@ void vcpu_flush_tlb_vhpt_range (u64 vadr void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range) { - seqlock_t* vtlb_lock = &d->arch.vtlb_lock; struct vcpu *v; #if 0 @@ -201,8 +201,10 @@ void domain_flush_vtlb_range (struct dom } #endif - write_seqlock(vtlb_lock); for_each_vcpu (d, v) { + if (!test_bit(_VCPUF_initialised, &v->vcpu_flags)) + continue; + /* Purge TC entries. FIXME: clear only if match. */ vcpu_purge_tr_entry(&PSCBX(v,dtlb)); @@ -211,6 +213,9 @@ void domain_flush_vtlb_range (struct dom smp_mb(); for_each_vcpu (d, v) { + if (!test_bit(_VCPUF_initialised, &v->vcpu_flags)) + continue; + /* Invalidate VHPT entries. 
*/ cpu_flush_vhpt_range (v->processor, vadr, addr_range); } @@ -218,7 +223,6 @@ void domain_flush_vtlb_range (struct dom /* ptc.ga */ ia64_global_tlb_purge(vadr,vadr+addr_range,PAGE_SHIFT); - write_sequnlock(vtlb_lock); } static void flush_tlb_vhpt_all (struct domain *d) @@ -230,8 +234,6 @@ static void flush_tlb_vhpt_all (struct d local_flush_tlb_all (); } -// this is called when a domain is destroyed -// so that there is no race. void domain_flush_destroy (struct domain *d) { /* Very heavy... */ @@ -241,10 +243,8 @@ void domain_flush_destroy (struct domain void flush_tlb_mask(cpumask_t mask) { - seqlock_t* vtlb_lock = &current->domain->arch.vtlb_lock; int cpu; - write_seqlock(vtlb_lock); cpu = smp_processor_id(); if (cpu_isset (cpu, mask)) { cpu_clear(cpu, mask); @@ -252,13 +252,11 @@ void flush_tlb_mask(cpumask_t mask) } if (cpus_empty(mask)) - goto out; + return; for_each_cpu_mask (cpu, mask) smp_call_function_single (cpu, (void (*)(void *))flush_tlb_vhpt_all, NULL, 1, 1); -out: - write_sequnlock(vtlb_lock); } void zero_vhpt_stats(void) @@ -268,16 +266,21 @@ void zero_vhpt_stats(void) int dump_vhpt_stats(char *buf) { - int i; + int i, cpu; char *s = buf; - struct vhpt_lf_entry *v = (void *)VHPT_ADDR; - unsigned long vhpt_valid = 0, vhpt_chains = 0; - - for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) { - if (!(v->ti_tag & INVALID_TI_TAG)) vhpt_valid++; - if (v->CChain) vhpt_chains++; - } - s += sprintf(s,"VHPT usage: %ld/%ld (%ld collision chains)\n", - vhpt_valid, (unsigned long) VHPT_NUM_ENTRIES, vhpt_chains); + + s += sprintf(s,"VHPT usage (%ld entries):\n", + (unsigned long) VHPT_NUM_ENTRIES); + + for_each_present_cpu (cpu) { + struct vhpt_lf_entry *v = __va(per_cpu(vhpt_paddr, cpu)); + unsigned long vhpt_valid = 0; + + for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) + if (!(v->ti_tag & INVALID_TI_TAG)) + vhpt_valid++; + s += sprintf(s," cpu %d: %ld\n", cpu, vhpt_valid); + } + return s - buf; } diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/xen/xenasm.S --- 
a/xen/arch/ia64/xen/xenasm.S Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/xen/xenasm.S Thu Jul 06 06:47:16 2006 -0400 @@ -10,7 +10,9 @@ #include <asm/processor.h> #include <asm/pgtable.h> #include <asm/vhpt.h> - +#include <asm/asm-xsi-offsets.h> +#include <public/arch-ia64.h> + // Change rr7 to the passed value while ensuring // Xen is mapped into the new region. #define PSR_BITS_TO_CLEAR \ @@ -140,8 +142,8 @@ 1: ;; itr.d dtr[r21]=r23 // wire in new mapping... - // Map for arch_vcpu_info_t - movl r22=XSI_OFS + // Map mapped_regs + mov r22=XMAPPEDREGS_OFS mov r24=PAGE_SHIFT<<2 ;; add r22=r22,in3 @@ -150,7 +152,7 @@ 1: or r23=loc7,r25 // construct PA | page properties mov cr.itir=r24 mov cr.ifa=r22 - mov r21=IA64_TR_ARCH_INFO + mov r21=IA64_TR_MAPPED_REGS ;; itr.d dtr[r21]=r23 // wire in new mapping... @@ -239,19 +241,24 @@ END(__get_domain_bundle) END(__get_domain_bundle) GLOBAL_ENTRY(dorfirfi) - movl r16 = XSI_IIP - movl r17 = XSI_IPSR - movl r18 = XSI_IFS + // Read current vcpu shared info + movl r16=THIS_CPU(current_psr_ic_addr) + ;; + ld8 r19 = [r16] + ;; + add r16 = XSI_IIP_OFS - XSI_PSR_IC_OFS, r19 + add r17 = XSI_IPSR_OFS - XSI_PSR_IC_OFS, r19 + add r18 = XSI_IFS_OFS - XSI_PSR_IC_OFS, r19 ;; ld8 r16 = [r16] ld8 r17 = [r17] ld8 r18 = [r18] ;; - mov cr.iip=r16 - mov cr.ipsr=r17 - mov cr.ifs=r18 - ;; - rfi + mov cr.iip=r16 + mov cr.ipsr=r17 + mov cr.ifs=r18 + ;; + rfi ;; END(dorfirfi) diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/xen/xenmem.c --- a/xen/arch/ia64/xen/xenmem.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/xen/xenmem.c Thu Jul 06 06:47:16 2006 -0400 @@ -35,7 +35,7 @@ unsigned long max_page; /* * Set up the page tables. 
*/ -unsigned long *mpt_table; +volatile unsigned long *mpt_table; void paging_init (void) @@ -139,18 +139,18 @@ static int static int create_mpttable_page_table (u64 start, u64 end, void *arg) { + unsigned long map_start, map_end; unsigned long address, start_page, end_page; - unsigned long *map_start, *map_end; pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *pte; - map_start = mpt_table + (__pa(start) >> PAGE_SHIFT); - map_end = mpt_table + (__pa(end) >> PAGE_SHIFT); - - start_page = (unsigned long) map_start & PAGE_MASK; - end_page = PAGE_ALIGN((unsigned long) map_end); + map_start = (unsigned long)(mpt_table + (__pa(start) >> PAGE_SHIFT)); + map_end = (unsigned long)(mpt_table + (__pa(end) >> PAGE_SHIFT)); + + start_page = map_start & PAGE_MASK; + end_page = PAGE_ALIGN(map_end); for (address = start_page; address < end_page; address += PAGE_SIZE) { pgd = frametable_pgd_offset(address); diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/xen/xenmisc.c --- a/xen/arch/ia64/xen/xenmisc.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/xen/xenmisc.c Thu Jul 06 06:47:16 2006 -0400 @@ -19,6 +19,7 @@ #include <public/sched.h> #include <asm/vhpt.h> #include <asm/debugger.h> +#include <asm/vmx.h> #include <asm/vmx_vcpu.h> #include <asm/vcpu.h> @@ -109,6 +110,17 @@ void die_if_kernel(char *str, struct pt_ domain_crash_synchronous(); } +void vmx_die_if_kernel(char *str, struct pt_regs *regs, long err) /* __attribute__ ((noreturn)) */ +{ + if (vmx_user_mode(regs)) + return; + + printk("%s: %s %ld\n", __func__, str, err); + debugtrace_dump(); + show_registers(regs); + domain_crash_synchronous(); +} + long ia64_peek (struct task_struct *child, struct switch_stack *child_stack, unsigned long user_rbs_end, unsigned long addr, long *val) diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ia64/xen/xensetup.c --- a/xen/arch/ia64/xen/xensetup.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ia64/xen/xensetup.c Thu Jul 06 06:47:16 2006 -0400 @@ -23,6 +23,7 @@ #include <xen/string.h> #include 
<asm/vmx.h> #include <linux/efi.h> +#include <asm/iosapic.h> /* Be sure the struct shared_info fits on a page because it is mapped in domain. */ @@ -40,9 +41,9 @@ extern unsigned long domain0_ready; extern unsigned long domain0_ready; int find_max_pfn (unsigned long, unsigned long, void *); -void start_of_day(void); /* FIXME: which header these declarations should be there ? */ +extern void initialize_keytable(void); extern long is_platform_hp_ski(void); extern void early_setup_arch(char **); extern void late_setup_arch(char **); @@ -60,6 +61,16 @@ boolean_param("nosmp", opt_nosmp); /* maxcpus: maximum number of CPUs to activate. */ static unsigned int max_cpus = NR_CPUS; integer_param("maxcpus", max_cpus); + +/* xencons: if true enable xenconsole input (and irq). + Note: you have to disable 8250 serials in domains (to avoid use of the + same resource). */ +static int opt_xencons = 0; +boolean_param("xencons", opt_xencons); + +/* Toggle to allow non-legacy xencons UARTs to run in polling mode */ +static int opt_xencons_poll = 0; +boolean_param("xencons_poll", opt_xencons_poll); /* * opt_xenheap_megabytes: Size of Xen heap in megabytes, including: @@ -143,6 +154,10 @@ struct ns16550_defaults ns16550_com1 = { .stop_bits = 1 }; +unsigned int ns16550_com1_gsi; +unsigned int ns16550_com1_polarity; +unsigned int ns16550_com1_trigger; + struct ns16550_defaults ns16550_com2 = { .baud = BAUD_AUTO, .data_bits = 8, @@ -231,7 +246,7 @@ md_overlaps(efi_memory_desc_t *md, unsig void start_kernel(void) { - unsigned char *cmdline; + char *cmdline; void *heap_start; unsigned long nr_pages; unsigned long dom0_memory_start, dom0_memory_size; @@ -247,7 +262,7 @@ void start_kernel(void) /* Kernel may be relocated by EFI loader */ xen_pstart = ia64_tpa(KERNEL_START); - early_setup_arch((char **) &cmdline); + early_setup_arch(&cmdline); /* We initialise the serial devices very early so we can get debugging. 
*/ if (running_on_sim) hpsim_serial_init(); @@ -408,28 +423,21 @@ void start_kernel(void) (xenheap_phys_end-__pa(heap_start)) >> 20, (xenheap_phys_end-__pa(heap_start)) >> 10); -printk("About to call scheduler_init()\n"); scheduler_init(); idle_vcpu[0] = (struct vcpu*) ia64_r13; - idle_domain = domain_create(IDLE_DOMAIN_ID, 0); - BUG_ON(idle_domain == NULL); - - late_setup_arch((char **) &cmdline); + idle_domain = domain_create(IDLE_DOMAIN_ID); + if ( (idle_domain == NULL) || (alloc_vcpu(idle_domain, 0, 0) == NULL) ) + BUG(); + + late_setup_arch(&cmdline); alloc_dom_xen_and_dom_io(); setup_per_cpu_areas(); mem_init(); local_irq_disable(); init_IRQ (); -printk("About to call init_xen_time()\n"); init_xen_time(); /* initialise the time */ -printk("About to call timer_init()\n"); timer_init(); - -#ifdef CONFIG_XEN_CONSOLE_INPUT /* CONFIG_SERIAL_8250_CONSOLE=n in dom0! */ - initialize_keytable(); - serial_init_postirq(); -#endif #ifdef CONFIG_SMP if ( opt_nosmp ) @@ -472,17 +480,32 @@ printk("num_online_cpus=%d, max_cpus=%d\ initialise_gdb(); /* could be moved earlier */ do_initcalls(); -printk("About to call sort_main_extable()\n"); sort_main_extable(); - init_rid_allocator (); + local_irq_enable(); + + if (opt_xencons) { + initialize_keytable(); + if (ns16550_com1_gsi) { + if (opt_xencons_poll || + iosapic_register_intr(ns16550_com1_gsi, + ns16550_com1_polarity, + ns16550_com1_trigger) < 0) { + ns16550_com1.irq = 0; + ns16550_init(0, &ns16550_com1); + } + } + serial_init_postirq(); + + /* Hide the HCDP table from dom0 */ + efi.hcdp = NULL; + } + /* Create initial domain 0. 
*/ -printk("About to call domain_create()\n"); - dom0 = domain_create(0, 0); - - if ( dom0 == NULL ) + dom0 = domain_create(0); + if ( (dom0 == NULL) || (alloc_vcpu(dom0, 0, 0) == NULL) ) panic("Error creating domain 0\n"); set_bit(_DOMF_privileged, &dom0->domain_flags); @@ -491,7 +514,6 @@ printk("About to call domain_create()\n" * We're going to setup domain0 using the module(s) that we stashed safely * above our heap. The second module, if present, is an initrd ramdisk. */ - printk("About to call construct_dom0()\n"); dom0_memory_start = (unsigned long) __va(ia64_boot_param->domain_start); dom0_memory_size = ia64_boot_param->domain_size; dom0_initrd_start = (unsigned long) __va(ia64_boot_param->initrd_start); @@ -508,24 +530,17 @@ printk("About to call domain_create()\n" if (!running_on_sim) // slow on ski and pages are pre-initialized to zero scrub_heap_pages(); -printk("About to call init_trace_bufs()\n"); init_trace_bufs(); -#ifdef CONFIG_XEN_CONSOLE_INPUT /* CONFIG_SERIAL_8250_CONSOLE=n in dom0! */ - console_endboot(); -#endif + if (opt_xencons) + console_endboot(); domain0_ready = 1; - local_irq_enable(); - - printf("About to call schedulers_start dom0=%p, idle_dom=%p\n", - dom0, idle_domain); schedulers_start(); domain_unpause_by_systemcontroller(dom0); -printk("About to call startup_cpu_idle_loop()\n"); startup_cpu_idle_loop(); } diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/ppc/setup.c --- a/xen/arch/ppc/setup.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/ppc/setup.c Thu Jul 06 06:47:16 2006 -0400 @@ -33,6 +33,7 @@ #include <xen/gdbstub.h> #include <xen/symbols.h> #include <xen/keyhandler.h> +#include <acm/acm_hooks.h> #include <public/version.h> #include <asm/processor.h> #include <asm/desc.h> @@ -112,7 +113,8 @@ static void hw_probe_attn(unsigned char { /* To continue the probe will step over the ATTN instruction. The * NOP is there to make sure there is something sane to "step - * over" to. */ + * over" to. 
If you are using GDB you may use the "return" + * command to immediately exit this function. */ asm volatile(".long 0x00000200; nop"); } @@ -125,8 +127,9 @@ static void __init start_of_day(void) scheduler_init(); /* create idle domain */ - idle_domain = domain_create(IDLE_DOMAIN_ID, 0); - BUG_ON(idle_domain == NULL); + idle_domain = domain_create(IDLE_DOMAIN_ID); + if ((idle_domain == NULL) || (alloc_vcpu(idle_domain, 0, 0) == NULL)) + BUG(); set_current(idle_domain->vcpu[0]); idle_vcpu[0] = current; @@ -287,10 +290,13 @@ static void __init __start_xen(multiboot start_of_day(); /* Create initial domain 0. */ - dom0 = domain_create(0, 0); - if (dom0 == NULL) + dom0 = domain_create(0); + if ((dom0 == NULL) || (alloc_vcpu(dom0, 0, 0) == NULL)) panic("Error creating domain 0\n"); + set_bit(_DOMF_privileged, &dom0->domain_flags); + /* post-create hooks sets security label */ + acm_post_domain0_create(dom0->domain_id); cmdline = (char *)(mod[0].string ? __va((ulong)mod[0].string) : NULL); diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/x86/domain.c Thu Jul 06 06:47:16 2006 -0400 @@ -951,7 +951,7 @@ void domain_relinquish_resources(struct } } - if ( hvm_guest(d->vcpu[0]) ) + if ( d->vcpu[0] && hvm_guest(d->vcpu[0]) ) hvm_relinquish_guest_resources(d); shadow_mode_disable(d); diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/x86/hvm/vmx/vmcs.c --- a/xen/arch/x86/hvm/vmx/vmcs.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Thu Jul 06 06:47:16 2006 -0400 @@ -36,38 +36,57 @@ #include <xen/kernel.h> #include <asm/shadow.h> #include <xen/keyhandler.h> + #if CONFIG_PAGING_LEVELS >= 3 #include <asm/shadow_64.h> #endif -int vmcs_size; - -struct vmcs_struct *vmx_alloc_vmcs(void) +static int vmcs_size; +static int vmcs_order; +static u32 vmcs_revision_id; + +void vmx_init_vmcs_config(void) +{ + u32 vmx_msr_low, vmx_msr_high; + + if ( vmcs_size ) + return; + + 
rdmsr(MSR_IA32_VMX_BASIC_MSR, vmx_msr_low, vmx_msr_high); + + vmcs_revision_id = vmx_msr_low; + + vmcs_size = vmx_msr_high & 0x1fff; + vmcs_order = get_order_from_bytes(vmcs_size); +} + +static struct vmcs_struct *vmx_alloc_vmcs(void) { struct vmcs_struct *vmcs; - u32 vmx_msr_low, vmx_msr_high; - - rdmsr(MSR_IA32_VMX_BASIC_MSR, vmx_msr_low, vmx_msr_high); - vmcs_size = vmx_msr_high & 0x1fff; - vmcs = alloc_xenheap_pages(get_order_from_bytes(vmcs_size)); - memset((char *)vmcs, 0, vmcs_size); /* don't remove this */ - - vmcs->vmcs_revision_id = vmx_msr_low; + + if ( (vmcs = alloc_xenheap_pages(vmcs_order)) == NULL ) + { + DPRINTK("Failed to allocate VMCS.\n"); + return NULL; + } + + memset(vmcs, 0, vmcs_size); /* don't remove this */ + vmcs->vmcs_revision_id = vmcs_revision_id; + return vmcs; } -static void free_vmcs(struct vmcs_struct *vmcs) -{ - int order; - - order = get_order_from_bytes(vmcs_size); - free_xenheap_pages(vmcs, order); +static void vmx_free_vmcs(struct vmcs_struct *vmcs) +{ + free_xenheap_pages(vmcs, vmcs_order); } static void __vmx_clear_vmcs(void *info) { struct vcpu *v = info; + __vmpclear(virt_to_maddr(v->arch.hvm_vmx.vmcs)); + v->arch.hvm_vmx.active_cpu = -1; v->arch.hvm_vmx.launched = 0; } @@ -127,11 +146,19 @@ void vmx_vmcs_exit(struct vcpu *v) vcpu_unpause(v); } +struct vmcs_struct *vmx_alloc_host_vmcs(void) +{ + return vmx_alloc_vmcs(); +} + +void vmx_free_host_vmcs(struct vmcs_struct *vmcs) +{ + vmx_free_vmcs(vmcs); +} + static inline int construct_vmcs_controls(struct arch_vmx_struct *arch_vmx) { int error = 0; - void *io_bitmap_a; - void *io_bitmap_b; error |= __vmwrite(PIN_BASED_VM_EXEC_CONTROL, MONITOR_PIN_BASED_EXEC_CONTROLS); @@ -140,19 +167,8 @@ static inline int construct_vmcs_control error |= __vmwrite(VM_ENTRY_CONTROLS, MONITOR_VM_ENTRY_CONTROLS); - /* need to use 0x1000 instead of PAGE_SIZE */ - io_bitmap_a = (void*) alloc_xenheap_pages(get_order_from_bytes(0x1000)); - io_bitmap_b = (void*) 
alloc_xenheap_pages(get_order_from_bytes(0x1000)); - memset(io_bitmap_a, 0xff, 0x1000); - /* don't bother debug port access */ - clear_bit(PC_DEBUG_PORT, io_bitmap_a); - memset(io_bitmap_b, 0xff, 0x1000); - - error |= __vmwrite(IO_BITMAP_A, (u64) virt_to_maddr(io_bitmap_a)); - error |= __vmwrite(IO_BITMAP_B, (u64) virt_to_maddr(io_bitmap_b)); - - arch_vmx->io_bitmap_a = io_bitmap_a; - arch_vmx->io_bitmap_b = io_bitmap_b; + error |= __vmwrite(IO_BITMAP_A, (u64)virt_to_maddr(arch_vmx->io_bitmap_a)); + error |= __vmwrite(IO_BITMAP_B, (u64)virt_to_maddr(arch_vmx->io_bitmap_b)); return error; } @@ -428,84 +444,71 @@ static inline int construct_vmcs_host(vo } /* - * Need to extend to support full virtualization. + * the working VMCS pointer has been set properly + * just before entering this function. */ static int construct_vmcs(struct vcpu *v, cpu_user_regs_t *regs) { struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx; int error; - long rc; - - memset(arch_vmx, 0, sizeof(struct arch_vmx_struct)); - - spin_lock_init(&arch_vmx->vmcs_lock); - arch_vmx->active_cpu = -1; - - /* - * Create a new VMCS - */ - if (!(arch_vmx->vmcs = vmx_alloc_vmcs())) { - printk("Failed to create a new VMCS\n"); + + if ( (error = construct_vmcs_controls(arch_vmx)) ) { + printk("construct_vmcs: construct_vmcs_controls failed.\n"); + return error; + } + + /* host selectors */ + if ( (error = construct_vmcs_host()) ) { + printk("construct_vmcs: construct_vmcs_host failed.\n"); + return error; + } + + /* guest selectors */ + if ( (error = construct_init_vmcs_guest(regs)) ) { + printk("construct_vmcs: construct_vmcs_guest failed.\n"); + return error; + } + + if ( (error = __vmwrite(EXCEPTION_BITMAP, + MONITOR_DEFAULT_EXCEPTION_BITMAP)) ) { + printk("construct_vmcs: setting exception bitmap failed.\n"); + return error; + } + + if ( regs->eflags & EF_TF ) + error = __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB); + else + error = __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB); + + return 
error; +} + +int vmx_create_vmcs(struct vcpu *v) +{ + if ( (v->arch.hvm_vmx.vmcs = vmx_alloc_vmcs()) == NULL ) return -ENOMEM; - } + __vmx_clear_vmcs(v); + return 0; +} + +void vmx_destroy_vmcs(struct vcpu *v) +{ + struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx; + + if ( arch_vmx->vmcs == NULL ) + return; vmx_clear_vmcs(v); - vmx_load_vmcs(v); - - if ((error = construct_vmcs_controls(arch_vmx))) { - printk("construct_vmcs: construct_vmcs_controls failed\n"); - rc = -EINVAL; - goto err_out; - } - - /* host selectors */ - if ((error = construct_vmcs_host())) { - printk("construct_vmcs: construct_vmcs_host failed\n"); - rc = -EINVAL; - goto err_out; - } - - /* guest selectors */ - if ((error = construct_init_vmcs_guest(regs))) { - printk("construct_vmcs: construct_vmcs_guest failed\n"); - rc = -EINVAL; - goto err_out; - } - - if ((error |= __vmwrite(EXCEPTION_BITMAP, - MONITOR_DEFAULT_EXCEPTION_BITMAP))) { - printk("construct_vmcs: setting Exception bitmap failed\n"); - rc = -EINVAL; - goto err_out; - } - - if (regs->eflags & EF_TF) - __vm_set_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB); - else - __vm_clear_bit(EXCEPTION_BITMAP, EXCEPTION_BITMAP_DB); - - return 0; - -err_out: - vmx_destroy_vmcs(v); - return rc; -} - -void vmx_destroy_vmcs(struct vcpu *v) -{ - struct arch_vmx_struct *arch_vmx = &v->arch.hvm_vmx; - - vmx_clear_vmcs(v); - - free_vmcs(arch_vmx->vmcs); + + free_xenheap_pages(arch_vmx->io_bitmap_a, IO_BITMAP_ORDER); + free_xenheap_pages(arch_vmx->io_bitmap_b, IO_BITMAP_ORDER); + + arch_vmx->io_bitmap_a = NULL; + arch_vmx->io_bitmap_b = NULL; + + vmx_free_vmcs(arch_vmx->vmcs); arch_vmx->vmcs = NULL; - - free_xenheap_pages(arch_vmx->io_bitmap_a, get_order_from_bytes(0x1000)); - arch_vmx->io_bitmap_a = NULL; - - free_xenheap_pages(arch_vmx->io_bitmap_b, get_order_from_bytes(0x1000)); - arch_vmx->io_bitmap_b = NULL; } void vm_launch_fail(unsigned long eflags) @@ -544,19 +547,20 @@ void arch_vmx_do_resume(struct vcpu *v) void arch_vmx_do_launch(struct vcpu 
*v) { - int error; cpu_user_regs_t *regs = &current->arch.guest_context.user_regs; - error = construct_vmcs(v, regs); - if ( error < 0 ) + vmx_load_vmcs(v); + + if ( construct_vmcs(v, regs) < 0 ) { - if (v->vcpu_id == 0) { - printk("Failed to construct a new VMCS for BSP.\n"); + if ( v->vcpu_id == 0 ) { + printk("Failed to construct VMCS for BSP.\n"); } else { - printk("Failed to construct a new VMCS for AP %d\n", v->vcpu_id); + printk("Failed to construct VMCS for AP %d.\n", v->vcpu_id); } domain_crash_synchronous(); } + vmx_do_launch(v); reset_stack_and_jump(vmx_asm_do_vmentry); } diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/x86/hvm/vmx/vmx.c --- a/xen/arch/x86/hvm/vmx/vmx.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/x86/hvm/vmx/vmx.c Thu Jul 06 06:47:16 2006 -0400 @@ -54,34 +54,73 @@ static void vmx_ctxt_switch_from(struct static void vmx_ctxt_switch_from(struct vcpu *v); static void vmx_ctxt_switch_to(struct vcpu *v); -void vmx_final_setup_guest(struct vcpu *v) -{ +static int vmx_initialize_guest_resources(struct vcpu *v) +{ + struct domain *d = v->domain; + struct vcpu *vc; + void *io_bitmap_a, *io_bitmap_b; + int rc; + v->arch.schedule_tail = arch_vmx_do_launch; v->arch.ctxt_switch_from = vmx_ctxt_switch_from; v->arch.ctxt_switch_to = vmx_ctxt_switch_to; - if ( v->vcpu_id == 0 ) - { - struct domain *d = v->domain; - struct vcpu *vc; - + if ( v->vcpu_id != 0 ) + return 1; + + for_each_vcpu ( d, vc ) + { /* Initialize monitor page table */ - for_each_vcpu(d, vc) - vc->arch.monitor_table = pagetable_null(); - - /* - * Required to do this once per domain - * XXX todo: add a seperate function to do these. - */ - memset(&d->shared_info->evtchn_mask[0], 0xff, - sizeof(d->shared_info->evtchn_mask)); - - /* Put the domain in shadow mode even though we're going to be using - * the shared 1:1 page table initially. 
It shouldn't hurt */ - shadow_mode_enable(d, - SHM_enable|SHM_refcounts| - SHM_translate|SHM_external|SHM_wr_pt_pte); - } + vc->arch.monitor_table = pagetable_null(); + + memset(&vc->arch.hvm_vmx, 0, sizeof(struct arch_vmx_struct)); + + if ( (rc = vmx_create_vmcs(vc)) != 0 ) + { + DPRINTK("Failed to create VMCS for vcpu %d: err=%d.\n", + vc->vcpu_id, rc); + return 0; + } + + spin_lock_init(&vc->arch.hvm_vmx.vmcs_lock); + + if ( (io_bitmap_a = alloc_xenheap_pages(IO_BITMAP_ORDER)) == NULL ) + { + DPRINTK("Failed to allocate io bitmap b for vcpu %d.\n", + vc->vcpu_id); + return 0; + } + + if ( (io_bitmap_b = alloc_xenheap_pages(IO_BITMAP_ORDER)) == NULL ) + { + DPRINTK("Failed to allocate io bitmap b for vcpu %d.\n", + vc->vcpu_id); + return 0; + } + + memset(io_bitmap_a, 0xff, 0x1000); + memset(io_bitmap_b, 0xff, 0x1000); + + /* don't bother debug port access */ + clear_bit(PC_DEBUG_PORT, io_bitmap_a); + + vc->arch.hvm_vmx.io_bitmap_a = io_bitmap_a; + vc->arch.hvm_vmx.io_bitmap_b = io_bitmap_b; + } + + /* + * Required to do this once per domain XXX todo: add a seperate function + * to do these. + */ + memset(&d->shared_info->evtchn_mask[0], 0xff, + sizeof(d->shared_info->evtchn_mask)); + + /* Put the domain in shadow mode even though we're going to be using + * the shared 1:1 page table initially. 
It shouldn't hurt */ + shadow_mode_enable( + d, SHM_enable|SHM_refcounts|SHM_translate|SHM_external|SHM_wr_pt_pte); + + return 1; } static void vmx_relinquish_guest_resources(struct domain *d) @@ -90,9 +129,9 @@ static void vmx_relinquish_guest_resourc for_each_vcpu ( d, v ) { + vmx_destroy_vmcs(v); if ( !test_bit(_VCPUF_initialised, &v->vcpu_flags) ) continue; - vmx_destroy_vmcs(v); free_monitor_pagetable(v); kill_timer(&v->arch.hvm_vmx.hlt_timer); if ( hvm_apic_support(v->domain) && (VLAPIC(v) != NULL) ) @@ -442,12 +481,6 @@ void stop_vmx(void) { if (read_cr4() & X86_CR4_VMXE) __vmxoff(); -} - -int vmx_initialize_guest_resources(struct vcpu *v) -{ - vmx_final_setup_guest(v); - return 1; } void vmx_migrate_timers(struct vcpu *v) @@ -638,58 +671,61 @@ static int check_vmx_controls(u32 ctrls, int start_vmx(void) { + u32 eax, edx; struct vmcs_struct *vmcs; - u32 ecx; - u32 eax, edx; - u64 phys_vmcs; /* debugging */ /* * Xen does not fill x86_capability words except 0. */ - ecx = cpuid_ecx(1); - boot_cpu_data.x86_capability[4] = ecx; + boot_cpu_data.x86_capability[4] = cpuid_ecx(1); if (!(test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability))) return 0; rdmsr(IA32_FEATURE_CONTROL_MSR, eax, edx); - if (eax & IA32_FEATURE_CONTROL_MSR_LOCK) { - if ((eax & IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON) == 0x0) { + if ( eax & IA32_FEATURE_CONTROL_MSR_LOCK ) + { + if ( (eax & IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON) == 0x0 ) + { printk("VMX disabled by Feature Control MSR.\n"); return 0; } } - else { + else + { wrmsr(IA32_FEATURE_CONTROL_MSR, IA32_FEATURE_CONTROL_MSR_LOCK | IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON, 0); } - if (!check_vmx_controls(MONITOR_PIN_BASED_EXEC_CONTROLS, - MSR_IA32_VMX_PINBASED_CTLS_MSR)) + if ( !check_vmx_controls(MONITOR_PIN_BASED_EXEC_CONTROLS, + MSR_IA32_VMX_PINBASED_CTLS_MSR) ) return 0; - if (!check_vmx_controls(MONITOR_CPU_BASED_EXEC_CONTROLS, - MSR_IA32_VMX_PROCBASED_CTLS_MSR)) + if ( !check_vmx_controls(MONITOR_CPU_BASED_EXEC_CONTROLS, + 
MSR_IA32_VMX_PROCBASED_CTLS_MSR) ) return 0; - if (!check_vmx_controls(MONITOR_VM_EXIT_CONTROLS, - MSR_IA32_VMX_EXIT_CTLS_MSR)) + if ( !check_vmx_controls(MONITOR_VM_EXIT_CONTROLS, + MSR_IA32_VMX_EXIT_CTLS_MSR) ) return 0; - if (!check_vmx_controls(MONITOR_VM_ENTRY_CONTROLS, - MSR_IA32_VMX_ENTRY_CTLS_MSR)) + if ( !check_vmx_controls(MONITOR_VM_ENTRY_CONTROLS, + MSR_IA32_VMX_ENTRY_CTLS_MSR) ) return 0; - set_in_cr4(X86_CR4_VMXE); /* Enable VMXE */ - - if (!(vmcs = vmx_alloc_vmcs())) { - printk("Failed to allocate VMCS\n"); + set_in_cr4(X86_CR4_VMXE); + + vmx_init_vmcs_config(); + + if ( (vmcs = vmx_alloc_host_vmcs()) == NULL ) + { + printk("Failed to allocate host VMCS\n"); return 0; } - phys_vmcs = (u64) virt_to_maddr(vmcs); - - if (__vmxon(phys_vmcs)) { + if ( __vmxon(virt_to_maddr(vmcs)) ) + { printk("VMXON failed\n"); + vmx_free_host_vmcs(vmcs); return 0; } @@ -857,10 +893,14 @@ static void vmx_vmexit_do_cpuid(struct c #else if ( v->domain->arch.ops->guest_paging_levels == PAGING_L2 ) { - if ( !v->domain->arch.hvm_domain.pae_enabled ) + if ( v->domain->arch.hvm_domain.pae_enabled ) + clear_bit(X86_FEATURE_PSE36, &edx); + else + { clear_bit(X86_FEATURE_PAE, &edx); - clear_bit(X86_FEATURE_PSE, &edx); - clear_bit(X86_FEATURE_PSE36, &edx); + clear_bit(X86_FEATURE_PSE, &edx); + clear_bit(X86_FEATURE_PSE36, &edx); + } } #endif @@ -2053,8 +2093,26 @@ asmlinkage void vmx_vmexit_handler(struc if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) ) { - printk("Failed vm entry (reason 0x%x)\n", exit_reason); - printk("*********** VMCS Area **************\n"); + unsigned int failed_vmentry_reason = exit_reason & 0xFFFF; + + __vmread(EXIT_QUALIFICATION, &exit_qualification); + printk("Failed vm entry (exit reason 0x%x) ", exit_reason); + switch ( failed_vmentry_reason ) { + case EXIT_REASON_INVALID_GUEST_STATE: + printk("caused by invalid guest state (%ld).\n", exit_qualification); + break; + case EXIT_REASON_MSR_LOADING: + printk("caused by MSR entry %ld loading.\n", 
exit_qualification); + break; + case EXIT_REASON_MACHINE_CHECK: + printk("caused by machine check.\n"); + break; + default: + printk("reason not known yet!"); + break; + } + + printk("************* VMCS Area **************\n"); vmcs_dump_vcpu(); printk("**************************************\n"); domain_crash_synchronous(); diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/x86/setup.c --- a/xen/arch/x86/setup.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/x86/setup.c Thu Jul 06 06:47:16 2006 -0400 @@ -439,8 +439,9 @@ void __init __start_xen(multiboot_info_t scheduler_init(); - idle_domain = domain_create(IDLE_DOMAIN_ID, 0); - BUG_ON(idle_domain == NULL); + idle_domain = domain_create(IDLE_DOMAIN_ID); + if ( (idle_domain == NULL) || (alloc_vcpu(idle_domain, 0, 0) == NULL) ) + BUG(); set_current(idle_domain->vcpu[0]); this_cpu(curr_vcpu) = idle_domain->vcpu[0]; @@ -537,8 +538,8 @@ void __init __start_xen(multiboot_info_t acm_init(&initrdidx, mbi, initial_images_start); /* Create initial domain 0. 
*/ - dom0 = domain_create(0, 0); - if ( dom0 == NULL ) + dom0 = domain_create(0); + if ( (dom0 == NULL) || (alloc_vcpu(dom0, 0, 0) == NULL) ) panic("Error creating domain 0\n"); set_bit(_DOMF_privileged, &dom0->domain_flags); @@ -624,7 +625,7 @@ void arch_get_xen_caps(xen_capabilities_ if ( hvm_enabled ) { p += sprintf(p, "hvm-%d.%d-x86_32 ", XEN_VERSION, XEN_SUBVERSION); - //p += sprintf(p, "hvm-%d.%d-x86_32p ", XEN_VERSION, XEN_SUBVERSION); + p += sprintf(p, "hvm-%d.%d-x86_32p ", XEN_VERSION, XEN_SUBVERSION); } #elif defined(CONFIG_X86_64) diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/x86/shadow.c --- a/xen/arch/x86/shadow.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/x86/shadow.c Thu Jul 06 06:47:16 2006 -0400 @@ -1726,6 +1726,7 @@ static int resync_all(struct domain *d, { guest_l1_pgentry_t tmp_gl1e = guest_l1e_empty(); validate_pte_change(d, tmp_gl1e, sl1e_p); + unshadow_l1 = 1; continue; } #endif @@ -3676,20 +3677,19 @@ static inline int l2e_rw_fault( put_page_from_l1e(old_sl1e, d); } - l1_p[gpfn - start_gpfn] = sl1e; - if (rw) { /* shadow_mark_va_out_of_sync() need modificatin for 2M pages*/ if ( mfn_is_page_table(mfn) ) shadow_mark_va_out_of_sync_2mp(v, gpfn, mfn, l2e_get_paddr(sl2e) | (sizeof(l1_pgentry_t) * (gpfn - start_gpfn))); } + + l1_p[gpfn - start_gpfn] = sl1e; } unmap_domain_page(l1_p); *gl2e_p = gl2e; return 1; - } /* @@ -3724,7 +3724,13 @@ static inline int guest_page_fault( } if ( guest_l2e_get_flags(*gpl2e) & _PAGE_PSE ) + { + printk("None-PAE HVM guests can NOT use PSE, " + "because we don't support 4MBytes PSE pages.\n"); + printk("remove pae=1 from your config file.\n"); + domain_crash_synchronous(); return 0; + } __guest_get_l1e(v, va, gpl1e); diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/x86/traps.c Thu Jul 06 06:47:16 2006 -0400 @@ -1397,13 +1397,14 @@ static void nmi_dom0_report(unsigned int static void nmi_dom0_report(unsigned int reason_idx) { struct 
domain *d; - - if ( (d = dom0) == NULL ) + struct vcpu *v; + + if ( ((d = dom0) == NULL) || ((v = d->vcpu[0]) == NULL) ) return; set_bit(reason_idx, &d->shared_info->arch.nmi_reason); - if ( test_and_set_bit(_VCPUF_nmi_pending, &d->vcpu[0]->vcpu_flags) ) + if ( test_and_set_bit(_VCPUF_nmi_pending, &v->vcpu_flags) ) raise_softirq(NMI_SOFTIRQ); /* not safe to wake up a vcpu here */ } diff -r 17f7a426b2cd -r e6c7667c97ed xen/arch/x86/x86_emulate.c --- a/xen/arch/x86/x86_emulate.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/arch/x86/x86_emulate.c Thu Jul 06 06:47:16 2006 -0400 @@ -118,7 +118,7 @@ static uint8_t opcode_table[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xC0 - 0xC7 */ ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, 0, 0, - 0, 0, ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM, + 0, 0, ByteOp|DstMem|SrcImm|ModRM|Mov, DstMem|SrcImm|ModRM|Mov, /* 0xC8 - 0xCF */ 0, 0, 0, 0, 0, 0, 0, 0, /* 0xD0 - 0xD7 */ diff -r 17f7a426b2cd -r e6c7667c97ed xen/common/dom0_ops.c --- a/xen/common/dom0_ops.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/common/dom0_ops.c Thu Jul 06 06:47:16 2006 -0400 @@ -90,6 +90,44 @@ static void getdomaininfo(struct domain memcpy(info->handle, d->handle, sizeof(xen_domain_handle_t)); } +static unsigned int default_vcpu0_location(void) +{ + struct domain *d; + struct vcpu *v; + unsigned int i, cpu, cnt[NR_CPUS] = { 0 }; + cpumask_t cpu_exclude_map; + + /* Do an initial CPU placement. Pick the least-populated CPU. */ + read_lock(&domlist_lock); + for_each_domain ( d ) + for_each_vcpu ( d, v ) + if ( !test_bit(_VCPUF_down, &v->vcpu_flags) ) + cnt[v->processor]++; + read_unlock(&domlist_lock); + + /* + * If we're on a HT system, we only auto-allocate to a non-primary HT. We + * favour high numbered CPUs in the event of a tie. 
+ */ + cpu = first_cpu(cpu_sibling_map[0]); + if ( cpus_weight(cpu_sibling_map[0]) > 1 ) + cpu = next_cpu(cpu, cpu_sibling_map[0]); + cpu_exclude_map = cpu_sibling_map[0]; + for_each_online_cpu ( i ) + { + if ( cpu_isset(i, cpu_exclude_map) ) + continue; + if ( (i == first_cpu(cpu_sibling_map[i])) && + (cpus_weight(cpu_sibling_map[i]) > 1) ) + continue; + cpus_or(cpu_exclude_map, cpu_exclude_map, cpu_sibling_map[i]); + if ( cnt[i] <= cnt[cpu] ) + cpu = i; + } + + return cpu; +} + long do_dom0_op(XEN_GUEST_HANDLE(dom0_op_t) u_dom0_op) { long ret = 0; @@ -150,7 +188,7 @@ long do_dom0_op(XEN_GUEST_HANDLE(dom0_op if ( d != NULL ) { ret = -EINVAL; - if ( (d != current->domain) && + if ( (d != current->domain) && (d->vcpu[0] != NULL) && test_bit(_VCPUF_initialised, &d->vcpu[0]->vcpu_flags) ) { domain_unpause_by_systemcontroller(d); @@ -164,11 +202,7 @@ long do_dom0_op(XEN_GUEST_HANDLE(dom0_op case DOM0_CREATEDOMAIN: { struct domain *d; - unsigned int pro; domid_t dom; - struct vcpu *v; - unsigned int i, cnt[NR_CPUS] = { 0 }; - cpumask_t cpu_exclude_map; static domid_t rover = 0; /* @@ -202,36 +236,8 @@ long do_dom0_op(XEN_GUEST_HANDLE(dom0_op rover = dom; } - /* Do an initial CPU placement. Pick the least-populated CPU. */ - read_lock(&domlist_lock); - for_each_domain ( d ) - for_each_vcpu ( d, v ) - if ( !test_bit(_VCPUF_down, &v->vcpu_flags) ) - cnt[v->processor]++; - read_unlock(&domlist_lock); - - /* - * If we're on a HT system, we only auto-allocate to a non-primary HT. - * We favour high numbered CPUs in the event of a tie. 
- */ - pro = first_cpu(cpu_sibling_map[0]); - if ( cpus_weight(cpu_sibling_map[0]) > 1 ) - pro = next_cpu(pro, cpu_sibling_map[0]); - cpu_exclude_map = cpu_sibling_map[0]; - for_each_online_cpu ( i ) - { - if ( cpu_isset(i, cpu_exclude_map) ) - continue; - if ( (i == first_cpu(cpu_sibling_map[i])) && - (cpus_weight(cpu_sibling_map[i]) > 1) ) - continue; - cpus_or(cpu_exclude_map, cpu_exclude_map, cpu_sibling_map[i]); - if ( cnt[i] <= cnt[pro] ) - pro = i; - } - ret = -ENOMEM; - if ( (d = domain_create(dom, pro)) == NULL ) + if ( (d = domain_create(dom)) == NULL ) break; memcpy(d->handle, op->u.createdomain.handle, @@ -258,14 +264,8 @@ long do_dom0_op(XEN_GUEST_HANDLE(dom0_op if ( (d = find_domain_by_id(op->u.max_vcpus.domain)) == NULL ) break; - /* - * Can only create new VCPUs while the domain is not fully constructed - * (and hence not runnable). Xen needs auditing for races before - * removing this check. - */ - ret = -EINVAL; - if ( test_bit(_VCPUF_initialised, &d->vcpu[0]->vcpu_flags) ) - goto maxvcpu_out; + /* Needed, for example, to ensure writable p.t. state is synced. */ + domain_pause(d); /* We cannot reduce maximum VCPUs. */ ret = -EINVAL; @@ -275,17 +275,21 @@ long do_dom0_op(XEN_GUEST_HANDLE(dom0_op ret = -ENOMEM; for ( i = 0; i < max; i++ ) { - if ( d->vcpu[i] == NULL ) - { - cpu = (d->vcpu[i-1]->processor + 1) % num_online_cpus(); - if ( alloc_vcpu(d, i, cpu) == NULL ) - goto maxvcpu_out; - } + if ( d->vcpu[i] != NULL ) + continue; + + cpu = (i == 0) ? 
+ default_vcpu0_location() : + (d->vcpu[i-1]->processor + 1) % num_online_cpus(); + + if ( alloc_vcpu(d, i, cpu) == NULL ) + goto maxvcpu_out; } ret = 0; maxvcpu_out: + domain_unpause(d); put_domain(d); } break; diff -r 17f7a426b2cd -r e6c7667c97ed xen/common/domain.c --- a/xen/common/domain.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/common/domain.c Thu Jul 06 06:47:16 2006 -0400 @@ -46,6 +46,7 @@ struct domain *alloc_domain(domid_t domi atomic_set(&d->refcnt, 1); spin_lock_init(&d->big_lock); spin_lock_init(&d->page_alloc_lock); + spin_lock_init(&d->pause_lock); INIT_LIST_HEAD(&d->page_list); INIT_LIST_HEAD(&d->xenpage_list); @@ -81,8 +82,8 @@ struct vcpu *alloc_vcpu( v->domain = d; v->vcpu_id = vcpu_id; v->processor = cpu_id; - atomic_set(&v->pausecnt, 0); v->vcpu_info = &d->shared_info->vcpu_info[vcpu_id]; + spin_lock_init(&v->pause_lock); v->cpu_affinity = is_idle_domain(d) ? cpumask_of_cpu(cpu_id) : CPU_MASK_ALL; @@ -110,30 +111,22 @@ struct vcpu *alloc_idle_vcpu(unsigned in { struct domain *d; struct vcpu *v; - unsigned int vcpu_id; - - if ((vcpu_id = cpu_id % MAX_VIRT_CPUS) == 0) - { - d = domain_create(IDLE_DOMAIN_ID, cpu_id); - BUG_ON(d == NULL); - v = d->vcpu[0]; - } - else - { - d = idle_vcpu[cpu_id - vcpu_id]->domain; - BUG_ON(d == NULL); - v = alloc_vcpu(d, vcpu_id, cpu_id); - } - + unsigned int vcpu_id = cpu_id % MAX_VIRT_CPUS; + + d = (vcpu_id == 0) ? 
+ domain_create(IDLE_DOMAIN_ID) : + idle_vcpu[cpu_id - vcpu_id]->domain; + BUG_ON(d == NULL); + + v = alloc_vcpu(d, vcpu_id, cpu_id); idle_vcpu[cpu_id] = v; return v; } -struct domain *domain_create(domid_t domid, unsigned int cpu) +struct domain *domain_create(domid_t domid) { struct domain *d, **pd; - struct vcpu *v; if ( (d = alloc_domain(domid)) == NULL ) return NULL; @@ -152,13 +145,10 @@ struct domain *domain_create(domid_t dom if ( arch_domain_create(d) != 0 ) goto fail3; - if ( (v = alloc_vcpu(d, 0, cpu)) == NULL ) - goto fail4; - d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex); d->irq_caps = rangeset_new(d, "Interrupts", 0); if ( (d->iomem_caps == NULL) || (d->irq_caps == NULL) ) - goto fail4; /* NB. alloc_vcpu() is undone in free_domain() */ + goto fail4; if ( !is_idle_domain(d) ) { @@ -327,11 +317,12 @@ void domain_shutdown(struct domain *d, u d->shutdown_code = reason; /* Put every vcpu to sleep, but don't wait (avoids inter-vcpu deadlock). */ + spin_lock(&d->pause_lock); + d->pause_count++; + set_bit(_DOMF_paused, &d->domain_flags); + spin_unlock(&d->pause_lock); for_each_vcpu ( d, v ) - { - atomic_inc(&v->pausecnt); vcpu_sleep_nosync(v); - } get_knownalive_domain(d); domain_shuttingdown[smp_processor_id()] = d; @@ -398,34 +389,65 @@ void domain_destroy(struct domain *d) void vcpu_pause(struct vcpu *v) { - BUG_ON(v == current); - atomic_inc(&v->pausecnt); + ASSERT(v != current); + + spin_lock(&v->pause_lock); + if ( v->pause_count++ == 0 ) + set_bit(_VCPUF_paused, &v->vcpu_flags); + spin_unlock(&v->pause_lock); + vcpu_sleep_sync(v); } +void vcpu_unpause(struct vcpu *v) +{ + int wake; + + ASSERT(v != current); + + spin_lock(&v->pause_lock); + wake = (--v->pause_count == 0); + if ( wake ) + clear_bit(_VCPUF_paused, &v->vcpu_flags); + spin_unlock(&v->pause_lock); + + if ( wake ) + vcpu_wake(v); +} + void domain_pause(struct domain *d) { struct vcpu *v; + ASSERT(d != current->domain); + + spin_lock(&d->pause_lock); + if ( 
d->pause_count++ == 0 ) + set_bit(_DOMF_paused, &d->domain_flags); + spin_unlock(&d->pause_lock); + for_each_vcpu( d, v ) - vcpu_pause(v); + vcpu_sleep_sync(v); sync_pagetable_state(d); } -void vcpu_unpause(struct vcpu *v) -{ - BUG_ON(v == current); - if ( atomic_dec_and_test(&v->pausecnt) ) - vcpu_wake(v); -} - void domain_unpause(struct domain *d) { struct vcpu *v; - - for_each_vcpu( d, v ) - vcpu_unpause(v); + int wake; + + ASSERT(d != current->domain); + + spin_lock(&d->pause_lock); + wake = (--d->pause_count == 0); + if ( wake ) + clear_bit(_DOMF_paused, &d->domain_flags); + spin_unlock(&d->pause_lock); + + if ( wake ) + for_each_vcpu( d, v ) + vcpu_wake(v); } void domain_pause_by_systemcontroller(struct domain *d) diff -r 17f7a426b2cd -r e6c7667c97ed xen/common/elf.c --- a/xen/common/elf.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/common/elf.c Thu Jul 06 06:47:16 2006 -0400 @@ -95,7 +95,11 @@ int parseelfimage(struct domain_setup_in elf_pa_off = elf_pa_off_defined ? simple_strtoul(p+17, &p, 0) : virt_base; if ( elf_pa_off_defined && !virt_base_defined ) - goto bad_image; + { + printk("ERROR: Neither ELF_PADDR_OFFSET nor VIRT_BASE found in" + " __xen_guest section.\n"); + return -EINVAL; + } for ( h = 0; h < ehdr->e_phnum; h++ ) { @@ -104,7 +108,11 @@ int parseelfimage(struct domain_setup_in continue; vaddr = phdr->p_paddr - elf_pa_off + virt_base; if ( (vaddr + phdr->p_memsz) < vaddr ) - goto bad_image; + { + printk("ERROR: ELF program header %d is too large.\n", h); + return -EINVAL; + } + if ( vaddr < kernstart ) kernstart = vaddr; if ( (vaddr + phdr->p_memsz) > kernend ) @@ -127,7 +135,10 @@ int parseelfimage(struct domain_setup_in (dsi->v_kernentry < kernstart) || (dsi->v_kernentry > kernend) || (dsi->v_start > kernstart) ) - goto bad_image; + { + printk("ERROR: ELF start or entries are out of bounds.\n"); + return -EINVAL; + } if ( (p = strstr(guestinfo, "BSD_SYMTAB")) != NULL ) dsi->load_symtab = 1; @@ -139,10 +150,6 @@ int parseelfimage(struct 
domain_setup_in loadelfsymtab(dsi, 0); return 0; - - bad_image: - printk("Malformed ELF image.\n"); - return -EINVAL; } int loadelfimage(struct domain_setup_info *dsi) diff -r 17f7a426b2cd -r e6c7667c97ed xen/common/event_channel.c --- a/xen/common/event_channel.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/common/event_channel.c Thu Jul 06 06:47:16 2006 -0400 @@ -525,11 +525,16 @@ void send_guest_global_virq(struct domai void send_guest_global_virq(struct domain *d, int virq) { int port; + struct vcpu *v; struct evtchn *chn; ASSERT(virq_is_global(virq)); - port = d->vcpu[0]->virq_to_evtchn[virq]; + v = d->vcpu[0]; + if ( unlikely(v == NULL) ) + return; + + port = v->virq_to_evtchn[virq]; if ( unlikely(port == 0) ) return; diff -r 17f7a426b2cd -r e6c7667c97ed xen/common/keyhandler.c --- a/xen/common/keyhandler.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/common/keyhandler.c Thu Jul 06 06:47:16 2006 -0400 @@ -167,7 +167,7 @@ static void dump_domains(unsigned char k v->vcpu_id, v->processor, test_bit(_VCPUF_running, &v->vcpu_flags) ? 'T':'F', v->vcpu_flags, - (unsigned)v->vcpu_info->evtchn_upcall_pending, + v->vcpu_info->evtchn_upcall_pending, v->vcpu_info->evtchn_upcall_mask); cpuset_print(cpuset, sizeof(cpuset), v->vcpu_dirty_cpumask); printk("dirty_cpus=%s ", cpuset); diff -r 17f7a426b2cd -r e6c7667c97ed xen/common/memory.c --- a/xen/common/memory.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/common/memory.c Thu Jul 06 06:47:16 2006 -0400 @@ -174,7 +174,7 @@ guest_remove_page( { /* We'll make this a guest-visible error in future, so take heed! 
*/ DPRINTK("Dom%d freeing in-use page %lx (pseudophys %lx):" - " count=%"PRIx64" type=%lx\n", + " count=%lx type=%lx\n", d->domain_id, mfn, get_gpfn_from_mfn(mfn), (unsigned long)page->count_info, page->u.inuse.type_info); } diff -r 17f7a426b2cd -r e6c7667c97ed xen/common/sched_credit.c --- a/xen/common/sched_credit.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/common/sched_credit.c Thu Jul 06 06:47:16 2006 -0400 @@ -622,9 +622,12 @@ csched_dom_cntl( if ( cmd->u.credit.weight != 0 ) { - csched_priv.weight -= sdom->weight; + if ( !list_empty(&sdom->active_sdom_elem) ) + { + csched_priv.weight -= sdom->weight; + csched_priv.weight += cmd->u.credit.weight; + } sdom->weight = cmd->u.credit.weight; - csched_priv.weight += sdom->weight; } if ( cmd->u.credit.cap != (uint16_t)~0U ) diff -r 17f7a426b2cd -r e6c7667c97ed xen/common/sched_sedf.c --- a/xen/common/sched_sedf.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/common/sched_sedf.c Thu Jul 06 06:47:16 2006 -0400 @@ -1429,6 +1429,8 @@ static int sedf_adjdom(struct domain *p, } else if ( cmd->direction == SCHED_INFO_GET ) { + if ( p->vcpu[0] == NULL ) + return -EINVAL; cmd->u.sedf.period = EDOM_INFO(p->vcpu[0])->period; cmd->u.sedf.slice = EDOM_INFO(p->vcpu[0])->slice; cmd->u.sedf.extratime = EDOM_INFO(p->vcpu[0])->status & EXTRA_AWARE; diff -r 17f7a426b2cd -r e6c7667c97ed xen/drivers/char/console.c --- a/xen/drivers/char/console.c Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/drivers/char/console.c Thu Jul 06 06:47:16 2006 -0400 @@ -279,7 +279,7 @@ static void switch_serial_input(void) { static char *input_str[2] = { "DOM0", "Xen" }; xen_rx = !xen_rx; - if ( SWITCH_CODE != 0 ) + if ( (SWITCH_CODE != 0) && (dom0 != NULL) ) { printk("*** Serial input -> %s " "(type 'CTRL-%c' three times to switch input to %s).\n", diff -r 17f7a426b2cd -r e6c7667c97ed xen/include/asm-ia64/dom_fw.h --- a/xen/include/asm-ia64/dom_fw.h Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/include/asm-ia64/dom_fw.h Thu Jul 06 06:47:16 2006 -0400 @@ -14,8 +14,8 @@ /* 
This is used to determined the portion of a domain's metaphysical memory space reserved for the hypercall patch table. */ //FIXME: experiment with smaller sizes -#define HYPERCALL_START 1*MB -#define HYPERCALL_END 2*MB +#define HYPERCALL_START 1UL*MB +#define HYPERCALL_END 2UL*MB #define FW_HYPERCALL_BASE_PADDR HYPERCALL_START #define FW_HYPERCALL_END_PADDR HYPERCALL_END @@ -145,6 +145,9 @@ #define FW_HYPERCALL_FPSWA_PATCH_PADDR FW_HYPERCALL_PADDR(FW_HYPERCALL_FPSWA_PATCH_INDEX) #define FW_HYPERCALL_FPSWA 0x500UL +/* Set the shared_info base virtual address. */ +#define FW_HYPERCALL_SET_SHARED_INFO_VA 0x600UL + /* Hypercalls index bellow _FIRST_ARCH are reserved by Xen, while those above are for the architecture. Note: this limit was defined by Xen/ia64 (and not by Xen).² diff -r 17f7a426b2cd -r e6c7667c97ed xen/include/asm-ia64/domain.h --- a/xen/include/asm-ia64/domain.h Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/include/asm-ia64/domain.h Thu Jul 06 06:47:16 2006 -0400 @@ -12,10 +12,37 @@ #include <xen/cpumask.h> #include <asm/fpswa.h> +struct p2m_entry { + volatile pte_t* pte; + pte_t used; +}; + +static inline void +p2m_entry_set(struct p2m_entry* entry, volatile pte_t* pte, pte_t used) +{ + entry->pte = pte; + entry->used = used; +} + +static inline int +p2m_entry_retry(struct p2m_entry* entry) +{ + //XXX see lookup_domain_pte(). + // NULL is set for invalid gpaddr for the time being. + if (entry->pte == NULL) + return 0; + + return (pte_val(*entry->pte) != pte_val(entry->used)); +} + extern void domain_relinquish_resources(struct domain *); /* given a current domain metaphysical address, return the physical address */ -extern unsigned long translate_domain_mpaddr(unsigned long mpaddr); +extern unsigned long translate_domain_mpaddr(unsigned long mpaddr, + struct p2m_entry* entry); + +/* Set shared_info virtual address. */ +extern unsigned long domain_set_shared_info_va (unsigned long va); /* Flush cache of domain d. 
If sync_only is true, only synchronize I&D caches, @@ -74,9 +101,6 @@ struct arch_domain { void *efi_runtime; /* Metaphysical address to fpswa_interface_t in domain firmware memory is set. */ void *fpswa_inf; - - // protect v->itlb, v->dtlb and vhpt - seqlock_t vtlb_lock ____cacheline_aligned_in_smp; }; #define INT_ENABLE_OFFSET(v) \ (sizeof(vcpu_info_t) * (v)->vcpu_id + \ diff -r 17f7a426b2cd -r e6c7667c97ed xen/include/asm-ia64/linux-xen/asm/iosapic.h --- a/xen/include/asm-ia64/linux-xen/asm/iosapic.h Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/include/asm-ia64/linux-xen/asm/iosapic.h Thu Jul 06 06:47:16 2006 -0400 @@ -158,6 +158,7 @@ struct rte_entry { #define IOSAPIC_RTEINDEX(reg) (((reg) - 0x10) >> 1) extern unsigned long ia64_vector_mask[]; +extern unsigned long ia64_xen_vector[]; #endif /* XEN */ # endif /* !__ASSEMBLY__ */ diff -r 17f7a426b2cd -r e6c7667c97ed xen/include/asm-ia64/linux-xen/asm/pgtable.h --- a/xen/include/asm-ia64/linux-xen/asm/pgtable.h Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/include/asm-ia64/linux-xen/asm/pgtable.h Thu Jul 06 06:47:16 2006 -0400 @@ -210,7 +210,7 @@ ia64_phys_addr_valid (unsigned long addr #define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) #ifdef XEN static inline void -set_pte_rel(pte_t* ptep, pte_t pteval) +set_pte_rel(volatile pte_t* ptep, pte_t pteval) { #if CONFIG_SMP asm volatile ("st8.rel [%0]=%1" :: @@ -402,8 +402,14 @@ ptep_test_and_clear_dirty (struct vm_are } #endif +#ifdef XEN +static inline pte_t +ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + volatile pte_t *ptep) +#else static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +#endif { #ifdef CONFIG_SMP return __pte(xchg((long *) ptep, 0)); @@ -416,7 +422,8 @@ ptep_get_and_clear(struct mm_struct *mm, #ifdef XEN static inline pte_t -ptep_xchg(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t npte) +ptep_xchg(struct mm_struct *mm, unsigned long addr, + volatile pte_t *ptep, pte_t npte) { 
#ifdef CONFIG_SMP return __pte(xchg((long *) ptep, pte_val(npte))); @@ -428,8 +435,8 @@ ptep_xchg(struct mm_struct *mm, unsigned } static inline pte_t -ptep_cmpxchg_rel(struct mm_struct *mm, unsigned long addr, pte_t *ptep, - pte_t old_pte, pte_t new_pte) +ptep_cmpxchg_rel(struct mm_struct *mm, unsigned long addr, + volatile pte_t *ptep, pte_t old_pte, pte_t new_pte) { #ifdef CONFIG_SMP return __pte(cmpxchg_rel(&pte_val(*ptep), diff -r 17f7a426b2cd -r e6c7667c97ed xen/include/asm-ia64/mm.h --- a/xen/include/asm-ia64/mm.h Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/include/asm-ia64/mm.h Thu Jul 06 06:47:16 2006 -0400 @@ -149,8 +149,6 @@ extern unsigned long max_page; extern void __init init_frametable(void); void add_to_domain_alloc_list(unsigned long ps, unsigned long pe); - -extern unsigned long gmfn_to_mfn_foreign(struct domain *d, unsigned long gpfn); static inline void put_page(struct page_info *page) { @@ -428,7 +426,8 @@ extern void __assign_domain_page(struct extern void __assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr, unsigned long flags); extern void assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr); extern void assign_domain_io_page(struct domain *d, unsigned long mpaddr, unsigned long flags); -extern unsigned long lookup_domain_mpa(struct domain *d, unsigned long mpaddr); +struct p2m_entry; +extern unsigned long lookup_domain_mpa(struct domain *d, unsigned long mpaddr, struct p2m_entry* entry); extern void *domain_mpa_to_imva(struct domain *d, unsigned long mpaddr); #ifdef CONFIG_XEN_IA64_DOM0_VP @@ -436,16 +435,15 @@ extern unsigned long assign_domain_mach_ extern unsigned long assign_domain_mach_page(struct domain *d, unsigned long mpaddr, unsigned long size, unsigned long flags); int domain_page_mapped(struct domain *d, unsigned long mpaddr); int efi_mmio(unsigned long physaddr, unsigned long size); -extern unsigned long __lookup_domain_mpa(struct domain *d, unsigned long mpaddr); 
extern unsigned long ____lookup_domain_mpa(struct domain *d, unsigned long mpaddr); extern unsigned long do_dom0vp_op(unsigned long cmd, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3); extern unsigned long dom0vp_zap_physmap(struct domain *d, unsigned long gpfn, unsigned int extent_order); extern unsigned long dom0vp_add_physmap(struct domain* d, unsigned long gpfn, unsigned long mfn, unsigned long flags, domid_t domid); #endif -extern unsigned long *mpt_table; +extern volatile unsigned long *mpt_table; extern unsigned long gmfn_to_mfn_foreign(struct domain *d, unsigned long gpfn); -extern u64 translate_domain_pte(u64 pteval, u64 address, u64 itir__, u64* logps); +extern u64 translate_domain_pte(u64 pteval, u64 address, u64 itir__, u64* logps, struct p2m_entry* entry); #define machine_to_phys_mapping mpt_table #define INVALID_M2P_ENTRY (~0UL) @@ -466,7 +464,7 @@ extern u64 translate_domain_pte(u64 ptev gmfn_to_mfn_foreign((_d), (gpfn)) #define __gpfn_invalid(_d, gpfn) \ - (lookup_domain_mpa((_d), ((gpfn)<<PAGE_SHIFT)) & GPFN_INV_MASK) + (lookup_domain_mpa((_d), ((gpfn)<<PAGE_SHIFT), NULL) & GPFN_INV_MASK) #define __gmfn_valid(_d, gpfn) !__gpfn_invalid(_d, gpfn) @@ -474,7 +472,7 @@ extern u64 translate_domain_pte(u64 ptev #define __gpfn_is_io(_d, gpfn) \ ({ \ u64 pte, ret=0; \ - pte=lookup_domain_mpa((_d), ((gpfn)<<PAGE_SHIFT)); \ + pte = lookup_domain_mpa((_d), ((gpfn)<<PAGE_SHIFT), NULL); \ if(!(pte&GPFN_INV_MASK)) \ ret = pte & GPFN_IO_MASK; \ ret; \ @@ -483,7 +481,7 @@ extern u64 translate_domain_pte(u64 ptev #define __gpfn_is_mem(_d, gpfn) \ ({ \ u64 pte, ret=0; \ - pte=lookup_domain_mpa((_d), ((gpfn)<<PAGE_SHIFT)); \ + pte = lookup_domain_mpa((_d), ((gpfn)<<PAGE_SHIFT), NULL); \ if((!(pte&GPFN_INV_MASK))&&((pte & GPFN_IO_MASK)==GPFN_MEM)) \ ret = 1; \ ret; \ diff -r 17f7a426b2cd -r e6c7667c97ed xen/include/asm-ia64/offsets.h --- a/xen/include/asm-ia64/offsets.h Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/include/asm-ia64/offsets.h 
Thu Jul 06 06:47:16 2006 -0400 @@ -2,7 +2,8 @@ #ifndef __IA64_OFFSETS_H #define __IA64_OFFSETS_H +#ifndef GENERATE_ASM_OFFSETS #include <asm/asm-offsets.h> -#include <asm/asm-xsi-offsets.h> +#endif #endif /* __IA64_OFFSETS_H */ diff -r 17f7a426b2cd -r e6c7667c97ed xen/include/asm-ia64/regionreg.h --- a/xen/include/asm-ia64/regionreg.h Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/include/asm-ia64/regionreg.h Thu Jul 06 06:47:16 2006 -0400 @@ -79,5 +79,6 @@ extern int set_metaphysical_rr0(void); extern int set_metaphysical_rr0(void); extern void load_region_regs(struct vcpu *v); +extern void load_region_reg7_and_pta(struct vcpu *v); #endif /* !_REGIONREG_H_ */ diff -r 17f7a426b2cd -r e6c7667c97ed xen/include/asm-ia64/vmx.h --- a/xen/include/asm-ia64/vmx.h Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/include/asm-ia64/vmx.h Thu Jul 06 06:47:16 2006 -0400 @@ -24,6 +24,7 @@ #define RR7_SWITCH_SHIFT 12 /* 4k enough */ #include <public/hvm/ioreq.h> +#define vmx_user_mode(regs) (((struct ia64_psr *)&(regs)->cr_ipsr)->vm == 1) #define VCPU_LID(v) (((u64)(v)->vcpu_id)<<24) @@ -36,7 +37,6 @@ extern void vmx_setup_platform(struct do extern void vmx_setup_platform(struct domain *d, struct vcpu_guest_context *c); extern void vmx_wait_io(void); extern void vmx_io_assist(struct vcpu *v); -extern void vmx_load_all_rr(struct vcpu *vcpu); extern void panic_domain(struct pt_regs *regs, const char *fmt, ...); extern int ia64_hypercall (struct pt_regs *regs); extern void vmx_save_state(struct vcpu *v); @@ -56,6 +56,7 @@ extern void set_illegal_op_isr (struct v extern void set_illegal_op_isr (struct vcpu *vcpu); extern void illegal_op (struct vcpu *vcpu); extern void vmx_relinquish_vcpu_resources(struct vcpu *v); +extern void vmx_die_if_kernel(char *str, struct pt_regs *regs, long err); static inline vcpu_iodata_t *get_vio(struct domain *d, unsigned long cpu) { diff -r 17f7a426b2cd -r e6c7667c97ed xen/include/asm-ia64/vmx_phy_mode.h --- a/xen/include/asm-ia64/vmx_phy_mode.h Thu Jul 06 06:44:19 
2006 -0400 +++ b/xen/include/asm-ia64/vmx_phy_mode.h Thu Jul 06 06:47:16 2006 -0400 @@ -96,6 +96,7 @@ extern void recover_if_physical_mode(VCP extern void recover_if_physical_mode(VCPU *vcpu); extern void vmx_init_all_rr(VCPU *vcpu); extern void vmx_load_all_rr(VCPU *vcpu); +extern void vmx_load_rr7_and_pta(VCPU *vcpu); extern void physical_tlb_miss(VCPU *vcpu, u64 vadr); /* * No sanity check here, since all psr changes have been diff -r 17f7a426b2cd -r e6c7667c97ed xen/include/asm-ia64/xenkregs.h --- a/xen/include/asm-ia64/xenkregs.h Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/include/asm-ia64/xenkregs.h Thu Jul 06 06:47:16 2006 -0400 @@ -6,7 +6,7 @@ */ #define IA64_TR_SHARED_INFO 3 /* dtr3: page shared with domain */ #define IA64_TR_VHPT 4 /* dtr4: vhpt */ -#define IA64_TR_ARCH_INFO 5 +#define IA64_TR_MAPPED_REGS 5 /* dtr5: vcpu mapped regs */ #define IA64_TR_PERVP_VHPT 6 #define IA64_DTR_GUEST_KERNEL 7 #define IA64_ITR_GUEST_KERNEL 2 diff -r 17f7a426b2cd -r e6c7667c97ed xen/include/asm-ia64/xensystem.h --- a/xen/include/asm-ia64/xensystem.h Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/include/asm-ia64/xensystem.h Thu Jul 06 06:47:16 2006 -0400 @@ -16,26 +16,20 @@ /* Define HV space hierarchy. VMM memory space is protected by CPL for paravirtualized domains and by VA for VTi domains. VTi imposes VA bit 60 != VA bit 59 for VMM. 
*/ -#define XEN_VIRT_SPACE_LOW 0xe800000000000000 -#define XEN_VIRT_SPACE_HIGH 0xf800000000000000 -#define __IA64_UNCACHED_OFFSET 0xe800000000000000UL - -#define XEN_START_ADDR 0xf000000000000000 -#define HYPERVISOR_VIRT_START 0xf000000000000000 +#define HYPERVISOR_VIRT_START 0xe800000000000000 #define KERNEL_START 0xf000000004000000 -#define SHAREDINFO_ADDR 0xf100000000000000 -#define XSI_OFS PAGE_SIZE -#define SHARED_ARCHINFO_ADDR (SHAREDINFO_ADDR + XSI_OFS) -#define PERCPU_ADDR (SHAREDINFO_ADDR - PERCPU_PAGE_SIZE) +#define DEFAULT_SHAREDINFO_ADDR 0xf100000000000000 +#define PERCPU_ADDR (DEFAULT_SHAREDINFO_ADDR - PERCPU_PAGE_SIZE) #define VHPT_ADDR 0xf200000000000000 #ifdef CONFIG_VIRTUAL_FRAME_TABLE #define VIRT_FRAME_TABLE_ADDR 0xf300000000000000 #define VIRT_FRAME_TABLE_END 0xf400000000000000 #endif -#define XEN_END_ADDR 0xf400000000000000 +#define HYPERVISOR_VIRT_END 0xf800000000000000 -#define PAGE_OFFSET __IA64_UL_CONST(0xf000000000000000) +#define PAGE_OFFSET __IA64_UL_CONST(0xf000000000000000) +#define __IA64_UNCACHED_OFFSET 0xe800000000000000UL #define IS_VMM_ADDRESS(addr) ((((addr) >> 60) ^ ((addr) >> 59)) & 1) diff -r 17f7a426b2cd -r e6c7667c97ed xen/include/asm-x86/hvm/vmx/vmcs.h --- a/xen/include/asm-x86/hvm/vmx/vmcs.h Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h Thu Jul 06 06:47:16 2006 -0400 @@ -27,9 +27,7 @@ extern int start_vmx(void); extern int start_vmx(void); extern void stop_vmx(void); extern void vmcs_dump_vcpu(void); -void vmx_final_setup_guest(struct vcpu *v); - -void vmx_enter_scheduler(void); +extern void vmx_init_vmcs_config(void); enum { VMX_CPU_STATE_PAE_ENABLED=0, @@ -45,8 +43,6 @@ struct vmcs_struct { u32 vmcs_revision_id; unsigned char data [0]; /* vmcs size is read from MSR */ }; - -extern int vmcs_size; enum { VMX_INDEX_MSR_LSTAR = 0, @@ -63,6 +59,10 @@ struct vmx_msr_state { unsigned long msr_items[VMX_MSR_COUNT]; unsigned long shadow_gs; }; + +/* io bitmap is 4KBytes in size */ +#define IO_BITMAP_SIZE 
0x1000 +#define IO_BITMAP_ORDER (get_order_from_bytes(IO_BITMAP_SIZE)) struct arch_vmx_struct { /* Virtual address of VMCS. */ @@ -101,7 +101,10 @@ struct arch_vmx_struct { void vmx_do_resume(struct vcpu *); -struct vmcs_struct *vmx_alloc_vmcs(void); +struct vmcs_struct *vmx_alloc_host_vmcs(void); +void vmx_free_host_vmcs(struct vmcs_struct *vmcs); + +int vmx_create_vmcs(struct vcpu *v); void vmx_destroy_vmcs(struct vcpu *v); void vmx_vmcs_enter(struct vcpu *v); void vmx_vmcs_exit(struct vcpu *v); diff -r 17f7a426b2cd -r e6c7667c97ed xen/include/asm-x86/hvm/vmx/vmx.h --- a/xen/include/asm-x86/hvm/vmx/vmx.h Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h Thu Jul 06 06:47:16 2006 -0400 @@ -133,6 +133,11 @@ extern unsigned int cpu_rev; #define EXIT_REASON_MSR_WRITE 32 #define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_INVALID_GUEST_STATE 33 +#define EXIT_REASON_MSR_LOADING 34 +#define EXIT_REASON_MACHINE_CHECK 41 + + /* * Interruption-information format */ diff -r 17f7a426b2cd -r e6c7667c97ed xen/include/public/arch-ia64.h --- a/xen/include/public/arch-ia64.h Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/include/public/arch-ia64.h Thu Jul 06 06:47:16 2006 -0400 @@ -285,17 +285,18 @@ struct mapped_regs { * shared info area now. interrupt_mask_addr is the address * of evtchn_upcall_mask for current vcpu */ - unsigned long interrupt_mask_addr; + unsigned char *interrupt_mask_addr; int pending_interruption; int incomplete_regframe; // see SDM vol2 6.8 - unsigned long reserved5_1[4]; + unsigned char vpsr_pp; + unsigned char reserved5_2[7]; + unsigned long reserved5_1[3]; int metaphysical_mode; // 1 = use metaphys mapping, 0 = use virtual int banknum; // 0 or 1, which virtual register bank is active unsigned long rrs[8]; // region registers unsigned long krs[8]; // kernel registers unsigned long pkrs[8]; // protection key registers unsigned long tmp[8]; // temp registers (e.g. 
for hyperprivops) - // FIXME: tmp[8] temp'ly being used for virtual psr.pp }; }; unsigned long reserved6[3456]; @@ -338,7 +339,7 @@ struct vcpu_guest_context { unsigned long sys_pgnr; /* System pages out of domain memory */ unsigned long vm_assist; /* VMASST_TYPE_* bitmap, now none on IPF */ - struct cpu_user_regs regs; + struct cpu_user_regs user_regs; struct mapped_regs *privregs; struct arch_shared_info shared; struct arch_initrd_info initrd; @@ -381,6 +382,18 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_guest_conte #endif /* !__ASSEMBLY__ */ +/* Address of shared_info in domain virtual space. + This is the default address, for compatibility only. */ +#define XSI_BASE 0xf100000000000000 + +/* Size of the shared_info area (this is not related to page size). */ +#define XSI_LOG_SIZE 14 +#define XSI_SIZE (1 << XSI_LOG_SIZE) +/* Log size of mapped_regs area (64 KB - only 4KB is used). */ +#define XMAPPEDREGS_LOG_SIZE 16 +/* Offset of XASI (Xen arch shared info) wrt XSI_BASE. */ +#define XMAPPEDREGS_OFS XSI_SIZE + /* Hyperprivops. */ #define HYPERPRIVOP_RFI 0x1 #define HYPERPRIVOP_RSM_DT 0x2 diff -r 17f7a426b2cd -r e6c7667c97ed xen/include/public/io/netif.h --- a/xen/include/public/io/netif.h Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/include/public/io/netif.h Thu Jul 06 06:47:16 2006 -0400 @@ -65,7 +65,7 @@ typedef struct netif_tx_request netif_tx #define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE) /* GSO types - only TCPv4 currently supported. */ -#define XEN_NETIF_GSO_TCPV4 (1) +#define XEN_NETIF_GSO_TYPE_TCPV4 (1) /* * This structure needs to fit within both netif_tx_request and @@ -87,7 +87,16 @@ struct netif_extra_info { * GSO type. This determines the protocol of the packet and any * extra features required to segment the packet properly. */ - uint16_t type; /* XEN_NETIF_GSO_* */ + uint8_t type; /* XEN_NETIF_GSO_TYPE_* */ + + /* Future expansion. */ + uint8_t pad; + + /* + * GSO features. 
This specifies any extra GSO features required + * to process this packet, such as ECN support for TCPv4. + */ + uint16_t features; /* XEN_NETIF_GSO_FEAT_* */ } gso; uint16_t pad[3]; diff -r 17f7a426b2cd -r e6c7667c97ed xen/include/xen/sched.h --- a/xen/include/xen/sched.h Thu Jul 06 06:44:19 2006 -0400 +++ b/xen/include/xen/sched.h Thu Jul 06 06:47:16 2006 -0400 @@ -78,9 +78,10 @@ struct vcpu unsigned long vcpu_flags; + spinlock_t pause_lock; + unsigned int pause_count; + u16 virq_to_evtchn[NR_VIRQS]; - - atomic_t pausecnt; /* Bitmask of CPUs on which this VCPU may run. */ cpumask_t cpu_affinity; @@ -141,6 +142,10 @@ struct domain struct rangeset *irq_caps; unsigned long domain_flags; + + spinlock_t pause_lock; + unsigned int pause_count; + unsigned long vm_assist; atomic_t refcnt; @@ -220,8 +225,7 @@ static inline void get_knownalive_domain ASSERT(!(atomic_read(&d->refcnt) & DOMAIN_DESTROYED)); } -extern struct domain *domain_create( - domid_t domid, unsigned int cpu); +extern struct domain *domain_create(domid_t domid); extern int construct_dom0( struct domain *d, unsigned long image_start, unsigned long image_len, @@ -368,6 +372,9 @@ extern struct domain *domain_list; /* VCPU is polling a set of event channels (SCHEDOP_poll). */ #define _VCPUF_polling 10 #define VCPUF_polling (1UL<<_VCPUF_polling) + /* VCPU is paused by the hypervisor? */ +#define _VCPUF_paused 11 +#define VCPUF_paused (1UL<<_VCPUF_paused) /* * Per-domain flags (domain_flags). @@ -390,12 +397,16 @@ extern struct domain *domain_list; /* Are any VCPUs polling event channels (SCHEDOP_poll)? */ #define _DOMF_polling 5 #define DOMF_polling (1UL<<_DOMF_polling) + /* Domain is paused by the hypervisor? 
*/ +#define _DOMF_paused 6 +#define DOMF_paused (1UL<<_DOMF_paused) static inline int vcpu_runnable(struct vcpu *v) { - return ( (atomic_read(&v->pausecnt) == 0) && - !(v->vcpu_flags & (VCPUF_blocked|VCPUF_down)) && - !(v->domain->domain_flags & (DOMF_shutdown|DOMF_ctrl_pause)) ); + return ( !(v->vcpu_flags & + (VCPUF_blocked|VCPUF_down|VCPUF_paused)) && + !(v->domain->domain_flags & + (DOMF_shutdown|DOMF_ctrl_pause|DOMF_paused)) ); } void vcpu_pause(struct vcpu *v); diff -r 17f7a426b2cd -r e6c7667c97ed extras/mini-os/gnttab.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extras/mini-os/gnttab.c Thu Jul 06 06:47:16 2006 -0400 @@ -0,0 +1,158 @@ +/* + **************************************************************************** + * (C) 2006 - Cambridge University + **************************************************************************** + * + * File: gnttab.c + * Author: Steven Smith (sos22@xxxxxxxxx) + * Changes: Grzegorz Milos (gm281@xxxxxxxxx) + * + * Date: July 2006 + * + * Environment: Xen Minimal OS + * Description: Simple grant tables implementation. About as stupid as it's + * possible to be and still work. 
+ * + **************************************************************************** + */ +#include <os.h> +#include <mm.h> +#include <gnttab.h> + +#define NR_RESERVED_ENTRIES 8 + +#define NR_GRANT_FRAMES 4 +#define NR_GRANT_ENTRIES (NR_GRANT_FRAMES * PAGE_SIZE / sizeof(grant_entry_t)) +#define GNTTAB_LIST_END (NR_GRANT_ENTRIES + 1) + +static grant_entry_t *gnttab_table; +static grant_ref_t gnttab_list[NR_GRANT_ENTRIES]; +static grant_ref_t gnttab_free_head; + +static grant_ref_t +get_free_entries(int count) +{ + grant_ref_t ref; + grant_ref_t head; + + ref = head = gnttab_free_head; + while (count-- > 1) + head = gnttab_list[head]; + gnttab_free_head = gnttab_list[head]; + gnttab_list[head] = GNTTAB_LIST_END; + return ref; +} + +static void +put_free_entry(grant_ref_t gref) +{ + gnttab_list[gref] = gnttab_free_head; + gnttab_free_head = gref; +} + +grant_ref_t +gnttab_grant_access(domid_t domid, unsigned long frame, int readonly) +{ + grant_ref_t ref; + + ref = get_free_entries(1); + gnttab_table[ref].frame = frame; + gnttab_table[ref].domid = domid; + wmb(); + readonly *= GTF_readonly; + gnttab_table[ref].flags = GTF_permit_access | readonly; + + return ref; +} + +grant_ref_t +gnttab_grant_transfer(domid_t domid, unsigned long pfn) +{ + grant_ref_t ref; + + ref = get_free_entries(1); + gnttab_table[ref].frame = pfn; + gnttab_table[ref].domid = domid; + wmb(); + gnttab_table[ref].flags = GTF_accept_transfer; + + return ref; +} + +int +gnttab_end_access(grant_ref_t ref) +{ + u16 flags, nflags; + + nflags = gnttab_table[ref].flags; + do { + if ((flags = nflags) & (GTF_reading|GTF_writing)) { + printk("WARNING: g.e. 
still in use!\n"); + return 0; + } + } while ((nflags = synch_cmpxchg(&gnttab_table[ref].flags, flags, 0)) != + flags); + + put_free_entry(ref); + return 1; +} + +unsigned long +gnttab_end_transfer(grant_ref_t ref) +{ + unsigned long frame; + u16 flags; + + while (!((flags = gnttab_table[ref].flags) & GTF_transfer_committed)) { + if (synch_cmpxchg(&gnttab_table[ref].flags, flags, 0) == flags) { + printk("Release unused transfer grant.\n"); + put_free_entry(ref); + return 0; + } + } + + /* If a transfer is in progress then wait until it is completed. */ + while (!(flags & GTF_transfer_completed)) { + flags = gnttab_table[ref].flags; + } + + /* Read the frame number /after/ reading completion status. */ + rmb(); + frame = gnttab_table[ref].frame; + + put_free_entry(ref); + + return frame; +} + +grant_ref_t +gnttab_alloc_and_grant(void **map) +{ + unsigned long mfn; + grant_ref_t gref; + + *map = (void *)alloc_page(); + mfn = virt_to_mfn(*map); + gref = gnttab_grant_access(0, mfn, 0); + return gref; +} + +void +init_gnttab(void) +{ + struct gnttab_setup_table setup; + unsigned long frames[NR_GRANT_FRAMES]; + int i; + + for (i = NR_RESERVED_ENTRIES; i < NR_GRANT_ENTRIES; i++) + gnttab_list[i] = i + 1; + gnttab_free_head = NR_RESERVED_ENTRIES; + + setup.dom = DOMID_SELF; + setup.nr_frames = NR_GRANT_FRAMES; + set_xen_guest_handle(setup.frame_list, frames); + + HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); + gnttab_table = map_frames(frames, NR_GRANT_FRAMES); + printk("gnttab_table mapped at %p.\n", gnttab_table); +} diff -r 17f7a426b2cd -r e6c7667c97ed extras/mini-os/include/gnttab.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extras/mini-os/include/gnttab.h Thu Jul 06 06:47:16 2006 -0400 @@ -0,0 +1,14 @@ +#ifndef __GNTTAB_H__ +#define __GNTTAB_H__ + +#include <xen/grant_table.h> + +void init_gnttab(void); +grant_ref_t gnttab_alloc_and_grant(void **map); +grant_ref_t gnttab_grant_access(domid_t domid, unsigned long frame, + int readonly); 
+grant_ref_t gnttab_grant_transfer(domid_t domid, unsigned long pfn); +unsigned long gnttab_end_transfer(grant_ref_t gref); +int gnttab_end_access(grant_ref_t ref); + +#endif /* !__GNTTAB_H__ */ diff -r 17f7a426b2cd -r e6c7667c97ed linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/linux-2.6-xen-sparse/arch/ia64/kernel/asm-offsets.c Thu Jul 06 06:47:16 2006 -0400 @@ -0,0 +1,289 @@ +/* + * Generate definitions needed by assembly language modules. + * This code generates raw asm output which is post-processed + * to extract and format the required data. + */ + +#define ASM_OFFSETS_C 1 +#include <linux/config.h> + +#include <linux/sched.h> + +#include <asm-ia64/processor.h> +#include <asm-ia64/ptrace.h> +#include <asm-ia64/siginfo.h> +#include <asm-ia64/sigcontext.h> +#include <asm-ia64/mca.h> + +#include "../kernel/sigframe.h" + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define BLANK() asm volatile("\n->" : : ) + +void foo(void) +{ + DEFINE(IA64_TASK_SIZE, sizeof (struct task_struct)); + DEFINE(IA64_THREAD_INFO_SIZE, sizeof (struct thread_info)); + DEFINE(IA64_PT_REGS_SIZE, sizeof (struct pt_regs)); + DEFINE(IA64_SWITCH_STACK_SIZE, sizeof (struct switch_stack)); + DEFINE(IA64_SIGINFO_SIZE, sizeof (struct siginfo)); + DEFINE(IA64_CPU_SIZE, sizeof (struct cpuinfo_ia64)); + DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe)); + DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info)); + + BLANK(); + + DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); + DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count)); + + BLANK(); + + DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked)); + DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid)); + DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader)); + DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending)); + 
DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid)); + DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent)); + DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand)); + DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal)); + DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid)); + DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp)); + DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack)); + + BLANK(); + + DEFINE(IA64_SIGHAND_SIGLOCK_OFFSET,offsetof (struct sighand_struct, siglock)); + + BLANK(); + + DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct, + group_stop_count)); + DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending)); + + BLANK(); + + DEFINE(IA64_PT_REGS_B6_OFFSET, offsetof (struct pt_regs, b6)); + DEFINE(IA64_PT_REGS_B7_OFFSET, offsetof (struct pt_regs, b7)); + DEFINE(IA64_PT_REGS_AR_CSD_OFFSET, offsetof (struct pt_regs, ar_csd)); + DEFINE(IA64_PT_REGS_AR_SSD_OFFSET, offsetof (struct pt_regs, ar_ssd)); + DEFINE(IA64_PT_REGS_R8_OFFSET, offsetof (struct pt_regs, r8)); + DEFINE(IA64_PT_REGS_R9_OFFSET, offsetof (struct pt_regs, r9)); + DEFINE(IA64_PT_REGS_R10_OFFSET, offsetof (struct pt_regs, r10)); + DEFINE(IA64_PT_REGS_R11_OFFSET, offsetof (struct pt_regs, r11)); + DEFINE(IA64_PT_REGS_CR_IPSR_OFFSET, offsetof (struct pt_regs, cr_ipsr)); + DEFINE(IA64_PT_REGS_CR_IIP_OFFSET, offsetof (struct pt_regs, cr_iip)); + DEFINE(IA64_PT_REGS_CR_IFS_OFFSET, offsetof (struct pt_regs, cr_ifs)); + DEFINE(IA64_PT_REGS_AR_UNAT_OFFSET, offsetof (struct pt_regs, ar_unat)); + DEFINE(IA64_PT_REGS_AR_PFS_OFFSET, offsetof (struct pt_regs, ar_pfs)); + DEFINE(IA64_PT_REGS_AR_RSC_OFFSET, offsetof (struct pt_regs, ar_rsc)); + DEFINE(IA64_PT_REGS_AR_RNAT_OFFSET, offsetof (struct pt_regs, ar_rnat)); + + DEFINE(IA64_PT_REGS_AR_BSPSTORE_OFFSET, offsetof (struct pt_regs, 
ar_bspstore)); + DEFINE(IA64_PT_REGS_PR_OFFSET, offsetof (struct pt_regs, pr)); + DEFINE(IA64_PT_REGS_B0_OFFSET, offsetof (struct pt_regs, b0)); + DEFINE(IA64_PT_REGS_LOADRS_OFFSET, offsetof (struct pt_regs, loadrs)); + DEFINE(IA64_PT_REGS_R1_OFFSET, offsetof (struct pt_regs, r1)); + DEFINE(IA64_PT_REGS_R12_OFFSET, offsetof (struct pt_regs, r12)); + DEFINE(IA64_PT_REGS_R13_OFFSET, offsetof (struct pt_regs, r13)); + DEFINE(IA64_PT_REGS_AR_FPSR_OFFSET, offsetof (struct pt_regs, ar_fpsr)); + DEFINE(IA64_PT_REGS_R15_OFFSET, offsetof (struct pt_regs, r15)); + DEFINE(IA64_PT_REGS_R14_OFFSET, offsetof (struct pt_regs, r14)); + DEFINE(IA64_PT_REGS_R2_OFFSET, offsetof (struct pt_regs, r2)); + DEFINE(IA64_PT_REGS_R3_OFFSET, offsetof (struct pt_regs, r3)); + DEFINE(IA64_PT_REGS_R16_OFFSET, offsetof (struct pt_regs, r16)); + DEFINE(IA64_PT_REGS_R17_OFFSET, offsetof (struct pt_regs, r17)); + DEFINE(IA64_PT_REGS_R18_OFFSET, offsetof (struct pt_regs, r18)); + DEFINE(IA64_PT_REGS_R19_OFFSET, offsetof (struct pt_regs, r19)); + DEFINE(IA64_PT_REGS_R20_OFFSET, offsetof (struct pt_regs, r20)); + DEFINE(IA64_PT_REGS_R21_OFFSET, offsetof (struct pt_regs, r21)); + DEFINE(IA64_PT_REGS_R22_OFFSET, offsetof (struct pt_regs, r22)); + DEFINE(IA64_PT_REGS_R23_OFFSET, offsetof (struct pt_regs, r23)); + DEFINE(IA64_PT_REGS_R24_OFFSET, offsetof (struct pt_regs, r24)); + DEFINE(IA64_PT_REGS_R25_OFFSET, offsetof (struct pt_regs, r25)); + DEFINE(IA64_PT_REGS_R26_OFFSET, offsetof (struct pt_regs, r26)); + DEFINE(IA64_PT_REGS_R27_OFFSET, offsetof (struct pt_regs, r27)); + DEFINE(IA64_PT_REGS_R28_OFFSET, offsetof (struct pt_regs, r28)); + DEFINE(IA64_PT_REGS_R29_OFFSET, offsetof (struct pt_regs, r29)); + DEFINE(IA64_PT_REGS_R30_OFFSET, offsetof (struct pt_regs, r30)); + DEFINE(IA64_PT_REGS_R31_OFFSET, offsetof (struct pt_regs, r31)); + DEFINE(IA64_PT_REGS_AR_CCV_OFFSET, offsetof (struct pt_regs, ar_ccv)); + DEFINE(IA64_PT_REGS_F6_OFFSET, offsetof (struct pt_regs, f6)); + DEFINE(IA64_PT_REGS_F7_OFFSET, 
offsetof (struct pt_regs, f7)); + DEFINE(IA64_PT_REGS_F8_OFFSET, offsetof (struct pt_regs, f8)); + DEFINE(IA64_PT_REGS_F9_OFFSET, offsetof (struct pt_regs, f9)); + DEFINE(IA64_PT_REGS_F10_OFFSET, offsetof (struct pt_regs, f10)); + DEFINE(IA64_PT_REGS_F11_OFFSET, offsetof (struct pt_regs, f11)); + + BLANK(); + + DEFINE(IA64_SWITCH_STACK_CALLER_UNAT_OFFSET, offsetof (struct switch_stack, caller_unat)); + DEFINE(IA64_SWITCH_STACK_AR_FPSR_OFFSET, offsetof (struct switch_stack, ar_fpsr)); + DEFINE(IA64_SWITCH_STACK_F2_OFFSET, offsetof (struct switch_stack, f2)); + DEFINE(IA64_SWITCH_STACK_F3_OFFSET, offsetof (struct switch_stack, f3)); + DEFINE(IA64_SWITCH_STACK_F4_OFFSET, offsetof (struct switch_stack, f4)); + DEFINE(IA64_SWITCH_STACK_F5_OFFSET, offsetof (struct switch_stack, f5)); + DEFINE(IA64_SWITCH_STACK_F12_OFFSET, offsetof (struct switch_stack, f12)); + DEFINE(IA64_SWITCH_STACK_F13_OFFSET, offsetof (struct switch_stack, f13)); + DEFINE(IA64_SWITCH_STACK_F14_OFFSET, offsetof (struct switch_stack, f14)); + DEFINE(IA64_SWITCH_STACK_F15_OFFSET, offsetof (struct switch_stack, f15)); + DEFINE(IA64_SWITCH_STACK_F16_OFFSET, offsetof (struct switch_stack, f16)); + DEFINE(IA64_SWITCH_STACK_F17_OFFSET, offsetof (struct switch_stack, f17)); + DEFINE(IA64_SWITCH_STACK_F18_OFFSET, offsetof (struct switch_stack, f18)); + DEFINE(IA64_SWITCH_STACK_F19_OFFSET, offsetof (struct switch_stack, f19)); + DEFINE(IA64_SWITCH_STACK_F20_OFFSET, offsetof (struct switch_stack, f20)); + DEFINE(IA64_SWITCH_STACK_F21_OFFSET, offsetof (struct switch_stack, f21)); + DEFINE(IA64_SWITCH_STACK_F22_OFFSET, offsetof (struct switch_stack, f22)); + DEFINE(IA64_SWITCH_STACK_F23_OFFSET, offsetof (struct switch_stack, f23)); + DEFINE(IA64_SWITCH_STACK_F24_OFFSET, offsetof (struct switch_stack, f24)); + DEFINE(IA64_SWITCH_STACK_F25_OFFSET, offsetof (struct switch_stack, f25)); + DEFINE(IA64_SWITCH_STACK_F26_OFFSET, offsetof (struct switch_stack, f26)); + DEFINE(IA64_SWITCH_STACK_F27_OFFSET, offsetof (struct 
switch_stack, f27)); + DEFINE(IA64_SWITCH_STACK_F28_OFFSET, offsetof (struct switch_stack, f28)); + DEFINE(IA64_SWITCH_STACK_F29_OFFSET, offsetof (struct switch_stack, f29)); + DEFINE(IA64_SWITCH_STACK_F30_OFFSET, offsetof (struct switch_stack, f30)); + DEFINE(IA64_SWITCH_STACK_F31_OFFSET, offsetof (struct switch_stack, f31)); + DEFINE(IA64_SWITCH_STACK_R4_OFFSET, offsetof (struct switch_stack, r4)); + DEFINE(IA64_SWITCH_STACK_R5_OFFSET, offsetof (struct switch_stack, r5)); + DEFINE(IA64_SWITCH_STACK_R6_OFFSET, offsetof (struct switch_stack, r6)); + DEFINE(IA64_SWITCH_STACK_R7_OFFSET, offsetof (struct switch_stack, r7)); + DEFINE(IA64_SWITCH_STACK_B0_OFFSET, offsetof (struct switch_stack, b0)); + DEFINE(IA64_SWITCH_STACK_B1_OFFSET, offsetof (struct switch_stack, b1)); + DEFINE(IA64_SWITCH_STACK_B2_OFFSET, offsetof (struct switch_stack, b2)); + DEFINE(IA64_SWITCH_STACK_B3_OFFSET, offsetof (struct switch_stack, b3)); + DEFINE(IA64_SWITCH_STACK_B4_OFFSET, offsetof (struct switch_stack, b4)); + DEFINE(IA64_SWITCH_STACK_B5_OFFSET, offsetof (struct switch_stack, b5)); + DEFINE(IA64_SWITCH_STACK_AR_PFS_OFFSET, offsetof (struct switch_stack, ar_pfs)); + DEFINE(IA64_SWITCH_STACK_AR_LC_OFFSET, offsetof (struct switch_stack, ar_lc)); + DEFINE(IA64_SWITCH_STACK_AR_UNAT_OFFSET, offsetof (struct switch_stack, ar_unat)); + DEFINE(IA64_SWITCH_STACK_AR_RNAT_OFFSET, offsetof (struct switch_stack, ar_rnat)); + DEFINE(IA64_SWITCH_STACK_AR_BSPSTORE_OFFSET, offsetof (struct switch_stack, ar_bspstore)); + DEFINE(IA64_SWITCH_STACK_PR_OFFSET, offsetof (struct switch_stack, pr)); + + BLANK(); + + DEFINE(IA64_SIGCONTEXT_IP_OFFSET, offsetof (struct sigcontext, sc_ip)); + DEFINE(IA64_SIGCONTEXT_AR_BSP_OFFSET, offsetof (struct sigcontext, sc_ar_bsp)); + DEFINE(IA64_SIGCONTEXT_AR_FPSR_OFFSET, offsetof (struct sigcontext, sc_ar_fpsr)); + DEFINE(IA64_SIGCONTEXT_AR_RNAT_OFFSET, offsetof (struct sigcontext, sc_ar_rnat)); + DEFINE(IA64_SIGCONTEXT_AR_UNAT_OFFSET, offsetof (struct sigcontext, 
sc_ar_unat)); + DEFINE(IA64_SIGCONTEXT_B0_OFFSET, offsetof (struct sigcontext, sc_br[0])); + DEFINE(IA64_SIGCONTEXT_CFM_OFFSET, offsetof (struct sigcontext, sc_cfm)); + DEFINE(IA64_SIGCONTEXT_FLAGS_OFFSET, offsetof (struct sigcontext, sc_flags)); + DEFINE(IA64_SIGCONTEXT_FR6_OFFSET, offsetof (struct sigcontext, sc_fr[6])); + DEFINE(IA64_SIGCONTEXT_PR_OFFSET, offsetof (struct sigcontext, sc_pr)); + DEFINE(IA64_SIGCONTEXT_R12_OFFSET, offsetof (struct sigcontext, sc_gr[12])); + DEFINE(IA64_SIGCONTEXT_RBS_BASE_OFFSET,offsetof (struct sigcontext, sc_rbs_base)); + DEFINE(IA64_SIGCONTEXT_LOADRS_OFFSET, offsetof (struct sigcontext, sc_loadrs)); + + BLANK(); + + DEFINE(IA64_SIGPENDING_SIGNAL_OFFSET, offsetof (struct sigpending, signal)); + + BLANK(); + + DEFINE(IA64_SIGFRAME_ARG0_OFFSET, offsetof (struct sigframe, arg0)); + DEFINE(IA64_SIGFRAME_ARG1_OFFSET, offsetof (struct sigframe, arg1)); + DEFINE(IA64_SIGFRAME_ARG2_OFFSET, offsetof (struct sigframe, arg2)); + DEFINE(IA64_SIGFRAME_HANDLER_OFFSET, offsetof (struct sigframe, handler)); + DEFINE(IA64_SIGFRAME_SIGCONTEXT_OFFSET, offsetof (struct sigframe, sc)); + BLANK(); + /* for assembly files which can't include sched.h: */ + DEFINE(IA64_CLONE_VFORK, CLONE_VFORK); + DEFINE(IA64_CLONE_VM, CLONE_VM); + + BLANK(); + DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET, + offsetof (struct cpuinfo_ia64, nsec_per_cyc)); + DEFINE(IA64_CPUINFO_PTCE_BASE_OFFSET, + offsetof (struct cpuinfo_ia64, ptce_base)); + DEFINE(IA64_CPUINFO_PTCE_COUNT_OFFSET, + offsetof (struct cpuinfo_ia64, ptce_count)); + DEFINE(IA64_CPUINFO_PTCE_STRIDE_OFFSET, + offsetof (struct cpuinfo_ia64, ptce_stride)); + BLANK(); + DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, + offsetof (struct timespec, tv_nsec)); + + DEFINE(CLONE_SETTLS_BIT, 19); +#if CLONE_SETTLS != (1<<19) +# error "CLONE_SETTLS_BIT incorrect, please fix" +#endif + + BLANK(); + DEFINE(IA64_MCA_CPU_MCA_STACK_OFFSET, + offsetof (struct ia64_mca_cpu, mca_stack)); + DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET, + offsetof 
(struct ia64_mca_cpu, init_stack)); + BLANK(); + DEFINE(IA64_SAL_OS_STATE_COMMON_OFFSET, + offsetof (struct ia64_sal_os_state, sal_ra)); + DEFINE(IA64_SAL_OS_STATE_OS_GP_OFFSET, + offsetof (struct ia64_sal_os_state, os_gp)); + DEFINE(IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET, + offsetof (struct ia64_sal_os_state, pal_min_state)); + DEFINE(IA64_SAL_OS_STATE_PROC_STATE_PARAM_OFFSET, + offsetof (struct ia64_sal_os_state, proc_state_param)); + DEFINE(IA64_SAL_OS_STATE_SIZE, + sizeof (struct ia64_sal_os_state)); + DEFINE(IA64_PMSA_GR_OFFSET, + offsetof (struct pal_min_state_area_s, pmsa_gr)); + DEFINE(IA64_PMSA_BANK1_GR_OFFSET, + offsetof (struct pal_min_state_area_s, pmsa_bank1_gr)); + DEFINE(IA64_PMSA_PR_OFFSET, + offsetof (struct pal_min_state_area_s, pmsa_pr)); + DEFINE(IA64_PMSA_BR0_OFFSET, + offsetof (struct pal_min_state_area_s, pmsa_br0)); + DEFINE(IA64_PMSA_RSC_OFFSET, + offsetof (struct pal_min_state_area_s, pmsa_rsc)); + DEFINE(IA64_PMSA_IIP_OFFSET, + offsetof (struct pal_min_state_area_s, pmsa_iip)); + DEFINE(IA64_PMSA_IPSR_OFFSET, + offsetof (struct pal_min_state_area_s, pmsa_ipsr)); + DEFINE(IA64_PMSA_IFS_OFFSET, + offsetof (struct pal_min_state_area_s, pmsa_ifs)); + DEFINE(IA64_PMSA_XIP_OFFSET, + offsetof (struct pal_min_state_area_s, pmsa_xip)); + BLANK(); + + /* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */ + DEFINE(IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET, offsetof (struct time_interpolator, addr)); + DEFINE(IA64_TIME_INTERPOLATOR_SOURCE_OFFSET, offsetof (struct time_interpolator, source)); + DEFINE(IA64_TIME_INTERPOLATOR_SHIFT_OFFSET, offsetof (struct time_interpolator, shift)); + DEFINE(IA64_TIME_INTERPOLATOR_NSEC_OFFSET, offsetof (struct time_interpolator, nsec_per_cyc)); + DEFINE(IA64_TIME_INTERPOLATOR_OFFSET_OFFSET, offsetof (struct time_interpolator, offset)); + DEFINE(IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET, offsetof (struct time_interpolator, last_cycle)); + DEFINE(IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET, offsetof (struct 
time_interpolator, last_counter)); + DEFINE(IA64_TIME_INTERPOLATOR_JITTER_OFFSET, offsetof (struct time_interpolator, jitter)); + DEFINE(IA64_TIME_INTERPOLATOR_MASK_OFFSET, offsetof (struct time_interpolator, mask)); + DEFINE(IA64_TIME_SOURCE_CPU, TIME_SOURCE_CPU); + DEFINE(IA64_TIME_SOURCE_MMIO64, TIME_SOURCE_MMIO64); + DEFINE(IA64_TIME_SOURCE_MMIO32, TIME_SOURCE_MMIO32); + DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, offsetof (struct timespec, tv_nsec)); + +#ifdef CONFIG_XEN + BLANK(); + +#define DEFINE_MAPPED_REG_OFS(sym, field) \ + DEFINE(sym, (XMAPPEDREGS_OFS + offsetof(mapped_regs_t, field))) + + DEFINE_MAPPED_REG_OFS(XSI_PSR_I_ADDR_OFS, interrupt_mask_addr); + DEFINE_MAPPED_REG_OFS(XSI_IPSR_OFS, ipsr); + DEFINE_MAPPED_REG_OFS(XSI_IIP_OFS, iip); + DEFINE_MAPPED_REG_OFS(XSI_IFS_OFS, ifs); + DEFINE_MAPPED_REG_OFS(XSI_PRECOVER_IFS_OFS, precover_ifs); + DEFINE_MAPPED_REG_OFS(XSI_ISR_OFS, isr); + DEFINE_MAPPED_REG_OFS(XSI_IFA_OFS, ifa); + DEFINE_MAPPED_REG_OFS(XSI_IIPA_OFS, iipa); + DEFINE_MAPPED_REG_OFS(XSI_IIM_OFS, iim); + DEFINE_MAPPED_REG_OFS(XSI_IHA_OFS, iha); + DEFINE_MAPPED_REG_OFS(XSI_ITIR_OFS, itir); + DEFINE_MAPPED_REG_OFS(XSI_PSR_IC_OFS, interrupt_collection_enabled); + DEFINE_MAPPED_REG_OFS(XSI_PEND_OFS, pending_interruption); + DEFINE_MAPPED_REG_OFS(XSI_INCOMPL_REGFR_OFS, incomplete_regframe); + DEFINE_MAPPED_REG_OFS(XSI_BANKNUM_OFS, banknum); + DEFINE_MAPPED_REG_OFS(XSI_BANK0_R16_OFS, bank0_regs[0]); + DEFINE_MAPPED_REG_OFS(XSI_BANK1_R16_OFS, bank1_regs[0]); +#endif /* CONFIG_XEN */ +} diff -r 17f7a426b2cd -r e6c7667c97ed linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-post --- a/linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-post Thu Jul 06 06:44:19 2006 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -#!/bin/bash -echo 'NOTHING YET IN ' ${0} diff -r 17f7a426b2cd -r e6c7667c97ed linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre --- a/linux-2.6-xen-sparse/arch/ia64/xen-mkbuildtree-pre Thu Jul 06 06:44:19 2006 -0400 +++ /dev/null Thu Jan 01 
00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -#!/bin/bash -# restructure directories to match future drivers/xen plan -# and move aside xen/x86 specific changes -# WARNING!: This directory movement really confuses hg which makes -# it difficult to do development in a directory which is being used -# for building (as all files in mv'd directories are thought by hg -# to have been deleted). I don't know how to avoid this right now, -# but if someone has a better way, I'm all ears - -#eventually asm-xsi-offsets needs to be part of hypervisor.h/hypercall.h -ln -sf ../../../../xen/include/asm-ia64/asm-xsi-offsets.h include/asm-ia64/xen/ - _______________________________________________ Xen-ppc-devel mailing list Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-ppc-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |