[Xen-changelog] [xen-unstable] merge with xen-unstable.hg
# HG changeset patch
# User Alex Williamson <alex.williamson@xxxxxx>
# Date 1204301998 25200
# Node ID 71a8366fb212b9199090bf9e87e389bdd65e5cbd
# Parent 0b20ac6ec64aa50558bea7145552c341277f9f19
# Parent 9049b0b62e0891e9bfb188bef40f68c04b5ea653
merge with xen-unstable.hg
---
 extras/mini-os/fbfront.c | 12
 extras/mini-os/include/fbfront.h | 3
 extras/mini-os/kernel.c | 6
 extras/mini-os/xenbus/xenbus.c | 4
 stubdom/README | 91 +
 stubdom/stubdom-dm | 27
 tools/blktap/drivers/block-qcow2.c | 161 --
 tools/ioemu/block-qcow.c | 2
 tools/ioemu/block-qcow2.c | 2
 tools/ioemu/block-raw.c | 2
 tools/ioemu/block-vmdk.c | 2
 tools/ioemu/block.c | 17
 tools/ioemu/block_int.h | 4
 tools/ioemu/hw/e1000.c | 2
 tools/ioemu/hw/xenfb.c | 216 +++
 tools/ioemu/keymaps.c | 4
 tools/ioemu/monitor.c | 2
 tools/ioemu/vl.c | 8
 tools/ioemu/vl.h | 7
 tools/ioemu/xenstore.c | 31
 tools/python/xen/xend/XendAPI.py | 3
 tools/python/xen/xend/XendCheckpoint.py | 33
 tools/python/xen/xend/XendDomain.py | 10
 tools/python/xen/xend/XendDomainInfo.py | 36
 tools/python/xen/xend/image.py | 29
 tools/python/xen/xm/main.py | 9
 tools/python/xen/xm/migrate.py | 10
 tools/xenstat/libxenstat/src/xenstat_solaris.c | 44
 tools/xentrace/xentrace.c | 8
 unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c | 3
 xen/arch/ia64/xen/machine_kexec.c | 49
 xen/arch/powerpc/machine_kexec.c | 6
 xen/arch/x86/machine_kexec.c | 10
 xen/arch/x86/mm/shadow/multi.c | 202 ++-
 xen/arch/x86/mm/shadow/types.h | 1
 xen/arch/x86/x86_32/Makefile | 1
 xen/arch/x86/x86_32/machine_kexec.c | 33
 xen/arch/x86/x86_64/Makefile | 1
 xen/arch/x86/x86_64/machine_kexec.c | 32
 xen/common/compat/kexec.c | 5
 xen/common/kexec.c | 97 -
 xen/drivers/acpi/tables.c | 1
 xen/drivers/passthrough/amd/Makefile | 1
 xen/drivers/passthrough/amd/iommu_acpi.c | 874 +++++++++++++
 xen/drivers/passthrough/amd/iommu_detect.c | 36
 xen/drivers/passthrough/amd/iommu_init.c | 41
 xen/drivers/passthrough/amd/iommu_map.c | 42
 xen/drivers/passthrough/amd/pci_amd_iommu.c | 142 +-
 xen/include/asm-x86/amd-iommu.h | 36
 xen/include/asm-x86/domain.h | 5
 xen/include/asm-x86/hvm/svm/amd-iommu-acpi.h | 176 ++
 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h | 6
 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h | 24
 xen/include/asm-x86/perfc_defn.h | 5
 xen/include/public/io/kbdif.h | 2
 xen/include/public/kexec.h | 15
 xen/include/xen/acpi.h | 1
 xen/include/xen/kexec.h | 1
 58 files changed, 2173 insertions(+), 460 deletions(-)

diff -r 0b20ac6ec64a -r 71a8366fb212 extras/mini-os/fbfront.c
--- a/extras/mini-os/fbfront.c	Fri Feb 29 09:18:01 2008 -0700
+++ b/extras/mini-os/fbfront.c	Fri Feb 29 09:19:58 2008 -0700
@@ -31,13 +31,6 @@ struct kbdfront_dev {
     char *nodename;
     char *backend;
-    char *data;
-    int width;
-    int height;
-    int depth;
-    int line_length;
-    int mem_length;
-
 #ifdef HAVE_LIBC
     int fd;
 #endif
@@ -316,7 +309,10 @@ struct fbfront_dev *init_fbfront(char *n
     for (i = 0; mapped < mem_length && i < max_pd; i++) {
         unsigned long *pd = (unsigned long *) alloc_page();
         for (j = 0; mapped < mem_length && j < PAGE_SIZE / sizeof(unsigned long); j++) {
-            pd[j] = virt_to_mfn((unsigned long) data + mapped);
+            /* Trigger CoW */
+            * ((char *)data + mapped) = 0;
+            barrier();
+            pd[j] = virtual_to_mfn((unsigned long) data + mapped);
             mapped += PAGE_SIZE;
         }
         for ( ; j < PAGE_SIZE / sizeof(unsigned long); j++)
diff -r 0b20ac6ec64a -r 71a8366fb212 extras/mini-os/include/fbfront.h
--- a/extras/mini-os/include/fbfront.h	Fri Feb 29 09:18:01 2008 -0700
+++ b/extras/mini-os/include/fbfront.h	Fri Feb 29 09:19:58 2008 -0700
@@ -14,6 +14,9 @@
 #ifndef KEY_Q
 #define KEY_Q
16 #endif +#ifndef KEY_MAX +#define KEY_MAX 0x1ff +#endif struct kbdfront_dev; diff -r 0b20ac6ec64a -r 71a8366fb212 extras/mini-os/kernel.c --- a/extras/mini-os/kernel.c Fri Feb 29 09:18:01 2008 -0700 +++ b/extras/mini-os/kernel.c Fri Feb 29 09:19:58 2008 -0700 @@ -360,13 +360,13 @@ static void kbdfront_thread(void *p) refresh_cursor(x, y); break; case XENKBD_TYPE_POS: - printk("pos x:%d y:%d z:%d\n", + printk("pos x:%d y:%d dz:%d\n", event.pos.abs_x, event.pos.abs_y, - event.pos.abs_z); + event.pos.rel_z); x = event.pos.abs_x; y = event.pos.abs_y; - z = event.pos.abs_z; + z = event.pos.rel_z; clip_cursor(&x, &y); refresh_cursor(x, y); break; diff -r 0b20ac6ec64a -r 71a8366fb212 extras/mini-os/xenbus/xenbus.c --- a/extras/mini-os/xenbus/xenbus.c Fri Feb 29 09:18:01 2008 -0700 +++ b/extras/mini-os/xenbus/xenbus.c Fri Feb 29 09:19:58 2008 -0700 @@ -637,9 +637,7 @@ char* xenbus_printf(xenbus_transaction_t va_start(args, fmt); vsprintf(val, fmt, args); va_end(args); - xenbus_write(xbt,fullpath,val); - - return NULL; + return xenbus_write(xbt,fullpath,val); } static void do_ls_test(const char *pre) diff -r 0b20ac6ec64a -r 71a8366fb212 stubdom/README --- a/stubdom/README Fri Feb 29 09:18:01 2008 -0700 +++ b/stubdom/README Fri Feb 29 09:19:58 2008 -0700 @@ -6,6 +6,73 @@ Then make install to install the result. Also, run make and make install in $XEN_ROOT/tools/fs-back +General Configuration +===================== + +In your HVM config "hvmconfig", + +- use /usr/lib/xen/bin/stubdom-dm as dm script + +device_model = '/usr/lib/xen/bin/stubdom-dm' + +- comment the disk statement: + +#disk = [ 'file:/tmp/install.iso,hdc:cdrom,r', 'phy:/dev/sda6,hda,w', 'file:/tmp/test,hdb,r' ] + + +Create /etc/xen/stubdom-hvmconfig (where "hvmconfig" is the name of your HVM +guest) with + +kernel = "/usr/lib/xen/boot/stubdom.gz" +vif = [ '', 'ip=10.0.1.1,mac=aa:00:00:12:23:34'] +disk = [ 'file:/tmp/install.iso,hdc:cdrom,r', 'phy:/dev/sda6,hda,w', 'file:/tmp/test,hdb,r' ] + +where +- the first vif ('') is reserved for VNC (see below) +- 'ip=10.0.1.1,mac= etc...' is the same net configuration as in the hvmconfig +script, +- and disk = is the same block configuration as in the hvmconfig script. + +Display Configuration +===================== + +There are three posibilities + +* Using SDL + +In hvmconfig, disable vnc: + +vnc = 0 + +In stubdom-hvmconfig, set a vfb: + +vfb = [ 'type=sdl' ] + +* Using a VNC server in the stub domain + +In hvmconfig, set vnclisten to "172.30.206.1" for instance. Do not use a host +name as Mini-OS does not have a name resolver. Do not use 127.0.0.1 since then +you will not be able to connect to it. + +vnc = 1 +vnclisten = "172.30.206.1" + +In stubdom-hvmconfig, fill the reserved vif with the same IP, for instance: + +vif = [ 'ip=172.30.206.1', 'ip=10.0.1.1,mac=aa:00:00:12:23:34'] + +* Using a VNC server in dom0 + +In hvmconfig, disable vnc: + +vnc = 0 + +In stubdom-hvmconfig, set a vfb: + +vfb = [ 'type=vnc' ] + +and any other parameter as wished. + To run ====== @@ -13,32 +80,4 @@ ln -s /usr/share/qemu/keymaps /exports/u ln -s /usr/share/qemu/keymaps /exports/usr/share/qemu /usr/sbin/fs-backend & - -In your HVM config "hvmconfig", - -- use VNC, set vnclisten to "172.30.206.1" for instance. Do not use a host name -as Mini-OS does not have a name resolver. Do not use 127.0.0.1 since then you -will not be able to connect to it. 
- -vnc = 1 -vnclisten = "172.30.206.1" - -- use /usr/lib/xen/bin/stubdom-dm as dm script - -device_model = '/usr/lib/xen/bin/stubdom-dm' - -- comment the disk statement: -#disk = [ 'file:/tmp/install.iso,hdc:cdrom,r', 'phy:/dev/sda6,hda,w', 'file:/tmp/test,hdb,r' ] - -Create /etc/xen/stubdom-hvmconfig (where "hvmconfig" is your HVM guest domain -name) with - -kernel = "/usr/lib/xen/boot/stubdom.gz" -vif = [ 'ip=172.30.206.1', 'ip=10.0.1.1,mac=aa:00:00:12:23:34'] -disk = [ 'file:/tmp/install.iso,hdc:cdrom,r', 'phy:/dev/sda6,hda,w', 'file:/tmp/test,hdb,r' ] - -where -- 172.30.206.1 is the IP for vnc, -- 'ip=10.0.1.1,mac= etc...' is the same net configuration as in the hvmconfig -script, -- and disk = is the same block configuration as in the hvmconfig script. +xm create hvmconfig diff -r 0b20ac6ec64a -r 71a8366fb212 stubdom/stubdom-dm --- a/stubdom/stubdom-dm Fri Feb 29 09:18:01 2008 -0700 +++ b/stubdom/stubdom-dm Fri Feb 29 09:19:58 2008 -0700 @@ -62,32 +62,23 @@ done creation="xm create -c stubdom-$domname target=$domid memory=32" -(while true ; do sleep 60 ; done) | $creation > /var/log/xen/qemu-dm-$domid.log & +(while true ; do sleep 60 ; done) | $creation & #xterm -geometry +0+0 -e /bin/sh -c "$creation ; echo ; echo press ENTER to shut down ; read" & consolepid=$! - -# Wait for vnc server to appear -while ! vnc_port=`xenstore-read /local/domain/$domid/console/vnc-port` -do - # Check that the stubdom job is still alive - kill -0 $consolepid || term - sleep 1 -done - -################ -# DEBUG: tcpdump -#while ! stubdomid=`xm domid stubdom-$domname` -#do -# sleep 1 -#done -#xterm -geometry 160x25+0+$height -e /bin/sh -c "tcpdump -n -i vif$stubdomid.0" & -#xterm -geometry 160x25+0+$((2 * $height)) -e /bin/sh -c "tcpdump -n -i vif$stubdomid.1" & ########### # vncviewer if [ "$vncviewer" = 1 ] then + # Wait for vnc server to appear + while ! vnc_port=`xenstore-read /local/domain/$domid/console/vnc-port` + do + # Check that the stubdom job is still alive + kill -0 $consolepid || term + sleep 1 + done + vncviewer $ip:$vnc_port & vncpid=$! 
fi diff -r 0b20ac6ec64a -r 71a8366fb212 tools/blktap/drivers/block-qcow2.c --- a/tools/blktap/drivers/block-qcow2.c Fri Feb 29 09:18:01 2008 -0700 +++ b/tools/blktap/drivers/block-qcow2.c Fri Feb 29 09:19:58 2008 -0700 @@ -1241,167 +1241,6 @@ static void create_refcount_update(QCowC refcount++; *p = cpu_to_be16(refcount); } -} - -static int qcow2_create(const char *filename, int64_t total_size, - const char *backing_file, int flags) -{ - int fd, header_size, backing_filename_len, l1_size, i, shift, l2_bits; - QCowHeader header; - uint64_t tmp, offset; - QCowCreateState s1, *s = &s1; - - memset(s, 0, sizeof(*s)); - - fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644); - if (fd < 0) - return -1; - memset(&header, 0, sizeof(header)); - header.magic = cpu_to_be32(QCOW_MAGIC); - header.version = cpu_to_be32(QCOW_VERSION); - header.size = cpu_to_be64(total_size * 512); - header_size = sizeof(header); - backing_filename_len = 0; - if (backing_file) { - header.backing_file_offset = cpu_to_be64(header_size); - backing_filename_len = strlen(backing_file); - header.backing_file_size = cpu_to_be32(backing_filename_len); - header_size += backing_filename_len; - } - s->cluster_bits = 12; /* 4 KB clusters */ - s->cluster_size = 1 << s->cluster_bits; - header.cluster_bits = cpu_to_be32(s->cluster_bits); - header_size = (header_size + 7) & ~7; - if (flags & BLOCK_FLAG_ENCRYPT) { - header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES); - } else { - header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE); - } - l2_bits = s->cluster_bits - 3; - shift = s->cluster_bits + l2_bits; - l1_size = (((total_size * 512) + (1LL << shift) - 1) >> shift); - offset = align_offset(header_size, s->cluster_size); - s->l1_table_offset = offset; - header.l1_table_offset = cpu_to_be64(s->l1_table_offset); - header.l1_size = cpu_to_be32(l1_size); - offset += align_offset(l1_size * sizeof(uint64_t), s->cluster_size); - - s->refcount_table = qemu_mallocz(s->cluster_size); - if (!s->refcount_table) - goto fail; - s->refcount_block = qemu_mallocz(s->cluster_size); - if (!s->refcount_block) - goto fail; - - s->refcount_table_offset = offset; - header.refcount_table_offset = cpu_to_be64(offset); - header.refcount_table_clusters = cpu_to_be32(1); - offset += s->cluster_size; - - s->refcount_table[0] = cpu_to_be64(offset); - s->refcount_block_offset = offset; - offset += s->cluster_size; - - /* update refcounts */ - create_refcount_update(s, 0, header_size); - create_refcount_update(s, s->l1_table_offset, l1_size * sizeof(uint64_t)); - create_refcount_update(s, s->refcount_table_offset, s->cluster_size); - create_refcount_update(s, s->refcount_block_offset, s->cluster_size); - - /* write all the data */ - write(fd, &header, sizeof(header)); - if (backing_file) { - write(fd, backing_file, backing_filename_len); - } - lseek(fd, s->l1_table_offset, SEEK_SET); - tmp = 0; - for(i = 0;i < l1_size; i++) { - write(fd, &tmp, sizeof(tmp)); - } - lseek(fd, s->refcount_table_offset, SEEK_SET); - write(fd, s->refcount_table, s->cluster_size); - - lseek(fd, s->refcount_block_offset, SEEK_SET); - write(fd, s->refcount_block, s->cluster_size); - - qemu_free(s->refcount_table); - qemu_free(s->refcount_block); - close(fd); - return 0; -fail: - qemu_free(s->refcount_table); - qemu_free(s->refcount_block); - close(fd); - return -ENOMEM; -} - -/* XXX: put compressed sectors first, then all the cluster aligned - tables to avoid losing bytes in alignment */ -static int qcow_write_compressed(struct disk_driver *bs, int64_t sector_num, - const uint8_t 
*buf, int nb_sectors) -{ - BDRVQcowState *s = bs->private; - z_stream strm; - int ret, out_len; - uint8_t *out_buf; - uint64_t cluster_offset; - - if (nb_sectors == 0) { - /* align end of file to a sector boundary to ease reading with - sector based I/Os */ - cluster_offset = 512 * s->total_sectors; - cluster_offset = (cluster_offset + 511) & ~511; - ftruncate(s->fd, cluster_offset); - return 0; - } - - if (nb_sectors != s->cluster_sectors) - return -EINVAL; - - out_buf = qemu_malloc(s->cluster_size + (s->cluster_size / 1000) + 128); - if (!out_buf) - return -ENOMEM; - - /* best compression, small window, no zlib header */ - memset(&strm, 0, sizeof(strm)); - ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, - Z_DEFLATED, -12, - 9, Z_DEFAULT_STRATEGY); - if (ret != 0) { - qemu_free(out_buf); - return -1; - } - - strm.avail_in = s->cluster_size; - strm.next_in = (uint8_t *)buf; - strm.avail_out = s->cluster_size; - strm.next_out = out_buf; - - ret = deflate(&strm, Z_FINISH); - if (ret != Z_STREAM_END && ret != Z_OK) { - qemu_free(out_buf); - deflateEnd(&strm); - return -1; - } - out_len = strm.next_out - out_buf; - - deflateEnd(&strm); - - if (ret != Z_STREAM_END || out_len >= s->cluster_size) { - /* could not compress: write normal cluster */ - qcow_write(bs, sector_num, buf, s->cluster_sectors); - } else { - cluster_offset = get_cluster_offset(bs, sector_num << 9, 2, - out_len, 0, 0); - cluster_offset &= s->cluster_offset_mask; - if (bdrv_pwrite(s->fd, cluster_offset, out_buf, out_len) != out_len) { - qemu_free(out_buf); - return -1; - } - } - - qemu_free(out_buf); - return 0; } static int qcow_submit(struct disk_driver *bs) diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/block-qcow.c --- a/tools/ioemu/block-qcow.c Fri Feb 29 09:18:01 2008 -0700 +++ b/tools/ioemu/block-qcow.c Fri Feb 29 09:19:58 2008 -0700 @@ -95,7 +95,7 @@ static int qcow_open(BlockDriverState *b int len, i, shift, ret; QCowHeader header; - ret = bdrv_file_open(&s->hd, filename, flags); + ret = bdrv_file_open(&s->hd, filename, flags | BDRV_O_EXTENDABLE); if (ret < 0) return ret; if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header)) diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/block-qcow2.c --- a/tools/ioemu/block-qcow2.c Fri Feb 29 09:18:01 2008 -0700 +++ b/tools/ioemu/block-qcow2.c Fri Feb 29 09:19:58 2008 -0700 @@ -191,7 +191,7 @@ static int qcow_open(BlockDriverState *b int len, i, shift, ret; QCowHeader header; - ret = bdrv_file_open(&s->hd, filename, flags); + ret = bdrv_file_open(&s->hd, filename, flags | BDRV_O_EXTENDABLE); if (ret < 0) return ret; if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header)) diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/block-raw.c --- a/tools/ioemu/block-raw.c Fri Feb 29 09:18:01 2008 -0700 +++ b/tools/ioemu/block-raw.c Fri Feb 29 09:19:58 2008 -0700 @@ -1489,5 +1489,7 @@ BlockDriver bdrv_host_device = { .bdrv_pread = raw_pread, .bdrv_pwrite = raw_pwrite, .bdrv_getlength = raw_getlength, + + .bdrv_flags = BLOCK_DRIVER_FLAG_EXTENDABLE }; #endif /* _WIN32 */ diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/block-vmdk.c --- a/tools/ioemu/block-vmdk.c Fri Feb 29 09:18:01 2008 -0700 +++ b/tools/ioemu/block-vmdk.c Fri Feb 29 09:19:58 2008 -0700 @@ -352,7 +352,7 @@ static int vmdk_open(BlockDriverState *b uint32_t magic; int l1_size, i, ret; - ret = bdrv_file_open(&s->hd, filename, flags); + ret = bdrv_file_open(&s->hd, filename, flags | BDRV_O_EXTENDABLE); if (ret < 0) return ret; if (bdrv_pread(s->hd, 0, &magic, sizeof(magic)) != sizeof(magic)) diff -r 
0b20ac6ec64a -r 71a8366fb212 tools/ioemu/block.c --- a/tools/ioemu/block.c Fri Feb 29 09:18:01 2008 -0700 +++ b/tools/ioemu/block.c Fri Feb 29 09:19:58 2008 -0700 @@ -123,20 +123,23 @@ static int bdrv_rw_badreq_sectors(BlockD static int bdrv_rw_badreq_sectors(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { - return + return ( nb_sectors < 0 || nb_sectors > bs->total_sectors || - sector_num > bs->total_sectors - nb_sectors; + sector_num > bs->total_sectors - nb_sectors + ) && !bs->extendable; } static int bdrv_rw_badreq_bytes(BlockDriverState *bs, int64_t offset, int count) { int64_t size = bs->total_sectors << SECTOR_BITS; - return + return ( count < 0 || count > size || - offset > size - count; + offset > size - count + ) && !bs->extendable; + } void bdrv_register(BlockDriver *bdrv) @@ -347,6 +350,12 @@ int bdrv_open2(BlockDriverState *bs, con bs->is_temporary = 0; bs->encrypted = 0; + if (flags & BDRV_O_EXTENDABLE) { + if (!(drv->bdrv_flags & BLOCK_DRIVER_FLAG_EXTENDABLE)) + return -ENOSYS; + bs->extendable = 1; + } + if (flags & BDRV_O_SNAPSHOT) { BlockDriverState *bs1; int64_t total_size; diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/block_int.h --- a/tools/ioemu/block_int.h Fri Feb 29 09:18:01 2008 -0700 +++ b/tools/ioemu/block_int.h Fri Feb 29 09:19:58 2008 -0700 @@ -23,6 +23,8 @@ */ #ifndef BLOCK_INT_H #define BLOCK_INT_H + +#define BLOCK_DRIVER_FLAG_EXTENDABLE 0x0001u struct BlockDriver { const char *format_name; @@ -76,6 +78,7 @@ struct BlockDriver { int (*bdrv_eject)(BlockDriverState *bs, int eject_flag); int (*bdrv_set_locked)(BlockDriverState *bs, int locked); + unsigned bdrv_flags; BlockDriverAIOCB *free_aiocb; struct BlockDriver *next; }; @@ -87,6 +90,7 @@ struct BlockDriverState { int removable; /* if true, the media can be removed */ int locked; /* if true, the media cannot temporarily be ejected */ int encrypted; /* if true, the media is encrypted */ + int extendable;/* if true, we may write out of original range */ /* event callback when inserting/removing */ void (*change_cb)(void *opaque); void *change_opaque; diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/hw/e1000.c --- a/tools/ioemu/hw/e1000.c Fri Feb 29 09:18:01 2008 -0700 +++ b/tools/ioemu/hw/e1000.c Fri Feb 29 09:19:58 2008 -0700 @@ -48,7 +48,7 @@ static int debugflags = DBGBIT(TXERR) | #endif #define IOPORT_SIZE 0x40 -#define PNPMMIO_SIZE 0x60000 +#define PNPMMIO_SIZE 0x20000 /* * HW models: diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/hw/xenfb.c --- a/tools/ioemu/hw/xenfb.c Fri Feb 29 09:18:01 2008 -0700 +++ b/tools/ioemu/hw/xenfb.c Fri Feb 29 09:19:58 2008 -0700 @@ -18,6 +18,12 @@ #include <xs.h> #include "xenfb.h" + +#ifdef CONFIG_STUBDOM +#include <semaphore.h> +#include <sched.h> +#include <fbfront.h> +#endif #ifndef BTN_LEFT #define BTN_LEFT 0x110 /* from <linux/input.h> */ @@ -592,7 +598,8 @@ static int xenfb_send_key(struct xenfb * } /* Send a relative mouse movement event */ -static int xenfb_send_motion(struct xenfb *xenfb, int rel_x, int rel_y, int rel_z) +static int xenfb_send_motion(struct xenfb *xenfb, + int rel_x, int rel_y, int rel_z) { union xenkbd_in_event event; @@ -606,7 +613,8 @@ static int xenfb_send_motion(struct xenf } /* Send an absolute mouse movement event */ -static int xenfb_send_position(struct xenfb *xenfb, int abs_x, int abs_y, int abs_z) +static int xenfb_send_position(struct xenfb *xenfb, + int abs_x, int abs_y, int rel_z) { union xenkbd_in_event event; @@ -614,7 +622,7 @@ static int xenfb_send_position(struct xe event.type = XENKBD_TYPE_POS; event.pos.abs_x = 
abs_x; event.pos.abs_y = abs_y; - event.pos.abs_z = abs_z; + event.pos.rel_z = rel_z; return xenfb_kbd_event(xenfb, &event); } @@ -1124,12 +1132,10 @@ static void xenfb_guest_copy(struct xenf dpy_update(xenfb->ds, x, y, w, h); } -/* QEMU display state changed, so refresh the framebuffer copy */ -/* XXX - can we optimize this, or the next func at all ? */ +/* Periodic update of display, no need for any in our case */ static void xenfb_update(void *opaque) { struct xenfb *xenfb = opaque; - xenfb_guest_copy(xenfb, 0, 0, xenfb->width, xenfb->height); } /* QEMU display state changed, so refresh the framebuffer copy */ @@ -1169,6 +1175,204 @@ static int xenfb_register_console(struct return 0; } +#ifdef CONFIG_STUBDOM +static struct semaphore kbd_sem = __SEMAPHORE_INITIALIZER(kbd_sem, 0); +static struct kbdfront_dev *kbd_dev; +static char *kbd_path, *fb_path; + +static unsigned char linux2scancode[KEY_MAX + 1]; + +#define WIDTH 1024 +#define HEIGHT 768 +#define DEPTH 32 +#define LINESIZE (1280 * (DEPTH / 8)) +#define MEMSIZE (LINESIZE * HEIGHT) + +int xenfb_connect_vkbd(const char *path) +{ + kbd_path = strdup(path); + return 0; +} + +int xenfb_connect_vfb(const char *path) +{ + fb_path = strdup(path); + return 0; +} + +static void xenfb_pv_update(DisplayState *s, int x, int y, int w, int h) +{ + struct fbfront_dev *fb_dev = s->opaque; + fbfront_update(fb_dev, x, y, w, h); +} + +static void xenfb_pv_resize(DisplayState *s, int w, int h) +{ + struct fbfront_dev *fb_dev = s->opaque; + fprintf(stderr,"resize to %dx%d required\n", w, h); + s->width = w; + s->height = h; + /* TODO: send resize event if supported */ + memset(s->data, 0, MEMSIZE); + fbfront_update(fb_dev, 0, 0, WIDTH, HEIGHT); +} + +static void xenfb_pv_colourdepth(DisplayState *s, int depth) +{ + /* TODO: send redepth event if supported */ + fprintf(stderr,"redepth to %d required\n", depth); +} + +static void xenfb_kbd_handler(void *opaque) +{ +#define KBD_NUM_BATCH 64 + union xenkbd_in_event buf[KBD_NUM_BATCH]; + int n, i; + DisplayState *s = opaque; + static int buttons; + static int x, y; + + n = kbdfront_receive(kbd_dev, buf, KBD_NUM_BATCH); + for (i = 0; i < n; i++) { + switch (buf[i].type) { + + case XENKBD_TYPE_MOTION: + fprintf(stderr, "FB backend sent us relative mouse motion event!\n"); + break; + + case XENKBD_TYPE_POS: + { + int new_x = buf[i].pos.abs_x; + int new_y = buf[i].pos.abs_y; + if (new_x >= s->width) + new_x = s->width - 1; + if (new_y >= s->height) + new_y = s->height - 1; + if (kbd_mouse_is_absolute()) { + kbd_mouse_event( + new_x * 0x7FFF / (s->width - 1), + new_y * 0x7FFF / (s->height - 1), + buf[i].pos.rel_z, + buttons); + } else { + kbd_mouse_event( + new_x - x, + new_y - y, + buf[i].pos.rel_z, + buttons); + } + x = new_x; + y = new_y; + break; + } + + case XENKBD_TYPE_KEY: + { + int keycode = buf[i].key.keycode; + int button = 0; + + if (keycode == BTN_LEFT) + button = MOUSE_EVENT_LBUTTON; + else if (keycode == BTN_RIGHT) + button = MOUSE_EVENT_RBUTTON; + else if (keycode == BTN_MIDDLE) + button = MOUSE_EVENT_MBUTTON; + + if (button) { + if (buf[i].key.pressed) + buttons |= button; + else + buttons &= ~button; + if (kbd_mouse_is_absolute()) + kbd_mouse_event( + x * 0x7FFF / s->width, + y * 0x7FFF / s->height, + 0, + buttons); + else + kbd_mouse_event(0, 0, 0, buttons); + } else { + int scancode = linux2scancode[keycode]; + if (!scancode) { + fprintf(stderr, "Can't convert keycode %x to scancode\n", keycode); + break; + } + if (scancode & 0x80) { + kbd_put_keycode(0xe0); + scancode &= 0x7f; + } + if 
(!buf[i].key.pressed) + scancode |= 0x80; + kbd_put_keycode(scancode); + } + break; + } + } + } +} + +static void xenfb_pv_refresh(DisplayState *ds) +{ + vga_hw_update(); +} + +static void kbdfront_thread(void *p) +{ + int scancode, keycode; + kbd_dev = init_kbdfront(p, 1); + if (!kbd_dev) { + fprintf(stderr,"can't open keyboard\n"); + exit(1); + } + up(&kbd_sem); + for (scancode = 0; scancode < 128; scancode++) { + keycode = atkbd_set2_keycode[atkbd_unxlate_table[scancode]]; + linux2scancode[keycode] = scancode; + keycode = atkbd_set2_keycode[atkbd_unxlate_table[scancode] | 0x80]; + linux2scancode[keycode] = scancode | 0x80; + } +} + +int xenfb_pv_display_init(DisplayState *ds) +{ + void *data; + struct fbfront_dev *fb_dev; + int kbd_fd; + + if (!fb_path || !kbd_path) + return -1; + + create_thread("kbdfront", kbdfront_thread, (void*) kbd_path); + + data = qemu_memalign(PAGE_SIZE, VGA_RAM_SIZE); + fb_dev = init_fbfront(fb_path, data, WIDTH, HEIGHT, DEPTH, LINESIZE, MEMSIZE); + if (!fb_dev) { + fprintf(stderr,"can't open frame buffer\n"); + exit(1); + } + free(fb_path); + + down(&kbd_sem); + free(kbd_path); + + kbd_fd = kbdfront_open(kbd_dev); + qemu_set_fd_handler(kbd_fd, xenfb_kbd_handler, NULL, ds); + + ds->data = data; + ds->linesize = LINESIZE; + ds->depth = DEPTH; + ds->bgr = 0; + ds->width = WIDTH; + ds->height = HEIGHT; + ds->dpy_update = xenfb_pv_update; + ds->dpy_resize = xenfb_pv_resize; + ds->dpy_colourdepth = NULL; //xenfb_pv_colourdepth; + ds->dpy_refresh = xenfb_pv_refresh; + ds->opaque = fb_dev; + return 0; +} +#endif + /* * Local variables: * c-indent-level: 8 diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/keymaps.c --- a/tools/ioemu/keymaps.c Fri Feb 29 09:18:01 2008 -0700 +++ b/tools/ioemu/keymaps.c Fri Feb 29 09:19:58 2008 -0700 @@ -126,11 +126,11 @@ static kbd_layout_t *parse_keyboard_layo if (rest && strstr(rest, "numlock")) { add_to_key_range(&k->keypad_range, keycode); add_to_key_range(&k->numlock_range, keysym); - fprintf(stderr, "keypad keysym %04x keycode %d\n", keysym, keycode); + //fprintf(stderr, "keypad keysym %04x keycode %d\n", keysym, keycode); } if (rest && strstr(rest, "shift")) { add_to_key_range(&k->shift_range, keysym); - fprintf(stderr, "shift keysym %04x keycode %d\n", keysym, keycode); + //fprintf(stderr, "shift keysym %04x keycode %d\n", keysym, keycode); } /* if(keycode&0x80) diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/monitor.c --- a/tools/ioemu/monitor.c Fri Feb 29 09:18:01 2008 -0700 +++ b/tools/ioemu/monitor.c Fri Feb 29 09:19:58 2008 -0700 @@ -2520,7 +2520,7 @@ static void monitor_handle_command1(void static void monitor_start_input(void) { - readline_start("(HVMXen) ", 0, monitor_handle_command1, NULL); + readline_start("(qemu) ", 0, monitor_handle_command1, NULL); } static void term_event(void *opaque, int event) diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/vl.c --- a/tools/ioemu/vl.c Fri Feb 29 09:18:01 2008 -0700 +++ b/tools/ioemu/vl.c Fri Feb 29 09:19:58 2008 -0700 @@ -7611,9 +7611,7 @@ int main(int argc, char **argv) } } - /* Now send logs to our named config */ - sprintf(qemu_dm_logfilename, "/var/log/xen/qemu-dm-%d.log", domid); - cpu_set_log_filename(qemu_dm_logfilename); + cpu_set_log(0); #ifndef NO_DAEMONIZE if (daemonize && !nographic && vnc_display == NULL && vncunused == 0) { @@ -7831,6 +7829,10 @@ int main(int argc, char **argv) init_ioports(); /* terminal init */ +#ifdef CONFIG_STUBDOM + if (xenfb_pv_display_init(ds) == 0) { + } else +#endif if (nographic) { dumb_display_init(ds); } else if (vnc_display != NULL || 
vncunused != 0) { diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/vl.h --- a/tools/ioemu/vl.h Fri Feb 29 09:18:01 2008 -0700 +++ b/tools/ioemu/vl.h Fri Feb 29 09:19:58 2008 -0700 @@ -614,6 +614,8 @@ typedef struct QEMUSnapshotInfo { use a disk image format on top of it (default for bdrv_file_open()) */ +#define BDRV_O_EXTENDABLE 0x0080 /* allow writes out of original size range; + only effective for some drivers */ void bdrv_init(void); BlockDriver *bdrv_find_format(const char *format_name); @@ -1525,6 +1527,11 @@ int xenstore_vm_write(int domid, char *k int xenstore_vm_write(int domid, char *key, char *val); char *xenstore_vm_read(int domid, char *key, unsigned int *len); +/* xenfb.c */ +int xenfb_pv_display_init(DisplayState *ds); +int xenfb_connect_vkbd(const char *path); +int xenfb_connect_vfb(const char *path); + /* helper2.c */ extern long time_offset; void timeoffset_get(void); diff -r 0b20ac6ec64a -r 71a8366fb212 tools/ioemu/xenstore.c --- a/tools/ioemu/xenstore.c Fri Feb 29 09:18:01 2008 -0700 +++ b/tools/ioemu/xenstore.c Fri Feb 29 09:19:58 2008 -0700 @@ -238,6 +238,37 @@ void xenstore_parse_domain_config(int do } } +#ifdef CONFIG_STUBDOM + if (pasprintf(&buf, "%s/device/vkbd", path) == -1) + goto out; + + free(e); + e = xs_directory(xsh, XBT_NULL, buf, &num); + + if (e) { + for (i = 0; i < num; i++) { + if (pasprintf(&buf, "%s/device/vkbd/%s", path, e[i]) == -1) + continue; + xenfb_connect_vkbd(buf); + } + } + + if (pasprintf(&buf, "%s/device/vfb", path) == -1) + goto out; + + free(e); + e = xs_directory(xsh, XBT_NULL, buf, &num); + + if (e) { + for (i = 0; i < num; i++) { + if (pasprintf(&buf, "%s/device/vfb/%s", path, e[i]) == -1) + continue; + xenfb_connect_vfb(buf); + } + } +#endif + + /* Set a watch for log-dirty requests from the migration tools */ if (pasprintf(&buf, "/local/domain/0/device-model/%u/logdirty/next-active", domid) != -1) { diff -r 0b20ac6ec64a -r 71a8366fb212 tools/python/xen/xend/XendAPI.py --- a/tools/python/xen/xend/XendAPI.py Fri Feb 29 09:18:01 2008 -0700 +++ b/tools/python/xen/xend/XendAPI.py Fri Feb 29 09:19:58 2008 -0700 @@ -1761,9 +1761,10 @@ class XendAPI(object): resource = other_config.get("resource", 0) port = other_config.get("port", 0) + node = other_config.get("node", 0) xendom.domain_migrate(xeninfo.getDomid(), destination_url, - bool(live), resource, port) + bool(live), resource, port, node) return xen_api_success_void() def VM_save(self, _, vm_ref, dest, checkpoint): diff -r 0b20ac6ec64a -r 71a8366fb212 tools/python/xen/xend/XendCheckpoint.py --- a/tools/python/xen/xend/XendCheckpoint.py Fri Feb 29 09:18:01 2008 -0700 +++ b/tools/python/xen/xend/XendCheckpoint.py Fri Feb 29 09:19:58 2008 -0700 @@ -22,6 +22,7 @@ from xen.xend.XendLogging import log from xen.xend.XendLogging import log from xen.xend.XendConfig import XendConfig from xen.xend.XendConstants import * +from xen.xend import XendNode SIGNATURE = "LinuxGuestRecord" QEMU_SIGNATURE = "QemuDeviceModelRecord" @@ -56,10 +57,23 @@ def read_exact(fd, size, errmsg): return buf -def save(fd, dominfo, network, live, dst, checkpoint=False): +def insert_after(list, pred, value): + for i,k in enumerate(list): + if type(k) == type([]): + if k[0] == pred: + list.insert (i+1, value) + return + + +def save(fd, dominfo, network, live, dst, checkpoint=False, node=-1): write_exact(fd, SIGNATURE, "could not write guest state file: signature") - config = sxp.to_string(dominfo.sxpr()) + sxprep = dominfo.sxpr() + + if node > -1: + insert_after(sxprep,'vcpus',['node', str(node)]) + + config = 
sxp.to_string(sxprep) domain_name = dominfo.getName() # Rename the domain temporarily, so that we don't get a name clash if this @@ -191,6 +205,21 @@ def restore(xd, fd, dominfo = None, paus dominfo.resume() else: dominfo = xd.restore_(vmconfig) + + # repin domain vcpus if a target node number was specified + # this is done prior to memory allocation to aide in memory + # distribution for NUMA systems. + nodenr = -1 + for i,l in enumerate(vmconfig): + if type(l) == type([]): + if l[0] == 'node': + nodenr = int(l[1]) + + if nodenr >= 0: + node_to_cpu = XendNode.instance().xc.physinfo()['node_to_cpu'] + if nodenr < len(node_to_cpu): + for v in range(0, dominfo.info['VCPUs_max']): + xc.vcpu_setaffinity(dominfo.domid, v, node_to_cpu[nodenr]) store_port = dominfo.getStorePort() console_port = dominfo.getConsolePort() diff -r 0b20ac6ec64a -r 71a8366fb212 tools/python/xen/xend/XendDomain.py --- a/tools/python/xen/xend/XendDomain.py Fri Feb 29 09:18:01 2008 -0700 +++ b/tools/python/xen/xend/XendDomain.py Fri Feb 29 09:19:58 2008 -0700 @@ -865,7 +865,7 @@ class XendDomain: raise XendInvalidDomain(domname) if dominfo.getDomid() == DOM0_ID: - raise XendError("Cannot save privileged domain %s" % domname) + raise XendError("Cannot suspend privileged domain %s" % domname) if dominfo._stateGet() != DOM_STATE_RUNNING: raise VMBadState("Domain is not running", @@ -910,7 +910,7 @@ class XendDomain: raise XendInvalidDomain(domname) if dominfo.getDomid() == DOM0_ID: - raise XendError("Cannot save privileged domain %s" % domname) + raise XendError("Cannot resume privileged domain %s" % domname) if dominfo._stateGet() != XEN_API_VM_POWER_STATE_SUSPENDED: raise XendError("Cannot resume domain that is not suspended.") @@ -1258,7 +1258,7 @@ class XendDomain: return val - def domain_migrate(self, domid, dst, live=False, resource=0, port=0): + def domain_migrate(self, domid, dst, live=False, resource=0, port=0, node=-1): """Start domain migration. @param domid: Domain ID or Name @@ -1271,6 +1271,8 @@ class XendDomain: @type live: bool @keyword resource: not used?? 
@rtype: None + @keyword node: use node number for target + @rtype: int @raise XendError: Failed to migrate @raise XendInvalidDomain: Domain is not valid """ @@ -1299,7 +1301,7 @@ class XendDomain: sock.send("receive\n") sock.recv(80) - XendCheckpoint.save(sock.fileno(), dominfo, True, live, dst) + XendCheckpoint.save(sock.fileno(), dominfo, True, live, dst, node=node) sock.close() def domain_save(self, domid, dst, checkpoint=False): diff -r 0b20ac6ec64a -r 71a8366fb212 tools/python/xen/xend/XendDomainInfo.py --- a/tools/python/xen/xend/XendDomainInfo.py Fri Feb 29 09:18:01 2008 -0700 +++ b/tools/python/xen/xend/XendDomainInfo.py Fri Feb 29 09:19:58 2008 -0700 @@ -1406,9 +1406,6 @@ class XendDomainInfo: def setWeight(self, cpu_weight): self.info['vcpus_params']['weight'] = cpu_weight - def setResume(self, state): - self._resume = state - def getRestartCount(self): return self._readVm('xend/restart_count') @@ -1963,6 +1960,39 @@ class XendDomainInfo: if self.info['cpus'] is not None and len(self.info['cpus']) > 0: for v in range(0, self.info['VCPUs_max']): xc.vcpu_setaffinity(self.domid, v, self.info['cpus']) + else: + info = xc.physinfo() + if info['nr_nodes'] > 1: + node_memory_list = info['node_to_memory'] + needmem = self.image.getRequiredAvailableMemory(self.info['memory_dynamic_max']) / 1024 + candidate_node_list = [] + for i in range(0, info['nr_nodes']): + if node_memory_list[i] >= needmem: + candidate_node_list.append(i) + if candidate_node_list is None or len(candidate_node_list) == 1: + index = node_memory_list.index( max(node_memory_list) ) + cpumask = info['node_to_cpu'][index] + else: + nodeload = [0] + nodeload = nodeload * info['nr_nodes'] + from xen.xend import XendDomain + doms = XendDomain.instance().list('all') + for dom in doms: + cpuinfo = dom.getVCPUInfo() + for vcpu in sxp.children(cpuinfo, 'vcpu'): + def vinfo(n, t): + return t(sxp.child_value(vcpu, n)) + cpumap = vinfo('cpumap', list) + for i in candidate_node_list: + node_cpumask = info['node_to_cpu'][i] + for j in node_cpumask: + if j in cpumap: + nodeload[i] += 1 + break + index = nodeload.index( min(nodeload) ) + cpumask = info['node_to_cpu'][index] + for v in range(0, self.info['VCPUs_max']): + xc.vcpu_setaffinity(self.domid, v, cpumask) # Use architecture- and image-specific calculations to determine # the various headrooms necessary, given the raw configured diff -r 0b20ac6ec64a -r 71a8366fb212 tools/python/xen/xend/image.py --- a/tools/python/xen/xend/image.py Fri Feb 29 09:18:01 2008 -0700 +++ b/tools/python/xen/xend/image.py Fri Feb 29 09:19:58 2008 -0700 @@ -296,7 +296,34 @@ class ImageHandler: { 'dom': self.vm.getDomid(), 'read': True, 'write': True }) log.info("spawning device models: %s %s", self.device_model, args) # keep track of pid and spawned options to kill it later - self.pid = os.spawnve(os.P_NOWAIT, self.device_model, args, env) + + logfile = "/var/log/xen/qemu-dm-%s.log" % str(self.vm.info['name_label']) + if os.path.exists(logfile): + if os.path.exists(logfile + ".1"): + os.unlink(logfile + ".1") + os.rename(logfile, logfile + ".1") + + null = os.open("/dev/null", os.O_RDONLY) + logfd = os.open(logfile, os.O_WRONLY|os.O_CREAT|os.O_TRUNC) + + pid = os.fork() + if pid == 0: #child + try: + os.dup2(null, 0) + os.dup2(logfd, 1) + os.dup2(logfd, 2) + os.close(null) + os.close(logfd) + try: + os.execve(self.device_model, args, env) + except: + os._exit(127) + except: + os._exit(127) + else: + self.pid = pid + os.close(null) + os.close(logfd) self.vm.storeDom("image/device-model-pid", self.pid) 
log.info("device model pid: %d", self.pid) diff -r 0b20ac6ec64a -r 71a8366fb212 tools/python/xen/xm/main.py --- a/tools/python/xen/xm/main.py Fri Feb 29 09:18:01 2008 -0700 +++ b/tools/python/xen/xm/main.py Fri Feb 29 09:19:58 2008 -0700 @@ -699,9 +699,6 @@ def xm_save(args): err(opterr) sys.exit(1) - dom = params[0] - savefile = params[1] - checkpoint = False for (k, v) in options: if k in ['-c', '--checkpoint']: @@ -710,9 +707,9 @@ def xm_save(args): if len(params) != 2: err("Wrong number of parameters") usage('save') - sys.exit(1) - - savefile = os.path.abspath(savefile) + + dom = params[0] + savefile = os.path.abspath(params[1]) if not os.access(os.path.dirname(savefile), os.W_OK): err("xm save: Unable to create file %s" % savefile) diff -r 0b20ac6ec64a -r 71a8366fb212 tools/python/xen/xm/migrate.py --- a/tools/python/xen/xm/migrate.py Fri Feb 29 09:18:01 2008 -0700 +++ b/tools/python/xen/xm/migrate.py Fri Feb 29 09:19:58 2008 -0700 @@ -43,6 +43,10 @@ gopts.opt('port', short='p', val='portnu fn=set_int, default=0, use="Use specified port for migration.") +gopts.opt('node', short='n', val='nodenum', + fn=set_int, default=-1, + use="Use specified NUMA node on target.") + gopts.opt('resource', short='r', val='MBIT', fn=set_int, default=0, use="Set level of resource usage for migration.") @@ -65,11 +69,13 @@ def main(argv): vm_ref = get_single_vm(dom) other_config = { "port": opts.vals.port, - "resource": opts.vals.resource + "resource": opts.vals.resource, + "node": opts.vals.node } server.xenapi.VM.migrate(vm_ref, dst, bool(opts.vals.live), other_config) else: server.xend.domain.migrate(dom, dst, opts.vals.live, opts.vals.resource, - opts.vals.port) + opts.vals.port, + opts.vals.node) diff -r 0b20ac6ec64a -r 71a8366fb212 tools/xenstat/libxenstat/src/xenstat_solaris.c --- a/tools/xenstat/libxenstat/src/xenstat_solaris.c Fri Feb 29 09:18:01 2008 -0700 +++ b/tools/xenstat/libxenstat/src/xenstat_solaris.c Fri Feb 29 09:19:58 2008 -0700 @@ -113,49 +113,23 @@ static void xenstat_uninit_devs(xenstat_ priv->kc = NULL; } -static int parse_nic(const char *nic, char *module, int *instance) -{ - const char *c; - - for (c = &nic[strlen(nic) - 1]; c != nic && isdigit(*c); c--) - ; - - if (c == nic) - return 0; - - c++; - - if (sscanf(c, "%d", instance) != 1) - return 0; - - strncpy(module, nic, c - nic); - module[c - nic] = '\0'; - return 1; -} - static int update_dev_stats(priv_data_t *priv, stdevice_t *dev) { - char mod[256]; - const char *name; - int inst; kstat_t *ksp; + if (kstat_chain_update(priv->kc) == -1) + return 0; + if (dev->type == DEVICE_NIC) { - if (!parse_nic(dev->name, mod, &inst)) - return 0; - name = "mac"; + ksp = kstat_lookup(priv->kc, "link", 0, (char *)dev->name); } else { - strcpy(mod, "xdb"); - inst = dev->instance; - name = "req_statistics"; - } - - if (kstat_chain_update(priv->kc) == -1) - return 0; - - ksp = kstat_lookup(priv->kc, mod, inst, (char *)name); + ksp = kstat_lookup(priv->kc, "xdb", dev->instance, + (char *)"req_statistics"); + } + if (ksp == NULL) return 0; + if (kstat_read(priv->kc, ksp, NULL) == -1) return 0; diff -r 0b20ac6ec64a -r 71a8366fb212 tools/xentrace/xentrace.c --- a/tools/xentrace/xentrace.c Fri Feb 29 09:18:01 2008 -0700 +++ b/tools/xentrace/xentrace.c Fri Feb 29 09:19:58 2008 -0700 @@ -15,7 +15,6 @@ #include <sys/mman.h> #include <sys/stat.h> #include <sys/types.h> -#include <sys/vfs.h> #include <fcntl.h> #include <unistd.h> #include <errno.h> @@ -25,6 +24,7 @@ #include <getopt.h> #include <assert.h> #include <sys/poll.h> +#include <sys/statvfs.h> 
#include <xen/xen.h> #include <xen/trace.h> @@ -87,7 +87,7 @@ void write_buffer(unsigned int cpu, unsi void write_buffer(unsigned int cpu, unsigned char *start, int size, int total_size, int outfd) { - struct statfs stat; + struct statvfs stat; size_t written = 0; if ( opts.disk_rsvd != 0 ) @@ -95,13 +95,13 @@ void write_buffer(unsigned int cpu, unsi unsigned long long freespace; /* Check that filesystem has enough space. */ - if ( fstatfs (outfd, &stat) ) + if ( fstatvfs (outfd, &stat) ) { fprintf(stderr, "Statfs failed!\n"); goto fail; } - freespace = stat.f_bsize * (unsigned long long)stat.f_bfree; + freespace = stat.f_frsize * (unsigned long long)stat.f_bfree; if ( total_size ) freespace -= total_size; diff -r 0b20ac6ec64a -r 71a8366fb212 unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c --- a/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c Fri Feb 29 09:18:01 2008 -0700 +++ b/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c Fri Feb 29 09:19:58 2008 -0700 @@ -71,7 +71,7 @@ static int bp_suspend(void) return suspend_cancelled; } -int __xen_suspend(int fast_suspend) +int __xen_suspend(int fast_suspend, void (*resume_notifier)(void)) { int err, suspend_cancelled, nr_cpus; struct ap_suspend_info info; @@ -101,6 +101,7 @@ int __xen_suspend(int fast_suspend) local_irq_disable(); suspend_cancelled = bp_suspend(); + resume_notifier(); local_irq_enable(); smp_mb(); diff -r 0b20ac6ec64a -r 71a8366fb212 xen/arch/ia64/xen/machine_kexec.c --- a/xen/arch/ia64/xen/machine_kexec.c Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/arch/ia64/xen/machine_kexec.c Fri Feb 29 09:19:58 2008 -0700 @@ -24,6 +24,7 @@ #include <linux/cpu.h> #include <linux/notifier.h> #include <asm/dom_fw_dom0.h> +#include <asm-generic/sections.h> #define kexec_flush_icache_page(page) \ do { \ @@ -144,6 +145,54 @@ void machine_reboot_kexec(xen_kexec_imag machine_kexec(image); } +static int machine_kexec_get_xen(xen_kexec_range_t *range) +{ + range->start = range->start = ia64_tpa(_text); + range->size = (unsigned long)_end - (unsigned long)_text; + return 0; +} + +#define ELF_PAGE_SHIFT 16 +#define ELF_PAGE_SIZE (__IA64_UL_CONST(1) << ELF_PAGE_SHIFT) +#define ELF_PAGE_MASK (~(ELF_PAGE_SIZE - 1)) + +static int machine_kexec_get_xenheap(xen_kexec_range_t *range) +{ + range->start = (ia64_tpa(_end) + (ELF_PAGE_SIZE - 1)) & ELF_PAGE_MASK; + range->size = (unsigned long)xenheap_phys_end - + (unsigned long)range->start; + return 0; +} + +static int machine_kexec_get_boot_param(xen_kexec_range_t *range) +{ + range->start = __pa(ia64_boot_param); + range->size = sizeof(*ia64_boot_param); + return 0; +} + +static int machine_kexec_get_efi_memmap(xen_kexec_range_t *range) +{ + range->start = ia64_boot_param->efi_memmap; + range->size = ia64_boot_param->efi_memmap_size; + return 0; +} + +int machine_kexec_get(xen_kexec_range_t *range) +{ + switch (range->range) { + case KEXEC_RANGE_MA_XEN: + return machine_kexec_get_xen(range); + case KEXEC_RANGE_MA_XENHEAP: + return machine_kexec_get_xenheap(range); + case KEXEC_RANGE_MA_BOOT_PARAM: + return machine_kexec_get_boot_param(range); + case KEXEC_RANGE_MA_EFI_MEMMAP: + return machine_kexec_get_efi_memmap(range); + } + return -EINVAL; +} + /* * Local variables: * mode: C diff -r 0b20ac6ec64a -r 71a8366fb212 xen/arch/powerpc/machine_kexec.c --- a/xen/arch/powerpc/machine_kexec.c Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/arch/powerpc/machine_kexec.c Fri Feb 29 09:19:58 2008 -0700 @@ -24,6 +24,12 @@ void machine_kexec(xen_kexec_image_t *im printk("STUB: " __FILE__ ": %s: not 
implemented\n", __FUNCTION__); } +int machine_kexec_get(xen_kexec_image_t *image) +{ + printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__); + return -1; +} + /* * Local variables: * mode: C diff -r 0b20ac6ec64a -r 71a8366fb212 xen/arch/x86/machine_kexec.c --- a/xen/arch/x86/machine_kexec.c Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/arch/x86/machine_kexec.c Fri Feb 29 09:19:58 2008 -0700 @@ -23,6 +23,9 @@ typedef void (*relocate_new_kernel_t)( unsigned long indirection_page, unsigned long *page_list, unsigned long start_address); + +extern int machine_kexec_get_xen(xen_kexec_range_t *range); + int machine_kexec_load(int type, int slot, xen_kexec_image_t *image) { @@ -135,6 +138,13 @@ void machine_kexec(xen_kexec_image_t *im } } +int machine_kexec_get(xen_kexec_range_t *range) +{ + if (range->range != KEXEC_RANGE_MA_XEN) + return -EINVAL; + return machine_kexec_get_xen(range); +} + /* * Local variables: * mode: C diff -r 0b20ac6ec64a -r 71a8366fb212 xen/arch/x86/mm/shadow/multi.c --- a/xen/arch/x86/mm/shadow/multi.c Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/arch/x86/mm/shadow/multi.c Fri Feb 29 09:19:58 2008 -0700 @@ -55,12 +55,6 @@ * l3-and-l2h-only shadow mode for PAE PV guests that would allow them * to share l2h pages again. * - * GUEST_WALK_TABLES TLB FLUSH COALESCE - * guest_walk_tables can do up to three remote TLB flushes as it walks to - * the first l1 of a new pagetable. Should coalesce the flushes to the end, - * and if we do flush, re-do the walk. If anything has changed, then - * pause all the other vcpus and do the walk *again*. - * * PSE disabled / PSE36 * We don't support any modes other than PSE enabled, PSE36 disabled. * Neither of those would be hard to change, but we'd need to be able to @@ -246,10 +240,95 @@ static uint32_t set_ad_bits(void *guest_ return 0; } +/* This validation is called with lock held, and after write permission + * removal. Then check is atomic and no more inconsistent content can + * be observed before lock is released + * + * Return 1 to indicate success and 0 for inconsistency + */ +static inline uint32_t +shadow_check_gwalk(struct vcpu *v, unsigned long va, walk_t *gw) +{ + struct domain *d = v->domain; + guest_l1e_t *l1p; + guest_l2e_t *l2p; +#if GUEST_PAGING_LEVELS >= 4 + guest_l3e_t *l3p; + guest_l4e_t *l4p; +#endif + int mismatch = 0; + + ASSERT(shadow_locked_by_me(d)); + + if ( gw->version == + atomic_read(&d->arch.paging.shadow.gtable_dirty_version) ) + return 1; + + /* We may consider caching guest page mapping from last + * guest table walk. However considering this check happens + * relatively less-frequent, and a bit burden here to + * remap guest page is better than caching mapping in each + * guest table walk. + * + * Also when inconsistency occurs, simply return to trigger + * another fault instead of re-validate new path to make + * logic simple. + */ + perfc_incr(shadow_check_gwalk); +#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */ +#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... 
*/ + l4p = (guest_l4e_t *)v->arch.paging.shadow.guest_vtable; + mismatch |= (gw->l4e.l4 != l4p[guest_l4_table_offset(va)].l4); + l3p = sh_map_domain_page(gw->l3mfn); + mismatch |= (gw->l3e.l3 != l3p[guest_l3_table_offset(va)].l3); + sh_unmap_domain_page(l3p); +#else + mismatch |= (gw->l3e.l3 != + v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)].l3); +#endif + l2p = sh_map_domain_page(gw->l2mfn); + mismatch |= (gw->l2e.l2 != l2p[guest_l2_table_offset(va)].l2); + sh_unmap_domain_page(l2p); +#else + l2p = (guest_l2e_t *)v->arch.paging.shadow.guest_vtable; + mismatch |= (gw->l2e.l2 != l2p[guest_l2_table_offset(va)].l2); +#endif + if ( !(guest_supports_superpages(v) && + (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)) ) + { + l1p = sh_map_domain_page(gw->l1mfn); + mismatch |= (gw->l1e.l1 != l1p[guest_l1_table_offset(va)].l1); + sh_unmap_domain_page(l1p); + } + + return !mismatch; +} + +/* Remove write access permissions from a gwalk_t in a batch, and + * return OR-ed result for TLB flush hint + */ +static inline uint32_t +gw_remove_write_accesses(struct vcpu *v, unsigned long va, walk_t *gw) +{ + int rc = 0; + +#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */ +#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */ + rc = sh_remove_write_access(v, gw->l3mfn, 3, va); +#endif + rc |= sh_remove_write_access(v, gw->l2mfn, 2, va); +#endif + if ( !(guest_supports_superpages(v) && + (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)) ) + rc |= sh_remove_write_access(v, gw->l1mfn, 1, va); + + return rc; +} + /* Walk the guest pagetables, after the manner of a hardware walker. * * Inputs: a vcpu, a virtual address, a walk_t to fill, a - * pointer to a pagefault code, and a flag "shadow_op". + * pointer to a pagefault code * * We walk the vcpu's guest pagetables, filling the walk_t with what we * see and adding any Accessed and Dirty bits that are needed in the @@ -257,10 +336,9 @@ static uint32_t set_ad_bits(void *guest_ * we go. For the purposes of reading pagetables we treat all non-RAM * memory as contining zeroes. * - * If "shadow_op" is non-zero, we are serving a genuine guest memory access, - * and must (a) be under the shadow lock, and (b) remove write access - * from any guest PT pages we see, as we will be shadowing them soon - * and will rely on the contents' not having changed. + * The walk is done in a lock-free style, with some sanity check postponed + * after grabbing shadow lock later. Those delayed checks will make sure + * no inconsistent mapping being translated into shadow page table. * * Returns 0 for success, or the set of permission bits that we failed on * if the walk did not complete. @@ -268,8 +346,7 @@ static uint32_t set_ad_bits(void *guest_ * checked the old return code anyway. */ static uint32_t -guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, - uint32_t pfec, int shadow_op) +guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, uint32_t pfec) { struct domain *d = v->domain; p2m_type_t p2mt; @@ -282,11 +359,12 @@ guest_walk_tables(struct vcpu *v, unsign uint32_t gflags, mflags, rc = 0; int pse; - ASSERT(!shadow_op || shadow_locked_by_me(d)); - perfc_incr(shadow_guest_walk); memset(gw, 0, sizeof(*gw)); gw->va = va; + + gw->version = atomic_read(&d->arch.paging.shadow.gtable_dirty_version); + rmb(); /* Mandatory bits that must be set in every entry. We invert NX, to * calculate as if there were an "X" bit that allowed access. 
@@ -312,9 +390,7 @@ guest_walk_tables(struct vcpu *v, unsign goto out; } ASSERT(mfn_valid(gw->l3mfn)); - /* This mfn is a pagetable: make sure the guest can't write to it. */ - if ( shadow_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 ) - flush_tlb_mask(d->domain_dirty_cpumask); + /* Get the l3e and check its flags*/ l3p = sh_map_domain_page(gw->l3mfn); gw->l3e = l3p[guest_l3_table_offset(va)]; @@ -343,9 +419,7 @@ guest_walk_tables(struct vcpu *v, unsign goto out; } ASSERT(mfn_valid(gw->l2mfn)); - /* This mfn is a pagetable: make sure the guest can't write to it. */ - if ( shadow_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 ) - flush_tlb_mask(d->domain_dirty_cpumask); + /* Get the l2e */ l2p = sh_map_domain_page(gw->l2mfn); gw->l2e = l2p[guest_l2_table_offset(va)]; @@ -403,10 +477,6 @@ guest_walk_tables(struct vcpu *v, unsign goto out; } ASSERT(mfn_valid(gw->l1mfn)); - /* This mfn is a pagetable: make sure the guest can't write to it. */ - if ( shadow_op - && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 ) - flush_tlb_mask(d->domain_dirty_cpumask); l1p = sh_map_domain_page(gw->l1mfn); gw->l1e = l1p[guest_l1_table_offset(va)]; gflags = guest_l1e_get_flags(gw->l1e) ^ _PAGE_NX_BIT; @@ -548,8 +618,7 @@ sh_guest_map_l1e(struct vcpu *v, unsigne // XXX -- this is expensive, but it's easy to cobble together... // FIXME! - shadow_lock(v->domain); - if ( guest_walk_tables(v, addr, &gw, PFEC_page_present, 1) == 0 + if ( guest_walk_tables(v, addr, &gw, PFEC_page_present) == 0 && mfn_valid(gw.l1mfn) ) { if ( gl1mfn ) @@ -558,8 +627,6 @@ sh_guest_map_l1e(struct vcpu *v, unsigne (guest_l1_table_offset(addr) * sizeof(guest_l1e_t)); } - shadow_unlock(v->domain); - return pl1e; } @@ -573,10 +640,8 @@ sh_guest_get_eff_l1e(struct vcpu *v, uns // XXX -- this is expensive, but it's easy to cobble together... // FIXME! - shadow_lock(v->domain); - (void) guest_walk_tables(v, addr, &gw, PFEC_page_present, 1); + (void) guest_walk_tables(v, addr, &gw, PFEC_page_present); *(guest_l1e_t *)eff_l1e = gw.l1e; - shadow_unlock(v->domain); } #endif /* CONFIG==SHADOW==GUEST */ @@ -2842,14 +2907,12 @@ static int sh_page_fault(struct vcpu *v, return 0; } - shadow_lock(d); - - shadow_audit_tables(v); - - if ( guest_walk_tables(v, va, &gw, regs->error_code, 1) != 0 ) + if ( guest_walk_tables(v, va, &gw, regs->error_code) != 0 ) { perfc_incr(shadow_fault_bail_real_fault); - goto not_a_shadow_fault; + SHADOW_PRINTK("not a shadow fault\n"); + reset_early_unshadow(v); + return 0; } /* It's possible that the guest has put pagetables in memory that it has @@ -2859,11 +2922,8 @@ static int sh_page_fault(struct vcpu *v, if ( unlikely(d->is_shutting_down) ) { SHADOW_PRINTK("guest is shutting down\n"); - shadow_unlock(d); return 0; } - - sh_audit_gw(v, &gw); /* What kind of access are we dealing with? 
*/ ft = ((regs->error_code & PFEC_write_access) @@ -2879,7 +2939,8 @@ static int sh_page_fault(struct vcpu *v, perfc_incr(shadow_fault_bail_bad_gfn); SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n", gfn_x(gfn), mfn_x(gmfn)); - goto not_a_shadow_fault; + reset_early_unshadow(v); + return 0; } #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) @@ -2887,6 +2948,28 @@ static int sh_page_fault(struct vcpu *v, vtlb_insert(v, va >> PAGE_SHIFT, gfn_x(gfn), regs->error_code | PFEC_page_present); #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */ + + shadow_lock(d); + + if ( gw_remove_write_accesses(v, va, &gw) ) + { + /* Write permission removal is also a hint that other gwalks + * overlapping with this one may be inconsistent + */ + perfc_incr(shadow_rm_write_flush_tlb); + atomic_inc(&d->arch.paging.shadow.gtable_dirty_version); + flush_tlb_mask(d->domain_dirty_cpumask); + } + + if ( !shadow_check_gwalk(v, va, &gw) ) + { + perfc_incr(shadow_inconsistent_gwalk); + shadow_unlock(d); + return EXCRET_fault_fixed; + } + + shadow_audit_tables(v); + sh_audit_gw(v, &gw); /* Make sure there is enough free shadow memory to build a chain of * shadow tables. (We never allocate a top-level shadow on this path, @@ -3223,7 +3306,7 @@ sh_gva_to_gfn(struct vcpu *v, unsigned l return vtlb_gfn; #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */ - if ( guest_walk_tables(v, va, &gw, pfec[0], 0) != 0 ) + if ( guest_walk_tables(v, va, &gw, pfec[0]) != 0 ) { if ( !(guest_l1e_get_flags(gw.l1e) & _PAGE_PRESENT) ) pfec[0] &= ~PFEC_page_present; @@ -4276,6 +4359,8 @@ static void emulate_unmap_dest(struct vc } else sh_unmap_domain_page(addr); + + atomic_inc(&v->domain->arch.paging.shadow.gtable_dirty_version); } int @@ -4430,29 +4515,13 @@ static char * sh_audit_flags(struct vcpu return NULL; } -static inline mfn_t -audit_gfn_to_mfn(struct vcpu *v, gfn_t gfn, mfn_t gmfn) -/* Convert this gfn to an mfn in the manner appropriate for the - * guest pagetable it's used in (gmfn) */ -{ - p2m_type_t p2mt; - if ( !shadow_mode_translate(v->domain) ) - return _mfn(gfn_x(gfn)); - - if ( (mfn_to_page(gmfn)->u.inuse.type_info & PGT_type_mask) - != PGT_writable_page ) - return _mfn(gfn_x(gfn)); /* This is a paging-disabled shadow */ - else - return gfn_to_mfn(v->domain, gfn, &p2mt); -} - - int sh_audit_l1_table(struct vcpu *v, mfn_t sl1mfn, mfn_t x) { guest_l1e_t *gl1e, *gp; shadow_l1e_t *sl1e; mfn_t mfn, gmfn, gl1mfn; gfn_t gfn; + p2m_type_t p2mt; char *s; int done = 0; @@ -4491,7 +4560,7 @@ int sh_audit_l1_table(struct vcpu *v, mf { gfn = guest_l1e_get_gfn(*gl1e); mfn = shadow_l1e_get_mfn(*sl1e); - gmfn = audit_gfn_to_mfn(v, gfn, gl1mfn); + gmfn = gfn_to_mfn(v->domain, gfn, &p2mt); if ( mfn_x(gmfn) != mfn_x(mfn) ) AUDIT_FAIL(1, "bad translation: gfn %" SH_PRI_gfn " --> %" PRI_mfn " != mfn %" PRI_mfn, @@ -4532,6 +4601,7 @@ int sh_audit_l2_table(struct vcpu *v, mf shadow_l2e_t *sl2e; mfn_t mfn, gmfn, gl2mfn; gfn_t gfn; + p2m_type_t p2mt; char *s; int done = 0; @@ -4550,7 +4620,7 @@ int sh_audit_l2_table(struct vcpu *v, mf mfn = shadow_l2e_get_mfn(*sl2e); gmfn = (guest_l2e_get_flags(*gl2e) & _PAGE_PSE) ? get_fl1_shadow_status(v, gfn) - : get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl2mfn), + : get_shadow_status(v, gfn_to_mfn(v->domain, gfn, &p2mt), SH_type_l1_shadow); if ( mfn_x(gmfn) != mfn_x(mfn) ) AUDIT_FAIL(2, "bad translation: gfn %" SH_PRI_gfn @@ -4558,7 +4628,7 @@ int sh_audit_l2_table(struct vcpu *v, mf " --> %" PRI_mfn " != mfn %" PRI_mfn, gfn_x(gfn), (guest_l2e_get_flags(*gl2e) & _PAGE_PSE) ? 
0 - : mfn_x(audit_gfn_to_mfn(v, gfn, gl2mfn)), + : mfn_x(gfn_to_mfn(v->domain, gfn, &p2mt)), mfn_x(gmfn), mfn_x(mfn)); } }); @@ -4573,6 +4643,7 @@ int sh_audit_l3_table(struct vcpu *v, mf shadow_l3e_t *sl3e; mfn_t mfn, gmfn, gl3mfn; gfn_t gfn; + p2m_type_t p2mt; char *s; int done = 0; @@ -4589,7 +4660,7 @@ int sh_audit_l3_table(struct vcpu *v, mf { gfn = guest_l3e_get_gfn(*gl3e); mfn = shadow_l3e_get_mfn(*sl3e); - gmfn = get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl3mfn), + gmfn = get_shadow_status(v, gfn_to_mfn(v->domain, gfn, &p2mt), ((GUEST_PAGING_LEVELS == 3 || is_pv_32on64_vcpu(v)) && !shadow_mode_external(v->domain) @@ -4612,6 +4683,7 @@ int sh_audit_l4_table(struct vcpu *v, mf shadow_l4e_t *sl4e; mfn_t mfn, gmfn, gl4mfn; gfn_t gfn; + p2m_type_t p2mt; char *s; int done = 0; @@ -4628,7 +4700,7 @@ int sh_audit_l4_table(struct vcpu *v, mf { gfn = guest_l4e_get_gfn(*gl4e); mfn = shadow_l4e_get_mfn(*sl4e); - gmfn = get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl4mfn), + gmfn = get_shadow_status(v, gfn_to_mfn(v->domain, gfn, &p2mt), SH_type_l3_shadow); if ( mfn_x(gmfn) != mfn_x(mfn) ) AUDIT_FAIL(4, "bad translation: gfn %" SH_PRI_gfn diff -r 0b20ac6ec64a -r 71a8366fb212 xen/arch/x86/mm/shadow/types.h --- a/xen/arch/x86/mm/shadow/types.h Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/arch/x86/mm/shadow/types.h Fri Feb 29 09:19:58 2008 -0700 @@ -435,6 +435,7 @@ struct shadow_walk_t #endif mfn_t l2mfn; /* MFN that the level 2 entry was in */ mfn_t l1mfn; /* MFN that the level 1 entry was in */ + int version; /* Saved guest dirty version */ }; /* macros for dealing with the naming of the internal function names of the diff -r 0b20ac6ec64a -r 71a8366fb212 xen/arch/x86/x86_32/Makefile --- a/xen/arch/x86/x86_32/Makefile Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/arch/x86/x86_32/Makefile Fri Feb 29 09:19:58 2008 -0700 @@ -4,6 +4,7 @@ obj-y += mm.o obj-y += mm.o obj-y += seg_fixup.o obj-y += traps.o +obj-y += machine_kexec.o obj-$(crash_debug) += gdbstub.o diff -r 0b20ac6ec64a -r 71a8366fb212 xen/arch/x86/x86_32/machine_kexec.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/x86_32/machine_kexec.c Fri Feb 29 09:19:58 2008 -0700 @@ -0,0 +1,33 @@ +/****************************************************************************** + * machine_kexec.c + * + * Xen port written by: + * - Simon 'Horms' Horman <horms@xxxxxxxxxxxx> + * - Magnus Damm <magnus@xxxxxxxxxxxxx> + */ + +#ifndef CONFIG_COMPAT + +#include <xen/types.h> +#include <xen/kernel.h> +#include <asm/page.h> +#include <public/kexec.h> + +int machine_kexec_get_xen(xen_kexec_range_t *range) +{ + range->start = virt_to_maddr(_start); + range->size = (unsigned long)xenheap_phys_end - + (unsigned long)range->start; + return 0; +} +#endif + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r 0b20ac6ec64a -r 71a8366fb212 xen/arch/x86/x86_64/Makefile --- a/xen/arch/x86/x86_64/Makefile Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/arch/x86/x86_64/Makefile Fri Feb 29 09:19:58 2008 -0700 @@ -4,6 +4,7 @@ obj-y += gpr_switch.o obj-y += gpr_switch.o obj-y += mm.o obj-y += traps.o +obj-y += machine_kexec.o obj-$(crash_debug) += gdbstub.o obj-$(CONFIG_COMPAT) += compat.o diff -r 0b20ac6ec64a -r 71a8366fb212 xen/arch/x86/x86_64/machine_kexec.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/arch/x86/x86_64/machine_kexec.c Fri Feb 29 09:19:58 2008 -0700 @@ -0,0 +1,32 @@ +/****************************************************************************** + * 
machine_kexec.c + * + * Xen port written by: + * - Simon 'Horms' Horman <horms@xxxxxxxxxxxx> + * - Magnus Damm <magnus@xxxxxxxxxxxxx> + */ + +#ifndef CONFIG_COMPAT + +#include <xen/types.h> +#include <asm/page.h> +#include <public/kexec.h> + +int machine_kexec_get_xen(xen_kexec_range_t *range) +{ + range->start = xenheap_phys_start; + range->size = (unsigned long)xenheap_phys_end - + (unsigned long)range->start; + return 0; +} +#endif + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff -r 0b20ac6ec64a -r 71a8366fb212 xen/common/compat/kexec.c --- a/xen/common/compat/kexec.c Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/common/compat/kexec.c Fri Feb 29 09:19:58 2008 -0700 @@ -8,11 +8,6 @@ #define ret_t int #define do_kexec_op compat_kexec_op - -#undef kexec_get -#define kexec_get(x) compat_kexec_get_##x -#define xen_kexec_range compat_kexec_range -#define xen_kexec_range_t compat_kexec_range_t #define kexec_load_unload compat_kexec_load_unload #define xen_kexec_load compat_kexec_load diff -r 0b20ac6ec64a -r 71a8366fb212 xen/common/kexec.c --- a/xen/common/kexec.c Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/common/kexec.c Fri Feb 29 09:19:58 2008 -0700 @@ -20,6 +20,7 @@ #include <xen/spinlock.h> #include <xen/version.h> #include <xen/console.h> +#include <xen/kexec.h> #include <public/elfnote.h> #include <xsm/xsm.h> @@ -153,11 +154,7 @@ static int sizeof_note(const char *name, ELFNOTE_ALIGN(descsz)); } -#define kexec_get(x) kexec_get_##x - -#endif - -static int kexec_get(reserve)(xen_kexec_range_t *range) +static int kexec_get_reserve(xen_kexec_range_t *range) { if ( kexec_crash_area.size > 0 && kexec_crash_area.start > 0) { range->start = kexec_crash_area.start; @@ -168,18 +165,7 @@ static int kexec_get(reserve)(xen_kexec_ return 0; } -static int kexec_get(xen)(xen_kexec_range_t *range) -{ -#ifdef CONFIG_X86_64 - range->start = xenheap_phys_start; -#else - range->start = virt_to_maddr(_start); -#endif - range->size = (unsigned long)xenheap_phys_end - (unsigned long)range->start; - return 0; -} - -static int kexec_get(cpu)(xen_kexec_range_t *range) +static int kexec_get_cpu(xen_kexec_range_t *range) { int nr = range->nr; int nr_bytes = 0; @@ -223,7 +209,27 @@ static int kexec_get(cpu)(xen_kexec_rang return 0; } -static int kexec_get(range)(XEN_GUEST_HANDLE(void) uarg) +static int kexec_get_range_internal(xen_kexec_range_t *range) +{ + int ret = -EINVAL; + + switch ( range->range ) + { + case KEXEC_RANGE_MA_CRASH: + ret = kexec_get_reserve(range); + break; + case KEXEC_RANGE_MA_CPU: + ret = kexec_get_cpu(range); + break; + default: + ret = machine_kexec_get(range); + break; + } + + return ret; +} + +static int kexec_get_range(XEN_GUEST_HANDLE(void) uarg) { xen_kexec_range_t range; int ret = -EINVAL; @@ -231,24 +237,49 @@ static int kexec_get(range)(XEN_GUEST_HA if ( unlikely(copy_from_guest(&range, uarg, 1)) ) return -EFAULT; - switch ( range.range ) - { - case KEXEC_RANGE_MA_CRASH: - ret = kexec_get(reserve)(&range); - break; - case KEXEC_RANGE_MA_XEN: - ret = kexec_get(xen)(&range); - break; - case KEXEC_RANGE_MA_CPU: - ret = kexec_get(cpu)(&range); - break; - } + ret = kexec_get_range_internal(&range); if ( ret == 0 && unlikely(copy_to_guest(uarg, &range, 1)) ) return -EFAULT; return ret; } + +#else /* COMPAT */ + +#ifdef CONFIG_COMPAT +static int kexec_get_range_compat(XEN_GUEST_HANDLE(void) uarg) +{ + xen_kexec_range_t range; + compat_kexec_range_t compat_range; + int ret = -EINVAL; + + if ( 
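The kexec_get_range_compat body that follows widens a 32-bit guest's compat_kexec_range_t into the native xen_kexec_range_t before calling kexec_get_range_internal(). Note that on its success path the four assignments appear to run in the same direction as on the copy-in path (native fields overwritten from compat_range again), so the hypervisor's result would never reach the structure handed to copy_to_guest(). Assuming that is unintended, the copy-back would presumably need to narrow the fields the other way, along these lines:

    if ( ret == 0 )
    {
        /* narrow native -> compat before writing back to the guest */
        compat_range.range = range.range;
        compat_range.nr    = range.nr;
        compat_range.size  = range.size;
        compat_range.start = range.start;

        if ( unlikely(copy_to_guest(uarg, &compat_range, 1)) )
            return -EFAULT;
    }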
unlikely(copy_from_guest(&compat_range, uarg, 1)) ) + return -EFAULT; + + range.range = compat_range.range; + range.nr = compat_range.nr; + range.size = compat_range.size; + range.start = compat_range.start; + + ret = kexec_get_range_internal(&range); + + if ( ret == 0 ) { + range.range = compat_range.range; + range.nr = compat_range.nr; + range.size = compat_range.size; + range.start = compat_range.start; + + if ( unlikely(copy_to_guest(uarg, &compat_range, 1)) ) + return -EFAULT; + } + + return ret; +} +#endif /* CONFIG_COMPAT */ + +#endif /* COMPAT */ + #ifndef COMPAT @@ -375,7 +406,11 @@ ret_t do_kexec_op(unsigned long op, XEN_ switch ( op ) { case KEXEC_CMD_kexec_get_range: - ret = kexec_get(range)(uarg); +#ifndef COMPAT + ret = kexec_get_range(uarg); +#else + ret = kexec_get_range_compat(uarg); +#endif break; case KEXEC_CMD_kexec_load: case KEXEC_CMD_kexec_unload: diff -r 0b20ac6ec64a -r 71a8366fb212 xen/drivers/acpi/tables.c --- a/xen/drivers/acpi/tables.c Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/drivers/acpi/tables.c Fri Feb 29 09:19:58 2008 -0700 @@ -60,6 +60,7 @@ static char *acpi_table_signatures[ACPI_ [ACPI_HPET] = "HPET", [ACPI_MCFG] = "MCFG", [ACPI_DMAR] = "DMAR", + [ACPI_IVRS] = "IVRS", }; static char *mps_inti_flags_polarity[] = { "dfl", "high", "res", "low" }; diff -r 0b20ac6ec64a -r 71a8366fb212 xen/drivers/passthrough/amd/Makefile --- a/xen/drivers/passthrough/amd/Makefile Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/drivers/passthrough/amd/Makefile Fri Feb 29 09:19:58 2008 -0700 @@ -2,3 +2,4 @@ obj-y += iommu_init.o obj-y += iommu_init.o obj-y += iommu_map.o obj-y += pci_amd_iommu.o +obj-y += iommu_acpi.o diff -r 0b20ac6ec64a -r 71a8366fb212 xen/drivers/passthrough/amd/iommu_acpi.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/drivers/passthrough/amd/iommu_acpi.c Fri Feb 29 09:19:58 2008 -0700 @@ -0,0 +1,874 @@ +/* + * Copyright (C) 2007 Advanced Micro Devices, Inc. + * Author: Leo Duran <leo.duran@xxxxxxx> + * Author: Wei Wang <wei.wang2@xxxxxxx> - adapted to xen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <xen/config.h> +#include <xen/errno.h> +#include <asm/amd-iommu.h> +#include <asm/hvm/svm/amd-iommu-proto.h> +#include <asm/hvm/svm/amd-iommu-acpi.h> + +extern unsigned long amd_iommu_page_entries; +extern unsigned short ivrs_bdf_entries; +extern struct ivrs_mappings *ivrs_mappings; + +static struct amd_iommu * __init find_iommu_from_bdf_cap( + u16 bdf, u8 cap_offset) +{ + struct amd_iommu *iommu; + + for_each_amd_iommu( iommu ) + if ( iommu->bdf == bdf && iommu->cap_offset == cap_offset ) + return iommu; + + return NULL; +} + +static void __init reserve_iommu_exclusion_range( + struct amd_iommu *iommu, uint64_t base, uint64_t limit) +{ + /* need to extend exclusion range? 
*/ + if ( iommu->exclusion_enable ) + { + if ( iommu->exclusion_base < base ) + base = iommu->exclusion_base; + if ( iommu->exclusion_limit > limit ) + limit = iommu->exclusion_limit; + } + + iommu->exclusion_enable = IOMMU_CONTROL_ENABLED; + iommu->exclusion_base = base; + iommu->exclusion_limit = limit; +} + +static void __init reserve_iommu_exclusion_range_all(struct amd_iommu *iommu, + unsigned long base, unsigned long limit) +{ + reserve_iommu_exclusion_range(iommu, base, limit); + iommu->exclusion_allow_all = IOMMU_CONTROL_ENABLED; +} + +static void __init reserve_unity_map_for_device(u16 bdf, unsigned long base, + unsigned long length, u8 iw, u8 ir) +{ + unsigned long old_top, new_top; + + /* need to extend unity-mapped range? */ + if ( ivrs_mappings[bdf].unity_map_enable ) + { + old_top = ivrs_mappings[bdf].addr_range_start + + ivrs_mappings[bdf].addr_range_length; + new_top = base + length; + if ( old_top > new_top ) + new_top = old_top; + if ( ivrs_mappings[bdf].addr_range_start < base ) + base = ivrs_mappings[bdf].addr_range_start; + length = new_top - base; + } + + /* extend r/w permissioms and keep aggregate */ + if ( iw ) + ivrs_mappings[bdf].write_permission = IOMMU_CONTROL_ENABLED; + if ( ir ) + ivrs_mappings[bdf].read_permission = IOMMU_CONTROL_ENABLED; + ivrs_mappings[bdf].unity_map_enable = IOMMU_CONTROL_ENABLED; + ivrs_mappings[bdf].addr_range_start = base; + ivrs_mappings[bdf].addr_range_length = length; +} + +static int __init register_exclusion_range_for_all_devices( + unsigned long base, unsigned long limit, u8 iw, u8 ir) +{ + unsigned long range_top, iommu_top, length; + struct amd_iommu *iommu; + u16 bdf; + + /* is part of exclusion range inside of IOMMU virtual address space? */ + /* note: 'limit' parameter is assumed to be page-aligned */ + range_top = limit + PAGE_SIZE; + iommu_top = max_page * PAGE_SIZE; + if ( base < iommu_top ) + { + if (range_top > iommu_top) + range_top = iommu_top; + length = range_top - base; + /* reserve r/w unity-mapped page entries for devices */ + /* note: these entries are part of the exclusion range */ + for (bdf = 0; bdf < ivrs_bdf_entries; ++bdf) + reserve_unity_map_for_device(bdf, base, length, iw, ir); + /* push 'base' just outside of virtual address space */ + base = iommu_top; + } + /* register IOMMU exclusion range settings */ + if (limit >= iommu_top) + { + for_each_amd_iommu( iommu ) + reserve_iommu_exclusion_range_all(iommu, base, limit); + } + + return 0; +} + +static int __init register_exclusion_range_for_device(u16 bdf, + unsigned long base, unsigned long limit, u8 iw, u8 ir) +{ + unsigned long range_top, iommu_top, length; + struct amd_iommu *iommu; + u16 bus, devfn, req; + + bus = bdf >> 8; + devfn = bdf & 0xFF; + iommu = find_iommu_for_device(bus, devfn); + if ( !iommu ) + { + dprintk(XENLOG_ERR, "IVMD Error: No IOMMU for Dev_Id 0x%x!\n", bdf); + return -ENODEV; + } + req = ivrs_mappings[bdf].dte_requestor_id; + + /* note: 'limit' parameter is assumed to be page-aligned */ + range_top = limit + PAGE_SIZE; + iommu_top = max_page * PAGE_SIZE; + if ( base < iommu_top ) + { + if (range_top > iommu_top) + range_top = iommu_top; + length = range_top - base; + /* reserve unity-mapped page entries for device */ + /* note: these entries are part of the exclusion range */ + reserve_unity_map_for_device(bdf, base, length, iw, ir); + reserve_unity_map_for_device(req, base, length, iw, ir); + + /* push 'base' just outside of virtual address space */ + base = iommu_top; + } + + /* register IOMMU exclusion range settings for 
device */ + if ( limit >= iommu_top ) + { + reserve_iommu_exclusion_range(iommu, base, limit); + ivrs_mappings[bdf].dte_allow_exclusion = IOMMU_CONTROL_ENABLED; + ivrs_mappings[req].dte_allow_exclusion = IOMMU_CONTROL_ENABLED; + } + + return 0; +} + +static int __init register_exclusion_range_for_iommu_devices( + struct amd_iommu *iommu, + unsigned long base, unsigned long limit, u8 iw, u8 ir) +{ + unsigned long range_top, iommu_top, length; + u16 bus, devfn, bdf, req; + + /* is part of exclusion range inside of IOMMU virtual address space? */ + /* note: 'limit' parameter is assumed to be page-aligned */ + range_top = limit + PAGE_SIZE; + iommu_top = max_page * PAGE_SIZE; + if ( base < iommu_top ) + { + if (range_top > iommu_top) + range_top = iommu_top; + length = range_top - base; + /* reserve r/w unity-mapped page entries for devices */ + /* note: these entries are part of the exclusion range */ + for ( bdf = 0; bdf < ivrs_bdf_entries; ++bdf ) + { + bus = bdf >> 8; + devfn = bdf & 0xFF; + if ( iommu == find_iommu_for_device(bus, devfn) ) + { + reserve_unity_map_for_device(bdf, base, length, iw, ir); + req = ivrs_mappings[bdf].dte_requestor_id; + reserve_unity_map_for_device(req, base, length, iw, ir); + } + } + + /* push 'base' just outside of virtual address space */ + base = iommu_top; + } + + /* register IOMMU exclusion range settings */ + if (limit >= iommu_top) + reserve_iommu_exclusion_range_all(iommu, base, limit); + return 0; +} + +static int __init parse_ivmd_device_select( + struct acpi_ivmd_block_header *ivmd_block, + unsigned long base, unsigned long limit, u8 iw, u8 ir) +{ + u16 bdf; + + bdf = ivmd_block->header.dev_id; + if (bdf >= ivrs_bdf_entries) + { + dprintk(XENLOG_ERR, "IVMD Error: Invalid Dev_Id 0x%x\n", bdf); + return -ENODEV; + } + + return register_exclusion_range_for_device(bdf, base, limit, iw, ir); +} + +static int __init parse_ivmd_device_range( + struct acpi_ivmd_block_header *ivmd_block, + unsigned long base, unsigned long limit, u8 iw, u8 ir) +{ + u16 first_bdf, last_bdf, bdf; + int error; + + first_bdf = ivmd_block->header.dev_id; + if (first_bdf >= ivrs_bdf_entries) + { + dprintk(XENLOG_ERR, "IVMD Error: " + "Invalid Range_First Dev_Id 0x%x\n", first_bdf); + return -ENODEV; + } + + last_bdf = ivmd_block->last_dev_id; + if (last_bdf >= ivrs_bdf_entries || last_bdf <= first_bdf) + { + dprintk(XENLOG_ERR, "IVMD Error: " + "Invalid Range_Last Dev_Id 0x%x\n", last_bdf); + return -ENODEV; + } + + dprintk(XENLOG_ERR, " Dev_Id Range: 0x%x -> 0x%x\n", + first_bdf, last_bdf); + + for ( bdf = first_bdf, error = 0; + bdf <= last_bdf && !error; ++bdf ) + { + error = register_exclusion_range_for_device( + bdf, base, limit, iw, ir); + } + + return error; +} + +static int __init parse_ivmd_device_iommu( + struct acpi_ivmd_block_header *ivmd_block, + unsigned long base, unsigned long limit, u8 iw, u8 ir) +{ + struct amd_iommu *iommu; + + /* find target IOMMU */ + iommu = find_iommu_from_bdf_cap(ivmd_block->header.dev_id, + ivmd_block->cap_offset); + if ( !iommu ) + { + dprintk(XENLOG_ERR, + "IVMD Error: No IOMMU for Dev_Id 0x%x Cap 0x%x\n", + ivmd_block->header.dev_id, ivmd_block->cap_offset); + return -ENODEV; + } + + return register_exclusion_range_for_iommu_devices( + iommu, base, limit, iw, ir); +} + +static int __init parse_ivmd_block(struct acpi_ivmd_block_header *ivmd_block) +{ + unsigned long start_addr, mem_length, base, limit; + u8 iw, ir; + + if (ivmd_block->header.length < + sizeof(struct acpi_ivmd_block_header)) + { + dprintk(XENLOG_ERR, "IVMD Error: Invalid 
Block Length!\n"); + return -ENODEV; + } + + start_addr = (unsigned long)ivmd_block->start_addr; + mem_length = (unsigned long)ivmd_block->mem_length; + base = start_addr & PAGE_MASK; + limit = (start_addr + mem_length - 1) & PAGE_MASK; + + dprintk(XENLOG_INFO, "IVMD Block: Type 0x%x\n", + ivmd_block->header.type); + dprintk(XENLOG_INFO, " Start_Addr_Phys 0x%lx\n", start_addr); + dprintk(XENLOG_INFO, " Mem_Length 0x%lx\n", mem_length); + + if ( get_field_from_byte(ivmd_block->header.flags, + AMD_IOMMU_ACPI_EXCLUSION_RANGE_MASK, + AMD_IOMMU_ACPI_EXCLUSION_RANGE_SHIFT) ) + iw = ir = IOMMU_CONTROL_ENABLED; + else if ( get_field_from_byte(ivmd_block->header.flags, + AMD_IOMMU_ACPI_UNITY_MAPPING_MASK, + AMD_IOMMU_ACPI_UNITY_MAPPING_SHIFT) ) + { + iw = get_field_from_byte(ivmd_block->header.flags, + AMD_IOMMU_ACPI_IW_PERMISSION_MASK, + AMD_IOMMU_ACPI_IW_PERMISSION_SHIFT); + ir = get_field_from_byte(ivmd_block->header.flags, + AMD_IOMMU_ACPI_IR_PERMISSION_MASK, + AMD_IOMMU_ACPI_IR_PERMISSION_SHIFT); + } + else + { + dprintk(KERN_ERR, "IVMD Error: Invalid Flag Field!\n"); + return -ENODEV; + } + + switch( ivmd_block->header.type ) + { + case AMD_IOMMU_ACPI_IVMD_ALL_TYPE: + return register_exclusion_range_for_all_devices( + base, limit, iw, ir); + + case AMD_IOMMU_ACPI_IVMD_ONE_TYPE: + return parse_ivmd_device_select(ivmd_block, + base, limit, iw, ir); + + case AMD_IOMMU_ACPI_IVMD_RANGE_TYPE: + return parse_ivmd_device_range(ivmd_block, + base, limit, iw, ir); + + case AMD_IOMMU_ACPI_IVMD_IOMMU_TYPE: + return parse_ivmd_device_iommu(ivmd_block, + base, limit, iw, ir); + + default: + dprintk(XENLOG_ERR, "IVMD Error: Invalid Block Type!\n"); + return -ENODEV; + } +} + +static u16 __init parse_ivhd_device_padding(u16 pad_length, + u16 header_length, u16 block_length) +{ + if ( header_length < (block_length + pad_length) ) + { + dprintk(XENLOG_ERR, "IVHD Error: Invalid Device_Entry Length!\n"); + return 0; + } + + return pad_length; +} + +static u16 __init parse_ivhd_device_select( + union acpi_ivhd_device *ivhd_device) +{ + u16 bdf; + + bdf = ivhd_device->header.dev_id; + if ( bdf >= ivrs_bdf_entries ) + { + dprintk(XENLOG_ERR, "IVHD Error: " + "Invalid Device_Entry Dev_Id 0x%x\n", bdf); + return 0; + } + + /* override flags for device */ + ivrs_mappings[bdf].dte_sys_mgt_enable = + get_field_from_byte(ivhd_device->header.flags, + AMD_IOMMU_ACPI_SYS_MGT_MASK, + AMD_IOMMU_ACPI_SYS_MGT_SHIFT); + + return sizeof(struct acpi_ivhd_device_header); +} + +static u16 __init parse_ivhd_device_range( + union acpi_ivhd_device *ivhd_device, + u16 header_length, u16 block_length) +{ + u16 dev_length, first_bdf, last_bdf, bdf; + u8 sys_mgt; + + dev_length = sizeof(struct acpi_ivhd_device_range); + if ( header_length < (block_length + dev_length) ) + { + dprintk(XENLOG_ERR, "IVHD Error: Invalid Device_Entry Length!\n"); + return 0; + } + + if ( ivhd_device->range.trailer.type != + AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END) { + dprintk(XENLOG_ERR, "IVHD Error: " + "Invalid Range: End_Type 0x%x\n", + ivhd_device->range.trailer.type); + return 0; + } + + first_bdf = ivhd_device->header.dev_id; + if ( first_bdf >= ivrs_bdf_entries ) + { + dprintk(XENLOG_ERR, "IVHD Error: " + "Invalid Range: First Dev_Id 0x%x\n", first_bdf); + return 0; + } + + last_bdf = ivhd_device->range.trailer.dev_id; + if ( last_bdf >= ivrs_bdf_entries || last_bdf <= first_bdf ) + { + dprintk(XENLOG_ERR, "IVHD Error: " + "Invalid Range: Last Dev_Id 0x%x\n", last_bdf); + return 0; + } + + dprintk(XENLOG_INFO, " Dev_Id Range: 0x%x -> 0x%x\n", + first_bdf, 
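parse_ivmd_block above derives a page-granular window from the raw IVMD start address and length: the base rounds down to a page boundary, and the limit is the address of the page containing the last byte of the region. A self-contained illustration of that arithmetic (PAGE_SHIFT of 12 assumed, as on x86):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

int main(void)
{
    unsigned long start_addr = 0x12345678, mem_length = 0x2000;
    unsigned long base  = start_addr & PAGE_MASK;                    /* 0x12345000 */
    unsigned long limit = (start_addr + mem_length - 1) & PAGE_MASK; /* 0x12347000 */
    printf("base=%#lx limit=%#lx\n", base, limit);
    return 0;
}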
last_bdf); + + /* override flags for range of devices */ + sys_mgt = get_field_from_byte(ivhd_device->header.flags, + AMD_IOMMU_ACPI_SYS_MGT_MASK, + AMD_IOMMU_ACPI_SYS_MGT_SHIFT); + for ( bdf = first_bdf; bdf <= last_bdf; ++bdf ) + ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt; + + return dev_length; +} + +static u16 __init parse_ivhd_device_alias( + union acpi_ivhd_device *ivhd_device, + u16 header_length, u16 block_length) +{ + u16 dev_length, alias_id, bdf; + + dev_length = sizeof(struct acpi_ivhd_device_alias); + if ( header_length < (block_length + dev_length) ) + { + dprintk(XENLOG_ERR, "IVHD Error: " + "Invalid Device_Entry Length!\n"); + return 0; + } + + bdf = ivhd_device->header.dev_id; + if ( bdf >= ivrs_bdf_entries ) + { + dprintk(XENLOG_ERR, "IVHD Error: " + "Invalid Device_Entry Dev_Id 0x%x\n", bdf); + return 0; + } + + alias_id = ivhd_device->alias.dev_id; + if ( alias_id >= ivrs_bdf_entries ) + { + dprintk(XENLOG_ERR, "IVHD Error: " + "Invalid Alias Dev_Id 0x%x\n", alias_id); + return 0; + } + + dprintk(XENLOG_INFO, " Dev_Id Alias: 0x%x\n", alias_id); + + /* override requestor_id and flags for device */ + ivrs_mappings[bdf].dte_requestor_id = alias_id; + ivrs_mappings[bdf].dte_sys_mgt_enable = + get_field_from_byte(ivhd_device->header.flags, + AMD_IOMMU_ACPI_SYS_MGT_MASK, + AMD_IOMMU_ACPI_SYS_MGT_SHIFT); + ivrs_mappings[alias_id].dte_sys_mgt_enable = + ivrs_mappings[bdf].dte_sys_mgt_enable; + + return dev_length; +} + +static u16 __init parse_ivhd_device_alias_range( + union acpi_ivhd_device *ivhd_device, + u16 header_length, u16 block_length) +{ + + u16 dev_length, first_bdf, last_bdf, alias_id, bdf; + u8 sys_mgt; + + dev_length = sizeof(struct acpi_ivhd_device_alias_range); + if ( header_length < (block_length + dev_length) ) + { + dprintk(XENLOG_ERR, "IVHD Error: " + "Invalid Device_Entry Length!\n"); + return 0; + } + + if ( ivhd_device->alias_range.trailer.type != + AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END ) + { + dprintk(XENLOG_ERR, "IVHD Error: " + "Invalid Range: End_Type 0x%x\n", + ivhd_device->alias_range.trailer.type); + return 0; + } + + first_bdf = ivhd_device->header.dev_id; + if ( first_bdf >= ivrs_bdf_entries ) + { + dprintk(XENLOG_ERR,"IVHD Error: " + "Invalid Range: First Dev_Id 0x%x\n", first_bdf); + return 0; + } + + last_bdf = ivhd_device->alias_range.trailer.dev_id; + if ( last_bdf >= ivrs_bdf_entries || last_bdf <= first_bdf ) + { + dprintk(XENLOG_ERR, "IVHD Error: " + "Invalid Range: Last Dev_Id 0x%x\n", last_bdf); + return 0; + } + + alias_id = ivhd_device->alias_range.alias.dev_id; + if ( alias_id >= ivrs_bdf_entries ) + { + dprintk(XENLOG_ERR, "IVHD Error: " + "Invalid Alias Dev_Id 0x%x\n", alias_id); + return 0; + } + + dprintk(XENLOG_INFO, " Dev_Id Range: 0x%x -> 0x%x\n", + first_bdf, last_bdf); + dprintk(XENLOG_INFO, " Dev_Id Alias: 0x%x\n", alias_id); + + /* override requestor_id and flags for range of devices */ + sys_mgt = get_field_from_byte(ivhd_device->header.flags, + AMD_IOMMU_ACPI_SYS_MGT_MASK, + AMD_IOMMU_ACPI_SYS_MGT_SHIFT); + for ( bdf = first_bdf; bdf <= last_bdf; ++bdf ) + { + ivrs_mappings[bdf].dte_requestor_id = alias_id; + ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt; + } + ivrs_mappings[alias_id].dte_sys_mgt_enable = sys_mgt; + + return dev_length; +} + +static u16 __init parse_ivhd_device_extended( + union acpi_ivhd_device *ivhd_device, + u16 header_length, u16 block_length) +{ + u16 dev_length, bdf; + + dev_length = sizeof(struct acpi_ivhd_device_extended); + if ( header_length < (block_length + dev_length) ) + { + 
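Each of the *_range parsers in this file repeats the same two steps: bounds-check the [first, last] device window against ivrs_bdf_entries (rejecting last <= first), then fan one flag value out across every BDF in the window. Stripped of the IVHD specifics, the shape is as below, where sys_mgt_tbl stands in for the dte_sys_mgt_enable field of ivrs_mappings:

#include <stdint.h>

static int apply_to_range(uint8_t *sys_mgt_tbl, uint16_t nr_entries,
                          uint16_t first, uint16_t last, uint8_t sys_mgt)
{
    uint16_t bdf;

    if ( first >= nr_entries || last >= nr_entries || last <= first )
        return -1;    /* mirrors the dprintk-and-return error paths */

    for ( bdf = first; bdf <= last; ++bdf )
        sys_mgt_tbl[bdf] = sys_mgt;

    return 0;
}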
dprintk(XENLOG_ERR, "IVHD Error: " + "Invalid Device_Entry Length!\n"); + return 0; + } + + bdf = ivhd_device->header.dev_id; + if ( bdf >= ivrs_bdf_entries ) + { + dprintk(XENLOG_ERR, "IVHD Error: " + "Invalid Device_Entry Dev_Id 0x%x\n", bdf); + return 0; + } + + /* override flags for device */ + ivrs_mappings[bdf].dte_sys_mgt_enable = + get_field_from_byte(ivhd_device->header.flags, + AMD_IOMMU_ACPI_SYS_MGT_MASK, + AMD_IOMMU_ACPI_SYS_MGT_SHIFT); + + return dev_length; +} + +static u16 __init parse_ivhd_device_extended_range( + union acpi_ivhd_device *ivhd_device, + u16 header_length, u16 block_length) +{ + u16 dev_length, first_bdf, last_bdf, bdf; + u8 sys_mgt; + + dev_length = sizeof(struct acpi_ivhd_device_extended_range); + if ( header_length < (block_length + dev_length) ) + { + dprintk(XENLOG_ERR, "IVHD Error: " + "Invalid Device_Entry Length!\n"); + return 0; + } + + if ( ivhd_device->extended_range.trailer.type != + AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END ) + { + dprintk(XENLOG_ERR, "IVHD Error: " + "Invalid Range: End_Type 0x%x\n", + ivhd_device->extended_range.trailer.type); + return 0; + } + + first_bdf = ivhd_device->header.dev_id; + if ( first_bdf >= ivrs_bdf_entries ) + { + dprintk(XENLOG_ERR, "IVHD Error: " + "Invalid Range: First Dev_Id 0x%x\n", first_bdf); + return 0; + } + + last_bdf = ivhd_device->extended_range.trailer.dev_id; + if ( last_bdf >= ivrs_bdf_entries || last_bdf <= first_bdf ) + { + dprintk(XENLOG_ERR, "IVHD Error: " + "Invalid Range: Last Dev_Id 0x%x\n", last_bdf); + return 0; + } + + dprintk(XENLOG_INFO, " Dev_Id Range: 0x%x -> 0x%x\n", + first_bdf, last_bdf); + + /* override flags for range of devices */ + sys_mgt = get_field_from_byte(ivhd_device->header.flags, + AMD_IOMMU_ACPI_SYS_MGT_MASK, + AMD_IOMMU_ACPI_SYS_MGT_SHIFT); + for ( bdf = first_bdf; bdf <= last_bdf; ++bdf ) + ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt; + + return dev_length; +} + +static int __init parse_ivhd_block(struct acpi_ivhd_block_header *ivhd_block) +{ + union acpi_ivhd_device *ivhd_device; + u16 block_length, dev_length; + struct amd_iommu *iommu; + + if ( ivhd_block->header.length < + sizeof(struct acpi_ivhd_block_header) ) + { + dprintk(XENLOG_ERR, "IVHD Error: Invalid Block Length!\n"); + return -ENODEV; + } + + iommu = find_iommu_from_bdf_cap(ivhd_block->header.dev_id, + ivhd_block->cap_offset); + if ( !iommu ) + { + dprintk(XENLOG_ERR, + "IVHD Error: No IOMMU for Dev_Id 0x%x Cap 0x%x\n", + ivhd_block->header.dev_id, ivhd_block->cap_offset); + return -ENODEV; + } + + dprintk(XENLOG_INFO, "IVHD Block:\n"); + dprintk(XENLOG_INFO, " Cap_Offset 0x%x\n", + ivhd_block->cap_offset); + dprintk(XENLOG_INFO, " MMIO_BAR_Phys 0x%lx\n", + (unsigned long)ivhd_block->mmio_base); + dprintk(XENLOG_INFO, " PCI_Segment 0x%x\n", + ivhd_block->pci_segment); + dprintk(XENLOG_INFO, " IOMMU_Info 0x%x\n", + ivhd_block->iommu_info); + + /* override IOMMU support flags */ + iommu->coherent = get_field_from_byte(ivhd_block->header.flags, + AMD_IOMMU_ACPI_COHERENT_MASK, + AMD_IOMMU_ACPI_COHERENT_SHIFT); + iommu->iotlb_support = get_field_from_byte(ivhd_block->header.flags, + AMD_IOMMU_ACPI_IOTLB_SUP_MASK, + AMD_IOMMU_ACPI_IOTLB_SUP_SHIFT); + iommu->isochronous = get_field_from_byte(ivhd_block->header.flags, + AMD_IOMMU_ACPI_ISOC_MASK, + AMD_IOMMU_ACPI_ISOC_SHIFT); + iommu->res_pass_pw = get_field_from_byte(ivhd_block->header.flags, + AMD_IOMMU_ACPI_RES_PASS_PW_MASK, + AMD_IOMMU_ACPI_RES_PASS_PW_SHIFT); + iommu->pass_pw = get_field_from_byte(ivhd_block->header.flags, + AMD_IOMMU_ACPI_PASS_PW_MASK, + 
AMD_IOMMU_ACPI_PASS_PW_SHIFT); + iommu->ht_tunnel_enable = get_field_from_byte( + ivhd_block->header.flags, + AMD_IOMMU_ACPI_HT_TUN_ENB_MASK, + AMD_IOMMU_ACPI_HT_TUN_ENB_SHIFT); + + /* parse Device Entries */ + block_length = sizeof(struct acpi_ivhd_block_header); + while( ivhd_block->header.length >= + (block_length + sizeof(struct acpi_ivhd_device_header)) ) + { + ivhd_device = (union acpi_ivhd_device *) + ((u8 *)ivhd_block + block_length); + + dprintk(XENLOG_INFO, "IVHD Device Entry:\n"); + dprintk(XENLOG_INFO, " Type 0x%x\n", + ivhd_device->header.type); + dprintk(XENLOG_INFO, " Dev_Id 0x%x\n", + ivhd_device->header.dev_id); + dprintk(XENLOG_INFO, " Flags 0x%x\n", + ivhd_device->header.flags); + + switch( ivhd_device->header.type ) + { + case AMD_IOMMU_ACPI_IVHD_DEV_U32_PAD: + dev_length = parse_ivhd_device_padding( + sizeof(u32), + ivhd_block->header.length, block_length); + break; + case AMD_IOMMU_ACPI_IVHD_DEV_U64_PAD: + dev_length = parse_ivhd_device_padding( + sizeof(u64), + ivhd_block->header.length, block_length); + break; + case AMD_IOMMU_ACPI_IVHD_DEV_SELECT: + dev_length = parse_ivhd_device_select(ivhd_device); + break; + case AMD_IOMMU_ACPI_IVHD_DEV_RANGE_START: + dev_length = parse_ivhd_device_range(ivhd_device, + ivhd_block->header.length, block_length); + break; + case AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_SELECT: + dev_length = parse_ivhd_device_alias( + ivhd_device, + ivhd_block->header.length, block_length); + break; + case AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_RANGE: + dev_length = parse_ivhd_device_alias_range( + ivhd_device, + ivhd_block->header.length, block_length); + break; + case AMD_IOMMU_ACPI_IVHD_DEV_EXT_SELECT: + dev_length = parse_ivhd_device_extended( + ivhd_device, + ivhd_block->header.length, block_length); + break; + case AMD_IOMMU_ACPI_IVHD_DEV_EXT_RANGE: + dev_length = parse_ivhd_device_extended_range( + ivhd_device, + ivhd_block->header.length, block_length); + break; + default: + dprintk(XENLOG_ERR, "IVHD Error: " + "Invalid Device Type!\n"); + dev_length = 0; + break; + } + + block_length += dev_length; + if ( !dev_length ) + return -ENODEV; + } + + return 0; +} + +static int __init parse_ivrs_block(struct acpi_ivrs_block_header *ivrs_block) +{ + struct acpi_ivhd_block_header *ivhd_block; + struct acpi_ivmd_block_header *ivmd_block; + + switch(ivrs_block->type) + { + case AMD_IOMMU_ACPI_IVHD_TYPE: + ivhd_block = (struct acpi_ivhd_block_header *)ivrs_block; + return parse_ivhd_block(ivhd_block); + + case AMD_IOMMU_ACPI_IVMD_ALL_TYPE: + case AMD_IOMMU_ACPI_IVMD_ONE_TYPE: + case AMD_IOMMU_ACPI_IVMD_RANGE_TYPE: + case AMD_IOMMU_ACPI_IVMD_IOMMU_TYPE: + ivmd_block = (struct acpi_ivmd_block_header *)ivrs_block; + return parse_ivmd_block(ivmd_block); + + default: + dprintk(XENLOG_ERR, "IVRS Error: Invalid Block Type!\n"); + return -ENODEV; + } + + return 0; +} + +void __init dump_acpi_table_header(struct acpi_table_header *table) +{ + int i; + + printk(XENLOG_INFO "AMD IOMMU: ACPI Table:\n"); + printk(XENLOG_INFO " Signature "); + for ( i = 0; i < ACPI_NAME_SIZE; ++i ) + printk("%c", table->signature[i]); + printk("\n"); + + printk(" Length 0x%x\n", table->length); + printk(" Revision 0x%x\n", table->revision); + printk(" CheckSum 0x%x\n", table->checksum); + + printk(" OEM_Id "); + for ( i = 0; i < ACPI_OEM_ID_SIZE; ++i ) + printk("%c", table->oem_id[i]); + printk("\n"); + + printk(" OEM_Table_Id "); + for ( i = 0; i < ACPI_OEM_TABLE_ID_SIZE; ++i ) + printk("%c", table->oem_table_id[i]); + printk("\n"); + + printk(" OEM_Revision 0x%x\n", table->oem_revision); + + 
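The device-entry loop in parse_ivhd_block above is a classic variable-length record walk: each iteration asks the per-type handler for the entry's size, advances by it, and treats a returned length of zero as a fatal parse error. The skeleton of that walk, with a hypothetical entry_length() standing in for the parse_ivhd_device_* handlers:

#include <stdint.h>
#include <stddef.h>

/* Hypothetical per-type size function; the real code dispatches to the
 * parse_ivhd_device_* handlers, each returning its entry length
 * (or 0 on a malformed entry). */
static size_t entry_length(uint8_t type)
{
    switch ( type )
    {
    case 2:  return 4;   /* e.g. a 4-byte "select" entry */
    case 3:  return 8;   /* e.g. a range start plus its end trailer */
    default: return 0;   /* unknown type: abort the walk */
    }
}

static int walk_entries(const uint8_t *block, size_t hdr_len, size_t total_len)
{
    size_t off = hdr_len;

    while ( off + 4 <= total_len )   /* 4 bytes: one device entry header */
    {
        size_t n = entry_length(block[off]);
        if ( n == 0 )
            return -1;               /* mirrors the !dev_length bail-out */
        off += n;
    }
    return 0;
}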
printk(" Creator_Id "); + for ( i = 0; i < ACPI_NAME_SIZE; ++i ) + printk("%c", table->asl_compiler_id[i]); + printk("\n"); + + printk(" Creator_Revision 0x%x\n", + table->asl_compiler_revision); +} + +int __init parse_ivrs_table(unsigned long phys_addr, + unsigned long size) +{ + struct acpi_ivrs_block_header *ivrs_block; + unsigned long length, i; + u8 checksum, *raw_table; + int error = 0; + struct acpi_table_header *table = + (struct acpi_table_header *) __acpi_map_table(phys_addr, size); + + BUG_ON(!table); + +#if 0 + dump_acpi_table_header(table); +#endif + + /* validate checksum: sum of entire table == 0 */ + checksum = 0; + raw_table = (u8 *)table; + for ( i = 0; i < table->length; ++i ) + checksum += raw_table[i]; + if ( checksum ) + { + dprintk(XENLOG_ERR, "IVRS Error: " + "Invalid Checksum 0x%x\n", checksum); + return -ENODEV; + } + + /* parse IVRS blocks */ + length = sizeof(struct acpi_ivrs_table_header); + while( error == 0 && table->length > + (length + sizeof(struct acpi_ivrs_block_header)) ) + { + ivrs_block = (struct acpi_ivrs_block_header *) + ((u8 *)table + length); + + dprintk(XENLOG_INFO, "IVRS Block:\n"); + dprintk(XENLOG_INFO, " Type 0x%x\n", ivrs_block->type); + dprintk(XENLOG_INFO, " Flags 0x%x\n", ivrs_block->flags); + dprintk(XENLOG_INFO, " Length 0x%x\n", ivrs_block->length); + dprintk(XENLOG_INFO, " Dev_Id 0x%x\n", ivrs_block->dev_id); + + if (table->length >= (length + ivrs_block->length)) + error = parse_ivrs_block(ivrs_block); + else + { + dprintk(XENLOG_ERR, "IVRS Error: " + "Table Length Exceeded: 0x%x -> 0x%lx\n", + table->length, + (length + ivrs_block->length)); + return -ENODEV; + } + length += ivrs_block->length; + } + + return error; +} diff -r 0b20ac6ec64a -r 71a8366fb212 xen/drivers/passthrough/amd/iommu_detect.c --- a/xen/drivers/passthrough/amd/iommu_detect.c Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/drivers/passthrough/amd/iommu_detect.c Fri Feb 29 09:19:58 2008 -0700 @@ -86,30 +86,24 @@ int __init get_iommu_capabilities(u8 bus int __init get_iommu_capabilities(u8 bus, u8 dev, u8 func, u8 cap_ptr, struct amd_iommu *iommu) { - u32 cap_header, cap_range; + u32 cap_header, cap_range, misc_info; u64 mmio_bar; -#if HACK_BIOS_SETTINGS - /* remove it when BIOS available */ - write_pci_config(bus, dev, func, - cap_ptr + PCI_CAP_MMIO_BAR_HIGH_OFFSET, 0x00000000); - write_pci_config(bus, dev, func, - cap_ptr + PCI_CAP_MMIO_BAR_LOW_OFFSET, 0x40000001); - /* remove it when BIOS available */ -#endif - mmio_bar = (u64)read_pci_config(bus, dev, func, - cap_ptr + PCI_CAP_MMIO_BAR_HIGH_OFFSET) << 32; + cap_ptr + PCI_CAP_MMIO_BAR_HIGH_OFFSET) << 32; mmio_bar |= read_pci_config(bus, dev, func, - cap_ptr + PCI_CAP_MMIO_BAR_LOW_OFFSET) & - PCI_CAP_MMIO_BAR_LOW_MASK; - iommu->mmio_base_phys = (unsigned long)mmio_bar; - - if ( (mmio_bar == 0) || ( (mmio_bar & 0x3FFF) != 0 ) ) { + cap_ptr + PCI_CAP_MMIO_BAR_LOW_OFFSET); + iommu->mmio_base_phys = mmio_bar & (u64)~0x3FFF; + + if ( (mmio_bar & 0x1) == 0 || iommu->mmio_base_phys == 0 ) + { dprintk(XENLOG_ERR , "AMD IOMMU: Invalid MMIO_BAR = 0x%"PRIx64"\n", mmio_bar); return -ENODEV; } + + iommu->bdf = (bus << 8) | PCI_DEVFN(dev, func); + iommu->cap_offset = cap_ptr; cap_header = read_pci_config(bus, dev, func, cap_ptr); iommu->revision = get_field_from_reg_u32(cap_header, @@ -119,12 +113,15 @@ int __init get_iommu_capabilities(u8 bus iommu->ht_tunnel_support = get_field_from_reg_u32(cap_header, PCI_CAP_HT_TUNNEL_MASK, PCI_CAP_HT_TUNNEL_SHIFT); - iommu->not_present_cached = get_field_from_reg_u32(cap_header, + 
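parse_ivrs_table above validates the table the way every ACPI consumer does: summing all bytes of the table with u8 wrap-around must yield zero, because the firmware chooses the checksum byte to make it so. As a standalone helper:

#include <stdint.h>
#include <stddef.h>

static int acpi_checksum_ok(const void *table, size_t length)
{
    const uint8_t *p = table;
    uint8_t sum = 0;

    while ( length-- )
        sum += *p++;      /* u8 arithmetic wraps modulo 256 */

    return sum == 0;      /* a well-formed table sums to zero */
}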
iommu->pte_not_present_cached = get_field_from_reg_u32(cap_header, PCI_CAP_NP_CACHE_MASK, PCI_CAP_NP_CACHE_SHIFT); cap_range = read_pci_config(bus, dev, func, cap_ptr + PCI_CAP_RANGE_OFFSET); + iommu->unit_id = get_field_from_reg_u32(cap_range, + PCI_CAP_UNIT_ID_MASK, + PCI_CAP_UNIT_ID_SHIFT); iommu->root_bus = get_field_from_reg_u32(cap_range, PCI_CAP_BUS_NUMBER_MASK, PCI_CAP_BUS_NUMBER_SHIFT); @@ -135,6 +132,11 @@ int __init get_iommu_capabilities(u8 bus PCI_CAP_LAST_DEVICE_MASK, PCI_CAP_LAST_DEVICE_SHIFT); + misc_info = read_pci_config(bus, dev, func, + cap_ptr + PCI_MISC_INFO_OFFSET); + iommu->msi_number = get_field_from_reg_u32(misc_info, + PCI_CAP_MSI_NUMBER_MASK, + PCI_CAP_MSI_NUMBER_SHIFT); return 0; } diff -r 0b20ac6ec64a -r 71a8366fb212 xen/drivers/passthrough/amd/iommu_init.c --- a/xen/drivers/passthrough/amd/iommu_init.c Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/drivers/passthrough/amd/iommu_init.c Fri Feb 29 09:19:58 2008 -0700 @@ -137,8 +137,49 @@ static void __init set_iommu_command_buf writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET); } +static void __init register_iommu_exclusion_range(struct amd_iommu *iommu) +{ + u64 addr_lo, addr_hi; + u32 entry; + + addr_lo = iommu->exclusion_limit & DMA_32BIT_MASK; + addr_hi = iommu->exclusion_limit >> 32; + + set_field_in_reg_u32((u32)addr_hi, 0, + IOMMU_EXCLUSION_LIMIT_HIGH_MASK, + IOMMU_EXCLUSION_LIMIT_HIGH_SHIFT, &entry); + writel(entry, iommu->mmio_base+IOMMU_EXCLUSION_LIMIT_HIGH_OFFSET); + + set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0, + IOMMU_EXCLUSION_LIMIT_LOW_MASK, + IOMMU_EXCLUSION_LIMIT_LOW_SHIFT, &entry); + writel(entry, iommu->mmio_base+IOMMU_EXCLUSION_LIMIT_LOW_OFFSET); + + addr_lo = iommu->exclusion_base & DMA_32BIT_MASK; + addr_hi = iommu->exclusion_base >> 32; + + set_field_in_reg_u32((u32)addr_hi, 0, + IOMMU_EXCLUSION_BASE_HIGH_MASK, + IOMMU_EXCLUSION_BASE_HIGH_SHIFT, &entry); + writel(entry, iommu->mmio_base+IOMMU_EXCLUSION_BASE_HIGH_OFFSET); + + set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0, + IOMMU_EXCLUSION_BASE_LOW_MASK, + IOMMU_EXCLUSION_BASE_LOW_SHIFT, &entry); + + set_field_in_reg_u32(iommu->exclusion_allow_all, entry, + IOMMU_EXCLUSION_ALLOW_ALL_MASK, + IOMMU_EXCLUSION_ALLOW_ALL_SHIFT, &entry); + + set_field_in_reg_u32(iommu->exclusion_enable, entry, + IOMMU_EXCLUSION_RANGE_ENABLE_MASK, + IOMMU_EXCLUSION_RANGE_ENABLE_SHIFT, &entry); + writel(entry, iommu->mmio_base+IOMMU_EXCLUSION_BASE_LOW_OFFSET); +} + void __init enable_iommu(struct amd_iommu *iommu) { + register_iommu_exclusion_range(iommu); set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_ENABLED); set_iommu_translation_control(iommu, IOMMU_CONTROL_ENABLED); printk("AMD IOMMU %d: Enabled\n", nr_amd_iommus); diff -r 0b20ac6ec64a -r 71a8366fb212 xen/drivers/passthrough/amd/iommu_map.c --- a/xen/drivers/passthrough/amd/iommu_map.c Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/drivers/passthrough/amd/iommu_map.c Fri Feb 29 09:19:58 2008 -0700 @@ -234,16 +234,19 @@ static void amd_iommu_set_page_directory } void amd_iommu_set_dev_table_entry(u32 *dte, u64 root_ptr, u16 domain_id, - u8 paging_mode) + u8 sys_mgt, u8 dev_ex, u8 paging_mode) { u64 addr_hi, addr_lo; u32 entry; - dte[6] = dte[5] = dte[4] = 0; - - set_field_in_reg_u32(IOMMU_DEV_TABLE_SYS_MGT_MSG_FORWARDED, 0, + dte[7] = dte[6] = dte[5] = dte[4] = 0; + + set_field_in_reg_u32(sys_mgt, 0, IOMMU_DEV_TABLE_SYS_MGT_MSG_ENABLE_MASK, IOMMU_DEV_TABLE_SYS_MGT_MSG_ENABLE_SHIFT, &entry); + set_field_in_reg_u32(dev_ex, entry, + IOMMU_DEV_TABLE_ALLOW_EXCLUSION_MASK, + 
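register_iommu_exclusion_range above programs a 64-bit machine address into a pair of 32-bit MMIO registers, merging the enable bits into the low base register with the same mask/shift idiom used for reads. A compact restatement of that read-modify-write pattern; set_field and split_addr64 here are illustrative stand-ins for set_field_in_reg_u32 and the open-coded hi/lo split:

#include <stdint.h>

static inline uint32_t set_field(uint32_t reg, uint32_t mask,
                                 uint32_t shift, uint32_t value)
{
    reg &= ~mask;                     /* clear the field */
    reg |= (value << shift) & mask;   /* then merge in the new value */
    return reg;
}

static inline void split_addr64(uint64_t addr, uint32_t *lo, uint32_t *hi)
{
    *lo = (uint32_t)addr;             /* low 32 bits, the DMA_32BIT_MASK half */
    *hi = (uint32_t)(addr >> 32);
}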
IOMMU_DEV_TABLE_ALLOW_EXCLUSION_SHIFT, &entry); dte[3] = entry; set_field_in_reg_u32(domain_id, 0, @@ -448,3 +451,34 @@ int amd_iommu_unmap_page(struct domain * return 0; } + +int amd_iommu_reserve_domain_unity_map( + struct domain *domain, + unsigned long phys_addr, + unsigned long size, int iw, int ir) +{ + unsigned long flags, npages, i; + void *pte; + struct hvm_iommu *hd = domain_hvm_iommu(domain); + + npages = region_to_pages(phys_addr, size); + + spin_lock_irqsave(&hd->mapping_lock, flags); + for ( i = 0; i < npages; ++i ) + { + pte = get_pte_from_page_tables(hd->root_table, + hd->paging_mode, phys_addr>>PAGE_SHIFT); + if ( pte == 0 ) + { + dprintk(XENLOG_ERR, + "AMD IOMMU: Invalid IO pagetable entry phys_addr = %lx\n", phys_addr); + spin_unlock_irqrestore(&hd->mapping_lock, flags); + return -EFAULT; + } + set_page_table_entry_present((u32 *)pte, + phys_addr, iw, ir); + phys_addr += PAGE_SIZE; + } + spin_unlock_irqrestore(&hd->mapping_lock, flags); + return 0; +} diff -r 0b20ac6ec64a -r 71a8366fb212 xen/drivers/passthrough/amd/pci_amd_iommu.c --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Fri Feb 29 09:19:58 2008 -0700 @@ -20,6 +20,7 @@ #include <asm/amd-iommu.h> #include <asm/hvm/svm/amd-iommu-proto.h> +#include <asm/hvm/svm/amd-iommu-acpi.h> #include <xen/sched.h> #include <asm/mm.h> #include "../pci-direct.h" @@ -30,6 +31,9 @@ static long amd_iommu_cmd_buffer_entries static long amd_iommu_cmd_buffer_entries = IOMMU_CMD_BUFFER_DEFAULT_ENTRIES; int nr_amd_iommus = 0; +unsigned short ivrs_bdf_entries = 0; +struct ivrs_mappings *ivrs_mappings = NULL; + /* will set if amd-iommu HW is found */ int amd_iommu_enabled = 0; @@ -82,13 +86,12 @@ static void __init detect_cleanup(void) deallocate_iommu_resources(iommu); xfree(iommu); } -} - -static int requestor_id_from_bdf(int bdf) -{ - /* HACK - HACK */ - /* account for possible 'aliasing' by parent device */ - return bdf; + + if ( ivrs_mappings ) + { + xfree(ivrs_mappings); + ivrs_mappings = NULL; + } } static int __init allocate_iommu_table_struct(struct table_struct *table, @@ -179,10 +182,21 @@ static int __init amd_iommu_init(void) { struct amd_iommu *iommu; unsigned long flags; + u16 bdf; for_each_amd_iommu ( iommu ) { spin_lock_irqsave(&iommu->lock, flags); + + /* assign default IOMMU values */ + iommu->coherent = IOMMU_CONTROL_ENABLED; + iommu->isochronous = IOMMU_CONTROL_ENABLED; + iommu->res_pass_pw = IOMMU_CONTROL_ENABLED; + iommu->pass_pw = IOMMU_CONTROL_ENABLED; + iommu->ht_tunnel_enable = iommu->ht_tunnel_support ? 
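amd_iommu_reserve_domain_unity_map above sizes its PTE loop with region_to_pages(), which counts the pages covering [phys_addr, phys_addr + size): round the end up to a page boundary, round the start down, then shift. The helper, added to amd-iommu-proto.h later in this patch, behaves like this:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & PAGE_MASK)

static unsigned long region_to_pages(unsigned long addr, unsigned long size)
{
    return (PAGE_ALIGN(addr + size) - (addr & PAGE_MASK)) >> PAGE_SHIFT;
}

int main(void)
{
    /* two bytes straddling a page boundary still need two mappings */
    printf("%lu\n", region_to_pages(0xfff, 2));   /* prints 2 */
    return 0;
}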
+ IOMMU_CONTROL_ENABLED : IOMMU_CONTROL_DISABLED; + iommu->exclusion_enable = IOMMU_CONTROL_DISABLED; + iommu->exclusion_allow_all = IOMMU_CONTROL_DISABLED; /* register IOMMU data strucures in MMIO space */ if ( map_iommu_mmio_region(iommu) != 0 ) @@ -190,10 +204,30 @@ static int __init amd_iommu_init(void) register_iommu_dev_table_in_mmio_space(iommu); register_iommu_cmd_buffer_in_mmio_space(iommu); + spin_unlock_irqrestore(&iommu->lock, flags); + } + + /* assign default values for device entries */ + for ( bdf = 0; bdf < ivrs_bdf_entries; ++bdf ) + { + ivrs_mappings[bdf].dte_requestor_id = bdf; + ivrs_mappings[bdf].dte_sys_mgt_enable = + IOMMU_DEV_TABLE_SYS_MGT_MSG_FORWARDED; + ivrs_mappings[bdf].dte_allow_exclusion = + IOMMU_CONTROL_DISABLED; + ivrs_mappings[bdf].unity_map_enable = + IOMMU_CONTROL_DISABLED; + } + + if ( acpi_table_parse(ACPI_IVRS, parse_ivrs_table) != 0 ) + dprintk(XENLOG_INFO, "AMD IOMMU: Did not find IVRS table!\n"); + + for_each_amd_iommu ( iommu ) + { + spin_lock_irqsave(&iommu->lock, flags); /* enable IOMMU translation services */ enable_iommu(iommu); nr_amd_iommus++; - spin_unlock_irqrestore(&iommu->lock, flags); } @@ -229,31 +263,38 @@ struct amd_iommu *find_iommu_for_device( } void amd_iommu_setup_domain_device( - struct domain *domain, struct amd_iommu *iommu, int requestor_id) + struct domain *domain, struct amd_iommu *iommu, int bdf) { void *dte; u64 root_ptr; unsigned long flags; + int req_id; + u8 sys_mgt, dev_ex; struct hvm_iommu *hd = domain_hvm_iommu(domain); - BUG_ON( !hd->root_table||!hd->paging_mode ); + BUG_ON( !hd->root_table || !hd->paging_mode ); root_ptr = (u64)virt_to_maddr(hd->root_table); + /* get device-table entry */ + req_id = ivrs_mappings[bdf].dte_requestor_id; dte = iommu->dev_table.buffer + - (requestor_id * IOMMU_DEV_TABLE_ENTRY_SIZE); + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE); if ( !amd_iommu_is_dte_page_translation_valid((u32 *)dte) ) { spin_lock_irqsave(&iommu->lock, flags); - amd_iommu_set_dev_table_entry( - (u32 *)dte, - root_ptr, hd->domain_id, hd->paging_mode); - invalidate_dev_table_entry(iommu, requestor_id); + /* bind DTE to domain page-tables */ + sys_mgt = ivrs_mappings[req_id].dte_sys_mgt_enable; + dev_ex = ivrs_mappings[req_id].dte_allow_exclusion; + amd_iommu_set_dev_table_entry((u32 *)dte, root_ptr, + req_id, sys_mgt, dev_ex, hd->paging_mode); + + invalidate_dev_table_entry(iommu, req_id); flush_command_buffer(iommu); dprintk(XENLOG_INFO, "AMD IOMMU: Set DTE req_id:%x, " "root_ptr:%"PRIx64", domain_id:%d, paging_mode:%d\n", - requestor_id, root_ptr, hd->domain_id, hd->paging_mode); + req_id, root_ptr, hd->domain_id, hd->paging_mode); spin_unlock_irqrestore(&iommu->lock, flags); } @@ -266,7 +307,7 @@ void __init amd_iommu_setup_dom0_devices struct pci_dev *pdev; int bus, dev, func; u32 l; - int req_id, bdf; + int bdf; for ( bus = 0; bus < 256; bus++ ) { @@ -286,11 +327,12 @@ void __init amd_iommu_setup_dom0_devices list_add_tail(&pdev->list, &hd->pdev_list); bdf = (bus << 8) | pdev->devfn; - req_id = requestor_id_from_bdf(bdf); - iommu = find_iommu_for_device(bus, pdev->devfn); + /* supported device? */ + iommu = (bdf < ivrs_bdf_entries) ? 
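The device handling above consistently keys its tables by BDF, packing the bus number into the high byte and devfn into the low byte, then indirects once more through ivrs_mappings[].dte_requestor_id so that aliased devices (rewritten by IVHD alias entries) share a single device-table entry. In outline, with a trimmed-down struct for illustration:

#include <stdint.h>

struct ivrs_mapping { uint16_t dte_requestor_id; /* ... */ };

static inline uint16_t make_bdf(uint8_t bus, uint8_t devfn)
{
    return ((uint16_t)bus << 8) | devfn;   /* bus:dev.fn packed into 16 bits */
}

/* The DTE slot actually programmed for a device: identity unless an
 * IVHD alias entry pointed it at another BDF. */
static uint16_t dte_slot(const struct ivrs_mapping *map, uint16_t bdf)
{
    return map[bdf].dte_requestor_id;
}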
+ find_iommu_for_device(bus, pdev->devfn) : NULL; if ( iommu ) - amd_iommu_setup_domain_device(dom0, iommu, req_id); + amd_iommu_setup_domain_device(dom0, iommu, bdf); } } } @@ -299,6 +341,8 @@ int amd_iommu_detect(void) int amd_iommu_detect(void) { unsigned long i; + int last_bus; + struct amd_iommu *iommu; if ( !enable_amd_iommu ) { @@ -318,6 +362,28 @@ int amd_iommu_detect(void) { printk("AMD IOMMU: Not found!\n"); return 0; + } + else + { + /* allocate 'ivrs mappings' table */ + /* note: the table has entries to accomodate all IOMMUs */ + last_bus = 0; + for_each_amd_iommu (iommu) + if (iommu->last_downstream_bus > last_bus) + last_bus = iommu->last_downstream_bus; + + ivrs_bdf_entries = (last_bus + 1) * + IOMMU_DEV_TABLE_ENTRIES_PER_BUS; + ivrs_mappings = xmalloc_array( struct ivrs_mappings, ivrs_bdf_entries); + + if ( !ivrs_mappings ) + { + dprintk(XENLOG_ERR, "AMD IOMMU:" + " Error allocating IVRS DevMappings table\n"); + goto error_out; + } + memset(ivrs_mappings, 0, + ivrs_bdf_entries * sizeof(struct ivrs_mappings)); } if ( amd_iommu_init() != 0 ) @@ -407,23 +473,25 @@ int amd_iommu_domain_init(struct domain } static void amd_iommu_disable_domain_device( - struct domain *domain, struct amd_iommu *iommu, u16 requestor_id) + struct domain *domain, struct amd_iommu *iommu, int bdf) { void *dte; unsigned long flags; - + int req_id; + + req_id = ivrs_mappings[bdf].dte_requestor_id; dte = iommu->dev_table.buffer + - (requestor_id * IOMMU_DEV_TABLE_ENTRY_SIZE); + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE); if ( amd_iommu_is_dte_page_translation_valid((u32 *)dte) ) { spin_lock_irqsave(&iommu->lock, flags); memset (dte, 0, IOMMU_DEV_TABLE_ENTRY_SIZE); - invalidate_dev_table_entry(iommu, requestor_id); + invalidate_dev_table_entry(iommu, req_id); flush_command_buffer(iommu); dprintk(XENLOG_INFO , "AMD IOMMU: disable DTE 0x%x," " domain_id:%d, paging_mode:%d\n", - requestor_id, domain_hvm_iommu(domain)->domain_id, + req_id, domain_hvm_iommu(domain)->domain_id, domain_hvm_iommu(domain)->paging_mode); spin_unlock_irqrestore(&iommu->lock, flags); } @@ -438,7 +506,7 @@ static int reassign_device( struct domai struct hvm_iommu *target_hd = domain_hvm_iommu(target); struct pci_dev *pdev; struct amd_iommu *iommu; - int req_id, bdf; + int bdf; unsigned long flags; for_each_pdev( source, pdev ) @@ -450,12 +518,13 @@ static int reassign_device( struct domai pdev->devfn = devfn; bdf = (bus << 8) | devfn; - req_id = requestor_id_from_bdf(bdf); - iommu = find_iommu_for_device(bus, devfn); + /* supported device? */ + iommu = (bdf < ivrs_bdf_entries) ? + find_iommu_for_device(bus, pdev->devfn) : NULL; if ( iommu ) { - amd_iommu_disable_domain_device(source, iommu, req_id); + amd_iommu_disable_domain_device(source, iommu, bdf); /* Move pci device from the source domain to target domain. 
*/ spin_lock_irqsave(&source_hd->iommu_list_lock, flags); spin_lock_irqsave(&target_hd->iommu_list_lock, flags); @@ -463,7 +532,7 @@ static int reassign_device( struct domai spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags); spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags); - amd_iommu_setup_domain_device(target, iommu, req_id); + amd_iommu_setup_domain_device(target, iommu, bdf); gdprintk(XENLOG_INFO , "AMD IOMMU: reassign %x:%x.%x domain %d -> domain %d\n", bus, PCI_SLOT(devfn), PCI_FUNC(devfn), @@ -484,6 +553,19 @@ static int reassign_device( struct domai int amd_iommu_assign_device(struct domain *d, u8 bus, u8 devfn) { + int bdf = (bus << 8) | devfn; + int req_id; + req_id = ivrs_mappings[bdf].dte_requestor_id; + + if (ivrs_mappings[req_id].unity_map_enable) + { + amd_iommu_reserve_domain_unity_map(d, + ivrs_mappings[req_id].addr_range_start, + ivrs_mappings[req_id].addr_range_length, + ivrs_mappings[req_id].write_permission, + ivrs_mappings[req_id].read_permission); + } + pdev_flr(bus, devfn); return reassign_device(dom0, d, bus, devfn); } diff -r 0b20ac6ec64a -r 71a8366fb212 xen/include/asm-x86/amd-iommu.h --- a/xen/include/asm-x86/amd-iommu.h Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/include/asm-x86/amd-iommu.h Fri Feb 29 09:19:58 2008 -0700 @@ -43,14 +43,25 @@ struct amd_iommu { struct list_head list; spinlock_t lock; /* protect iommu */ - int iotlb_support; - int ht_tunnel_support; - int not_present_cached; + u16 bdf; + u8 cap_offset; u8 revision; + u8 unit_id; + u8 msi_number; u8 root_bus; u8 first_devfn; u8 last_devfn; + + u8 pte_not_present_cached; + u8 ht_tunnel_support; + u8 iotlb_support; + + u8 isochronous; + u8 coherent; + u8 res_pass_pw; + u8 pass_pw; + u8 ht_tunnel_enable; int last_downstream_bus; int downstream_bus_present[PCI_MAX_BUS_COUNT]; @@ -61,10 +72,23 @@ struct amd_iommu { struct table_struct dev_table; struct table_struct cmd_buffer; u32 cmd_buffer_tail; + struct table_struct event_log; + u32 event_log_head; - int exclusion_enabled; - unsigned long exclusion_base; - unsigned long exclusion_limit; + int exclusion_enable; + int exclusion_allow_all; + uint64_t exclusion_base; + uint64_t exclusion_limit; }; +struct ivrs_mappings { + u16 dte_requestor_id; + u8 dte_sys_mgt_enable; + u8 dte_allow_exclusion; + u8 unity_map_enable; + u8 write_permission; + u8 read_permission; + unsigned long addr_range_start; + unsigned long addr_range_length; +}; #endif /* _ASM_X86_64_AMD_IOMMU_H */ diff -r 0b20ac6ec64a -r 71a8366fb212 xen/include/asm-x86/domain.h --- a/xen/include/asm-x86/domain.h Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/include/asm-x86/domain.h Fri Feb 29 09:19:58 2008 -0700 @@ -97,6 +97,11 @@ struct shadow_domain { /* Fast MMIO path heuristic */ int has_fast_mmio_entries; + + /* reflect guest table dirty status, incremented by write + * emulation and remove write permission + */ + atomic_t gtable_dirty_version; }; struct shadow_vcpu { diff -r 0b20ac6ec64a -r 71a8366fb212 xen/include/asm-x86/hvm/svm/amd-iommu-acpi.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-acpi.h Fri Feb 29 09:19:58 2008 -0700 @@ -0,0 +1,176 @@ +/* + * Copyright (C) 2007 Advanced Micro Devices, Inc. 
+ * Author: Leo Duran <leo.duran@xxxxxxx> + * Author: Wei Wang <wei.wang2@xxxxxxx> - adapted to xen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_X86_64_AMD_IOMMU_ACPI_H +#define _ASM_X86_64_AMD_IOMMU_ACPI_H + +#include <xen/acpi.h> + +/* I/O Virtualization Reporting Structure */ +#define AMD_IOMMU_ACPI_IVRS_SIG "IVRS" +#define AMD_IOMMU_ACPI_IVHD_TYPE 0x10 +#define AMD_IOMMU_ACPI_IVMD_ALL_TYPE 0x20 +#define AMD_IOMMU_ACPI_IVMD_ONE_TYPE 0x21 +#define AMD_IOMMU_ACPI_IVMD_RANGE_TYPE 0x22 +#define AMD_IOMMU_ACPI_IVMD_IOMMU_TYPE 0x23 + +/* 4-byte Device Entries */ +#define AMD_IOMMU_ACPI_IVHD_DEV_U32_PAD 0 +#define AMD_IOMMU_ACPI_IVHD_DEV_SELECT 2 +#define AMD_IOMMU_ACPI_IVHD_DEV_RANGE_START 3 +#define AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END 4 + +/* 8-byte Device Entries */ +#define AMD_IOMMU_ACPI_IVHD_DEV_U64_PAD 64 +#define AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_SELECT 66 +#define AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_RANGE 67 +#define AMD_IOMMU_ACPI_IVHD_DEV_EXT_SELECT 70 +#define AMD_IOMMU_ACPI_IVHD_DEV_EXT_RANGE 71 + +/* IVHD IOMMU Flags */ +#define AMD_IOMMU_ACPI_COHERENT_MASK 0x20 +#define AMD_IOMMU_ACPI_COHERENT_SHIFT 5 +#define AMD_IOMMU_ACPI_IOTLB_SUP_MASK 0x10 +#define AMD_IOMMU_ACPI_IOTLB_SUP_SHIFT 4 +#define AMD_IOMMU_ACPI_ISOC_MASK 0x08 +#define AMD_IOMMU_ACPI_ISOC_SHIFT 3 +#define AMD_IOMMU_ACPI_RES_PASS_PW_MASK 0x04 +#define AMD_IOMMU_ACPI_RES_PASS_PW_SHIFT 2 +#define AMD_IOMMU_ACPI_PASS_PW_MASK 0x02 +#define AMD_IOMMU_ACPI_PASS_PW_SHIFT 1 +#define AMD_IOMMU_ACPI_HT_TUN_ENB_MASK 0x01 +#define AMD_IOMMU_ACPI_HT_TUN_ENB_SHIFT 0 + +/* IVHD Device Flags */ +#define AMD_IOMMU_ACPI_LINT1_PASS_MASK 0x80 +#define AMD_IOMMU_ACPI_LINT1_PASS_SHIFT 7 +#define AMD_IOMMU_ACPI_LINT0_PASS_MASK 0x40 +#define AMD_IOMMU_ACPI_LINT0_PASS_SHIFT 6 +#define AMD_IOMMU_ACPI_SYS_MGT_MASK 0x30 +#define AMD_IOMMU_ACPI_SYS_MGT_SHIFT 4 +#define AMD_IOMMU_ACPI_NMI_PASS_MASK 0x04 +#define AMD_IOMMU_ACPI_NMI_PASS_SHIFT 2 +#define AMD_IOMMU_ACPI_EINT_PASS_MASK 0x02 +#define AMD_IOMMU_ACPI_EINT_PASS_SHIFT 1 +#define AMD_IOMMU_ACPI_INIT_PASS_MASK 0x01 +#define AMD_IOMMU_ACPI_INIT_PASS_SHIFT 0 + +/* IVHD Device Extended Flags */ +#define AMD_IOMMU_ACPI_ATS_DISABLED_MASK 0x80000000 +#define AMD_IOMMU_ACPI_ATS_DISABLED_SHIFT 31 + +/* IVMD Device Flags */ +#define AMD_IOMMU_ACPI_EXCLUSION_RANGE_MASK 0x08 +#define AMD_IOMMU_ACPI_EXCLUSION_RANGE_SHIFT 3 +#define AMD_IOMMU_ACPI_IW_PERMISSION_MASK 0x04 +#define AMD_IOMMU_ACPI_IW_PERMISSION_SHIFT 2 +#define AMD_IOMMU_ACPI_IR_PERMISSION_MASK 0x02 +#define AMD_IOMMU_ACPI_IR_PERMISSION_SHIFT 1 +#define AMD_IOMMU_ACPI_UNITY_MAPPING_MASK 0x01 +#define AMD_IOMMU_ACPI_UNITY_MAPPING_SHIFT 0 + +#define ACPI_OEM_ID_SIZE 6 +#define ACPI_OEM_TABLE_ID_SIZE 8 + +#pragma pack(1) +struct acpi_ivrs_table_header { + struct acpi_table_header acpi_header; + u32 io_info; + u8 reserved[8]; +}; + +struct acpi_ivrs_block_header { 
+ u8 type; + u8 flags; + u16 length; + u16 dev_id; +}; + +struct acpi_ivhd_block_header { + struct acpi_ivrs_block_header header; + u16 cap_offset; + u64 mmio_base; + u16 pci_segment; + u16 iommu_info; + u8 reserved[4]; +}; + +struct acpi_ivhd_device_header { + u8 type; + u16 dev_id; + u8 flags; +}; + +struct acpi_ivhd_device_trailer { + u8 type; + u16 dev_id; + u8 reserved; +}; + +struct acpi_ivhd_device_range { + struct acpi_ivhd_device_header header; + struct acpi_ivhd_device_trailer trailer; +}; + +struct acpi_ivhd_device_alias { + struct acpi_ivhd_device_header header; + u8 reserved1; + u16 dev_id; + u8 reserved2; +}; + +struct acpi_ivhd_device_alias_range { + struct acpi_ivhd_device_alias alias; + struct acpi_ivhd_device_trailer trailer; +}; + +struct acpi_ivhd_device_extended { + struct acpi_ivhd_device_header header; + u32 ext_flags; +}; + +struct acpi_ivhd_device_extended_range { + struct acpi_ivhd_device_extended extended; + struct acpi_ivhd_device_trailer trailer; +}; + +union acpi_ivhd_device { + struct acpi_ivhd_device_header header; + struct acpi_ivhd_device_range range; + struct acpi_ivhd_device_alias alias; + struct acpi_ivhd_device_alias_range alias_range; + struct acpi_ivhd_device_extended extended; + struct acpi_ivhd_device_extended_range extended_range; +}; + +struct acpi_ivmd_block_header { + struct acpi_ivrs_block_header header; + union { + u16 last_dev_id; + u16 cap_offset; + u16 reserved1; + }; + u64 reserved2; + u64 start_addr; + u64 mem_length; +}; +#pragma pack() + +#endif /* _ASM_X86_64_AMD_IOMMU_ACPI_H */ diff -r 0b20ac6ec64a -r 71a8366fb212 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Fri Feb 29 09:19:58 2008 -0700 @@ -117,6 +117,12 @@ #define PCI_CAP_FIRST_DEVICE_SHIFT 16 #define PCI_CAP_LAST_DEVICE_MASK 0xFF000000 #define PCI_CAP_LAST_DEVICE_SHIFT 24 + +#define PCI_CAP_UNIT_ID_MASK 0x0000001F +#define PCI_CAP_UNIT_ID_SHIFT 0 +#define PCI_MISC_INFO_OFFSET 0x10 +#define PCI_CAP_MSI_NUMBER_MASK 0x0000001F +#define PCI_CAP_MSI_NUMBER_SHIFT 0 /* Device Table */ #define IOMMU_DEV_TABLE_BASE_LOW_OFFSET 0x00 diff -r 0b20ac6ec64a -r 71a8366fb212 xen/include/asm-x86/hvm/svm/amd-iommu-proto.h --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h Fri Feb 29 09:19:58 2008 -0700 @@ -21,6 +21,7 @@ #ifndef _ASM_X86_64_AMD_IOMMU_PROTO_H #define _ASM_X86_64_AMD_IOMMU_PROTO_H +#include <xen/sched.h> #include <asm/amd-iommu.h> #define for_each_amd_iommu(amd_iommu) \ @@ -54,10 +55,12 @@ int amd_iommu_map_page(struct domain *d, int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn); int amd_iommu_unmap_page(struct domain *d, unsigned long gfn); void *amd_iommu_get_vptr_from_page_table_entry(u32 *entry); +int amd_iommu_reserve_domain_unity_map(struct domain *domain, + unsigned long phys_addr, unsigned long size, int iw, int ir); /* device table functions */ -void amd_iommu_set_dev_table_entry(u32 *dte, - u64 root_ptr, u16 domain_id, u8 paging_mode); +void amd_iommu_set_dev_table_entry(u32 *dte, u64 root_ptr, + u16 domain_id, u8 sys_mgt, u8 dev_ex, u8 paging_mode); int amd_iommu_is_dte_page_translation_valid(u32 *entry); void invalidate_dev_table_entry(struct amd_iommu *iommu, u16 devic_id); @@ -69,10 +72,13 @@ void flush_command_buffer(struct amd_iom /* iommu domain funtions */ int amd_iommu_domain_init(struct domain *domain); void 
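The structures in amd-iommu-acpi.h sit inside #pragma pack(1) because they must match the firmware's byte layout exactly; without packing, a compiler would pad the u8 type field of acpi_ivhd_device_header so that the u16 dev_id lands at an aligned offset, shifting every later field. A quick demonstration of the difference, using a stand-in struct:

#include <stdint.h>
#include <assert.h>

#pragma pack(1)
struct packed_hdr  { uint8_t type; uint16_t dev_id; uint8_t flags; };
#pragma pack()
struct natural_hdr { uint8_t type; uint16_t dev_id; uint8_t flags; };

int main(void)
{
    assert(sizeof(struct packed_hdr) == 4);   /* matches the 4-byte entry */
    /* typically 6 on x86: dev_id padded to offset 2, plus a tail pad byte */
    assert(sizeof(struct natural_hdr) >= sizeof(struct packed_hdr));
    return 0;
}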
amd_iommu_setup_domain_device(struct domain *domain, - struct amd_iommu *iommu, int requestor_id); + struct amd_iommu *iommu, int bdf); /* find iommu for bdf */ struct amd_iommu *find_iommu_for_device(int bus, int devfn); + +/* amd-iommu-acpi functions */ +int __init parse_ivrs_table(unsigned long phys_addr, unsigned long size); static inline u32 get_field_from_reg_u32(u32 reg_value, u32 mask, u32 shift) { @@ -91,4 +97,16 @@ static inline u32 set_field_in_reg_u32(u return reg_value; } +static inline u8 get_field_from_byte(u8 value, u8 mask, u8 shift) +{ + u8 field; + field = (value & mask) >> shift; + return field; +} + +static inline unsigned long region_to_pages(unsigned long addr, unsigned long size) +{ + return (PAGE_ALIGN(addr + size) - (addr & PAGE_MASK)) >> PAGE_SHIFT; +} + #endif /* _ASM_X86_64_AMD_IOMMU_PROTO_H */ diff -r 0b20ac6ec64a -r 71a8366fb212 xen/include/asm-x86/perfc_defn.h --- a/xen/include/asm-x86/perfc_defn.h Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/include/asm-x86/perfc_defn.h Fri Feb 29 09:19:58 2008 -0700 @@ -88,6 +88,11 @@ PERFCOUNTER(shadow_unshadow_bf, "shad PERFCOUNTER(shadow_unshadow_bf, "shadow unshadow brute-force") PERFCOUNTER(shadow_get_page_fail, "shadow_get_page_from_l1e failed") PERFCOUNTER(shadow_guest_walk, "shadow walks guest tables") +PERFCOUNTER(shadow_check_gwalk, "shadow checks gwalk") +PERFCOUNTER(shadow_inconsistent_gwalk, "shadow check inconsistent gwalk") +PERFCOUNTER(shadow_rm_write_flush_tlb, + "shadow flush tlb by removing write perm") + PERFCOUNTER(shadow_invlpg, "shadow emulates invlpg") PERFCOUNTER(shadow_invlpg_fault, "shadow invlpg faults") diff -r 0b20ac6ec64a -r 71a8366fb212 xen/include/public/io/kbdif.h --- a/xen/include/public/io/kbdif.h Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/include/public/io/kbdif.h Fri Feb 29 09:19:58 2008 -0700 @@ -65,7 +65,7 @@ struct xenkbd_position uint8_t type; /* XENKBD_TYPE_POS */ int32_t abs_x; /* absolute X position (in FB pixels) */ int32_t abs_y; /* absolute Y position (in FB pixels) */ - int32_t abs_z; /* absolute Z position (wheel) */ + int32_t rel_z; /* relative Z motion (wheel) */ }; #define XENKBD_IN_EVENT_SIZE 40 diff -r 0b20ac6ec64a -r 71a8366fb212 xen/include/public/kexec.h --- a/xen/include/public/kexec.h Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/include/public/kexec.h Fri Feb 29 09:19:58 2008 -0700 @@ -126,9 +126,18 @@ typedef struct xen_kexec_load { xen_kexec_image_t image; } xen_kexec_load_t; -#define KEXEC_RANGE_MA_CRASH 0 /* machine address and size of crash area */ -#define KEXEC_RANGE_MA_XEN 1 /* machine address and size of Xen itself */ -#define KEXEC_RANGE_MA_CPU 2 /* machine address and size of a CPU note */ +#define KEXEC_RANGE_MA_CRASH 0 /* machine address and size of crash area */ +#define KEXEC_RANGE_MA_XEN 1 /* machine address and size of Xen itself */ +#define KEXEC_RANGE_MA_CPU 2 /* machine address and size of a CPU note */ +#define KEXEC_RANGE_MA_XENHEAP 3 /* machine address and size of xenheap + * Note that although this is adjacent + * to Xen it exists in a separate EFI + * region on ia64, and thus needs to be + * inserted into iomem_machine separately */ +#define KEXEC_RANGE_MA_BOOT_PARAM 4 /* machine address and size of + * the ia64_boot_param */ +#define KEXEC_RANGE_MA_EFI_MEMMAP 5 /* machine address and size of + * of the EFI Memory Map */ /* * Find the address and size of certain memory areas diff -r 0b20ac6ec64a -r 71a8366fb212 xen/include/xen/acpi.h --- a/xen/include/xen/acpi.h Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/include/xen/acpi.h Fri Feb 29 09:19:58 2008 
-0700 @@ -368,6 +368,7 @@ enum acpi_table_id { ACPI_HPET, ACPI_MCFG, ACPI_DMAR, + ACPI_IVRS, ACPI_TABLE_COUNT }; diff -r 0b20ac6ec64a -r 71a8366fb212 xen/include/xen/kexec.h --- a/xen/include/xen/kexec.h Fri Feb 29 09:18:01 2008 -0700 +++ b/xen/include/xen/kexec.h Fri Feb 29 09:19:58 2008 -0700 @@ -31,6 +31,7 @@ void kexec_crash_save_cpu(void); void kexec_crash_save_cpu(void); crash_xen_info_t *kexec_crash_save_info(void); void machine_crash_shutdown(void); +int machine_kexec_get(xen_kexec_range_t *range); #endif /* __XEN_KEXEC_H__ */ _______________________________________________ Xen-changelog mailing list Xen-changelog@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-changelog