[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH V12 09/17] xen: Introduce the Xen mapcache
From: Jun Nakajima <jun.nakajima@xxxxxxxxx> On IA32 host or IA32 PAE host, at present, generally, we can't create an HVM guest with more than 2G memory, because generally it's almost impossible for Qemu to find a large enough and consecutive virtual address space to map an HVM guest's whole physical address space. The attached patch fixes this issue using dynamic mapping based on little blocks of memory. Each call to qemu_get_ram_ptr makes a call to qemu_map_cache with the lock option, so mapcache will not unmap these ram_ptr. Signed-off-by: Jun Nakajima <jun.nakajima@xxxxxxxxx> Signed-off-by: Anthony PERARD <anthony.perard@xxxxxxxxxx> Signed-off-by: Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx> --- Makefile.target | 3 + configure | 3 + exec.c | 40 ++++++- hw/xen.h | 13 ++ hw/xen_common.h | 9 ++ xen-all.c | 64 +++++++++++ xen-mapcache-stub.c | 40 +++++++ xen-mapcache.c | 310 +++++++++++++++++++++++++++++++++++++++++++++++++++ xen-mapcache.h | 22 ++++ xen-stub.c | 4 + 10 files changed, 504 insertions(+), 4 deletions(-) create mode 100644 xen-mapcache-stub.c create mode 100644 xen-mapcache.c create mode 100644 xen-mapcache.h diff --git a/Makefile.target b/Makefile.target index 8d5c75b..245d2a2 100644 --- a/Makefile.target +++ b/Makefile.target @@ -214,8 +214,11 @@ else CONFIG_NO_XEN = y endif # xen support +CONFIG_NO_XEN_MAPCACHE = $(if $(subst n,,$(CONFIG_XEN_MAPCACHE)),n,y) obj-i386-$(CONFIG_XEN) += xen-all.o obj-$(CONFIG_NO_XEN) += xen-stub.o +obj-i386-$(CONFIG_XEN_MAPCACHE) += xen-mapcache.o +obj-$(CONFIG_NO_XEN_MAPCACHE) += xen-mapcache-stub.o obj-i386-$(CONFIG_XEN) += xen_platform.o diff --git a/configure b/configure index bc6552e..309b210 100755 --- a/configure +++ b/configure @@ -3191,6 +3191,9 @@ case "$target_arch2" in i386|x86_64) if test "$xen" = "yes" -a "$target_softmmu" = "yes" ; then echo "CONFIG_XEN=y" >> $config_target_mak + if test "$cpu" = "i386" -o "$cpu" = "x86_64"; then + echo "CONFIG_XEN_MAPCACHE=y" >> $config_target_mak + fi fi esac case "$target_arch2" in diff --git a/exec.c b/exec.c index 964ce31..941abce 100644 --- a/exec.c +++ b/exec.c @@ -32,6 +32,7 @@ #include "hw/qdev.h" #include "osdep.h" #include "kvm.h" +#include "hw/xen.h" #include "qemu-timer.h" #if defined(CONFIG_USER_ONLY) #include <qemu.h> @@ -51,6 +52,8 @@ #include <libutil.h> #endif #endif +#else /* !CONFIG_USER_ONLY */ +#include "xen-mapcache.h" #endif //#define DEBUG_TB_INVALIDATE @@ -2868,6 +2871,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, } } + new_block->offset = find_ram_offset(size); if (host) { new_block->host = host; new_block->flags |= RAM_PREALLOC_MASK; @@ -2890,13 +2894,15 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name, PROT_EXEC|PROT_READ|PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); #else - new_block->host = qemu_vmalloc(size); + if (xen_mapcache_enabled()) { + xen_ram_alloc(new_block->offset, size); + } else { + new_block->host = qemu_vmalloc(size); + } #endif qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE); } } - - new_block->offset = find_ram_offset(size); new_block->length = size; QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next); @@ -2941,7 +2947,11 @@ void qemu_ram_free(ram_addr_t addr) #if defined(TARGET_S390X) && defined(CONFIG_KVM) munmap(block->host, block->length); #else - qemu_vfree(block->host); + if (xen_mapcache_enabled()) { + qemu_invalidate_entry(block->host); + } else { + qemu_vfree(block->host); + } #endif } qemu_free(block); @@ -3030,6 +3040,15 @@ void *qemu_get_ram_ptr(ram_addr_t addr) QLIST_REMOVE(block, next); QLIST_INSERT_HEAD(&ram_list.blocks, block, next); } + if (xen_mapcache_enabled()) { + /* We need to check if the requested address is in the RAM + * because we don't want to map the entire memory in QEMU. + */ + if (block->offset == 0) { + return qemu_map_cache(addr, 0, 1); + } + block->host = qemu_map_cache(block->offset, block->length, 1); + } return block->host + (addr - block->offset); } } @@ -3065,11 +3084,21 @@ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr) uint8_t *host = ptr; QLIST_FOREACH(block, &ram_list.blocks, next) { + /* This case append when the block is not mapped. */ + if (block->host == NULL) { + continue; + } if (host - block->host < block->length) { *ram_addr = block->offset + (host - block->host); return 0; } } + + if (xen_mapcache_enabled()) { + *ram_addr = qemu_ram_addr_from_mapcache(ptr); + return 0; + } + return -1; } @@ -3980,6 +4009,9 @@ void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len, if (is_write) { cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len); } + if (xen_enabled()) { + qemu_invalidate_entry(buffer); + } qemu_vfree(bounce.buffer); bounce.buffer = NULL; cpu_notify_map_clients(); diff --git a/hw/xen.h b/hw/xen.h index 63d953f..9b625fd 100644 --- a/hw/xen.h +++ b/hw/xen.h @@ -31,6 +31,15 @@ static inline int xen_enabled(void) #endif } +static inline int xen_mapcache_enabled(void) +{ +#ifdef CONFIG_XEN_MAPCACHE + return xen_enabled(); +#else + return 0; +#endif +} + int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num); void xen_piix3_set_irq(void *opaque, int irq_num, int level); void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len); @@ -39,6 +48,10 @@ qemu_irq *xen_interrupt_controller_init(void); int xen_init(void); +#if defined(NEED_CPU_H) && !defined(CONFIG_USER_ONLY) +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size); +#endif + #if defined(CONFIG_XEN) && CONFIG_XEN_CTRL_INTERFACE_VERSION < 400 # define HVM_MAX_VCPUS 32 #endif diff --git a/hw/xen_common.h b/hw/xen_common.h index 47b8afe..dd3e896 100644 --- a/hw/xen_common.h +++ b/hw/xen_common.h @@ -63,6 +63,15 @@ static inline int xc_fd(int xen_xc) } +static inline int xc_domain_populate_physmap_exact + (XenXC xc_handle, uint32_t domid, unsigned long nr_extents, + unsigned int extent_order, unsigned int mem_flags, xen_pfn_t *extent_start) +{ + return xc_domain_memory_populate_physmap + (xc_handle, domid, nr_extents, extent_order, mem_flags, extent_start); +} + + /* Xen 4.1 */ #else diff --git a/xen-all.c b/xen-all.c index 22bd413..dac545b 100644 --- a/xen-all.c +++ b/xen-all.c @@ -10,6 +10,8 @@ #include "hw/xen_common.h" #include "hw/xen_backend.h" +#include "xen-mapcache.h" + /* Xen specific function for piix pci */ int xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) @@ -52,6 +54,64 @@ qemu_irq *xen_interrupt_controller_init(void) return qemu_allocate_irqs(xen_set_irq, NULL, 16); } + +/* Memory Ops */ + +static void xen_ram_init(ram_addr_t ram_size) +{ + RAMBlock *new_block; + ram_addr_t below_4g_mem_size, above_4g_mem_size = 0; + + new_block = qemu_mallocz(sizeof (*new_block)); + pstrcpy(new_block->idstr, sizeof (new_block->idstr), "xen.ram"); + new_block->host = NULL; + new_block->offset = 0; + new_block->length = ram_size; + + QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next); + + ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty, + new_block->length >> TARGET_PAGE_BITS); + memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS), + 0xff, new_block->length >> TARGET_PAGE_BITS); + + if (ram_size >= 0xe0000000 ) { + above_4g_mem_size = ram_size - 0xe0000000; + below_4g_mem_size = 0xe0000000; + } else { + below_4g_mem_size = ram_size; + } + + cpu_register_physical_memory(0, below_4g_mem_size, new_block->offset); +#if TARGET_PHYS_ADDR_BITS > 32 + if (above_4g_mem_size > 0) { + cpu_register_physical_memory(0x100000000ULL, above_4g_mem_size, + new_block->offset + below_4g_mem_size); + } +#endif +} + +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size) +{ + unsigned long nr_pfn; + xen_pfn_t *pfn_list; + int i; + + nr_pfn = size >> TARGET_PAGE_BITS; + pfn_list = qemu_malloc(sizeof (*pfn_list) * nr_pfn); + + for (i = 0; i < nr_pfn; i++) { + pfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i; + } + + if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) { + hw_error("xen: failed to populate ram at %lx", ram_addr); + } + + qemu_free(pfn_list); +} + + /* Initialise Xen */ int xen_init(void) @@ -62,5 +122,9 @@ int xen_init(void) return -1; } + /* Init RAM management */ + qemu_map_cache_init(); + xen_ram_init(ram_size); + return 0; } diff --git a/xen-mapcache-stub.c b/xen-mapcache-stub.c new file mode 100644 index 0000000..541bee6 --- /dev/null +++ b/xen-mapcache-stub.c @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2011 Citrix Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "config.h" + +#include "exec-all.h" +#include "qemu-common.h" +#include "cpu-common.h" +#include "xen-mapcache.h" + +void qemu_map_cache_init(void) +{ +} + +uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock) +{ + return qemu_get_ram_ptr(phys_addr); +} + +void qemu_map_cache_unlock(void *buffer) +{ +} + +ram_addr_t qemu_ram_addr_from_mapcache(void *ptr) +{ + return -1; +} + +void qemu_invalidate_map_cache(void) +{ +} + +void qemu_invalidate_entry(uint8_t *buffer) +{ +} diff --git a/xen-mapcache.c b/xen-mapcache.c new file mode 100644 index 0000000..d7f44a7 --- /dev/null +++ b/xen-mapcache.c @@ -0,0 +1,310 @@ +/* + * Copyright (C) 2011 Citrix Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "config.h" + +#include <sys/resource.h> + +#include "hw/xen_backend.h" +#include "blockdev.h" + +#include <xen/hvm/params.h> +#include <sys/mman.h> + +#include "xen-mapcache.h" + + +//#define MAPCACHE_DEBUG + +#ifdef MAPCACHE_DEBUG +# define DPRINTF(fmt, ...) do { \ + fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \ +} while (0) +#else +# define DPRINTF(fmt, ...) do { } while (0) +#endif + +#if defined(__i386__) +# define MCACHE_BUCKET_SHIFT 16 +#elif defined(__x86_64__) +# define MCACHE_BUCKET_SHIFT 20 +#endif +#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT) + +#define BITS_PER_LONG (sizeof(long) * 8) +#define BITS_TO_LONGS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG) +#define DECLARE_BITMAP(name, bits) unsigned long name[BITS_TO_LONGS(bits)] + +typedef struct MapCacheEntry { + target_phys_addr_t paddr_index; + uint8_t *vaddr_base; + DECLARE_BITMAP(valid_mapping, MCACHE_BUCKET_SIZE >> XC_PAGE_SHIFT); + uint8_t lock; + struct MapCacheEntry *next; +} MapCacheEntry; + +typedef struct MapCacheRev { + uint8_t *vaddr_req; + target_phys_addr_t paddr_index; + QTAILQ_ENTRY(MapCacheRev) next; +} MapCacheRev; + +typedef struct MapCache { + MapCacheEntry *entry; + unsigned long nr_buckets; + QTAILQ_HEAD(map_cache_head, MapCacheRev) locked_entries; + + /* For most cases (>99.9%), the page address is the same. */ + target_phys_addr_t last_address_index; + uint8_t *last_address_vaddr; + unsigned long max_mcache_size; + unsigned int mcache_bucket_shift; +} MapCache; + +static MapCache *mapcache; + +static inline int test_bit(unsigned int bit, const unsigned long *map) +{ + return !!((map)[(bit) / BITS_PER_LONG] & (1UL << ((bit) % BITS_PER_LONG))); +} + +void qemu_map_cache_init(void) +{ + unsigned long size; + struct rlimit rlimit_as; + + mapcache = qemu_mallocz(sizeof (MapCache)); + + QTAILQ_INIT(&mapcache->locked_entries); + mapcache->last_address_index = -1; + + getrlimit(RLIMIT_AS, &rlimit_as); + rlimit_as.rlim_cur = rlimit_as.rlim_max; + setrlimit(RLIMIT_AS, &rlimit_as); + mapcache->max_mcache_size = rlimit_as.rlim_max; + + mapcache->nr_buckets = + (((mapcache->max_mcache_size >> XC_PAGE_SHIFT) + + (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >> + (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)); + + size = mapcache->nr_buckets * sizeof (MapCacheEntry); + size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1); + DPRINTF("qemu_map_cache_init, nr_buckets = %lx size %lu\n", mapcache->nr_buckets, size); + mapcache->entry = qemu_mallocz(size); +} + +static void qemu_remap_bucket(MapCacheEntry *entry, + target_phys_addr_t size, + target_phys_addr_t address_index) +{ + uint8_t *vaddr_base; + xen_pfn_t *pfns; + int *err; + unsigned int i, j; + target_phys_addr_t nb_pfn = size >> XC_PAGE_SHIFT; + + pfns = qemu_mallocz(nb_pfn * sizeof (xen_pfn_t)); + err = qemu_mallocz(nb_pfn * sizeof (int)); + + if (entry->vaddr_base != NULL) { + if (munmap(entry->vaddr_base, size) != 0) { + perror("unmap fails"); + exit(-1); + } + } + + for (i = 0; i < nb_pfn; i++) { + pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i; + } + + vaddr_base = xc_map_foreign_bulk(xen_xc, xen_domid, PROT_READ|PROT_WRITE, + pfns, err, nb_pfn); + if (vaddr_base == NULL) { + perror("xc_map_foreign_bulk"); + exit(-1); + } + + entry->vaddr_base = vaddr_base; + entry->paddr_index = address_index; + + for (i = 0; i < nb_pfn; i += BITS_PER_LONG) { + unsigned long word = 0; + if ((i + BITS_PER_LONG) > nb_pfn) { + j = nb_pfn % BITS_PER_LONG; + } else { + j = BITS_PER_LONG; + } + while (j > 0) { + word = (word << 1) | !err[i + --j]; + } + entry->valid_mapping[i / BITS_PER_LONG] = word; + } + + qemu_free(pfns); + qemu_free(err); +} + +uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock) +{ + MapCacheEntry *entry, *pentry = NULL; + target_phys_addr_t address_index = phys_addr >> MCACHE_BUCKET_SHIFT; + target_phys_addr_t address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1); + + if (address_index == mapcache->last_address_index && !lock) { + return mapcache->last_address_vaddr + address_offset; + } + + entry = &mapcache->entry[address_index % mapcache->nr_buckets]; + + while (entry && entry->lock && entry->paddr_index != address_index && entry->vaddr_base) { + pentry = entry; + entry = entry->next; + } + if (!entry) { + entry = qemu_mallocz(sizeof (MapCacheEntry)); + pentry->next = entry; + qemu_remap_bucket(entry, size ? : MCACHE_BUCKET_SIZE, address_index); + } else if (!entry->lock) { + if (!entry->vaddr_base || entry->paddr_index != address_index || + !test_bit(address_offset >> XC_PAGE_SHIFT, entry->valid_mapping)) { + qemu_remap_bucket(entry, size ? : MCACHE_BUCKET_SIZE, address_index); + } + } + + if (!test_bit(address_offset >> XC_PAGE_SHIFT, entry->valid_mapping)) { + mapcache->last_address_index = -1; + return NULL; + } + + mapcache->last_address_index = address_index; + mapcache->last_address_vaddr = entry->vaddr_base; + if (lock) { + MapCacheRev *reventry = qemu_mallocz(sizeof(MapCacheRev)); + entry->lock++; + reventry->vaddr_req = mapcache->last_address_vaddr + address_offset; + reventry->paddr_index = mapcache->last_address_index; + QTAILQ_INSERT_TAIL(&mapcache->locked_entries, reventry, next); + } + + return mapcache->last_address_vaddr + address_offset; +} + +ram_addr_t qemu_ram_addr_from_mapcache(void *ptr) +{ + MapCacheRev *reventry; + target_phys_addr_t paddr_index; + int found = 0; + + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + if (reventry->vaddr_req == ptr) { + paddr_index = reventry->paddr_index; + found = 1; + break; + } + } + if (!found) { + fprintf(stderr, "qemu_ram_addr_from_mapcache, could not find %p\n", ptr); + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + DPRINTF(" "TARGET_FMT_plx" -> %p is present\n", reventry->paddr_index, + reventry->vaddr_req); + } + abort(); + return 0; + } + + return paddr_index << MCACHE_BUCKET_SHIFT; +} + +void qemu_invalidate_entry(uint8_t *buffer) +{ + MapCacheEntry *entry = NULL, *pentry = NULL; + MapCacheRev *reventry; + target_phys_addr_t paddr_index; + int found = 0; + + if (mapcache->last_address_vaddr == buffer) { + mapcache->last_address_index = -1; + } + + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + if (reventry->vaddr_req == buffer) { + paddr_index = reventry->paddr_index; + found = 1; + break; + } + } + if (!found) { + DPRINTF("qemu_invalidate_entry, could not find %p\n", buffer); + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + DPRINTF(" "TARGET_FMT_plx" -> %p is present\n", reventry->paddr_index, reventry->vaddr_req); + } + return; + } + QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next); + qemu_free(reventry); + + entry = &mapcache->entry[paddr_index % mapcache->nr_buckets]; + while (entry && entry->paddr_index != paddr_index) { + pentry = entry; + entry = entry->next; + } + if (!entry) { + DPRINTF("Trying to unmap address %p that is not in the mapcache!\n", buffer); + return; + } + entry->lock--; + if (entry->lock > 0 || pentry == NULL) { + return; + } + + pentry->next = entry->next; + if (munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE) != 0) { + perror("unmap fails"); + exit(-1); + } + qemu_free(entry); +} + +void qemu_invalidate_map_cache(void) +{ + unsigned long i; + MapCacheRev *reventry; + + /* Flush pending AIO before destroying the mapcache */ + qemu_aio_flush(); + + QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { + DPRINTF("There should be no locked mappings at this time, " + "but "TARGET_FMT_plx" -> %p is present\n", + reventry->paddr_index, reventry->vaddr_req); + } + + mapcache_lock(); + + for (i = 0; i < mapcache->nr_buckets; i++) { + MapCacheEntry *entry = &mapcache->entry[i]; + + if (entry->vaddr_base == NULL) { + continue; + } + + if (munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE) != 0) { + perror("unmap fails"); + exit(-1); + } + + entry->paddr_index = 0; + entry->vaddr_base = NULL; + } + + mapcache->last_address_index = -1; + mapcache->last_address_vaddr = NULL; + + mapcache_unlock(); +} diff --git a/xen-mapcache.h b/xen-mapcache.h new file mode 100644 index 0000000..ef3717f --- /dev/null +++ b/xen-mapcache.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2011 Citrix Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef XEN_MAPCACHE_H +#define XEN_MAPCACHE_H + +void qemu_map_cache_init(void); +uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t size, uint8_t lock); +void qemu_map_cache_unlock(void *phys_addr); +ram_addr_t qemu_ram_addr_from_mapcache(void *ptr); +void qemu_invalidate_entry(uint8_t *buffer); +void qemu_invalidate_map_cache(void); + +#define mapcache_lock() ((void)0) +#define mapcache_unlock() ((void)0) + +#endif /* !XEN_MAPCACHE_H */ diff --git a/xen-stub.c b/xen-stub.c index 3a8449c..8d2fa54 100644 --- a/xen-stub.c +++ b/xen-stub.c @@ -22,6 +22,10 @@ void xen_piix_pci_write_config_client(uint32_t address, uint32_t val, int len) { } +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size) +{ +} + qemu_irq *xen_interrupt_controller_init(void) { return NULL; -- 1.7.2.3 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |