[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] Re: [Qemu-devel] [PATCH RFC V3 07/12] xen: Introduce the Xen mapcache
On Fri, Sep 17, 2010 at 11:15 AM, <anthony.perard@xxxxxxxxxx> wrote: > From: Anthony PERARD <anthony.perard@xxxxxxxxxx> > > The mapcache maps chucks of guest memory on demand, unmaps them when > they are not needed anymore. > > Each call to qemu_get_ram_ptr makes a call to qemu_map_cache with the > lock option, so mapcache will not unmap these ram_ptr. > > Signed-off-by: Anthony PERARD <anthony.perard@xxxxxxxxxx> > --- > ÂMakefile.target |  Â2 +- > Âexec.c     Â|  36 ++++++- > Âhw/xen.h    Â|  Â4 + > Âxen-all.c    |  63 ++++++++++++ > Âxen-stub.c   Â|  Â4 + > Âxen_mapcache.c Â| Â302 > +++++++++++++++++++++++++++++++++++++++++++++++++++++++ > Âxen_mapcache.h Â|  26 +++++ > Â7 files changed, 432 insertions(+), 5 deletions(-) > Âcreate mode 100644 xen_mapcache.c > Âcreate mode 100644 xen_mapcache.h > > diff --git a/Makefile.target b/Makefile.target > index 6b390e6..ea14393 100644 > --- a/Makefile.target > +++ b/Makefile.target > @@ -183,7 +183,7 @@ QEMU_CFLAGS += $(VNC_PNG_CFLAGS) > > Â# xen backend driver support > Âobj-$(CONFIG_XEN) += xen_machine_pv.o xen_domainbuild.o > -obj-$(CONFIG_XEN) += xen-all.o > +obj-$(CONFIG_XEN) += xen-all.o xen_mapcache.o > Âobj-$(CONFIG_NO_XEN) += xen-stub.o > > Â# xen full virtualized machine > diff --git a/exec.c b/exec.c > index 380dab5..f5888eb 100644 > --- a/exec.c > +++ b/exec.c > @@ -60,6 +60,9 @@ > Â#endif > Â#endif > > +#include "hw/xen.h" > +#include "xen_mapcache.h" > + > Â//#define DEBUG_TB_INVALIDATE > Â//#define DEBUG_FLUSH > Â//#define DEBUG_TLB > @@ -2833,6 +2836,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, > const char *name, >     } >   } > > +  Ânew_block->offset = find_ram_offset(size); >   if (host) { >     new_block->host = host; >   } else { > @@ -2856,15 +2860,17 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, > const char *name, >                  ÂPROT_EXEC|PROT_READ|PROT_WRITE, >                  ÂMAP_SHARED | MAP_ANONYMOUS, -1, 0); > Â#else > -      Ânew_block->host = qemu_vmalloc(size); > +      Âif (xen_enabled()) { > +        Âxen_ram_alloc(new_block->offset, size); > +      Â} else { > +        Ânew_block->host = qemu_vmalloc(size); > +      Â} > Â#endif > Â#ifdef MADV_MERGEABLE >       madvise(new_block->host, size, MADV_MERGEABLE); > Â#endif >     } >   } > - > -  Ânew_block->offset = find_ram_offset(size); >   new_block->length = size; > >   QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next); > @@ -2905,7 +2911,11 @@ void qemu_ram_free(ram_addr_t addr) > Â#if defined(TARGET_S390X) && defined(CONFIG_KVM) >         munmap(block->host, block->length); > Â#else > -        Âqemu_vfree(block->host); > +        Âif (xen_enabled()) { > +          Âqemu_invalidate_entry(block->host); > +        Â} else { > +          Âqemu_vfree(block->host); > +        Â} > Â#endif >       } >       qemu_free(block); > @@ -2931,6 +2941,14 @@ void *qemu_get_ram_ptr(ram_addr_t addr) >     if (addr - block->offset < block->length) { >       QLIST_REMOVE(block, next); >       QLIST_INSERT_HEAD(&ram_list.blocks, block, next); > +      Âif (xen_enabled()) { > +        Â/* We need to check if the requested address is in the RAM > +         * because we don't want to map the entire memory in QEMU. > +         */ > +        Âif (block->offset == 0) braces > +          Âreturn qemu_map_cache(addr, 0, 1); > +        Âblock->host = qemu_map_cache(block->offset, block->length, > 1); > +      Â} >       return block->host + (addr - block->offset); >     } >   } > @@ -2949,11 +2967,18 @@ ram_addr_t qemu_ram_addr_from_host(void *ptr) >   uint8_t *host = ptr; > >   QLIST_FOREACH(block, &ram_list.blocks, next) { > +    Â/* This case append when the block is not mapped. */ > +    Âif (block->host == NULL) braces > +      Âcontinue; >     if (host - block->host < block->length) { >       return block->offset + (host - block->host); >     } >   } > > +  Âif (xen_enabled()) { > +    Âreturn qemu_ram_addr_from_mapcache(ptr); > +  Â} > + >   fprintf(stderr, "Bad ram pointer %p\n", ptr); >   abort(); > > @@ -3728,6 +3753,9 @@ void cpu_physical_memory_unmap(void *buffer, > target_phys_addr_t len, >   if (is_write) { >     cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len); >   } > +  Âif (xen_enabled()) { > +    Âqemu_invalidate_entry(buffer); > +  Â} >   qemu_vfree(bounce.buffer); >   bounce.buffer = NULL; >   cpu_notify_map_clients(); > diff --git a/hw/xen.h b/hw/xen.h > index c5189b1..2b62ff5 100644 > --- a/hw/xen.h > +++ b/hw/xen.h > @@ -34,4 +34,8 @@ void xen_piix_pci_write_config_client(uint32_t address, > uint32_t val, int len); > > Âint xen_init(int smp_cpus); > > +#ifdef NEED_CPU_H > +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size); > +#endif > + > Â#endif /* QEMU_HW_XEN_H */ > diff --git a/xen-all.c b/xen-all.c > index 765f87a..4e0b061 100644 > --- a/xen-all.c > +++ b/xen-all.c > @@ -12,6 +12,8 @@ > Â#include "hw/xen_common.h" > Â#include "hw/xen_backend.h" > > +#include "xen_mapcache.h" > + > Â/* Xen specific function for piix pci */ > > Âint xen_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num) > @@ -52,6 +54,63 @@ qemu_irq *i8259_xen_init(void) >   return qemu_allocate_irqs(i8259_set_irq, NULL, 16); > Â} > > + > +/* Memory Ops */ > + > +static void xen_ram_init(ram_addr_t ram_size) > +{ > +  ÂRAMBlock *new_block; > +  Âram_addr_t below_4g_mem_size, above_4g_mem_size = 0; > + > +  Ânew_block = qemu_mallocz(sizeof (*new_block)); > +  Âpstrcpy(new_block->idstr, sizeof (new_block->idstr), "xen.ram"); > +  Ânew_block->host = NULL; > +  Ânew_block->offset = 0; > +  Ânew_block->length = ram_size; > + > +  ÂQLIST_INSERT_HEAD(&ram_list.blocks, new_block, next); > + > +  Âram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty, > +                    new_block->length >> > TARGET_PAGE_BITS); > +  Âmemset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS), > +      0xff, new_block->length >> TARGET_PAGE_BITS); > + > +  Âif (ram_size >= 0xe0000000 ) { > +    Âabove_4g_mem_size = ram_size - 0xe0000000; > +    Âbelow_4g_mem_size = 0xe0000000; > +  Â} else { > +    Âbelow_4g_mem_size = ram_size; > +  Â} > + > +  Âcpu_register_physical_memory(0, below_4g_mem_size, new_block->offset); > +#if TARGET_PHYS_ADDR_BITS > 32 > +  Âif (above_4g_mem_size > 0) { > +    Âcpu_register_physical_memory(0x100000000ULL, above_4g_mem_size, > +                   new_block->offset + below_4g_mem_size); > +  Â} > +#endif > +} > + > +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size) > +{ > +  Âunsigned long nr_pfn; > +  Âxen_pfn_t *pfn_list; > +  Âint i; > + > +  Ânr_pfn = size >> TARGET_PAGE_BITS; > +  Âpfn_list = qemu_malloc(sizeof (*pfn_list) * nr_pfn); > + > +  Âfor (i = 0; i < nr_pfn; i++) braces > +    Âpfn_list[i] = (ram_addr >> TARGET_PAGE_BITS) + i; > + > +  Âif (xc_domain_memory_populate_physmap(xen_xc, xen_domid, nr_pfn, 0, 0, > pfn_list)) { > +    Âhw_error("xen: failed to populate ram at %lx", ram_addr); > +  Â} > + > +  Âqemu_free(pfn_list); > +} > + > + > Â/* Initialise Xen */ > > Âint xen_init(int smp_cpus) > @@ -62,5 +121,9 @@ int xen_init(int smp_cpus) >     return -1; >   } > > +  Â/* Init RAM management */ > +  Âqemu_map_cache_init(); > +  Âxen_ram_init(ram_size); > + >   return 0; > Â} > diff --git a/xen-stub.c b/xen-stub.c > index 07e64bc..c9f477d 100644 > --- a/xen-stub.c > +++ b/xen-stub.c > @@ -24,6 +24,10 @@ void xen_piix_pci_write_config_client(uint32_t address, > uint32_t val, int len) > Â{ > Â} > > +void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size) > +{ > +} > + > Âint xen_init(int smp_cpus) > Â{ >   return -ENOSYS; > diff --git a/xen_mapcache.c b/xen_mapcache.c > new file mode 100644 > index 0000000..8e3bf6c > --- /dev/null > +++ b/xen_mapcache.c > @@ -0,0 +1,302 @@ > +#include "config.h" > + > +#include "hw/xen_backend.h" > +#include "blockdev.h" > + > +#include <xen/hvm/params.h> > +#include <sys/mman.h> > + > +#include "xen_mapcache.h" > + > + > +//#define MAPCACHE_DEBUG > + > +#ifdef MAPCACHE_DEBUG > +#define DPRINTF(fmt, ...) do { \ > +  Âfprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \ > +} while (0) > +#else > +#define DPRINTF(fmt, ...) do { } while (0) > +#endif > + > +#if defined(MAPCACHE) > + > +#define BITS_PER_LONG (sizeof(long)*8) Please add spaces around '*', also the below #defines need more spaces. > +#define BITS_TO_LONGS(bits) \ > +  Â(((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) > +#define DECLARE_BITMAP(name,bits) \ > +  Âunsigned long name[BITS_TO_LONGS(bits)] > +#define test_bit(bit,map) \ > +  Â(!!((map)[(bit)/BITS_PER_LONG] & (1UL << ((bit)%BITS_PER_LONG)))) > + > +typedef struct MapCacheEntry { > +  Âunsigned long paddr_index; > +  Âuint8_t *vaddr_base; > +  ÂDECLARE_BITMAP(valid_mapping, MCACHE_BUCKET_SIZE>>XC_PAGE_SHIFT); > +  Âuint8_t lock; > +  Âstruct MapCacheEntry *next; > +} MapCacheEntry; > + > +typedef struct MapCacheRev { > +  Âuint8_t *vaddr_req; > +  Âunsigned long paddr_index; > +  ÂQTAILQ_ENTRY(MapCacheRev) next; > +} MapCacheRev; > + > +typedef struct MapCache { > +  ÂMapCacheEntry *entry; > +  Âunsigned long nr_buckets; > +  ÂQTAILQ_HEAD(map_cache_head, MapCacheRev) locked_entries; > + > +  Â/* For most cases (>99.9%), the page address is the same. */ > +  Âunsigned long last_address_index; > +  Âuint8_t   Â*last_address_vaddr; > +} MapCache; > + > +static MapCache *mapcache; > + > + > +int qemu_map_cache_init(void) > +{ > +  Âunsigned long size; > + > +  Âmapcache = qemu_mallocz(sizeof (MapCache)); > + > +  ÂQTAILQ_INIT(&mapcache->locked_entries); > +  Âmapcache->last_address_index = ~0UL; > + > +  Âmapcache->nr_buckets = (((MAX_MCACHE_SIZE >> XC_PAGE_SHIFT) + > +          (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >> > +         Â(MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)); > + > +  Â/* > +   * Use mmap() directly: lets us allocate a big hash table with no > up-front > +   * cost in storage space. The OS will allocate memory only for the > buckets > +   * that we actually use. All others will contain all zeroes. > +   */ > +  Âsize = mapcache->nr_buckets * sizeof(MapCacheEntry); > +  Âsize = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1); > +  ÂDPRINTF("qemu_map_cache_init, nr_buckets = %lx size %lu\n", > mapcache->nr_buckets, size); > +  Âmapcache->entry = mmap(NULL, size, PROT_READ|PROT_WRITE, > +             ÂMAP_SHARED|MAP_ANON, -1, 0); > +  Âif (mapcache->entry == MAP_FAILED) { > +    Âerrno = ENOMEM; Is this needed, can't we just use whatever was in errno? > +    Âreturn -1; > +  Â} > + > +  Âreturn 0; > +} > + > +static void qemu_remap_bucket(MapCacheEntry *entry, > +               Âtarget_phys_addr_t size, > +               Âunsigned long address_index) > +{ > +  Âuint8_t *vaddr_base; > +  Âxen_pfn_t *pfns; > +  Âint *err; > +  Âunsigned int i, j; There are a lot of size >> XC_PAGE_SHIFT uses here. I think it would be clearer to add size >>= XC_PAGE_SHIFT or a new variable. > + > +  Âpfns = qemu_mallocz((size >> XC_PAGE_SHIFT) * sizeof (xen_pfn_t)); > +  Âerr = qemu_mallocz((size >> XC_PAGE_SHIFT) * sizeof (int)); > + > +  Âif (entry->vaddr_base != NULL) { > +    Âerrno = munmap(entry->vaddr_base, size); > +    Âif (errno) { > +      Âfprintf(stderr, "unmap fails %d\n", errno); munmap() returns -1 on error, so please don't clobber errno and use perror(). > +      Âexit(-1); > +    Â} > +  Â} > + > +  Âfor (i = 0; i < size >> XC_PAGE_SHIFT; i++) { > +    Âpfns[i] = (address_index << (MCACHE_BUCKET_SHIFT-XC_PAGE_SHIFT)) + i; > +  Â} > + > +  Âvaddr_base = xc_map_foreign_bulk(xen_xc, xen_domid, PROT_READ|PROT_WRITE, > +                   pfns, err, > +                   size >> XC_PAGE_SHIFT); > +  Âif (vaddr_base == NULL) { > +    Âfprintf(stderr, "xc_map_foreign_bulk error %d\n", errno); perror()? > +    Âexit(-1); > +  Â} > + > +  Âentry->vaddr_base Â= vaddr_base; > +  Âentry->paddr_index = address_index; > + > +  Âfor (i = 0; i < size >> XC_PAGE_SHIFT; i += BITS_PER_LONG) { > +    Âunsigned long word = 0; > +    Âj = ((i + BITS_PER_LONG) > (size >> XC_PAGE_SHIFT)) ? > +      Â(size >> XC_PAGE_SHIFT) % BITS_PER_LONG : BITS_PER_LONG; Maybe this would be clearer with 'if'. > +    Âwhile (j > 0) { > +      Âword = (word << 1) | !err[i + --j]; You are mixing bitwise OR with logical NOT, is this correct? > +    Â} > +    Âentry->valid_mapping[i / BITS_PER_LONG] = word; > +  Â} > + > +  Âqemu_free(pfns); > +  Âqemu_free(err); > +} > + > +uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, target_phys_addr_t > size, uint8_t lock) > +{ > +  ÂMapCacheEntry *entry, *pentry = NULL; > +  Âunsigned long address_index Â= phys_addr >> MCACHE_BUCKET_SHIFT; > +  Âunsigned long address_offset = phys_addr & (MCACHE_BUCKET_SIZE-1); unsigned long will not be long enough on 32 bit host (or 32 bit user space) for a 64 bit target. I can't remember if this was a supported case for Xen anyway. How about address_offset >>= XC_PAGE_SHIFT? > + > +  Âif (address_index == mapcache->last_address_index && !lock) braces > +    Âreturn mapcache->last_address_vaddr + address_offset; > + > +  Âentry = &mapcache->entry[address_index % mapcache->nr_buckets]; > + > +  Âwhile (entry && entry->lock && entry->paddr_index != address_index && > entry->vaddr_base) { > +    Âpentry = entry; > +    Âentry = entry->next; > +  Â} > +  Âif (!entry) { > +    Âentry = qemu_mallocz(sizeof(MapCacheEntry)); > +    Âpentry->next = entry; > +    Âqemu_remap_bucket(entry, size ? : MCACHE_BUCKET_SIZE, address_index); > +  Â} else if (!entry->lock) { > +    Âif (!entry->vaddr_base || entry->paddr_index != address_index || > !test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping)) I suspect this line is too long. Please also add braces. > +      Âqemu_remap_bucket(entry, size ? : MCACHE_BUCKET_SIZE, > address_index); > +  Â} > + > +  Âif (!test_bit(address_offset>>XC_PAGE_SHIFT, entry->valid_mapping)) { > +    Âmapcache->last_address_index = ~0UL; > +    Âreturn NULL; > +  Â} > + > +  Âmapcache->last_address_index = address_index; > +  Âmapcache->last_address_vaddr = entry->vaddr_base; > +  Âif (lock) { > +    ÂMapCacheRev *reventry = qemu_mallocz(sizeof(MapCacheRev)); > +    Âentry->lock++; > +    Âreventry->vaddr_req = mapcache->last_address_vaddr + address_offset; > +    Âreventry->paddr_index = mapcache->last_address_index; > +    ÂQTAILQ_INSERT_TAIL(&mapcache->locked_entries, reventry, next); > +  Â} > + > +  Âreturn mapcache->last_address_vaddr + address_offset; > +} > + > +ram_addr_t qemu_ram_addr_from_mapcache(void *ptr) > +{ > +  ÂMapCacheRev *reventry; > +  Âunsigned long paddr_index; > +  Âint found = 0; > + > +  ÂQTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { > +    Âif (reventry->vaddr_req == ptr) { > +      Âpaddr_index = reventry->paddr_index; > +      Âfound = 1; > +      Âbreak; > +    Â} > +  Â} > +  Âif (!found) { > +    Âfprintf(stderr, "qemu_ram_addr_from_mapcache, could not find %p\n", > ptr); > +    ÂQTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { > +      ÂDPRINTF("  %lx -> %p is present\n", reventry->paddr_index, > reventry->vaddr_req); > +    Â} > +    Âabort(); > +    Âreturn 0; > +  Â} > + > +  Âreturn paddr_index << MCACHE_BUCKET_SHIFT; > +} > + > +void qemu_invalidate_entry(uint8_t *buffer) > +{ > +  ÂMapCacheEntry *entry = NULL, *pentry = NULL; > +  ÂMapCacheRev *reventry; > +  Âunsigned long paddr_index; > +  Âint found = 0; > + > +  Âif (mapcache->last_address_vaddr == buffer) > +    Âmapcache->last_address_index = Â~0UL; > + > +  ÂQTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { > +    Âif (reventry->vaddr_req == buffer) { > +      Âpaddr_index = reventry->paddr_index; > +      Âfound = 1; > +      Âbreak; > +    Â} > +  Â} > +  Âif (!found) { > +    ÂDPRINTF("qemu_invalidate_entry, could not find %p\n", buffer); > +    ÂQTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { > +      ÂDPRINTF("  %lx -> %p is present\n", reventry->paddr_index, > reventry->vaddr_req); > +    Â} > +    Âreturn; > +  Â} > +  ÂQTAILQ_REMOVE(&mapcache->locked_entries, reventry, next); > +  Âqemu_free(reventry); > + > +  Âentry = &mapcache->entry[paddr_index % mapcache->nr_buckets]; > +  Âwhile (entry && entry->paddr_index != paddr_index) { > +    Âpentry = entry; > +    Âentry = entry->next; > +  Â} > +  Âif (!entry) { > +    ÂDPRINTF("Trying to unmap address %p that is not in the mapcache!\n", > buffer); > +    Âreturn; > +  Â} > +  Âentry->lock--; > +  Âif (entry->lock > 0 || pentry == NULL) > +    Âreturn; > + > +  Âpentry->next = entry->next; > +  Âerrno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE); > +  Âif (errno) { > +    Âfprintf(stderr, "unmap fails %d\n", errno); Please see my previous munmap comments. > +    Âexit(-1); > +  Â} > +  Âqemu_free(entry); > +} > + > +void qemu_invalidate_map_cache(void) > +{ > +  Âunsigned long i; > +  ÂMapCacheRev *reventry; > + > +  Âqemu_aio_flush(); > + > +  ÂQTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) { > +    ÂDPRINTF("There should be no locked mappings at this time, but %lx -> > %p is present\n", reventry->paddr_index, reventry->vaddr_req); Probably too long line. > +  Â} > + > +  Âmapcache_lock(); > + > +  Âfor (i = 0; i < mapcache->nr_buckets; i++) { > +    ÂMapCacheEntry *entry = &mapcache->entry[i]; > + > +    Âif (entry->vaddr_base == NULL) > +      Âcontinue; > + > +    Âerrno = munmap(entry->vaddr_base, MCACHE_BUCKET_SIZE); > +    Âif (errno) { > +      Âfprintf(stderr, "unmap fails %d\n", errno); > +      Âexit(-1); > +    Â} > + > +    Âentry->paddr_index = 0; > +    Âentry->vaddr_base Â= NULL; > +  Â} > + > +  Âmapcache->last_address_index = Â~0UL; > +  Âmapcache->last_address_vaddr = NULL; > + > +  Âmapcache_unlock(); > +} > +#else > +uint8_t *qemu_map_cache(target_phys_addr_t phys_addr, uint8_t lock) > +{ > +  Âreturn qemu_get_ram_ptr(phys_addr); > +} > + > +void qemu_invalidate_map_cache(void) > +{ > +} > + > +void qemu_invalidate_entry(uint8_t *buffer) > +{ > +} > +#endif /* !MAPCACHE */ > diff --git a/xen_mapcache.h b/xen_mapcache.h > new file mode 100644 > index 0000000..5a6730f > --- /dev/null > +++ b/xen_mapcache.h > @@ -0,0 +1,26 @@ > +#ifndef XEN_MAPCACHE_H > +#define XEN_MAPCACHE_H > + > +#if (defined(__i386__) || defined(__x86_64__)) > +# Âdefine MAPCACHE xen_mapcache.c could be split into two files, xen-mapcache-stub.c and xen-mapcache.c. configure could perform the check for i386 or x86_64 host and define CONFIG_XEN_MAPCACHE=y appropriately. Then Makefile.target would compile the correct file based on that. _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |