[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-ia64-devel] [PATCH 5/7] fix XENMEM_add_to_physmap with XENMAPSPACE_mfn



[IA64] fix XENMEM_add_to_physmap with XENMAPSPACE_mfn.

This patch fixes HVM domain save/restore.
Tools stack is aware of where memory is populated in guest domain.
But XENMEM_add_to_physmap with XENMAPSPACE_mfn doesn't update 
the information related to guest memory map. So guest domain
save/dump-core fails to dump pages which were added by the hypercall.

This patch makes the hypercall update the memory map information
of a given guest domain.
This introduces the race between writers and readers of
the info. By introducing sequence counter in shared_info page
the guest memory map information is protected by sequence lock.
The reader is tools stack. They will be updated the following patch.

Signed-off-by: Isaku Yamahata <yamahata@xxxxxxxxxxxxx>

diff -r df56aa8b2e34 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c    Mon Sep 29 21:12:54 2008 +0900
+++ b/xen/arch/ia64/xen/mm.c    Mon Sep 29 21:18:21 2008 +0900
@@ -186,6 +186,9 @@
 static void domain_page_flush_and_put(struct domain* d, unsigned long mpaddr,
                                       volatile pte_t* ptep, pte_t old_pte, 
                                       struct page_info* page);
+
+static void __xencomm_mark_dirty(struct domain *d,
+                                 unsigned long addr, unsigned int len);
 
 extern unsigned long ia64_iobase;
 
@@ -2141,6 +2144,323 @@
     rcu_unlock_domain(dest_dom);
     return ret;
 }
+
+/* this lock can be only for memmap_info. domain_lock() is abused here */
+static void
+memmap_lock(struct domain *d)
+{
+    domain_lock(d);
+}
+
+static void
+memmap_unlock(struct domain *d)
+{
+    domain_unlock(d);
+}
+
+static void
+memmap_seqlock(struct domain *d)
+{
+    memmap_lock(d);
+    ++d->shared_info->arch.memmap_sequence;
+    smp_wmb();
+}
+
+static void
+memmap_sequnlock(struct domain *d)
+{
+    smp_wmb();
+    d->shared_info->arch.memmap_sequence++;
+    memmap_unlock(d);
+}
+
+/* copy memory range to domain pseudo physical address space */
+static int
+memmap_copy_to(struct domain *d, unsigned long dest_gpfn,
+               void *src, unsigned long num_pages)
+{
+    BUG_ON(((unsigned long)src & ~PAGE_MASK) != 0);
+    
+    while (num_pages > 0) {
+        unsigned long mfn;
+        struct page_info *page;
+        void *virt;
+
+        mfn = gmfn_to_mfn_foreign(d, dest_gpfn);
+        if (mfn == 0 || mfn == INVALID_MFN)
+            return -EFAULT;
+        page = mfn_to_page(mfn);
+        if (get_page(page, d) == 0)
+            return -EFAULT;
+        virt = mfn_to_virt(mfn);
+        copy_page(virt, src);
+        __xencomm_mark_dirty(d, (unsigned long)virt, PAGE_SIZE);
+        put_page(page);
+
+        src += PAGE_SIZE;
+        dest_gpfn++;
+        num_pages--;
+    }
+
+    return 0;
+}
+
+/* copy memory range from domain pseudo physical address space */
+static int
+memmap_copy_from(void *dest, struct domain *d, unsigned long src_gpfn,
+                 unsigned long num_pages)
+{
+    BUG_ON(((unsigned long)dest & ~PAGE_MASK) != 0);
+
+    while (num_pages > 0) {
+        unsigned long mfn;
+        struct page_info *page;
+
+        mfn = gmfn_to_mfn_foreign(d, src_gpfn);
+        if (mfn == 0 || mfn == INVALID_MFN)
+            return -EFAULT;
+        page = mfn_to_page(mfn);
+        if (get_page(page, d) == 0)
+            return -EFAULT;
+        copy_page(dest, mfn_to_virt(mfn));
+        put_page(page);
+
+        dest += PAGE_SIZE;
+        src_gpfn++;
+        num_pages--;
+    }
+
+    return 0;
+}
+
+static int
+memdesc_can_expand(const struct xen_ia64_memmap_info *memmap_info,
+                   unsigned long num_pages)
+{
+    /* Is there room for one more md? */
+    if ((num_pages << PAGE_SHIFT) <
+        (sizeof(*memmap_info) + memmap_info->efi_memmap_size +
+         memmap_info->efi_memdesc_size))
+        return 0;
+
+    return 1;
+}
+
+static int
+memdesc_can_collapse(const efi_memory_desc_t *lhs,
+                     const efi_memory_desc_t *rhs)
+{
+    return (lhs->type == rhs->type && lhs->attribute == rhs->attribute);
+}
+
+static int
+__dom0vp_add_memdesc_one(struct xen_ia64_memmap_info *memmap_info,
+                         unsigned long num_pages,
+                         const efi_memory_desc_t *md)
+{
+    void* const memmap_end = (void*)memmap_info->memdesc +
+        memmap_info->efi_memmap_size;
+    void *p;
+    efi_memory_desc_t *tmp_md;
+    efi_memory_desc_t *s_md;
+    efi_memory_desc_t *e_md;
+    u64 phys_addr;
+    u64 phys_addr_end;
+
+    /* fast path. appending to the last entry */
+    tmp_md = (efi_memory_desc_t*)(memmap_end - memmap_info->efi_memdesc_size);
+    if (MD_END(tmp_md) < md->phys_addr) {
+        /* append one */
+        if (!memdesc_can_expand(memmap_info, num_pages))
+            return -ENOMEM;
+
+        memcpy(memmap_end, md, memmap_info->efi_memdesc_size);
+        memmap_info->efi_memmap_size += memmap_info->efi_memdesc_size;
+        return 0;
+    }
+    /* fast path. expand the last entry */
+    if (tmp_md->phys_addr <= md->phys_addr) {
+        if (!memdesc_can_collapse(tmp_md, md))
+            return -EINVAL;
+
+        phys_addr_end = max(MD_END(tmp_md), MD_END(md));
+        tmp_md->num_pages =
+            (phys_addr_end - tmp_md->phys_addr) >> EFI_PAGE_SHIFT;
+        return 0;
+    }
+
+    /* slow path */
+    s_md = NULL;
+    e_md = NULL;
+    for (p = memmap_info->memdesc;
+         p < memmap_end;
+         p += memmap_info->efi_memdesc_size) {
+        tmp_md = p;
+
+        if (MD_END(tmp_md) < md->phys_addr)
+            continue;
+
+        if (MD_END(md) < tmp_md->phys_addr) {
+            if (s_md == NULL) {
+                void *next_md = p + memmap_info->efi_memdesc_size;
+                size_t left_size = memmap_end - (void*)tmp_md;
+
+                /* found hole. just insert md here*/
+                if (!memdesc_can_expand(memmap_info, num_pages))
+                    return -ENOMEM;
+
+                memmove(next_md, tmp_md, left_size);
+                memcpy(tmp_md, md, memmap_info->efi_memdesc_size);
+                memmap_info->efi_memmap_size += memmap_info->efi_memdesc_size;
+                return 0;
+            }
+            break;
+        }
+
+        if (s_md == NULL)
+            s_md = tmp_md;
+        e_md = tmp_md;
+
+        if (!memdesc_can_collapse(tmp_md, md))
+            return -EINVAL;
+    }
+    BUG_ON(s_md == NULL || e_md == NULL);
+
+    /* collapse into one */
+    phys_addr = min(md->phys_addr, s_md->phys_addr);
+    phys_addr_end = max(MD_END(md), MD_END(e_md));
+    s_md->phys_addr = phys_addr;
+    s_md->num_pages = (phys_addr_end - phys_addr) >> EFI_PAGE_SHIFT;
+    if (s_md != e_md) {
+        void *next_s_md = (void*)s_md + memmap_info->efi_memdesc_size;
+        void *next_e_md = (void*)e_md + memmap_info->efi_memdesc_size;
+        size_t left_size = memmap_end - (void*)next_e_md;
+
+        memmap_info->efi_memmap_size -= (void*)e_md - (void*)s_md;
+        if (left_size > 0)
+            memmove(next_s_md, next_e_md, left_size);
+    }
+
+    return 0;
+}
+
+/*
+ * d->arch.convmem_end is mostly read only and sometimes increased.
+ * It is protected by memmap_lock()
+ *
+ * d->arch.convmem_end is also referned by guest(self p2m exposure)
+ * d->shared_info.arch.memmap_info_xxx and memmap_info are
+ * referenced by tools stack(save/dump-core/foreign p2m exposure).
+ * They are protected d->shared_info.arch.memmap_sequence of
+ * sequence lock variable by read side(tools stack and guest kernel).
+ *
+ * reader side:
+ *  - get d->arch.convmem_end (via XENMEM_maximum_gpfn)
+ *  - get sequence counter
+ *  - copy the memmap into local buffer
+ *  - check sequence counter. try again if necessary
+ *  - get d->arch.convmem_end. try again if changed.
+ *
+ * writer side:
+ *  - increase d->arch.convmem_end at first if necessary
+ *  - lock sequence lock
+ *    - lock memmap_lock spinlock.
+ *    - increment sequence counter
+ *  - update memmap_info
+ *  - release sequence lock
+ */
+static int
+__dom0vp_add_memdesc(struct domain *targ_d,
+                     const struct xen_ia64_memmap_info *u_memmap_info,
+                     const char *u_memmap)
+{
+    int ret = 0;
+    const void* const u_memmap_end = u_memmap + u_memmap_info->efi_memmap_size;
+    const efi_memory_desc_t *md;
+
+    unsigned long md_end_max;
+    unsigned long num_pages;
+    unsigned long order;
+    unsigned long memmap_info_pfn;
+
+    struct page_info *page;
+    struct xen_ia64_memmap_info *memmap_info;
+    size_t unused_size;
+
+    const void *p;
+
+    /* update d->arch.convmem_end */
+    md_end_max = 0;
+    for (p = u_memmap; p < u_memmap_end;
+         p += u_memmap_info->efi_memdesc_size) {
+        md = p;
+        if (MD_END(md) > md_end_max)
+            md_end_max = MD_END(md);
+    }
+    memmap_lock(targ_d);
+    /* convmem_end is also protected memdesc lock */
+    if (md_end_max > targ_d->arch.convmem_end)
+        targ_d->arch.convmem_end = md_end_max;
+    num_pages = targ_d->shared_info->arch.memmap_info_num_pages;
+    memmap_unlock(targ_d);
+
+ again:
+    order = get_order(num_pages << PAGE_SHIFT);
+    page = alloc_domheap_pages(NULL, order, 0);
+    if (page == NULL)
+        return -ENOMEM;
+    memmap_info = page_to_virt(page);
+
+    /* write seqlock targ_d->shared_info.memmap_sequence */
+    memmap_seqlock(targ_d);
+    if (targ_d->shared_info->arch.memmap_info_num_pages != num_pages) {
+        num_pages = targ_d->shared_info->arch.memmap_info_num_pages;
+        memmap_sequnlock(targ_d);
+        free_domheap_pages(page, order);
+        goto again;
+    }
+    memmap_info_pfn = targ_d->shared_info->arch.memmap_info_pfn;
+
+    /* copy into local to make them virtually contiguous */
+    ret = memmap_copy_from(memmap_info,
+                           targ_d, memmap_info_pfn, num_pages);
+    if (ret != 0)
+        goto out;
+
+    if (memmap_info->efi_memdesc_size != u_memmap_info->efi_memdesc_size ||
+        memmap_info->efi_memdesc_version !=
+        u_memmap_info->efi_memdesc_version) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    /* update memdesc */
+    for (p = u_memmap;
+         p < u_memmap_end;
+         p += u_memmap_info->efi_memdesc_size) {
+        md = p;
+        ret = __dom0vp_add_memdesc_one(memmap_info, num_pages, md);
+        if (ret != 0)
+            goto out;
+    }
+
+    /* zero out the unused region to avoid hypervisor bit leak */
+    unused_size = (num_pages << PAGE_SHIFT) -
+        (sizeof(*memmap_info) + memmap_info->efi_memmap_size);
+    if (unused_size > 0)
+        memset((void*)memmap_info->memdesc + memmap_info->efi_memmap_size,
+               0, unused_size);
+
+    /* copy back into domain. */
+    ret = memmap_copy_to(targ_d, memmap_info_pfn, memmap_info, num_pages);
+
+ out:
+    /* write sequnlock targ_d->shared_info.arch.memmap_sequence */
+    memmap_sequnlock(targ_d);
+
+    free_domheap_pages(page, order);
+    return ret;
+}
 #endif
 
 // grant table host mapping
@@ -2863,8 +3183,35 @@
         case XENMAPSPACE_mfn:
         {
             if ( get_page_from_pagenr(xatp.idx, d) ) {
+                struct xen_ia64_memmap_info memmap_info;
+                efi_memory_desc_t md;
+                int ret;
+
                 mfn = xatp.idx;
                 page = mfn_to_page(mfn);
+
+                memmap_info.efi_memmap_size = sizeof(md);
+                memmap_info.efi_memdesc_size = sizeof(md);
+                memmap_info.efi_memdesc_version =
+                    EFI_MEMORY_DESCRIPTOR_VERSION;
+
+                md.type = EFI_CONVENTIONAL_MEMORY;
+                md.pad = 0;
+                md.phys_addr = xatp.gpfn << PAGE_SHIFT;
+                md.virt_addr = 0;
+                md.num_pages = 1UL << (PAGE_SHIFT - EFI_PAGE_SHIFT);
+                md.attribute = EFI_MEMORY_WB;
+
+                ret = __dom0vp_add_memdesc(d, &memmap_info, (char*)&md);
+                if (ret != 0) {
+                    put_page(page);
+                    rcu_unlock_domain(d);
+                    gdprintk(XENLOG_DEBUG,
+                             "%s:%d td %d gpfn 0x%lx mfn 0x%lx ret %d\n",
+                             __func__, __LINE__,
+                             d->domain_id, xatp.gpfn, xatp.idx, ret);
+                    return ret;
+                }
             }
             break;
         }
@@ -2999,9 +3346,9 @@
     return (!mfn_valid(mfn) || (page_get_owner(mfn_to_page(mfn)) == dom_io));
 }
 
-void xencomm_mark_dirty(unsigned long addr, unsigned int len)
+static void __xencomm_mark_dirty(struct domain *d,
+                                 unsigned long addr, unsigned int len)
 {
-    struct domain *d = current->domain;
     unsigned long gpfn;
     unsigned long end_addr = addr + len;
 
@@ -3011,6 +3358,11 @@
             shadow_mark_page_dirty(d, gpfn);
         }
     }
+}
+
+void xencomm_mark_dirty(unsigned long addr, unsigned int len)
+{
+    __xencomm_mark_dirty(current->domain, addr, len);
 }
 
 int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn)
diff -r df56aa8b2e34 xen/include/asm-ia64/mm.h
--- a/xen/include/asm-ia64/mm.h Mon Sep 29 21:12:54 2008 +0900
+++ b/xen/include/asm-ia64/mm.h Mon Sep 29 21:18:21 2008 +0900
@@ -455,6 +455,7 @@
 #define foreign_p2m_destroy(d) do { } while (0)
 #define dom0vp_expose_foreign_p2m(dest_dom, dest_gpfn, domid, buffer, flags)   
(-ENOSYS)
 #define dom0vp_unexpose_foreign_p2m(dest_dom, dest_gpfn, domid)        
(-ENOSYS)
+#define __dom0vp_add_memdesc(d, memmap_info, memdesc)  (-ENOSYS)
 #endif
 
 extern volatile unsigned long *mpt_table;
diff -r df56aa8b2e34 xen/include/public/arch-ia64.h
--- a/xen/include/public/arch-ia64.h    Mon Sep 29 21:12:54 2008 +0900
+++ b/xen/include/public/arch-ia64.h    Mon Sep 29 21:18:21 2008 +0900
@@ -250,8 +250,9 @@
     unsigned int memmap_info_num_pages;/* currently only = 1 case is
                                           supported. */
     unsigned long memmap_info_pfn;
+    unsigned long memmap_sequence;      /* seqlock to update memmap_info */
 
-    uint64_t pad[31];
+    uint64_t pad[30];
 };
 typedef struct arch_shared_info arch_shared_info_t;
 

Attachment: memmap-info-update.patch
Description: Text Data

_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.