[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v5 10/28] xsplice: Implement payload loading



> > Please advise.
> 
> Well, I certainly didn't think of it getting done that way. To me the
> most natural generalization would be for an arch to register one or
> more secondary ranges (which could even get referred to by an
> enum) at boot time (or maybe that could even be arranged for at
> compile time, i.e. no active registration necessary), with each such

The start of this region - xen_virt_end - is computed at boot time,
so part of this will have to be done at runtime.

> area getting the same data structures set up as is being done right
> now for the "base" VA range.

That actually ended up pretty simple. It won't compile for ARM yet,
see below please.
> 
> But if that's too cumbersome for now, I'm certainly fine with
> xSplice code dealing with the VA management itself. We can
> always add such an extension later (and then make xSplice use it).

From 45906039b18f1995b26c97b560244ac90308597f Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
Date: Mon, 14 Mar 2016 12:02:05 -0400
Subject: [RFC PATCH] vmap: Add vmalloc_type WIP

TODO:
 - Check what happens on !CONFIG_XSPLICE
 - Move arch-specific code to arch-specific files (and make it compile under ARM)

For those users who want to use the virtual addresses that
are in the hypervisor's virtual address space - this new
function allows that, along with providing the underlying
MFNs for the user's needs (such as changing page table permissions).

Implementation wise the vmap API keeps track of two virtual
address regions now:
 a) VMAP_VIRT_START
 b) xen_virt_end up to XEN_VIRT_END (minus some other space).

The a) one is the default one and the existing behavior
for users of vmalloc, vmap, etc is the same.

If, however, one wishes to use the b) range, one only has to use
the vmalloc_type API on these virtual addresses.

This allows users (such as xSplice) to provide their own
mechanism to change the page flags, and also use virtual
addresses closer to the hypervisor virtual addresses (at least
on x86) while not having to deal with the allocation of
pages.

For an example user, see the patch titled "xsplice: Implement payload
loading", where we parse the payload's ELF relocations - which
are defined to be signed 32-bit (so max displacement is 2GB of virtual
space). The displacement of the hypervisor virtual addresses to the
vmalloc area (on x86) is more than 32 bits - which means that ELF
relocations would truncate the 34th and 33rd bits. Hence this alternate API.

Since there is only one user of this - it is conditional on
CONFIG_XSPLICE - and the generic vmalloc code has added checks
in case the b) range has not been initialized.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
Suggested-by: Jan Beulich <jbeulich@xxxxxxxx>
---
Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
Cc: Jan Beulich <jbeulich@xxxxxxxx>
Cc: Keir Fraser <keir@xxxxxxx>
Cc: Tim Deegan <tim@xxxxxxx>

v4: New patch.
v5: Update per Jan's comments.
v6: Drop the stray parentheses on typedefs.
    Ditch the vunmap callback. Stash away the virtual addresses in lists.
    Ditch the vmap callback. Just provide virtual address.
    Ditch the vmalloc_range. Allocate on startup the vmalloc[XEN_VIRT]
    ranges.
---
---
 xen/arch/x86/mm.c                 |   2 +-
 xen/arch/x86/setup.c              |   3 +
 xen/common/vmap.c                 | 194 +++++++++++++++++++++++++-------------
 xen/drivers/acpi/osl.c            |   2 +-
 xen/include/asm-x86/x86_64/page.h |   2 +
 xen/include/xen/vmap.h            |  15 ++-
 6 files changed, 148 insertions(+), 70 deletions(-)

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index bca7532..6fa9208 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -6124,7 +6124,7 @@ void __iomem *ioremap(paddr_t pa, size_t len)
         unsigned int offs = pa & (PAGE_SIZE - 1);
         unsigned int nr = PFN_UP(offs + len);
 
-        va = __vmap(&mfn, nr, 1, 1, PAGE_HYPERVISOR_NOCACHE) + offs;
+        va = __vmap(&mfn, nr, 1, 1, PAGE_HYPERVISOR_NOCACHE, VMAP_VIRT) + offs;
     }
 
     return (void __force __iomem *)va;
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 0b4f94f..9b502b6 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -100,6 +100,9 @@ unsigned long __read_mostly xen_phys_start;
 
 unsigned long __read_mostly xen_virt_end;
 
+unsigned long __read_mostly avail_virt_start;
+unsigned long __read_mostly avail_virt_end;
+
 DEFINE_PER_CPU(struct tss_struct, init_tss);
 
 char __section(".bss.stack_aligned") cpu0_stack[STACK_SIZE];
diff --git a/xen/common/vmap.c b/xen/common/vmap.c
index 134eda0..343a25a 100644
--- a/xen/common/vmap.c
+++ b/xen/common/vmap.c
@@ -10,40 +10,61 @@
 #include <asm/page.h>
 
 static DEFINE_SPINLOCK(vm_lock);
-static void *__read_mostly vm_base;
-#define vm_bitmap ((unsigned long *)vm_base)
+static void *__read_mostly vm_base[VMAP_TYPE_MAX];
+#define vm_bitmap(x) ((unsigned long *)vm_base[x])
 /* highest allocated bit in the bitmap */
-static unsigned int __read_mostly vm_top;
+static unsigned int __read_mostly vm_top[VMAP_TYPE_MAX];
 /* total number of bits in the bitmap */
-static unsigned int __read_mostly vm_end;
+static unsigned int __read_mostly vm_end[VMAP_TYPE_MAX];
 /* lowest known clear bit in the bitmap */
-static unsigned int vm_low;
+static unsigned int vm_low[VMAP_TYPE_MAX];
 
-void __init vm_init(void)
+void __init vm_init_type(enum vmap_type type)
 {
     unsigned int i, nr;
     unsigned long va;
+    void *end;
+
+    if ( type == VMAP_VIRT )
+    {
+        vm_base[VMAP_VIRT] = (void *)VMAP_VIRT_START;
+        end = arch_vmap_virt_end();
+    }
+    else
+    {
+        vm_base[XEN_VIRT] = (void *)xen_virt_end;
+        end = (void *)(XEN_VIRT_END - NR_CPUS * PAGE_SIZE);
 
-    vm_base = (void *)VMAP_VIRT_START;
-    vm_end = PFN_DOWN(arch_vmap_virt_end() - vm_base);
-    vm_low = PFN_UP((vm_end + 7) / 8);
-    nr = PFN_UP((vm_low + 7) / 8);
-    vm_top = nr * PAGE_SIZE * 8;
+        BUG_ON(end <= vm_base[XEN_VIRT]);
+    }
+    vm_end[type] = PFN_DOWN(end - vm_base[type]);
+    vm_low[type]= PFN_UP((vm_end[type] + 7) / 8);
+    nr = PFN_UP((vm_low[type] + 7) / 8);
+    vm_top[type] = nr * PAGE_SIZE * 8;
 
-    for ( i = 0, va = (unsigned long)vm_bitmap; i < nr; ++i, va += PAGE_SIZE )
+    for ( i = 0, va = (unsigned long)vm_bitmap(type); i < nr; ++i, va += PAGE_SIZE )
     {
         struct page_info *pg = alloc_domheap_page(NULL, 0);
 
         map_pages_to_xen(va, page_to_mfn(pg), 1, PAGE_HYPERVISOR);
         clear_page((void *)va);
     }
-    bitmap_fill(vm_bitmap, vm_low);
+    bitmap_fill(vm_bitmap(type), vm_low[type]);
 
     /* Populate page tables for the bitmap if necessary. */
-    populate_pt_range(va, 0, vm_low - nr);
+    populate_pt_range(va, 0, vm_low[type] - nr);
 }
 
-void *vm_alloc(unsigned int nr, unsigned int align)
+void __init vm_init(void)
+{
+    vm_init_type(VMAP_VIRT);
+#ifdef CONFIG_XSPLICE
+    vm_init_type(XEN_VIRT);
+#endif
+}
+
+static void *vm_alloc_type(unsigned int nr, unsigned int align,
+                           enum vmap_type t)
 {
     unsigned int start, bit;
 
@@ -52,27 +73,30 @@ void *vm_alloc(unsigned int nr, unsigned int align)
     else if ( align & (align - 1) )
         align &= -align;
 
+    if ( !vm_base[t] )
+        return NULL;
+
     spin_lock(&vm_lock);
     for ( ; ; )
     {
         struct page_info *pg;
 
-        ASSERT(vm_low == vm_top || !test_bit(vm_low, vm_bitmap));
-        for ( start = vm_low; start < vm_top; )
+        ASSERT(vm_low[t] == vm_top[t] || !test_bit(vm_low[t], vm_bitmap(t)));
+        for ( start = vm_low[t]; start < vm_top[t]; )
         {
-            bit = find_next_bit(vm_bitmap, vm_top, start + 1);
-            if ( bit > vm_top )
-                bit = vm_top;
+            bit = find_next_bit(vm_bitmap(t), vm_top[t], start + 1);
+            if ( bit > vm_top[t] )
+                bit = vm_top[t];
             /*
              * Note that this skips the first bit, making the
              * corresponding page a guard one.
              */
             start = (start + align) & ~(align - 1);
-            if ( bit < vm_top )
+            if ( bit < vm_top[t] )
             {
                 if ( start + nr < bit )
                     break;
-                start = find_next_zero_bit(vm_bitmap, vm_top, bit + 1);
+                start = find_next_zero_bit(vm_bitmap(t), vm_top[t], bit + 1);
             }
             else
             {
@@ -82,12 +106,12 @@ void *vm_alloc(unsigned int nr, unsigned int align)
             }
         }
 
-        if ( start < vm_top )
+        if ( start < vm_top[t] )
             break;
 
         spin_unlock(&vm_lock);
 
-        if ( vm_top >= vm_end )
+        if ( vm_top[t] >= vm_end[t] )
             return NULL;
 
         pg = alloc_domheap_page(NULL, 0);
@@ -96,23 +120,23 @@ void *vm_alloc(unsigned int nr, unsigned int align)
 
         spin_lock(&vm_lock);
 
-        if ( start >= vm_top )
+        if ( start >= vm_top[t] )
         {
-            unsigned long va = (unsigned long)vm_bitmap + vm_top / 8;
+            unsigned long va = (unsigned long)vm_bitmap(t) + vm_top[t] / 8;
 
             if ( !map_pages_to_xen(va, page_to_mfn(pg), 1, PAGE_HYPERVISOR) )
             {
                 clear_page((void *)va);
-                vm_top += PAGE_SIZE * 8;
-                if ( vm_top > vm_end )
-                    vm_top = vm_end;
+                vm_top[t] += PAGE_SIZE * 8;
+                if ( vm_top[t] > vm_end[t] )
+                    vm_top[t] = vm_end[t];
                 continue;
             }
         }
 
         free_domheap_page(pg);
 
-        if ( start >= vm_top )
+        if ( start >= vm_top[t] )
         {
             spin_unlock(&vm_lock);
             return NULL;
@@ -120,47 +144,56 @@ void *vm_alloc(unsigned int nr, unsigned int align)
     }
 
     for ( bit = start; bit < start + nr; ++bit )
-        __set_bit(bit, vm_bitmap);
-    if ( bit < vm_top )
-        ASSERT(!test_bit(bit, vm_bitmap));
+        __set_bit(bit, vm_bitmap(t));
+    if ( bit < vm_top[t] )
+        ASSERT(!test_bit(bit, vm_bitmap(t)));
     else
-        ASSERT(bit == vm_top);
-    if ( start <= vm_low + 2 )
-        vm_low = bit;
+        ASSERT(bit == vm_top[t]);
+    if ( start <= vm_low[t] + 2 )
+        vm_low[t] = bit;
     spin_unlock(&vm_lock);
 
-    return vm_base + start * PAGE_SIZE;
+    return vm_base[t] + start * PAGE_SIZE;
 }
 
-static unsigned int vm_index(const void *va)
+void *vm_alloc(unsigned int nr, unsigned int align)
+{
+    return vm_alloc_type(nr, align, VMAP_VIRT);
+}
+
+static unsigned int vm_index(const void *va, enum vmap_type type)
 {
     unsigned long addr = (unsigned long)va & ~(PAGE_SIZE - 1);
     unsigned int idx;
+    unsigned long start = (unsigned long)vm_base[type];
+
+    if ( !start)
+        return 0;
 
-    if ( addr < VMAP_VIRT_START + (vm_end / 8) ||
-         addr >= VMAP_VIRT_START + vm_top * PAGE_SIZE )
+    if ( addr < start + (vm_end[type] / 8) ||
+         addr >= start + vm_top[type] * PAGE_SIZE )
         return 0;
 
-    idx = PFN_DOWN(va - vm_base);
-    return !test_bit(idx - 1, vm_bitmap) &&
-           test_bit(idx, vm_bitmap) ? idx : 0;
+    idx = PFN_DOWN(va - vm_base[type]);
+    return !test_bit(idx - 1, vm_bitmap(type)) &&
+           test_bit(idx, vm_bitmap(type)) ? idx : 0;
 }
 
-static unsigned int vm_size(const void *va)
+static unsigned int vm_size(const void *va, enum vmap_type type)
 {
-    unsigned int start = vm_index(va), end;
+    unsigned int start = vm_index(va, type), end;
 
     if ( !start )
         return 0;
 
-    end = find_next_zero_bit(vm_bitmap, vm_top, start + 1);
+    end = find_next_zero_bit(vm_bitmap(type), vm_top[type], start + 1);
 
-    return min(end, vm_top) - start;
+    return min(end, vm_top[type]) - start;
 }
 
-void vm_free(const void *va)
+static void vm_free_type(const void *va, enum vmap_type type)
 {
-    unsigned int bit = vm_index(va);
+    unsigned int bit = vm_index(va, type);
 
     if ( !bit )
     {
@@ -169,22 +202,28 @@ void vm_free(const void *va)
     }
 
     spin_lock(&vm_lock);
-    if ( bit < vm_low )
+    if ( bit < vm_low[type] )
     {
-        vm_low = bit - 1;
-        while ( !test_bit(vm_low - 1, vm_bitmap) )
-            --vm_low;
+        vm_low[type] = bit - 1;
+        while ( !test_bit(vm_low[type] - 1, vm_bitmap(type)) )
+            --vm_low[type];
     }
-    while ( __test_and_clear_bit(bit, vm_bitmap) )
-        if ( ++bit == vm_top )
+    while ( __test_and_clear_bit(bit, vm_bitmap(type)) )
+        if ( ++bit == vm_top[type] )
             break;
     spin_unlock(&vm_lock);
 }
 
+void vm_free(const void *va)
+{
+    vm_free_type(va, VMAP_VIRT);
+}
+
 void *__vmap(const mfn_t *mfn, unsigned int granularity,
-             unsigned int nr, unsigned int align, unsigned int flags)
+             unsigned int nr, unsigned int align, unsigned int flags,
+             enum vmap_type type)
 {
-    void *va = vm_alloc(nr * granularity, align);
+    void *va = vm_alloc_type(nr * granularity, align, type);
     unsigned long cur = (unsigned long)va;
 
     for ( ; va && nr--; ++mfn, cur += PAGE_SIZE * granularity )
@@ -201,22 +240,33 @@ void *__vmap(const mfn_t *mfn, unsigned int granularity,
 
 void *vmap(const mfn_t *mfn, unsigned int nr)
 {
-    return __vmap(mfn, 1, nr, 1, PAGE_HYPERVISOR);
+    return __vmap(mfn, 1, nr, 1, PAGE_HYPERVISOR, VMAP_VIRT);
 }
 
 void vunmap(const void *va)
 {
+    enum vmap_type type = VMAP_VIRT;
+    unsigned int size = vm_size(va, type);
+#ifndef _PAGE_NONE
+    unsigned long addr;
+#endif
+
+    if ( !size )
+    {
+        type = XEN_VIRT;
+        size = vm_size(va, type);
+    }
 #ifndef _PAGE_NONE
-    unsigned long addr = (unsigned long)va;
+    addr = (unsigned long)va;
 
-    destroy_xen_mappings(addr, addr + PAGE_SIZE * vm_size(va));
+    destroy_xen_mappings(addr, addr + PAGE_SIZE * size);
 #else /* Avoid tearing down intermediate page tables. */
-    map_pages_to_xen((unsigned long)va, 0, vm_size(va), _PAGE_NONE);
+    map_pages_to_xen((unsigned long)va, 0, size, _PAGE_NONE);
 #endif
-    vm_free(va);
+    vm_free_type(va, type);
 }
 
-void *vmalloc(size_t size)
+void *vmalloc_type(size_t size, enum vmap_type type, mfn_t **mfn_array)
 {
     mfn_t *mfn;
     size_t pages, i;
@@ -238,11 +288,15 @@ void *vmalloc(size_t size)
         mfn[i] = _mfn(page_to_mfn(pg));
     }
 
-    va = vmap(mfn, pages);
+    va = __vmap(mfn, 1, pages, 1, PAGE_HYPERVISOR, type);
     if ( va == NULL )
         goto error;
 
-    xfree(mfn);
+    if ( mfn_array )
+        *mfn_array = mfn;
+    else
+        xfree(mfn);
+
     return va;
 
  error:
@@ -252,6 +306,11 @@ void *vmalloc(size_t size)
     return NULL;
 }
 
+void *vmalloc(size_t size)
+{
+    return vmalloc_type(size, VMAP_VIRT, NULL);
+}
+
 void *vzalloc(size_t size)
 {
     void *p = vmalloc(size);
@@ -275,7 +334,10 @@ void vfree(void *va)
     if ( !va )
         return;
 
-    pages = vm_size(va);
+    pages = vm_size(va, VMAP_VIRT);
+    if ( !pages )
+        pages = vm_size(va, XEN_VIRT);
+
     ASSERT(pages);
 
     for ( i = 0; i < pages; i++ )
diff --git a/xen/drivers/acpi/osl.c b/xen/drivers/acpi/osl.c
index 8a28d87..66bec78 100644
--- a/xen/drivers/acpi/osl.c
+++ b/xen/drivers/acpi/osl.c
@@ -97,7 +97,7 @@ acpi_os_map_memory(acpi_physical_address phys, acpi_size size)
                if (IS_ENABLED(CONFIG_X86) && !((phys + size - 1) >> 20))
                        return __va(phys);
                return __vmap(&mfn, PFN_UP(offs + size), 1, 1,
-                             ACPI_MAP_MEM_ATTR) + offs;
+                             ACPI_MAP_MEM_ATTR, VMAP_VIRT) + offs;
        }
        return __acpi_map_table(phys, size);
 }
diff --git a/xen/include/asm-x86/x86_64/page.h b/xen/include/asm-x86/x86_64/page.h
index 86abb94..a854e05 100644
--- a/xen/include/asm-x86/x86_64/page.h
+++ b/xen/include/asm-x86/x86_64/page.h
@@ -38,6 +38,8 @@
 #include <xen/pdx.h>
 
 extern unsigned long xen_virt_end;
+extern unsigned long avail_virt_start;
+extern unsigned long avail_virt_end;
 
 #define spage_to_pdx(spg) (((spg) - spage_table)<<(SUPERPAGE_SHIFT-PAGE_SHIFT))
 #define pdx_to_spage(pdx) (spage_table + ((pdx)>>(SUPERPAGE_SHIFT-PAGE_SHIFT)))
diff --git a/xen/include/xen/vmap.h b/xen/include/xen/vmap.h
index 5671ac8..492d3e7 100644
--- a/xen/include/xen/vmap.h
+++ b/xen/include/xen/vmap.h
@@ -4,14 +4,25 @@
 #include <xen/mm.h>
 #include <asm/page.h>
 
+/* These two functions operate only on VMAP_VIRT address space. */
 void *vm_alloc(unsigned int nr, unsigned int align);
 void vm_free(const void *);
 
-void *__vmap(const mfn_t *mfn, unsigned int granularity,
-             unsigned int nr, unsigned int align, unsigned int flags);
+enum vmap_type {
+    VMAP_VIRT,
+    XEN_VIRT,
+    VMAP_TYPE_MAX,
+};
+
+void *__vmap(const mfn_t *mfn, unsigned int granularity, unsigned int nr,
+             unsigned int align, unsigned int flags, enum vmap_type);
 void *vmap(const mfn_t *mfn, unsigned int nr);
 void vunmap(const void *);
+/* Only operates on VMAP_VIRT. */
 void *vmalloc(size_t size);
+
+void *vmalloc_type(size_t size, enum vmap_type type, mfn_t **mfn_array);
+
 void *vzalloc(size_t size);
 void vfree(void *va);
 
-- 
2.4.3


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.