Re: [Minios-devel] [UNIKRAFT PATCH v2 2/6] plat/xen: Add new memory region - demand area (x86_64)

Reviewed-by: Yuri Volchkov <yuri.volchkov@xxxxxxxxx>

Costin Lupu <costin.lupu@xxxxxxxxx> writes:

> Demand area is a new memory region used by Xen for on-demand memory
> mappings. An example use case for on-demand mappings is the grant
> subsystem, which requires a virtual memory region different from the
> memory used for the heap.
>
> This patch also introduces the functions for reading and writing page
> table entries, needed when creating memory mappings in the demand area.
>
> Signed-off-by: Costin Lupu <costin.lupu@xxxxxxxxx>
> ---
>  plat/xen/include/xen-x86/mm.h |  16 +-
>  plat/xen/x86/mm.c             | 359 +++++++++++++++++++++++++++++++++++++++++-
>  plat/xen/x86/setup.c          |  13 +-
>  3 files changed, 382 insertions(+), 6 deletions(-)
>
> diff --git a/plat/xen/include/xen-x86/mm.h b/plat/xen/include/xen-x86/mm.h
> index a1dbb26..0e59796 100644
> --- a/plat/xen/include/xen-x86/mm.h
> +++ b/plat/xen/include/xen-x86/mm.h
> @@ -275,11 +275,18 @@ static __inline__ paddr_t machine_to_phys(maddr_t machine)
>  })
>  #define virtual_to_mfn(_virt) pte_to_mfn(virtual_to_pte(_virt))
>
> -#define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, NULL, L1_PROT)
> -#define map_zero(n, a) map_frames_ex(&mfn_zero, n, 0, 0, a, DOMID_SELF, NULL, L1_PROT_RO)
> -#define do_map_zero(start, n) do_map_frames(start, &mfn_zero, n, 0, 0, DOMID_SELF, NULL, L1_PROT_RO)
> +void *map_frames_ex(const unsigned long *mfns, unsigned long n,
> +                unsigned long stride, unsigned long incr,
> +                unsigned long alignment,
> +                domid_t id, int *err, unsigned long prot,
> +                struct uk_alloc *a);
> +#define map_frames(f, n, a) \
> +        map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, NULL, L1_PROT, a)
> +#define map_zero(n, a) \
> +        map_frames_ex(&mfn_zero, n, 0, 0, a, DOMID_SELF, NULL, L1_PROT_RO, a)
> +#define do_map_zero(start, n, a) \
> +        do_map_frames(start, &mfn_zero, n, 0, 0, DOMID_SELF, NULL, L1_PROT_RO, a)
>
> -pgentry_t *need_pgt(unsigned long addr);
>  void arch_mm_preinit(void *p);
>  unsigned long alloc_virt_kernel(unsigned n_pages);
>
> @@ -294,5 +301,6 @@ void _init_mem_build_pagetable(unsigned long *start_pfn,
>                                 unsigned long *max_pfn);
>  void _init_mem_set_readonly(void *text, void *etext);
>  void _init_mem_clear_bootstrap(void);
> +void _init_mem_demand_area(unsigned long start, unsigned long page_num);
>
>  #endif /* _ARCH_MM_H_ */
> diff --git a/plat/xen/x86/mm.c b/plat/xen/x86/mm.c
> index a77868a..3a95ad0 100644
> --- a/plat/xen/x86/mm.c
> +++ b/plat/xen/x86/mm.c
> @@ -36,7 +36,9 @@
>   */
>
>  #include <string.h>
> -#include <xen-x86/os.h>
> +#include <errno.h>
> +#include <uk/alloc.h>
> +#include <uk/plat/config.h>
>  #include <xen-x86/mm.h>
>  #include <xen/memory.h>
>  #include <uk/print.h>
> @@ -224,6 +226,348 @@ void _init_mem_build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn)
>  }
>
>  /*
> + * Get the PTE for virtual address va if it exists. Otherwise NULL.
> + */
> +static pgentry_t *get_pte(unsigned long va)
> +{
> +        unsigned long mfn;
> +        pgentry_t *tab;
> +        unsigned int offset;
> +
> +        tab = pt_base;
> +
> +#if defined(__x86_64__)
> +        offset = l4_table_offset(va);
> +        if (!(tab[offset] & _PAGE_PRESENT))
> +                return NULL;
> +
> +        mfn = pte_to_mfn(tab[offset]);
> +        tab = mfn_to_virt(mfn);
> +#endif
> +        offset = l3_table_offset(va);
> +        if (!(tab[offset] & _PAGE_PRESENT))
> +                return NULL;
> +
> +        mfn = pte_to_mfn(tab[offset]);
> +        tab = mfn_to_virt(mfn);
> +        offset = l2_table_offset(va);
> +        if (!(tab[offset] & _PAGE_PRESENT))
> +                return NULL;
> +
> +        if (tab[offset] & _PAGE_PSE)
> +                return &tab[offset];
> +
> +        mfn = pte_to_mfn(tab[offset]);
> +        tab = mfn_to_virt(mfn);
> +        offset = l1_table_offset(va);
> +
> +        return &tab[offset];
> +}
> +
> +/*
> + * Return a valid PTE for a given virtual address.
> + * If the PTE does not exist, allocate page-table pages.
> + */
> +static pgentry_t *need_pte(unsigned long va, struct uk_alloc *a)
> +{
> +        unsigned long pt_mfn;
> +        pgentry_t *tab;
> +        unsigned long pt_pfn;
> +        unsigned int offset;
> +
> +        tab = pt_base;
> +        pt_mfn = virt_to_mfn(pt_base);
> +#if defined(__x86_64__)
> +        offset = l4_table_offset(va);
> +        if (!(tab[offset] & _PAGE_PRESENT)) {
> +                pt_pfn = virt_to_pfn(uk_malloc_page(a));
> +                if (!pt_pfn)
> +                        return NULL;
> +                new_pt_frame(&pt_pfn, pt_mfn, offset, L3_FRAME);
> +        }
> +        UK_ASSERT(tab[offset] & _PAGE_PRESENT);
> +
> +        pt_mfn = pte_to_mfn(tab[offset]);
> +        tab = mfn_to_virt(pt_mfn);
> +#endif
> +        offset = l3_table_offset(va);
> +        if (!(tab[offset] & _PAGE_PRESENT)) {
> +                pt_pfn = virt_to_pfn(uk_malloc_page(a));
> +                if (!pt_pfn)
> +                        return NULL;
> +                new_pt_frame(&pt_pfn, pt_mfn, offset, L2_FRAME);
> +        }
> +        UK_ASSERT(tab[offset] & _PAGE_PRESENT);
> +
> +        pt_mfn = pte_to_mfn(tab[offset]);
> +        tab = mfn_to_virt(pt_mfn);
> +        offset = l2_table_offset(va);
> +        if (!(tab[offset] & _PAGE_PRESENT)) {
> +                pt_pfn = virt_to_pfn(uk_malloc_page(a));
> +                if (!pt_pfn)
> +                        return NULL;
> +                new_pt_frame(&pt_pfn, pt_mfn, offset, L1_FRAME);
> +        }
> +        UK_ASSERT(tab[offset] & _PAGE_PRESENT);
> +
> +        if (tab[offset] & _PAGE_PSE)
> +                return &tab[offset];
> +
> +        pt_mfn = pte_to_mfn(tab[offset]);
> +        tab = mfn_to_virt(pt_mfn);
> +        offset = l1_table_offset(va);
> +
> +        return &tab[offset];
> +}
> +
> +/**
> + * do_map_frames - Map an array of MFNs contiguously into the virtual
> + * address space starting at @va.
> + * @va: Starting address of the virtual address range
> + * @mfns: Array of MFNs
> + * @n: Number of MFNs
> + * @stride: Stride used for selecting the MFNs on each iteration
> + * @incr: Increment added to MFNs on each iteration
> + * @id: Domain id
> + * @err: Array of error statuses
> + * @prot: Page protection flags
> + * @a: Memory allocator used when new page table entries are needed
> + *
> + * Note that either @stride or @incr must be non-zero, not both of
> + * them. One should use a non-zero value for @stride when providing
> + * an array of MFNs. The @incr parameter should be used when only the
> + * first MFN is provided and the subsequent MFN values are simply
> + * derived by adding @incr.
> + */
> +#define MAP_BATCH ((STACK_SIZE / 4) / sizeof(mmu_update_t))
> +int do_map_frames(unsigned long va,
> +                const unsigned long *mfns, unsigned long n,
> +                unsigned long stride, unsigned long incr,
> +                domid_t id, int *err, unsigned long prot,
> +                struct uk_alloc *a)
> +{
> +        pgentry_t *pte = NULL;
> +        unsigned long mapped = 0;
> +
> +        if (!mfns) {
> +                uk_printd(DLVL_WARN, "do_map_frames: no mfns supplied\n");
> +                return -EINVAL;
> +        }
> +
> +        uk_printd(DLVL_EXTRA,
> +                "Mapping va=%p n=%lu, mfns[0]=0x%lx stride=%lu incr=%lu prot=0x%lx\n",
> +                (void *) va, n, mfns[0], stride, incr, prot);
> +
> +        if (err)
> +                memset(err, 0, n * sizeof(int));
> +
> +        while (mapped < n) {
> +#ifdef CONFIG_PARAVIRT
> +                unsigned long i;
> +                int rc;
> +                unsigned long batched;
> +
> +                if (err)
> +                        batched = 1;
> +                else
> +                        batched = n - mapped;
> +
> +                if (batched > MAP_BATCH)
> +                        batched = MAP_BATCH;
> +
> +                mmu_update_t mmu_updates[batched];
> +
> +                for (i = 0; i < batched; i++, va += PAGE_SIZE, pte++) {
> +                        if (!pte || !(va & L1_MASK))
> +                                pte = need_pte(va, a);
> +                        if (!pte)
> +                                return -ENOMEM;
> +
> +                        mmu_updates[i].ptr =
> +                                virt_to_mach(pte) | MMU_NORMAL_PT_UPDATE;
> +                        mmu_updates[i].val =
> +                                ((pgentry_t) (mfns[(mapped + i) * stride]
> +                                + (mapped + i) * incr) << PAGE_SHIFT) | prot;
> +                }
> +
> +                rc = HYPERVISOR_mmu_update(mmu_updates, batched, NULL, id);
> +                if (rc < 0) {
> +                        if (err)
> +                                err[mapped * stride] = rc;
> +                        else
> +                                UK_CRASH(
> +                                        "Map %ld (%lx, ...) at %lx failed: %d.\n",
> +                                        batched,
> +                                        mfns[mapped * stride] + mapped * incr,
> +                                        va, rc);
> +                }
> +                mapped += batched;
> +#else
> +                if (!pte || !(va & L1_MASK))
> +                        pte = need_pte(va & ~L1_MASK, a);
> +                if (!pte)
> +                        return -ENOMEM;
> +
> +                UK_ASSERT(!(*pte & _PAGE_PSE));
> +                pte[l1_table_offset(va)] =
> +                        (pgentry_t) (((mfns[mapped * stride]
> +                        + mapped * incr) << PAGE_SHIFT) | prot);
> +                mapped++;
> +#endif
> +        }
> +
> +        return 0;
> +}
> +
> +static unsigned long demand_map_area_start;
> +static unsigned long demand_map_area_end;
> +
> +unsigned long allocate_ondemand(unsigned long n, unsigned long align)
> +{
> +        unsigned long page_idx, contig = 0;
> +
> +        /* Find a properly aligned run of n contiguous frames */
> +        for (page_idx = 0;
> +             page_idx <= DEMAND_MAP_PAGES - n;
> +             page_idx = (page_idx + contig + 1 + align - 1) & ~(align - 1)) {
> +
> +                unsigned long addr =
> +                        demand_map_area_start + page_idx * PAGE_SIZE;
> +                pgentry_t *pte = get_pte(addr);
> +
> +                for (contig = 0; contig < n; contig++, addr += PAGE_SIZE) {
> +                        if (!(addr & L1_MASK))
> +                                pte = get_pte(addr);
> +
> +                        if (pte) {
> +                                if (*pte & _PAGE_PRESENT)
> +                                        break;
> +
> +                                pte++;
> +                        }
> +                }
> +
> +                if (contig == n)
> +                        break;
> +        }
> +
> +        if (contig != n) {
> +                uk_printd(DLVL_ERR, "Failed to find %ld frames!\n", n);
> +                return 0;
> +        }
> +
> +        return demand_map_area_start + page_idx * PAGE_SIZE;
> +}
> +
> +/**
> + * map_frames_ex - Map an array of MFNs contiguously into the virtual
> + * address space. Virtual addresses are allocated from the on-demand
> + * area.
> + * @mfns: Array of MFNs
> + * @n: Number of MFNs
> + * @stride: Stride used for selecting the MFNs on each iteration
> + *        (e.g. 1 for every element, 0 always first element)
> + * @incr: Increment added to MFNs on each iteration
> + * @alignment: Alignment
> + * @id: Domain id
> + * @err: Array of error statuses
> + * @prot: Page protection flags
> + * @a: Memory allocator used when new page table entries are needed
> + *
> + * Note that either @stride or @incr must be non-zero, not both of
> + * them.
> + * One should use a non-zero value for @stride when providing
> + * an array of MFNs. The @incr parameter should be used when only the
> + * first MFN is provided and the subsequent MFN values are simply
> + * derived by adding @incr.
> + */
> +void *map_frames_ex(const unsigned long *mfns, unsigned long n,
> +                unsigned long stride, unsigned long incr,
> +                unsigned long alignment,
> +                domid_t id, int *err, unsigned long prot,
> +                struct uk_alloc *a)
> +{
> +        unsigned long va = allocate_ondemand(n, alignment);
> +
> +        if (!va)
> +                return NULL;
> +
> +        if (do_map_frames(va, mfns, n, stride, incr, id, err, prot, a))
> +                return NULL;
> +
> +        return (void *) va;
> +}
> +
> +/*
> + * Unmap num_frames frames mapped at virtual address va.
> + */
> +#define UNMAP_BATCH ((STACK_SIZE / 4) / sizeof(multicall_entry_t))
> +int unmap_frames(unsigned long va, unsigned long num_frames)
> +{
> +#ifdef CONFIG_PARAVIRT
> +        unsigned long i, n = UNMAP_BATCH;
> +        multicall_entry_t call[n];
> +        int ret;
> +#else
> +        pgentry_t *pte;
> +#endif
> +
> +        UK_ASSERT(!((unsigned long) va & ~PAGE_MASK));
> +
> +        uk_printd(DLVL_EXTRA,
> +                "Unmapping va=%p, num=%lu\n", (void *) va, num_frames);
> +
> +        while (num_frames) {
> +#ifdef CONFIG_PARAVIRT
> +                if (n > num_frames)
> +                        n = num_frames;
> +
> +                for (i = 0; i < n; i++) {
> +                        int arg = 0;
> +                        /*
> +                         * simply update the PTE for the VA and
> +                         * invalidate TLB
> +                         */
> +                        call[i].op = __HYPERVISOR_update_va_mapping;
> +                        call[i].args[arg++] = va;
> +                        call[i].args[arg++] = 0;
> +#ifdef __i386__
> +                        call[i].args[arg++] = 0;
> +#endif
> +                        call[i].args[arg++] = UVMF_INVLPG;
> +
> +                        va += PAGE_SIZE;
> +                }
> +
> +                ret = HYPERVISOR_multicall(call, n);
> +                if (ret) {
> +                        uk_printd(DLVL_ERR,
> +                                "update_va_mapping hypercall failed with rc=%d.\n", ret);
> +                        return -ret;
> +                }
> +
> +                for (i = 0; i < n; i++) {
> +                        if (call[i].result) {
> +                                uk_printd(DLVL_ERR,
> +                                        "update_va_mapping failed with rc=%d.\n", ret);
> +                                return -(call[i].result);
> +                        }
> +                }
> +                num_frames -= n;
> +#else
> +                pte = get_pte(va);
> +                if (pte) {
> +                        UK_ASSERT(!(*pte & _PAGE_PSE));
> +                        *pte = 0;
> +                        invlpg(va);
> +                }
> +                va += PAGE_SIZE;
> +                num_frames--;
> +#endif
> +        }
> +        return 0;
> +}
> +
> +/*
>   * Mark portion of the address space read only.
>   */
>  extern struct shared_info _libxenplat_shared_info;
> @@ -352,3 +696,16 @@ void _init_mem_prepare(unsigned long *start_pfn, unsigned long *max_pfn)
>  #error "Please port (see Mini-OS's arch_mm_preinit())"
>  #endif
>  }
> +
> +/*
> + * Reserve an area of virtual address space for mappings
> + */
> +
> +void _init_mem_demand_area(unsigned long start, unsigned long page_num)
> +{
> +        demand_map_area_start = start;
> +        demand_map_area_end = demand_map_area_start + page_num * PAGE_SIZE;
> +
> +        uk_printd(DLVL_INFO, "Demand map pfns at %lx-%lx.\n",
> +                demand_map_area_start, demand_map_area_end);
> +}
> diff --git a/plat/xen/x86/setup.c b/plat/xen/x86/setup.c
> index ad603de..31d3a90 100644
> --- a/plat/xen/x86/setup.c
> +++ b/plat/xen/x86/setup.c
> @@ -165,7 +165,18 @@ static inline void _init_mem(void)
>  #if CONFIG_UKPLAT_MEMRNAME
>          _libxenplat_mrd[0].name = "heap";
>  #endif
> -        _libxenplat_mrd_num = 1;
> +
> +        /* demand area */
> +        _libxenplat_mrd[1].base = (void *) VIRT_DEMAND_AREA;
> +        _libxenplat_mrd[1].len = DEMAND_MAP_PAGES * PAGE_SIZE;
> +        _libxenplat_mrd[1].flags = UKPLAT_MEMRF_RESERVED;
> +#if CONFIG_UKPLAT_MEMRNAME
> +        _libxenplat_mrd[1].name = "demand";
> +#endif
> +        _init_mem_demand_area((unsigned long) _libxenplat_mrd[1].base,
> +                DEMAND_MAP_PAGES);
> +
> +        _libxenplat_mrd_num = 2;
>  }
>
>  void _libxenplat_x86entry(void *start_info) __noreturn;
> --
> 2.11.0
>

--
Yuri Volchkov
Software Specialist

NEC Europe Ltd
Kurfürsten-Anlage 36
D-69115 Heidelberg

_______________________________________________
Minios-devel mailing list
Minios-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/minios-devel
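
For context, a minimal usage sketch of the reworked interface follows. It is not part of the patch: the two helper functions are hypothetical, uk_alloc_get_default() and the headers assumed to declare DOMID_SELF and unmap_frames() are assumptions, and only map_frames_ex(), unmap_frames() and their argument order are taken from the diff above.

#include <uk/alloc.h>    /* struct uk_alloc, uk_alloc_get_default() (assumed) */
#include <xen-x86/mm.h>  /* map_frames_ex(), L1_PROT; unmap_frames() assumed declared here */

/* Hypothetical helper: map two machine frames back to back into the
 * demand area, read/write, for the local domain.
 */
static void *demo_map_two(unsigned long mfn0, unsigned long mfn1)
{
        unsigned long mfns[2] = { mfn0, mfn1 };
        int err[2];

        /* stride=1 walks the mfns[] array, incr=0 means the MFNs are not
         * derived arithmetically, alignment is one page. The last argument
         * is the allocator used if new page-table pages must be created.
         */
        return map_frames_ex(mfns, 2, 1, 0, 1, DOMID_SELF, err, L1_PROT,
                             uk_alloc_get_default());
}

/* Hypothetical helper: release the two mappings created above. */
static void demo_unmap_two(void *va)
{
        unmap_frames((unsigned long) va, 2);
}

As the do_map_frames() code in the diff shows, passing a non-NULL err array makes the paravirt path submit one mmu_update per hypercall so per-frame errors can be recorded, while passing NULL lets it batch up to MAP_BATCH updates at a time.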