[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Minios-devel] [UNIKRAFT PATCH 2/6] plat/xen: Add new memory region - demand area (x86_64)
On 06/27/2018 01:11 AM, Yuri Volchkov wrote: > Hi Costin, > > the comments are inline. > > -Yuri. > > Costin Lupu <costin.lupu@xxxxxxxxx> writes: >> Demand area is a new memory region used for Xen for on-demand memory >> mappings. An example of use case for on-demand mappins is the grant >> subsystem which requires a virtual memory region different than the >> memory used for heap. >> >> This patch also introduces the functions for reading and writing page >> table entries, needed when creating memory mappings in the demand area. >> >> Signed-off-by: Costin Lupu <costin.lupu@xxxxxxxxx> >> --- >> plat/xen/include/xen-x86/mm.h | 17 ++- >> plat/xen/x86/mm.c | 323 >> +++++++++++++++++++++++++++++++++++++++++- >> plat/xen/x86/setup.c | 13 +- >> 3 files changed, 346 insertions(+), 7 deletions(-) >> >> diff --git a/plat/xen/include/xen-x86/mm.h b/plat/xen/include/xen-x86/mm.h >> index a1dbb26..48ab295 100644 >> --- a/plat/xen/include/xen-x86/mm.h >> +++ b/plat/xen/include/xen-x86/mm.h >> @@ -275,11 +275,17 @@ static __inline__ paddr_t machine_to_phys(maddr_t >> machine) >> }) >> #define virtual_to_mfn(_virt) pte_to_mfn(virtual_to_pte(_virt)) >> >> -#define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, NULL, >> L1_PROT) >> -#define map_zero(n, a) map_frames_ex(&mfn_zero, n, 0, 0, a, DOMID_SELF, >> NULL, L1_PROT_RO) >> -#define do_map_zero(start, n) do_map_frames(start, &mfn_zero, n, 0, 0, >> DOMID_SELF, NULL, L1_PROT_RO) >> - >> -pgentry_t *need_pgt(unsigned long addr); >> +void *map_frames_ex(const unsigned long *mfns, unsigned long n, >> + unsigned long stride, unsigned long incr, >> + unsigned long alignment, >> + domid_t id, int *err, unsigned long prot, >> + struct uk_alloc *a); >> +#define map_frames(f, n, a) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, NULL, >> L1_PROT, a) >> +#define map_zero(n, a) map_frames_ex(&mfn_zero, n, 0, 0, a, DOMID_SELF, >> NULL, L1_PROT_RO, a) >> +#define do_map_zero(start, n, a) do_map_frames(start, &mfn_zero, n, 0, 0, >> 
DOMID_SELF, NULL, L1_PROT_RO, a) >> + >> +struct uk_alloc; >> +pgentry_t *need_pte(unsigned long addr, struct uk_alloc *a); >> void arch_mm_preinit(void *p); >> unsigned long alloc_virt_kernel(unsigned n_pages); >> >> @@ -294,5 +300,6 @@ void _init_mem_build_pagetable(unsigned long *start_pfn, >> unsigned long >> *max_pfn); >> void _init_mem_set_readonly(void *text, void *etext); >> void _init_mem_clear_bootstrap(void); >> +void _init_mem_demand_area(unsigned long start, unsigned long page_num); >> >> #endif /* _ARCH_MM_H_ */ >> diff --git a/plat/xen/x86/mm.c b/plat/xen/x86/mm.c >> index a77868a..818653f 100644 >> --- a/plat/xen/x86/mm.c >> +++ b/plat/xen/x86/mm.c >> @@ -36,7 +36,9 @@ >> */ >> >> #include <string.h> >> -#include <xen-x86/os.h> >> +#include <errno.h> >> +#include <uk/alloc.h> >> +#include <uk/plat/config.h> >> #include <xen-x86/mm.h> >> #include <xen/memory.h> >> #include <uk/print.h> >> @@ -224,6 +226,312 @@ void _init_mem_build_pagetable(unsigned long >> *start_pfn, unsigned long *max_pfn) >> } >> >> /* >> + * Get the PTE for virtual address va if it exists. Otherwise NULL. >> + */ >> +static pgentry_t *get_pte(unsigned long va) >> +{ >> + unsigned long mfn; >> + pgentry_t *tab; >> + unsigned int offset; >> + >> + tab = pt_base; >> + mfn = virt_to_mfn(pt_base); > this seems to be a redundant operation. The mfn is overwritten in any case > later Ack. 
>> +#if defined(__x86_64__) >> + offset = l4_table_offset(va); >> + if (!(tab[offset] & _PAGE_PRESENT)) >> + return NULL; >> + >> + mfn = pte_to_mfn(tab[offset]); >> + tab = mfn_to_virt(mfn); >> +#endif >> + offset = l3_table_offset(va); >> + if (!(tab[offset] & _PAGE_PRESENT)) >> + return NULL; >> + >> + mfn = pte_to_mfn(tab[offset]); >> + tab = mfn_to_virt(mfn); >> + offset = l2_table_offset(va); >> + if (!(tab[offset] & _PAGE_PRESENT)) >> + return NULL; >> + >> + if (tab[offset] & _PAGE_PSE) >> + return &tab[offset]; >> + >> + mfn = pte_to_mfn(tab[offset]); >> + tab = mfn_to_virt(mfn); >> + offset = l1_table_offset(va); >> + >> + return &tab[offset]; >> +} >> + >> +/* >> + * Return a valid PTE for a given virtual address. >> + * If PTE does not exist, allocate page-table pages. >> + */ >> +pgentry_t *need_pte(unsigned long va, struct uk_alloc *a) > Should this be static? It looks like a purely internal function. Since in Mini-OS it's not used anywhere else, I guess it's ok to make it static. 
>> +{ >> + unsigned long pt_mfn; >> + pgentry_t *tab; >> + unsigned long pt_pfn; >> + unsigned int offset; >> + >> + tab = pt_base; >> + pt_mfn = virt_to_mfn(pt_base); >> +#if defined(__x86_64__) >> + offset = l4_table_offset(va); >> + if (!(tab[offset] & _PAGE_PRESENT)) { >> + pt_pfn = virt_to_pfn(uk_malloc_page(a)); >> + if (!pt_pfn) >> + return NULL; >> + new_pt_frame(&pt_pfn, pt_mfn, offset, L3_FRAME); >> + } >> + UK_ASSERT(tab[offset] & _PAGE_PRESENT); >> + >> + pt_mfn = pte_to_mfn(tab[offset]); >> + tab = mfn_to_virt(pt_mfn); >> +#endif >> + offset = l3_table_offset(va); >> + if (!(tab[offset] & _PAGE_PRESENT)) { >> + pt_pfn = virt_to_pfn(uk_malloc_page(a)); >> + if (!pt_pfn) >> + return NULL; >> + new_pt_frame(&pt_pfn, pt_mfn, offset, L2_FRAME); >> + } >> + UK_ASSERT(tab[offset] & _PAGE_PRESENT); >> + >> + pt_mfn = pte_to_mfn(tab[offset]); >> + tab = mfn_to_virt(pt_mfn); >> + offset = l2_table_offset(va); >> + if (!(tab[offset] & _PAGE_PRESENT)) { >> + pt_pfn = virt_to_pfn(uk_malloc_page(a)); >> + if (!pt_pfn) >> + return NULL; >> + new_pt_frame(&pt_pfn, pt_mfn, offset, L1_FRAME); >> + } >> + UK_ASSERT(tab[offset] & _PAGE_PRESENT); >> + >> + if (tab[offset] & _PAGE_PSE) >> + return &tab[offset]; >> + >> + pt_mfn = pte_to_mfn(tab[offset]); >> + tab = mfn_to_virt(pt_mfn); >> + offset = l1_table_offset(va); >> + >> + return &tab[offset]; >> +} >> + >> +/* >> + * Map an array of MFNs contiguously into virtual address space starting at >> + * va. map f[i*stride]+i*increment for i in 0..n-1. >> + */ > Could you please also extend this comment describing each parameter? For > example as in ukarch_test_and_clr_bit(). Ack. 
>> +#define MAP_BATCH ((STACK_SIZE / 2) / sizeof(mmu_update_t)) >> +int do_map_frames(unsigned long va, >> + const unsigned long *mfns, unsigned long n, >> + unsigned long stride, unsigned long incr, >> + domid_t id, int *err, unsigned long prot, >> + struct uk_alloc *a) >> +{ >> + pgentry_t *pgt = NULL; > I really liked that you renamed pgt to pte in other parts of the > code. Maybe let's do it here too? BTW, what pgt stands for? Ack. I guess pgt stands for 'page table'. >> + unsigned long mapped = 0; >> + >> + if (!mfns) { >> + uk_printd(DLVL_WARN, "do_map_frames: no mfns supplied\n"); >> + return -EINVAL; >> + } >> + >> + uk_printd(DLVL_EXTRA, >> + "va=%p n=0x%lx, mfns[0]=0x%lx stride=0x%lx incr=0x%lx >> prot=0x%lx\n", >> + (void *) va, n, mfns[0], stride, incr, prot); > Since uk_printd does not print function name, maybe it is worth to tell > what is actually happening to a va? Ack. >> + >> + if (err) >> + memset(err, 0, n * sizeof(int)); >> + >> + while (mapped < n) { >> +#ifdef CONFIG_PARAVIRT > This is not related to this particular patch, but Unikraft does not have > CONFIG_PARAVIRT. Should we remove all mentions about it in the nearest > future? This should be handled when HVM support will be added for Unikraft. >> + unsigned long i; >> + int rc; >> + unsigned long batched; >> + >> + if (err) >> + batched = 1; >> + else >> + batched = n - mapped; >> + >> + if (batched > MAP_BATCH) >> + batched = MAP_BATCH; >> + >> + { > This brace is absolutely unnecessary. Could you please remove it? This > will also reduce the indentation level, which is a bit to high here. Ack. 
>> + mmu_update_t mmu_updates[batched]; >> + >> + for (i = 0; i < batched; i++, va += PAGE_SIZE, pgt++) { >> + if (!pgt || !(va & L1_MASK)) >> + pgt = need_pte(va, a); >> + if (!pgt) >> + return -ENOMEM; >> + >> + mmu_updates[i].ptr = >> + virt_to_mach(pgt) | >> MMU_NORMAL_PT_UPDATE; >> + mmu_updates[i].val = >> + ((pgentry_t) (mfns[(mapped + i) * >> stride] >> + + (mapped + i) * incr) << PAGE_SHIFT) | >> prot; >> + } >> + >> + rc = HYPERVISOR_mmu_update(mmu_updates, batched, NULL, >> id); >> + if (rc < 0) { >> + if (err) >> + err[mapped * stride] = rc; >> + else >> + UK_CRASH( >> + "Map %ld (%lx, ...) at %lx >> failed: %d.\n", >> + batched, mfns[mapped * stride] >> + mapped * incr, va, rc); >> + } >> + } >> + mapped += batched; >> +#else >> + if (!pgt || !(va & L1_MASK)) >> + pgt = need_pte(va & ~L1_MASK, a); >> + if (!pgt) >> + return -ENOMEM; >> + >> + UK_ASSERT(!(*pgt & _PAGE_PSE)); >> + pgt[l1_table_offset(va)] = >> + (pgentry_t) (((mfns[mapped * stride] >> + + mapped * incr) << PAGE_SHIFT) | prot); >> + mapped++; >> +#endif >> + } >> + >> + return 0; >> +} >> + >> +static unsigned long demand_map_area_start; >> +static unsigned long demand_map_area_end; >> + >> +unsigned long allocate_ondemand(unsigned long n, unsigned long align) >> +{ >> + unsigned long page_idx, contig = 0; >> + >> + /* Find a properly aligned run of n contiguous frames */ >> + for (page_idx = 0; >> + page_idx <= DEMAND_MAP_PAGES - n; >> + page_idx = (page_idx + contig + 1 + align - 1) & ~(align - 1)) { >> + >> + unsigned long addr = >> + demand_map_area_start + page_idx * PAGE_SIZE; >> + pgentry_t *pte = get_pte(addr); >> + >> + for (contig = 0; contig < n; contig++, addr += PAGE_SIZE) { >> + if (!(addr & L1_MASK)) >> + pte = get_pte(addr); >> + >> + if (pte) { >> + if (*pte & _PAGE_PRESENT) >> + break; >> + >> + pte++; >> + } >> + } >> + >> + if (contig == n) >> + break; >> + } >> + >> + if (contig != n) { >> + uk_printd(DLVL_ERR, "Failed to find %ld frames!\n", n); >> + return 0; >> + } 
>> + >> + return demand_map_area_start + page_idx * PAGE_SIZE; >> +} >> + >> +/* >> + * Map an array of MFNs contiguous into virtual address space. >> + * Virtual addresses are allocated from the on demand area. >> + */ > Also would be good to write comment about parameters of this function > too. Most of them would be just a copy paste from do_map_page. Ack. >> +void *map_frames_ex(const unsigned long *mfns, unsigned long n, >> + unsigned long stride, unsigned long incr, >> + unsigned long alignment, >> + domid_t id, int *err, unsigned long prot, >> + struct uk_alloc *a) >> +{ >> + unsigned long va = allocate_ondemand(n, alignment); >> + >> + if (!va) >> + return NULL; >> + >> + if (do_map_frames(va, mfns, n, stride, incr, id, err, prot, a)) >> + return NULL; >> + >> + return (void *) va; >> +} >> + >> +/* >> + * Unmap nun_frames frames mapped at virtual address va. >> + */ > Typo in num_frames Ack. >> +#define UNMAP_BATCH ((STACK_SIZE / 2) / sizeof(multicall_entry_t)) >> +int unmap_frames(unsigned long va, unsigned long num_frames) >> +{ >> +#ifdef CONFIG_PARAVIRT >> + unsigned long i, n = UNMAP_BATCH; >> + multicall_entry_t call[n]; >> + int ret; >> +#else >> + pgentry_t *pte; >> +#endif >> + >> + UK_ASSERT(!((unsigned long) va & ~PAGE_MASK)); >> + >> + uk_printd(DLVL_EXTRA, "va=%p, num=0x%lx\n", (void *) va, num_frames); > Same thing, would be good to tell what is happening to va in the debug > message Ack. >> + >> + while (num_frames) { >> +#ifdef CONFIG_PARAVIRT >> + if (n > num_frames) >> + n = num_frames; >> + >> + for (i = 0; i < n; i++) { >> + int arg = 0; >> + /* simply update the PTE for the VA and invalidate TLB >> */ > That is over 80 lines, and simple enough to fix. Please run the > checkpatch.pl. The last patch in the series also have a couple of > checkpatch warnings. Ack. >> + call[i].op = __HYPERVISOR_update_va_mapping; >> + call[i].args[arg++] = va; >> + call[i].args[arg++] = 0; >> +#ifdef __i386__ > I believe we do not support __i386__ do we? 
We should remove all i386 related code when we'll have a final resolution on whether we'll ever support it. >> + call[i].args[arg++] = 0; >> +#endif >> + call[i].args[arg++] = UVMF_INVLPG; >> + >> + va += PAGE_SIZE; >> + } >> + >> + ret = HYPERVISOR_multicall(call, n); > Seems that multicall is unnecessary here. The only operation batched is > 'update_va_mapping'. Calling HYPERVISOR_update_va_mapping directly would > simplify the code a bit. update_va_mapping doesn't support more than one virtual address, that's why it's used in batch mode by calling HYPERVISOR_multicall. >> + if (ret) { >> + uk_printd(DLVL_ERR, >> + "update_va_mapping hypercall failed with >> rc=%d.\n", ret); >> + return -ret; >> + } >> + >> + for (i = 0; i < n; i++) { >> + if (call[i].result) { >> + uk_printd(DLVL_ERR, >> + "update_va_mapping failed for with >> rc=%d.\n", ret); >> + return -(call[i].result); >> + } >> + } >> + num_frames -= n; >> +#else >> + pte = get_pte(va); >> + if (pte) { >> + UK_ASSERT(!(*pte & _PAGE_PSE)); >> + *pte = 0; >> + invlpg(va); >> + } >> + va += PAGE_SIZE; >> + num_frames--; >> +#endif >> + } >> + return 0; >> +} >> + >> +/* >> * Mark portion of the address space read only. 
>> */ >> extern struct shared_info _libxenplat_shared_info; >> @@ -352,3 +660,16 @@ void _init_mem_prepare(unsigned long *start_pfn, >> unsigned long *max_pfn) >> #error "Please port (see Mini-OS's arch_mm_preinit())" >> #endif >> } >> + >> +/* >> + * Reserve an area of virtual address space for mappings >> + */ >> + >> +void _init_mem_demand_area(unsigned long start, unsigned long page_num) >> +{ >> + demand_map_area_start = start; >> + demand_map_area_end = demand_map_area_start + page_num * PAGE_SIZE; >> + >> + uk_printd(DLVL_INFO, "Demand map pfns at %lx-%lx.\n", >> + demand_map_area_start, demand_map_area_end); >> +} >> diff --git a/plat/xen/x86/setup.c b/plat/xen/x86/setup.c >> index ad603de..8971f6f 100644 >> --- a/plat/xen/x86/setup.c >> +++ b/plat/xen/x86/setup.c >> @@ -165,7 +165,18 @@ static inline void _init_mem(void) >> #if CONFIG_UKPLAT_MEMRNAME >> _libxenplat_mrd[0].name = "heap"; >> #endif >> - _libxenplat_mrd_num = 1; >> + >> + /* demand area */ >> + _libxenplat_mrd[1].base = (void *) VIRT_DEMAND_AREA; >> + _libxenplat_mrd[1].len = DEMAND_MAP_PAGES * PAGE_SIZE; >> + _libxenplat_mrd[1].flags = (UKPLAT_MEMRF_RESERVED); > Unnecessary parentheses around UKPLAT_MEMRF_RESERVED Ack. >> +#if CONFIG_UKPLAT_MEMRNAME >> + _libxenplat_mrd[1].name = "demand"; >> +#endif >> + _init_mem_demand_area((unsigned long) _libxenplat_mrd[1].base, >> + DEMAND_MAP_PAGES); >> + >> + _libxenplat_mrd_num = 2; >> } >> >> void _libxenplat_x86entry(void *start_info) __noreturn; >> -- >> 2.11.0 >> > _______________________________________________ Minios-devel mailing list Minios-devel@xxxxxxxxxxxxxxxxxxxx https://lists.xenproject.org/mailman/listinfo/minios-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |