[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Minios-devel] [UNIKRAFT PATCH 2/6] plat/xen: Add new memory region - demand area (x86_64)



On 06/27/2018 01:11 AM, Yuri Volchkov wrote:
> Hi Costin,
> 
> the comments are inline.
> 
> -Yuri.
> 
> Costin Lupu <costin.lupu@xxxxxxxxx> writes:
>> Demand area is a new memory region used by Xen for on-demand memory
>> mappings. An example of a use case for on-demand mappings is the grant
>> subsystem which requires a virtual memory region different from the
>> memory used for heap.
>>
>> This patch also introduces the functions for reading and writing page
>> table entries, needed when creating memory mappings in the demand area.
>>
>> Signed-off-by: Costin Lupu <costin.lupu@xxxxxxxxx>
>> ---
>>  plat/xen/include/xen-x86/mm.h |  17 ++-
>>  plat/xen/x86/mm.c             | 323 
>> +++++++++++++++++++++++++++++++++++++++++-
>>  plat/xen/x86/setup.c          |  13 +-
>>  3 files changed, 346 insertions(+), 7 deletions(-)
>>
>> diff --git a/plat/xen/include/xen-x86/mm.h b/plat/xen/include/xen-x86/mm.h
>> index a1dbb26..48ab295 100644
>> --- a/plat/xen/include/xen-x86/mm.h
>> +++ b/plat/xen/include/xen-x86/mm.h
>> @@ -275,11 +275,17 @@ static __inline__ paddr_t machine_to_phys(maddr_t 
>> machine)
>>  })
>>  #define virtual_to_mfn(_virt)          pte_to_mfn(virtual_to_pte(_virt))
>>  
>> -#define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, NULL, 
>> L1_PROT)
>> -#define map_zero(n, a) map_frames_ex(&mfn_zero, n, 0, 0, a, DOMID_SELF, 
>> NULL, L1_PROT_RO)
>> -#define do_map_zero(start, n) do_map_frames(start, &mfn_zero, n, 0, 0, 
>> DOMID_SELF, NULL, L1_PROT_RO)
>> -
>> -pgentry_t *need_pgt(unsigned long addr);
>> +void *map_frames_ex(const unsigned long *mfns, unsigned long n,
>> +            unsigned long stride, unsigned long incr,
>> +            unsigned long alignment,
>> +            domid_t id, int *err, unsigned long prot,
>> +            struct uk_alloc *a);
>> +#define map_frames(f, n, a) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, NULL, 
>> L1_PROT, a)
>> +#define map_zero(n, a) map_frames_ex(&mfn_zero, n, 0, 0, a, DOMID_SELF, 
>> NULL, L1_PROT_RO, a)
>> +#define do_map_zero(start, n, a) do_map_frames(start, &mfn_zero, n, 0, 0, 
>> DOMID_SELF, NULL, L1_PROT_RO, a)
>> +
>> +struct uk_alloc;
>> +pgentry_t *need_pte(unsigned long addr, struct uk_alloc *a);
>>  void arch_mm_preinit(void *p);
>>  unsigned long alloc_virt_kernel(unsigned n_pages);
>>  
>> @@ -294,5 +300,6 @@ void _init_mem_build_pagetable(unsigned long *start_pfn,
>>                                                         unsigned long 
>> *max_pfn);
>>  void _init_mem_set_readonly(void *text, void *etext);
>>  void _init_mem_clear_bootstrap(void);
>> +void _init_mem_demand_area(unsigned long start, unsigned long page_num);
>>  
>>  #endif /* _ARCH_MM_H_ */
>> diff --git a/plat/xen/x86/mm.c b/plat/xen/x86/mm.c
>> index a77868a..818653f 100644
>> --- a/plat/xen/x86/mm.c
>> +++ b/plat/xen/x86/mm.c
>> @@ -36,7 +36,9 @@
>>   */
>>  
>>  #include <string.h>
>> -#include <xen-x86/os.h>
>> +#include <errno.h>
>> +#include <uk/alloc.h>
>> +#include <uk/plat/config.h>
>>  #include <xen-x86/mm.h>
>>  #include <xen/memory.h>
>>  #include <uk/print.h>
>> @@ -224,6 +226,312 @@ void _init_mem_build_pagetable(unsigned long 
>> *start_pfn, unsigned long *max_pfn)
>>  }
>>  
>>  /*
>> + * Get the PTE for virtual address va if it exists. Otherwise NULL.
>> + */
>> +static pgentry_t *get_pte(unsigned long va)
>> +{
>> +    unsigned long mfn;
>> +    pgentry_t *tab;
>> +    unsigned int offset;
>> +
>> +    tab = pt_base;
>> +    mfn = virt_to_mfn(pt_base);
> this seems to be a redundant operation. The mfn is overwritten in any case
> later

Ack.

>> +#if defined(__x86_64__)
>> +    offset = l4_table_offset(va);
>> +    if (!(tab[offset] & _PAGE_PRESENT))
>> +            return NULL;
>> +
>> +    mfn = pte_to_mfn(tab[offset]);
>> +    tab = mfn_to_virt(mfn);
>> +#endif
>> +    offset = l3_table_offset(va);
>> +    if (!(tab[offset] & _PAGE_PRESENT))
>> +            return NULL;
>> +
>> +    mfn = pte_to_mfn(tab[offset]);
>> +    tab = mfn_to_virt(mfn);
>> +    offset = l2_table_offset(va);
>> +    if (!(tab[offset] & _PAGE_PRESENT))
>> +            return NULL;
>> +
>> +    if (tab[offset] & _PAGE_PSE)
>> +            return &tab[offset];
>> +
>> +    mfn = pte_to_mfn(tab[offset]);
>> +    tab = mfn_to_virt(mfn);
>> +    offset = l1_table_offset(va);
>> +
>> +    return &tab[offset];
>> +}
>> +
>> +/*
>> + * Return a valid PTE for a given virtual address.
>> + * If PTE does not exist, allocate page-table pages.
>> + */
>> +pgentry_t *need_pte(unsigned long va, struct uk_alloc *a)
> Should this be static? It looks like a purely internal function

Since in Mini-OS it's not used anywhere else, I guess it's ok to make it
static.

>> +{
>> +    unsigned long pt_mfn;
>> +    pgentry_t *tab;
>> +    unsigned long pt_pfn;
>> +    unsigned int offset;
>> +
>> +    tab = pt_base;
>> +    pt_mfn = virt_to_mfn(pt_base);
>> +#if defined(__x86_64__)
>> +    offset = l4_table_offset(va);
>> +    if (!(tab[offset] & _PAGE_PRESENT)) {
>> +            pt_pfn = virt_to_pfn(uk_malloc_page(a));
>> +            if (!pt_pfn)
>> +                    return NULL;
>> +            new_pt_frame(&pt_pfn, pt_mfn, offset, L3_FRAME);
>> +    }
>> +    UK_ASSERT(tab[offset] & _PAGE_PRESENT);
>> +
>> +    pt_mfn = pte_to_mfn(tab[offset]);
>> +    tab = mfn_to_virt(pt_mfn);
>> +#endif
>> +    offset = l3_table_offset(va);
>> +    if (!(tab[offset] & _PAGE_PRESENT)) {
>> +            pt_pfn = virt_to_pfn(uk_malloc_page(a));
>> +            if (!pt_pfn)
>> +                    return NULL;
>> +            new_pt_frame(&pt_pfn, pt_mfn, offset, L2_FRAME);
>> +    }
>> +    UK_ASSERT(tab[offset] & _PAGE_PRESENT);
>> +
>> +    pt_mfn = pte_to_mfn(tab[offset]);
>> +    tab = mfn_to_virt(pt_mfn);
>> +    offset = l2_table_offset(va);
>> +    if (!(tab[offset] & _PAGE_PRESENT)) {
>> +            pt_pfn = virt_to_pfn(uk_malloc_page(a));
>> +            if (!pt_pfn)
>> +                    return NULL;
>> +            new_pt_frame(&pt_pfn, pt_mfn, offset, L1_FRAME);
>> +    }
>> +    UK_ASSERT(tab[offset] & _PAGE_PRESENT);
>> +
>> +    if (tab[offset] & _PAGE_PSE)
>> +            return &tab[offset];
>> +
>> +    pt_mfn = pte_to_mfn(tab[offset]);
>> +    tab = mfn_to_virt(pt_mfn);
>> +    offset = l1_table_offset(va);
>> +
>> +    return &tab[offset];
>> +}
>> +
>> +/*
>> + * Map an array of MFNs contiguously into virtual address space starting at
>> + * va. map f[i*stride]+i*increment for i in 0..n-1.
>> + */
> Could you please also extend this comment describing each parameter? For
> example as in ukarch_test_and_clr_bit().

Ack.

>> +#define MAP_BATCH ((STACK_SIZE / 2) / sizeof(mmu_update_t))
>> +int do_map_frames(unsigned long va,
>> +            const unsigned long *mfns, unsigned long n,
>> +            unsigned long stride, unsigned long incr,
>> +            domid_t id, int *err, unsigned long prot,
>> +            struct uk_alloc *a)
>> +{
>> +    pgentry_t *pgt = NULL;
> I really liked that you renamed pgt to pte in other parts of the
> code. Maybe let's do it here too? BTW, what does pgt stand for?

Ack. I guess pgt stands for 'page table'.

>> +    unsigned long mapped = 0;
>> +
>> +    if (!mfns) {
>> +            uk_printd(DLVL_WARN, "do_map_frames: no mfns supplied\n");
>> +            return -EINVAL;
>> +    }
>> +
>> +    uk_printd(DLVL_EXTRA,
>> +            "va=%p n=0x%lx, mfns[0]=0x%lx stride=0x%lx incr=0x%lx 
>> prot=0x%lx\n",
>> +            (void *) va, n, mfns[0], stride, incr, prot);
> Since uk_printd does not print the function name, maybe it is worth telling
> what is actually happening to the va?

Ack.

>> +
>> +    if (err)
>> +            memset(err, 0, n * sizeof(int));
>> +
>> +    while (mapped < n) {
>> +#ifdef CONFIG_PARAVIRT
> This is not related to this particular patch, but Unikraft does not have
> CONFIG_PARAVIRT. Should we remove all mentions of it in the near
> future?

This should be handled when HVM support is added to Unikraft.

>> +            unsigned long i;
>> +            int rc;
>> +            unsigned long batched;
>> +
>> +            if (err)
>> +                    batched = 1;
>> +            else
>> +                    batched = n - mapped;
>> +
>> +            if (batched > MAP_BATCH)
>> +                    batched = MAP_BATCH;
>> +
>> +            {
> This brace is absolutely unnecessary. Could you please remove it? This
> will also reduce the indentation level, which is a bit too high here.

Ack.

>> +                    mmu_update_t mmu_updates[batched];
>> +
>> +                    for (i = 0; i < batched; i++, va += PAGE_SIZE, pgt++) {
>> +                            if (!pgt || !(va & L1_MASK))
>> +                                    pgt = need_pte(va, a);
>> +                            if (!pgt)
>> +                                    return -ENOMEM;
>> +
>> +                            mmu_updates[i].ptr =
>> +                                    virt_to_mach(pgt) | 
>> MMU_NORMAL_PT_UPDATE;
>> +                            mmu_updates[i].val =
>> +                                    ((pgentry_t) (mfns[(mapped + i) * 
>> stride]
>> +                                    + (mapped + i) * incr) << PAGE_SHIFT) | 
>> prot;
>> +                    }
>> +
>> +                    rc = HYPERVISOR_mmu_update(mmu_updates, batched, NULL, 
>> id);
>> +                    if (rc < 0) {
>> +                            if (err)
>> +                                    err[mapped * stride] = rc;
>> +                            else
>> +                                    UK_CRASH(
>> +                                            "Map %ld (%lx, ...) at %lx 
>> failed: %d.\n",
>> +                                            batched, mfns[mapped * stride] 
>> + mapped * incr, va, rc);
>> +                    }
>> +            }
>> +            mapped += batched;
>> +#else
>> +            if (!pgt || !(va & L1_MASK))
>> +                    pgt = need_pte(va & ~L1_MASK, a);
>> +            if (!pgt)
>> +                    return -ENOMEM;
>> +
>> +            UK_ASSERT(!(*pgt & _PAGE_PSE));
>> +            pgt[l1_table_offset(va)] =
>> +                    (pgentry_t) (((mfns[mapped * stride]
>> +                    + mapped * incr) << PAGE_SHIFT) | prot);
>> +            mapped++;
>> +#endif
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static unsigned long demand_map_area_start;
>> +static unsigned long demand_map_area_end;
>> +
>> +unsigned long allocate_ondemand(unsigned long n, unsigned long align)
>> +{
>> +    unsigned long page_idx, contig = 0;
>> +
>> +    /* Find a properly aligned run of n contiguous frames */
>> +    for (page_idx = 0;
>> +            page_idx <= DEMAND_MAP_PAGES - n;
>> +            page_idx = (page_idx + contig + 1 + align - 1) & ~(align - 1)) {
>> +
>> +            unsigned long addr =
>> +                    demand_map_area_start + page_idx * PAGE_SIZE;
>> +            pgentry_t *pte = get_pte(addr);
>> +
>> +            for (contig = 0; contig < n; contig++, addr += PAGE_SIZE) {
>> +                    if (!(addr & L1_MASK))
>> +                            pte = get_pte(addr);
>> +
>> +                    if (pte) {
>> +                            if (*pte & _PAGE_PRESENT)
>> +                                    break;
>> +
>> +                            pte++;
>> +                    }
>> +            }
>> +
>> +            if (contig == n)
>> +                    break;
>> +    }
>> +
>> +    if (contig != n) {
>> +            uk_printd(DLVL_ERR, "Failed to find %ld frames!\n", n);
>> +            return 0;
>> +    }
>> +
>> +    return demand_map_area_start + page_idx * PAGE_SIZE;
>> +}
>> +
>> +/*
>> + * Map an array of MFNs contiguous into virtual address space.
>> + * Virtual addresses are allocated from the on demand area.
>> + */
> It would also be good to write a comment about the parameters of this
> function too. Most of them would be just a copy-paste from do_map_frames.

Ack.

>> +void *map_frames_ex(const unsigned long *mfns, unsigned long n,
>> +            unsigned long stride, unsigned long incr,
>> +            unsigned long alignment,
>> +            domid_t id, int *err, unsigned long prot,
>> +            struct uk_alloc *a)
>> +{
>> +    unsigned long va = allocate_ondemand(n, alignment);
>> +
>> +    if (!va)
>> +            return NULL;
>> +
>> +    if (do_map_frames(va, mfns, n, stride, incr, id, err, prot, a))
>> +            return NULL;
>> +
>> +    return (void *) va;
>> +}
>> +
>> +/*
>> + * Unmap nun_frames frames mapped at virtual address va.
>> + */
> Typo in num_frames

Ack.

>> +#define UNMAP_BATCH ((STACK_SIZE / 2) / sizeof(multicall_entry_t))
>> +int unmap_frames(unsigned long va, unsigned long num_frames)
>> +{
>> +#ifdef CONFIG_PARAVIRT
>> +    unsigned long i, n = UNMAP_BATCH;
>> +    multicall_entry_t call[n];
>> +    int ret;
>> +#else
>> +    pgentry_t *pte;
>> +#endif
>> +
>> +    UK_ASSERT(!((unsigned long) va & ~PAGE_MASK));
>> +
>> +    uk_printd(DLVL_EXTRA, "va=%p, num=0x%lx\n", (void *) va, num_frames);
> Same thing; it would be good to tell what is happening to va in the debug
> message

Ack.

>> +
>> +    while (num_frames) {
>> +#ifdef CONFIG_PARAVIRT
>> +            if (n > num_frames)
>> +                    n = num_frames;
>> +
>> +            for (i = 0; i < n; i++) {
>> +                    int arg = 0;
>> +                    /* simply update the PTE for the VA and invalidate TLB 
>> */
> That line is over 80 characters, and simple enough to fix. Please run
> checkpatch.pl. The last patch in the series also has a couple of
> checkpatch warnings.

Ack.

>> +                    call[i].op = __HYPERVISOR_update_va_mapping;
>> +                    call[i].args[arg++] = va;
>> +                    call[i].args[arg++] = 0;
>> +#ifdef __i386__
> I believe we do not support __i386__, do we?

We should remove all i386-related code once we have a final
resolution on whether we'll ever support it.

>> +                    call[i].args[arg++] = 0;
>> +#endif
>> +                    call[i].args[arg++] = UVMF_INVLPG;
>> +
>> +                    va += PAGE_SIZE;
>> +            }
>> +
>> +            ret = HYPERVISOR_multicall(call, n);
> Seems that multicall is unnecessary here. The only operation batched is
> 'update_va_mapping'. Calling HYPERVISOR_update_va_mapping directly would
> simplify the code a bit.

update_va_mapping doesn't support more than one virtual address; that's
why it's used in batch mode via HYPERVISOR_multicall.

>> +            if (ret) {
>> +                    uk_printd(DLVL_ERR,
>> +                            "update_va_mapping hypercall failed with 
>> rc=%d.\n", ret);
>> +                    return -ret;
>> +            }
>> +
>> +            for (i = 0; i < n; i++) {
>> +                    if (call[i].result) {
>> +                            uk_printd(DLVL_ERR,
>> +                                    "update_va_mapping failed for with 
>> rc=%d.\n", ret);
>> +                            return -(call[i].result);
>> +                    }
>> +            }
>> +            num_frames -= n;
>> +#else
>> +            pte = get_pte(va);
>> +            if (pte) {
>> +                    UK_ASSERT(!(*pte & _PAGE_PSE));
>> +                    *pte = 0;
>> +                    invlpg(va);
>> +            }
>> +            va += PAGE_SIZE;
>> +            num_frames--;
>> +#endif
>> +    }
>> +    return 0;
>> +}
>> +
>> +/*
>>   * Mark portion of the address space read only.
>>   */
>>  extern struct shared_info _libxenplat_shared_info;
>> @@ -352,3 +660,16 @@ void _init_mem_prepare(unsigned long *start_pfn, 
>> unsigned long *max_pfn)
>>  #error "Please port (see Mini-OS's arch_mm_preinit())"
>>  #endif
>>  }
>> +
>> +/*
>> + * Reserve an area of virtual address space for mappings
>> + */
>> +
>> +void _init_mem_demand_area(unsigned long start, unsigned long page_num)
>> +{
>> +    demand_map_area_start = start;
>> +    demand_map_area_end = demand_map_area_start + page_num * PAGE_SIZE;
>> +
>> +    uk_printd(DLVL_INFO, "Demand map pfns at %lx-%lx.\n",
>> +            demand_map_area_start, demand_map_area_end);
>> +}
>> diff --git a/plat/xen/x86/setup.c b/plat/xen/x86/setup.c
>> index ad603de..8971f6f 100644
>> --- a/plat/xen/x86/setup.c
>> +++ b/plat/xen/x86/setup.c
>> @@ -165,7 +165,18 @@ static inline void _init_mem(void)
>>  #if CONFIG_UKPLAT_MEMRNAME
>>      _libxenplat_mrd[0].name  = "heap";
>>  #endif
>> -    _libxenplat_mrd_num = 1;
>> +
>> +    /* demand area */
>> +    _libxenplat_mrd[1].base  = (void *) VIRT_DEMAND_AREA;
>> +    _libxenplat_mrd[1].len   = DEMAND_MAP_PAGES * PAGE_SIZE;
>> +    _libxenplat_mrd[1].flags = (UKPLAT_MEMRF_RESERVED);
> Unnecessary parentheses around UKPLAT_MEMRF_RESERVED

Ack.

>> +#if CONFIG_UKPLAT_MEMRNAME
>> +    _libxenplat_mrd[1].name  = "demand";
>> +#endif
>> +    _init_mem_demand_area((unsigned long) _libxenplat_mrd[1].base,
>> +                    DEMAND_MAP_PAGES);
>> +
>> +    _libxenplat_mrd_num = 2;
>>  }
>>  
>>  void _libxenplat_x86entry(void *start_info) __noreturn;
>> -- 
>> 2.11.0
>>
> 

_______________________________________________
Minios-devel mailing list
Minios-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/minios-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.