Re: [Xen-devel] [PATCH v2 7/8] xen: arm: use superpages in p2m when pages are suitably aligned
On Wed, 11 Jun 2014, Ian Campbell wrote:
> +static bool_t is_mapping_aligned(const paddr_t start_gpaddr,
> +                                 const paddr_t end_gpaddr,
> +                                 const paddr_t maddr,
> +                                 const paddr_t level_size)
> +{
> +    const paddr_t level_mask = level_size - 1;
> +
> +    /* No hardware superpages at level 0 */
> +    if ( level_size == ZEROETH_SIZE )
> +        return false;
> +
> +    /*
> +     * A range smaller than the size of a superpage at this level
> +     * cannot be superpage aligned.
> +     */
> +    if ( ( end_gpaddr - start_gpaddr ) < level_size - 1 )
> +        return false;

Shouldn't this be

    if ( ( end_gpaddr - start_gpaddr ) < level_size )

?
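A throwaway sketch of mine (not code from the patch) of why I read it that
way: assuming end_gpaddr is exclusive, as the "while ( addr < end_gpaddr )"
loop below suggests, a range covering exactly one superpage has
end - start == level_size, and the check should accept it.

  /* Standalone sketch, not the patch's code. SECOND_SIZE here is just a
   * stand-in for the 2MB level-2 superpage size. */
  #include <stdint.h>
  #include <stdbool.h>
  #include <stdio.h>

  typedef uint64_t paddr_t;
  #define SECOND_SIZE ((paddr_t)1 << 21)   /* 2MB */

  /* Mirrors the suggested "< level_size" size test, end exclusive. */
  static bool big_enough(paddr_t start, paddr_t end, paddr_t level_size)
  {
      return !( (end - start) < level_size );
  }

  int main(void)
  {
      paddr_t start = 10 * SECOND_SIZE;        /* superpage aligned */
      paddr_t end   = start + SECOND_SIZE;     /* exactly one superpage */

      printf("%d\n", big_enough(start, end, SECOND_SIZE)); /* prints 1 */
      return 0;
  }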
> +    /* Both the gpaddr and maddr must be aligned */
> +    if ( start_gpaddr & level_mask )
> +        return false;
> +    if ( maddr & level_mask )
> +        return false;
> +    return true;
> +}
> +
> +#define P2M_ONE_DESCEND        0
> +#define P2M_ONE_PROGRESS_NOP   0x1
> +#define P2M_ONE_PROGRESS       0x10
> +
> +/*
> + * 0   == (P2M_ONE_DESCEND) continue to descend the tree
> + * +ve == (P2M_ONE_PROGRESS_*) handled at this level, continue, flush,
> + *        entry, addr and maddr updated. Return value is an
> + *        indication of the amount of work done (for preemption).
> + * -ve == (-Exxx) error.
> + */
> +static int apply_one_level(struct domain *d,
> +                           lpae_t *entry,
> +                           unsigned int level,
> +                           bool_t flush_cache,
> +                           enum p2m_operation op,
> +                           paddr_t start_gpaddr,
> +                           paddr_t end_gpaddr,
> +                           paddr_t *addr,
> +                           paddr_t *maddr,
> +                           bool_t *flush,
> +                           int mattr,
> +                           p2m_type_t t)
> +{
> +    /* Helpers to lookup the properties of each level */
> +    const paddr_t level_sizes[] =
> +        { ZEROETH_SIZE, FIRST_SIZE, SECOND_SIZE, THIRD_SIZE };
> +    const paddr_t level_masks[] =
> +        { ZEROETH_MASK, FIRST_MASK, SECOND_MASK, THIRD_MASK };
> +    const paddr_t level_shifts[] =
> +        { ZEROETH_SHIFT, FIRST_SHIFT, SECOND_SHIFT, THIRD_SHIFT };
> +    const paddr_t level_size = level_sizes[level];
> +    const paddr_t level_mask = level_masks[level];
> +    const paddr_t level_shift = level_shifts[level];
> +
> +    struct p2m_domain *p2m = &d->arch.p2m;
> +    lpae_t pte;
> +    const lpae_t orig_pte = *entry;
> +    int rc;
> +
> +    BUG_ON(level > 3);
> +
> +    switch ( op )
> +    {
> +    case ALLOCATE:
> +        ASSERT(level < 3 || !p2m_valid(orig_pte));
> +        ASSERT(*maddr == 0);
> +
> +        if ( p2m_valid(orig_pte) )
> +            return P2M_ONE_DESCEND;
> +
> +        if ( is_mapping_aligned(*addr, end_gpaddr, 0, level_size) )
> +        {
> +            struct page_info *page;
> +
> +            page = alloc_domheap_pages(d, level_shift - PAGE_SHIFT, 0);
> +            if ( page )
> +            {
> +                pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t);
> +                if ( level != 3 )
> +                    pte.p2m.table = 0;
> +                p2m_write_pte(entry, pte, flush_cache);
> +                p2m->stats.mappings[level]++;
> +                return P2M_ONE_PROGRESS;
> +            }
> +            else if ( level == 3 )
> +                return -ENOMEM;
> +        }
> +
> +        BUG_ON(level == 3); /* L3 is always superpage aligned */
> +
> +        /*
> +         * If we get here then we failed to allocate a sufficiently
> +         * large contiguous region for this level (which can't be
> +         * L3). Create a page table and continue to descend so we try
> +         * smaller allocations.
> +         */
> +        rc = p2m_create_table(d, entry, 0, flush_cache);
> +        if ( rc < 0 )
> +            return rc;
> +
> +        return P2M_ONE_DESCEND;
> +
> +    case INSERT:

Shouldn't you add a

    if ( p2m_valid(orig_pte) )
        return P2M_ONE_DESCEND;

test here too?

> +        if ( is_mapping_aligned(*addr, end_gpaddr, *maddr, level_size) &&
> +             /* We do not handle replacing an existing table with a superpage */
> +             (level == 3 || !p2m_table(orig_pte)) )

Why the difference with ALLOCATE?

> +        {
> +            /* New mapping is superpage aligned, make it */
> +            pte = mfn_to_p2m_entry(*maddr >> PAGE_SHIFT, mattr, t);
> +            if ( level < 3 )
> +                pte.p2m.table = 0; /* Superpage entry */
> +
> +            p2m_write_pte(entry, pte, flush_cache);
> +
> +            *flush |= p2m_valid(orig_pte);
> +
> +            *addr += level_size;
> +            *maddr += level_size;
> +
> +            if ( p2m_valid(orig_pte) )
> +            {
> +                /*
> +                 * We can't currently get here for an existing table
> +                 * mapping, since we don't handle replacing an
> +                 * existing table with a superpage. If we did we would
> +                 * need to handle freeing (and accounting) for the bit
> +                 * of the p2m tree which we would be about to lop off.
> +                 */
> +                BUG_ON(level < 3 && p2m_table(orig_pte));
> +                if ( level == 3 )
> +                    p2m_put_l3_page(orig_pte);
> +            }
> +            else /* New mapping */
> +                p2m->stats.mappings[level]++;
> +
> +            return P2M_ONE_PROGRESS;
> +        }
> +        else
> +        {
> +            /* New mapping is not superpage aligned, create a new table entry */
> +            BUG_ON(level == 3); /* L3 is always superpage aligned */
> +
> +            /* Not present -> create table entry and descend */
> +            if ( !p2m_valid(orig_pte) )
> +            {
> +                rc = p2m_create_table(d, entry, 0, flush_cache);
> +                if ( rc < 0 )
> +                    return rc;
> +                return P2M_ONE_DESCEND;
> +            }
> +
> +            /* Existing superpage mapping -> shatter and descend */
> +            if ( p2m_entry(orig_pte) )
> +            {
> +                *flush = true;
> +                rc = p2m_create_table(d, entry,
> +                                      level_shift - PAGE_SHIFT, flush_cache);
> +                if ( rc < 0 )
> +                    return rc;
> +
> +                p2m->stats.shattered[level]++;
> +                p2m->stats.mappings[level]--;
> +                p2m->stats.mappings[level+1] += LPAE_ENTRIES;
> +            } /* else: an existing table mapping -> descend */
> +
> +            BUG_ON(!entry->p2m.table);
> +
> +            return P2M_ONE_DESCEND;
> +        }
> +
> +        break;
> +
> +    case RELINQUISH:
> +    case REMOVE:
> +        if ( !p2m_valid(orig_pte) )
> +        {
> +            /* Progress up to next boundary */
> +            *addr = (*addr + level_size) & level_mask;
> +            return P2M_ONE_PROGRESS_NOP;
> +        }

You might as well have a single !p2m_valid check before the switch

> +        if ( level < 3 && p2m_table(orig_pte) )
> +            return P2M_ONE_DESCEND;
> +
> +        *flush = true;
> +
> +        memset(&pte, 0x00, sizeof(pte));
> +        p2m_write_pte(entry, pte, flush_cache);
> +
> +        *addr += level_size;
> +
> +        p2m->stats.mappings[level]--;
> +
> +        if ( level == 3 )
> +            p2m_put_l3_page(orig_pte);
> +
> +        /*
> +         * This is still a single pte write, no matter the level, so no need to
> +         * scale.
> +         */
> +        return P2M_ONE_PROGRESS;
> +
> +    case CACHEFLUSH:
> +        if ( !p2m_valid(orig_pte) )
> +        {
> +            *addr = (*addr + level_size) & level_mask;
> +            return P2M_ONE_PROGRESS_NOP;
> +        }
> +
> +        /*
> +         * could flush up to the next boundary, but would need to be
> +         * careful about preemption, so just do one page now and loop.
> +         */
> +        *addr += PAGE_SIZE;
> +        if ( p2m_is_ram(orig_pte.p2m.type) )
> +        {
> +            flush_page_to_ram(orig_pte.p2m.base + third_table_offset(*addr));
> +            return P2M_ONE_PROGRESS;
> +        }
> +        else
> +            return P2M_ONE_PROGRESS_NOP;
> +    }
> +
> +    BUG(); /* Should never get here */
> +}
> +
>  static int apply_p2m_changes(struct domain *d,
>                      enum p2m_operation op,
>                      paddr_t start_gpaddr,
> @@ -346,9 +650,7 @@ static int apply_p2m_changes(struct domain *d,
>                  cur_first_offset = ~0,
>                  cur_second_offset = ~0;
>      unsigned long count = 0;
> -    unsigned int flush = 0;
> -    bool_t populate = (op == INSERT || op == ALLOCATE);
> -    lpae_t pte;
> +    bool_t flush = false;
>      bool_t flush_pt;
>
>      /* Some IOMMU don't support coherent PT walk. When the p2m is
> @@ -362,6 +664,25 @@ static int apply_p2m_changes(struct domain *d,
>      addr = start_gpaddr;
>      while ( addr < end_gpaddr )
>      {
> +        /*
> +         * Arbitrarily, preempt every 512 operations or 8192 nops.
> +         * 512*P2M_ONE_PROGRESS == 8192*P2M_ONE_PROGRESS_NOP == 0x2000
> +         *
> +         * count is initialised to 0 above, so we are guaranteed to
> +         * always make at least one pass.
> +         */
> +
> +        if ( op == RELINQUISH && count >= 0x2000 )
> +        {
> +            if ( hypercall_preempt_check() )
> +            {
> +                p2m->lowest_mapped_gfn = addr >> PAGE_SHIFT;
> +                rc = -ERESTART;
> +                goto out;
> +            }
> +            count = 0;
> +        }
> +
>          if ( cur_first_page != p2m_first_level_index(addr) )
>          {
>              if ( first ) unmap_domain_page(first);
> @@ -374,22 +695,18 @@ static int apply_p2m_changes(struct domain *d,
>              cur_first_page = p2m_first_level_index(addr);
>          }
>
> -        if ( !p2m_valid(first[first_table_offset(addr)]) )
> -        {
> -            if ( !populate )
> -            {
> -                addr = (addr + FIRST_SIZE) & FIRST_MASK;
> -                continue;
> -            }
> +        /* We only use a 3 level p2m at the moment, so no level 0,
> +         * current hardware doesn't support super page mappings at
> +         * level 0 anyway */
>
> -            rc = p2m_create_table(d, &first[first_table_offset(addr)],
> -                                  flush_pt);
> -            if ( rc < 0 )
> -            {
> -                printk("p2m_populate_ram: L1 failed\n");
> -                goto out;
> -            }
> -        }
> +        rc = apply_one_level(d, &first[first_table_offset(addr)],
> +                             1, flush_pt, op,
> +                             start_gpaddr, end_gpaddr,
> +                             &addr, &maddr, &flush,
> +                             mattr, t);
> +        if ( rc < 0 ) goto out;
> +        count += rc;

Adding an rc to a counter looks a bit strange.

> +        if ( rc != P2M_ONE_DESCEND ) continue;
>
>          BUG_ON(!p2m_valid(first[first_table_offset(addr)]));
>
> @@ -401,23 +718,16 @@ static int apply_p2m_changes(struct domain *d,
>          }
>          /* else: second already valid */
>
> -        if ( !p2m_valid(second[second_table_offset(addr)]) )
> -        {
> -            if ( !populate )
> -            {
> -                addr = (addr + SECOND_SIZE) & SECOND_MASK;
> -                continue;
> -            }
> -
> -            rc = p2m_create_table(d, &second[second_table_offset(addr)],
> -                                  flush_pt);
> -            if ( rc < 0 ) {
> -                printk("p2m_populate_ram: L2 failed\n");
> -                goto out;
> -            }
> -        }
> +        rc = apply_one_level(d, &second[second_table_offset(addr)],
> +                             2, flush_pt, op,
> +                             start_gpaddr, end_gpaddr,
> +                             &addr, &maddr, &flush,
> +                             mattr, t);
> +        if ( rc < 0 ) goto out;
> +        count += rc;
> +        if ( rc != P2M_ONE_DESCEND ) continue;
>
> -        BUG_ON(!second[second_table_offset(addr)].p2m.valid);
> +        BUG_ON(!p2m_valid(second[second_table_offset(addr)]));
>
>          if ( cur_second_offset != second_table_offset(addr) )
>          {
> @@ -427,84 +737,15 @@ static int apply_p2m_changes(struct domain *d,
>              cur_second_offset = second_table_offset(addr);
>          }
>
> -        pte = third[third_table_offset(addr)];
> -
> -        flush |= pte.p2m.valid;
> -
> -        switch (op) {
> -            case ALLOCATE:
> -                {
> -                    /* Allocate a new RAM page and attach */
> -                    struct page_info *page;
> -
> -                    ASSERT(!pte.p2m.valid);
> -                    rc = -ENOMEM;
> -                    page = alloc_domheap_page(d, 0);
> -                    if ( page == NULL ) {
> -                        printk("p2m_populate_ram: failed to allocate page\n");
> -                        goto out;
> -                    }
> -
> -                    pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t);
> -
> -                    p2m_write_pte(&third[third_table_offset(addr)],
> -                                  pte, flush_pt);
> -                }
> -                break;
> -            case INSERT:
> -                {
> -                    if ( pte.p2m.valid )
> -                        p2m_put_page(pte);
> -                    pte = mfn_to_p2m_entry(maddr >> PAGE_SHIFT, mattr, t);
> -                    p2m_write_pte(&third[third_table_offset(addr)],
> -                                  pte, flush_pt);
> -                    maddr += PAGE_SIZE;
> -                }
> -                break;
> -            case RELINQUISH:
> -            case REMOVE:
> -                {
> -                    if ( !pte.p2m.valid )
> -                    {
> -                        count++;
> -                        break;
> -                    }
> -
> -                    p2m_put_page(pte);
> -
> -                    count += 0x10;
> -
> -                    memset(&pte, 0x00, sizeof(pte));
> -                    p2m_write_pte(&third[third_table_offset(addr)],
> -                                  pte, flush_pt);
> -                    count++;
> -                }
> -                break;
> -
> -            case CACHEFLUSH:
> -                {
> -                    if ( !pte.p2m.valid || !p2m_is_ram(pte.p2m.type) )
> -                        break;
> -
> -                    flush_page_to_ram(pte.p2m.base);
> -                }
> -                break;
> -        }
> -
> -        /* Preempt every 2MiB (mapped) or 32 MiB (unmapped) - arbitrary */
> -        if ( op == RELINQUISH && count >= 0x2000 )
> -        {
> -            if ( hypercall_preempt_check() )
> -            {
> -                p2m->lowest_mapped_gfn = addr >> PAGE_SHIFT;
> -                rc = -ERESTART;
> -                goto out;
> -            }
> -            count = 0;
> -        }
> -
> -        /* Got the next page */
> -        addr += PAGE_SIZE;
> +        rc = apply_one_level(d, &third[third_table_offset(addr)],
> +                             3, flush_pt, op,
> +                             start_gpaddr, end_gpaddr,
> +                             &addr, &maddr, &flush,
> +                             mattr, t);
> +        if ( rc < 0 ) goto out;
> +        /* L3 had better have done something! We cannot descend any further */
> +        BUG_ON(rc == P2M_ONE_DESCEND);
> +        count += rc;
>      }
>
>      if ( flush )
> diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
> index 911d32d..821d9ef 100644
> --- a/xen/include/asm-arm/p2m.h
> +++ b/xen/include/asm-arm/p2m.h
> @@ -29,6 +29,14 @@ struct p2m_domain {
>       * resume the search. Apart from during teardown this can only
>       * decrease. */
>      unsigned long lowest_mapped_gfn;
> +
> +    struct {
> +        /* Number of mappings at each p2m tree level */
> +        unsigned long mappings[4];
> +        /* Number of times we have shattered a mapping
> +         * at each p2m tree level. */
> +        unsigned long shattered[4];
> +    } stats;
>  };

Are you introducing stats.mappings just for statistic gathering?
If so, you should write it in the comment.

>  /* List of possible type for each page in the p2m entry.
> @@ -79,6 +87,9 @@ int p2m_alloc_table(struct domain *d);
>  void p2m_save_state(struct vcpu *p);
>  void p2m_restore_state(struct vcpu *n);
>
> +/* */
> +void p2m_dump_info(struct domain *d);
> +
>  /* Look up the MFN corresponding to a domain's PFN. */
>  paddr_t p2m_lookup(struct domain *d, paddr_t gpfn, p2m_type_t *t);
>
> --
> 1.7.10.4
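On the "adding an rc to a counter" remark above: as I read the patch, the
positive return values double as work weights for the preemption check,
which is why they are summed into count. A standalone sketch of mine (not
part of the patch) of how the 0x2000 threshold corresponds to 512 full
operations or 8192 no-ops:

  /* Standalone illustration: the per-level return values are weights, so
   * the running count reaches the 0x2000 preemption threshold after 512
   * full operations or 8192 no-ops. */
  #include <stdio.h>

  #define P2M_ONE_PROGRESS_NOP 0x1
  #define P2M_ONE_PROGRESS     0x10
  #define PREEMPT_EVERY        0x2000   /* threshold used in apply_p2m_changes */

  int main(void)
  {
      printf("%d\n", 512 * P2M_ONE_PROGRESS == PREEMPT_EVERY);      /* prints 1 */
      printf("%d\n", 8192 * P2M_ONE_PROGRESS_NOP == PREEMPT_EVERY); /* prints 1 */
      return 0;
  }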