Re: [Xen-devel] [PATCH v2 7/8] xen: arm: use superpages in p2m when pages are suitably aligned
On Wed, 11 Jun 2014, Ian Campbell wrote:
> +static bool_t is_mapping_aligned(const paddr_t start_gpaddr,
> + const paddr_t end_gpaddr,
> + const paddr_t maddr,
> + const paddr_t level_size)
> +{
> + const paddr_t level_mask = level_size - 1;
> +
> + /* No hardware superpages at level 0 */
> + if ( level_size == ZEROETH_SIZE )
> + return false;
> +
> + /*
> + * A range smaller than the size of a superpage at this level
> + * cannot be superpage aligned.
> + */
> + if ( ( end_gpaddr - start_gpaddr ) < level_size - 1 )
> + return false;
Shouldn't this be
if ( ( end_gpaddr - start_gpaddr ) < level_size )
?
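For example (assuming end_gpaddr is exclusive here, which is what the
"while ( addr < end_gpaddr )" loop further down suggests), a range that
is one byte short of a superpage slips past the current check:

    /* Purely illustrative values */
    const paddr_t start = 0x40000000;              /* 2MiB aligned         */
    const paddr_t end   = start + SECOND_SIZE - 1; /* 1 byte short of 2MiB */

    /* Current test:  (end - start) < SECOND_SIZE - 1
     *            ->  (SECOND_SIZE - 1) < (SECOND_SIZE - 1)  -> false,
     * so the too-small range is not rejected and we would go on to
     * install a full 2MiB superpage over it.
     *
     * With "< level_size":
     *                (SECOND_SIZE - 1) < SECOND_SIZE        -> true,
     * and the range is rejected as expected.
     */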
> + /* Both the gpaddr and maddr must be aligned */
> + if ( start_gpaddr & level_mask )
> + return false;
> + if ( maddr & level_mask )
> + return false;
> + return true;
> +}
> +
> +#define P2M_ONE_DESCEND 0
> +#define P2M_ONE_PROGRESS_NOP 0x1
> +#define P2M_ONE_PROGRESS 0x10
> +
> +/*
> + * 0 == (P2M_ONE_DESCEND) continue to descend the tree
> + * +ve == (P2M_ONE_PROGRESS_*) handled at this level, continue, flush,
> + * entry, addr and maddr updated. Return value is an
> + * indication of the amount of work done (for preemption).
> + * -ve == (-Exxx) error.
> + */
> +static int apply_one_level(struct domain *d,
> + lpae_t *entry,
> + unsigned int level,
> + bool_t flush_cache,
> + enum p2m_operation op,
> + paddr_t start_gpaddr,
> + paddr_t end_gpaddr,
> + paddr_t *addr,
> + paddr_t *maddr,
> + bool_t *flush,
> + int mattr,
> + p2m_type_t t)
> +{
> + /* Helpers to lookup the properties of each level */
> + const paddr_t level_sizes[] =
> + { ZEROETH_SIZE, FIRST_SIZE, SECOND_SIZE, THIRD_SIZE };
> + const paddr_t level_masks[] =
> + { ZEROETH_MASK, FIRST_MASK, SECOND_MASK, THIRD_MASK };
> + const paddr_t level_shifts[] =
> + { ZEROETH_SHIFT, FIRST_SHIFT, SECOND_SHIFT, THIRD_SHIFT };
> + const paddr_t level_size = level_sizes[level];
> + const paddr_t level_mask = level_masks[level];
> + const paddr_t level_shift = level_shifts[level];
> +
> + struct p2m_domain *p2m = &d->arch.p2m;
> + lpae_t pte;
> + const lpae_t orig_pte = *entry;
> + int rc;
> +
> + BUG_ON(level > 3);
> +
> + switch ( op )
> + {
> + case ALLOCATE:
> + ASSERT(level < 3 || !p2m_valid(orig_pte));
> + ASSERT(*maddr == 0);
> +
> + if ( p2m_valid(orig_pte) )
> + return P2M_ONE_DESCEND;
> +
> + if ( is_mapping_aligned(*addr, end_gpaddr, 0, level_size) )
> + {
> + struct page_info *page;
> +
> + page = alloc_domheap_pages(d, level_shift - PAGE_SHIFT, 0);
> + if ( page )
> + {
> + pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t);
> + if ( level != 3 )
> + pte.p2m.table = 0;
> + p2m_write_pte(entry, pte, flush_cache);
> + p2m->stats.mappings[level]++;
> + return P2M_ONE_PROGRESS;
> + }
> + else if ( level == 3 )
> + return -ENOMEM;
> + }
> +
> + BUG_ON(level == 3); /* L3 is always superpage aligned */
> +
> + /*
> + * If we get here then we failed to allocate a sufficiently
> + * large contiguous region for this level (which can't be
> + * L3). Create a page table and continue to descend so we try
> + * smaller allocations.
> + */
> + rc = p2m_create_table(d, entry, 0, flush_cache);
> + if ( rc < 0 )
> + return rc;
> +
> + return P2M_ONE_DESCEND;
> +
> + case INSERT:
Shouldn't you add a
if ( p2m_valid(orig_pte) )
return P2M_ONE_DESCEND;
test here too?
> + if ( is_mapping_aligned(*addr, end_gpaddr, *maddr, level_size) &&
> + /* We do not handle replacing an existing table with a superpage */
> + (level == 3 || !p2m_table(orig_pte)) )
Why the difference with ALLOCATE?
> + {
> + /* New mapping is superpage aligned, make it */
> + pte = mfn_to_p2m_entry(*maddr >> PAGE_SHIFT, mattr, t);
> + if ( level < 3 )
> + pte.p2m.table = 0; /* Superpage entry */
> +
> + p2m_write_pte(entry, pte, flush_cache);
> +
> + *flush |= p2m_valid(orig_pte);
> +
> + *addr += level_size;
> + *maddr += level_size;
> +
> + if ( p2m_valid(orig_pte) )
> + {
> + /*
> + * We can't currently get here for an existing table
> + * mapping, since we don't handle replacing an
> + * existing table with a superpage. If we did we would
> + * need to handle freeing (and accounting) for the bit
> + * of the p2m tree which we would be about to lop off.
> + */
> + BUG_ON(level < 3 && p2m_table(orig_pte));
> + if ( level == 3 )
> + p2m_put_l3_page(orig_pte);
> + }
> + else /* New mapping */
> + p2m->stats.mappings[level]++;
> +
> + return P2M_ONE_PROGRESS;
> + }
> + else
> + {
> + /* New mapping is not superpage aligned, create a new table entry */
> + BUG_ON(level == 3); /* L3 is always superpage aligned */
> +
> + /* Not present -> create table entry and descend */
> + if ( !p2m_valid(orig_pte) )
> + {
> + rc = p2m_create_table(d, entry, 0, flush_cache);
> + if ( rc < 0 )
> + return rc;
> + return P2M_ONE_DESCEND;
> + }
> +
> + /* Existing superpage mapping -> shatter and descend */
> + if ( p2m_entry(orig_pte) )
> + {
> + *flush = true;
> + rc = p2m_create_table(d, entry,
> + level_shift - PAGE_SHIFT, flush_cache);
> + if ( rc < 0 )
> + return rc;
> +
> + p2m->stats.shattered[level]++;
> + p2m->stats.mappings[level]--;
> + p2m->stats.mappings[level+1] += LPAE_ENTRIES;
> + } /* else: an existing table mapping -> descend */
> +
> + BUG_ON(!entry->p2m.table);
> +
> + return P2M_ONE_DESCEND;
> + }
> +
> + break;
> +
> + case RELINQUISH:
> + case REMOVE:
> + if ( !p2m_valid(orig_pte) )
> + {
> + /* Progress up to next boundary */
> + *addr = (*addr + level_size) & level_mask;
> + return P2M_ONE_PROGRESS_NOP;
> + }
You might as well have a single !p2m_valid check before the switch
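Only a sketch, and assuming the op enumeration stays as in this patch
(ALLOCATE and INSERT still want to handle an invalid entry themselves),
but something like:

    /* Hoist the common "nothing mapped here, skip to the next boundary"
     * handling shared by the removal and cache flush operations. */
    if ( !p2m_valid(orig_pte) &&
         (op == REMOVE || op == RELINQUISH || op == CACHEFLUSH) )
    {
        *addr = (*addr + level_size) & level_mask;
        return P2M_ONE_PROGRESS_NOP;
    }

    switch ( op )
    {
        /* ... ALLOCATE and INSERT cases unchanged ... */
    }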
> + if ( level < 3 && p2m_table(orig_pte) )
> + return P2M_ONE_DESCEND;
> +
> + *flush = true;
> +
> + memset(&pte, 0x00, sizeof(pte));
> + p2m_write_pte(entry, pte, flush_cache);
> +
> + *addr += level_size;
> +
> + p2m->stats.mappings[level]--;
> +
> + if ( level == 3 )
> + p2m_put_l3_page(orig_pte);
> +
> + /*
> + * This is still a single pte write, no matter the level, so no need to
> + * scale.
> + */
> + return P2M_ONE_PROGRESS;
> +
> + case CACHEFLUSH:
> + if ( !p2m_valid(orig_pte) )
> + {
> + *addr = (*addr + level_size) & level_mask;
> + return P2M_ONE_PROGRESS_NOP;
> + }
> +
> + /*
> + * could flush up to the next boundary, but would need to be
> + * careful about preemption, so just do one page now and loop.
> + */
> + *addr += PAGE_SIZE;
> + if ( p2m_is_ram(orig_pte.p2m.type) )
> + {
> + flush_page_to_ram(orig_pte.p2m.base + third_table_offset(*addr));
> + return P2M_ONE_PROGRESS;
> + }
> + else
> + return P2M_ONE_PROGRESS_NOP;
> + }
> +
> + BUG(); /* Should never get here */
> +}
> +
> static int apply_p2m_changes(struct domain *d,
> enum p2m_operation op,
> paddr_t start_gpaddr,
> @@ -346,9 +650,7 @@ static int apply_p2m_changes(struct domain *d,
> cur_first_offset = ~0,
> cur_second_offset = ~0;
> unsigned long count = 0;
> - unsigned int flush = 0;
> - bool_t populate = (op == INSERT || op == ALLOCATE);
> - lpae_t pte;
> + bool_t flush = false;
> bool_t flush_pt;
>
> /* Some IOMMU don't support coherent PT walk. When the p2m is
> @@ -362,6 +664,25 @@ static int apply_p2m_changes(struct domain *d,
> addr = start_gpaddr;
> while ( addr < end_gpaddr )
> {
> + /*
> + * Arbitrarily, preempt every 512 operations or 8192 nops.
> + * 512*P2M_ONE_PROGRESS == 8192*P2M_ONE_PROGRESS_NOP == 0x2000
> + *
> + * count is initialised to 0 above, so we are guaranteed to
> + * always make at least one pass.
> + */
> +
> + if ( op == RELINQUISH && count >= 0x2000 )
> + {
> + if ( hypercall_preempt_check() )
> + {
> + p2m->lowest_mapped_gfn = addr >> PAGE_SHIFT;
> + rc = -ERESTART;
> + goto out;
> + }
> + count = 0;
> + }
> +
> if ( cur_first_page != p2m_first_level_index(addr) )
> {
> if ( first ) unmap_domain_page(first);
> @@ -374,22 +695,18 @@ static int apply_p2m_changes(struct domain *d,
> cur_first_page = p2m_first_level_index(addr);
> }
>
> - if ( !p2m_valid(first[first_table_offset(addr)]) )
> - {
> - if ( !populate )
> - {
> - addr = (addr + FIRST_SIZE) & FIRST_MASK;
> - continue;
> - }
> + /* We only use a 3 level p2m at the moment, so no level 0;
> + * current hardware doesn't support superpage mappings at
> + * level 0 anyway */
>
> - rc = p2m_create_table(d, &first[first_table_offset(addr)],
> - flush_pt);
> - if ( rc < 0 )
> - {
> - printk("p2m_populate_ram: L1 failed\n");
> - goto out;
> - }
> - }
> + rc = apply_one_level(d, &first[first_table_offset(addr)],
> + 1, flush_pt, op,
> + start_gpaddr, end_gpaddr,
> + &addr, &maddr, &flush,
> + mattr, t);
> + if ( rc < 0 ) goto out;
> + count += rc;
Adding an rc to a counter looks a bit strange.
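The arithmetic does line up (512 * P2M_ONE_PROGRESS and
8192 * P2M_ONE_PROGRESS_NOP both come to the 0x2000 threshold used below),
but perhaps something along these lines (just a sketch) would make the
relationship explicit rather than implicit:

    /* Tie the per-level return values to the preemption threshold. */
    BUILD_BUG_ON(512  * P2M_ONE_PROGRESS     != 0x2000);
    BUILD_BUG_ON(8192 * P2M_ONE_PROGRESS_NOP != 0x2000);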
> + if ( rc != P2M_ONE_DESCEND ) continue;
>
> BUG_ON(!p2m_valid(first[first_table_offset(addr)]));
>
> @@ -401,23 +718,16 @@ static int apply_p2m_changes(struct domain *d,
> }
> /* else: second already valid */
>
> - if ( !p2m_valid(second[second_table_offset(addr)]) )
> - {
> - if ( !populate )
> - {
> - addr = (addr + SECOND_SIZE) & SECOND_MASK;
> - continue;
> - }
> -
> - rc = p2m_create_table(d, &second[second_table_offset(addr)],
> - flush_pt);
> - if ( rc < 0 ) {
> - printk("p2m_populate_ram: L2 failed\n");
> - goto out;
> - }
> - }
> + rc = apply_one_level(d,&second[second_table_offset(addr)],
> + 2, flush_pt, op,
> + start_gpaddr, end_gpaddr,
> + &addr, &maddr, &flush,
> + mattr, t);
> + if ( rc < 0 ) goto out;
> + count += rc;
> + if ( rc != P2M_ONE_DESCEND ) continue;
>
> - BUG_ON(!second[second_table_offset(addr)].p2m.valid);
> + BUG_ON(!p2m_valid(second[second_table_offset(addr)]));
>
> if ( cur_second_offset != second_table_offset(addr) )
> {
> @@ -427,84 +737,15 @@ static int apply_p2m_changes(struct domain *d,
> cur_second_offset = second_table_offset(addr);
> }
>
> - pte = third[third_table_offset(addr)];
> -
> - flush |= pte.p2m.valid;
> -
> - switch (op) {
> - case ALLOCATE:
> - {
> - /* Allocate a new RAM page and attach */
> - struct page_info *page;
> -
> - ASSERT(!pte.p2m.valid);
> - rc = -ENOMEM;
> - page = alloc_domheap_page(d, 0);
> - if ( page == NULL ) {
> - printk("p2m_populate_ram: failed to allocate
> page\n");
> - goto out;
> - }
> -
> - pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t);
> -
> - p2m_write_pte(&third[third_table_offset(addr)],
> - pte, flush_pt);
> - }
> - break;
> - case INSERT:
> - {
> - if ( pte.p2m.valid )
> - p2m_put_page(pte);
> - pte = mfn_to_p2m_entry(maddr >> PAGE_SHIFT, mattr, t);
> - p2m_write_pte(&third[third_table_offset(addr)],
> - pte, flush_pt);
> - maddr += PAGE_SIZE;
> - }
> - break;
> - case RELINQUISH:
> - case REMOVE:
> - {
> - if ( !pte.p2m.valid )
> - {
> - count++;
> - break;
> - }
> -
> - p2m_put_page(pte);
> -
> - count += 0x10;
> -
> - memset(&pte, 0x00, sizeof(pte));
> - p2m_write_pte(&third[third_table_offset(addr)],
> - pte, flush_pt);
> - count++;
> - }
> - break;
> -
> - case CACHEFLUSH:
> - {
> - if ( !pte.p2m.valid || !p2m_is_ram(pte.p2m.type) )
> - break;
> -
> - flush_page_to_ram(pte.p2m.base);
> - }
> - break;
> - }
> -
> - /* Preempt every 2MiB (mapped) or 32 MiB (unmapped) - arbitrary */
> - if ( op == RELINQUISH && count >= 0x2000 )
> - {
> - if ( hypercall_preempt_check() )
> - {
> - p2m->lowest_mapped_gfn = addr >> PAGE_SHIFT;
> - rc = -ERESTART;
> - goto out;
> - }
> - count = 0;
> - }
> -
> - /* Got the next page */
> - addr += PAGE_SIZE;
> + rc = apply_one_level(d, &third[third_table_offset(addr)],
> + 3, flush_pt, op,
> + start_gpaddr, end_gpaddr,
> + &addr, &maddr, &flush,
> + mattr, t);
> + if ( rc < 0 ) goto out;
> + /* L3 had better have done something! We cannot descend any further */
> + BUG_ON(rc == P2M_ONE_DESCEND);
> + count += rc;
> }
>
> if ( flush )
> diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
> index 911d32d..821d9ef 100644
> --- a/xen/include/asm-arm/p2m.h
> +++ b/xen/include/asm-arm/p2m.h
> @@ -29,6 +29,14 @@ struct p2m_domain {
> * resume the search. Apart from during teardown this can only
> * decrease. */
> unsigned long lowest_mapped_gfn;
> +
> + struct {
> + /* Number of mappings at each p2m tree level */
> + unsigned long mappings[4];
> + /* Number of times we have shattered a mapping
> + * at each p2m tree level. */
> + unsigned long shattered[4];
> + } stats;
> };
Are you introducing stats.mappings just for statistics gathering?
If so, you should write it in the comment.
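If they really are only ever read back for reporting (e.g. from
p2m_dump_info), something along these lines would do (hypothetical
wording, of course):

    struct {
        /* Purely informational counters, not used to drive any p2m
         * decision: number of mappings at each p2m tree level. */
        unsigned long mappings[4];
        /* Number of times a mapping at each p2m tree level has been
         * shattered into smaller mappings. */
        unsigned long shattered[4];
    } stats;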
> /* List of possible type for each page in the p2m entry.
> @@ -79,6 +87,9 @@ int p2m_alloc_table(struct domain *d);
> void p2m_save_state(struct vcpu *p);
> void p2m_restore_state(struct vcpu *n);
>
> +/* */
> +void p2m_dump_info(struct domain *d);
> +
> /* Look up the MFN corresponding to a domain's PFN. */
> paddr_t p2m_lookup(struct domain *d, paddr_t gpfn, p2m_type_t *t);
>
> --
> 1.7.10.4
>
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel