|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH RFC] xen/gnttab: Avoid TLB flush if the grant mapped page was not accessed
On 04/07/14 13:54, Malcolm Crossley wrote:
> Implement an x86 specific optimisation to avoid a TLB flush if the grant
> mapped page was not accessed by a CPU whilst it was mapped.
>
> The optimisation works by checking the accessed page bit of the pte to
> determine if the page has been accessed by a cpu. The x86 specification
> details
> that the processor atomically sets the accessed bit before caching a PTE into
> a TLB.
>
> The grant mapped pte is now set up without the accessed or dirty bits set and
> so we can determine if any CPU has accessed that page via the grant mapping
> pte.
>
> I believe Xen drops all attempts by a guest to clear the accessed or dirty bits
> on the pte because Xen is the pte owner. This should prevent a malicious guest
> from deliberately circumventing the TLB flush.
>
> Blkback benefits from this optimisation because it does not access the data
> itself from the CPU.
>
> Netback can benefit from the optimisation depending on how much of the header
> is put into the first fragment because the first fragment is grant copied on
> recent netback implementations.
>
> Signed-off-by: Malcolm Crossley <malcolm.crossley@xxxxxxxxxx>
Probably worth a comment regarding the improvements we have observed.
100% reduction of TLB flushes on the blk path and between 80~90%
reduction on the net path.
>
> diff -r 3e0ee081c616 -r 38f320029371 xen/arch/arm/mm.c
> --- a/xen/arch/arm/mm.c
> +++ b/xen/arch/arm/mm.c
> @@ -1216,7 +1216,7 @@ int create_grant_host_mapping(unsigned l
> }
>
> int replace_grant_host_mapping(unsigned long addr, unsigned long mfn,
> - unsigned long new_addr, unsigned int flags)
> + unsigned long new_addr, unsigned int flags, int *page_accessed)
pragmatically, this should be a bool_t rather than an int.
> {
> unsigned long gfn = (unsigned long)(addr >> PAGE_SHIFT);
> struct domain *d = current->domain;
> @@ -1225,6 +1225,7 @@ int replace_grant_host_mapping(unsigned
> return GNTST_general_error;
>
> guest_physmap_remove_page(d, gfn, mfn, 0);
> + *page_accessed = 1;
>
> return GNTST_okay;
> }
> diff -r 3e0ee081c616 -r 38f320029371 xen/arch/x86/mm.c
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -3901,7 +3901,8 @@ static int create_grant_va_mapping(
> }
>
> static int replace_grant_va_mapping(
> - unsigned long addr, unsigned long frame, l1_pgentry_t nl1e, struct vcpu
> *v)
> + unsigned long addr, unsigned long frame, l1_pgentry_t nl1e, struct vcpu
> *v,
> + int *page_accessed)
> {
> l1_pgentry_t *pl1e, ol1e;
> unsigned long gl1mfn;
> @@ -3947,12 +3948,13 @@ static int replace_grant_va_mapping(
> }
>
> /* Delete pagetable entry. */
> - if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0)) )
> + if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 1)) )
> {
> MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
> rc = GNTST_general_error;
> goto unlock_and_out;
> }
> + *page_accessed = !!(l1e_get_flags(*pl1e) & _PAGE_ACCESSED);
>
> unlock_and_out:
> page_unlock(l1pg);
> @@ -3963,9 +3965,9 @@ static int replace_grant_va_mapping(
> }
>
> static int destroy_grant_va_mapping(
> - unsigned long addr, unsigned long frame, struct vcpu *v)
> + unsigned long addr, unsigned long frame, struct vcpu *v, int
> *page_accessed)
> {
> - return replace_grant_va_mapping(addr, frame, l1e_empty(), v);
> + return replace_grant_va_mapping(addr, frame, l1e_empty(), v,
> page_accessed);
> }
>
> static int create_grant_p2m_mapping(uint64_t addr, unsigned long frame,
> @@ -4000,7 +4002,7 @@ int create_grant_host_mapping(uint64_t a
> return create_grant_p2m_mapping(addr, frame, flags, cache_flags);
>
> grant_pte_flags =
> - _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_GNTTAB;
> + _PAGE_PRESENT | _PAGE_GNTTAB;
> if ( cpu_has_nx )
> grant_pte_flags |= _PAGE_NX_BIT;
>
> @@ -4048,7 +4050,8 @@ static int replace_grant_p2m_mapping(
> }
>
> int replace_grant_host_mapping(
> - uint64_t addr, unsigned long frame, uint64_t new_addr, unsigned int
> flags)
> + uint64_t addr, unsigned long frame, uint64_t new_addr, unsigned int
> flags,
> + int *page_accessed)
> {
> struct vcpu *curr = current;
> l1_pgentry_t *pl1e, ol1e;
> @@ -4069,7 +4072,7 @@ int replace_grant_host_mapping(
> }
>
> if ( !new_addr )
> - return destroy_grant_va_mapping(addr, frame, curr);
> + return destroy_grant_va_mapping(addr, frame, curr, page_accessed);
>
> pl1e = guest_map_l1e(curr, new_addr, &gl1mfn);
> if ( !pl1e )
> @@ -4117,7 +4120,7 @@ int replace_grant_host_mapping(
> put_page(l1pg);
> guest_unmap_l1e(curr, pl1e);
>
> - rc = replace_grant_va_mapping(addr, frame, ol1e, curr);
> + rc = replace_grant_va_mapping(addr, frame, ol1e, curr, page_accessed);
> if ( rc && !paging_mode_refcounts(curr->domain) )
> put_page_from_l1e(ol1e, curr->domain);
>
> diff -r 3e0ee081c616 -r 38f320029371 xen/common/grant_table.c
> --- a/xen/common/grant_table.c
> +++ b/xen/common/grant_table.c
> @@ -73,6 +73,7 @@ struct gnttab_unmap_common {
>
> /* Return */
> int16_t status;
> + int page_accessed;
>
> /* Shared state beteen *_unmap and *_unmap_complete */
> u16 flags;
> @@ -81,6 +82,12 @@ struct gnttab_unmap_common {
> struct domain *rd;
> };
>
> +#ifndef NDEBUG
> +atomic_t grant_unmap_tlb_flush_avoided;
> +atomic_t grant_unmap_tlb_flush_done;
> +atomic_t grant_unmap_operations;
> +#endif
While these were useful during development, they would not be useful if
the patch were committed to xen. I suggest they be dropped, and
possibly provided as second "for people interested in the savings they
are getting" patch.
> +
> /* Number of unmap operations that are done between each tlb flush */
> #define GNTTAB_UNMAP_BATCH_SIZE 32
>
> @@ -844,6 +851,7 @@ static void
> ld = current->domain;
> lgt = ld->grant_table;
>
> + op->page_accessed = 0;
> op->frame = (unsigned long)(op->dev_bus_addr >> PAGE_SHIFT);
>
> if ( unlikely(op->handle >= lgt->maptrack_limit) )
> @@ -924,7 +932,7 @@ static void
> {
> if ( (rc = replace_grant_host_mapping(op->host_addr,
> op->frame, op->new_addr,
> - op->flags)) < 0 )
> + op->flags,
> &op->page_accessed)) < 0 )
> goto unmap_out;
>
> ASSERT(act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask));
> @@ -1082,20 +1090,25 @@ static long
> gnttab_unmap_grant_ref(
> XEN_GUEST_HANDLE_PARAM(gnttab_unmap_grant_ref_t) uop, unsigned int count)
> {
> - int i, c, partial_done, done = 0;
> + int i, c, partial_done, done = 0, grant_map_accessed = 0;
> struct gnttab_unmap_grant_ref op;
> struct gnttab_unmap_common common[GNTTAB_UNMAP_BATCH_SIZE];
>
> +#ifndef NDEBUG
> + atomic_inc(&grant_unmap_operations);
> +#endif
> while ( count != 0 )
> {
> c = min(count, (unsigned int)GNTTAB_UNMAP_BATCH_SIZE);
> partial_done = 0;
> + grant_map_accessed = 0;
>
> for ( i = 0; i < c; i++ )
> {
> if ( unlikely(__copy_from_guest(&op, uop, 1)) )
> goto fault;
> __gnttab_unmap_grant_ref(&op, &(common[i]));
> + grant_map_accessed |= common[i].page_accessed;
> ++partial_done;
> if ( unlikely(__copy_field_to_guest(uop, &op, status)) )
> goto fault;
> @@ -1103,6 +1116,14 @@ gnttab_unmap_grant_ref(
> }
>
> gnttab_flush_tlb(current->domain);
> + if ( grant_map_accessed ) {
Coding style.
~Andrew
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |