Re: [Xen-devel] [PATCH v5 1/7] x86/xpti: avoid copying L4 page table contents when possible
On 06/04/18 08:52, Juergen Gross wrote:
> For mitigation of Meltdown the current L4 page table is copied to the
> cpu local root page table each time a 64 bit pv guest is entered.
>
> Copying can be avoided in cases where the guest L4 page table hasn't
> been modified while running the hypervisor, e.g. when handling
> interrupts or any hypercall not modifying the L4 page table or %cr3.
>
> So add a per-cpu flag whether the copying should be performed and set
"flag indicating whether"
> that flag only when loading a new %cr3 or modifying the L4 page table.
> This includes synchronization of the cpu local root page table with
> other cpus, so add a special synchronization flag for that case.
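To spell the scheme out: this is a per-cpu dirty flag. Every path that
can change the active L4 or load a new %cr3 sets the flag; the
guest-entry path tests and clears it, and skips the copy when it is
clear. As a minimal C sketch (not the actual Xen code;
copy_l4_to_root_pgt() is a stand-in for the copy loop in entry.S):

    /* Writer side: anything modifying the active L4 or loading %cr3. */
    get_cpu_info()->root_pgt_changed = true;

    /* Guest entry: copy only if something changed since the last entry. */
    if ( get_cpu_info()->root_pgt_changed )
    {
        get_cpu_info()->root_pgt_changed = false;
        copy_l4_to_root_pgt();    /* stand-in for the rep movsq copy */
    }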
>
> A simple performance check (compiling the hypervisor via "make -j 4")
> in dom0 with 4 vcpus shows a significant improvement:
>
> - real time drops from 113 seconds to 109 seconds
> - system time drops from 165 seconds to 155 seconds
>
> Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
> Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
> ---
> V4:
> - move setting of root_pgt_changed flag in flush_area_local() out of
> irq disabled section (Jan Beulich)
> - move setting of root_pgt_changed in make_cr3() to _toggle_guest_pt()
> (Jan Beulich)
> - remove most conditionals in write_ptbase() (Jan Beulich)
> - don't set root_pgt_changed in do_mmu_update() for modification of
> the user page table (Jan Beulich)
>
> V3:
> - set flag locally only if affected L4 is active (Jan Beulich)
> - add setting flag to flush_area_mask() (Jan Beulich)
> - set flag in make_cr3() only if called for current active vcpu
>
> To be applied on top of Jan's "Meltdown band-aid overhead reduction"
> series
> ---
> xen/arch/x86/flushtlb.c | 3 +++
> xen/arch/x86/mm.c | 38 +++++++++++++++++++++++++-------------
> xen/arch/x86/pv/domain.c | 2 ++
> xen/arch/x86/smp.c | 2 +-
> xen/arch/x86/x86_64/asm-offsets.c | 1 +
> xen/arch/x86/x86_64/entry.S | 8 ++++++--
> xen/include/asm-x86/current.h | 8 ++++++++
> xen/include/asm-x86/flushtlb.h | 2 ++
> 8 files changed, 48 insertions(+), 16 deletions(-)
>
> diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c
> index 8a7a76b8ff..38cedf3b22 100644
> --- a/xen/arch/x86/flushtlb.c
> +++ b/xen/arch/x86/flushtlb.c
> @@ -160,5 +160,8 @@ unsigned int flush_area_local(const void *va, unsigned int flags)
>
> local_irq_restore(irqfl);
>
> + if ( flags & FLUSH_ROOT_PGTBL )
> + get_cpu_info()->root_pgt_changed = true;
> +
> return flags;
> }
> diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
> index 6d39d2c8ab..fd89685486 100644
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -504,6 +504,8 @@ void make_cr3(struct vcpu *v, mfn_t mfn)
>
> void write_ptbase(struct vcpu *v)
> {
> + if ( this_cpu(root_pgt) )
I'm not sure this conditional is wise. A call hitting here has changed
the root pgt, irrespective of whether the exit path cares.
> + get_cpu_info()->root_pgt_changed = true;
> write_cr3(v->arch.cr3);
> }
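The unconditional form being suggested would presumably be just:

    void write_ptbase(struct vcpu *v)
    {
        /*
         * The root pgt (if any) is stale from here on, whether or not
         * the exit path will consult the flag.
         */
        get_cpu_info()->root_pgt_changed = true;
        write_cr3(v->arch.cr3);
    }

with the write being harmless when XPTI is off, since there is then no
per-cpu root page table for the exit path to resync.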
>
> @@ -3701,18 +3703,28 @@ long do_mmu_update(
> break;
> rc = mod_l4_entry(va, l4e_from_intpte(req.val), mfn,
> cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
> - /*
> - * No need to sync if all uses of the page can be accounted
> - * to the page lock we hold, its pinned status, and uses on
> - * this (v)CPU.
> - * this (v)CPU.
> - */
> - if ( !rc && !cpu_has_no_xpti &&
> - ((page->u.inuse.type_info & PGT_count_mask) >
> - (1 + !!(page->u.inuse.type_info & PGT_pinned) +
> - (pagetable_get_pfn(curr->arch.guest_table) == mfn) +
> - (pagetable_get_pfn(curr->arch.guest_table_user) ==
> - mfn))) )
> - sync_guest = true;
> + if ( !rc && !cpu_has_no_xpti )
> + {
> + bool local_in_use = false;
> +
> + if ( pagetable_get_pfn(curr->arch.guest_table) == mfn )
> + {
> + local_in_use = true;
> + get_cpu_info()->root_pgt_changed = true;
> + }
> +
> + /*
> + * No need to sync if all uses of the page can be
> + * accounted to the page lock we hold, its pinned
> + * status, and uses on this (v)CPU.
> + */
> + if ( (page->u.inuse.type_info & PGT_count_mask) >
> + (1 + !!(page->u.inuse.type_info & PGT_pinned) +
> + (pagetable_get_pfn(curr->arch.guest_table_user) ==
> + mfn) +
> + local_in_use) )
These two lines can be folded together.
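(i.e. presumably folded to "mfn) + local_in_use) )" on a single line.)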
> + sync_guest = true;
> + }
> break;
>
> case PGT_writable_page:
> @@ -3827,7 +3839,7 @@ long do_mmu_update(
>
> cpumask_andnot(mask, pt_owner->dirty_cpumask, cpumask_of(cpu));
> if ( !cpumask_empty(mask) )
> - flush_mask(mask, FLUSH_TLB_GLOBAL);
> + flush_mask(mask, FLUSH_TLB_GLOBAL | FLUSH_ROOT_PGTBL);
> }
>
> perfc_add(num_page_updates, i);
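To spell out the accounting in the L4 hunk above: PGT_count_mask
extracts the number of outstanding type references on the L4 page, and
the right-hand side of the comparison counts every reference that is
provably local or already serialised. Schematically (stand-in names,
not real code):

    /*
     * References we can account for without a remote sync:
     *   1           - the type reference taken for this update
     *   pinned      - the PGT_pinned reference, if any
     *   kernel_use  - guest_table == mfn (also sets root_pgt_changed)
     *   user_use    - guest_table_user == mfn
     */
    if ( type_count > 1 + pinned + kernel_use + user_use )
        sync_guest = true;    /* another (v)CPU may have this L4 loaded */

If the count exceeds that sum, some other (v)CPU may be running on this
L4, so its per-cpu root page table has to be resynced, hence the
FLUSH_ROOT_PGTBL added to flush_mask() above.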
> diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
> index 01c62e2d45..42522a2db3 100644
> --- a/xen/arch/x86/pv/domain.c
> +++ b/xen/arch/x86/pv/domain.c
> @@ -223,6 +223,8 @@ static void _toggle_guest_pt(struct vcpu *v)
> {
> v->arch.flags ^= TF_kernel_mode;
> update_cr3(v);
> + get_cpu_info()->root_pgt_changed = true;
> +
> /* Don't flush user global mappings from the TLB. Don't tick TLB clock. */
> asm volatile ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" );
>
> diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c
> index 033dd05958..63e819ca38 100644
> --- a/xen/arch/x86/smp.c
> +++ b/xen/arch/x86/smp.c
> @@ -208,7 +208,7 @@ void invalidate_interrupt(struct cpu_user_regs *regs)
> ack_APIC_irq();
> perfc_incr(ipis);
> if ( (flags & FLUSH_VCPU_STATE) && __sync_local_execstate() )
> - flags &= ~(FLUSH_TLB | FLUSH_TLB_GLOBAL);
> + flags &= ~(FLUSH_TLB | FLUSH_TLB_GLOBAL | FLUSH_ROOT_PGTBL);
> if ( flags & ~(FLUSH_VCPU_STATE | FLUSH_ORDER_MASK) )
> flush_area_local(flush_va, flags);
> cpumask_clear_cpu(smp_processor_id(), &flush_cpumask);
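So the complete remote propagation path, using only functions from the
hunks in this patch (a call-chain sketch, not literal code):

    do_mmu_update()
      -> flush_mask(mask, FLUSH_TLB_GLOBAL | FLUSH_ROOT_PGTBL)  /* IPI */
        -> invalidate_interrupt()            /* on each CPU in mask */
          -> flush_area_local(flush_va, flags)
            -> get_cpu_info()->root_pgt_changed = true;

The FLUSH_VCPU_STATE short-circuit above presumably remains safe
because a CPU which has just synced its execstate will go through
write_ptbase(), and hence set the flag again, before re-entering the
guest.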
> diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
> index a2fea94f4c..9e2aefb00f 100644
> --- a/xen/arch/x86/x86_64/asm-offsets.c
> +++ b/xen/arch/x86/x86_64/asm-offsets.c
> @@ -143,6 +143,7 @@ void __dummy__(void)
> OFFSET(CPUINFO_shadow_spec_ctrl, struct cpu_info, shadow_spec_ctrl);
> OFFSET(CPUINFO_use_shadow_spec_ctrl, struct cpu_info, use_shadow_spec_ctrl);
> OFFSET(CPUINFO_bti_ist_info, struct cpu_info, bti_ist_info);
> + OFFSET(CPUINFO_root_pgt_changed, struct cpu_info, root_pgt_changed);
> DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
> BLANK();
>
> diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
> index 45d9842d09..30c9da5446 100644
> --- a/xen/arch/x86/x86_64/entry.S
> +++ b/xen/arch/x86/x86_64/entry.S
> @@ -128,10 +128,13 @@ restore_all_guest:
> /* Copy guest mappings and switch to per-CPU root page table. */
> mov VCPU_cr3(%rbx), %r9
> GET_STACK_END(dx)
> - mov STACK_CPUINFO_FIELD(pv_cr3)(%rdx), %rdi
> + mov STACK_CPUINFO_FIELD(pv_cr3)(%rdx), %rax
I think this assembly block is correct (based on the register
requirements at .Lrag_copy_done). However (as a check of whether I've
understood it correctly), I think it would be more obviously correct as:
mov STACK_CPUINFO_FIELD(pv_cr3)(%rdx), %rdi (no delta)
mov %rdi, %rax (move from lower down)
cmpb $0, STACK_CPUINFO_FIELD(root_pgt_changed)(%rdx)
and (IIRC) gives slightly better static instruction scheduling on
in-order processors.
~Andrew
> + cmpb $0, STACK_CPUINFO_FIELD(root_pgt_changed)(%rdx)
> + je .Lrag_copy_done
> + movb $0, STACK_CPUINFO_FIELD(root_pgt_changed)(%rdx)
> movabs $PADDR_MASK & PAGE_MASK, %rsi
> movabs $DIRECTMAP_VIRT_START, %rcx
> - mov %rdi, %rax
> + mov %rax, %rdi
> and %rsi, %rdi
> jz .Lrag_keep_cr3
> and %r9, %rsi
> @@ -148,6 +151,7 @@ restore_all_guest:
> sub $(ROOT_PAGETABLE_FIRST_XEN_SLOT - \
> ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rdi
> rep movsq
> +.Lrag_copy_done:
> mov STACK_CPUINFO_FIELD(cr4)(%rdx), %rdi
> mov %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx)
> mov %rdi, %rsi
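Rendered as C for clarity, the new entry-path logic amounts to the
following (a sketch only; the assembly above is authoritative):

    struct cpu_info *ci = get_cpu_info();

    if ( ci->root_pgt_changed )           /* the cmpb $0 / je pair */
    {
        ci->root_pgt_changed = false;     /* the movb $0 */
        copy_guest_l4_slots();            /* stand-in for the movsq copies */
    }
    /* .Lrag_copy_done: cr4 / xen_cr3 handling continues as before */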
> diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h
> index 3a0e1eef36..f2491b4423 100644
> --- a/xen/include/asm-x86/current.h
> +++ b/xen/include/asm-x86/current.h
> @@ -59,6 +59,14 @@ struct cpu_info {
> bool use_shadow_spec_ctrl;
> uint8_t bti_ist_info;
>
> + /*
> + * The following field controls copying of the L4 page table of 64-bit
> + * PV guests to the per-cpu root page table on entering the guest context.
> + * If set the L4 page table is being copied to the root page table and
> + * the field will be reset.
> + */
> + bool root_pgt_changed;
> +
> unsigned long __pad;
> /* get_stack_bottom() must be 16-byte aligned */
> };
> diff --git a/xen/include/asm-x86/flushtlb.h b/xen/include/asm-x86/flushtlb.h
> index 2cade9cbfb..052f0fa403 100644
> --- a/xen/include/asm-x86/flushtlb.h
> +++ b/xen/include/asm-x86/flushtlb.h
> @@ -103,6 +103,8 @@ void write_cr3(unsigned long cr3);
> #define FLUSH_VA_VALID 0x800
> /* Flush CPU state */
> #define FLUSH_VCPU_STATE 0x1000
> + /* Flush the per-cpu root page table */
> +#define FLUSH_ROOT_PGTBL 0x2000
>
> /* Flush local TLBs/caches. */
> unsigned int flush_area_local(const void *va, unsigned int flags);
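A caller wanting remote root page tables resynced along with a TLB
flush then does what do_mmu_update() does above:

    /* Flush TLBs and force an L4 recopy on every CPU in mask. */
    flush_mask(mask, FLUSH_TLB_GLOBAL | FLUSH_ROOT_PGTBL);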
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel