[Xen-devel] [PATCH v5] x86/xpti: Don't leak TSS-adjacent percpu data via Meltdown
From: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>

The XPTI work restricted the visibility of most of memory, but missed a few
aspects when it came to the TSS.

Given that the TSS is just an object in percpu data, the 4k mapping for it
created in setup_cpu_root_pgt() maps adjacent percpu data, making it all
leakable via Meltdown, even when XPTI is in use.

Furthermore, no care is taken to check that the TSS doesn't cross a page
boundary.  As it turns out, struct tss_struct is aligned on its size which
does prevent it straddling a page boundary.

Move the TSS into the page aligned percpu area, so no adjacent data can be
leaked.  Move the definition from setup.c to traps.c, which is a more
appropriate place for it to live.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>

Introduce / use struct tss_page.  Drop __cacheline_filler field.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

The _struct suffix on tss_struct is quite redundant.  Rename it to tss64 to
mirror the existing tss32 structure we have in HVM's Task Switch logic.

The per-cpu name having an init_ prefix is also wrong.  There is exactly one
TSS for each CPU, which is used for the lifetime of the system.  Drop the
redirection and update all callers to use tss_page properly.

Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
---
TBD: Especially with how the previous patch now works I'm unconvinced of the
     utility of the linker script alignment check.  It in particular doesn't
     check the property we're after in this patch, i.e. the fact that there's
     nothing else in the same page.

NB: Sadly get_per_cpu_var() can't also be used on the "left" side of a
    #define.
---
v5:
 * Replace link time assertion with BUILD_BUG_ON().  Rename types and
   variables.  Drop __cacheline_filler field.
v4:
 * Introduce / use struct tss_page.
v3:
 * Drop the remark about CET.  It is no longer accurate in the latest
   version of the CET spec.
v2:
 * Rebase over changes to include __aligned() within
   DEFINE_PER_CPU_PAGE_ALIGNED()
 * Drop now-unused xen/percpu.h from setup.c
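
For illustration only (not part of the patch): the property tss_page relies
on is plain C alignment padding.  Giving the embedded TSS PAGE_SIZE alignment
forces the wrapper struct's padded size up to a whole page, so
sizeof(struct tss_page) == PAGE_SIZE and nothing else can share its page in
the percpu area.  The standalone sketch below demonstrates the effect with a
simplified tss64 layout (named reserved fields standing in for the unnamed
bitfields of the real header); it is only an illustration, not the Xen code.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096

/*
 * Simplified stand-in for the 64-bit TSS (104 bytes when packed).  The
 * real struct tss64 uses unnamed bitfields for the reserved words.
 */
struct __attribute__((packed)) tss64 {
    uint32_t rsvd0;
    uint64_t rsp[3];
    uint64_t rsvd1;
    uint64_t ist[7];
    uint64_t rsvd2;
    uint16_t rsvd3, bitmap;
};

/*
 * Aligning the member to PAGE_SIZE raises the wrapper's alignment, and
 * therefore its padded size, to a full page, so no other percpu object
 * can be placed in the same page.
 */
struct tss_page {
    struct tss64 __attribute__((aligned(PAGE_SIZE))) tss;
};

_Static_assert(sizeof(struct tss64) == 104, "unexpected TSS size");
_Static_assert(sizeof(struct tss_page) == PAGE_SIZE,
               "tss_page must occupy exactly one page");

int main(void)
{
    printf("sizeof(struct tss64)    = %zu\n", sizeof(struct tss64));
    printf("sizeof(struct tss_page) = %zu\n", sizeof(struct tss_page));
    return 0;
}

This is also exactly the property which the BUILD_BUG_ON() added to
setup_cpu_root_pgt() asserts before cloning the mapping.
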
--- a/xen/arch/x86/cpu/common.c
+++ b/xen/arch/x86/cpu/common.c
@@ -707,7 +707,7 @@ void load_system_tables(void)
     unsigned long stack_bottom = get_stack_bottom(),
         stack_top = stack_bottom & ~(STACK_SIZE - 1);
 
-    struct tss_struct *tss = &this_cpu(init_tss);
+    struct tss64 *tss = &this_cpu(tss_page).tss;
     seg_desc_t *gdt =
         this_cpu(gdt) - FIRST_RESERVED_GDT_ENTRY;
     seg_desc_t *compat_gdt =
@@ -722,7 +722,7 @@ void load_system_tables(void)
         .limit = (IDT_ENTRIES * sizeof(idt_entry_t)) - 1,
     };
 
-    *tss = (struct tss_struct){
+    *tss = (struct tss64){
         /* Main stack for interrupts/exceptions. */
         .rsp0 = stack_bottom,
 
@@ -747,16 +747,10 @@ void load_system_tables(void)
         .bitmap = IOBMP_INVALID_OFFSET,
     };
 
-    _set_tssldt_desc(
-        gdt + TSS_ENTRY,
-        (unsigned long)tss,
-        offsetof(struct tss_struct, __cacheline_filler) - 1,
-        SYS_DESC_tss_avail);
-    _set_tssldt_desc(
-        compat_gdt + TSS_ENTRY,
-        (unsigned long)tss,
-        offsetof(struct tss_struct, __cacheline_filler) - 1,
-        SYS_DESC_tss_busy);
+    _set_tssldt_desc(gdt + TSS_ENTRY, (unsigned long)tss,
+                     sizeof(*tss) - 1, SYS_DESC_tss_avail);
+    _set_tssldt_desc(compat_gdt + TSS_ENTRY, (unsigned long)tss,
+                     sizeof(*tss) - 1, SYS_DESC_tss_busy);
 
     per_cpu(full_gdt_loaded, cpu) = false;
     lgdt(&gdtr);
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -796,7 +796,7 @@ static void vmx_set_host_env(struct vcpu
               (unsigned long)(this_cpu(gdt) - FIRST_RESERVED_GDT_ENTRY));
     __vmwrite(HOST_IDTR_BASE, (unsigned long)idt_tables[cpu]);
 
-    __vmwrite(HOST_TR_BASE, (unsigned long)&per_cpu(init_tss, cpu));
+    __vmwrite(HOST_TR_BASE, (unsigned long)&per_cpu(tss_page, cpu).tss);
 
     __vmwrite(HOST_SYSENTER_ESP, get_stack_bottom());
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -16,7 +16,6 @@
 #include <xen/domain_page.h>
 #include <xen/version.h>
 #include <xen/gdbstub.h>
-#include <xen/percpu.h>
 #include <xen/hypercall.h>
 #include <xen/keyhandler.h>
 #include <xen/numa.h>
@@ -100,8 +99,6 @@ unsigned long __read_mostly xen_phys_sta
 
 unsigned long __read_mostly xen_virt_end;
 
-DEFINE_PER_CPU(struct tss_struct, init_tss);
-
 char __section(".bss.stack_aligned") __aligned(STACK_SIZE)
     cpu0_stack[STACK_SIZE];
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -825,7 +825,11 @@ static int setup_cpu_root_pgt(unsigned i
     if ( !rc )
         rc = clone_mapping(idt_tables[cpu], rpt);
     if ( !rc )
-        rc = clone_mapping(&per_cpu(init_tss, cpu), rpt);
+    {
+        BUILD_BUG_ON(sizeof(this_cpu(tss_page)) != PAGE_SIZE);
+
+        rc = clone_mapping(&per_cpu(tss_page, cpu), rpt);
+    }
     if ( !rc )
         rc = clone_mapping((void *)per_cpu(stubs.addr, cpu), rpt);
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -108,6 +108,12 @@ idt_entry_t __section(".bss.page_aligned
 /* Pointer to the IDT of every CPU. */
 idt_entry_t *idt_tables[NR_CPUS] __read_mostly;
 
+/*
+ * The TSS is smaller than a page, but we give it a full page to avoid
+ * adjacent per-cpu data leaking via Meltdown when XPTI is in use.
+ */
+DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_page, tss_page);
+
 bool (*ioemul_handle_quirk)(
     u8 opcode, char *io_emul_stub, struct cpu_user_regs *regs);
@@ -559,7 +565,7 @@ void show_stack_overflow(unsigned int cp
     printk("Valid stack range: %p-%p, sp=%p, tss.rsp0=%p\n",
            (void *)esp_top, (void *)esp_bottom, (void *)esp,
-           (void *)per_cpu(init_tss, cpu).rsp0);
+           (void *)per_cpu(tss_page, cpu).tss.rsp0);
 
     /*
      * Trigger overflow trace if %esp is anywhere within the guard page, or
@@ -1897,7 +1903,7 @@ static void __init set_intr_gate(unsigne
 
 void load_TR(void)
 {
-    struct tss_struct *tss = &this_cpu(init_tss);
+    struct tss64 *tss = &this_cpu(tss_page).tss;
     struct desc_ptr old_gdt, tss_gdt = {
         .base = (long)(this_cpu(gdt) - FIRST_RESERVED_GDT_ENTRY),
         .limit = LAST_RESERVED_GDT_BYTE
@@ -1905,14 +1911,10 @@ void load_TR(void)
 
     _set_tssldt_desc(
         this_cpu(gdt) + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
-        (unsigned long)tss,
-        offsetof(struct tss_struct, __cacheline_filler) - 1,
-        SYS_DESC_tss_avail);
+        (unsigned long)tss, sizeof(*tss) - 1, SYS_DESC_tss_avail);
     _set_tssldt_desc(
         this_cpu(compat_gdt) + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
-        (unsigned long)tss,
-        offsetof(struct tss_struct, __cacheline_filler) - 1,
-        SYS_DESC_tss_busy);
+        (unsigned long)tss, sizeof(*tss) - 1, SYS_DESC_tss_busy);
 
     /* Switch to non-compat GDT (which has B bit clear) to execute LTR. */
     asm volatile (
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -411,7 +411,7 @@ static always_inline void __mwait(unsign
 #define IOBMP_BYTES 8192
 #define IOBMP_INVALID_OFFSET 0x8000
 
-struct __packed __cacheline_aligned tss_struct {
+struct __packed tss64 {
     uint32_t :32;
     uint64_t rsp0, rsp1, rsp2;
     uint64_t :64;
@@ -422,9 +422,11 @@ struct __packed __cacheline_aligned tss_
     uint64_t ist[7];
     uint64_t :64;
     uint16_t :16, bitmap;
-    /* Pads the TSS to be cacheline-aligned (total size is 0x80). */
-    uint8_t __cacheline_filler[24];
 };
+struct tss_page {
+    struct tss64 __aligned(PAGE_SIZE) tss;
+};
+DECLARE_PER_CPU(struct tss_page, tss_page);
 
 #define IST_NONE 0UL
 #define IST_DF 1UL
@@ -463,7 +465,6 @@ static inline void disable_each_ist(idt_
 extern idt_entry_t idt_table[];
 extern idt_entry_t *idt_tables[];
 
-DECLARE_PER_CPU(struct tss_struct, init_tss);
 DECLARE_PER_CPU(root_pgentry_t *, root_pgt);
 
 extern void write_ptbase(struct vcpu *v);
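
One more illustrative aside (again not part of the patch): a TSS descriptor's
limit field holds the offset of the last valid byte, i.e. size - 1.  Dropping
__cacheline_filler therefore doesn't change the value programmed by
_set_tssldt_desc(); the old offsetof(struct tss_struct, __cacheline_filler) - 1
and the new sizeof(*tss) - 1 both come to 0x67, the architectural minimum
limit for a 64-bit TSS.  The throwaway program below checks the arithmetic
using the same simplified stand-in layout as the earlier sketch
(struct tss_struct_old here is a hypothetical model of the pre-patch
structure, not the real header):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for the architectural 64-bit TSS (104 bytes). */
struct __attribute__((packed)) tss64 {
    uint32_t rsvd0;
    uint64_t rsp[3];
    uint64_t rsvd1;
    uint64_t ist[7];
    uint64_t rsvd2;
    uint16_t rsvd3, bitmap;
};

/* Hypothetical model of the old layout: TSS plus 24 bytes of filler. */
struct __attribute__((packed)) tss_struct_old {
    struct tss64 tss;
    uint8_t __cacheline_filler[24];
};

int main(void)
{
    /* Old expression: offset of the filler, minus one. */
    printf("old limit: %#zx\n",
           offsetof(struct tss_struct_old, __cacheline_filler) - 1);

    /* New expression: size of the architectural TSS, minus one. */
    printf("new limit: %#zx\n", sizeof(struct tss64) - 1);

    return 0;
}

Both lines print 0x67.
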
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel