Re: [Xen-devel] [PATCH v2 1/3] x86/ldt: Make modify_ldt synchronous
On 07/21/2015 03:59 PM, Andy Lutomirski wrote:

modify_ldt has questionable locking and does not synchronize threads.
Improve it: redesign the locking and synchronize all threads' LDTs using
an IPI on all modifications.

This will dramatically slow down modify_ldt in multithreaded programs,
but there shouldn't be any multithreaded programs that care about
modify_ldt's performance in the first place.

Cc: stable@xxxxxxxxxxxxxxx
Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
---
 arch/x86/include/asm/desc.h        |  15 ---
 arch/x86/include/asm/mmu.h         |   3 +-
 arch/x86/include/asm/mmu_context.h |  48 ++++++-
 arch/x86/kernel/cpu/common.c       |   4 +-
 arch/x86/kernel/cpu/perf_event.c   |  12 +-
 arch/x86/kernel/ldt.c              | 247 +++++++++++++++++++------------------
 arch/x86/kernel/process_64.c       |   4 +-
 arch/x86/kernel/step.c             |   6 +-
 arch/x86/power/cpu.c               |   3 +-
 9 files changed, 192 insertions(+), 150 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index a0bf89fd2647..4e10d73cf018 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -280,21 +280,6 @@ static inline void clear_LDT(void)
         set_ldt(NULL, 0);
 }
 
-/*
- * load one particular LDT into the current CPU
- */
-static inline void load_LDT_nolock(mm_context_t *pc)
-{
-        set_ldt(pc->ldt, pc->size);
-}
-
-static inline void load_LDT(mm_context_t *pc)
-{
-        preempt_disable();
-        load_LDT_nolock(pc);
-        preempt_enable();
-}
-
 static inline unsigned long get_desc_base(const struct desc_struct *desc)
 {
         return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 09b9620a73b4..364d27481a52 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -9,8 +9,7 @@
  * we put the segment information here.
  */
 typedef struct {
-        void *ldt;
-        int size;
+        struct ldt_struct *ldt;
 
 #ifdef CONFIG_X86_64
         /* True if mm supports a task running in 32 bit compatibility mode. */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 5e8daee7c5c9..1ff121fbf366 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -34,6 +34,44 @@ static inline void load_mm_cr4(struct mm_struct *mm) {}
 #endif
 
 /*
+ * ldt_structs can be allocated, used, and freed, but they are never
+ * modified while live.
+ */
+struct ldt_struct {
+        int size;
+        int __pad;      /* keep the descriptors naturally aligned. */
+        struct desc_struct entries[];
+};

This breaks Xen which expects LDT to be page-aligned. Not sure why.

Jan, Andrew?

-boris

+
+static inline void load_mm_ldt(struct mm_struct *mm)
+{
+        struct ldt_struct *ldt;
+        DEBUG_LOCKS_WARN_ON(!irqs_disabled());
+
+        /* lockless_dereference synchronizes with smp_store_release */
+        ldt = lockless_dereference(mm->context.ldt);
+
+        /*
+         * Any change to mm->context.ldt is followed by an IPI to all
+         * CPUs with the mm active. The LDT will not be freed until
+         * after the IPI is handled by all such CPUs. This means that,
+         * if the ldt_struct changes before we return, the values we see
+         * will be safe, and the new values will be loaded before we run
+         * any user code.
+         *
+         * NB: don't try to convert this to use RCU without extreme care.
+         * We would still need IRQs off, because we don't want to change
+         * the local LDT after an IPI loaded a newer value than the one
+         * that we can see.
+         */
+
+        if (unlikely(ldt))
+                set_ldt(ldt->entries, ldt->size);
+        else
+                clear_LDT();
+}
+
+/*
  * Used for LDT copy/destruction.
  */
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
@@ -78,12 +116,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
                  * was called and then modify_ldt changed
                  * prev->context.ldt but suppressed an IPI to this CPU.
                  * In this case, prev->context.ldt != NULL, because we
-                 * never free an LDT while the mm still exists. That
-                 * means that next->context.ldt != prev->context.ldt,
-                 * because mms never share an LDT.
+                 * never set context.ldt to NULL while the mm still
+                 * exists. That means that next->context.ldt !=
+                 * prev->context.ldt, because mms never share an LDT.
                  */
                 if (unlikely(prev->context.ldt != next->context.ldt))
-                        load_LDT_nolock(&next->context);
+                        load_mm_ldt(next);
         }
 #ifdef CONFIG_SMP
           else {
@@ -106,7 +144,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
                         load_cr3(next->pgd);
                         trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
                         load_mm_cr4(next);
-                        load_LDT_nolock(&next->context);
+                        load_mm_ldt(next);
                 }
         }
 #endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 922c5e0cea4c..cb9e5df42dd2 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1410,7 +1410,7 @@ void cpu_init(void)
         load_sp0(t, &current->thread);
         set_tss_desc(cpu, t);
         load_TR_desc();
-        load_LDT(&init_mm.context);
+        load_mm_ldt(&init_mm);
 
         clear_all_debug_regs();
         dbg_restore_debug_regs();
@@ -1459,7 +1459,7 @@ void cpu_init(void)
         load_sp0(t, thread);
         set_tss_desc(cpu, t);
         load_TR_desc();
-        load_LDT(&init_mm.context);
+        load_mm_ldt(&init_mm);
 
         t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3658de47900f..9469dfa55607 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -2179,21 +2179,25 @@ static unsigned long get_segment_base(unsigned int segment)
         int idx = segment >> 3;
 
         if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
+                struct ldt_struct *ldt;
+
                 if (idx > LDT_ENTRIES)
                         return 0;
 
-                if (idx > current->active_mm->context.size)
+                /* IRQs are off, so this synchronizes with smp_store_release */
+                ldt = lockless_dereference(current->active_mm->context.ldt);
+                if (!ldt || idx > ldt->size)
                         return 0;
 
-                desc = current->active_mm->context.ldt;
+                desc = &ldt->entries[idx];
         } else {
                 if (idx > GDT_ENTRIES)
                         return 0;
 
-                desc = raw_cpu_ptr(gdt_page.gdt);
+                desc = raw_cpu_ptr(gdt_page.gdt) + idx;
         }
 
-        return get_desc_base(desc + idx);
+        return get_desc_base(desc);
 }
 
 #ifdef CONFIG_COMPAT
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index c37886d759cc..d65e6ec90338 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -20,82 +20,70 @@
 #include <asm/mmu_context.h>
 #include <asm/syscalls.h>
 
-#ifdef CONFIG_SMP
 static void flush_ldt(void *current_mm)
 {
-        if (current->active_mm == current_mm)
-                load_LDT(&current->active_mm->context);
+        if (current->active_mm == current_mm) {
+                /* context.lock is held for us, so we don't need any locking. */
+                mm_context_t *pc = &current->active_mm->context;
+                set_ldt(pc->ldt->entries, pc->ldt->size);
+        }
 }
-#endif
 
-static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
+static struct ldt_struct *alloc_ldt_struct(int size)
 {
-        void *oldldt, *newldt;
-        int oldsize;
+        struct ldt_struct *new_ldt;
+        int alloc_size;
 
-        if (mincount <= pc->size)
-                return 0;
-        oldsize = pc->size;
-        mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) &
-                        (~(PAGE_SIZE / LDT_ENTRY_SIZE - 1));
-        if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
-                newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
+        if (size > LDT_ENTRIES)
+                return NULL;
+
+        BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
+        alloc_size = sizeof(struct ldt_struct) + size * LDT_ENTRY_SIZE;
+
+        if (alloc_size > PAGE_SIZE)
+                new_ldt = vmalloc(alloc_size);
         else
-                newldt = (void *)__get_free_page(GFP_KERNEL);
+                new_ldt = (void *)__get_free_page(GFP_KERNEL);
+        if (!new_ldt)
+                return NULL;
 
-        if (!newldt)
-                return -ENOMEM;
+        new_ldt->size = size;
+        paravirt_alloc_ldt(new_ldt->entries, size);
+        return new_ldt;
+}
 
-        if (oldsize)
-                memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE);
-        oldldt = pc->ldt;
-        memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
-               (mincount - oldsize) * LDT_ENTRY_SIZE);
+static void install_ldt(struct mm_struct *current_mm,
+                        struct ldt_struct *ldt)
+{
+        /* context.lock is held */
+        preempt_disable();
 
-        paravirt_alloc_ldt(newldt, mincount);
+        /* Synchronizes with lockless_dereference in load_mm_ldt. */
+        smp_store_release(&current_mm->context.ldt, ldt);
 
-#ifdef CONFIG_X86_64
-        /* CHECKME: Do we really need this ? */
-        wmb();
-#endif
-        pc->ldt = newldt;
-        wmb();
-        pc->size = mincount;
-        wmb();
+        /* Activate for this CPU. */
+        flush_ldt(current->mm);
 
-        if (reload) {
 #ifdef CONFIG_SMP
-                preempt_disable();
-                load_LDT(pc);
-                if (!cpumask_equal(mm_cpumask(current->mm),
-                                   cpumask_of(smp_processor_id())))
-                        smp_call_function(flush_ldt, current->mm, 1);
-                preempt_enable();
-#else
-                load_LDT(pc);
+        /* Synchronize with other CPUs. */
+        if (!cpumask_equal(mm_cpumask(current_mm),
+                           cpumask_of(smp_processor_id())))
+                smp_call_function(flush_ldt, current_mm, 1);
 #endif
-        }
-        if (oldsize) {
-                paravirt_free_ldt(oldldt, oldsize);
-                if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
-                        vfree(oldldt);
-                else
-                        put_page(virt_to_page(oldldt));
-        }
-        return 0;
+        preempt_enable();
 }
 
-static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
+static void free_ldt_struct(struct ldt_struct *ldt)
 {
-        int err = alloc_ldt(new, old->size, 0);
-        int i;
-
-        if (err < 0)
-                return err;
-
-        for (i = 0; i < old->size; i++)
-                write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
-        return 0;
+        if (unlikely(ldt)) {
+                int alloc_size = sizeof(struct ldt_struct) +
+                        ldt->size * LDT_ENTRY_SIZE;
+                paravirt_free_ldt(ldt->entries, ldt->size);
+                if (alloc_size > PAGE_SIZE)
+                        vfree(ldt);
+                else
+                        put_page(virt_to_page(ldt));
+        }
 }
 
 /*
@@ -106,15 +94,35 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
         struct mm_struct *old_mm;
         int retval = 0;
+        int i;
 
         mutex_init(&mm->context.lock);
-        mm->context.size = 0;
         old_mm = current->mm;
-        if (old_mm && old_mm->context.size > 0) {
-                mutex_lock(&old_mm->context.lock);
-                retval = copy_ldt(&mm->context, &old_mm->context);
-                mutex_unlock(&old_mm->context.lock);
+        if (!old_mm) {
+                mm->context.ldt = NULL;
+                return 0;
         }
+
+        mutex_lock(&old_mm->context.lock);
+        if (old_mm->context.ldt) {
+                struct ldt_struct *new_ldt =
+                        alloc_ldt_struct(old_mm->context.ldt->size);
+                if (!new_ldt) {
+                        retval = -ENOMEM;
+                        goto out_unlock;
+                }
+
+                for (i = 0; i < old_mm->context.ldt->size; i++)
+                        write_ldt_entry(new_ldt->entries, i,
+                                        &old_mm->context.ldt->entries[i]);
+
+                mm->context.ldt = new_ldt;
+        } else {
+                mm->context.ldt = NULL;
+        }
+
+out_unlock:
+        mutex_unlock(&old_mm->context.lock);
         return retval;
 }
 
@@ -125,53 +133,47 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
  */
 void destroy_context(struct mm_struct *mm)
 {
-        if (mm->context.size) {
-#ifdef CONFIG_X86_32
-                /* CHECKME: Can this ever happen ? */
-                if (mm == current->active_mm)
-                        clear_LDT();
-#endif
-                paravirt_free_ldt(mm->context.ldt, mm->context.size);
-                if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
-                        vfree(mm->context.ldt);
-                else
-                        put_page(virt_to_page(mm->context.ldt));
-                mm->context.size = 0;
-        }
+        free_ldt_struct(mm->context.ldt);
+        mm->context.ldt = NULL;
 }
 
 static int read_ldt(void __user *ptr, unsigned long bytecount)
 {
-        int err;
+        int retval;
         unsigned long size;
         struct mm_struct *mm = current->mm;
 
-        if (!mm->context.size)
-                return 0;
+        mutex_lock(&mm->context.lock);
+
+        if (!mm->context.ldt) {
+                retval = 0;
+                goto out_unlock;
+        }
+
         if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
                 bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
 
-        mutex_lock(&mm->context.lock);
-        size = mm->context.size * LDT_ENTRY_SIZE;
+        size = mm->context.ldt->size * LDT_ENTRY_SIZE;
         if (size > bytecount)
                 size = bytecount;
 
-        err = 0;
-        if (copy_to_user(ptr, mm->context.ldt, size))
-                err = -EFAULT;
-        mutex_unlock(&mm->context.lock);
-        if (err < 0)
-                goto error_return;
+        if (copy_to_user(ptr, mm->context.ldt->entries, size)) {
+                retval = -EFAULT;
+                goto out_unlock;
+        }
+
         if (size != bytecount) {
-                /* zero-fill the rest */
+                /* Zero-fill the rest and pretend we read bytecount bytes. */
                 if (clear_user(ptr + size, bytecount - size) != 0) {
-                        err = -EFAULT;
-                        goto error_return;
+                        retval = -EFAULT;
+                        goto out_unlock;
                 }
         }
-        return bytecount;
-error_return:
-        return err;
+        retval = bytecount;
+
+out_unlock:
+        mutex_unlock(&mm->context.lock);
+        return retval;
 }
 
 static int read_default_ldt(void __user *ptr, unsigned long bytecount)
@@ -189,12 +191,16 @@ static int read_default_ldt(void __user *ptr, unsigned long bytecount)
         return bytecount;
 }
 
+static struct desc_struct blank_desc;
+
 static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 {
         struct mm_struct *mm = current->mm;
         struct desc_struct ldt;
-        int error;
+        int error, i;
         struct user_desc ldt_info;
+        int oldsize, newsize;
+        struct ldt_struct *new_ldt, *old_ldt;
 
         error = -EINVAL;
         if (bytecount != sizeof(ldt_info))
@@ -213,34 +219,41 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
                 goto out;
         }
 
-        mutex_lock(&mm->context.lock);
-        if (ldt_info.entry_number >= mm->context.size) {
-                error = alloc_ldt(&current->mm->context,
-                                  ldt_info.entry_number + 1, 1);
-                if (error < 0)
-                        goto out_unlock;
-        }
-
-        /* Allow LDTs to be cleared by the user. */
-        if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
-                if (oldmode || LDT_empty(&ldt_info)) {
-                        memset(&ldt, 0, sizeof(ldt));
-                        goto install;
+        if ((oldmode && ldt_info.base_addr == 0 && ldt_info.limit == 0) ||
+            LDT_empty(&ldt_info)) {
+                /* The user wants to clear the entry. */
+                memset(&ldt, 0, sizeof(ldt));
+        } else {
+                if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
+                        error = -EINVAL;
+                        goto out;
                 }
+
+                fill_ldt(&ldt, &ldt_info);
+                if (oldmode)
+                        ldt.avl = 0;
         }
 
-        if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
-                error = -EINVAL;
+        mutex_lock(&mm->context.lock);
+
+        old_ldt = mm->context.ldt;
+        oldsize = old_ldt ? old_ldt->size : 0;
+        newsize = max((int)(ldt_info.entry_number + 1), oldsize);
+
+        error = -ENOMEM;
+        new_ldt = alloc_ldt_struct(newsize);
+        if (!new_ldt)
                 goto out_unlock;
-        }
 
-        fill_ldt(&ldt, &ldt_info);
-        if (oldmode)
-                ldt.avl = 0;
+        for (i = 0; i < oldsize; i++)
+                write_ldt_entry(new_ldt->entries, i,
+                                &old_ldt->entries[i]);
+        for (i = oldsize; i < newsize; i++)
+                write_ldt_entry(new_ldt->entries, i, &blank_desc);
+        write_ldt_entry(new_ldt->entries, ldt_info.entry_number, &ldt);
 
-        /* Install the new entry ... */
-install:
-        write_ldt_entry(mm->context.ldt, ldt_info.entry_number, &ldt);
+        install_ldt(mm, new_ldt);
+        free_ldt_struct(old_ldt);
         error = 0;
 
 out_unlock:
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 71d7849a07f7..f6b916387590 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -121,11 +121,11 @@ void __show_regs(struct pt_regs *regs, int all)
 void release_thread(struct task_struct *dead_task)
 {
         if (dead_task->mm) {
-                if (dead_task->mm->context.size) {
+                if (dead_task->mm->context.ldt) {
                         pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
                                 dead_task->comm,
                                 dead_task->mm->context.ldt,
-                                dead_task->mm->context.size);
+                                dead_task->mm->context.ldt->size);
                         BUG();
                 }
         }
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 9b4d51d0c0d0..6273324186ac 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -5,6 +5,7 @@
 #include <linux/mm.h>
 #include <linux/ptrace.h>
 #include <asm/desc.h>
+#include <asm/mmu_context.h>
 
 unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs)
 {
@@ -30,10 +31,11 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
                 seg &= ~7UL;
 
                 mutex_lock(&child->mm->context.lock);
-                if (unlikely((seg >> 3) >= child->mm->context.size))
+                if (unlikely(!child->mm->context.ldt ||
+                             (seg >> 3) >= child->mm->context.ldt->size))
                         addr = -1L; /* bogus selector, access would fault */
                 else {
-                        desc = child->mm->context.ldt + seg;
+                        desc = &child->mm->context.ldt->entries[seg];
                         base = get_desc_base(desc);
 
                         /* 16-bit code segment? */
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 0d7dd1f5ac36..9ab52791fed5 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -22,6 +22,7 @@
 #include <asm/fpu/internal.h>
 #include <asm/debugreg.h>
 #include <asm/cpu.h>
+#include <asm/mmu_context.h>
 
 #ifdef CONFIG_X86_32
 __visible unsigned long saved_context_ebx;
@@ -153,7 +154,7 @@ static void fix_processor_context(void)
         syscall_init();                 /* This sets MSR_*STAR and related */
 #endif
         load_TR_desc();                 /* This does ltr */
-        load_LDT(&current->active_mm->context); /* This does lldt */
+        load_mm_ldt(current->active_mm);        /* This does lldt */
 
         fpu__resume_cpu();
 }
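
For anyone wanting to poke at this from userspace: the write_ldt()/read_ldt()
paths changed above are what the raw modify_ldt(2) syscall reaches. The sketch
below is an illustrative example of that ABI, not part of the patch. It installs
one data descriptor and has a second thread read the LDT back, since the LDT is
per-mm and shared by every thread, which is the property the IPI-based
synchronization in the patch is meant to keep intact. The helper name, the entry
number, and the static backing buffer are arbitrary choices for the example; a
program that actually dereferences through the segment would also need the base
to fit in 32 bits (e.g. an mmap with MAP_32BIT).

/* Illustrative only -- exercises the modify_ldt ABI. Build with -pthread. */
#include <asm/ldt.h>            /* struct user_desc, LDT_ENTRIES, LDT_ENTRY_SIZE */
#include <pthread.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static long ldt_syscall(int func, void *ptr, unsigned long bytecount)
{
        /* There is no glibc wrapper for modify_ldt; use the raw syscall. */
        return syscall(SYS_modify_ldt, func, ptr, bytecount);
}

static void *read_back(void *arg)
{
        static unsigned char buf[LDT_ENTRIES * LDT_ENTRY_SIZE];
        long n = ldt_syscall(0, buf, sizeof(buf));      /* func 0: read_ldt() */

        if (n < 0)
                perror("modify_ldt(read)");
        else
                printf("sibling thread read %ld bytes of LDT\n", n);
        return NULL;
}

int main(void)
{
        static char seg_data[4096];     /* would need to sit below 4 GiB for real use */
        struct user_desc desc;
        pthread_t t;

        memset(&desc, 0, sizeof(desc));
        desc.entry_number = 0;
        desc.base_addr    = (unsigned int)(unsigned long)seg_data;
        desc.limit        = sizeof(seg_data) - 1;
        desc.seg_32bit    = 1;
        desc.contents     = MODIFY_LDT_CONTENTS_DATA;
        desc.useable      = 1;

        /* func 0x11: write_ldt() in new mode, i.e. oldmode == 0. */
        if (ldt_syscall(0x11, &desc, sizeof(desc)) != 0) {
                perror("modify_ldt(write)");
                return 1;
        }

        /* The entry is installed per-mm, so any thread in the process sees it. */
        pthread_create(&t, NULL, read_back, NULL);
        pthread_join(t, NULL);
        return 0;
}

The read in the sibling thread only shows that the table is visible process-wide;
whether a CPU that already has the mm active picks up a new table without a
reschedule is exactly what the IPI in install_ldt()/flush_ldt() is for.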