
[Xen-devel] [PATCH v4 2/3] x86/ldt: Make modify_ldt optional



The modify_ldt syscall exposes a large attack surface and is
unnecessary for modern userspace.  Make it optional.
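
(For illustration only, not part of this patch: a minimal userspace
sketch of the interface being made optional.  modify_ldt(2) has no
glibc wrapper, so it is invoked through syscall(2); on a kernel built
with CONFIG_MODIFY_LDT_SYSCALL=n the call returns -ENOSYS.  The
descriptor values below are arbitrary.)

  #include <asm/ldt.h>        /* struct user_desc */
  #include <errno.h>
  #include <stdio.h>
  #include <string.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  int main(void)
  {
          struct user_desc ud;

          /* One 32-bit, 4G-limit data segment; all other fields zero. */
          memset(&ud, 0, sizeof(ud));
          ud.entry_number   = 0;
          ud.limit          = 0xfffff;
          ud.seg_32bit      = 1;
          ud.limit_in_pages = 1;

          /* func == 1: write the descriptor into this process's LDT. */
          if (syscall(SYS_modify_ldt, 1, &ud, sizeof(ud)) != 0) {
                  if (errno == ENOSYS)
                          fprintf(stderr, "modify_ldt is compiled out\n");
                  else
                          perror("modify_ldt");
                  return 1;
          }

          printf("installed LDT entry %u\n", ud.entry_number);
          return 0;
  }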

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx>
---
 arch/x86/Kconfig                   | 17 +++++++++++++++++
 arch/x86/include/asm/mmu.h         |  2 ++
 arch/x86/include/asm/mmu_context.h | 31 +++++++++++++++++++++++--------
 arch/x86/kernel/Makefile           |  3 ++-
 arch/x86/kernel/cpu/perf_event.c   |  4 ++++
 arch/x86/kernel/process_64.c       |  2 ++
 arch/x86/kernel/step.c             |  2 ++
 kernel/sys_ni.c                    |  1 +
 8 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b3a1a5d77d92..ede52be845db 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1015,6 +1015,7 @@ config VM86
 config X86_16BIT
        bool "Enable support for 16-bit segments" if EXPERT
        default y
+       depends on MODIFY_LDT_SYSCALL
        ---help---
          This option is required by programs like Wine to run 16-bit
          protected mode legacy code on x86 processors.  Disabling
@@ -2053,6 +2054,22 @@ config CMDLINE_OVERRIDE
          This is used to work around broken boot loaders.  This should
          be set to 'N' under normal conditions.
 
+config MODIFY_LDT_SYSCALL
+       bool "Enable the LDT (local descriptor table)" if EXPERT
+       default y
+       ---help---
+         Linux can allow user programs to install a per-process x86
+         Local Descriptor Table (LDT) using the modify_ldt(2) system
+         call.  This is required to run 16-bit or segmented code such as
+         DOSEMU or some Wine programs.  It is also used by some very old
+         threading libraries.
+
+         Enabling this feature adds a small amount of overhead to
+         context switches and increases the low-level kernel attack
+         surface.  Disabling it removes the modify_ldt(2) system call.
+
+         Saying 'N' here may make sense for embedded or server kernels.
+
 source "kernel/livepatch/Kconfig"
 
 endmenu
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 364d27481a52..55234d5e7160 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -9,7 +9,9 @@
  * we put the segment information here.
  */
 typedef struct {
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
        struct ldt_struct *ldt;
+#endif
 
 #ifdef CONFIG_X86_64
        /* True if mm supports a task running in 32 bit compatibility mode. */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 3fcff70c398e..08094eded318 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -33,6 +33,7 @@ static inline void load_mm_cr4(struct mm_struct *mm)
 static inline void load_mm_cr4(struct mm_struct *mm) {}
 #endif
 
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
 /*
  * ldt_structs can be allocated, used, and freed, but they are never
  * modified while live.
@@ -48,10 +49,24 @@ struct ldt_struct {
        int size;
 };
 
+/*
+ * Used for LDT copy/destruction.
+ */
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
+void destroy_context(struct mm_struct *mm);
+#else  /* CONFIG_MODIFY_LDT_SYSCALL */
+static inline int init_new_context(struct task_struct *tsk,
+                                  struct mm_struct *mm)
+{
+       return 0;
+}
+static inline void destroy_context(struct mm_struct *mm) {}
+#endif
+
 static inline void load_mm_ldt(struct mm_struct *mm)
 {
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
        struct ldt_struct *ldt;
-       DEBUG_LOCKS_WARN_ON(!irqs_disabled());
 
        /* lockless_dereference synchronizes with smp_store_release */
        ldt = lockless_dereference(mm->context.ldt);
@@ -74,14 +89,12 @@ static inline void load_mm_ldt(struct mm_struct *mm)
                set_ldt(ldt->entries, ldt->size);
        else
                clear_LDT();
-}
-
-/*
- * Used for LDT copy/destruction.
- */
-int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
-void destroy_context(struct mm_struct *mm);
+#else
+       clear_LDT();
+#endif
 
+       DEBUG_LOCKS_WARN_ON(!irqs_disabled());
+}
 
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
@@ -113,6 +126,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
                /* Load per-mm CR4 state */
                load_mm_cr4(next);
 
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
                /*
                 * Load the LDT, if the LDT is different.
                 *
@@ -127,6 +141,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
                 */
                if (unlikely(prev->context.ldt != next->context.ldt))
                        load_mm_ldt(next);
+#endif
        }
 #ifdef CONFIG_SMP
          else {
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0f15af41bd80..2b507befcd3f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -24,7 +24,8 @@ CFLAGS_irq.o := -I$(src)/../include/asm/trace
 
 obj-y                  := process_$(BITS).o signal.o
 obj-y                  += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
-obj-y                  += time.o ioport.o ldt.o dumpstack.o nmi.o
+obj-y                  += time.o ioport.o dumpstack.o nmi.o
+obj-$(CONFIG_MODIFY_LDT_SYSCALL)       += ldt.o
 obj-y                  += setup.o x86_init.o i8259.o irqinit.o jump_label.o
 obj-$(CONFIG_IRQ_WORK)  += irq_work.o
 obj-y                  += probe_roms.o
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 9469dfa55607..58b872ef2329 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -2179,6 +2179,7 @@ static unsigned long get_segment_base(unsigned int segment)
        int idx = segment >> 3;
 
        if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
                struct ldt_struct *ldt;
 
                if (idx > LDT_ENTRIES)
@@ -2190,6 +2191,9 @@ static unsigned long get_segment_base(unsigned int segment)
                        return 0;
 
                desc = &ldt->entries[idx];
+#else
+               return 0;
+#endif
        } else {
                if (idx > GDT_ENTRIES)
                        return 0;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index f6b916387590..941295ddf802 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -121,6 +121,7 @@ void __show_regs(struct pt_regs *regs, int all)
 void release_thread(struct task_struct *dead_task)
 {
        if (dead_task->mm) {
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
                if (dead_task->mm->context.ldt) {
                        pr_warn("WARNING: dead process %s still has LDT? 
<%p/%d>\n",
                                dead_task->comm,
@@ -128,6 +129,7 @@ void release_thread(struct task_struct *dead_task)
                                dead_task->mm->context.ldt->size);
                        BUG();
                }
+#endif
        }
 }
 
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 6273324186ac..fd88e152d584 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -18,6 +18,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
                return addr;
        }
 
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
        /*
         * We'll assume that the code segments in the GDT
         * are all zero-based. That is largely true: the
@@ -45,6 +46,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
                }
                mutex_unlock(&child->mm->context.lock);
        }
+#endif
 
        return addr;
 }
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 7995ef5868d8..ca7d84f438f1 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -140,6 +140,7 @@ cond_syscall(sys_sgetmask);
 cond_syscall(sys_ssetmask);
 cond_syscall(sys_vm86old);
 cond_syscall(sys_vm86);
+cond_syscall(sys_modify_ldt);
 cond_syscall(sys_ipc);
 cond_syscall(compat_sys_ipc);
 cond_syscall(compat_sys_sysctl);
-- 
2.4.3

