
Re: [Xen-devel] [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling




On 2013-10-21 23:55, Liu, Jinsong wrote:
 From 4ff1e2955f67954e60562b29a00adea89e5b93ae Mon Sep 17 00:00:00 2001
From: Liu Jinsong <jinsong.liu@xxxxxxxxx>
Date: Thu, 17 Oct 2013 05:49:23 +0800
Subject: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling

This patch solves the XSA-60 security hole:
1. For a guest w/o VT-d, and for a guest with VT-d but snooped, Xen need
do nothing, since the hardware snoop mechanism ensures cache coherency.

2. For a guest with VT-d but non-snooped, cache coherency cannot be
guaranteed by h/w snoop, so a UC memory type must be emulated for the guest:
2.1). if it works w/ Intel EPT, set all guest IA32_PAT fields to UC so that
all guest memory types are UC (see the sketch after this list).
2.2). if it works w/ shadow, drop all shadows so that any new ones are
created on demand w/ UC.
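
For illustration, a minimal user-space sketch (illustrative only, not part
of the patch) of the all-UC IA32_PAT value that 2.1 programs. The x86 PAT
encoding for UC is 0, so whichever PAT/PCD/PWT index a guest PTE selects
resolves to UC:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define PAT_TYPE_UNCACHABLE 0x0ULL   /* x86 PAT encoding for UC */

int main(void)
{
    uint64_t uc_pat = 0;
    int i;

    /* Set all eight PAT fields (PAT0..PAT7) to UC. */
    for ( i = 0; i < 8; i++ )
        uc_pat |= PAT_TYPE_UNCACHABLE << (8 * i);

    /* Prints 0 - since UC's encoding is 0, the all-UC PAT is just zero. */
    printf("IA32_PAT = %#018" PRIx64 "\n", uc_pat);
    return 0;
}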

This patch also fixes a bug in shadow cr0.cd handling. The current shadow
code has a small window between the cache flush and the TLB invalidation,
resulting in possible cache pollution. This patch pauses the vcpus so that
no vcpu context can enter that window.
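
To make the window concrete, here is the ordering enforced when entering
no-fill cache mode, sketched with the names used in the hunks below
(illustrative only; the real code is hvm_shadow_handle_cd()):

static void enter_no_fill_mode_sketch(struct vcpu *v)
{
    domain_pause_nosync(v->domain);          /* stop scheduling the vcpus */
    on_each_cpu(local_flush_cache, NULL, 1); /* wbinvd on every pcpu */
    hvm_set_uc_mode(v, 1);                   /* drop shadows; new ones UC */
    domain_unpause(v->domain);               /* only now may vcpus run */
    /*
     * Without the pause, another vcpu could run between the cache flush
     * and the shadow drop and re-pollute the cache through the old,
     * still-cacheable shadow mappings.
     */
}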

Signed-off-by: Liu Jinsong <jinsong.liu@xxxxxxxxx>
---
  xen/arch/x86/hvm/hvm.c            |   75 ++++++++++++++++++++++--------------
  xen/arch/x86/hvm/vmx/vmcs.c       |    2 +-
  xen/arch/x86/hvm/vmx/vmx.c        |   58 +++++++++++++++++++++++++++-
  xen/common/domain.c               |   10 +++++
  xen/include/asm-x86/hvm/hvm.h     |    1 +
  xen/include/asm-x86/hvm/support.h |    2 +
  xen/include/xen/sched.h           |    1 +
  7 files changed, 117 insertions(+), 32 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 688a943..df021de 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1701,6 +1701,40 @@ int hvm_mov_from_cr(unsigned int cr, unsigned int gpr)
      return X86EMUL_UNHANDLEABLE;
  }
+void hvm_shadow_handle_cd(struct vcpu *v, unsigned long value)
+{
+    if ( value & X86_CR0_CD )
+    {
+        /* Entering no fill cache mode. */
+        spin_lock(&v->domain->arch.hvm_domain.uc_lock);
+        v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
+
+        if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
+        {
+            domain_pause_nosync(v->domain);
+
+            /* Flush physical caches. */
+            on_each_cpu(local_flush_cache, NULL, 1);
+            hvm_set_uc_mode(v, 1);
+
+            domain_unpause(v->domain);
+        }
+        spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
+    }
+    else if ( !(value & X86_CR0_CD) &&
+              (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
+    {
+        /* Exit from no fill cache mode. */
+        spin_lock(&v->domain->arch.hvm_domain.uc_lock);
+        v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
+
+        if ( domain_exit_uc_mode(v) )
+            hvm_set_uc_mode(v, 0);
+
+        spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
+    }
+}
+
  int hvm_set_cr0(unsigned long value)
  {
      struct vcpu *v = current;
@@ -1784,35 +1818,18 @@ int hvm_set_cr0(unsigned long value)
          }
      }
-    if ( has_arch_mmios(v->domain) )
-    {
-        if ( (value & X86_CR0_CD) && !(value & X86_CR0_NW) )
-        {
-            /* Entering no fill cache mode. */
-            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
-            v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
-
-            if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
-            {
-                /* Flush physical caches. */
-                on_each_cpu(local_flush_cache, NULL, 1);
-                hvm_set_uc_mode(v, 1);
-            }
-            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
-        }
-        else if ( !(value & (X86_CR0_CD | X86_CR0_NW)) &&
-                  (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
-        {
-            /* Exit from no fill cache mode. */
-            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
-            v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
-
-            if ( domain_exit_uc_mode(v) )
-                hvm_set_uc_mode(v, 0);
-
-            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
-        }
-    }
+    /*
+     * When cr0.cd is set:
+     * 1. For a guest w/o VT-d, and for a guest with VT-d but snooped, Xen
+     * need not do anything, since the hardware snoop mechanism ensures
+     * cache coherency;
+     * 2. For a guest with VT-d but non-snooped, cache coherency cannot be
+     * guaranteed by h/w, so emulate a UC memory type for the guest.
+     */
+    if ( ((value ^ old_value) & X86_CR0_CD) &&
+           has_arch_pdevs(v->domain) &&
+           iommu_enabled && !iommu_snoop &&
+           hvm_funcs.handle_cd )
+        hvm_funcs.handle_cd(v, value);

     v->arch.hvm_vcpu.guest_cr[0] = value;
      hvm_update_guest_cr(v, 0);
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 6916c6d..7a01b1f 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -921,7 +921,7 @@ static int construct_vmcs(struct vcpu *v)
         vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS, MSR_TYPE_R | MSR_TYPE_W);
         vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP, MSR_TYPE_R | MSR_TYPE_W);
         vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, MSR_TYPE_R | MSR_TYPE_W);
-        if ( paging_mode_hap(d) )
+        if ( paging_mode_hap(d) && (!iommu_enabled || iommu_snoop) )
             vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT, MSR_TYPE_R | MSR_TYPE_W);
      }
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 6dedb29..d846a9c 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -642,6 +642,10 @@ static void vmx_ctxt_switch_to(struct vcpu *v)
              __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(ept_data), 0);
      }
+    /* For guest cr0.cd setting, do not use potentially polluted cache */
+    if ( unlikely(v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
+        wbinvd();
+
      vmx_restore_guest_msrs(v);
      vmx_restore_dr(v);
  }
@@ -908,7 +912,8 @@ static unsigned long vmx_get_shadow_gs_base(struct vcpu *v)
static int vmx_set_guest_pat(struct vcpu *v, u64 gpat)
  {
-    if ( !paging_mode_hap(v->domain) )
+    if ( !paging_mode_hap(v->domain) ||
+         unlikely(v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
          return 0;

     vmx_vmcs_enter(v);
@@ -919,7 +924,8 @@ static int vmx_set_guest_pat(struct vcpu *v, u64 gpat)
static int vmx_get_guest_pat(struct vcpu *v, u64 *gpat)
  {
-    if ( !paging_mode_hap(v->domain) )
+    if ( !paging_mode_hap(v->domain) ||
+         unlikely(v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
          return 0;

     vmx_vmcs_enter(v);
@@ -928,6 +934,53 @@ static int vmx_get_guest_pat(struct vcpu *v, u64 *gpat)
      return 1;
  }
+static void vmx_handle_cd(struct vcpu *v, unsigned long value)
+{
+    if ( !paging_mode_hap(v->domain) )
+    {
+        /*
+         * For shadow, the 'load IA32_PAT' VM-entry control is 0, so the
+         * guest memory type cannot be set to UC via IA32_PAT. Xen drops
+         * all shadows so that any new ones are created on demand.
+         */
+        hvm_shadow_handle_cd(v, value);
+    }
+    else
+    {
+        u64 *pat = &v->arch.hvm_vcpu.pat_cr;
+
+        if ( value & X86_CR0_CD )
+        {
+            /*
+             * For EPT, set all guest IA32_PAT fields to UC so that
+             * guest memory types are all UC.
+             */
+            u64 uc_pat =
+                ((uint64_t)PAT_TYPE_UNCACHABLE)       |       /* PAT0 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 8)  |       /* PAT1 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 16) |       /* PAT2 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 24) |       /* PAT3 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 32) |       /* PAT4 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 40) |       /* PAT5 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 48) |       /* PAT6 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 56);        /* PAT7 */
+
+            vmx_get_guest_pat(v, pat);
+            vmx_set_guest_pat(v, uc_pat);
+
+            wbinvd();               /* flush possibly polluted cache */
+            hvm_asid_flush_vcpu(v); /* invalidate memory type cached in TLB */
+            v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
+        }
+        else
+        {
+            v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
+            vmx_set_guest_pat(v, *pat);
+            hvm_asid_flush_vcpu(v); /* no need to flush cache */
+        }
+    }
+}
+
  static void vmx_set_tsc_offset(struct vcpu *v, u64 offset)
  {
      vmx_vmcs_enter(v);
@@ -1550,6 +1603,7 @@ static struct hvm_function_table __initdata vmx_function_table = {
      .msr_read_intercept   = vmx_msr_read_intercept,
      .msr_write_intercept  = vmx_msr_write_intercept,
      .invlpg_intercept     = vmx_invlpg_intercept,
+    .handle_cd            = vmx_handle_cd,
      .set_info_guest       = vmx_set_info_guest,
      .set_rdtsc_exiting    = vmx_set_rdtsc_exiting,
      .nhvm_vcpu_initialise = nvmx_vcpu_initialise,
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 5999779..ce20323 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -825,6 +825,16 @@ void domain_pause(struct domain *d)
          vcpu_sleep_sync(v);
  }
+void domain_pause_nosync(struct domain *d)
+{
+    struct vcpu *v;
+
+    atomic_inc(&d->pause_count);
+
+    for_each_vcpu( d, v )
+        vcpu_sleep_nosync(v);
+}
+
  void domain_unpause(struct domain *d)
  {
      struct vcpu *v;
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 8dd2b40..c9afb56 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -156,6 +156,7 @@ struct hvm_function_table {
      int (*msr_read_intercept)(unsigned int msr, uint64_t *msr_content);
      int (*msr_write_intercept)(unsigned int msr, uint64_t msr_content);
      void (*invlpg_intercept)(unsigned long vaddr);
+    void (*handle_cd)(struct vcpu *v, unsigned long value);
      void (*set_info_guest)(struct vcpu *v);
      void (*set_rdtsc_exiting)(struct vcpu *v, bool_t);
diff --git a/xen/include/asm-x86/hvm/support.h b/xen/include/asm-x86/hvm/support.h
index 7ddc806..52aef1f 100644
--- a/xen/include/asm-x86/hvm/support.h
+++ b/xen/include/asm-x86/hvm/support.h
@@ -130,6 +130,8 @@ void hvm_rdtsc_intercept(struct cpu_user_regs *regs);
int __must_check hvm_handle_xsetbv(u32 index, u64 new_bv);

+void hvm_shadow_handle_cd(struct vcpu *v, unsigned long value);
+
  /* These functions all return X86EMUL return codes. */
  int hvm_set_efer(uint64_t value);
  int hvm_set_cr0(unsigned long value);
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 1765e18..82f522e 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -700,6 +700,7 @@ void vcpu_unblock(struct vcpu *v);
  void vcpu_pause(struct vcpu *v);
  void vcpu_pause_nosync(struct vcpu *v);
  void domain_pause(struct domain *d);
+void domain_pause_nosync(struct domain *d);
  void vcpu_unpause(struct vcpu *v);
  void domain_unpause(struct domain *d);
  void domain_pause_by_systemcontroller(struct domain *d);
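
A note on the new domain_pause_nosync(): it is needed because the
synchronous domain_pause() (paraphrased below; its tail is visible in the
hunk context above) also spins until every vcpu is descheduled, which must
not be done from one of the domain's own vcpus - exactly the context
hvm_set_cr0() runs in. The nosync variant only raises the pause count and
kicks the vcpus:

/* Paraphrase of the existing function, for contrast; not new code. */
void domain_pause(struct domain *d)
{
    struct vcpu *v;

    ASSERT(d != current->domain);   /* cannot pause one's own domain */

    atomic_inc(&d->pause_count);

    for_each_vcpu ( d, v )
        vcpu_sleep_sync(v);         /* spin until v is off its pcpu */
}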
We have a problem w/ the new Xen 4.4 hypervisor - dom0 comes up but hits
the following bug and the box reboots. See the stack trace below.

...
Starting portmap: [  OK  ]
Starting NFS statd: [  OK  ]
Starting RPC idmapd: [  OK  ]
(XEN) Xen BUG at spinlock.c:48
(XEN) ----[ Xen-4.4-unstable  x86_64  debug=y  Not tainted ]----
(XEN) CPU:    2
(XEN) RIP:    e008:[<ffff82d080127355>] check_lock+0x3d/0x43
(XEN) RFLAGS: 0000000000010046   CONTEXT: hypervisor
(XEN) rax: 0000000000000000   rbx: ffff82d08028ab08   rcx: 0000000000000001
(XEN) rdx: 0000000000000000   rsi: 0000000000000001   rdi: ffff82d08028ab0c
(XEN) rbp: ffff83203fda7c50   rsp: ffff83203fda7c50   r8: ffff82d0802dfc88
(XEN) r9:  00000000deadbeef   r10: ffff82d08023e120   r11: 0000000000000206
(XEN) r12: ffff83007f481ff0   r13: 0000000000000000   r14: 000ffff82cfffd5d
(XEN) r15: ffff82cfffd5e000   cr0: 0000000080050033   cr4: 00000000000026f0
(XEN) cr3: 000000c083a2a000   cr2: 000000000040e000
(XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
(XEN) Xen stack trace from rsp=ffff83203fda7c50:
(XEN) ffff83203fda7c68 ffff82d08012750d ffff83007f74e000 ffff83203fda7d08
(XEN) ffff82d0801717bf ffff83203fda7ca8 010183203fda7d88 0000000000000163
(XEN) 000000013fda7d88 ffff832000000163 ffff83203fda7d28 0000000000000000
(XEN) 000000000c082433 01ffffffffffffff ffff83007f4839f8 ffff82d08026ff20
(XEN) ffff83203fdcafd0 0000000000000000 00000000000002a2 ffff82d0802dfc90
(XEN) 0000000000000001 000000c082432000 00000000000002a2 ffff83203fda7d18
(XEN) ffff82d080171e8a ffff83203fda7d58 ffff82d08019f5f5 0000000000000002
(XEN) ffff82d0802dfc88 ffff83203fda7db8 00000000ffffffea 0000000000000001
(XEN) 0000000000000000 ffff83203fda7da8 ffff82d080112f8b 0000000000000002
(XEN) ffff83203fdac6c8 0000000200000005 0000000000000000 0000000000000001
(XEN) 0000000000000000 ffff8807bc799e68 0000000000000246 ffff83203fda7ee8
(XEN) ffff82d080113cce 0000000000000001 000000c082432000 0000000000000000
(XEN) 000000c085c1b000 0000000000000000 000000c085c1a000 0000000000000000
(XEN) 000000c085c19000 0000000000000000 000000c085c18000 0000000000000000
(XEN) 000000c085eb7000 0000000000000000 000000c085eb6000 0000000000000000
(XEN) 000000c085eb5000 0000000000000000 000000c082433000 000000c085eb4002
(XEN) 0000000008f59690 ffff82d0801274bf ffff83007f2db060 ffff83203fda7e88
(XEN) ffff82d08018d8ee ffff83203fd9c330 ffff83203fda7f18 ffff83203fda7ef8
(XEN) ffff82d080220550 0000000000000000 0000000008fff000 0000000000000044
(XEN) 0000000000000000 ffffffff8125bd07 ffff83007f2db000 ffff8807bc684000
(XEN) Xen call trace:
(XEN)    [<ffff82d080127355>] check_lock+0x3d/0x43
(XEN)    [<ffff82d08012750d>] _spin_lock+0x11/0x48
(XEN)    [<ffff82d0801717bf>] map_pages_to_xen+0xcab/0x1052
(XEN)    [<ffff82d080171e8a>] __set_fixmap+0x30/0x32
(XEN)    [<ffff82d08019f5f5>] machine_kexec_load+0x66/0xa1
(XEN)    [<ffff82d080112f8b>] kexec_load_unload_internal+0xb9/0x2cc
(XEN)    [<ffff82d080113cce>] do_kexec_op_internal+0x391/0x4b2
(XEN)    [<ffff82d080113e0d>] do_kexec_op+0xe/0x12
(XEN)    [<ffff82d080225c7b>] syscall_enter+0xeb/0x145
(XEN)
(XEN)
(XEN) ****************************************
(XEN) Panic on CPU 2:
(XEN) Xen BUG at spinlock.c:48
(XEN) ****************************************
(XEN)
(XEN) Reboot in five seconds...
(XEN) Resetting with ACPI MEMORY or I/O RESET_REG.
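
For anyone chasing this: in debug builds, check_lock() in
xen/common/spinlock.c records whether a lock was first taken with
interrupts enabled or disabled, and BUGs if a later acquisition disagrees,
since a lock also taken from IRQ context can deadlock against an
interrupted holder. A paraphrased sketch (field names approximate, not a
verbatim copy of the Xen source):

static void check_lock(struct lock_debug *debug)
{
    int irq_safe = !local_irq_is_enabled();

    if ( unlikely(debug->irq_safe != irq_safe) )
    {
        /* irq_safe latches on first use (-1 == not yet seen). */
        int seen = cmpxchg(&debug->irq_safe, -1, irq_safe);

        BUG_ON(seen == !irq_safe);  /* <- the BUG at spinlock.c:48 */
    }
}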

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 

