[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 4/6] x86/emulate: Support for emulating software event injection



AMD SVM requires all software events to have their injection emulated if
hardware lacks NextRIP support.  In addition, `icebp` (opcode 0xf1) injection
requires emulation in all cases, even with hardware NextRIP support.

Emulating full control transfers is overkill for our needs.  All that matters
is that guest userspace can't bypass the descriptor DPL check.  Any guest OS
which would incur other faults as part of injection is going to end up with a
double fault instead, and won't be in a position to care that the faulting eip
is wrong.

Reported-by: Andrei LUTAS <vlutas@xxxxxxxxxxxxxxx>
Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>
CC: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
CC: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
CC: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@xxxxxxx>
---
 xen/arch/x86/hvm/emulate.c             |    8 +++
 xen/arch/x86/hvm/svm/svm.c             |   57 +++++++++++++--
 xen/arch/x86/mm.c                      |    2 +
 xen/arch/x86/mm/shadow/common.c        |    1 +
 xen/arch/x86/x86_emulate/x86_emulate.c |  122 ++++++++++++++++++++++++++++++--
 xen/arch/x86/x86_emulate/x86_emulate.h |   10 +++
 6 files changed, 191 insertions(+), 9 deletions(-)

diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index 7ee146b..463ccfb 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -21,6 +21,7 @@
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/trace.h>
 #include <asm/hvm/support.h>
+#include <asm/hvm/svm/svm.h>
 
 static void hvmtrace_io_assist(int is_mmio, ioreq_t *p)
 {
@@ -1328,6 +1329,13 @@ static int _hvm_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt,
     vio->mmio_retrying = vio->mmio_retry;
     vio->mmio_retry = 0;
 
+    if ( cpu_has_vmx )
+        hvmemul_ctxt->ctxt.swint_emulate = x86_swint_emulate_none;
+    else if ( cpu_has_svm_nrips )
+        hvmemul_ctxt->ctxt.swint_emulate = x86_swint_emulate_icebp;
+    else
+        hvmemul_ctxt->ctxt.swint_emulate = x86_swint_emulate_all;
+
     rc = x86_emulate(&hvmemul_ctxt->ctxt, ops);
 
     if ( rc == X86EMUL_OKAY && vio->mmio_retry )
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index de982fd..b6beefc 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1177,11 +1177,12 @@ static void svm_inject_trap(struct hvm_trap *trap)
     struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
     eventinj_t event = vmcb->eventinj;
     struct hvm_trap _trap = *trap;
+    const struct cpu_user_regs *regs = guest_cpu_user_regs();
 
     switch ( _trap.vector )
     {
     case TRAP_debug:
-        if ( guest_cpu_user_regs()->eflags & X86_EFLAGS_TF )
+        if ( regs->eflags & X86_EFLAGS_TF )
         {
             __restore_debug_registers(vmcb, curr);
             vmcb_set_dr6(vmcb, vmcb_get_dr6(vmcb) | 0x4000);
@@ -1209,10 +1210,58 @@ static void svm_inject_trap(struct hvm_trap *trap)
 
     event.bytes = 0;
     event.fields.v = 1;
-    event.fields.type = X86_EVENTTYPE_HW_EXCEPTION;
     event.fields.vector = _trap.vector;
-    event.fields.ev = (_trap.error_code != HVM_DELIVER_NO_ERROR_CODE);
-    event.fields.errorcode = _trap.error_code;
+
+    /* Refer to AMD Vol 2: System Programming, 15.20 Event Injection. */
+    switch ( _trap.type )
+    {
+    case X86_EVENTTYPE_SW_INTERRUPT: /* int $n */
+        /*
+         * Injection type 4 (software interrupt) is only supported with
+         * NextRIP support.  Without NextRIP, the emulator will have performed
+         * DPL and presence checks for us.
+         */
+        if ( cpu_has_svm_nrips )
+        {
+            vmcb->nextrip = regs->eip + _trap.insn_len;
+            event.fields.type = X86_EVENTTYPE_SW_INTERRUPT;
+        }
+        else
+            event.fields.type = X86_EVENTTYPE_HW_EXCEPTION;
+        break;
+
+    case X86_EVENTTYPE_PRI_SW_EXCEPTION: /* icebp */
+        /*
+         * icebp's injection must always be emulated.  Software injection help
+         * in x86_emulate has moved eip forward, but NextRIP (if used) still
+         * needs setting or execution will resume from 0.
+         */
+        if ( cpu_has_svm_nrips )
+            vmcb->nextrip = regs->eip;
+        event.fields.type = X86_EVENTTYPE_HW_EXCEPTION;
+        break;
+
+    case X86_EVENTTYPE_SW_EXCEPTION: /* int3, into */
+        /*
+         * The AMD manual states that .type=3 (HW exception), .vector=3 or 4,
+         * will perform DPL checks.  Experimentally, DPL and presence checks
+         * are indeed performed, even without NextRIP support.
+         *
+         * However without NextRIP support, the event injection still needs
+         * fully emulating to get the correct eip in the trap frame, yet get
+         * the correct faulting eip should a fault occur.
+         */
+        if ( cpu_has_svm_nrips )
+            vmcb->nextrip = regs->eip + _trap.insn_len;
+        event.fields.type = X86_EVENTTYPE_HW_EXCEPTION;
+        break;
+
+    default:
+        event.fields.type = X86_EVENTTYPE_HW_EXCEPTION;
+        event.fields.ev = (_trap.error_code != HVM_DELIVER_NO_ERROR_CODE);
+        event.fields.errorcode = _trap.error_code;
+        break;
+    }
 
     vmcb->eventinj = event;
 
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 5b3f06f..bfe9f05 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -5096,6 +5096,7 @@ int ptwr_do_page_fault(struct vcpu *v, unsigned long addr,
     ptwr_ctxt.ctxt.force_writeback = 0;
     ptwr_ctxt.ctxt.addr_size = ptwr_ctxt.ctxt.sp_size =
         is_pv_32on64_domain(d) ? 32 : BITS_PER_LONG;
+    ptwr_ctxt.ctxt.swint_emulate = x86_swint_emulate_none;
     ptwr_ctxt.cr2 = addr;
     ptwr_ctxt.pte = pte;
 
@@ -5172,6 +5173,7 @@ int mmio_ro_do_page_fault(struct vcpu *v, unsigned long addr,
         .ctxt.regs = regs,
         .ctxt.addr_size = addr_size,
         .ctxt.sp_size = addr_size,
+        .ctxt.swint_emulate = x86_swint_emulate_none,
         .cr2 = addr
     };
     int rc;
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index 9115a78..a5eed28 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -366,6 +366,7 @@ const struct x86_emulate_ops *shadow_init_emulation(
 
     sh_ctxt->ctxt.regs = regs;
     sh_ctxt->ctxt.force_writeback = 0;
+    sh_ctxt->ctxt.swint_emulate = x86_swint_emulate_none;
 
     if ( is_pv_vcpu(v) )
     {
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index e06aa60..ffca65a 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -403,6 +403,11 @@ typedef union {
 #define EXC_PF 14
 #define EXC_MF 16
 
+/* Segment selector error code bits. */
+#define ECODE_EXT (1 << 0)
+#define ECODE_IDT (1 << 1)
+#define ECODE_TI  (1 << 2)
+
 /*
  * Instruction emulation:
  * Most instructions are emulated directly via a fragment of inline assembly
@@ -1318,6 +1323,115 @@ decode_segment(uint8_t modrm_reg)
     return decode_segment_failed;
 }
 
+/* Inject a software interrupt/exception, emulating if needed. */
+static int inject_swint(enum x86_swint_type type,
+                        uint8_t vector, uint8_t insn_len,
+                        struct x86_emulate_ctxt *ctxt,
+                        const struct x86_emulate_ops *ops)
+{
+    int rc, error_code, fault_type = EXC_GP;
+
+    fail_if(ops->inject_sw_interrupt == NULL);
+    fail_if(ops->inject_hw_exception == NULL);
+
+    /*
+     * Without hardware support, injecting software interrupts/exceptions is
+     * problematic.
+     *
+     * All software methods of generating exceptions (other than BOUND) yield
+     * traps, so eip in the exception frame needs to point after the
+     * instruction, not at it.
+     *
+     * However, if injecting it as a hardware exception causes a fault during
+     * delivery, our adjustment of eip will cause the fault to be reported
+     * after the faulting instruction, not pointing to it.
+     *
+     * Therefore, eip can only safely be wound forwards if we are certain that
+     * injecting an equivalent hardware exception won't fault, which means
+     * emulating everything the processor would do on a control transfer.
+     *
+     * However, emulation of complete control transfers is very complicated.
+     * All we care about is that guest userspace cannot avoid the descriptor
+     * DPL check by using the Xen emulator, and successfully invoke DPL=0
+     * descriptors.
+     *
+     * Any OS which would further fault during injection is going to receive a
+     * double fault anyway, and won't be in a position to care that the
+     * faulting eip is incorrect.
+     */
+
+    if ( (ctxt->swint_emulate == x86_swint_emulate_all) ||
+         ((ctxt->swint_emulate == x86_swint_emulate_icebp) &&
+          (type == x86_swint_icebp)) )
+    {
+        if ( !in_realmode(ctxt, ops) )
+        {
+            unsigned int idte_size = (ctxt->addr_size == 64) ? 16 : 8;
+            unsigned int idte_offset = vector * idte_size;
+            struct segment_register idtr;
+            uint32_t idte_ctl;
+
+            /* icebp sets the External Event bit despite being an instruction. */
+            error_code = (vector << 3) | ECODE_IDT |
+                (type == x86_swint_icebp ? ECODE_EXT : 0);
+
+            /*
+             * TODO - this does not cover the v8086 mode with CR4.VME case
+             * correctly, but falls on the safe side from the point of view of
+             * a 32bit OS.  Someone with many TUITs can see about reading the
+             * TSS Software Interrupt Redirection bitmap.
+             */
+            if ( (ctxt->regs->eflags & EFLG_VM) &&
+                 ((ctxt->regs->eflags & EFLG_IOPL) != EFLG_IOPL) )
+                goto raise_exn;
+
+            fail_if(ops->read_segment == NULL);
+            fail_if(ops->read == NULL);
+            if ( (rc = ops->read_segment(x86_seg_idtr, &idtr, ctxt)) )
+                goto done;
+
+            if ( (idte_offset + idte_size - 1) > idtr.limit )
+                goto raise_exn;
+
+            /*
+             * Should strictly speaking read all 8/16 bytes of an entry,
+             * but we currently only care about the dpl and present bits.
+             */
+            ops->read(x86_seg_none, idtr.base + idte_offset + 4,
+                      &idte_ctl, sizeof(idte_ctl), ctxt);
+
+            /* Is this entry present? */
+            if ( !(idte_ctl & (1u << 15)) )
+            {
+                fault_type = EXC_NP;
+                goto raise_exn;
+            }
+
+            /* icebp counts as a hardware event, and bypasses the dpl check. */
+            if ( type != x86_swint_icebp )
+            {
+                struct segment_register ss;
+
+                if ( (rc = ops->read_segment(x86_seg_ss, &ss, ctxt)) )
+                    goto done;
+
+                if ( ss.attr.fields.dpl > ((idte_ctl >> 13) & 3) )
+                    goto raise_exn;
+            }
+        }
+
+        ctxt->regs->eip += insn_len;
+    }
+
+    rc = ops->inject_sw_interrupt(type, vector, insn_len, ctxt);
+
+ done:
+    return rc;
+
+ raise_exn:
+    return ops->inject_hw_exception(fault_type, error_code, ctxt);
+}
+
 int
 x86_emulate(
     struct x86_emulate_ctxt *ctxt,
@@ -2637,11 +2751,9 @@ x86_emulate(
         src.val = insn_fetch_type(uint8_t);
         swint_type = x86_swint_int;
     swint:
-        fail_if(!in_realmode(ctxt, ops)); /* XSA-106 */
-        fail_if(ops->inject_sw_interrupt == NULL);
-        rc = ops->inject_sw_interrupt(swint_type, src.val,
-                                      _regs.eip - ctxt->regs->eip,
-                                      ctxt) ? : X86EMUL_EXCEPTION;
+        rc = inject_swint(swint_type, src.val,
+                          _regs.eip - ctxt->regs->eip,
+                          ctxt, ops) ? : X86EMUL_EXCEPTION;
         goto done;
 
     case 0xce: /* into */
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h
index b336e17..b059341 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.h
+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
@@ -59,6 +59,13 @@ enum x86_swint_type {
     x86_swint_int,   /* 0xcd $n */
 };
 
+/* How much help is required with software event injection? */
+enum x86_swint_emulation {
+    x86_swint_emulate_none, /* Hardware supports all software injection properly */
+    x86_swint_emulate_icebp,/* Help needed with `icebp` (0xf1) */
+    x86_swint_emulate_all,  /* Help needed with all software events */
+};
+
 /* 
  * Attribute for segment selector. This is a copy of bit 40:47 & 52:55 of the
  * segment descriptor. It happens to match the format of an AMD SVM VMCB.
@@ -388,6 +395,9 @@ struct x86_emulate_ctxt
     /* Set this if writes may have side effects. */
     uint8_t force_writeback;
 
+    /* Software event injection support. */
+    enum x86_swint_emulation swint_emulate;
+
     /* Retirement state, set by the emulator (valid only on X86EMUL_OKAY). */
     union {
         struct {
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.