[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[xen staging-4.19] x86/pv: Don't assume that INT $imm8 instructions are two bytes long



commit 260740e1aa1ff414dab65798e2d2d698c5e78b03
Author:     Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
AuthorDate: Tue Mar 10 14:45:29 2026 +0100
Commit:     Jan Beulich <jbeulich@xxxxxxxx>
CommitDate: Tue Mar 10 14:45:29 2026 +0100

    x86/pv: Don't assume that INT $imm8 instructions are two bytes long
    
    For INT $N instructions (besides $0x80 for which there is a dedicated fast
    path), handling is mostly fault-based because of DPL0 gates in the IDT.  This
    means that when the guest kernel allows the instruction too, Xen must
    increment %rip to the end of the instruction before passing a trap to the
    guest kernel.
    
    When an INT $N instruction has a prefix, it's longer than two bytes, and Xen
    will deliver the "trap" with %rip pointing into the middle of the instruction.
    
    Introduce a new pv_emulate_sw_interrupt() which uses x86_insn_length() to
    determine the instruction length, rather than assuming two.
    
    This is a change in behaviour for PV guests, but the prior behaviour cannot
    reasonably be said to be intentional.
    
    This change does not affect the INT $0x80 fastpath.  Prefixed INT $N
    instructions occur almost exclusively in test code or exploits, and INT $0x80
    appears to be the only user-usable interrupt gate in contemporary PV guests.
    
    Signed-off-by: Andrew Cooper <andrew.cooper3@xxxxxxxxxx>
    Reviewed-by: Jan Beulich <jbeulich@xxxxxxxx>
    master commit: a934a600c45486b7acfe8abc3658e284e64e5dd5
    master date: 2026-03-03 15:15:53 +0000
---
 xen/arch/x86/include/asm/pv/traps.h |  2 ++
 xen/arch/x86/pv/emul-priv-op.c      | 49 +++++++++++++++++++++++++++++++++++++
 xen/arch/x86/traps.c                |  3 +--
 3 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/xen/arch/x86/include/asm/pv/traps.h b/xen/arch/x86/include/asm/pv/traps.h
index 404f5b169c..5892dc9ef8 100644
--- a/xen/arch/x86/include/asm/pv/traps.h
+++ b/xen/arch/x86/include/asm/pv/traps.h
@@ -19,6 +19,7 @@ void pv_trap_init(void);
 int pv_raise_nmi(struct vcpu *v);
 
 int pv_emulate_privileged_op(struct cpu_user_regs *regs);
+void pv_emulate_sw_interrupt(struct cpu_user_regs *regs);
 void pv_emulate_gate_op(struct cpu_user_regs *regs);
 bool pv_emulate_invalid_op(struct cpu_user_regs *regs);
 
@@ -37,6 +38,7 @@ static inline void pv_trap_init(void) {}
 static inline int pv_raise_nmi(struct vcpu *v) { return -EOPNOTSUPP; }
 
 static inline int pv_emulate_privileged_op(struct cpu_user_regs *regs) { return 0; }
+static inline void pv_emulate_sw_interrupt(struct cpu_user_regs *regs) {}
 static inline void pv_emulate_gate_op(struct cpu_user_regs *regs) {}
 static inline bool pv_emulate_invalid_op(struct cpu_user_regs *regs) { return true; }
 
diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
index a21d499519..7753e44477 100644
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -8,6 +8,7 @@
  */
 
 #include <xen/domain_page.h>
+#include <xen/err.h>
 #include <xen/event.h>
 #include <xen/guest_access.h>
 #include <xen/hypercall.h>
@@ -1381,6 +1382,54 @@ int pv_emulate_privileged_op(struct cpu_user_regs *regs)
     return 0;
 }
 
+/*
+ * Hardware already decoded the INT $N instruction and determined that there
+ * was a permission issue (i.e. the DPL violation intended to trigger #GP).
+ * Xen has already determined that the guest kernel has permitted this
+ * software interrupt.
+ *
+ * All that is needed is the instruction length, to turn the fault into a
+ * trap.  All errors are turned back into the original #GP, as that's the
+ * action that really happened.
+ */
+void pv_emulate_sw_interrupt(struct cpu_user_regs *regs)
+{
+    struct vcpu *curr = current;
+    struct domain *currd = curr->domain;
+    struct priv_op_ctxt ctxt = {
+        .ctxt.regs = regs,
+        .ctxt.lma = !is_pv_32bit_domain(currd),
+    };
+    struct x86_emulate_state *state;
+    uint8_t vector = regs->error_code >> 3;
+    unsigned int len, ar;
+
+    if ( !pv_emul_read_descriptor(regs->cs, curr, &ctxt.cs.base,
+                                  &ctxt.cs.limit, &ar, 1) ||
+         !(ar & _SEGMENT_S) ||
+         !(ar & _SEGMENT_P) ||
+         !(ar & _SEGMENT_CODE) )
+        goto error;
+
+    state = x86_decode_insn(&ctxt.ctxt, insn_fetch);
+    if ( IS_ERR_OR_NULL(state) )
+        goto error;
+
+    len = x86_insn_length(state, &ctxt.ctxt);
+    x86_emulate_free_state(state);
+
+    /* Note: Checked slightly late to simplify 'state' handling. */
+    if ( ctxt.ctxt.opcode != 0xcd /* INT $imm8 */ )
+        goto error;
+
+    regs->rip += len;
+    pv_inject_sw_interrupt(vector);
+    return;
+
+ error:
+    pv_inject_hw_exception(X86_EXC_GP, regs->error_code);
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index af90d8b27d..0c8c935ee9 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1300,8 +1300,7 @@ void do_general_protection(struct cpu_user_regs *regs)
 
         if ( permit_softint(TI_GET_DPL(ti), v, regs) )
         {
-            regs->rip += 2;
-            pv_inject_sw_interrupt(vector);
+            pv_emulate_sw_interrupt(regs);
             return;
         }
     }
--
generated by git-patchbot for /home/xen/git/xen.git#staging-4.19



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.