
[Xen-changelog] [xen-unstable] [XEN] Simplify x86_emulate interface.



# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Node ID 88935ae47fa92feeacaf25bf2c07770206ac14fa
# Parent  d1b0a5adaeabffa3a1a68ae08bc974f5fb28d2c7
[XEN] Simplify x86_emulate interface.

 - No distinction between 'special' and 'normal' memory accesses.
 - No reliance on a caller-supplied %cr2 value.
 - Memory operations include a segment identifier, allowing callers to
   support non-zero-based segments (see the sketch below).
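
As an illustration only (not part of this patch), a caller that wants
non-zero-based segments can fold its own segment base into the linear
address inside each hook. In the sketch below, seg_base() is a
hypothetical caller-supplied helper; the emulator itself only ever
passes the (seg, offset) pair:

    /*
     * Sketch of a caller-side read hook against the new interface.
     * seg_base() is assumed to return the base of an X86_SEG_* selector
     * from the caller's own cached segment state; it is not part of
     * this changeset.
     */
    static int my_read(
        unsigned int seg,          /* X86_SEG_CS ... X86_SEG_GS   */
        unsigned long offset,      /* offset within that segment  */
        unsigned long *val,
        unsigned int bytes,
        struct x86_emulate_ctxt *ctxt)
    {
        unsigned long addr = seg_base(ctxt, seg) + offset;

        *val = 0;
        if ( copy_from_user((void *)val, (void *)addr, bytes) != 0 )
            return X86EMUL_PROPAGATE_FAULT; /* caller injects the fault */
        return X86EMUL_CONTINUE;
    }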

TODO:
 1. HVM emulations should take into account segment base, limit, and
    attributes.
 2. We ought to obey the stack-size attribute on PUSH/POP instructions.
    We could extend the mode input field, add an extra call-out hook,
    or perhaps we don't care at all...

Signed-off-by: Keir Fraser <keir@xxxxxxxxxxxxx>
---
 tools/tests/test_x86_emulator.c   |   76 +++---
 xen/arch/x86/mm.c                 |   97 ++++---
 xen/arch/x86/mm/shadow/common.c   |   92 +++----
 xen/arch/x86/mm/shadow/multi.c    |    1 
 xen/arch/x86/x86_emulate.c        |  465 ++++++++++++++++----------------------
 xen/include/asm-x86/x86_emulate.h |  113 ++-------
 6 files changed, 381 insertions(+), 463 deletions(-)
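
For anyone skimming the hunks below, the caller-visible change is mechanical:
the six old hooks collapse into four, each hook now receives a (seg, offset)
pair instead of a pre-computed linear address, and ctxt.cr2 disappears. A
minimal before/after sketch (handler names are placeholders, not functions
in the tree):

    /* Old interface: flat linear addresses, with %cr2 carried in the ctxt. */
    static struct x86_emulate_ops old_ops = {
        .read_std           = old_read,      /* insn fetch, stack ops, ... */
        .write_std          = old_write,
        .read_emulated      = old_read,
        .write_emulated     = old_write,
        .cmpxchg_emulated   = old_cmpxchg,
        .cmpxchg8b_emulated = old_cmpxchg8b,
    };

    /* New interface (this patch): every hook takes (seg, offset). */
    static struct x86_emulate_ops new_ops = {
        .read      = new_read,
        .write     = new_write,
        .cmpxchg   = new_cmpxchg,
        .cmpxchg8b = new_cmpxchg8b,
    };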

diff -r d1b0a5adaeab -r 88935ae47fa9 tools/tests/test_x86_emulator.c
--- a/tools/tests/test_x86_emulator.c   Wed Nov 29 23:40:40 2006 +0000
+++ b/tools/tests/test_x86_emulator.c   Thu Nov 30 10:57:28 2006 +0000
@@ -17,12 +17,14 @@ typedef int64_t            s64;
 
 #define PFEC_write_access (1U<<1)
 
-static int read_any(
-    unsigned long addr,
+static int read(
+    unsigned int seg,
+    unsigned long offset,
     unsigned long *val,
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
+    unsigned long addr = offset;
     switch ( bytes )
     {
     case 1: *val = *(u8 *)addr; break;
@@ -33,12 +35,14 @@ static int read_any(
     return X86EMUL_CONTINUE;
 }
 
-static int write_any(
-    unsigned long addr,
+static int write(
+    unsigned int seg,
+    unsigned long offset,
     unsigned long val,
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
+    unsigned long addr = offset;
     switch ( bytes )
     {
     case 1: *(u8 *)addr = (u8)val; break;
@@ -49,13 +53,15 @@ static int write_any(
     return X86EMUL_CONTINUE;
 }
 
-static int cmpxchg_any(
-    unsigned long addr,
+static int cmpxchg(
+    unsigned int seg,
+    unsigned long offset,
     unsigned long old,
     unsigned long new,
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
+    unsigned long addr = offset;
     switch ( bytes )
     {
     case 1: *(u8 *)addr = (u8)new; break;
@@ -66,21 +72,23 @@ static int cmpxchg_any(
     return X86EMUL_CONTINUE;
 }
 
-static int cmpxchg8b_any(
-    unsigned long addr,
+static int cmpxchg8b(
+    unsigned int seg,
+    unsigned long offset,
     unsigned long old_lo,
     unsigned long old_hi,
     unsigned long new_lo,
     unsigned long new_hi,
     struct x86_emulate_ctxt *ctxt)
 {
+    unsigned long addr = offset;
     ((unsigned long *)addr)[0] = new_lo;
     ((unsigned long *)addr)[1] = new_hi;
     return X86EMUL_CONTINUE;
 }
 
 static struct x86_emulate_ops emulops = {
-    read_any, write_any, read_any, write_any, cmpxchg_any, cmpxchg8b_any
+    read, write, cmpxchg, cmpxchg8b
 };
 
 int main(int argc, char **argv)
@@ -108,7 +116,7 @@ int main(int argc, char **argv)
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
     regs.error_code = PFEC_write_access;
-    ctxt.cr2    = (unsigned long)res;
+    regs.eax    = (unsigned long)res;
     *res        = 0x7FFFFFFF;
     rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
@@ -127,7 +135,7 @@ int main(int argc, char **argv)
 #else
     regs.ecx    = 0x12345678UL;
 #endif
-    ctxt.cr2    = (unsigned long)res;
+    regs.eax    = (unsigned long)res;
     regs.error_code = 0;
     rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
@@ -142,7 +150,7 @@ int main(int argc, char **argv)
     regs.eflags = 0x200;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = ~0UL;
-    ctxt.cr2    = (unsigned long)res;
+    regs.eax    = (unsigned long)res;
     regs.error_code = 0;
     rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
@@ -152,13 +160,13 @@ int main(int argc, char **argv)
         goto fail;
     printf("okay\n");
 
-    printf("%-40s", "Testing lock cmpxchgb %%cl,(%%eax)...");
-    instr[0] = 0xf0; instr[1] = 0x0f; instr[2] = 0xb0; instr[3] = 0x08;
+    printf("%-40s", "Testing lock cmpxchgb %%cl,(%%ebx)...");
+    instr[0] = 0xf0; instr[1] = 0x0f; instr[2] = 0xb0; instr[3] = 0x0b;
     regs.eflags = 0x200;
     regs.eip    = (unsigned long)&instr[0];
     regs.eax    = 0x92345677UL;
     regs.ecx    = 0xAA;
-    ctxt.cr2    = (unsigned long)res;
+    regs.ebx    = (unsigned long)res;
     regs.error_code = PFEC_write_access;
     rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
@@ -169,13 +177,13 @@ int main(int argc, char **argv)
         goto fail;
     printf("okay\n");
 
-    printf("%-40s", "Testing lock cmpxchgb %%cl,(%%eax)...");
-    instr[0] = 0xf0; instr[1] = 0x0f; instr[2] = 0xb0; instr[3] = 0x08;
+    printf("%-40s", "Testing lock cmpxchgb %%cl,(%%ebx)...");
+    instr[0] = 0xf0; instr[1] = 0x0f; instr[2] = 0xb0; instr[3] = 0x0b;
     regs.eflags = 0x200;
     regs.eip    = (unsigned long)&instr[0];
     regs.eax    = 0xAABBCC77UL;
     regs.ecx    = 0xFF;
-    ctxt.cr2    = (unsigned long)res;
+    regs.ebx    = (unsigned long)res;
     regs.error_code = PFEC_write_access;
     rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
@@ -192,7 +200,7 @@ int main(int argc, char **argv)
     regs.eflags = 0x200;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    ctxt.cr2    = (unsigned long)res;
+    regs.eax    = (unsigned long)res;
     regs.error_code = PFEC_write_access;
     rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
@@ -203,14 +211,14 @@ int main(int argc, char **argv)
         goto fail;
     printf("okay\n");
 
-    printf("%-40s", "Testing lock cmpxchgl %%ecx,(%%eax)...");
-    instr[0] = 0xf0; instr[1] = 0x0f; instr[2] = 0xb1; instr[3] = 0x08;
+    printf("%-40s", "Testing lock cmpxchgl %%ecx,(%%ebx)...");
+    instr[0] = 0xf0; instr[1] = 0x0f; instr[2] = 0xb1; instr[3] = 0x0b;
     regs.eflags = 0x200;
     *res        = 0x923456AA;
     regs.eip    = (unsigned long)&instr[0];
     regs.eax    = 0x923456AAUL;
     regs.ecx    = 0xDDEEFF00L;
-    ctxt.cr2    = (unsigned long)res;
+    regs.ebx    = (unsigned long)res;
     regs.error_code = PFEC_write_access;
     rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
@@ -230,7 +238,6 @@ int main(int argc, char **argv)
     regs.esi    = (unsigned long)res + 0;
     regs.edi    = (unsigned long)res + 2;
     regs.error_code = 0; /* read fault */
-    ctxt.cr2    = regs.esi;
     rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (*res != 0x44554455) ||
@@ -248,13 +255,28 @@ int main(int argc, char **argv)
     regs.eflags = 0x200;
     regs.eip    = (unsigned long)&instr[0];
     regs.edi    = (unsigned long)res;
-    ctxt.cr2    = regs.edi;
     regs.error_code = PFEC_write_access;
     rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
          (*res != 0x2233445D) ||
          ((regs.eflags&0x201) != 0x201) ||
          (regs.eip != (unsigned long)&instr[4]) )
+        goto fail;
+    printf("okay\n");
+
+    printf("%-40s", "Testing btrl %eax,(%edi)...");
+    instr[0] = 0x0f; instr[1] = 0xb3; instr[2] = 0x07;
+    *res        = 0x2233445F;
+    regs.eflags = 0x200;
+    regs.eip    = (unsigned long)&instr[0];
+    regs.eax    = -32;
+    regs.edi    = (unsigned long)(res+1);
+    regs.error_code = PFEC_write_access;
+    rc = x86_emulate_memop(&ctxt, &emulops);
+    if ( (rc != 0) || 
+         (*res != 0x2233445E) ||
+         ((regs.eflags&0x201) != 0x201) ||
+         (regs.eip != (unsigned long)&instr[3]) )
         goto fail;
     printf("okay\n");
 
@@ -270,7 +292,6 @@ int main(int argc, char **argv)
     regs.ecx    = 0xCCCCFFFF;
     regs.eip    = (unsigned long)&instr[0];
     regs.edi    = (unsigned long)res;
-    ctxt.cr2    = regs.edi;
     regs.error_code = PFEC_write_access;
     rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
@@ -285,7 +306,6 @@ int main(int argc, char **argv)
     instr[0] = 0x0f; instr[1] = 0xc7; instr[2] = 0x0f;
     regs.eip    = (unsigned long)&instr[0];
     regs.edi    = (unsigned long)res;
-    ctxt.cr2    = regs.edi;
     regs.error_code = PFEC_write_access;
     rc = x86_emulate_memop(&ctxt, &emulops);
     if ( (rc != 0) || 
@@ -302,7 +322,7 @@ int main(int argc, char **argv)
     instr[0] = 0x0f; instr[1] = 0xbe; instr[2] = 0x08;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    ctxt.cr2    = (unsigned long)res;
+    regs.eax    = (unsigned long)res;
     *res        = 0x82;
     regs.error_code = 0;
     rc = x86_emulate_memop(&ctxt, &emulops);
@@ -318,7 +338,7 @@ int main(int argc, char **argv)
     instr[0] = 0x0f; instr[1] = 0xb7; instr[2] = 0x08;
     regs.eip    = (unsigned long)&instr[0];
     regs.ecx    = 0x12345678;
-    ctxt.cr2    = (unsigned long)res;
+    regs.eax    = (unsigned long)res;
     *res        = 0x1234aa82;
     regs.error_code = 0;
     rc = x86_emulate_memop(&ctxt, &emulops);
diff -r d1b0a5adaeab -r 88935ae47fa9 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Wed Nov 29 23:40:40 2006 +0000
+++ b/xen/arch/x86/mm.c Thu Nov 30 10:57:28 2006 +0000
@@ -3033,12 +3033,39 @@ long arch_memory_op(int op, XEN_GUEST_HA
  * Writable Pagetables
  */
 
+struct ptwr_emulate_ctxt {
+    struct x86_emulate_ctxt ctxt;
+    unsigned long cr2;
+    l1_pgentry_t  pte;
+};
+
+static int ptwr_emulated_read(
+    unsigned int seg,
+    unsigned long offset,
+    unsigned long *val,
+    unsigned int bytes,
+    struct x86_emulate_ctxt *ctxt)
+{
+    unsigned int rc;
+    unsigned long addr = offset;
+
+    *val = 0;
+    if ( (rc = copy_from_user((void *)val, (void *)addr, bytes)) != 0 )
+    {
+        propagate_page_fault(addr + bytes - rc, 0); /* read fault */
+        return X86EMUL_PROPAGATE_FAULT;
+    }
+
+    return X86EMUL_CONTINUE;
+}
+
 static int ptwr_emulated_update(
     unsigned long addr,
     paddr_t old,
     paddr_t val,
     unsigned int bytes,
-    unsigned int do_cmpxchg)
+    unsigned int do_cmpxchg,
+    struct ptwr_emulate_ctxt *ptwr_ctxt)
 {
     unsigned long gmfn, mfn;
     struct page_info *page;
@@ -3046,11 +3073,11 @@ static int ptwr_emulated_update(
     struct vcpu *v = current;
     struct domain *d = v->domain;
 
-    /* Aligned access only, thank you. */
-    if ( !access_ok(addr, bytes) || ((addr & (bytes-1)) != 0) )
-    {
-        MEM_LOG("ptwr_emulate: Unaligned or bad size ptwr access (%d, %lx)",
-                bytes, addr);
+    /* Only allow naturally-aligned stores within the original %cr2 page. */
+    if ( unlikely(((addr^ptwr_ctxt->cr2) & PAGE_MASK) || (addr & (bytes-1))) )
+    {
+        MEM_LOG("Bad ptwr access (cr2=%lx, addr=%lx, bytes=%u)",
+                ptwr_ctxt->cr2, addr, bytes);
         return X86EMUL_UNHANDLEABLE;
     }
 
@@ -3079,17 +3106,9 @@ static int ptwr_emulated_update(
         old  |= full;
     }
 
-    /* Read the PTE that maps the page being updated. */
-    guest_get_eff_l1e(v, addr, &pte);
-    if ( unlikely(!(l1e_get_flags(pte) & _PAGE_PRESENT)) )
-    {
-        MEM_LOG("%s: Cannot get L1 PTE for guest address %lx",
-                __func__, addr);
-        return X86EMUL_UNHANDLEABLE;
-    }
-
-    gmfn  = l1e_get_pfn(pte);
-    mfn = gmfn_to_mfn(d, gmfn);
+    pte  = ptwr_ctxt->pte;
+    gmfn = l1e_get_pfn(pte);
+    mfn  = gmfn_to_mfn(d, gmfn);
     page = mfn_to_page(mfn);
 
     /* We are looking only for read-only mappings of p.t. pages. */
@@ -3164,26 +3183,33 @@ static int ptwr_emulated_update(
 }
 
 static int ptwr_emulated_write(
-    unsigned long addr,
+    unsigned int seg,
+    unsigned long offset,
     unsigned long val,
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    return ptwr_emulated_update(addr, 0, val, bytes, 0);
+    return ptwr_emulated_update(
+        offset, 0, val, bytes, 0,
+        container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
 }
 
 static int ptwr_emulated_cmpxchg(
-    unsigned long addr,
+    unsigned int seg,
+    unsigned long offset,
     unsigned long old,
     unsigned long new,
     unsigned int bytes,
     struct x86_emulate_ctxt *ctxt)
 {
-    return ptwr_emulated_update(addr, old, new, bytes, 1);
+    return ptwr_emulated_update(
+        offset, old, new, bytes, 1,
+        container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
 }
 
 static int ptwr_emulated_cmpxchg8b(
-    unsigned long addr,
+    unsigned int seg,
+    unsigned long offset,
     unsigned long old,
     unsigned long old_hi,
     unsigned long new,
@@ -3192,18 +3218,16 @@ static int ptwr_emulated_cmpxchg8b(
 {
     if ( CONFIG_PAGING_LEVELS == 2 )
         return X86EMUL_UNHANDLEABLE;
-    else
-        return ptwr_emulated_update(
-            addr, ((u64)old_hi << 32) | old, ((u64)new_hi << 32) | new, 8, 1);
+    return ptwr_emulated_update(
+        offset, ((u64)old_hi << 32) | old, ((u64)new_hi << 32) | new, 8, 1,
+        container_of(ctxt, struct ptwr_emulate_ctxt, ctxt));
 }
 
 static struct x86_emulate_ops ptwr_emulate_ops = {
-    .read_std           = x86_emulate_read_std,
-    .write_std          = x86_emulate_write_std,
-    .read_emulated      = x86_emulate_read_std,
-    .write_emulated     = ptwr_emulated_write,
-    .cmpxchg_emulated   = ptwr_emulated_cmpxchg,
-    .cmpxchg8b_emulated = ptwr_emulated_cmpxchg8b
+    .read      = ptwr_emulated_read,
+    .write     = ptwr_emulated_write,
+    .cmpxchg   = ptwr_emulated_cmpxchg,
+    .cmpxchg8b = ptwr_emulated_cmpxchg8b
 };
 
 /* Write page fault handler: check if guest is trying to modify a PTE. */
@@ -3214,7 +3238,7 @@ int ptwr_do_page_fault(struct vcpu *v, u
     unsigned long     pfn;
     struct page_info *page;
     l1_pgentry_t      pte;
-    struct x86_emulate_ctxt emul_ctxt;
+    struct ptwr_emulate_ctxt ptwr_ctxt;
 
     LOCK_BIGLOCK(d);
 
@@ -3235,10 +3259,11 @@ int ptwr_do_page_fault(struct vcpu *v, u
          (page_get_owner(page) != d) )
         goto bail;
 
-    emul_ctxt.regs = guest_cpu_user_regs();
-    emul_ctxt.cr2  = addr;
-    emul_ctxt.mode = X86EMUL_MODE_HOST;
-    if ( x86_emulate_memop(&emul_ctxt, &ptwr_emulate_ops) )
+    ptwr_ctxt.ctxt.regs = guest_cpu_user_regs();
+    ptwr_ctxt.ctxt.mode = X86EMUL_MODE_HOST;
+    ptwr_ctxt.cr2       = addr;
+    ptwr_ctxt.pte       = pte;
+    if ( x86_emulate_memop(&ptwr_ctxt.ctxt, &ptwr_emulate_ops) )
         goto bail;
 
     UNLOCK_BIGLOCK(d);
diff -r d1b0a5adaeab -r 88935ae47fa9 xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c   Wed Nov 29 23:40:40 2006 +0000
+++ b/xen/arch/x86/mm/shadow/common.c   Thu Nov 30 10:57:28 2006 +0000
@@ -70,11 +70,14 @@ int _shadow_mode_refcounts(struct domain
  */
 
 static int
-sh_x86_emulate_read_std(unsigned long addr,
-                         unsigned long *val,
-                         unsigned int bytes,
-                         struct x86_emulate_ctxt *ctxt)
-{
+sh_x86_emulate_read(unsigned int seg,
+                    unsigned long offset,
+                    unsigned long *val,
+                    unsigned int bytes,
+                    struct x86_emulate_ctxt *ctxt)
+{
+    unsigned long addr = offset;
+
     *val = 0;
     // XXX -- this is WRONG.
     //        It entirely ignores the permissions in the page tables.
@@ -99,40 +102,15 @@ sh_x86_emulate_read_std(unsigned long ad
 }
 
 static int
-sh_x86_emulate_write_std(unsigned long addr,
-                          unsigned long val,
-                          unsigned int bytes,
-                          struct x86_emulate_ctxt *ctxt)
-{
-#if 0
+sh_x86_emulate_write(unsigned int seg,
+                     unsigned long offset,
+                     unsigned long val,
+                     unsigned int bytes,
+                     struct x86_emulate_ctxt *ctxt)
+{
     struct vcpu *v = current;
-    SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n",
-                  v->domain->domain_id, v->vcpu_id, addr, val, bytes);
-#endif
-
-    // XXX -- this is WRONG.
-    //        It entirely ignores the permissions in the page tables.
-    //        In this case, that includes user vs supervisor, and
-    //        write access.
-    //
-    if ( hvm_copy_to_guest_virt(addr, &val, bytes) == 0 )
-        return X86EMUL_CONTINUE;
-
-    /* If we got here, there was nothing mapped here, or a bad GFN 
-     * was mapped here.  This should never happen: we're here because
-     * of a write fault at the end of the instruction we're emulating,
-     * which should be handled by sh_x86_emulate_write_emulated. */ 
-    SHADOW_PRINTK("write failed to va %#lx\n", addr);
-    return X86EMUL_PROPAGATE_FAULT;
-}
-
-static int
-sh_x86_emulate_write_emulated(unsigned long addr,
-                               unsigned long val,
-                               unsigned int bytes,
-                               struct x86_emulate_ctxt *ctxt)
-{
-    struct vcpu *v = current;
+    unsigned long addr = offset;
+
 #if 0
     SHADOW_PRINTK("d=%u v=%u a=%#lx v=%#lx bytes=%u\n",
                   v->domain->domain_id, v->vcpu_id, addr, val, bytes);
@@ -141,13 +119,16 @@ sh_x86_emulate_write_emulated(unsigned l
 }
 
 static int 
-sh_x86_emulate_cmpxchg_emulated(unsigned long addr,
-                                 unsigned long old,
-                                 unsigned long new,
-                                 unsigned int bytes,
-                                 struct x86_emulate_ctxt *ctxt)
+sh_x86_emulate_cmpxchg(unsigned int seg,
+                       unsigned long offset,
+                       unsigned long old,
+                       unsigned long new,
+                       unsigned int bytes,
+                       struct x86_emulate_ctxt *ctxt)
 {
     struct vcpu *v = current;
+    unsigned long addr = offset;
+
 #if 0
     SHADOW_PRINTK("d=%u v=%u a=%#lx o?=%#lx n:=%#lx bytes=%u\n",
                    v->domain->domain_id, v->vcpu_id, addr, old, new, bytes);
@@ -157,14 +138,17 @@ sh_x86_emulate_cmpxchg_emulated(unsigned
 }
 
 static int 
-sh_x86_emulate_cmpxchg8b_emulated(unsigned long addr,
-                                   unsigned long old_lo,
-                                   unsigned long old_hi,
-                                   unsigned long new_lo,
-                                   unsigned long new_hi,
-                                   struct x86_emulate_ctxt *ctxt)
+sh_x86_emulate_cmpxchg8b(unsigned int seg,
+                         unsigned long offset,
+                         unsigned long old_lo,
+                         unsigned long old_hi,
+                         unsigned long new_lo,
+                         unsigned long new_hi,
+                         struct x86_emulate_ctxt *ctxt)
 {
     struct vcpu *v = current;
+    unsigned long addr = offset;
+
 #if 0
     SHADOW_PRINTK("d=%u v=%u a=%#lx o?=%#lx:%lx n:=%#lx:%lx\n",
                    v->domain->domain_id, v->vcpu_id, addr, old_hi, old_lo,
@@ -176,12 +160,10 @@ sh_x86_emulate_cmpxchg8b_emulated(unsign
 
 
 struct x86_emulate_ops shadow_emulator_ops = {
-    .read_std           = sh_x86_emulate_read_std,
-    .write_std          = sh_x86_emulate_write_std,
-    .read_emulated      = sh_x86_emulate_read_std,
-    .write_emulated     = sh_x86_emulate_write_emulated,
-    .cmpxchg_emulated   = sh_x86_emulate_cmpxchg_emulated,
-    .cmpxchg8b_emulated = sh_x86_emulate_cmpxchg8b_emulated,
+    .read      = sh_x86_emulate_read,
+    .write     = sh_x86_emulate_write,
+    .cmpxchg   = sh_x86_emulate_cmpxchg,
+    .cmpxchg8b = sh_x86_emulate_cmpxchg8b,
 };
 
 /**************************************************************************/
diff -r d1b0a5adaeab -r 88935ae47fa9 xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c    Wed Nov 29 23:40:40 2006 +0000
+++ b/xen/arch/x86/mm/shadow/multi.c    Thu Nov 30 10:57:28 2006 +0000
@@ -2812,7 +2812,6 @@ static int sh_page_fault(struct vcpu *v,
     if ( is_hvm_domain(d) )
         hvm_store_cpu_guest_regs(v, regs, NULL);
     emul_ctxt.regs = regs;
-    emul_ctxt.cr2  = va;
     emul_ctxt.mode = (is_hvm_domain(d) ?
                       hvm_guest_x86_mode(v) : X86EMUL_MODE_HOST);
 
diff -r d1b0a5adaeab -r 88935ae47fa9 xen/arch/x86/x86_emulate.c
--- a/xen/arch/x86/x86_emulate.c        Wed Nov 29 23:40:40 2006 +0000
+++ b/xen/arch/x86/x86_emulate.c        Thu Nov 30 10:57:28 2006 +0000
@@ -15,9 +15,9 @@
 #include <xen/config.h>
 #include <xen/types.h>
 #include <xen/lib.h>
-#include <xen/mm.h>
 #include <asm/regs.h>
 #define dprintf(_f, _a...) gdprintk(XENLOG_WARNING, _f , ## _a )
+#undef cmpxchg
 #endif
 #include <asm-x86/x86_emulate.h>
 
@@ -38,6 +38,7 @@
 /* Operand sizes: 8-bit operands or specified/overridden size. */
 #define ByteOp      (1<<0) /* 8-bit operands. */
 /* Destination operand type. */
+#define DstBitBase  (0<<1) /* Memory operand, bit string. */
 #define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */
 #define DstReg      (2<<1) /* Register operand. */
 #define DstMem      (3<<1) /* Memory operand. */
@@ -111,8 +112,8 @@ static uint8_t opcode_table[256] = {
     /* 0x90 - 0x9F */
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     /* 0xA0 - 0xA7 */
-    ByteOp|DstReg|SrcMem|Mov, DstReg|SrcMem|Mov,
-    ByteOp|DstMem|SrcReg|Mov, DstMem|SrcReg|Mov,
+    ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
+    ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
     ByteOp|ImplicitOps|Mov, ImplicitOps|Mov, 0, 0,
     /* 0xA8 - 0xAF */
     0, 0, ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
@@ -170,14 +171,15 @@ static uint8_t twobyte_table[256] = {
     /* 0x90 - 0x9F */
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     /* 0xA0 - 0xA7 */
-    0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0, 
+    0, 0, 0, DstBitBase|SrcReg|ModRM, 0, 0, 0, 0, 
     /* 0xA8 - 0xAF */
-    0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0,
+    0, 0, 0, DstBitBase|SrcReg|ModRM, 0, 0, 0, 0,
     /* 0xB0 - 0xB7 */
-    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, DstMem|SrcReg|ModRM,
+    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+    0, DstBitBase|SrcReg|ModRM,
     0, 0, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov,
     /* 0xB8 - 0xBF */
-    0, 0, DstMem|SrcImmByte|ModRM, DstMem|SrcReg|ModRM,
+    0, 0, DstBitBase|SrcImmByte|ModRM, DstBitBase|SrcReg|ModRM,
     0, 0, ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov,
     /* 0xC0 - 0xCF */
     0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -193,7 +195,12 @@ struct operand {
 struct operand {
     enum { OP_REG, OP_MEM, OP_IMM } type;
     unsigned int  bytes;
-    unsigned long val, orig_val, *ptr;
+    unsigned long val, orig_val;
+    /* OP_REG: Pointer to register field. */
+    unsigned long *reg;
+    /* OP_MEM: Segment and offset. */
+    unsigned int  mem_seg;
+    unsigned long mem_off;
 };
 
 /* EFLAGS bit definitions. */
@@ -366,24 +373,23 @@ do{ __asm__ __volatile__ (              
 #endif /* __i386__ */
 
 /* Fetch next part of the instruction being emulated. */
-#define _insn_fetch(_size)                                              \
-({ unsigned long _x, _ptr = _regs.eip;                                  \
-   if ( mode == X86EMUL_MODE_REAL ) _ptr += _regs.cs << 4;              \
-   rc = ops->read_std(_ptr, &_x, (_size), ctxt);                        \
-   if ( rc != 0 )                                                       \
-       goto done;                                                       \
-   _regs.eip += (_size);                                                \
-   _x;                                                                  \
+#define _insn_fetch(_size)                                      \
+({ unsigned long _x;                                            \
+   rc = ops->read(_regs.cs, _regs.eip, &_x, (_size), ctxt);     \
+   if ( rc != 0 )                                               \
+       goto done;                                               \
+   _regs.eip += (_size);                                        \
+   _x;                                                          \
 })
 #define insn_fetch(_type) ((_type)_insn_fetch(sizeof(_type)))
 
-/* Access/update address held in a register, based on addressing mode. */
-#define register_address(sel, reg)                                      \
-({  unsigned long __reg = (reg);                                        \
-    (((mode == X86EMUL_MODE_REAL) ? ((unsigned long)(sel) << 4) : 0) +  \
-     ((ad_bytes == sizeof(unsigned long)) ? __reg :                     \
-      (__reg & ((1UL << (ad_bytes << 3)) - 1))));                       \
+#define truncate_ea(ea)                                 \
+({  unsigned long __ea = (ea);                          \
+    ((ad_bytes == sizeof(unsigned long)) ? __ea :       \
+     (__ea & ((1UL << (ad_bytes << 3)) - 1)));          \
 })
+
+/* Update address held in a register, based on addressing mode. */
 #define register_address_increment(reg, inc)                            \
 do {                                                                    \
     int _inc = (inc); /* signed type ensures sign extension to long */  \
@@ -393,17 +399,6 @@ do {                                    
         (reg) = ((reg) & ~((1UL << (ad_bytes << 3)) - 1)) |             \
                 (((reg) + _inc) & ((1UL << (ad_bytes << 3)) - 1));      \
 } while (0)
-
-/*
- * We cannot handle a page fault on a data access that straddles two pages
- * and faults on the second page. This is because CR2 is not equal to the
- * memory operand's effective address in this case. Rather than fix up the
- * effective address it is okay for us to fail the emulation.
- */
-#define page_boundary_test() do {                               \
-    if ( ((cr2 & (PAGE_SIZE-1)) == 0) && ((ea & 7) != 0) )      \
-        goto bad_ea;                                            \
-} while ( 0 )
 
 void *
 decode_register(
@@ -452,16 +447,12 @@ dump_instr(
 {
 #ifdef __XEN__
     int i;
-    unsigned long x, pc;
-
-    pc = ctxt->regs->eip;
-    if ( ctxt->mode == X86EMUL_MODE_REAL )
-        pc += ctxt->regs->cs << 4;
+    unsigned long x, eip = ctxt->regs->eip;
 
     dprintf("Instr:");
-    for ( i = 0; i < 16; i++, pc++ )
-    {
-        if ( ops->read_std(pc, &x, 1, ctxt) != 0 )
+    for ( i = 0; i < 16; i++, eip++ )
+    {
+        if ( ops->read(ctxt->regs->cs, eip, &x, 1, ctxt) != 0 )
             printk(" ??");
         else
             printk(" %02x", (uint8_t)x);
@@ -480,19 +471,13 @@ x86_emulate_memop(
 
     uint8_t b, d, sib, sib_index, sib_base, twobyte = 0, rex_prefix = 0;
     uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
-    uint16_t *seg = &_regs.ds; /* override segment */
     unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i;
     int rc = 0;
     struct operand src, dst;
-    unsigned long ea = 0, cr2 = ctxt->cr2;
     int mode = ctxt->mode;
 
-    /*
-     * We do not emulate faults on instruction fetch. We assume that the
-     * guest never executes out of a special memory area.
-     */
-    if ( _regs.error_code & PFEC_insn_fetch )
-        return -1;
+    unsigned int  ea_seg = X86_SEG_DS;
+    unsigned long ea_off = 0;
 
     switch ( mode )
     {
@@ -528,22 +513,22 @@ x86_emulate_memop(
                 ad_bytes ^= 6;  /* switch between 2/4 bytes */
             break;
         case 0x2e: /* CS override */
-            seg = &_regs.cs;
+            ea_seg = X86_SEG_CS;
             break;
         case 0x3e: /* DS override */
-            seg = &_regs.ds;
+            ea_seg = X86_SEG_DS;
             break;
         case 0x26: /* ES override */
-            seg = &_regs.es;
+            ea_seg = X86_SEG_ES;
             break;
         case 0x64: /* FS override */
-            seg = &_regs.fs;
+            ea_seg = X86_SEG_FS;
             break;
         case 0x65: /* GS override */
-            seg = &_regs.gs;
+            ea_seg = X86_SEG_GS;
             break;
         case 0x36: /* SS override */
-            seg = &_regs.ss;
+            ea_seg = X86_SEG_SS;
             break;
         case 0xf0: /* LOCK */
             lock_prefix = 1;
@@ -604,20 +589,20 @@ x86_emulate_memop(
             /* 16-bit ModR/M decode. */
             switch ( modrm_rm )
             {
-            case 0: ea = _regs.ebx + _regs.esi; break;
-            case 1: ea = _regs.ebx + _regs.edi; break;
-            case 2: ea = _regs.ebp + _regs.esi; break;
-            case 3: ea = _regs.ebp + _regs.edi; break;
-            case 4: ea = _regs.esi; break;
-            case 5: ea = _regs.edi; break;
-            case 6: ea = _regs.ebp; break;
-            case 7: ea = _regs.ebx; break;
+            case 0: ea_off = _regs.ebx + _regs.esi; break;
+            case 1: ea_off = _regs.ebx + _regs.edi; break;
+            case 2: ea_off = _regs.ebp + _regs.esi; break;
+            case 3: ea_off = _regs.ebp + _regs.edi; break;
+            case 4: ea_off = _regs.esi; break;
+            case 5: ea_off = _regs.edi; break;
+            case 6: ea_off = _regs.ebp; break;
+            case 7: ea_off = _regs.ebx; break;
             }
             switch ( modrm_mod )
             {
-            case 0: if ( modrm_rm == 6 ) ea = insn_fetch(int16_t); break;
-            case 1: ea += insn_fetch(int8_t);  break;
-            case 2: ea += insn_fetch(int16_t); break;
+            case 0: if ( modrm_rm == 6 ) ea_off = insn_fetch(int16_t); break;
+            case 1: ea_off += insn_fetch(int8_t);  break;
+            case 2: ea_off += insn_fetch(int16_t); break;
             }
         }
         else
@@ -629,83 +614,50 @@ x86_emulate_memop(
                 sib_index = ((sib >> 3) & 7) | ((modrm << 2) & 8);
                 sib_base  = (sib & 7) | ((modrm << 3) & 8);
                 if ( sib_index != 4 )
-                    ea = *(long *)decode_register(sib_index, &_regs, 0);
-                ea <<= (sib >> 6) & 3;
+                    ea_off = *(long *)decode_register(sib_index, &_regs, 0);
+                ea_off <<= (sib >> 6) & 3;
                 if ( (modrm_mod == 0) && ((sib_base & 7) == 5) )
-                    ea += insn_fetch(int32_t);
+                    ea_off += insn_fetch(int32_t);
                 else
-                    ea += *(long *)decode_register(sib_base, &_regs, 0);
+                    ea_off += *(long *)decode_register(sib_base, &_regs, 0);
             }
             else
             {
                 modrm_rm |= (rex_prefix & 1) << 3;
-                ea = *(long *)decode_register(modrm_rm, &_regs, 0);
+                ea_off = *(long *)decode_register(modrm_rm, &_regs, 0);
             }
             switch ( modrm_mod )
             {
             case 0:
                 if ( (modrm_rm & 7) != 5 )
                     break;
-                ea = insn_fetch(int32_t);
+                ea_off = insn_fetch(int32_t);
                 if ( mode != X86EMUL_MODE_PROT64 )
                     break;
                 /* Relative to RIP of next instruction. Argh! */
-                ea += _regs.eip;
+                ea_off += _regs.eip;
                 if ( (d & SrcMask) == SrcImm )
-                    ea += (d & ByteOp) ? 1 : ((op_bytes == 8) ? 4 : op_bytes);
+                    ea_off += (d & ByteOp) ? 1 :
+                        ((op_bytes == 8) ? 4 : op_bytes);
                 else if ( (d & SrcMask) == SrcImmByte )
-                    ea += 1;
+                    ea_off += 1;
                 else if ( ((b == 0xf6) || (b == 0xf7)) &&
                           ((modrm_reg & 7) <= 1) )
                     /* Special case in Grp3: test has immediate operand. */
-                    ea += (d & ByteOp) ? 1
+                    ea_off += (d & ByteOp) ? 1
                         : ((op_bytes == 8) ? 4 : op_bytes);
                 break;
-            case 1: ea += insn_fetch(int8_t);  break;
-            case 2: ea += insn_fetch(int32_t); break;
+            case 1: ea_off += insn_fetch(int8_t);  break;
+            case 2: ea_off += insn_fetch(int32_t); break;
             }
         }
 
-        ea = register_address(*seg, ea);
-        page_boundary_test();
-    }
-
-    /* Decode and fetch the destination operand: register or memory. */
-    switch ( d & DstMask )
-    {
-    case ImplicitOps:
-        /* Special instructions do their own operand decoding. */
+        ea_off = truncate_ea(ea_off);
+    }
+
+    /* Special instructions do their own operand decoding. */
+    if ( (d & DstMask) == ImplicitOps )
         goto special_insn;
-    case DstReg:
-        dst.type = OP_REG;
-        if ( d & ByteOp )
-        {
-            dst.ptr = decode_register(modrm_reg, &_regs, (rex_prefix == 0));
-            dst.val = *(uint8_t *)dst.ptr;
-            dst.bytes = 1;
-        }
-        else
-        {
-            dst.ptr = decode_register(modrm_reg, &_regs, 0);
-            switch ( (dst.bytes = op_bytes) )
-            {
-            case 2: dst.val = *(uint16_t *)dst.ptr; break;
-            case 4: dst.val = *(uint32_t *)dst.ptr; break;
-            case 8: dst.val = *(uint64_t *)dst.ptr; break;
-            }
-        }
-        break;
-    case DstMem:
-        dst.type  = OP_MEM;
-        dst.ptr   = (unsigned long *)cr2;
-        dst.bytes = (d & ByteOp) ? 1 : op_bytes;
-        if ( !(d & Mov) && /* optimisation - avoid slow emulated read */
-             ((rc = ops->read_emulated((unsigned long)dst.ptr,
-                                       &dst.val, dst.bytes, ctxt)) != 0) )
-             goto done;
-        break;
-    }
-    dst.orig_val = dst.val;
 
     /* Decode and fetch the source operand: register, memory or immediate. */
     switch ( d & SrcMask )
@@ -716,18 +668,18 @@ x86_emulate_memop(
         src.type = OP_REG;
         if ( d & ByteOp )
         {
-            src.ptr = decode_register(modrm_reg, &_regs, (rex_prefix == 0));
-            src.val = src.orig_val = *(uint8_t *)src.ptr;
+            src.reg = decode_register(modrm_reg, &_regs, (rex_prefix == 0));
+            src.val = src.orig_val = *(uint8_t *)src.reg;
             src.bytes = 1;
         }
         else
         {
-            src.ptr = decode_register(modrm_reg, &_regs, 0);
+            src.reg = decode_register(modrm_reg, &_regs, 0);
             switch ( (src.bytes = op_bytes) )
             {
-            case 2: src.val = src.orig_val = *(uint16_t *)src.ptr; break;
-            case 4: src.val = src.orig_val = *(uint32_t *)src.ptr; break;
-            case 8: src.val = src.orig_val = *(uint64_t *)src.ptr; break;
+            case 2: src.val = src.orig_val = *(uint16_t *)src.reg; break;
+            case 4: src.val = src.orig_val = *(uint32_t *)src.reg; break;
+            case 8: src.val = src.orig_val = *(uint64_t *)src.reg; break;
             }
         }
         break;
@@ -741,15 +693,15 @@ x86_emulate_memop(
         src.bytes = (d & ByteOp) ? 1 : op_bytes;
     srcmem_common:
         src.type  = OP_MEM;
-        src.ptr   = (unsigned long *)cr2;
-        if ( (rc = ops->read_emulated((unsigned long)src.ptr, 
-                                      &src.val, src.bytes, ctxt)) != 0 )
+        src.mem_seg = ea_seg;
+        src.mem_off = ea_off;
+        if ( (rc = ops->read(src.mem_seg, src.mem_off,
+                             &src.val, src.bytes, ctxt)) != 0 )
             goto done;
         src.orig_val = src.val;
         break;
     case SrcImm:
         src.type  = OP_IMM;
-        src.ptr   = (unsigned long *)_regs.eip;
         src.bytes = (d & ByteOp) ? 1 : op_bytes;
         if ( src.bytes == 8 ) src.bytes = 4;
         /* NB. Immediates are sign-extended as necessary. */
@@ -762,11 +714,80 @@ x86_emulate_memop(
         break;
     case SrcImmByte:
         src.type  = OP_IMM;
-        src.ptr   = (unsigned long *)_regs.eip;
         src.bytes = 1;
         src.val   = insn_fetch(int8_t);
         break;
     }
+
+    /* Decode and fetch the destination operand: register or memory. */
+    switch ( d & DstMask )
+    {
+    case DstReg:
+        dst.type = OP_REG;
+        if ( d & ByteOp )
+        {
+            dst.reg = decode_register(modrm_reg, &_regs, (rex_prefix == 0));
+            dst.val = *(uint8_t *)dst.reg;
+            dst.bytes = 1;
+        }
+        else
+        {
+            dst.reg = decode_register(modrm_reg, &_regs, 0);
+            switch ( (dst.bytes = op_bytes) )
+            {
+            case 2: dst.val = *(uint16_t *)dst.reg; break;
+            case 4: dst.val = *(uint32_t *)dst.reg; break;
+            case 8: dst.val = *(uint64_t *)dst.reg; break;
+            }
+        }
+        break;
+    case DstBitBase:
+        dst.mem_off = ea_off;
+        if ( (d & SrcMask) == SrcImmByte )
+        {
+            src.val &= (op_bytes << 3) - 1;
+        }
+        else
+        {
+            /*
+             * EA       += BitOffset DIV op_bytes*8
+             * BitOffset = BitOffset MOD op_byte*8
+             * DIV truncates towards negative infinity.
+             * MOD always produces a positive result.
+             */
+            if ( op_bytes == 2 )
+                src.val = (int16_t)src.val;
+            else if ( op_bytes == 4 )
+                src.val = (int32_t)src.val;
+            if ( (long)src.val < 0 )
+            {
+                unsigned long byte_offset;
+                byte_offset = op_bytes + (((-src.val-1) >> 3) & ~(op_bytes-1));
+                dst.mem_off -= byte_offset;
+                src.val = (byte_offset << 3) + src.val;
+            }
+            else
+            {
+                dst.mem_off += (src.val >> 3) & ~(op_bytes - 1);
+                src.val &= (op_bytes << 3) - 1;
+            }
+        }
+        /* Becomes a normal DstMem operation from here on. */
+        d = (d & ~DstMask) | DstMem;
+        goto dstmem_common;
+    case DstMem:
+        dst.mem_off = ea_off;
+    dstmem_common:
+        dst.mem_seg = ea_seg;
+        dst.type  = OP_MEM;
+        dst.bytes = (d & ByteOp) ? 1 : op_bytes;
+        if ( !(d & Mov) && /* optimisation - avoid slow emulated read */
+             ((rc = ops->read(dst.mem_seg, dst.mem_off,
+                              &dst.val, dst.bytes, ctxt)) != 0) )
+             goto done;
+        break;
+    }
+    dst.orig_val = dst.val;
 
     if ( twobyte )
         goto twobyte_insn;
@@ -822,45 +843,25 @@ x86_emulate_memop(
         /* Write back the register source. */
         switch ( dst.bytes )
         {
-        case 1: *(uint8_t  *)src.ptr = (uint8_t)dst.val; break;
-        case 2: *(uint16_t *)src.ptr = (uint16_t)dst.val; break;
-        case 4: *src.ptr = (uint32_t)dst.val; break; /* 64b reg: zero-extend */
-        case 8: *src.ptr = dst.val; break;
+        case 1: *(uint8_t  *)src.reg = (uint8_t)dst.val; break;
+        case 2: *(uint16_t *)src.reg = (uint16_t)dst.val; break;
+        case 4: *src.reg = (uint32_t)dst.val; break; /* 64b reg: zero-extend */
+        case 8: *src.reg = dst.val; break;
         }
         /* Write back the memory destination with implicit LOCK prefix. */
         dst.val = src.val;
         lock_prefix = 1;
         break;
-    case 0xa0 ... 0xa1: /* mov */
-        dst.ptr = (unsigned long *)&_regs.eax;
-        dst.val = src.val;
-        /* Source EA is not encoded via ModRM. */
-        ea = register_address(*seg, _insn_fetch(ad_bytes));
-        page_boundary_test();
-        break;
-    case 0xa2 ... 0xa3: /* mov */
-        dst.val = (unsigned long)_regs.eax;
-        /* Destination EA is not encoded via ModRM. */
-        ea = register_address(*seg, _insn_fetch(ad_bytes));
-        page_boundary_test();
-        break;
     case 0x88 ... 0x8b: /* mov */
     case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
         dst.val = src.val;
         break;
     case 0x8f: /* pop (sole member of Grp1a) */
-        /*
-         * If the faulting access was a read it means that the fault occurred
-         * when accessing the implicit stack operand. We assume the guest never
-         * uses special memory areas as stack space.
-         */
-        if ( !(_regs.error_code & PFEC_write_access) )
-            goto cannot_emulate; /* fault on stack access: bail */
         /* 64-bit mode: POP always pops a 64-bit operand. */
         if ( mode == X86EMUL_MODE_PROT64 )
             dst.bytes = 8;
-        if ( (rc = ops->read_std(register_address(_regs.ss, _regs.esp),
-                                 &dst.val, dst.bytes, ctxt)) != 0 )
+        if ( (rc = ops->read(X86_SEG_SS, truncate_ea(_regs.esp),
+                             &dst.val, dst.bytes, ctxt)) != 0 )
             goto done;
         register_address_increment(_regs.esp, dst.bytes);
         break;
@@ -903,7 +904,6 @@ x86_emulate_memop(
         case 0 ... 1: /* test */
             /* Special case in Grp3: test has an immediate source operand. */
             src.type = OP_IMM;
-            src.ptr  = (unsigned long *)_regs.eip;
             src.bytes = (d & ByteOp) ? 1 : op_bytes;
             if ( src.bytes == 8 ) src.bytes = 4;
             switch ( src.bytes )
@@ -933,24 +933,17 @@ x86_emulate_memop(
             emulate_1op("dec", dst, _regs.eflags);
             break;
         case 6: /* push */
-            /*
-             * If the faulting access was a write it means that the fault
-             * occurred when accessing the implicit stack operand. We assume
-             * the guest never uses special memory areas as stack space.
-             */
-            if ( _regs.error_code & PFEC_write_access )
-                goto cannot_emulate; /* fault on stack access: bail */
             /* 64-bit mode: PUSH always pushes a 64-bit operand. */
             if ( mode == X86EMUL_MODE_PROT64 )
             {
                 dst.bytes = 8;
-                if ( (rc = ops->read_std((unsigned long)dst.ptr,
-                                         &dst.val, 8, ctxt)) != 0 )
+                if ( (rc = ops->read(dst.mem_seg, dst.mem_off,
+                                     &dst.val, 8, ctxt)) != 0 )
                     goto done;
             }
             register_address_increment(_regs.esp, -dst.bytes);
-            if ( (rc = ops->write_std(register_address(_regs.ss, _regs.esp),
-                                      dst.val, dst.bytes, ctxt)) != 0 )
+            if ( (rc = ops->write(X86_SEG_SS, truncate_ea(_regs.esp),
+                                  dst.val, dst.bytes, ctxt)) != 0 )
                 goto done;
             dst.val = dst.orig_val; /* skanky: disable writeback */
             break;
@@ -969,20 +962,20 @@ x86_emulate_memop(
             /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
             switch ( dst.bytes )
             {
-            case 1: *(uint8_t  *)dst.ptr = (uint8_t)dst.val; break;
-            case 2: *(uint16_t *)dst.ptr = (uint16_t)dst.val; break;
-            case 4: *dst.ptr = (uint32_t)dst.val; break; /* 64b: zero-ext */
-            case 8: *dst.ptr = dst.val; break;
+            case 1: *(uint8_t  *)dst.reg = (uint8_t)dst.val; break;
+            case 2: *(uint16_t *)dst.reg = (uint16_t)dst.val; break;
+            case 4: *dst.reg = (uint32_t)dst.val; break; /* 64b: zero-ext */
+            case 8: *dst.reg = dst.val; break;
             }
             break;
         case OP_MEM:
             if ( lock_prefix )
-                rc = ops->cmpxchg_emulated(
-                    (unsigned long)dst.ptr, dst.orig_val,
+                rc = ops->cmpxchg(
+                    dst.mem_seg, dst.mem_off, dst.orig_val,
                     dst.val, dst.bytes, ctxt);
             else
-                rc = ops->write_emulated(
-                    (unsigned long)dst.ptr, dst.val, dst.bytes, ctxt);
+                rc = ops->write(
+                    dst.mem_seg, dst.mem_off, dst.val, dst.bytes, ctxt);
             if ( rc != 0 )
                 goto done;
         default:
@@ -1011,50 +1004,51 @@ x86_emulate_memop(
     }
     switch ( b )
     {
+    case 0xa0 ... 0xa1: /* mov moffs,{%al,%ax,%eax,%rax} */
+        /* Source EA is not encoded via ModRM. */
+        dst.type  = OP_REG;
+        dst.reg   = (unsigned long *)&_regs.eax;
+        dst.bytes = (d & ByteOp) ? 1 : op_bytes;
+        if ( (rc = ops->read(ea_seg, _insn_fetch(ad_bytes),
+                             &dst.val, dst.bytes, ctxt)) != 0 )
+            goto done;
+        break;
+    case 0xa2 ... 0xa3: /* mov {%al,%ax,%eax,%rax},moffs */
+        /* Destination EA is not encoded via ModRM. */
+        dst.type    = OP_MEM;
+        dst.mem_seg = ea_seg;
+        dst.mem_off = _insn_fetch(ad_bytes);
+        dst.bytes   = (d & ByteOp) ? 1 : op_bytes;
+        dst.val     = (unsigned long)_regs.eax;
+        break;
     case 0xa4 ... 0xa5: /* movs */
         dst.type  = OP_MEM;
         dst.bytes = (d & ByteOp) ? 1 : op_bytes;
-        if ( _regs.error_code & PFEC_write_access )
-        {
-            /* Write fault: destination is special memory. */
-            dst.ptr = (unsigned long *)cr2;
-            if ( (rc = ops->read_std(register_address(*seg, _regs.esi),
-                                     &dst.val, dst.bytes, ctxt)) != 0 )
-                goto done;
-            ea = register_address(_regs.es, _regs.edi);
-        }
-        else
-        {
-            /* Read fault: source is special memory. */
-            dst.ptr = (unsigned long *)register_address(_regs.es, _regs.edi);
-            if ( (rc = ops->read_emulated(cr2, &dst.val,
-                                          dst.bytes, ctxt)) != 0 )
-                goto done;
-            ea = register_address(*seg, _regs.esi);
-        }
-        page_boundary_test();
+        dst.mem_seg = X86_SEG_ES;
+        dst.mem_off = truncate_ea(_regs.edi);
+        if ( (rc = ops->read(ea_seg, truncate_ea(_regs.esi),
+                             &dst.val, dst.bytes, ctxt)) != 0 )
+            goto done;
         register_address_increment(
             _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
         register_address_increment(
             _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
         break;
     case 0xaa ... 0xab: /* stos */
-        ea = register_address(_regs.es, _regs.edi);
-        page_boundary_test();
         dst.type  = OP_MEM;
         dst.bytes = (d & ByteOp) ? 1 : op_bytes;
-        dst.ptr   = (unsigned long *)cr2;
+        dst.mem_seg = X86_SEG_ES;
+        dst.mem_off = truncate_ea(_regs.edi);
         dst.val   = _regs.eax;
         register_address_increment(
             _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
         break;
     case 0xac ... 0xad: /* lods */
-        ea = register_address(*seg, _regs.esi);
-        page_boundary_test();
         dst.type  = OP_REG;
         dst.bytes = (d & ByteOp) ? 1 : op_bytes;
-        dst.ptr   = (unsigned long *)&_regs.eax;
-        if ( (rc = ops->read_emulated(cr2, &dst.val, dst.bytes, ctxt)) != 0 )
+        dst.reg   = (unsigned long *)&_regs.eax;
+        if ( (rc = ops->read(ea_seg, truncate_ea(_regs.esi),
+                             &dst.val, dst.bytes, ctxt)) != 0 )
             goto done;
         register_address_increment(
             _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
@@ -1116,19 +1110,16 @@ x86_emulate_memop(
         {
             /* Failure: write the value we saw to EAX. */
             dst.type = OP_REG;
-            dst.ptr  = (unsigned long *)&_regs.eax;
+            dst.reg  = (unsigned long *)&_regs.eax;
         }
         break;
     case 0xa3: bt: /* bt */
-        src.val &= (dst.bytes << 3) - 1; /* only subword offset */
         emulate_2op_SrcV_nobyte("bt", src, dst, _regs.eflags);
         break;
     case 0xb3: btr: /* btr */
-        src.val &= (dst.bytes << 3) - 1; /* only subword offset */
         emulate_2op_SrcV_nobyte("btr", src, dst, _regs.eflags);
         break;
     case 0xab: bts: /* bts */
-        src.val &= (dst.bytes << 3) - 1; /* only subword offset */
         emulate_2op_SrcV_nobyte("bts", src, dst, _regs.eflags);
         break;
     case 0xb6 ... 0xb7: /* movzx */
@@ -1136,7 +1127,6 @@ x86_emulate_memop(
         dst.val = (d & ByteOp) ? (uint8_t)src.val : (uint16_t)src.val;
         break;
     case 0xbb: btc: /* btc */
-        src.val &= (dst.bytes << 3) - 1; /* only subword offset */
         emulate_2op_SrcV_nobyte("btc", src, dst, _regs.eflags);
         break;
     case 0xba: /* Grp8 */
@@ -1167,8 +1157,8 @@ x86_emulate_memop(
 #if defined(__i386__)
     {
         unsigned long old_lo, old_hi;
-        if ( ((rc = ops->read_emulated(cr2+0, &old_lo, 4, ctxt)) != 0) ||
-             ((rc = ops->read_emulated(cr2+4, &old_hi, 4, ctxt)) != 0) )
+        if ( ((rc = ops->read(ea_seg, ea_off+0, &old_lo, 4, ctxt)) != 0) ||
+             ((rc = ops->read(ea_seg, ea_off+4, &old_hi, 4, ctxt)) != 0) )
             goto done;
         if ( (old_lo != _regs.eax) || (old_hi != _regs.edx) )
         {
@@ -1176,15 +1166,15 @@ x86_emulate_memop(
             _regs.edx = old_hi;
             _regs.eflags &= ~EFLG_ZF;
         }
-        else if ( ops->cmpxchg8b_emulated == NULL )
+        else if ( ops->cmpxchg8b == NULL )
         {
             rc = X86EMUL_UNHANDLEABLE;
             goto done;
         }
         else
         {
-            if ( (rc = ops->cmpxchg8b_emulated(cr2, old_lo, old_hi, _regs.ebx,
-                                               _regs.ecx, ctxt)) != 0 )
+            if ( (rc = ops->cmpxchg8b(ea_seg, ea_off, old_lo, old_hi,
+                                      _regs.ebx, _regs.ecx, ctxt)) != 0 )
                 goto done;
             _regs.eflags |= EFLG_ZF;
         }
@@ -1193,7 +1183,7 @@ x86_emulate_memop(
 #elif defined(__x86_64__)
     {
         unsigned long old, new;
-        if ( (rc = ops->read_emulated(cr2, &old, 8, ctxt)) != 0 )
+        if ( (rc = ops->read(ea_seg, ea_off, &old, 8, ctxt)) != 0 )
             goto done;
         if ( ((uint32_t)(old>>0) != (uint32_t)_regs.eax) ||
              ((uint32_t)(old>>32) != (uint32_t)_regs.edx) )
@@ -1205,7 +1195,7 @@ x86_emulate_memop(
         else
         {
             new = (_regs.ecx<<32)|(uint32_t)_regs.ebx;
-            if ( (rc = ops->cmpxchg_emulated(cr2, old, new, 8, ctxt)) != 0 )
+            if ( (rc = ops->cmpxchg(ea_seg, ea_off, old, new, 8, ctxt)) != 0 )
                 goto done;
             _regs.eflags |= EFLG_ZF;
         }
@@ -1219,55 +1209,4 @@ x86_emulate_memop(
     dprintf("Cannot emulate %02x\n", b);
     dump_instr(ctxt, ops);
     return -1;
-
- bad_ea:
-    dprintf("Access faulted on page boundary (cr2=%lx,ea=%lx).\n", cr2, ea);
-    dump_instr(ctxt, ops);
-    show_execution_state(ctxt->regs);
-    return -1;
 }
-
-#ifdef __XEN__
-
-#include <asm/mm.h>
-#include <asm/uaccess.h>
-
-int
-x86_emulate_read_std(
-    unsigned long addr,
-    unsigned long *val,
-    unsigned int bytes,
-    struct x86_emulate_ctxt *ctxt)
-{
-    unsigned int rc;
-
-    *val = 0;
-
-    if ( (rc = copy_from_user((void *)val, (void *)addr, bytes)) != 0 )
-    {
-        propagate_page_fault(addr + bytes - rc, 0); /* read fault */
-        return X86EMUL_PROPAGATE_FAULT;
-    }
-
-    return X86EMUL_CONTINUE;
-}
-
-int
-x86_emulate_write_std(
-    unsigned long addr,
-    unsigned long val,
-    unsigned int bytes,
-    struct x86_emulate_ctxt *ctxt)
-{
-    unsigned int rc;
-
-    if ( (rc = copy_to_user((void *)addr, (void *)&val, bytes)) != 0 )
-    {
-        propagate_page_fault(addr + bytes - rc, PFEC_write_access);
-        return X86EMUL_PROPAGATE_FAULT;
-    }
-
-    return X86EMUL_CONTINUE;
-}
-
-#endif
diff -r d1b0a5adaeab -r 88935ae47fa9 xen/include/asm-x86/x86_emulate.h
--- a/xen/include/asm-x86/x86_emulate.h Wed Nov 29 23:40:40 2006 +0000
+++ b/xen/include/asm-x86/x86_emulate.h Thu Nov 30 10:57:28 2006 +0000
@@ -11,35 +11,27 @@
 
 struct x86_emulate_ctxt;
 
+#define X86_SEG_CS 0
+#define X86_SEG_SS 1
+#define X86_SEG_DS 2
+#define X86_SEG_ES 3
+#define X86_SEG_FS 4
+#define X86_SEG_GS 5
+
 /*
  * x86_emulate_ops:
  * 
  * These operations represent the instruction emulator's interface to memory.
- * There are two categories of operation: those that act on ordinary memory
- * regions (*_std), and those that act on memory regions known to require
- * special treatment or emulation (*_emulated).
- * 
- * The emulator assumes that an instruction accesses only one 'emulated memory'
- * location, that this location is the given linear faulting address (cr2), and
- * that this is one of the instruction's data operands. Instruction fetches and
- * stack operations are assumed never to access emulated memory. The emulator
- * automatically deduces which operand of a string-move operation is accessing
- * emulated memory, and assumes that the other operand accesses normal memory.
  * 
  * NOTES:
- *  1. The emulator isn't very smart about emulated vs. standard memory.
- *     'Emulated memory' access addresses should be checked for sanity.
- *     'Normal memory' accesses may fault, and the caller must arrange to
- *     detect and handle reentrancy into the emulator via recursive faults.
- *     Accesses may be unaligned and may cross page boundaries.
- *  2. If the access fails (cannot emulate, or a standard access faults) then
+ *  1. If the access fails (cannot emulate, or a standard access faults) then
  *     it is up to the memop to propagate the fault to the guest VM via
  *     some out-of-band mechanism, unknown to the emulator. The memop signals
  *     failure by returning X86EMUL_PROPAGATE_FAULT to the emulator, which will
  *     then immediately bail.
- *  3. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only
+ *  2. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only
  *     cmpxchg8b_emulated need support 8-byte accesses.
- *  4. The emulator cannot handle 64-bit mode emulation on an x86/32 system.
+ *  3. The emulator cannot handle 64-bit mode emulation on an x86/32 system.
  */
 /* Access completed successfully: continue emulation as normal. */
 #define X86EMUL_CONTINUE        0
@@ -52,74 +44,51 @@ struct x86_emulate_ops
 struct x86_emulate_ops
 {
     /*
-     * read_std: Read bytes of standard (non-emulated/special) memory.
-     *           Used for instruction fetch, stack operations, and others.
-     *  @addr:  [IN ] Linear address from which to read.
-     *  @val:   [OUT] Value read from memory, zero-extended to 'u_long'.
+     * All functions:
+     *  @seg:   [IN ] Segment being dereferenced (specified as X86_SEG_??).
+     *  @offset [IN ] Offset within segment.
+     */
+
+    /*
+     * read: Emulate a memory read.
+     *  @val:   [OUT] Value read from memory, zero-extended to 'ulong'.
      *  @bytes: [IN ] Number of bytes to read from memory.
      */
-    int (*read_std)(
-        unsigned long addr,
+    int (*read)(
+        unsigned int seg,
+        unsigned long offset,
         unsigned long *val,
         unsigned int bytes,
         struct x86_emulate_ctxt *ctxt);
 
     /*
-     * write_std: Write bytes of standard (non-emulated/special) memory.
-     *            Used for stack operations, and others.
-     *  @addr:  [IN ] Linear address to which to write.
+     * write: Emulate a memory write.
      *  @val:   [IN ] Value to write to memory (low-order bytes used as req'd).
      *  @bytes: [IN ] Number of bytes to write to memory.
      */
-    int (*write_std)(
-        unsigned long addr,
+    int (*write)(
+        unsigned int seg,
+        unsigned long offset,
         unsigned long val,
         unsigned int bytes,
         struct x86_emulate_ctxt *ctxt);
 
     /*
-     * read_emulated: Read bytes from emulated/special memory area.
-     *  @addr:  [IN ] Linear address from which to read.
-     *  @val:   [OUT] Value read from memory, zero-extended to 'u_long'.
-     *  @bytes: [IN ] Number of bytes to read from memory.
-     */
-    int (*read_emulated)(
-        unsigned long addr,
-        unsigned long *val,
-        unsigned int bytes,
-        struct x86_emulate_ctxt *ctxt);
-
-    /*
-     * write_emulated: Read bytes from emulated/special memory area.
-     *  @addr:  [IN ] Linear address to which to write.
-     *  @val:   [IN ] Value to write to memory (low-order bytes used as req'd).
-     *  @bytes: [IN ] Number of bytes to write to memory.
-     */
-    int (*write_emulated)(
-        unsigned long addr,
-        unsigned long val,
-        unsigned int bytes,
-        struct x86_emulate_ctxt *ctxt);
-
-    /*
-     * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an
-     *                   emulated/special memory area.
-     *  @addr:  [IN ] Linear address to access.
+     * cmpxchg: Emulate an atomic (LOCKed) CMPXCHG operation.
      *  @old:   [IN ] Value expected to be current at @addr.
      *  @new:   [IN ] Value to write to @addr.
      *  @bytes: [IN ] Number of bytes to access using CMPXCHG.
      */
-    int (*cmpxchg_emulated)(
-        unsigned long addr,
+    int (*cmpxchg)(
+        unsigned int seg,
+        unsigned long offset,
         unsigned long old,
         unsigned long new,
         unsigned int bytes,
         struct x86_emulate_ctxt *ctxt);
 
     /*
-     * cmpxchg8b_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an
-     *                     emulated/special memory area.
-     *  @addr:  [IN ] Linear address to access.
+     * cmpxchg8b: Emulate an atomic (LOCKed) CMPXCHG8B operation.
      *  @old:   [IN ] Value expected to be current at @addr.
      *  @new:   [IN ] Value to write to @addr.
      * NOTES:
@@ -128,8 +97,9 @@ struct x86_emulate_ops
      *  2. Not defining this function (i.e., specifying NULL) is equivalent
      *     to defining a function that always returns X86EMUL_UNHANDLEABLE.
      */
-    int (*cmpxchg8b_emulated)(
-        unsigned long addr,
+    int (*cmpxchg8b)(
+        unsigned int seg,
+        unsigned long offset,
         unsigned long old_lo,
         unsigned long old_hi,
         unsigned long new_lo,
@@ -137,29 +107,12 @@ struct x86_emulate_ops
         struct x86_emulate_ctxt *ctxt);
 };
 
-/* Standard reader/writer functions that callers may wish to use. */
-extern int
-x86_emulate_read_std(
-    unsigned long addr,
-    unsigned long *val,
-    unsigned int bytes,
-    struct x86_emulate_ctxt *ctxt);
-extern int
-x86_emulate_write_std(
-    unsigned long addr,
-    unsigned long val,
-    unsigned int bytes,
-    struct x86_emulate_ctxt *ctxt);
-
 struct cpu_user_regs;
 
 struct x86_emulate_ctxt
 {
     /* Register state before/after emulation. */
     struct cpu_user_regs   *regs;
-
-    /* Linear faulting address (if emulating a page-faulting instruction). */
-    unsigned long           cr2;
 
     /* Emulated execution mode, represented by an X86EMUL_MODE value. */
     int                     mode;

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog


 

