[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH] Augment vmxassist emulation support and more accurate realmode GDT addressing



This patch allows Solaris 10 (and all S10 updates) to boot and run on Intel processors.

--

-----------------------------------------------------
Russ Blaine | Solaris Kernel | russell.blaine@xxxxxxx

# HG changeset patch
# User russell.blaine@xxxxxxx
# Date 1187906490 25200
# Node ID a77889d180ea0c8e0dd1297e394a755968de45cb
# Parent  149ca6d04923ea8b2b7b9bbe9c9f983974e40df9
Augment vmxassist emulation support and more accurate realmode GDT addressing

vmxassist does not support all instructions needed for the transition to and from real mode when running non-Windows/non-Linux HVM guests. Add support needed to allow Solaris multiboot (Solaris 10 and S10 updates) to boot. Also, modify how the emulator finds the GDT while in real mode. The emulator currently reads segment descriptors out of the GDT in guest memory every time it calculates an address. To more accurately reflect what actual hardware does, the emulator should use the hidden portion of the segment registers to find the address of the GDT. In addition, the current code assumes that the GDT resides on a single physical page of memory. To accommodate larger GDTs, the emulator should calculate the virtual address of the needed entry in the GDT before converting it to a physical address for reading. The vmxassist tool used to help HVM guests transition to and from real mode has some deficiencies and inaccuracies which prevent certain

Signed-off-by: Russell Blaine <russell.blaine@xxxxxxx>

diff --git a/tools/firmware/vmxassist/vm86.c b/tools/firmware/vmxassist/vm86.c
--- a/tools/firmware/vmxassist/vm86.c
+++ b/tools/firmware/vmxassist/vm86.c
@@ -50,10 +50,14 @@ char *states[] = {
 };

 static char *rnames[] = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di" };
+static char *srnames[] = { "es", "cs", "ss", "ds", "fs", "gs", "invl", "invl" };
 #endif /* DEBUG */

 #define PDE_PS                         (1 << 7)
 #define PT_ENTRY_PRESENT       0x1
+
+static void load_or_clear_seg(unsigned long, uint32_t *, uint32_t *,
+       union vmcs_arbytes *);

 /* We only support access to <=4G physical memory due to 1:1 mapping */
 static uint64_t
@@ -93,7 +97,8 @@ guest_linear_to_phys(uint32_t base)

                if (l2_mfn & 0xf00000000ULL) {
                        printf("l2 page above 4G\n");
-                       cpuid_addr_value(l2_mfn + 8 * ((base >> 21) & 0x1ff), &l1_mfn);
+                       cpuid_addr_value(l2_mfn + 8 * ((base >> 21) & 0x1ff),
+                           &l1_mfn);
                } else
                        l1_mfn = ((uint64_t *)(long)l2_mfn)[(base >> 21) & 0x1ff];
                if (!(l1_mfn & PT_ENTRY_PRESENT))
@@ -108,7 +113,8 @@ guest_linear_to_phys(uint32_t base)

                if (l1_mfn & 0xf00000000ULL) {
                        printf("l1 page above 4G\n");
-                       cpuid_addr_value(l1_mfn + 8 * ((base >> 12) & 0x1ff), &l0_mfn);
+                       cpuid_addr_value(l1_mfn + 8 * ((base >> 12) & 0x1ff),
+                           &l0_mfn);
                } else
                        l0_mfn = ((uint64_t *)(long)l1_mfn)[(base >> 12) & 0x1ff];
                if (!(l0_mfn & PT_ENTRY_PRESENT))
@@ -123,7 +129,8 @@ static unsigned
 static unsigned
 address(struct regs *regs, unsigned seg, unsigned off)
 {
-       uint64_t gdt_phys_base;
+       uint64_t gdt_entry_pa;
+       unsigned gdt_entry_va;
        unsigned long long entry;
        unsigned seg_base, seg_limit;
        unsigned entry_low, entry_high;
@@ -139,12 +146,32 @@ address(struct regs *regs, unsigned seg,
                (mode == VM86_REAL_TO_PROTECTED && regs->cs == seg))
                return ((seg & 0xFFFF) << 4) + off;

-       gdt_phys_base = guest_linear_to_phys(oldctx.gdtr_base);
-       if (gdt_phys_base != (uint32_t)gdt_phys_base) {
+       if (mode == VM86_PROTECTED_TO_REAL && !(oldctx.cr0 & (CR0_PG|CR0_PE))) {
+               if (seg == regs->cs)
+                       return oldctx.cs_base + off;
+               if (seg == regs->ves)
+                       return oldctx.es_base + off;
+               if (seg == regs->vds)
+                       return oldctx.ds_base + off;
+               if (seg == regs->uss)
+                       return oldctx.ss_base + off;
+               if (seg == regs->vfs)
+                       return oldctx.fs_base + off;
+               if (seg == regs->vgs)
+                       return oldctx.gs_base + off;
+
+               dump_regs(regs);
+               panic("address(): unknown segment selector 0x%x\n", seg);
+       }
+
+       gdt_entry_va = (unsigned)((unsigned long long *)oldctx.gdtr_base +
+           (seg >> 3));
+       gdt_entry_pa = guest_linear_to_phys(gdt_entry_va);
+       if (gdt_entry_pa != (uint32_t)gdt_entry_pa) {
                printf("gdt base address above 4G\n");
-               cpuid_addr_value(gdt_phys_base + 8 * (seg >> 3), &entry);
+               cpuid_addr_value(gdt_entry_pa, &entry);
        } else
-               entry = ((unsigned long long *)(long)gdt_phys_base)[seg >> 3];
+       entry = *(unsigned long long *)(long)gdt_entry_pa;

        entry_high = entry >> 32;
        entry_low = entry & 0xFFFFFFFF;
@@ -688,7 +715,8 @@ movcr(struct regs *regs, unsigned prefix
                }
                break;
        case 0x22: /* mov Cd, Rd */
-               TRACE((regs, regs->eip - eip, "movl %%eax, %%cr%d", cr));
+               TRACE((regs, regs->eip - eip, "movl %%eax, %%cr%d [[0x%x]]",
+                         cr, getreg32(regs, modrm)));
                switch (cr) {
                case 0:
                        oldctx.cr0 = getreg32(regs, modrm) | (CR0_PE | CR0_NE);
@@ -815,16 +843,83 @@ pop(struct regs *regs, unsigned prefix,
                        write16(addr, pop16(regs));
                TRACE((regs, regs->eip - eip, "pop *0x%x", addr));
                break;
-
-       /* other pop opcodes ... */
+       default: /* other pop opcodes ... */
+               return (0);
        }

        return 1;
+}
+
+static void
+push(struct regs *regs, unsigned prefix, unsigned opc)
+{
+        unsigned eip = regs->eip - 1; /* presumably the opcode byte's address; only used by TRACE */
+        unsigned data;
+        /* Push the general register selected by the low bits of the opcode. */
+        if (prefix & DATA32) {
+                data = getreg32(regs, opc & 0xF);
+                push32(regs, data);
+        } else {
+                data = getreg16(regs, opc & 0xF);
+                push16(regs, data);
+        }
+        /* Trace the operand-size suffix, the register name and the pushed value. */
+        TRACE((regs, regs->eip - eip, "push%s %s%s (val 0x%x)",
+                  (prefix & DATA32) ? "l" : "",
+                  (prefix & DATA32) ? "e" : "", rnames[opc & 0xF], data));
+}
+
+/*
+ * Emulate "mov sreg, r16" (opcode 0x8C): copy the segment register
+ * named by the ModRM reg field into the 16-bit general register named
+ * by the ModRM r/m field.  Only the register form (mod == 3) is handled.
+ * Returns 1 when emulated, 0 for a memory destination or invalid sreg.
+ */
+static int
+mov_from_seg(struct regs *regs, unsigned prefix, unsigned opc)
+{
+        unsigned eip = regs->eip - 1;
+        unsigned modrm = fetch8(regs);
+        unsigned r = modrm & 7; /* dest reg is the 3-bit r/m field */
+        unsigned data;
+
+        if ((modrm & 0xC0) != 0xC0) /* reg destinations only. memory unimpl */
+                return 0;
+
+        switch ((modrm & 0x38) >> 3) { /* source reg is in reg field */
+        case 0: /* es */
+                data = regs->ves;
+                break;
+        case 1: /* cs */
+                data = regs->cs;
+                break;
+        case 2: /* ss */
+                data = regs->uss;
+                break;
+        case 3: /* ds */
+                data = regs->vds;
+                break;
+        case 4: /* fs */
+                data = regs->vfs;
+                break;
+        case 5: /* gs */
+                data = regs->vgs;
+                break;
+        default:
+                return 0;
+        }
+
+        TRACE((regs, regs->eip - eip, "mov %%%s %%%s (val 0x%x)\n",
+                  srnames[(modrm & 0x38) >> 3], rnames[r], data));
+
+        setreg16(regs, r, data);
+        return 1;
 }

 static int
 mov_to_seg(struct regs *regs, unsigned prefix, unsigned opc)
 {
+       unsigned eip = regs->eip - 1;
        unsigned modrm = fetch8(regs);

        /*
@@ -836,54 +931,130 @@ mov_to_seg(struct regs *regs, unsigned p
            mode != VM86_PROTECTED_TO_REAL)
                return 0;

-       /* Register source only. */
-       if ((modrm & 0xC0) != 0xC0)
-               goto fail;
-
-       switch ((modrm & 0x38) >> 3) {
-       case 0: /* es */
-               regs->ves = getreg16(regs, modrm);
-               if (mode == VM86_PROTECTED_TO_REAL)
-                       return 1;
-               saved_rm_regs.ves = 0;
-               oldctx.es_sel = regs->ves;
-               return 1;
-
-       /* case 1: cs */
-
-       case 2: /* ss */
-               regs->uss = getreg16(regs, modrm);
-               if (mode == VM86_PROTECTED_TO_REAL)
-                       return 1;
-               saved_rm_regs.uss = 0;
-               oldctx.ss_sel = regs->uss;
-               return 1;
-       case 3: /* ds */
-               regs->vds = getreg16(regs, modrm);
-               if (mode == VM86_PROTECTED_TO_REAL)
-                       return 1;
-               saved_rm_regs.vds = 0;
-               oldctx.ds_sel = regs->vds;
-               return 1;
-       case 4: /* fs */
-               regs->vfs = getreg16(regs, modrm);
-               if (mode == VM86_PROTECTED_TO_REAL)
-                       return 1;
-               saved_rm_regs.vfs = 0;
-               oldctx.fs_sel = regs->vfs;
-               return 1;
-       case 5: /* gs */
-               regs->vgs = getreg16(regs, modrm);
-               if (mode == VM86_PROTECTED_TO_REAL)
-                       return 1;
-               saved_rm_regs.vgs = 0;
-               oldctx.gs_sel = regs->vgs;
-               return 1;
-       }
-
- fail:
-       printf("%s:%d: missed opcode %02x %02x\n",
-                  __FUNCTION__, __LINE__, opc, modrm);
+        if ((modrm & 0xC0) == 0xC0) /* register source */
+        {
+                TRACE((regs, regs->eip - eip, "mov %s %s",
+                          rnames[modrm & 0x7],
+                          srnames[(modrm & 0x38) >> 3]));
+
+                switch ((modrm & 0x38) >> 3) {
+                case 0: /* es */
+                        regs->ves = getreg16(regs, modrm);
+                       if (mode == VM86_PROTECTED_TO_REAL)
+                               return 1;
+                        saved_rm_regs.ves = 0;
+                        oldctx.es_sel = regs->ves;
+                        load_or_clear_seg(oldctx.es_sel, &oldctx.es_base,
+                            &oldctx.es_limit, &oldctx.es_arbytes);
+                        return 1;
+
+                /* case 1: cs */
+
+                case 2: /* ss */
+                        regs->uss = getreg16(regs, modrm);
+                       if (mode == VM86_PROTECTED_TO_REAL)
+                               return 1;
+                        saved_rm_regs.uss = 0;
+                        oldctx.ss_sel = regs->uss;
+                        load_or_clear_seg(oldctx.ss_sel, &oldctx.ss_base,
+                            &oldctx.ss_limit, &oldctx.ss_arbytes);
+                        return 1;
+                case 3: /* ds */
+                        regs->vds = getreg16(regs, modrm);
+                       if (mode == VM86_PROTECTED_TO_REAL)
+                               return 1;
+                        saved_rm_regs.vds = 0;
+                        oldctx.ds_sel = regs->vds;
+                        load_or_clear_seg(oldctx.ds_sel, &oldctx.ds_base,
+                            &oldctx.ds_limit, &oldctx.ds_arbytes);
+                        return 1;
+                case 4: /* fs */
+                        regs->vfs = getreg16(regs, modrm);
+                       if (mode == VM86_PROTECTED_TO_REAL)
+                               return 1;
+                        saved_rm_regs.vfs = 0;
+                        oldctx.fs_sel = regs->vfs;
+                        load_or_clear_seg(oldctx.fs_sel, &oldctx.fs_base,
+                            &oldctx.fs_limit, &oldctx.fs_arbytes);
+                        return 1;
+                case 5: /* gs */
+                        regs->vgs = getreg16(regs, modrm);
+                       if (mode == VM86_PROTECTED_TO_REAL)
+                               return 1;
+                        saved_rm_regs.vgs = 0;
+                        oldctx.gs_sel = regs->vgs;
+                        load_or_clear_seg(oldctx.gs_sel, &oldctx.gs_base,
+                            &oldctx.gs_limit, &oldctx.gs_arbytes);
+                        return 1;
+                default:
+                        break;
+                }
+       } else if ((modrm & 0xC0) == 0) /* memory source */
+       {
+                unsigned addr = operand(prefix, regs, modrm);
+                unsigned data = read16(addr);
+
+                TRACE((regs, regs->eip - eip, "mov [0x%x] %s", addr,
+                          srnames[(modrm & 0x38) >> 3]));
+
+                switch ((modrm & 0x38) >> 3) {
+                case 0: /* es */
+                        regs->ves = data;
+                       if (mode == VM86_PROTECTED_TO_REAL)
+                               return 1;
+                        saved_rm_regs.ves = 0;
+                        oldctx.es_sel = regs->ves;
+                        load_or_clear_seg(oldctx.es_sel, &oldctx.es_base,
+                            &oldctx.es_limit, &oldctx.es_arbytes);
+                        return 1;
+
+                case 1: /* cs */
+                        break;
+
+                case 2: /* ss */
+                        regs->uss = data;
+                       if (mode == VM86_PROTECTED_TO_REAL)
+                               return 1;
+                        saved_rm_regs.uss = 0;
+                        oldctx.ss_sel = regs->uss;
+                        load_or_clear_seg(oldctx.ss_sel, &oldctx.ss_base,
+                            &oldctx.ss_limit, &oldctx.ss_arbytes);
+                        return 1;
+                case 3: /* ds */
+                        regs->vds = data;
+                       if (mode == VM86_PROTECTED_TO_REAL)
+                               return 1;
+                        saved_rm_regs.vds = 0;
+                        oldctx.ds_sel = regs->vds;
+                        load_or_clear_seg(oldctx.ds_sel, &oldctx.ds_base,
+                            &oldctx.ds_limit, &oldctx.ds_arbytes);
+                        return 1;
+                case 4: /* fs */
+                        regs->vfs = data;
+                       if (mode == VM86_PROTECTED_TO_REAL)
+                               return 1;
+                        saved_rm_regs.vfs = 0;
+                        oldctx.fs_sel = regs->vfs;
+                        load_or_clear_seg(oldctx.fs_sel, &oldctx.fs_base,
+                            &oldctx.fs_limit, &oldctx.fs_arbytes);
+                        return 1;
+                case 5: /* gs */
+                        regs->vgs = data;
+                       if (mode == VM86_PROTECTED_TO_REAL)
+                               return 1;
+                        saved_rm_regs.vgs = 0;
+                        oldctx.gs_sel = regs->vgs;
+                        load_or_clear_seg(oldctx.gs_sel, &oldctx.gs_base,
+                            &oldctx.gs_limit, &oldctx.gs_arbytes);
+                        return 1;
+                default:
+                        break;
+                }
+
+        }
+
+        TRACE((regs, regs->eip - eip, "%s: missed opcode %02x modrm %02x\n",
+                  __FUNCTION__, opc, modrm));
        return 0;
 }

@@ -891,9 +1062,11 @@ mov_to_seg(struct regs *regs, unsigned p
  * Emulate a segment load in protected mode
  */
 static int
-load_seg(unsigned long sel, uint32_t *base, uint32_t *limit, union vmcs_arbytes *arbytes)
-{
-       uint64_t gdt_phys_base;
+load_seg(unsigned long sel, uint32_t *base, uint32_t *limit,
+    union vmcs_arbytes *arbytes)
+{
+       unsigned gdt_entry_va;
+       uint64_t gdt_entry_pa;
        unsigned long long entry;

        /* protected mode: use seg as index into gdt */
@@ -905,12 +1078,14 @@ load_seg(unsigned long sel, uint32_t *ba
                return 1;
        }

-       gdt_phys_base = guest_linear_to_phys(oldctx.gdtr_base);
-       if (gdt_phys_base != (uint32_t)gdt_phys_base) {
+       gdt_entry_va = (unsigned)((unsigned long long *)oldctx.gdtr_base +
+           (sel >> 3));
+       gdt_entry_pa = guest_linear_to_phys(gdt_entry_va);
+       if (gdt_entry_pa != (uint32_t)gdt_entry_pa) {
                printf("gdt base address above 4G\n");
-               cpuid_addr_value(gdt_phys_base + 8 * (sel >> 3), &entry);
+               cpuid_addr_value(gdt_entry_pa, &entry);
        } else
-               entry = ((unsigned long long *)(long)gdt_phys_base)[sel >> 3];
+       entry = *(unsigned long long *)(long)gdt_entry_pa;

        /* Check the P bit first */
        if (!((entry >> (15+32)) & 0x1) && sel != 0)
@@ -945,7 +1120,8 @@ load_seg(unsigned long sel, uint32_t *ba
  * the descriptor was invalid.
  */
 static void
-load_or_clear_seg(unsigned long sel, uint32_t *base, uint32_t *limit, union vmcs_arbytes *arbytes)
+load_or_clear_seg(unsigned long sel, uint32_t *base, uint32_t *limit,
+    union vmcs_arbytes *arbytes)
 {
        if (!load_seg(sel, base, limit, arbytes))
                load_seg(0, base, limit, arbytes);
@@ -972,8 +1148,11 @@ protected_mode(struct regs *regs)

        /* reload all segment registers */
        if (!load_seg(regs->cs, &oldctx.cs_base,
-                               &oldctx.cs_limit, &oldctx.cs_arbytes))
+               &oldctx.cs_limit, &oldctx.cs_arbytes)) {
+               dump_regs(regs);
                panic("Invalid %%cs=0x%x for protected mode\n", regs->cs);
+       }
+
        oldctx.cs_sel = regs->cs;

        load_or_clear_seg(oldctx.es_sel, &oldctx.es_base,
@@ -1109,7 +1288,7 @@ jmpl(struct regs *regs, int prefix)
        regs->cs = cs;
        regs->eip = eip;

-       if (mode == VM86_REAL_TO_PROTECTED)             /* jump to protected mode */
+       if (mode == VM86_REAL_TO_PROTECTED)     /* jump to protected mode */
                set_mode(regs, VM86_PROTECTED);
        else if (mode == VM86_PROTECTED_TO_REAL)        /* jump to real mode */
                set_mode(regs, VM86_REAL);
@@ -1135,12 +1314,12 @@ jmpl_indirect(struct regs *regs, int pre
        regs->cs = cs;
        regs->eip = eip;

-       if (mode == VM86_REAL_TO_PROTECTED)             /* jump to protected mode */
+       if (mode == VM86_REAL_TO_PROTECTED)     /* jump to protected mode */
                set_mode(regs, VM86_PROTECTED);
        else if (mode == VM86_PROTECTED_TO_REAL)        /* jump to real mode */
                set_mode(regs, VM86_REAL);
        else
-               panic("jmpl");
+               panic("jmpl_indirect");
 }

 static void
@@ -1409,6 +1588,10 @@ opcode(struct regs *regs)
                        prefix |= SEG_DS;
                        continue;

+               case 0x50 ... 0x57:
+                       push(regs, prefix, opc);
+                       continue;
+
                case 0x64:
                        TRACE((regs, regs->eip - eip, "%%fs:"));
                        prefix |= SEG_FS;
@@ -1457,7 +1640,12 @@ opcode(struct regs *regs)
                                goto invalid;
                        return OPC_EMULATED;

-               case 0x8E: /* mov r16, sreg */
+               case 0x8C: /* mov sreg, r/m16 */
+                       if (!mov_from_seg(regs, prefix, opc))
+                               goto invalid;
+                       return OPC_EMULATED;
+
+               case 0x8E: /* mov r/m16, sreg */
                        if (!mov_to_seg(regs, prefix, opc))
                                goto invalid;
                        return OPC_EMULATED;
@@ -1535,7 +1723,7 @@ opcode(struct regs *regs)
                        if (mode == VM86_REAL_TO_PROTECTED ||
                                mode == VM86_PROTECTED_TO_REAL) {
                                retl(regs, prefix);
-                               return OPC_INVALID;
+                               return OPC_INVALID; /* try to exit emulator */
                        }
                        goto invalid;

@@ -1573,7 +1761,7 @@ opcode(struct regs *regs)
                        if (mode == VM86_REAL_TO_PROTECTED ||
                                mode == VM86_PROTECTED_TO_REAL) {
                                jmpl(regs, prefix);
-                               return OPC_INVALID;
+                               return OPC_INVALID; /* try to exit emulator */
                        }
                        goto invalid;

@@ -1707,8 +1895,10 @@ trap(int trapno, int errno, struct regs

        default:
        invalid:
-               printf("Trap (0x%x) while in %s mode\n",
-                       trapno, regs->eflags & EFLAGS_VM ? "real" : "protected");
+               printf("Trap (0x%x) while in %s mode (emulator in mode %s\n",
+                       trapno, regs->eflags & EFLAGS_VM ? "real" : "protected",
+                   states[mode]);
+
                if (trapno == 14)
                        printf("Page fault address 0x%x\n", get_cr2());
                dump_regs(regs);


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.