[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] RE: [Xen-devel] [PATCH] Augment vmxassist emulation support and more accurate realmode GDT addressing
> Subject: [Xen-devel] [PATCH] Augment vmxassist emulation support and > more accurate realmode GDT addressing > > This patch allows Solaris 10 (and all S10 updates) to boot and run on > Intel processors. How much testing have you given this patch with other OSes? Vmxassist is known to be quite fragile, and I'd feel much happier if you'd tested with a good selection of Windows, Linux and *BSD versions. In particular, the various syslinux modules Ubuntu uses have been a particular problem. Thanks, Ian > -- > > ----------------------------------------------------- > Russ Blaine | Solaris Kernel | russell.blaine@xxxxxxx > > # HG changeset patch > # User russell.blaine@xxxxxxx > # Date 1187906490 25200 > # Node ID a77889d180ea0c8e0dd1297e394a755968de45cb > # Parent 149ca6d04923ea8b2b7b9bbe9c9f983974e40df9 > Augment vmxassist emulation support and more accurate realmode GDT > addressing > > vmxassist does not support all instructions needed for the transition > to and > from real mode when running non-windows/non-linux HVM guests. Add > support needed > to allow Solaris multiboot (Solaris 10 and S10 updates) to boot. Also, > modify > how the emulator finds the GDT while in real mode. The emulator > currently reads > segment descriptors out of the GDT in guest memory every time it > calculates an > address. To more accurately reflect what actual hardware does, the > emulator > should use the hidden portion of the segment registers to find the > address of > the GDT. In addition, the current code assumes that the GDT resides on > a single > physical page of memory. 
To accommodate larger GDTs, the emulator should > calculate the virtual address of the needed entry in the GDT before > converting > it to a physical address for reading. The vmxassist tool used to help > HVM guests > transition to and from real mode has some deficiencies and > inaccuracies which > prevent certain > > Signed-off-by: Russell Blaine <russell.blaine@xxxxxxx> > > diff --git a/tools/firmware/vmxassist/vm86.c > b/tools/firmware/vmxassist/vm86.c > --- a/tools/firmware/vmxassist/vm86.c > +++ b/tools/firmware/vmxassist/vm86.c > @@ -50,10 +50,14 @@ char *states[] = { > }; > > static char *rnames[] = { "ax", "cx", "dx", "bx", "sp", "bp", "si", > "di" }; > +static char *srnames[] = { "es", "cs", "ss", "ds", "fs", "gs", "invl", > "invl" }; > #endif /* DEBUG */ > > #define PDE_PS (1 << 7) > #define PT_ENTRY_PRESENT 0x1 > + > +static void load_or_clear_seg(unsigned long, uint32_t *, uint32_t *, > + union vmcs_arbytes *); > > /* We only support access to <=4G physical memory due to 1:1 mapping > */ > static uint64_t > @@ -93,7 +97,8 @@ guest_linear_to_phys(uint32_t base) > > if (l2_mfn & 0xf00000000ULL) { > printf("l2 page above 4G\n"); > - cpuid_addr_value(l2_mfn + 8 * ((base >> 21) & 0x1ff), > &l1_mfn); > + cpuid_addr_value(l2_mfn + 8 * ((base >> 21) & 0x1ff), > + &l1_mfn); > } else > l1_mfn = ((uint64_t *)(long)l2_mfn)[(base >> 21) & > 0x1ff]; > if (!(l1_mfn & PT_ENTRY_PRESENT)) > @@ -108,7 +113,8 @@ guest_linear_to_phys(uint32_t base) > > if (l1_mfn & 0xf00000000ULL) { > printf("l1 page above 4G\n"); > - cpuid_addr_value(l1_mfn + 8 * ((base >> 12) & 0x1ff), > &l0_mfn); > + cpuid_addr_value(l1_mfn + 8 * ((base >> 12) & 0x1ff), > + &l0_mfn); > } else > l0_mfn = ((uint64_t *)(long)l1_mfn)[(base >> 12) & > 0x1ff]; > if (!(l0_mfn & PT_ENTRY_PRESENT)) > @@ -123,7 +129,8 @@ static unsigned > static unsigned > address(struct regs *regs, unsigned seg, unsigned off) > { > - uint64_t gdt_phys_base; > + uint64_t gdt_entry_pa; > + unsigned gdt_entry_va; > unsigned long long
entry; > unsigned seg_base, seg_limit; > unsigned entry_low, entry_high; > @@ -139,12 +146,32 @@ address(struct regs *regs, unsigned seg, > (mode == VM86_REAL_TO_PROTECTED && regs->cs == seg)) > return ((seg & 0xFFFF) << 4) + off; > > - gdt_phys_base = guest_linear_to_phys(oldctx.gdtr_base); > - if (gdt_phys_base != (uint32_t)gdt_phys_base) { > + if (mode == VM86_PROTECTED_TO_REAL && !(oldctx.cr0 & > (CR0_PG|CR0_PE))) { > + if (seg == regs->cs) > + return oldctx.cs_base + off; > + if (seg == regs->ves) > + return oldctx.es_base + off; > + if (seg == regs->vds) > + return oldctx.ds_base + off; > + if (seg == regs->uss) > + return oldctx.ss_base + off; > + if (seg == regs->vfs) > + return oldctx.fs_base + off; > + if (seg == regs->vgs) > + return oldctx.gs_base + off; > + > + dump_regs(regs); > + panic("address(): unknown segment selector 0x%x\n", seg); > + } > + > + gdt_entry_va = (unsigned)((unsigned long long *)oldctx.gdtr_base > + > + (seg >> 3)); > + gdt_entry_pa = guest_linear_to_phys(gdt_entry_va); > + if (gdt_entry_pa != (uint32_t)gdt_entry_pa) { > printf("gdt base address above 4G\n"); > - cpuid_addr_value(gdt_phys_base + 8 * (seg >> 3), &entry); > + cpuid_addr_value(gdt_entry_pa, &entry); > } else > - entry = ((unsigned long long *)(long)gdt_phys_base)[seg >> > 3]; > + entry = *(unsigned long long *)(long)gdt_entry_pa; > > entry_high = entry >> 32; > entry_low = entry & 0xFFFFFFFF; > @@ -688,7 +715,8 @@ movcr(struct regs *regs, unsigned prefix > } > break; > case 0x22: /* mov Cd, Rd */ > - TRACE((regs, regs->eip - eip, "movl %%eax, %%cr%d", cr)); > + TRACE((regs, regs->eip - eip, "movl %%eax, %%cr%d > [[0x%x]]", > + cr, getreg32(regs, modrm))); > switch (cr) { > case 0: > oldctx.cr0 = getreg32(regs, modrm) | (CR0_PE | > CR0_NE); > @@ -815,16 +843,83 @@ pop(struct regs *regs, unsigned prefix, > write16(addr, pop16(regs)); > TRACE((regs, regs->eip - eip, "pop *0x%x", addr)); > break; > - > - /* other pop opcodes ... */ > + default: /* other pop opcodes ... 
*/ > + return (0); > } > > return 1; > +} > + > +static void > +push(struct regs *regs, unsigned prefix, unsigned opc) > +{ > + unsigned eip = regs->eip - 1; > + unsigned data; > + > + if (prefix & DATA32) { > + data = getreg32(regs, opc & 0xF); > + push32(regs, data); > + } else { > + data = getreg16(regs, opc & 0xF); > + push16(regs, data); > + } > + > + TRACE((regs, regs->eip - eip, "push%s %s%s (val 0x%x)", > + (prefix & DATA32) ? "l" : "", > + (prefix & DATA32) ? "e" : "", rnames[opc & 0xF], > data)); > +} > + > +static int > +mov_from_seg(struct regs *regs, unsigned prefix, unsigned opc) > +{ > + unsigned eip = regs->eip - 1; > + unsigned modrm = fetch8(regs); > + unsigned r = modrm & 3; /* dest reg is in r/m field */ > + unsigned data; > + > + if ((modrm & 0xC0) != 0xC0) /* reg destinations only. memory > unimpl */ > + return 0; > + > + switch ((modrm & 0x38) >> 3) { /* source reg is in reg field > */ > + case 0: /* es */ > + data = regs->ves; > + break; > + > + case 1: /* cs */ > + data = regs->cs; > + break; > + > + case 2: /* ss */ > + data = regs->uss; > + break; > + > + case 3: /* ds */ > + data = regs->vds; > + break; > + > + case 4: /* fs */ > + data = regs->vfs; > + break; > + > + case 5: /* gs */ > + data = regs->vgs; > + break; > + > + default: > + return 0; > + } > + > + TRACE((regs, regs->eip - eip, "mov %%%s %%%s (val 0x%x)\n", > + srnames[(modrm & 0x38) >> 3], rnames[r], data)); > + > + setreg16(regs, r, data); > + return 1; > } > > static int > mov_to_seg(struct regs *regs, unsigned prefix, unsigned opc) > { > + unsigned eip = regs->eip - 1; > unsigned modrm = fetch8(regs); > > /* > @@ -836,54 +931,130 @@ mov_to_seg(struct regs *regs, unsigned p > mode != VM86_PROTECTED_TO_REAL) > return 0; > > - /* Register source only. 
*/ > - if ((modrm & 0xC0) != 0xC0) > - goto fail; > - > - switch ((modrm & 0x38) >> 3) { > - case 0: /* es */ > - regs->ves = getreg16(regs, modrm); > - if (mode == VM86_PROTECTED_TO_REAL) > - return 1; > - saved_rm_regs.ves = 0; > - oldctx.es_sel = regs->ves; > - return 1; > - > - /* case 1: cs */ > - > - case 2: /* ss */ > - regs->uss = getreg16(regs, modrm); > - if (mode == VM86_PROTECTED_TO_REAL) > - return 1; > - saved_rm_regs.uss = 0; > - oldctx.ss_sel = regs->uss; > - return 1; > - case 3: /* ds */ > - regs->vds = getreg16(regs, modrm); > - if (mode == VM86_PROTECTED_TO_REAL) > - return 1; > - saved_rm_regs.vds = 0; > - oldctx.ds_sel = regs->vds; > - return 1; > - case 4: /* fs */ > - regs->vfs = getreg16(regs, modrm); > - if (mode == VM86_PROTECTED_TO_REAL) > - return 1; > - saved_rm_regs.vfs = 0; > - oldctx.fs_sel = regs->vfs; > - return 1; > - case 5: /* gs */ > - regs->vgs = getreg16(regs, modrm); > - if (mode == VM86_PROTECTED_TO_REAL) > - return 1; > - saved_rm_regs.vgs = 0; > - oldctx.gs_sel = regs->vgs; > - return 1; > - } > - > - fail: > - printf("%s:%d: missed opcode %02x %02x\n", > - __FUNCTION__, __LINE__, opc, modrm); > + if ((modrm & 0xC0) == 0xC0) /* register source */ > + { > + TRACE((regs, regs->eip - eip, "mov %s %s", > + rnames[modrm & 0x7], > + srnames[(modrm & 0x38) >> 3])); > + > + switch ((modrm & 0x38) >> 3) { > + case 0: /* es */ > + regs->ves = getreg16(regs, modrm); > + if (mode == VM86_PROTECTED_TO_REAL) > + return 1; > + saved_rm_regs.ves = 0; > + oldctx.es_sel = regs->ves; > + load_or_clear_seg(oldctx.es_sel, > &oldctx.es_base, > + &oldctx.es_limit, &oldctx.es_arbytes); > + return 1; > + > + /* case 1: cs */ > + > + case 2: /* ss */ > + regs->uss = getreg16(regs, modrm); > + if (mode == VM86_PROTECTED_TO_REAL) > + return 1; > + saved_rm_regs.uss = 0; > + oldctx.ss_sel = regs->uss; > + load_or_clear_seg(oldctx.ss_sel, > &oldctx.ss_base, > + &oldctx.ss_limit, &oldctx.ss_arbytes); > + return 1; > + case 3: /* ds */ > + regs->vds = 
getreg16(regs, modrm); > + if (mode == VM86_PROTECTED_TO_REAL) > + return 1; > + saved_rm_regs.vds = 0; > + oldctx.ds_sel = regs->vds; > + load_or_clear_seg(oldctx.ds_sel, > &oldctx.ds_base, > + &oldctx.ds_limit, &oldctx.ds_arbytes); > + return 1; > + case 4: /* fs */ > + regs->vfs = getreg16(regs, modrm); > + if (mode == VM86_PROTECTED_TO_REAL) > + return 1; > + saved_rm_regs.vfs = 0; > + oldctx.fs_sel = regs->vfs; > + load_or_clear_seg(oldctx.fs_sel, > &oldctx.fs_base, > + &oldctx.fs_limit, &oldctx.fs_arbytes); > + return 1; > + case 5: /* gs */ > + regs->vgs = getreg16(regs, modrm); > + if (mode == VM86_PROTECTED_TO_REAL) > + return 1; > + saved_rm_regs.vgs = 0; > + oldctx.gs_sel = regs->vgs; > + load_or_clear_seg(oldctx.gs_sel, > &oldctx.gs_base, > + &oldctx.gs_limit, &oldctx.gs_arbytes); > + return 1; > + default: > + break; > + } > + } else if ((modrm & 0xC0) == 0) /* memory source */ > + { > + unsigned addr = operand(prefix, regs, modrm); > + unsigned data = read16(addr); > + > + TRACE((regs, regs->eip - eip, "mov [0x%x] %s", addr, > + srnames[(modrm & 0x38) >> 3])); > + > + switch ((modrm & 0x38) >> 3) { > + case 0: /* es */ > + regs->ves = data; > + if (mode == VM86_PROTECTED_TO_REAL) > + return 1; > + saved_rm_regs.ves = 0; > + oldctx.es_sel = regs->ves; > + load_or_clear_seg(oldctx.es_sel, > &oldctx.es_base, > + &oldctx.es_limit, &oldctx.es_arbytes); > + return 1; > + > + case 1: /* cs */ > + break; > + > + case 2: /* ss */ > + regs->uss = data; > + if (mode == VM86_PROTECTED_TO_REAL) > + return 1; > + saved_rm_regs.uss = 0; > + oldctx.ss_sel = regs->uss; > + load_or_clear_seg(oldctx.ss_sel, > &oldctx.ss_base, > + &oldctx.ss_limit, &oldctx.ss_arbytes); > + return 1; > + case 3: /* ds */ > + regs->vds = data; > + if (mode == VM86_PROTECTED_TO_REAL) > + return 1; > + saved_rm_regs.vds = 0; > + oldctx.ds_sel = regs->vds; > + load_or_clear_seg(oldctx.ds_sel, > &oldctx.ds_base, > + &oldctx.ds_limit, &oldctx.ds_arbytes); > + return 1; > + case 4: /* fs */ > + 
regs->vfs = data; > + if (mode == VM86_PROTECTED_TO_REAL) > + return 1; > + saved_rm_regs.vfs = 0; > + oldctx.fs_sel = regs->vfs; > + load_or_clear_seg(oldctx.fs_sel, > &oldctx.fs_base, > + &oldctx.fs_limit, &oldctx.fs_arbytes); > + return 1; > + case 5: /* gs */ > + regs->vgs = data; > + if (mode == VM86_PROTECTED_TO_REAL) > + return 1; > + saved_rm_regs.vgs = 0; > + oldctx.gs_sel = regs->vgs; > + load_or_clear_seg(oldctx.gs_sel, > &oldctx.gs_base, > + &oldctx.gs_limit, &oldctx.gs_arbytes); > + return 1; > + default: > + break; > + } > + > + } > + > + TRACE((regs, regs->eip - eip, "%s: missed opcode %02x modrm > %02x\n", > + __FUNCTION__, opc, modrm)); > return 0; > } > > @@ -891,9 +1062,11 @@ mov_to_seg(struct regs *regs, unsigned p > * Emulate a segment load in protected mode > */ > static int > -load_seg(unsigned long sel, uint32_t *base, uint32_t *limit, union > vmcs_arbytes > *arbytes) > -{ > - uint64_t gdt_phys_base; > +load_seg(unsigned long sel, uint32_t *base, uint32_t *limit, > + union vmcs_arbytes *arbytes) > +{ > + unsigned gdt_entry_va; > + uint64_t gdt_entry_pa; > unsigned long long entry; > > /* protected mode: use seg as index into gdt */ > @@ -905,12 +1078,14 @@ load_seg(unsigned long sel, uint32_t *ba > return 1; > } > > - gdt_phys_base = guest_linear_to_phys(oldctx.gdtr_base); > - if (gdt_phys_base != (uint32_t)gdt_phys_base) { > + gdt_entry_va = (unsigned)((unsigned long long *)oldctx.gdtr_base > + > + (sel >> 3)); > + gdt_entry_pa = guest_linear_to_phys(gdt_entry_va); > + if (gdt_entry_pa != (uint32_t)gdt_entry_pa) { > printf("gdt base address above 4G\n"); > - cpuid_addr_value(gdt_phys_base + 8 * (sel >> 3), &entry); > + cpuid_addr_value(gdt_entry_pa, &entry); > } else > - entry = ((unsigned long long *)(long)gdt_phys_base)[sel >> > 3]; > + entry = *(unsigned long long *)(long)gdt_entry_pa; > > /* Check the P bit first */ > if (!((entry >> (15+32)) & 0x1) && sel != 0) > @@ -945,7 +1120,8 @@ load_seg(unsigned long sel, uint32_t *ba > * the 
descriptor was invalid. > */ > static void > -load_or_clear_seg(unsigned long sel, uint32_t *base, uint32_t *limit, > union > vmcs_arbytes *arbytes) > +load_or_clear_seg(unsigned long sel, uint32_t *base, uint32_t *limit, > + union vmcs_arbytes *arbytes) > { > if (!load_seg(sel, base, limit, arbytes)) > load_seg(0, base, limit, arbytes); > @@ -972,8 +1148,11 @@ protected_mode(struct regs *regs) > > /* reload all segment registers */ > if (!load_seg(regs->cs, &oldctx.cs_base, > - &oldctx.cs_limit, &oldctx.cs_arbytes)) > + &oldctx.cs_limit, &oldctx.cs_arbytes)) { > + dump_regs(regs); > panic("Invalid %%cs=0x%x for protected mode\n", regs->cs); > + } > + > oldctx.cs_sel = regs->cs; > > load_or_clear_seg(oldctx.es_sel, &oldctx.es_base, > @@ -1109,7 +1288,7 @@ jmpl(struct regs *regs, int prefix) > regs->cs = cs; > regs->eip = eip; > > - if (mode == VM86_REAL_TO_PROTECTED) /* jump to protected > mode */ > + if (mode == VM86_REAL_TO_PROTECTED) /* jump to protected mode */ > set_mode(regs, VM86_PROTECTED); > else if (mode == VM86_PROTECTED_TO_REAL) /* jump to real mode */ > set_mode(regs, VM86_REAL); > @@ -1135,12 +1314,12 @@ jmpl_indirect(struct regs *regs, int pre > regs->cs = cs; > regs->eip = eip; > > - if (mode == VM86_REAL_TO_PROTECTED) /* jump to protected > mode */ > + if (mode == VM86_REAL_TO_PROTECTED) /* jump to protected mode */ > set_mode(regs, VM86_PROTECTED); > else if (mode == VM86_PROTECTED_TO_REAL) /* jump to real mode */ > set_mode(regs, VM86_REAL); > else > - panic("jmpl"); > + panic("jmpl_indirect"); > } > > static void > @@ -1409,6 +1588,10 @@ opcode(struct regs *regs) > prefix |= SEG_DS; > continue; > > + case 0x50 ... 
0x57: > + push(regs, prefix, opc); > + continue; > + > case 0x64: > TRACE((regs, regs->eip - eip, "%%fs:")); > prefix |= SEG_FS; > @@ -1457,7 +1640,12 @@ opcode(struct regs *regs) > goto invalid; > return OPC_EMULATED; > > - case 0x8E: /* mov r16, sreg */ > + case 0x8C: /* mov sreg, r/m16 */ > + if (!mov_from_seg(regs, prefix, opc)) > + goto invalid; > + return OPC_EMULATED; > + > + case 0x8E: /* mov r/m16, sreg */ > if (!mov_to_seg(regs, prefix, opc)) > goto invalid; > return OPC_EMULATED; > @@ -1535,7 +1723,7 @@ opcode(struct regs *regs) > if (mode == VM86_REAL_TO_PROTECTED || > mode == VM86_PROTECTED_TO_REAL) { > retl(regs, prefix); > - return OPC_INVALID; > + return OPC_INVALID; /* try to exit emulator */ > } > goto invalid; > > @@ -1573,7 +1761,7 @@ opcode(struct regs *regs) > if (mode == VM86_REAL_TO_PROTECTED || > mode == VM86_PROTECTED_TO_REAL) { > jmpl(regs, prefix); > - return OPC_INVALID; > + return OPC_INVALID; /* try to exit emulator */ > } > goto invalid; > > @@ -1707,8 +1895,10 @@ trap(int trapno, int errno, struct regs > > default: > invalid: > - printf("Trap (0x%x) while in %s mode\n", > - trapno, regs->eflags & EFLAGS_VM ? "real" : > "protected"); > + printf("Trap (0x%x) while in %s mode (emulator in mode > %s\n", > + trapno, regs->eflags & EFLAGS_VM ? "real" : > "protected", > + states[mode]); > + > if (trapno == 14) > printf("Page fault address 0x%x\n", get_cr2()); > dump_regs(regs); > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@xxxxxxxxxxxxxxxxxxx > http://lists.xensource.com/xen-devel _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |