emu.c (49656B)
1 /* 2 * Simple instruction scanning and rewriting 3 * for implementing vx32 on x86-32 hosts. 4 */ 5 6 #ifdef __APPLE__ 7 #define __DARWIN_UNIX03 0 8 #endif 9 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <stddef.h> 13 #include <string.h> 14 #include <setjmp.h> 15 #include <assert.h> 16 #include <errno.h> 17 #include <sys/stat.h> // XX FreeBSD 4.9 header bug? 18 #include <sys/mman.h> 19 #include <stdarg.h> 20 #include <unistd.h> 21 22 #include "vx32.h" 23 #include "vx32impl.h" 24 #include "os.h" 25 #include "x86dis.h" 26 27 // Special values for unused entries in entrypoint hash table 28 #define NULLSRCEIP ((uint32_t)-1) 29 #define NULLDSTEIP ((uint32_t)(uintptr_t)vxrun_nullfrag); 30 31 int vx32_debugxlate = 0; 32 33 static uint64_t nflush; 34 35 static void disassemble(uint8_t *addr0, uint8_t*, uint8_t*); 36 37 // Create the emulation state for a new process 38 int vxemu_init(struct vxproc *vxp) 39 { 40 // Initial emulation hash table size (must be a power of two) 41 int etablen = 4096; 42 43 // Allocate the vxemu state area in 32-bit memory, 44 // because it must be accessible to our translated code 45 // via the special fs segment register setup. 46 vxemu *e = mmap(NULL, VXCODEBUFSIZE, 47 PROT_READ | PROT_WRITE | PROT_EXEC, 48 MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0); 49 if (e == MAP_FAILED){ 50 vxprint("vxemu_init: mmap failed\n"); 51 return -1; 52 } 53 54 // Basic initialization 55 memset(e, 0, sizeof(vxemu)); 56 e->magic = VXEMU_MAGIC; 57 e->proc = vxp; 58 vxp->cpu = &e->cpu; 59 e->emuptr = (uint32_t)(intptr_t)e; 60 e->etablen = etablen; 61 e->etabmask = etablen - 1; 62 63 // Initialize the entrypoint table and translation buffer pointers 64 vxemu_flush(e); 65 66 vxp->emu = e; 67 return 0; 68 } 69 70 void vxemu_free(vxemu *e) 71 { 72 assert(e->proc->emu == e); 73 e->proc->emu = NULL; 74 75 // Free the vxemu state area 76 munmap(e, VXCODEBUFSIZE); 77 } 78 79 // Reset a vxproc's translation code buffer and entrypoint table. 80 void vxemu_flush(vxemu *e) 81 { 82 uint32_t i; 83 84 // Clear the entrypoint table. 85 uint32_t etablen = e->etablen; 86 for (i = 0; i < etablen; i++) { 87 e->etab[i].srceip = NULLSRCEIP; 88 e->etab[i].dsteip = NULLDSTEIP; 89 } 90 e->etabcnt = 0; 91 92 // The translated code buffer immediately follows the etab. 93 e->codebuf = &e->etab[etablen]; 94 e->codefree = &e->etab[etablen]; 95 e->codetab = (void*)e + VXCODEBUFSIZE; 96 e->codetop = (void*)e + VXCODEBUFSIZE; 97 98 nflush++; 99 } 100 101 void vxemu_growetab(struct vxemu *e) 102 { 103 // Increase the size of the entrypoint table, 104 // which effectively just reserves more memory 105 // from the code translation buffer. 106 e->etablen *= 2; 107 e->etabmask = e->etablen - 1; 108 109 // Re-initialize the entrypoint table and translation buffer. 110 vxemu_flush(e); 111 } 112 113 // Each translated frag starts with a one-instruction prolog... 114 #define PROLOG_LEN 7 // Length of 'mov VSEG:VXEMU_EBX,%ebx' 115 116 117 // Translate a block of code starting at the current vx32 EIP. 118 // The basic procedure works in four stages. 119 // 120 // 1: We first scan the instruction stream to build up a 121 // tentative vxinsn table for the instructions we plan to translate, 122 // with output code offsets computed for worst-case instruction lengths. 123 // This pass handles checking execute permissions on instruction pages, 124 // and decides exactly how many instructions we'll translate in this block. 125 // The final instruction in a fragment is always either 126 // an unconditional flow control instruction (JMP, CALL, RET, INT, etc.), 127 // or the special "pseudo-instruction" VXI_ENDFRAG, 128 // which ends the fragment with a jump to the appropriate subsequent EIP. 129 // 130 // 2: Next we do a reverse scan through the vxinsn table 131 // to identify instructions we can simplify: 132 // particularly instructions with condition code fixups 133 // whose condition codes are not actually used before they are killed. 134 // We also identify branches that can be rewritten with 8-bit displacements. 135 // In the process we adjust the target instruction length (dstlen) fields 136 // for all simplified instructions accordingly. 137 // 138 // 3: We now perform a forward scan through the vxinsn table 139 // to compute the final offsets for all target instructions in the block. 140 // 141 // 4: Finally, we scan the instruction stream again 142 // and emit the target instructions for the block. 143 // 144 145 // Macros to extract fields in a Mod-Reg-R/M byte 146 #define EA_MOD(b) ((uint8_t)(b) >> 6) 147 #define EA_REG(b) (((uint8_t)(b) >> 3) & 7) 148 #define EA_RM(b) ((uint8_t)(b) & 7) 149 150 // Scan a Mod-Reg-R/M byte and the rest of the effective address 151 uint8_t *xscan_rm(uint8_t *inp) 152 { 153 uint8_t ea = *inp++; 154 switch (EA_MOD(ea)) { 155 case 0: 156 switch (EA_RM(ea)) { 157 case 4: ; // SIB 158 uint8_t sib = *inp; 159 if ((sib & 7) == 5) 160 return inp+1+4; 161 else 162 return inp+1; 163 case 5: // disp32 164 return inp+4; 165 default: // [reg] 166 return inp; 167 } 168 169 case 1: 170 switch (EA_RM(ea)) { 171 case 4: // SIB+disp8 172 return inp+1+1; 173 default: // [reg]+disp8 174 return inp+1; 175 } 176 177 case 2: 178 switch (EA_RM(ea)) { 179 case 4: // SIB+disp32 180 return inp+1+4; 181 default: // [reg]+disp32 182 return inp+4; 183 } 184 185 case 3: // reg 186 return inp; 187 188 default: 189 assert(0); 190 return 0; 191 } 192 } 193 194 // Translation pass 1: 195 // scan instruction stream, build preliminary vxinsn table, 196 // and decide how many instructions to translate in this fragment. 197 static int xscan(struct vxproc *p) 198 { 199 uint32_t faultva; 200 uint32_t eip; 201 uint8_t *instart, *inmax; 202 struct vxemu *emu = p->emu; 203 204 // Make sure there's enough space in the translated code buffer; 205 // if not, then first clear the code buffer and entrypoint table. 206 if (((uint8_t*)emu->codetab - (uint8_t*)emu->codefree) < 1024) 207 vxemu_flush(emu); 208 209 // Grow the entrypoint hash table if it gets too crowded. 210 // This also in effect flushes the translated code buffer. 211 if (emu->etabcnt > emu->etablen/2) 212 vxemu_growetab(emu); 213 214 // Find and check permissions on the input instruction stream, 215 // and determine how far ahead we can scan (up to one full page) 216 // before hitting a non-executable page. 217 eip = emu->cpu.eip; 218 instart = (uint8_t*)emu->mem->base + eip; 219 emu->guestfrag = instart; 220 if (!vxmem_checkperm(p->mem, eip, 2*VXPAGESIZE, VXPERM_EXEC, &faultva)) { 221 if(faultva == eip) { 222 noexec: 223 emu->cpu_trap = VXTRAP_PAGEFAULT; 224 emu->cpu.traperr = 0x10; 225 emu->cpu.trapva = faultva; 226 return emu->cpu_trap; 227 } 228 } else 229 faultva = VXPAGETRUNC(eip) + 2*VXPAGESIZE; 230 inmax = instart + faultva - eip; 231 232 // Create a new fragment header in the code translation buffer 233 struct vxfrag *f = (struct vxfrag*)(((intptr_t)emu->codefree + 3) & ~3); 234 emu->txfrag = f; 235 f->eip = eip; 236 237 unsigned ino = 0; // instruction number 238 unsigned dstofs = PROLOG_LEN; 239 uint8_t *inp = instart; 240 emu->ininst = inp; // save instruction currently being translated 241 int fin = 0; 242 do { 243 uint8_t itype = 0; 244 uint8_t dstlen; 245 uint8_t ea; 246 247 if(*inp == 0xF0) // LOCK 248 inp++; 249 250 // Begin instruction decode. 251 // We might take a fault on any of these instruction reads 252 // if we run off the end of a mapped code page. 253 // In that case our exception handler 254 // notices that emu->ininst != NULL and initiates recovery. 255 // Or we might _not_ take a fault 256 // on a page marked read-only but not executable; 257 // that's why we check against inmax after each insn. 258 switch (*inp++) { 259 260 // OP Eb,Gb; OP Ev,Gv; OP Gb,Eb; OP Gv,Ev 261 case 0x00: case 0x01: case 0x02: case 0x03: // ADD 262 case 0x08: case 0x09: case 0x0a: case 0x0b: // OR 263 case 0x10: case 0x11: case 0x12: case 0x13: // ADC 264 case 0x18: case 0x19: case 0x1a: case 0x1b: // SBB 265 case 0x20: case 0x21: case 0x22: case 0x23: // AND 266 case 0x28: case 0x29: case 0x2a: case 0x2b: // SUB 267 case 0x30: case 0x31: case 0x32: case 0x33: // XOR 268 case 0x38: case 0x39: case 0x3a: case 0x3b: // CMP 269 case 0x84: case 0x85: // TEST 270 case 0x86: case 0x87: // XCHG 271 case 0x88: case 0x89: case 0x8a: case 0x8b: // MOV 272 inp = xscan_rm(inp); 273 goto notrans; 274 275 // OP AL,Ib; PUSH Ib 276 case 0x04: case 0x0c: case 0x14: case 0x1c: // ADD etc. 277 case 0x24: case 0x2c: case 0x34: case 0x3c: // AND etc. 278 case 0x6a: // PUSH Ib 279 case 0xa8: // TEST AL,Ib 280 case 0xb0: case 0xb1: case 0xb2: case 0xb3: // MOV Gb,Ib 281 case 0xb4: case 0xb5: case 0xb6: case 0xb7: 282 inp += 1; 283 goto notrans; 284 285 // OP EAX,Iv; PUSH Iv; MOV moffs 286 case 0x05: case 0x0d: case 0x15: case 0x1d: // OP EAX,Iv 287 case 0x25: case 0x2d: case 0x35: case 0x3d: 288 case 0x68: // PUSH Iv 289 case 0xa0: case 0xa1: case 0xa2: case 0xa3: // MOV moffs 290 case 0xa9: // TEST eAX,Iv 291 case 0xb8: case 0xb9: case 0xba: case 0xbb: // MOV Gv,Iv 292 case 0xbc: case 0xbd: case 0xbe: case 0xbf: 293 inp += 4; 294 goto notrans; 295 296 // CS and DS segment overrides, only valid for branch hints 297 case 0x2e: // CS/"not taken" 298 case 0x3e: // DS/"taken" 299 switch (*inp++) { 300 301 // Jcc (8-bit displacement) 302 case 0x70: case 0x71: case 0x72: case 0x73: 303 case 0x74: case 0x75: case 0x76: case 0x77: 304 case 0x78: case 0x79: case 0x7a: case 0x7b: 305 case 0x7c: case 0x7d: case 0x7e: case 0x7f: 306 inp += 1; 307 itype = VXI_JUMP; 308 dstlen = 7; // 32-bit branch w/hint 309 goto done; 310 311 // Two-byte opcode 312 case 0x0f: 313 switch (*inp++) { 314 315 // Jcc - conditional branch with disp32 316 case 0x80: case 0x81: case 0x82: case 0x83: 317 case 0x84: case 0x85: case 0x86: case 0x87: 318 case 0x88: case 0x89: case 0x8a: case 0x8b: 319 case 0x8c: case 0x8d: case 0x8e: case 0x8f: 320 inp += 4; 321 itype = VXI_JUMP; 322 dstlen = 7; // 32-bit branch w/hint 323 goto done; 324 325 } 326 goto invalid; 327 } 328 goto invalid; 329 330 // INC reg; DEC reg; PUSH reg; POP reg; XCHG eAX,reg 331 case 0x40: case 0x41: case 0x42: case 0x43: // INC 332 case 0x44: case 0x45: case 0x46: case 0x47: 333 case 0x48: case 0x49: case 0x4a: case 0x4b: // DEC 334 case 0x4c: case 0x4d: case 0x4e: case 0x4f: 335 case 0x50: case 0x51: case 0x52: case 0x53: // PUSH 336 case 0x54: case 0x55: case 0x56: case 0x57: 337 case 0x58: case 0x59: case 0x5a: case 0x5b: // POP 338 case 0x5c: case 0x5d: case 0x5e: case 0x5f: 339 case 0x90: case 0x91: case 0x92: case 0x93: // XCHG 340 case 0x94: case 0x95: case 0x96: case 0x97: 341 case 0x98: case 0x99: // CWDE, CDQ 342 case 0xa4: case 0xa5: case 0xa6: case 0xa7: // MOVS, CMPS 343 case 0xaa: case 0xab: // STOS 344 case 0xac: case 0xad: case 0xae: case 0xaf: // LODS, SCAS 345 case 0xc9: // LEAVE 346 case 0xfc: case 0xfd: // CLD, STD 347 goto notrans; 348 349 // OP Eb,Ib; OP Ev,Ib; IMUL Gv,Ev,Ib 350 case 0x80: // OP Eb,Ib 351 case 0x83: // OP Ev,Ib 352 case 0x6b: // IMUL Gv,Ev,Ib 353 inp = xscan_rm(inp); 354 inp += 1; 355 goto notrans; 356 357 // OP Ev,Iv; IMUL Gv,Ev,Iv 358 case 0x81: // OP Ev,Iv 359 case 0x69: // IMUL Gv,Ev,Iv 360 inp = xscan_rm(inp); 361 inp += 4; 362 goto notrans; 363 364 // Jcc (8-bit displacement) 365 case 0x70: case 0x71: case 0x72: case 0x73: 366 case 0x74: case 0x75: case 0x76: case 0x77: 367 case 0x78: case 0x79: case 0x7a: case 0x7b: 368 case 0x7c: case 0x7d: case 0x7e: case 0x7f: 369 inp += 1; 370 itype = VXI_JUMP; 371 dstlen = 6; // Size of worst-case 32-bit branch 372 goto done; 373 374 // LEA Gv,M 375 case 0x8d: 376 if (EA_MOD(*inp) == 3) // Mem-only 377 goto invalid; 378 inp = xscan_rm(inp); 379 goto notrans; 380 381 // Group 1a - POP Ev 382 case 0x8f: 383 if (EA_REG(*inp) != 0) 384 goto invalid; 385 inp = xscan_rm(inp); 386 goto notrans; 387 388 // FWAIT 389 case 0x9b: 390 if (p->allowfp == 0) { 391 badfp: 392 if (ino > 0) 393 goto endfrag; 394 emu->cpu_trap = VXTRAP_FPOFF; 395 return emu->cpu_trap; 396 } 397 goto notrans; 398 399 // PUSHF; POPF 400 case 0x9c: case 0x9d: 401 goto notrans; 402 403 // SAHF; LAHF 404 case 0x9f: case 0x9e: 405 goto notrans; 406 407 // Shift Eb,Ib; Shift Ev,Ib 408 case 0xc0: case 0xc1: 409 inp = xscan_rm(inp); 410 inp += 1; 411 // XXX fix CCs 412 goto notrans; 413 414 // Shift Eb,1; Shift Ev,1 415 case 0xd0: case 0xd1: 416 inp = xscan_rm(inp); 417 // XXX fix CCs 418 goto notrans; 419 420 // Shift Eb,CL; Shift Ev,CL 421 case 0xd2: case 0xd3: 422 inp = xscan_rm(inp); 423 // XXX fix CCs 424 goto notrans; 425 426 // RET Iw 427 case 0xc2: 428 inp += 2; 429 itype = VXI_RETURN_IMM; 430 dstlen = 7+1+6+5; // movl %ebx,VSEG:VXEMU_EBX 431 // popl %ebx 432 // addl $Iw,%esp 433 // jmp vxrun_lookup_indirect 434 fin = 1; 435 goto done; 436 437 // RET 438 case 0xc3: 439 itype = VXI_RETURN; 440 dstlen = 7+1+5; // movl %ebx,VSEG:VXEMU_EBX 441 // popl %ebx 442 // jmp vxrun_lookup_indirect 443 fin = 1; 444 goto done; 445 446 // Group 11 - MOV Eb,Ib 447 case 0xc6: 448 if (EA_REG(*inp) != 0) 449 goto invalid; 450 inp = xscan_rm(inp); 451 inp += 1; 452 goto notrans; 453 454 // Group 11 - MOV Ev,Iv 455 case 0xc7: 456 if (EA_REG(*inp) != 0) 457 goto invalid; 458 inp = xscan_rm(inp); 459 inp += 4; 460 goto notrans; 461 462 // ENTER 463 case 0xc8: 464 inp += 2+1; // imm16,imm8 465 goto notrans; 466 467 case 0xcd: // INT n (software interrupt) 468 inp++; 469 case 0xcc: // INT3 (breakpoint) 470 goto gentrap; 471 472 // 387 escapes - modrm with opcode field 473 case 0xd8: case 0xd9: case 0xda: case 0xdb: 474 case 0xdc: case 0xdd: case 0xde: case 0xdf: 475 if (!p->allowfp) 476 goto badfp; 477 if ((*inp>>6) == 3) 478 inp++; 479 else 480 inp = xscan_rm(inp); 481 goto notrans; 482 483 // Loops 484 case 0xe0: // LOOPNZ cb 485 inp++; 486 itype = VXI_LOOPNZ; 487 dstlen = 3+2+2+5; // leal -1(ecx), ecx 488 // jz .+7 489 // jecxz .+5 490 // jmp cb 491 goto done; 492 493 case 0xe1: // LOOPZ cb 494 inp++; 495 itype = VXI_LOOPZ; 496 dstlen = 3+2+2+5; // leal -1(ecx), ecx 497 // jnz .+7 498 // jecxz .+5 499 // jmp cb 500 goto done; 501 502 case 0xe2: // LOOP cb 503 inp++; 504 itype = VXI_LOOP; 505 dstlen = 3+2+5; // leal -1(ecx), ecx 506 // jecxz .+5 507 // jmp cb 508 goto done; 509 510 // CALL 511 case 0xe8: // CALL Jv 512 inp += 4; 513 itype = VXI_CALL; 514 dstlen = 5+5; // pushl $nexteip 515 // jmp trampoline 516 fin = 1; 517 goto done; 518 519 // JMP 520 case 0xe9: // JMP Jv 521 inp += 4; 522 itype = VXI_JUMP; 523 dstlen = 5; // Size of worst-case 32-bit JMP 524 fin = 1; 525 goto done; 526 527 // JMP short 528 case 0xeb: // JMP Jb 529 inp += 1; 530 itype = VXI_JUMP; 531 dstlen = 5; // Size of worst-case 32-bit JMP 532 fin = 1; 533 goto done; 534 535 // Group 3 - unary ops 536 case 0xf6: 537 ea = *inp; 538 inp = xscan_rm(inp); 539 switch (EA_REG(ea)) { 540 case 0: case 1: // TEST Eb,Ib 541 inp += 1; 542 default: // NOT, NEG, ... 543 ; // XXX MUL/DIV require fixcc! 544 } 545 goto notrans; 546 547 case 0xf7: 548 ea = *inp; 549 inp = xscan_rm(inp); 550 switch (EA_REG(ea)) { 551 case 0: case 1: // TEST Ev,Iv 552 inp += 4; 553 default: // NOT, NEG, ... 554 ; // XXX MUL/DIV require fixcc! 555 } 556 goto notrans; 557 558 // Group 4 - INC, DEC 559 case 0xfe: 560 ea = *inp; 561 inp = xscan_rm(inp); 562 switch (EA_REG(ea)) { 563 case 0: case 1: // INC Eb, DEC Eb 564 goto notrans; 565 } 566 goto invalid; 567 568 // Group 5 - INC, DEC, CALL, JMP, PUSH 569 case 0xff: 570 ea = *inp; 571 inp = xscan_rm(inp); 572 switch (EA_REG(ea)) { 573 case 0: case 1: // INC Ev, DEC Ev 574 case 6: // PUSH Ev 575 goto notrans; 576 case 2: // CALL Ev 577 itype = VXI_CALLIND; 578 dstlen = 7+(inp-emu->ininst)+5+5; 579 // movl %ebx,VSEG:VXEMU_EBX 580 // movl <indirect_ea>,%ebx 581 // (same length as CALL inst) 582 // pushl $<return_eip> 583 // jmp vxrun_lookup_indirect 584 fin = 1; 585 goto done; 586 case 4: // JMP Ev 587 itype = VXI_JUMPIND; 588 dstlen = 7+(inp-emu->ininst)+5; 589 // movl %ebx,VSEG:VXEMU_EBX 590 // movl <indirect_ea>,%ebx 591 // (same length as CALL inst) 592 // jmp vxrun_lookup_indirect 593 fin = 1; 594 goto done; 595 } 596 goto invalid; 597 598 // I/O 599 case 0xed: 600 goto gentrap; 601 602 // Prefixes 603 case 0x0f: // 2-byte opcode escape 604 goto twobyte; 605 case 0x66: // Operand size prefix 606 goto opsize; 607 case 0xf3: // REP/REPE prefix 608 goto rep; 609 case 0xf2: // REPNE prefix 610 goto repne; 611 } 612 goto invalid; 613 614 // Operand size prefix (0x66) seen 615 opsize: 616 switch (*inp++) { 617 618 // OP Ev,Gv; OP Gv,Ev 619 case 0x01: case 0x03: // ADD 620 case 0x09: case 0x0b: // OR 621 case 0x11: case 0x13: // ADC 622 case 0x19: case 0x1b: // SBB 623 case 0x21: case 0x23: // AND 624 case 0x29: case 0x2b: // SUB 625 case 0x31: case 0x33: // XOR 626 case 0x39: case 0x3b: // CMP 627 case 0x85: // TEST 628 case 0x87: // XCHG 629 case 0x89: case 0x8b: // MOV 630 inp = xscan_rm(inp); 631 goto notrans; 632 633 // OP EAX,Iv; PUSH Iv 634 case 0x05: case 0x0d: case 0x15: case 0x1d: // OP EAX,Iv 635 case 0x25: case 0x2d: case 0x35: case 0x3d: 636 case 0x68: // PUSH Iv 637 case 0xa9: // TEST eAX,Iv 638 case 0xb8: case 0xb9: case 0xba: case 0xbb: // MOV Gv,Iv 639 case 0xbc: case 0xbd: case 0xbe: case 0xbf: 640 inp += 2; 641 goto notrans; 642 643 // INC reg; DEC reg; PUSH reg; POP reg; XCHG eAX,reg 644 case 0x40: case 0x41: case 0x42: case 0x43: // INC 645 case 0x44: case 0x45: case 0x46: case 0x47: 646 case 0x48: case 0x49: case 0x4a: case 0x4b: // DEC 647 case 0x4c: case 0x4d: case 0x4e: case 0x4f: 648 case 0x50: case 0x51: case 0x52: case 0x53: // PUSH 649 case 0x54: case 0x55: case 0x56: case 0x57: 650 case 0x58: case 0x59: case 0x5a: case 0x5b: // POP 651 case 0x5c: case 0x5d: case 0x5e: case 0x5f: 652 case 0x90: case 0x91: case 0x92: case 0x93: // XCHG 653 case 0x94: case 0x95: case 0x96: case 0x97: 654 case 0x98: case 0x99: // CWDE, CDQ 655 case 0xa4: case 0xa5: case 0xa6: case 0xa7: // MOVS, CMPS 656 case 0xaa: case 0xab: // STOS 657 case 0xac: case 0xad: case 0xae: case 0xaf: // LODS, SCAS 658 case 0xc9: // LEAVE 659 case 0xfc: case 0xfd: // CLD, STD 660 goto notrans; 661 662 // OP Ev,Iv; IMUL Gv,Ev,Iv 663 case 0x81: // OP Ev,Iv 664 case 0x69: // IMUL Gv,Ev,Iv 665 inp = xscan_rm(inp); 666 inp += 2; 667 goto notrans; 668 669 // OP Ev,Ib; IMUL Gv,Ev,Ib 670 case 0x83: // OP Ev,Ib 671 case 0x6b: // IMUL Gv,Ev,Ib 672 inp = xscan_rm(inp); 673 inp += 1; 674 goto notrans; 675 676 // MOV moffs 677 case 0xa1: case 0xa3: 678 inp += 4; // always 32-bit offset 679 goto notrans; 680 681 // Shift Ev,Ib 682 case 0xc1: 683 inp = xscan_rm(inp); 684 inp += 1; 685 // XXX fix CCs 686 goto notrans; 687 688 // Shift Ev,1 689 case 0xd1: 690 inp = xscan_rm(inp); 691 // XXX fix CCs 692 goto notrans; 693 694 // Shift Ev,CL 695 case 0xd3: 696 inp = xscan_rm(inp); 697 // XXX fix CCs 698 goto notrans; 699 700 // Group 11 - MOV Ev,Iv 701 case 0xc7: 702 if (EA_REG(*inp) != 0) 703 goto invalid; 704 inp = xscan_rm(inp); 705 inp += 2; 706 goto notrans; 707 708 // Group 3 - unary ops 709 case 0xf7: 710 ea = *inp; 711 inp = xscan_rm(inp); 712 switch (EA_REG(ea)) { 713 case 0: case 1: // TEST Ev,Iv 714 inp += 2; 715 default: // NOT, NEG, ... 716 ; // XXX MUL/DIV require fixcc! 717 } 718 goto notrans; 719 720 // Group 5 - INC, DEC, CALL, JMP, PUSH 721 case 0xff: 722 ea = *inp; 723 inp = xscan_rm(inp); 724 switch (EA_REG(ea)) { 725 case 0: case 1: // INC Ev, DEC Ev 726 goto notrans; 727 } 728 goto invalid; 729 730 // Prefixes 731 case 0x0f: // 2-byte opcode escape 732 goto twobyte_opsize; 733 case 0x66: // Operand size prefix (redundant) 734 goto invalid; 735 case 0xf3: // REP/REPE prefix 736 goto opsize_rep; 737 case 0xf2: // REPNE prefix 738 goto opsize_repne; 739 } 740 goto invalid; 741 742 // REP/REPE prefix (0xf3) seen 743 rep: 744 switch (*inp++) { 745 746 // No-operand insns 747 case 0xa4: case 0xa5: case 0xa6: case 0xa7: // MOVS, CMPS 748 case 0xaa: case 0xab: // STOS 749 case 0xac: case 0xad: case 0xae: case 0xaf: // LODS, SCAS 750 goto notrans; 751 752 // Prefixes 753 case 0x0f: // 2-byte opcode escape 754 goto twobyte_rep; 755 case 0x66: // Operand size prefix 756 goto opsize_rep; 757 case 0xf3: // REP/REPE prefix (redundant) 758 goto invalid; 759 case 0xf2: // REPNE prefix (conflicting) 760 goto invalid; 761 } 762 goto invalid; 763 764 // REPNE prefix (0xf2) seen 765 repne: 766 switch (*inp++) { 767 768 // No-operand insns 769 case 0xa6: case 0xa7: // CMPS 770 case 0xae: case 0xaf: // SCAS 771 goto notrans; 772 773 // Prefixes 774 case 0x0f: // 2-byte opcode escape 775 goto twobyte_repne; 776 case 0x66: // Operand size prefix 777 goto opsize_repne; 778 case 0xf3: // REP/REPE prefix (conflicting) 779 goto invalid; 780 case 0xf2: // REPNE prefix (redundant) 781 goto invalid; 782 } 783 goto invalid; 784 785 786 // Operand size prefix (0x66) and REP/REPE prefix (0xf3) seen 787 opsize_rep: 788 switch (*inp++) { 789 case 0xa5: case 0xa7: // MOVS, CMPS 790 case 0xab: // STOS 791 case 0xad: case 0xaf: // LODS, SCAS 792 goto notrans; 793 } 794 goto invalid; 795 796 // Operand size prefix (0x66) and REPNE prefix (0xf2) seen 797 opsize_repne: 798 switch (*inp++) { 799 case 0xa7: // CMPS 800 case 0xaf: // SCAS 801 goto notrans; 802 } 803 goto invalid; 804 805 806 twobyte: 807 switch (*inp++) { 808 809 // SYSCALL instruction for fast system calls 810 case 0x05: 811 goto gentrap; 812 813 // No additional operand 814 case 0xc8: case 0xc9: case 0xca: case 0xcb: // BSWAP 815 case 0xcc: case 0xcd: case 0xce: case 0xcf: 816 goto notrans; 817 818 // General EA operands 819 case 0x10: case 0x11: // MOVUPS 820 case 0x12: // MOVLPS Vps,Mq/MOVHLPS 821 case 0x14: case 0x15: // UNPCKLPS/UNPCKHPS 822 case 0x16: // MOVHPS Vps,Mq/MOVLHPS 823 case 0x28: case 0x29: // MOVAPS 824 case 0x2e: case 0x2f: // UCOMISS/COMISS 825 case 0x40: case 0x41: case 0x42: case 0x43: // CMOVcc 826 case 0x44: case 0x45: case 0x46: case 0x47: 827 case 0x48: case 0x49: case 0x4a: case 0x4b: 828 case 0x4c: case 0x4d: case 0x4e: case 0x4f: 829 case 0x51: // SQRTPS 830 case 0x54: case 0x55: case 0x56: case 0x57: // ANDPS etc. 831 case 0x58: case 0x59: case 0x5a: case 0x5b: // ADDPS etc. 832 case 0x5c: case 0x5d: case 0x5e: case 0x5f: // SUBPS etc. 833 case 0xa3: // BT Ev,Gv 834 case 0xab: // BTS Ev,Gv 835 case 0xaf: // IMUL Gv,Ev 836 case 0xb0: // CMPXCHG Eb,Gb 837 case 0xb1: // CMPXCHG Ev,Gv 838 case 0xb3: // BTR Ev,Gv 839 case 0xb6: case 0xb7: // MOVZX 840 case 0xbb: // BTC Ev,Gv 841 case 0xbc: case 0xbd: // BSF, BSR 842 case 0xbe: case 0xbf: // MOVSX 843 case 0xc0: // XADD Eb,Gb 844 case 0xc1: // XADD Ev,Gv 845 inp = xscan_rm(inp); 846 goto notrans; 847 848 // General EA operands plus immediate byte 849 case 0xc2: // CMPPS Vps,Wps,Ib 850 case 0xc6: // SHUFPS Vps,Wps,Ib 851 inp = xscan_rm(inp); 852 inp += 1; 853 goto notrans; 854 855 // Memory-only EA operand 856 case 0x13: // MOVLPS Mq,Vps 857 case 0x17: // MOVHPS Mq,Vps 858 case 0x2b: // MOVNTPS 859 case 0xc3: // MOVNTI Md,Gd 860 if (EA_MOD(*inp) == 3) // Mem-only 861 goto invalid; 862 inp = xscan_rm(inp); 863 goto notrans; 864 865 // Register-only EA operand 866 case 0x50: // MOVMSKPS 867 if (EA_MOD(*inp) != 3) // Reg-only 868 goto invalid; 869 inp = xscan_rm(inp); 870 goto notrans; 871 872 // Jcc - conditional branch with disp32 873 case 0x80: case 0x81: case 0x82: case 0x83: 874 case 0x84: case 0x85: case 0x86: case 0x87: 875 case 0x88: case 0x89: case 0x8a: case 0x8b: 876 case 0x8c: case 0x8d: case 0x8e: case 0x8f: 877 inp += 4; 878 itype = VXI_JUMP; 879 dstlen = 6; // Size of worst-case 32-bit branch 880 goto done; 881 882 // SETcc - set byte based on condition 883 case 0x90: case 0x91: case 0x92: case 0x93: 884 case 0x94: case 0x95: case 0x96: case 0x97: 885 case 0x98: case 0x99: case 0x9a: case 0x9b: 886 case 0x9c: case 0x9d: case 0x9e: case 0x9f: 887 if (EA_REG(*inp) != 0) 888 goto invalid; 889 inp = xscan_rm(inp); 890 goto notrans; 891 892 // Shift instructions 893 case 0xa4: // SHLD Ev,Gv,Ib 894 case 0xac: // SHRD Ev,Gv,Ib 895 inp = xscan_rm(inp); 896 inp += 1; 897 // XXX fix cc 898 goto notrans; 899 case 0xa5: // SHLD Ev,Gv,CL 900 case 0xad: // SHRD Ev,Gv,CL 901 inp = xscan_rm(inp); 902 // XXX fix cc 903 goto notrans; 904 905 // Group 8 - Bit test/modify with immediate 906 case 0xba: 907 if (!(EA_REG(*inp) & 4)) 908 goto invalid; 909 inp = xscan_rm(inp); 910 inp += 1; 911 goto invalid; 912 913 // Group 15 - SSE control 914 case 0xae: 915 ea = *inp; 916 inp = xscan_rm(inp); 917 switch (EA_REG(ea)) { 918 case 2: // LDMXCSR 919 case 3: // STMXCSR 920 if (EA_MOD(ea) == 3) // Mem-only 921 goto invalid; 922 goto notrans; 923 // XX LFENCE, SFENCE, MFENCE? 924 } 925 goto invalid; 926 927 // Group 16 - PREFETCH 928 case 0x18: 929 if (EA_MOD(*inp) == 3) // Mem-only 930 goto invalid; 931 // XX Squash to NOP if EA_REG(*inp) > 3? 932 inp = xscan_rm(inp); 933 goto notrans; 934 935 } 936 goto invalid; 937 938 twobyte_opsize: 939 switch (*inp++) { 940 941 // General EA operands 942 case 0x10: case 0x11: // MOVUPD 943 case 0x14: case 0x15: // UNPCKLPD/UNPCKHPD 944 case 0x28: case 0x29: // MOVAPD 945 case 0x2e: case 0x2f: // UCOMISD/COMISD 946 case 0x40: case 0x41: case 0x42: case 0x43: // CMOVcc 947 case 0x44: case 0x45: case 0x46: case 0x47: 948 case 0x48: case 0x49: case 0x4a: case 0x4b: 949 case 0x4c: case 0x4d: case 0x4e: case 0x4f: 950 case 0x51: // SQRTPD 951 case 0x54: case 0x55: case 0x56: case 0x57: // ANDPD etc. 952 case 0x58: case 0x59: case 0x5a: case 0x5b: // ADDPD etc. 953 case 0x5c: case 0x5d: case 0x5e: case 0x5f: // SUBPD etc. 954 case 0x60: case 0x61: case 0x62: case 0x63: // PUNPCK... 955 case 0x64: case 0x65: case 0x66: case 0x67: // PCMPGT... 956 case 0x68: case 0x69: case 0x6a: case 0x6b: // PUNPCK... 957 case 0x6c: case 0x6d: case 0x6e: case 0x6f: // PUNPCK... 958 case 0x74: case 0x75: case 0x76: // PCMPEQ... 959 case 0x7e: case 0x7f: // MOVD/MOVDQA 960 case 0xa3: // BT Ev,Gv 961 case 0xab: // BTS Ev,Gv 962 case 0xb3: // BTR Ev,Gv 963 case 0xbb: // BTC Ev,Gv 964 case 0xbc: case 0xbd: // BSF, BSR 965 case 0xaf: // IMUL Gv,Ev 966 case 0xb6: // MOVZX Gv,Eb 967 case 0xbe: // MOVSX Gv,Eb 968 case 0xd1: case 0xd2: case 0xd3: // PSRLx 969 case 0xd4: case 0xd5: case 0xd6: // PADDQ... 970 case 0xd8: case 0xd9: case 0xda: case 0xdb: // PSUBUSB... 971 case 0xdc: case 0xdd: case 0xde: case 0xdf: // PADDUSB... 972 case 0xe0: case 0xe1: case 0xe2: case 0xe3: // PAVGB... 973 case 0xe4: case 0xe5: case 0xe6: // PMULHUW... 974 case 0xe8: case 0xe9: case 0xea: case 0xeb: // PSUBSB... 975 case 0xec: case 0xed: case 0xee: case 0xef: // PADDSB... 976 case 0xf1: case 0xf2: case 0xf3: // PSLLx 977 case 0xf4: case 0xf5: case 0xf6: // PMULUDQ... 978 case 0xf8: case 0xf9: case 0xfa: case 0xfb: // PSUBB... 979 case 0xfc: case 0xfd: case 0xfe: // PADDB... 980 inp = xscan_rm(inp); 981 goto notrans; 982 983 // General EA operands plus immediate byte 984 case 0xc5: // PEXTRW Gd,VRdq,Ib 985 if (EA_MOD(*inp) != 3) 986 goto invalid; // Reg-only 987 case 0x70: // PSHUFD Vdq,Wdq,Ib 988 case 0xc2: // CMPPD Vps,Wps,Ib 989 case 0xc4: // PINSRW Vdq,Ew,Ib 990 case 0xc6: // SHUFPD Vps,Wps,Ib 991 inp = xscan_rm(inp); 992 inp += 1; 993 goto notrans; 994 995 // Memory-only EA operand 996 case 0x12: case 0x13: // MOVLPD 997 case 0x16: case 0x17: // MOVHPD 998 case 0x2b: // MOVNTPD 999 case 0xe7: // MOVNTDQ Mdq,Vdq 1000 if (EA_MOD(*inp) == 3) // Mem-only 1001 goto invalid; 1002 inp = xscan_rm(inp); 1003 goto notrans; 1004 1005 // Register-only EA operand 1006 case 0x50: // MOVMSKPD 1007 case 0xd7: // PMOVMSKB Gd,VRdq 1008 case 0xf7: // MASKMOVQ Vdq,Wdq 1009 if (EA_MOD(*inp) != 3) // Reg-only 1010 goto invalid; 1011 inp = xscan_rm(inp); 1012 goto notrans; 1013 1014 // Shift instructions 1015 case 0xa4: // SHLD Ev,Gv,Ib 1016 case 0xac: // SHRD Ev,Gv,Ib 1017 inp = xscan_rm(inp); 1018 inp += 1; 1019 // XXX fix cc 1020 goto notrans; 1021 case 0xa5: // SHLD Ev,Gv,CL 1022 case 0xad: // SHRD Ev,Gv,CL 1023 inp = xscan_rm(inp); 1024 // XXX fix cc 1025 goto notrans; 1026 1027 // Group 8 - Bit test/modify with immediate 1028 case 0xba: 1029 if (!(EA_REG(*inp) & 4)) 1030 goto invalid; 1031 inp = xscan_rm(inp); 1032 inp += 1; 1033 goto invalid; 1034 1035 // Group 12, 13, 14 - SSE vector shift w/ immediate 1036 case 0x71: case 0x72: case 0x73: 1037 ea = *inp; 1038 inp = xscan_rm(inp); 1039 switch (EA_REG(ea)) { 1040 case 2: case 4: case 6: 1041 inp += 1; 1042 goto notrans; 1043 } 1044 goto invalid; 1045 } 1046 goto invalid; 1047 1048 twobyte_rep: 1049 switch (*inp++) { 1050 1051 // General EA operands 1052 case 0x10: case 0x11: // MOVSS 1053 case 0x2a: case 0x2c: case 0x2d: // CVT... 1054 case 0x51: // SQRTSS 1055 case 0x58: case 0x59: case 0x5a: case 0x5b: // ADDSS etc. 1056 case 0x5c: case 0x5d: case 0x5e: case 0x5f: // SUBSS etc. 1057 case 0x6f: // MOVDQU 1058 case 0x7e: case 0x7f: // MOVQ/MOVDQU 1059 case 0xe6: // CVTDQ2PD 1060 inp = xscan_rm(inp); 1061 goto notrans; 1062 1063 // General EA operands plus immediate byte 1064 case 0x70: // PSHUFHW Vq,Wq,Ib 1065 case 0xc2: // CMPSS Vss,Wss,Ib 1066 inp = xscan_rm(inp); 1067 inp += 1; 1068 goto notrans; 1069 } 1070 goto invalid; 1071 1072 twobyte_repne: 1073 switch (*inp++) { 1074 1075 // General EA operands 1076 case 0x10: case 0x11: // MOVSD 1077 case 0x2a: case 0x2c: case 0x2d: // CVT... 1078 case 0x51: // SQRTSD 1079 case 0x58: case 0x59: case 0x5a: // ADDSD etc. 1080 case 0x5c: case 0x5d: case 0x5e: case 0x5f: // SUBSD etc. 1081 case 0xe6: // CVTPD2DQ 1082 inp = xscan_rm(inp); 1083 goto notrans; 1084 1085 // General EA operands plus immediate byte 1086 case 0x70: // PSHUFLW Vq,Wq,Ib 1087 case 0xc2: // CMPSD Vss,Wss,Ib 1088 inp = xscan_rm(inp); 1089 inp += 1; 1090 goto notrans; 1091 } 1092 goto invalid; 1093 1094 1095 invalid: 1096 vxrun_cleanup(emu); 1097 vxprint("invalid opcode %02x %02x %02x at eip %08x\n", 1098 emu->ininst[0], emu->ininst[1], emu->ininst[2], 1099 emu->cpu.eip + (emu->ininst - instart)); 1100 vxrun_setup(emu); 1101 gentrap: 1102 fin = 1; 1103 itype = VXI_TRAP; 1104 dstlen = 6+5+11+5; // movl %eax,VSEG:VXEMU_EAX 1105 // movl $fin,%eax 1106 // movl $eip,VSEG:VXEMU_EIP 1107 // jmp vxrun_gentrap 1108 goto done; 1109 1110 1111 notrans: 1112 // No translation of this instruction is required - 1113 // dstlen is the same as srclen. 1114 dstlen = inp - emu->ininst; 1115 1116 done: 1117 // Make sure this whole instruction was actually executable 1118 if (inp > inmax) { 1119 // If the whole first instruction isn't executable, 1120 // then just generate the trap immediately, 1121 // since we know it'll be required. 1122 if (ino == 0) 1123 goto noexec; 1124 1125 // Otherwise, just roll back 1126 // and stop translating before this instruction, 1127 // and let the exception (if any) 1128 // happen next time into the translator. 1129 goto endfrag; 1130 } 1131 1132 // Make sure there's actually room for the resulting code 1133 if (dstofs + dstlen > VXDSTOFS_MAX) { 1134 1135 // Roll back and end the frag before this instruction 1136 endfrag: 1137 fin = 1; 1138 itype = VXI_ENDFRAG; 1139 inp = emu->ininst; // no source consumed 1140 dstlen = 5; // jmp to next frag 1141 } 1142 1143 // Record the instruction record 1144 f->insn[ino].itype = itype; 1145 f->insn[ino].srcofs = emu->ininst - instart; 1146 f->insn[ino].dstofs = dstofs; 1147 f->insn[ino].dstlen = dstlen; 1148 1149 // Move on to next instruction 1150 ino++; 1151 emu->ininst = inp; 1152 dstofs += dstlen; 1153 1154 } while (!fin); 1155 1156 // Record the total number of instructions for this frag 1157 f->ninsn = ino; 1158 1159 // vxprint("%d ins - to %x\n", ino, emu->ininst - instart + eip); 1160 // Clear the special instruction-scanning exception state flag 1161 emu->guestfragend = emu->ininst; 1162 emu->ininst = NULL; 1163 1164 return 0; 1165 } 1166 1167 // Try to optimize jump instructions whose target 1168 // is in the same fragment we're building. 1169 static inline void xsimp_jump(struct vxproc *p, unsigned ino) 1170 { 1171 struct vxemu *emu = p->emu; 1172 struct vxfrag *f = emu->txfrag; 1173 unsigned ninsn = f->ninsn; 1174 unsigned srcofs = f->insn[ino].srcofs; 1175 uint8_t *inp = (uint8_t*)emu->mem->base + emu->cpu.eip + srcofs; 1176 1177 // Skip any branch prediction hint prefix 1178 uint8_t opcode = *inp++; 1179 int dstlen = 2; 1180 uint32_t targofs = srcofs; 1181 if (opcode == 0x2e || opcode == 0x3e) { 1182 opcode = *inp++; 1183 dstlen = 3; 1184 targofs++; 1185 } 1186 1187 // Determine the jump target. 1188 if (opcode == 0xe9) { 1189 // 32-bit JMP 1190 targofs += 5 + *(int32_t*)inp; 1191 } else if (opcode == 0x0f) { 1192 // 32-bit Jcc 1193 targofs += 6 + *(int32_t*)inp; 1194 } else { 1195 // 8-bit JMP or Jcc or LOOP 1196 targofs += 2 + (int32_t)(int8_t)*inp; 1197 } 1198 if (targofs > f->insn[ninsn-1].srcofs) 1199 return; // Target is not in this fragment 1200 1201 // Find the target in the insn table 1202 unsigned lo = 0; 1203 unsigned hi = ninsn-1; 1204 while (hi > lo) { 1205 unsigned mid = (lo + hi + 1) / 2; 1206 unsigned midofs = f->insn[mid].srcofs; 1207 if (targofs >= midofs) 1208 lo = mid; 1209 else 1210 hi = mid - 1; 1211 } 1212 if (targofs != f->insn[lo].srcofs) 1213 return; // Jump target is _between_ instructions! 1214 1215 // Make sure target is still in range after translation 1216 if (lo > ino) { 1217 if ((int)f->insn[lo].dstofs > 1218 (int)f->insn[ino+1].dstofs+127) 1219 return; // too far ahead 1220 } else { 1221 if ((int)f->insn[lo].dstofs < 1222 (int)f->insn[ino].dstofs+3-128) 1223 return; // too far behind 1224 } 1225 1226 // In range - convert it to an 8-bit jump! 1227 f->insn[ino].itype = VXI_JUMP8; 1228 f->insn[ino].dstlen = dstlen; 1229 } 1230 1231 // Translation pass 2: 1232 // Reverse scan through the instruction table trying to simplify instructions. 1233 static void xsimp(struct vxproc *p) 1234 { 1235 int i; 1236 struct vxemu *emu = p->emu; 1237 struct vxfrag *f = emu->txfrag; 1238 unsigned ninsn = f->ninsn; 1239 1240 for (i = ninsn-1; i >= 0; i--) { 1241 unsigned itype = f->insn[i].itype; 1242 1243 switch (itype) { 1244 case VXI_LOOP: 1245 case VXI_LOOPZ: 1246 case VXI_LOOPNZ: 1247 case VXI_JUMP: 1248 xsimp_jump(p, i); 1249 break; 1250 default: 1251 break; // no simplifications 1252 } 1253 1254 } 1255 } 1256 1257 // Translation pass 3: 1258 // Compute final instruction offsets. 1259 static void xplace(struct vxproc *p) 1260 { 1261 int i; 1262 struct vxemu *emu = p->emu; 1263 struct vxfrag *f = emu->txfrag; 1264 unsigned ninsn = f->ninsn; 1265 1266 size_t outofs = PROLOG_LEN; 1267 for (i = 0; i < ninsn; i++) { 1268 f->insn[i].dstofs = outofs; 1269 outofs += f->insn[i].dstlen; 1270 } 1271 } 1272 1273 // Emit a direct 32-bit jump/branch/call/endfrag instruction. 1274 // The original jump might have been either short or long. 1275 // NB. vxemu_sighandler (sig.c) knows that jumps don't trash registers. 1276 // NB. vxemu_sighandler knows that calls push the return address 1277 // onto the stack as the first instruction, and that the target address 1278 // can be found at offset 26 of the translation. 1279 static inline void xemit_jump( 1280 struct vxproc *p, uint8_t itype, unsigned ino, 1281 uint8_t **extrap) 1282 { 1283 extern void vxrun_lookup_backpatch(); 1284 1285 struct vxemu *emu = p->emu; 1286 struct vxfrag *f = emu->txfrag; 1287 1288 // Determine the jump target EIP 1289 // and emit the appropriate call/jump/branch instruction, 1290 // with its target pointing to a temporary jump trampoline. 1291 uint8_t *tramp = *extrap; 1292 unsigned srcofs = f->insn[ino].srcofs; 1293 uint8_t *inp = (uint8_t*)emu->mem->base + emu->cpu.eip + srcofs; 1294 uint8_t *outp = FRAGCODE(f) + f->insn[ino].dstofs; 1295 uint32_t targeip = emu->cpu.eip + srcofs; 1296 if (itype == VXI_JUMP) { 1297 1298 uint8_t opcode = *inp; 1299 1300 // Copy any branch taken/not taken hint prefix 1301 if (opcode == 0x2e || opcode == 0x3e) { 1302 *outp++ = opcode; 1303 opcode = *++inp; 1304 targeip++; 1305 } 1306 1307 // Emit the branch/jump/call instruction 1308 switch (opcode) { 1309 1310 case 0xe9: // was a 32-bit JMP 1311 targeip += 5 + *(int32_t*)&inp[1]; 1312 goto emitjmp; 1313 1314 case 0xeb: // was an 8-bit JMP 1315 targeip += 2 + (int32_t)(int8_t)inp[1]; 1316 emitjmp: 1317 outp[0] = 0xe9; // always emit 32-bit JMP 1318 *(int32_t*)&outp[1] = (int32_t)(tramp - (outp+5)); 1319 outp += 5; 1320 break; 1321 1322 case 0x0f: // was a 32-bit Jcc 1323 opcode = inp[1]; 1324 targeip += 6 + *(int32_t*)&inp[2]; 1325 goto emitjcc; 1326 1327 default: // was an 8-bit Jcc 1328 opcode = inp[0] + 0x10; 1329 targeip += 2 + (int32_t)(int8_t)inp[1]; 1330 emitjcc: 1331 outp[0] = 0x0f; // always emit 32-bit Jcc 1332 outp[1] = opcode; 1333 *(int32_t*)&outp[2] = (int32_t)(tramp - (outp+6)); 1334 outp += 6; 1335 break; 1336 } 1337 } else if (itype == VXI_CALL) { 1338 assert(*inp == 0xe8); // 32-bit CALL 1339 1340 outp[0] = 0x68; // pushl $<return_eip> 1341 *(uint32_t*)&outp[1] = targeip + 5; 1342 outp += 5; 1343 targeip += 5 + *(int32_t*)&inp[1]; 1344 goto emitjmp; 1345 } else if (itype == VXI_LOOP || itype == VXI_LOOPZ || itype == VXI_LOOPNZ) { 1346 *outp++ = 0x8d; // leal -1(ecx) -> ecx 1347 *outp++ = 0x49; 1348 *outp++ = 0xff; 1349 if (itype == VXI_LOOPZ) { 1350 *outp++ = 0x75; // jnz .+7 1351 *outp++ = 0x07; 1352 } else if (itype == VXI_LOOPNZ) { 1353 *outp++ = 0x74; // jz .+7 1354 *outp++ = 0x07; 1355 } 1356 *outp++ = 0xe3; // jecxz .+5 1357 *outp++ = 0x05; 1358 targeip += 2 + (int32_t)(int8_t)inp[1]; 1359 goto emitjmp; 1360 } else { 1361 // End-of-fragment pseudo-instruction. 1362 // targeip already points to the eip we wish to "jump" to. 1363 assert(itype == VXI_ENDFRAG); 1364 goto emitjmp; 1365 } 1366 1367 // Emit the trampoline code 1368 tramp[0] = VSEGPREFIX; // movl $patchrec,VSEG:VXEMU_JMPINFO 1369 tramp[1] = 0xc7; 1370 tramp[2] = 0x05; 1371 *(uint32_t*)&tramp[3] = offsetof(vxemu,jmpinfo); 1372 *(uint32_t*)&tramp[7] = (uint32_t)((intptr_t)tramp+11+5 - 1373 (intptr_t)emu); 1374 1375 tramp[11+0] = 0xe9; // jmp vxrun_lookup_backpatch 1376 *(uint32_t*)&tramp[11+1] = (uint32_t)((intptr_t)vxrun_lookup_backpatch 1377 - (intptr_t)&tramp[11+5]); 1378 1379 *(uint32_t*)&tramp[11+5] = targeip; // .long targeip 1380 *(uint32_t*)&tramp[11+5+4] = (uint32_t)(intptr_t)outp; // .long jmpend 1381 *extrap = &tramp[11+5+4+4]; 1382 } 1383 1384 // Emit a short (8-bit) jump/branch instruction. 1385 // The original branch might have been either short or long. 1386 // NB. vxemu_sighandler (sig.c) knows that jump8s don't 1387 // trash registers. 1388 static inline void xemit_jump8(struct vxproc *p, unsigned ino) 1389 { 1390 struct vxemu *emu = p->emu; 1391 struct vxfrag *f = emu->txfrag; 1392 unsigned srcofs = f->insn[ino].srcofs; 1393 uint8_t *inp = (uint8_t*)emu->mem->base + emu->cpu.eip + srcofs; 1394 uint8_t *outp = FRAGCODE(f) + f->insn[ino].dstofs; 1395 1396 // Copy any branch taken/not taken hint prefix 1397 uint8_t opcode = *inp; 1398 int outlen = 2; 1399 uint32_t targofs = srcofs; 1400 if (opcode == 0x2e || opcode == 0x3e) { 1401 *outp++ = opcode; 1402 opcode = *++inp; 1403 outlen = 3; 1404 targofs++; 1405 } 1406 1407 // Determine the jump target and output opcode. 1408 switch (opcode) { 1409 case 0xe9: // 32-bit JMP 1410 opcode = 0xeb; 1411 targofs += 5 + *(int32_t*)&inp[1]; 1412 break; 1413 case 0x0f: // 32-bit Jcc 1414 opcode = inp[1] - 0x10; 1415 targofs += 6 + *(int32_t*)&inp[2]; 1416 break; 1417 case 0xeb: // 8-bit JMP 1418 case 0xe0: // 8-bit LOOP 1419 case 0xe1: 1420 case 0xe2: 1421 default: // 8-bit Jcc 1422 targofs += 2 + (int32_t)(int8_t)inp[1]; 1423 break; 1424 } 1425 assert(targofs <= f->insn[f->ninsn-1].srcofs); 1426 1427 // Find the target in the insn table 1428 unsigned lo = 0; 1429 unsigned hi = f->ninsn-1; 1430 while (hi > lo) { 1431 unsigned mid = (lo + hi + 1) / 2; 1432 unsigned midofs = f->insn[mid].srcofs; 1433 if (targofs >= midofs) 1434 lo = mid; 1435 else 1436 hi = mid - 1; 1437 } 1438 assert(targofs == f->insn[lo].srcofs); 1439 1440 // Emit the 2-byte jump instruction (3 bytes with prediction hint) 1441 outp[0] = opcode; 1442 outp[1] = (int)f->insn[lo].dstofs - ((int)f->insn[ino].dstofs+outlen); 1443 } 1444 1445 // Emit an indirect jump/call/ret instruction. 1446 // NB. vxemu_sighandler (sig.c) knows that ebx is saved as 1447 // the first instruction and then trashed. 1448 // NB. vxemu_sighandler knows that the immediate count 1449 // in a return immediate instruction is at offset 10. 1450 // NB. vxemu_sighandler knows that in an indirect call: 1451 // * the stack is unchanged until offset -5 (from the end) 1452 // * at offset -5, the return address has been pushed 1453 // and the target eip is in ebx. 1454 static inline void xemit_indir(struct vxproc *p, int itype, unsigned ino) 1455 { 1456 unsigned i; 1457 extern void vxrun_lookup_indirect(); 1458 1459 struct vxemu *emu = p->emu; 1460 struct vxfrag *f = emu->txfrag; 1461 unsigned srcofs = f->insn[ino].srcofs; 1462 uint8_t *inp = (uint8_t*)emu->mem->base + emu->cpu.eip + srcofs; 1463 uint8_t *outp = FRAGCODE(f) + f->insn[ino].dstofs; 1464 uint8_t *outp0 = outp; 1465 1466 // Common: movl %ebx,VSEG:VXEMU_EBX 1467 outp[0] = VSEGPREFIX; // Appropriate segment override 1468 outp[1] = 0x89; 1469 outp[2] = 0x1d; 1470 *(uint32_t*)&outp[3] = offsetof(vxemu, cpu.reg[EBX]); 1471 outp += 7; 1472 1473 // Instruction-specific code 1474 switch (itype) { 1475 default: 1476 assert(0); 1477 1478 case VXI_CALLIND: 1479 assert(inp[0] == 0xff); 1480 assert(EA_REG(inp[1]) == 2); 1481 goto Common; 1482 1483 case VXI_JUMPIND: 1484 assert(inp[0] == 0xff); 1485 assert(EA_REG(inp[1]) == 4); 1486 Common:; 1487 unsigned srclen = xscan_rm(inp+1) - inp; 1488 outp[0] = 0x8b; // movl <indirect_ea>,%ebx 1489 outp[1] = (inp[1] & 0xc7) | (EBX << 3); 1490 for (i = 2; i < srclen; i++) 1491 outp[i] = inp[i]; 1492 outp += srclen; 1493 1494 if(itype == VXI_CALLIND) { 1495 outp[0] = 0x68; // pushl $<return_eip> 1496 *(uint32_t*)&outp[1] = emu->cpu.eip + srcofs + srclen; 1497 outp += 5; 1498 } 1499 break; 1500 1501 case VXI_RETURN: 1502 assert(inp[0] == 0xc3); 1503 *outp++ = 0x5b; // popl %ebx 1504 break; 1505 1506 case VXI_RETURN_IMM: 1507 assert(inp[0] == 0xc2); 1508 outp[0] = 0x5b; // popl %ebx 1509 outp[1] = 0x81; // add $<spc>,%esp 1510 outp[2] = 0xc4; 1511 *(uint32_t*)&outp[3] = *(uint16_t*)&inp[1]; 1512 outp += 1+6; 1513 break; 1514 } 1515 1516 // Common: jmp vxrun_lookup_indirect 1517 outp[0] = 0xe9; 1518 *(uint32_t*)&outp[1] = (uint32_t)(intptr_t)vxrun_lookup_indirect - 1519 (uint32_t)(intptr_t)&outp[5]; 1520 outp += 5; 1521 assert(outp - outp0 == f->insn[ino].dstlen); 1522 } 1523 1524 // NB. vxemu_sighandler (sig.c) knows that eax is saved as 1525 // the first instruction and then trashed. 1526 static void xemit_trap(struct vxproc *p, int ino) 1527 { 1528 extern void vxrun_gentrap(); 1529 1530 struct vxemu *emu = p->emu; 1531 struct vxfrag *f = emu->txfrag; 1532 1533 // Trapping instruction. Determine the trap type. 1534 uint32_t trapno; 1535 uint32_t trapeip = emu->cpu.eip + f->insn[ino].srcofs; 1536 uint8_t *inp = (uint8_t*)emu->mem->base + trapeip; 1537 switch (inp[0]) { 1538 case 0xcc: // Breakpoint 1539 trapno = VXTRAP_BREAKPOINT; 1540 trapeip++; // EIP points after insn 1541 break; 1542 case 0xcd: // INT $n 1543 trapno = VXTRAP_SOFT + inp[1]; 1544 trapeip += 2; // EIP points after insn 1545 break; 1546 case 0x0f: 1547 if (inp[1] == 0x05) { // SYSCALL instruction 1548 trapno = VXTRAP_SYSCALL; 1549 trapeip += 2; // EIP points after insn 1550 break; 1551 } 1552 // fall thru... 1553 default: // Invalid instruction 1554 trapno = VXTRAP_INVALID; 1555 break; 1556 } 1557 1558 // Emit the output code sequence. 1559 uint8_t *outp = FRAGCODE(f) + f->insn[ino].dstofs; 1560 1561 // movl %eax,VSEG:VXEMU_EAX 1562 outp[0] = VSEGPREFIX; 1563 outp[1] = 0xa3; 1564 *(uint32_t*)&outp[2] = offsetof(vxemu, cpu.reg[EAX]); 1565 1566 // movl $trapno,%eax 1567 outp[6+0] = 0xb8; 1568 *(uint32_t*)&outp[6+1] = trapno; 1569 1570 // movl $trapeip,VSEG:VXEMU_EIP 1571 outp[6+5+0] = VSEGPREFIX; 1572 outp[6+5+1] = 0xc7; 1573 outp[6+5+2] = 0x05; 1574 *(uint32_t*)&outp[6+5+3] = offsetof(vxemu, cpu.eip); 1575 *(uint32_t*)&outp[6+5+7] = trapeip; 1576 1577 // jmp vxrun_gentrap 1578 outp[6+5+11+0] = 0xe9; 1579 *(uint32_t*)&outp[6+5+11+1] = (uint32_t)(intptr_t)vxrun_gentrap - 1580 (uint32_t)(intptr_t)&outp[6+5+11+5]; 1581 1582 assert(f->insn[ino].dstlen == 6+5+11+5); 1583 } 1584 1585 // Translation pass 4: 1586 // Emit the translated instruction stream. 1587 static void xemit(struct vxproc *p) 1588 { 1589 unsigned i, j; 1590 struct vxemu *emu = p->emu; 1591 struct vxfrag *f = emu->txfrag; 1592 unsigned ninsn = f->ninsn; 1593 1594 // Writing the instruction stream immediately after the insn table. 1595 uint8_t *outstart = FRAGCODE(f); 1596 1597 // Write extra trampoline code after the already-arranged code. 1598 uint8_t *extra = outstart + (unsigned)f->insn[ninsn-1].dstofs 1599 + (unsigned)f->insn[ninsn-1].dstlen; 1600 1601 // First emit the prolog 1602 outstart[0] = VSEGPREFIX; // Segment override 1603 outstart[1] = 0x8b; outstart[2] = 0x1d; // movl <abs32>,%ebx 1604 *(uint32_t*)&outstart[3] = offsetof(vxemu, cpu.reg[EBX]); 1605 1606 // Now emit the instructions 1607 asm volatile("cld"); 1608 uint8_t *instart = (uint8_t*)emu->mem->base + emu->cpu.eip; 1609 for (i = 0; i < ninsn; ) { 1610 unsigned itype = f->insn[i].itype; 1611 1612 switch (itype) { 1613 1614 case VXI_NOTRANS: 1615 // Just copy strings of untranslated instructions. 1616 for (j = i+1; j < ninsn; j++) 1617 if (f->insn[j].itype != VXI_NOTRANS) 1618 break; 1619 1620 unsigned srcofs = f->insn[i].srcofs; 1621 unsigned dstofs = f->insn[i].dstofs; 1622 uint8_t *inp = instart + f->insn[i].srcofs; 1623 uint8_t *outp = outstart + f->insn[i].dstofs; 1624 unsigned cnt = f->insn[j].dstofs - dstofs; 1625 assert(cnt == f->insn[j].srcofs - srcofs); 1626 asm volatile("rep movsb" 1627 : : "c" (cnt), "S" (inp), "D" (outp)); 1628 1629 i = j; 1630 break; 1631 1632 case VXI_CALL: 1633 case VXI_JUMP: 1634 case VXI_ENDFRAG: 1635 case VXI_LOOP: 1636 case VXI_LOOPZ: 1637 case VXI_LOOPNZ: 1638 xemit_jump(p, itype, i++, &extra); 1639 break; 1640 1641 case VXI_JUMP8: 1642 xemit_jump8(p, i++); 1643 break; 1644 1645 case VXI_RETURN: 1646 case VXI_JUMPIND: 1647 case VXI_CALLIND: 1648 xemit_indir(p, itype, i++); 1649 break; 1650 1651 case VXI_TRAP: 1652 xemit_trap(p, i++); 1653 break; 1654 1655 default: 1656 assert(0); 1657 } 1658 } 1659 1660 // Record the final amount of code table space we've consumed. 1661 emu->codefree = extra; 1662 1663 // Add an entry to the code pointer table to the new fragment 1664 uint32_t *codetab = emu->codetab; 1665 *--codetab = (uint32_t)(intptr_t)f; 1666 emu->codetab = codetab; 1667 1668 assert((void*)extra < (void*)codetab); 1669 1670 // Insert the new entrypoint into the hash table 1671 uint32_t idx = etabhash(emu->cpu.eip) & emu->etabmask; 1672 while (emu->etab[idx].srceip != NULLSRCEIP) { 1673 assert(emu->etab[idx].srceip != emu->cpu.eip); 1674 idx = (idx+1) & emu->etabmask; 1675 } 1676 emu->etab[idx].srceip = emu->cpu.eip; 1677 emu->etab[idx].dsteip = (uint32_t)(intptr_t)outstart; 1678 emu->etabcnt++; 1679 1680 if (vx32_debugxlate) { 1681 vxrun_cleanup(emu); 1682 vxprint("====== xlate\n"); 1683 vxprint("-- guest\n"); 1684 disassemble(emu->mem->base, emu->guestfrag, emu->guestfragend); 1685 vxprint("-- translation\n"); 1686 disassemble(NULL, outstart, extra); 1687 vxprint("======\n"); 1688 vxrun_setup(emu); 1689 } 1690 } 1691 1692 static int xlate(struct vxproc *vxp) 1693 { 1694 // Pass 1: scan instruction stream, build preliminary vxinsn table 1695 int rc = xscan(vxp); 1696 if (rc != 0) 1697 return rc; 1698 1699 // Pass 2: simplify vxinsns wherever possible 1700 xsimp(vxp); 1701 1702 // Pass 3: compute final instruction placement and sizes 1703 xplace(vxp); 1704 1705 // Pass 4: emit translated instructions 1706 xemit(vxp); 1707 1708 return 0; 1709 } 1710 1711 #if 0 1712 #include <asm/prctl.h> 1713 #include <sys/prctl.h> 1714 #endif 1715 1716 void dumpsegs(const char *prefix) 1717 { 1718 uint16_t ds, es, fs, gs, ss; 1719 asm( "movw %%ds,%0; movw %%es,%1; " 1720 "movw %%fs,%2; movw %%gs,%3; " 1721 "movw %%ss,%4" 1722 : "=rm"(ds), "=rm" (es), "=rm" (fs), "=rm" (gs), "=rm" (ss)); 1723 vxprint("%s: ds=%04x es=%04x fs=%04x gs=%04x ss=%04x\n", 1724 prefix, ds, es, fs, gs, ss); 1725 #if 0 1726 unsigned long fsofs, gsofs; 1727 arch_prctl(ARCH_GET_FS, (unsigned long)&fsofs); 1728 arch_prctl(ARCH_GET_GS, (unsigned long)&gsofs); 1729 vxprint("fsofs=%016lx gsofs=%016lx\n", fsofs, gsofs); 1730 #endif 1731 } 1732 1733 int vxproc_run(struct vxproc *vxp) 1734 { 1735 vxemu *emu = vxp->emu; 1736 vxmmap *mm; 1737 1738 // Make sure the process is mapped into our host memory 1739 if ((mm = vxmem_map(vxp->mem, 0)) == NULL) 1740 return -1; 1741 if (vxemu_map(emu, mm) < 0) { 1742 vxmem_unmap(vxp->mem, mm); 1743 return -1; 1744 } 1745 emu->mem = mm; 1746 1747 // Pending trap? 1748 if(emu->cpu_trap){ 1749 assert(0); // Can this even happen? 1750 int trap = emu->cpu_trap; 1751 emu->cpu_trap = 0; 1752 return trap; 1753 } 1754 1755 uint16_t vs; 1756 // Registers can't be already loaded or we will smash 1757 // the "host segment registers" part of emu. 1758 asm("movw %"VSEGSTR",%0" 1759 : "=r" (vs)); 1760 1761 assert(vs != emu->emusel); 1762 1763 // Save our stack environment for exception-handling. 1764 // This only saves the integer registers. If the signal handler 1765 // happens in the middle of a translation involving floating-point 1766 // code, we need to make sure that when we jump back here in the 1767 // handler, we first restore the floating point registers to 1768 // the state they were in during the computation. (Operating 1769 // systems typically save the FPU state, reset the FPU, and 1770 // pass the saved state to the signal handler.) 1771 // The Linux signal handler does exactly this. 1772 // 1773 // On FreeBSD, after hours wasted trying to manually restore the 1774 // floating point state, I gave up. Instead, the FreeBSD code 1775 // saves an mcontext_t here and then overwrites the signal handler's 1776 // mcontext_t with this one. Then when it returns from the handler, 1777 // the OS will restore the floating point state and then the mcontext, 1778 // jumping back here with exactly the FPU state that we want. 1779 // Why not do this on Linux? Because it didn't work when I tried it, 1780 // and I was not about to track down why. 1781 // 1782 // On OS X, there is no getcontext, so you'd think we'd be back to 1783 // the Linux approach of manual FPU restore + siglongjmp. 1784 // Unfortunately, OS X can't deal with siglongjmp from alternate 1785 // signal stacks. If it invokes a signal handler on an alternate 1786 // signal stack and that handler uses siglongjmp to go back to the 1787 // original stack instead of returning out of the handler, then 1788 // OS X thinks the code is still running on the alternate stack, 1789 // which causes all sorts of problems. Thus we have to do the 1790 // getcontext trick. Besides, it is far easier to write a getcontext 1791 // routine--we already need to know the layout of mcontext_t to 1792 // write the signal handler--than to figure out what the FPU state 1793 // looks like. 1794 // 1795 // And you thought this was going to be easy. 1796 1797 #if defined(__FreeBSD__) 1798 ucontext_t env; 1799 emu->trapenv = &env.uc_mcontext; 1800 volatile int n = 0; 1801 getcontext(&env); 1802 if(++n > 1){ 1803 #elif defined(__APPLE__) 1804 struct i386_thread_state env; 1805 emu->trapenv = &env; 1806 if(vx32_getcontext(&env)){ 1807 #else 1808 mcontext_t env; 1809 emu->trapenv = &env; 1810 if(vx32_getcontext(&env)){ 1811 #endif 1812 if(vx32_debugxlate) vxprint("VX trap %x err %x va %08x " 1813 "veip %08x veflags %08x\n", 1814 emu->cpu_trap, emu->cpu.traperr, emu->cpu.trapva, 1815 emu->cpu.eip, emu->cpu.eflags); 1816 goto trapped; 1817 } 1818 1819 // Load our special vxproc segment selector into fs register. 1820 vxrun_setup(emu); 1821 1822 while (1) { 1823 // Look up the translated entrypoint for the current vx32 EIP. 1824 uint32_t eip = emu->cpu.eip; 1825 uint32_t idx = etabhash(eip) & emu->etabmask; 1826 while (emu->etab[idx].srceip != eip) { 1827 if (emu->etab[idx].srceip == NULLSRCEIP) 1828 goto notfound; 1829 idx = (idx+1) & emu->etabmask; 1830 } 1831 1832 // Run the translated code fragment. 1833 // Return if the code terminated with an exception. 1834 // Otherwise it terminated because of an untranslated EIP, 1835 // so translate it. 1836 if(vxrun(emu, emu->etab[idx].dsteip) != 0) 1837 break; 1838 1839 notfound: 1840 // Translate the code fragment the current emu->cpu.eip points to 1841 if(xlate(vxp) != 0) 1842 break; 1843 } 1844 1845 // Restore the usual flat model data segment registers. 1846 vxrun_cleanup(emu); 1847 1848 trapped: 1849 // De-register our setjmp environment for trap handling. 1850 emu->trapenv = NULL; 1851 1852 emu->mem = NULL; 1853 int trap = emu->cpu_trap; 1854 emu->cpu_trap = 0; 1855 return trap; 1856 } 1857 1858 void vxemu_stats(struct vxproc *p) 1859 { 1860 unsigned i; 1861 vxemu *emu = p->emu; 1862 1863 vxprint("flush count: %llu\n", nflush); 1864 1865 // vxprint("vxproc size %dKB\n", p->size/1024); 1866 1867 unsigned coll = 0; 1868 for (i = 0; i < emu->etablen; i++) { 1869 vxentry *e = &emu->etab[i]; 1870 if (e->srceip == NULLSRCEIP) 1871 continue; 1872 unsigned idx = etabhash(e->srceip) & emu->etabmask; 1873 if (idx != i) { 1874 // vxprint("srcip %08x hash %d actually at %d\n", 1875 // e->srceip, idx, i); 1876 coll++; 1877 } 1878 } 1879 vxprint("entry tab: %d used, %d total, %d collisions\n", 1880 emu->etabcnt, emu->etablen, coll); 1881 } 1882 1883 static void disassemble(uint8_t *addr0, uint8_t *p, uint8_t *ep) 1884 { 1885 xdinst i; 1886 int j; 1887 uint8_t *q; 1888 char buf[128]; 1889 1890 for (; p < ep; p = q) { 1891 if ((q = x86decode(addr0, p, &i)) == NULL) 1892 break; 1893 x86print(buf, sizeof buf, &i); 1894 vxprint("%08x", i.addr); 1895 for(j=0; j<i.len; j++) 1896 vxprint(" %02x", p[j]); 1897 for(; j<10; j++) 1898 vxprint(" "); 1899 vxprint(" %s\n", buf); 1900 } 1901 } 1902 1903 void vxprint(char *fmt, ...) 1904 { 1905 va_list arg; 1906 char buf[512]; 1907 1908 va_start(arg, fmt); 1909 vsnprintf(buf, sizeof buf, fmt, arg); 1910 va_end(arg); 1911 USED(write(2, buf, strlen(buf))); 1912 } 1913