// Code specific to x86 hosts running Linux.

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <assert.h>
#include <ucontext.h>
#include <sys/ucontext.h>
#include <asm/ldt.h>
#include <errno.h>

#include "vx32.h"
#include "vx32impl.h"
#include "os.h"

// Raw Linux system call to read/write the per-process LDT
// (glibc does not export a prototype for it).
extern int modify_ldt(int, void*, unsigned long);

// Install the LDT segment descriptors that let translated guest code run:
// a data segment covering the guest address space mapping mm (used for
// DS/ES/SS) and a small segment covering the vxemu control structure
// (used as the %VSEG/FS segment).  On x86-64 also installs, once per
// process, flat 32-bit code/data segments needed to enter compatibility
// mode, and records the far-return vector back to 64-bit long mode.
// Returns 0 on success, -1 with errno set on failure.
int vxemu_map(vxemu *emu, vxmmap *mm)
{
	struct vxproc *vxp;
	struct user_desc desc;
	uint ldt[2];
#ifdef __x86_64
	// Flat segments are process-wide, not per-vxproc; set them up once.
	static int didflat;
#endif

	vxp = emu->proc;
	// Each vxproc owns two consecutive LDT slots starting at entry
	// (vxpno*2 + 2); the selector encodes entry*8 + 4 (LDT) + 3 (RPL 3).
	emu->datasel = vxp->vxpno * 16 + 16 + 4 + 3;	// 4=LDT, 3=RPL
	emu->emusel = emu->datasel + 8;

	// Only rewrite the LDT entries when the guest mapping has changed.
	if (emu->ldt_base != (uintptr_t)mm->base || emu->ldt_size != mm->size) {
		// Set up the process's data segment selector (for DS,ES,SS).
		memset(&desc, 0, sizeof(desc));
		desc.seg_32bit = 1;
		desc.read_exec_only = 0;
		desc.limit_in_pages = 1;
		desc.seg_not_present = 0;
		desc.useable = 1;

		desc.entry_number = emu->datasel / 8;
		desc.base_addr = (uintptr_t)mm->base;
		// Limit is expressed in pages (limit_in_pages=1), inclusive.
		desc.limit = (mm->size - 1) >> VXPAGESHIFT;
		desc.contents = MODIFY_LDT_CONTENTS_DATA;
		if (modify_ldt(1, &desc, sizeof(desc)) < 0)
			return -1;

		// Set up the process's vxemu segment selector (for FS).
		desc.entry_number = emu->emusel / 8;
		desc.base_addr = (uintptr_t)emu;
		desc.limit = (VXCODEBUFSIZE - 1) >> VXPAGESHIFT;
		desc.contents = MODIFY_LDT_CONTENTS_DATA;
		if (modify_ldt(1, &desc, sizeof(desc)) < 0)
			return -1;

		emu->ldt_base = (uintptr_t)mm->base;
		emu->ldt_size = mm->size;
	}

#ifdef __x86_64
	// Set up 32-bit mode code and data segments (not vxproc-specific),
	// giving access to the full low 32 bits of linear address space.
	// The code segment is necessary to get into 32-bit compatibility mode;
	// the data segment is needed because Linux for x86-64
	// doesn't give 64-bit processes a "real" data segment by default
	// but instead just loads zero into the data segment selectors!
	emu->runptr.sel = FLATCODE;

	if (!didflat) {
		didflat = 1;
		memset(&desc, 0, sizeof(desc));
		desc.seg_32bit = 1;
		desc.read_exec_only = 0;
		desc.limit_in_pages = 1;
		desc.seg_not_present = 0;
		desc.useable = 1;

		// Flat 32-bit code segment: base 0, limit 4GB.
		desc.entry_number = FLATCODE / 8;
		desc.base_addr = 0;
		desc.limit = 0xfffff;
		desc.contents = MODIFY_LDT_CONTENTS_CODE;
		if (modify_ldt(1, &desc, sizeof(desc)) < 0)
			return -1;

		/*
		 * Linux 2.6.27 has a bug: it does not load the L (long mode)
		 * bit from desc.lm when copying desc into its own
		 * copy of the LDT entry on the kernel stack.
		 * Instead, it leaves L uninitialized, picking up whatever
		 * random bit was left on the kernel stack by the
		 * previous call sequence.  We need L to be 0.
		 * If it ends up 1, the *ljmpq in run64.S will GP fault.
		 * Luckily, we can look for this by asking to read
		 * back the raw LDT bytes.  If we observe this problem,
		 * try to fix it by doing a modify_ldt with base = limit = 0,
		 * which clears the entire stack ldt structure, and then
		 * quickly do another modify_ldt with desc, hoping that
		 * the bit will still be zero when we get there for the
		 * second modify_ldt.  I wish I were making this up.
		 * This is fixed in Linus's git repository, but the Ubuntu
		 * git repositories are still out of date.  See for example
		 *	http://swtch.com/go/ubuntu-ldt
		 *	http://swtch.com/go/linus-ldt
		 *
		 * Remember, folks, Free Software is only free if your
		 * time has no value.
		 */
		// modify_ldt(0, ...) reads back the raw LDT bytes so we can
		// inspect the descriptor the kernel actually installed.
		if(modify_ldt(0, ldt, sizeof ldt) < 0)
			return -1;
		// 0x00200000 in the high descriptor word is the L (long mode) bit.
		if(ldt[1] & 0x00200000) {
			if (vx32_debugxlate)
				vxprint("FLATCODE LDT=%08x %08x; working around\n", ldt[0], ldt[1]);
			desc.limit = 0;
			modify_ldt(1, &desc, sizeof desc);
			desc.limit = 0xfffff;
			modify_ldt(1, &desc, sizeof desc);
			modify_ldt(0, ldt, sizeof ldt);
			if(ldt[1] & 0x00200000) {
				// Workaround failed; refuse to run rather than GP fault later.
				vxprint("cannot work around Linux FLATCODE bug\n");
				errno = EBADE;
				return -1;
			}
			if (vx32_debugxlate)
				vxprint("FLATCODE LDT=%08x %08x\n", ldt[0], ldt[1]);
		}

		// Flat 32-bit data segment: base 0, limit 4GB.
		desc.entry_number = FLATDATA / 8;
		desc.base_addr = 0;
		desc.limit = 0xfffff;
		desc.contents = MODIFY_LDT_CONTENTS_DATA;
		if (modify_ldt(1, &desc, sizeof(desc)) < 0)
			return -1;
	}

	// Set up a far return vector in emu->retptr
	// for getting back into 64-bit long mode.
	extern void vxrun_return();
	asm volatile("movw %%cs,%0" : "=r" (emu->retptr.sel));
	emu->retptr.ofs = (uint32_t)(intptr_t)vxrun_return;
#endif

	return 0;
}

// Debugging aid: print the full saved register state from a signal's
// sigcontext to stdout (32-bit register set on i386, 64-bit otherwise).
static void dumpsigcontext(struct sigcontext *ctx)
{
#ifdef i386
	printf(
		"eax %08lx ebx %08lx\necx %08lx edx %08lx "
		"rsi %08lx rdi %08lx\nrbp %08lx rsp %08lx\n"
		"eip %08lx efl %08lx cs %04x\n"
		"err %08lx trapno %08lx cr2 %08lx\n",
		ctx->eax, ctx->ebx, ctx->ecx, ctx->edx,
		ctx->esi, ctx->edi, ctx->ebp, ctx->esp,
		ctx->eip, ctx->eflags, ctx->cs,
		ctx->err, ctx->trapno, ctx->cr2);
#else
	printf(
		"rax %016lx rbx %016lx\nrcx %016lx rdx %016lx\n"
		"rsi %016lx rdi %016lx\nrbp %016lx rsp %016lx\n"
		"r8  %016lx r9  %016lx\nr10 %016lx r11 %016lx\n"
		"r12 %016lx r13 %016lx\nr14 %016lx r15 %016lx\n"
		"rip %016lx efl %016lx cs %04x ss %04x\n"
		"err %016lx trapno %016lx cr2 %016lx\n",
		ctx->rax, ctx->rbx, ctx->rcx, ctx->rdx,
		ctx->rsi, ctx->rdi, ctx->rbp, ctx->rsp,
		ctx->r8, ctx->r9, ctx->r10, ctx->r11,
		ctx->r12, ctx->r13, ctx->r14, ctx->r15,
		ctx->rip, ctx->eflags, ctx->cs, ctx->__pad0,
		ctx->err, ctx->trapno, ctx->cr2);
#endif
}

// VX32_BELIEVE_EIP: true when the saved context's instruction pointer
// refers to translated guest code (i386: the fault happened with the
// vx32 data segment loaded; x86-64: the fault happened in the flat
// 32-bit code segment), so ctx->ctxeip is meaningful as a guest EIP.
#ifdef i386
#define VX32_BELIEVE_EIP	(ctx->ds == vs - 8)
#define ctxeip eip
#else
#define VX32_BELIEVE_EIP	(ctx->cs == FLATCODE)

// On x86-64, make x86 names work for ctx->xxx.
#define eax rax
#define ebx rbx
#define ecx rcx
#define edx rdx
#define esi rsi
#define edi rdi
#define esp rsp
#define ebp rbp
#define ctxeip rip
#endif

// Restore the x87 floating-point state saved in s.
// NOTE(review): currently unreferenced in this view; presumably kept for
// signal-path FP restoration — confirm against the rest of the project.
static void
fprestore(struct _fpstate *s)
{
	asm volatile("frstor 0(%%eax); fwait\n" : : "a" (s) : "memory");
}

// Host signal handler entry point for signals that may have been raised
// while translated guest code was executing.  v is the ucontext_t* from
// sigaction's SA_SIGINFO handler.  Determines whether the signal belongs
// to a running vxemu (by probing %VSEG), translates the signal into a
// vx32 trap code, saves the guest register state requested by
// vxemu_sighandler, and either resumes execution (returns 1) or declines
// the signal (returns 0, meaning "not ours / cannot handle").
int vx32_sighandler(int signo, siginfo_t *si, void *v)
{
	uint32_t trapeip;
	uint32_t magic;
	uint16_t vs;
	vxproc *vxp;
	vxemu *emu;
	struct sigcontext *ctx;
	ucontext_t *uc;
	mcontext_t *mc;
	int r;

	uc = v;
	mc = &uc->uc_mcontext;

	// same layout, and sigcontext is more convenient...
	ctx = (struct sigcontext*)mc;

	// We can't be sure that vxemu is running,
	// and thus that %VSEG is actually mapped to a
	// valid vxemu.  The only way to tell is to look at %VSEG.

	// First sanity check vxproc segment number.
	asm("movw %"VSEGSTR",%0"
		: "=r" (vs));

	if(vx32_debugxlate) vxprint("vx32_sighandler signo=%d eip=%#x esp=%#x vs=%#x\n",
		signo, ctx->ctxeip, ctx->esp, vs);
	if(vx32_debugxlate) dumpsigcontext(ctx);

	// An emu selector has the low bits LDT|RPL3 set plus bit 3
	// (see vxemu_map: emusel = datasel + 8).
	if ((vs & 15) != 15)	// 8 (emu), LDT, RPL=3
		return 0;

	// Okay, assume mapped; check for vxemu.
	asm("movl %"VSEGSTR":%1,%0"
		: "=r" (magic)
		: "m" (((vxemu*)0)->magic));
	if (magic != VXEMU_MAGIC)
		return 0;

	// Okay, we're convinced.

	// Find current vxproc and vxemu.
	asm("mov %"VSEGSTR":%1,%0"
		: "=r" (vxp)
		: "m" (((vxemu*)0)->proc));
	emu = vxp->emu;

	// Get back our regular host segment register state,
	// so that thread-local storage and such works.
	vxrun_cleanup(emu);

	// dumpsigcontext(ctx);

	// Trust the saved EIP only if the fault came from guest code;
	// otherwise report an unknown trap address.
	if (VX32_BELIEVE_EIP)
		trapeip = ctx->ctxeip;
	else
		trapeip = 0xffffffff;

	// Map the host signal to a vx32 trap code.
	int newtrap;
	switch(signo){
	case SIGSEGV:
	case SIGBUS:
		newtrap = VXTRAP_PAGEFAULT;
		break;

	case SIGFPE:
		newtrap = VXTRAP_FLOAT;
		break;

	case SIGVTALRM:
		newtrap = VXTRAP_IRQ + VXIRQ_TIMER;
		break;

	case SIGTRAP:
		// Linux sends SIGTRAP when it gets a processor
		// debug exception, which is caused by single-stepping
		// with the TF bit, among other things.  The processor
		// turns off the TF bit before generating the trap, but
		// it appears that Linux turns it back on for us.
		// Let's use it to confirm that this is a single-step trap.
		if (ctx->eflags & EFLAGS_TF){
			newtrap = VXTRAP_SINGLESTEP;
			ctx->eflags &= ~EFLAGS_TF;
		}else{
			vxprint("Unexpected sigtrap eflags=%#x\n", ctx->eflags);
			newtrap = VXTRAP_SIGNAL + signo;
		}
		break;

	default:
		newtrap = VXTRAP_SIGNAL + signo;
		break;
	}

	int replaced_trap = 0;
	if (emu->cpu_trap) {
		// There's already a pending trap!
		// Handle the new trap, and assume that when it
		// finishes, restarting the code at cpu.eip will trigger
		// the old trap again.
		// Have to fix up eip for int 0x30 and syscall instructions,
		// which are both 2-byte instructions already stepped past.
		if (emu->cpu_trap == VXTRAP_SYSCALL ||
				(emu->cpu_trap&VXTRAP_CATEGORY) == VXTRAP_SOFT)
			emu->cpu.eip -= 2;
		replaced_trap = emu->cpu_trap;
	}
	emu->cpu_trap = newtrap;

	r = vxemu_sighandler(emu, trapeip);

	if (r == VXSIG_SINGLESTEP){
		// Vxemu_sighandler wants us to single step.
		// Execution state is in intermediate state - don't touch.
		ctx->eflags |= EFLAGS_TF;	// x86 TF (single-step) bit
		vxrun_setup(emu);
		return 1;
	}

	// Copy execution state into emu, as directed by the
	// VXSIG_SAVE_* bits in vxemu_sighandler's return value.
	if ((r & VXSIG_SAVE_ALL) == VXSIG_SAVE_ALL) {
		emu->cpu.reg[EAX] = ctx->eax;
		emu->cpu.reg[EBX] = ctx->ebx;
		emu->cpu.reg[ECX] = ctx->ecx;
		emu->cpu.reg[EDX] = ctx->edx;
		emu->cpu.reg[ESI] = ctx->esi;
		emu->cpu.reg[EDI] = ctx->edi;
		emu->cpu.reg[ESP] = ctx->esp;	// or esp_at_signal ???
		emu->cpu.reg[EBP] = ctx->ebp;
		emu->cpu.eflags = ctx->eflags;
	} else if (r & VXSIG_SAVE_ALL) {
		// Only a subset of registers was requested.
		if (r & VXSIG_SAVE_EAX)
			emu->cpu.reg[EAX] = ctx->eax;
		if (r & VXSIG_SAVE_EBX)
			emu->cpu.reg[EBX] = ctx->ebx;
		if (r & VXSIG_SAVE_ECX)
			emu->cpu.reg[ECX] = ctx->ecx;
		if (r & VXSIG_SAVE_EDX)
			emu->cpu.reg[EDX] = ctx->edx;
		if (r & VXSIG_SAVE_ESI)
			emu->cpu.reg[ESI] = ctx->esi;
		if (r & VXSIG_SAVE_EDI)
			emu->cpu.reg[EDI] = ctx->edi;
		if (r & VXSIG_SAVE_ESP)
			emu->cpu.reg[ESP] = ctx->esp;	// or esp_at_signal ???
		if (r & VXSIG_SAVE_EBP)
			emu->cpu.reg[EBP] = ctx->ebp;
		if (r & VXSIG_SAVE_EFLAGS)
			emu->cpu.eflags = ctx->eflags;
	}
	r &= ~VXSIG_SAVE_ALL;

	// Some translated sequences stash the guest EIP in EBX.
	if (r & VXSIG_SAVE_EBX_AS_EIP)
		emu->cpu.eip = ctx->ebx;
	r &= ~VXSIG_SAVE_EBX_AS_EIP;

	// Undo a partial stack adjustment: the count is packed into
	// the high bits of r above VXSIG_COUNT_SHIFT.
	if (r & VXSIG_ADD_COUNT_TO_ESP) {
		emu->cpu.reg[ESP] += (uint16_t)(r >> VXSIG_COUNT_SHIFT);
		r &= ~VXSIG_ADD_COUNT_TO_ESP;
		r &= ~(0xFFFF << VXSIG_COUNT_SHIFT);
	}

	if (r & VXSIG_INC_ECX) {
		emu->cpu.reg[ECX]++;
		r &= ~VXSIG_INC_ECX;
	}

	if (r == VXSIG_TRAP) {
		// Deliver the trap to the host: record fault details and
		// rewrite the signal context so that returning from the
		// signal handler resumes at the saved trap environment.
		if (emu->trapenv == NULL)
			return 0;
		emu->cpu.traperr = ctx->err;
		// Usually, ctx->cr2 == si->si_addr.
		// But on a segmentation fault (as opposed to a paging fault),
		// cr2 is not updated and the kernel sends an si_addr == 0.
		// Be sure to use si_addr, not cr2.
		emu->cpu.trapva = (uint32_t)(uintptr_t)si->si_addr;
		memmove(mc->gregs, emu->trapenv->gregs, sizeof emu->trapenv->gregs);
		return 1;
	}

	// The signal handler is confused; so are we.
	return 0;
}