vx32

Local 9vx git repository for patches.
git clone git://r-36.net/vx32
Log | Files | Refs

linux.c (10562B)


      1 // Code specific to x86 hosts running Linux.
      2 
      3 #define _GNU_SOURCE
      4 #include <stdio.h>
      5 #include <string.h>
      6 #include <signal.h>
      7 #include <assert.h>
      8 #include <ucontext.h>
      9 #include <sys/ucontext.h>
     10 #include <asm/ldt.h>
     11 #include <errno.h>
     12 
     13 #include "vx32.h"
     14 #include "vx32impl.h"
     15 #include "os.h"
     16 
     17 extern int modify_ldt(int, void*, unsigned long);
     18 
     19 int vxemu_map(vxemu *emu, vxmmap *mm)
     20 {
     21 	struct vxproc *vxp;
     22 	struct user_desc desc;
     23 	uint ldt[2];
     24 #ifdef __x86_64
     25 	static int didflat;
     26 #endif
     27 
     28 	vxp = emu->proc;
     29 	emu->datasel = vxp->vxpno * 16 + 16 + 4 + 3;	// 4=LDT, 3=RPL
     30 	emu->emusel = emu->datasel + 8;
     31 
     32 	if (emu->ldt_base != (uintptr_t)mm->base || emu->ldt_size != mm->size) {
     33 		// Set up the process's data segment selector (for DS,ES,SS).
     34 		memset(&desc, 0, sizeof(desc));
     35 		desc.seg_32bit = 1;
     36 		desc.read_exec_only = 0;
     37 		desc.limit_in_pages = 1;
     38 		desc.seg_not_present = 0;
     39 		desc.useable = 1;
     40 
     41 		desc.entry_number = emu->datasel / 8;
     42 		desc.base_addr = (uintptr_t)mm->base;
     43 		desc.limit = (mm->size - 1) >> VXPAGESHIFT;
     44 		desc.contents = MODIFY_LDT_CONTENTS_DATA;
     45 		if (modify_ldt(1, &desc, sizeof(desc)) < 0)
     46 			return -1;
     47 	
     48 		// Set up the process's vxemu segment selector (for FS).
     49 		desc.entry_number = emu->emusel / 8;
     50 		desc.base_addr = (uintptr_t)emu;
     51 		desc.limit = (VXCODEBUFSIZE - 1) >> VXPAGESHIFT;
     52 		desc.contents = MODIFY_LDT_CONTENTS_DATA;
     53 		if (modify_ldt(1, &desc, sizeof(desc)) < 0)
     54 			return -1;
     55 		
     56 		emu->ldt_base = (uintptr_t)mm->base;
     57 		emu->ldt_size = mm->size;
     58 	}
     59 
     60 #ifdef __x86_64
     61 	// Set up 32-bit mode code and data segments (not vxproc-specific),
     62 	// giving access to the full low 32 bits of linear address space.
     63 	// The code segment is necessary to get into 32-bit compatibility mode;
     64 	// the data segment is needed because Linux for x86-64
     65 	// doesn't give 64-bit processes a "real" data segment by default
     66 	// but instead just loads zero into the data segment selectors!
     67 	emu->runptr.sel = FLATCODE;
     68 
     69 	if (!didflat) {
     70 		didflat = 1;
     71 		memset(&desc, 0, sizeof(desc));
     72 		desc.seg_32bit = 1;
     73 		desc.read_exec_only = 0;
     74 		desc.limit_in_pages = 1;
     75 		desc.seg_not_present = 0;
     76 		desc.useable = 1;
     77 
     78 		desc.entry_number = FLATCODE / 8;
     79 		desc.base_addr = 0;
     80 		desc.limit = 0xfffff;
     81 		desc.contents = MODIFY_LDT_CONTENTS_CODE;
     82 		if (modify_ldt(1, &desc, sizeof(desc)) < 0)
     83 			return -1;
     84 		
     85 		/*
     86 		 * Linux 2.6.27 has a bug: it does not load the L (long mode)
     87 		 * bit from desc.lm when copying desc into its own
     88 		 * copy of the LDT entry on the kernel stack.
     89 		 * Instead, it leaves L uninitialized, picking up whatever
     90 		 * random bit was left on the kernel stack by the
     91 		 * previous call sequence.  We need L to be 0.
     92 		 * If it ends up 1, the *ljmpq in run64.S will GP fault.
     93 		 * Luckily, we can look for this by asking to read
     94 		 * back the raw LDT bytes.  If we observe this problem,
     95 		 * try to fix it by doing a modify_ldt with base = limit = 0,
     96 		 * which clears the entire stack ldt structure, and then
     97 		 * quickly do another modify_ldt with desc, hoping that
     98 		 * the bit will still be zero when we get there for the
     99 		 * second modify_ldt.  I wish I were making this up.
    100 		 * This is fixed in Linus's git repository, but the Ubuntu
    101 		 * git repositories are still out of date.  See for example
    102 		 * 	http://swtch.com/go/ubuntu-ldt
    103 		 *	http://swtch.com/go/linus-ldt
    104 		 *
    105 		 * Remember, folks, Free Software is only free if your
    106 		 * time has no value.
    107 		 */
    108 		if(modify_ldt(0, ldt, sizeof ldt) < 0)
    109 			return -1;
    110 		if(ldt[1] & 0x00200000) {
    111 			if (vx32_debugxlate)
    112 				vxprint("FLATCODE LDT=%08x %08x; working around\n", ldt[0], ldt[1]);
    113 			desc.limit = 0;
    114 			modify_ldt(1, &desc, sizeof desc);
    115 			desc.limit = 0xfffff;
    116 			modify_ldt(1, &desc, sizeof desc);
    117 			modify_ldt(0, ldt, sizeof ldt);
    118 			if(ldt[1] & 0x00200000) {
    119 				vxprint("cannot work around Linux FLATCODE bug\n");
    120 				errno = EBADE;
    121 				return -1;
    122 			}
    123 			if (vx32_debugxlate)
    124 				vxprint("FLATCODE LDT=%08x %08x\n", ldt[0], ldt[1]);
    125 		}
    126 
    127 		desc.entry_number = FLATDATA / 8;
    128 		desc.base_addr = 0;
    129 		desc.limit = 0xfffff;
    130 		desc.contents = MODIFY_LDT_CONTENTS_DATA;
    131 		if (modify_ldt(1, &desc, sizeof(desc)) < 0)
    132 			return -1;
    133 	}
    134 
    135 	// Set up a far return vector in emu->retptr
    136 	// for getting back into 64-bit long mode.
    137 	extern void vxrun_return();
    138 	asm volatile("movw %%cs,%0" : "=r" (emu->retptr.sel));
    139 	emu->retptr.ofs = (uint32_t)(intptr_t)vxrun_return;
    140 #endif
    141 
    142 	return 0;
    143 }
    144 
    145 static void dumpsigcontext(struct sigcontext *ctx)
    146 {
    147 #ifdef i386
    148 	printf(
    149 		"eax %08lx  ebx %08lx\necx %08lx  edx %08lx  "
    150 		"rsi %08lx  rdi %08lx\nrbp %08lx  rsp %08lx\n"
    151 		"eip %08lx  efl %08lx  cs %04x\n"
    152 		"err %08lx  trapno %08lx  cr2 %08lx\n",
    153 		ctx->eax, ctx->ebx, ctx->ecx, ctx->edx,
    154 		ctx->esi, ctx->edi, ctx->ebp, ctx->esp,
    155 		ctx->eip, ctx->eflags, ctx->cs,
    156 		ctx->err, ctx->trapno, ctx->cr2);
    157 #else
    158 	printf(
    159 		"rax %016lx  rbx %016lx\nrcx %016lx  rdx %016lx\n"
    160 		"rsi %016lx  rdi %016lx\nrbp %016lx  rsp %016lx\n"
    161 		"r8  %016lx  r9  %016lx\nr10 %016lx  r11 %016lx\n"
    162 		"r12 %016lx  r13 %016lx\nr14 %016lx  r15 %016lx\n"
    163 		"rip %016lx  efl %016lx  cs %04x  ss %04x\n"
    164 		"err %016lx  trapno %016lx  cr2 %016lx\n",
    165 		ctx->rax, ctx->rbx, ctx->rcx, ctx->rdx,
    166 		ctx->rsi, ctx->rdi, ctx->rbp, ctx->rsp,
    167 		ctx->r8, ctx->r9, ctx->r10, ctx->r11,
    168 		ctx->r12, ctx->r13, ctx->r14, ctx->r15,
    169 		ctx->rip, ctx->eflags, ctx->cs, ctx->__pad0,
    170 		ctx->err, ctx->trapno, ctx->cr2);
    171 #endif
    172 }
    173 
    174 #ifdef i386
    175 #define	VX32_BELIEVE_EIP	(ctx->ds == vs - 8)
    176 #define	ctxeip eip
    177 #else
    178 #define	VX32_BELIEVE_EIP	(ctx->cs == FLATCODE)
    179 
    180 // On x86-64, make x86 names work for ctx->xxx.
    181 #define	eax rax
    182 #define	ebx rbx
    183 #define	ecx rcx
    184 #define	edx rdx
    185 #define	esi rsi
    186 #define	edi rdi
    187 #define	esp rsp
    188 #define	ebp rbp
    189 #define	ctxeip rip
    190 #endif
    191 
    192 static void
    193 fprestore(struct _fpstate *s)
    194 {
    195 	asm volatile("frstor 0(%%eax); fwait\n" : : "a" (s) : "memory");
    196 }
    197 
    198 int vx32_sighandler(int signo, siginfo_t *si, void *v)
    199 {
    200 	uint32_t trapeip;
    201 	uint32_t magic;
    202 	uint16_t vs;
    203 	vxproc *vxp;
    204 	vxemu *emu;
    205 	struct sigcontext *ctx;
    206 	ucontext_t *uc;
    207 	mcontext_t *mc;
    208 	int r;
    209 
    210 	uc = v;
    211 	mc = &uc->uc_mcontext;
    212 
    213 	// same layout, and sigcontext is more convenient...
    214 	ctx = (struct sigcontext*)mc;
    215 
    216 	// We can't be sure that vxemu is running,
    217 	// and thus that %VSEG is actually mapped to a
    218 	// valid vxemu.  The only way to tell is to look at %VSEG.
    219 
    220 	// First sanity check vxproc segment number.
    221 	asm("movw %"VSEGSTR",%0"
    222 		: "=r" (vs));
    223 	
    224 	if(vx32_debugxlate) vxprint("vx32_sighandler signo=%d eip=%#x esp=%#x vs=%#x\n",
    225 		signo, ctx->ctxeip, ctx->esp, vs);
    226 	if(vx32_debugxlate) dumpsigcontext(ctx);
    227 
    228 	if ((vs & 15) != 15)	// 8 (emu), LDT, RPL=3
    229 		return 0;
    230 
    231 	// Okay, assume mapped; check for vxemu.
    232 	asm("movl %"VSEGSTR":%1,%0"
    233 		: "=r" (magic)
    234 		: "m" (((vxemu*)0)->magic));
    235 	if (magic != VXEMU_MAGIC)
    236 		return 0;
    237 
    238 	// Okay, we're convinced.
    239 
    240 	// Find current vxproc and vxemu.
    241 	asm("mov %"VSEGSTR":%1,%0"
    242 		: "=r" (vxp)
    243 		: "m" (((vxemu*)0)->proc));
    244 	emu = vxp->emu;
    245 
    246 	// Get back our regular host segment register state,
    247 	// so that thread-local storage and such works.
    248 	vxrun_cleanup(emu);
    249 
    250 	// dumpsigcontext(ctx);
    251 
    252 	if (VX32_BELIEVE_EIP)
    253 		trapeip = ctx->ctxeip;
    254 	else
    255 		trapeip = 0xffffffff;
    256 
    257 	int newtrap;
    258 	switch(signo){
    259 	case SIGSEGV:
    260 	case SIGBUS:
    261 		newtrap = VXTRAP_PAGEFAULT;
    262 		break;
    263 	
    264 	case SIGFPE:
    265 		newtrap = VXTRAP_FLOAT;
    266 		break;
    267 	
    268 	case SIGVTALRM:
    269 		newtrap = VXTRAP_IRQ + VXIRQ_TIMER;
    270 		break;
    271 
    272 	case SIGTRAP:
    273 		// Linux sends SIGTRAP when it gets a processor 
    274 		// debug exception, which is caused by single-stepping
    275 		// with the TF bit, among other things.  The processor
    276 		// turns off the TF bit before generating the trap, but
    277 		// it appears that Linux turns it back on for us.
    278 		// Let's use it to confirm that this is a single-step trap.
    279 		if (ctx->eflags & EFLAGS_TF){
    280 			newtrap = VXTRAP_SINGLESTEP;
    281 			ctx->eflags &= ~EFLAGS_TF;
    282 		}else{
    283 			vxprint("Unexpected sigtrap eflags=%#x\n", ctx->eflags);
    284 			newtrap = VXTRAP_SIGNAL + signo;
    285 		}
    286 		break;
    287 
    288 	default:
    289 		newtrap = VXTRAP_SIGNAL + signo;
    290 		break;
    291 	}
    292 	
    293 	int replaced_trap = 0;
    294 	if (emu->cpu_trap) {
    295 		// There's already a pending trap!
    296 		// Handle the new trap, and assume that when it
    297 		// finishes, restarting the code at cpu.eip will trigger
    298 		// the old trap again.
    299 		// Have to fix up eip for int 0x30 and syscall instructions.
    300 		if (emu->cpu_trap == VXTRAP_SYSCALL ||
    301 				(emu->cpu_trap&VXTRAP_CATEGORY) == VXTRAP_SOFT)
    302 			emu->cpu.eip -= 2;
    303 		replaced_trap = emu->cpu_trap;
    304 	}
    305 	emu->cpu_trap = newtrap;
    306 
    307 	r = vxemu_sighandler(emu, trapeip);
    308 
    309 	if (r == VXSIG_SINGLESTEP){
    310 		// Vxemu_sighandler wants us to single step.
    311 		// Execution state is in intermediate state - don't touch.
    312 		ctx->eflags |= EFLAGS_TF;		// x86 TF (single-step) bit
    313 		vxrun_setup(emu);
    314 		return 1;
    315 	}
    316 
    317 	// Copy execution state into emu.
    318 	if ((r & VXSIG_SAVE_ALL) == VXSIG_SAVE_ALL) {
    319 		emu->cpu.reg[EAX] = ctx->eax;
    320 		emu->cpu.reg[EBX] = ctx->ebx;
    321 		emu->cpu.reg[ECX] = ctx->ecx;
    322 		emu->cpu.reg[EDX] = ctx->edx;
    323 		emu->cpu.reg[ESI] =  ctx->esi;
    324 		emu->cpu.reg[EDI] = ctx->edi;
    325 		emu->cpu.reg[ESP] = ctx->esp;	// or esp_at_signal ???
    326 		emu->cpu.reg[EBP] = ctx->ebp;
    327 		emu->cpu.eflags = ctx->eflags;
    328 	} else if (r & VXSIG_SAVE_ALL) {
    329 		if (r & VXSIG_SAVE_EAX)
    330 			emu->cpu.reg[EAX] = ctx->eax;
    331 		if (r & VXSIG_SAVE_EBX)
    332 			emu->cpu.reg[EBX] = ctx->ebx;
    333 		if (r & VXSIG_SAVE_ECX)
    334 			emu->cpu.reg[ECX] = ctx->ecx;
    335 		if (r & VXSIG_SAVE_EDX)
    336 			emu->cpu.reg[EDX] = ctx->edx;
    337 		if (r & VXSIG_SAVE_ESI)
    338 			emu->cpu.reg[ESI] =  ctx->esi;
    339 		if (r & VXSIG_SAVE_EDI)
    340 			emu->cpu.reg[EDI] = ctx->edi;
    341 		if (r & VXSIG_SAVE_ESP)
    342 			emu->cpu.reg[ESP] = ctx->esp;	// or esp_at_signal ???
    343 		if (r & VXSIG_SAVE_EBP)
    344 			emu->cpu.reg[EBP] = ctx->ebp;
    345 		if (r & VXSIG_SAVE_EFLAGS)
    346 			emu->cpu.eflags = ctx->eflags;
    347 	}
    348 	r &= ~VXSIG_SAVE_ALL;
    349 
    350 	if (r & VXSIG_SAVE_EBX_AS_EIP)
    351 		emu->cpu.eip = ctx->ebx;
    352 	r &= ~VXSIG_SAVE_EBX_AS_EIP;
    353 
    354 	if (r & VXSIG_ADD_COUNT_TO_ESP) {
    355 		emu->cpu.reg[ESP] += (uint16_t)(r >> VXSIG_COUNT_SHIFT);
    356 		r &= ~VXSIG_ADD_COUNT_TO_ESP;
    357 		r &= ~(0xFFFF << VXSIG_COUNT_SHIFT);
    358 	}
    359 	
    360 	if (r &  VXSIG_INC_ECX) {
    361 		emu->cpu.reg[ECX]++;
    362 		r &= ~VXSIG_INC_ECX;
    363 	}
    364 
    365 	if (r == VXSIG_TRAP) {
    366 		if (emu->trapenv == NULL)
    367 			return 0;
    368 		emu->cpu.traperr = ctx->err;
    369 		// Usually, ctx->cr2 == si->si_addr.
    370 		// But on a segmentation fault (as opposed to a paging fault),
    371 		// cr2 is not updated and the kernel sends an si_addr == 0.
    372 		// Be sure to use si_addr, not cr2.
    373 		emu->cpu.trapva = (uint32_t)(uintptr_t)si->si_addr;
    374 		memmove(mc->gregs, emu->trapenv->gregs, sizeof emu->trapenv->gregs);
    375 		
    376 		return 1;
    377 	}
    378 
    379 	// The signal handler is confused; so are we.
    380 	return 0;
    381 }