mmu.c (7541B)
1 #define WANT_M 2 #include "u.h" 3 #include <pthread.h> 4 #include "libvx32/vx32.h" 5 #include <sys/mman.h> 6 #include "lib.h" 7 #include "mem.h" 8 #include "dat.h" 9 #include "fns.h" 10 #include "error.h" 11 #include "ureg.h" 12 13 int tracemmu; 14 15 #ifndef MAP_ANONYMOUS 16 #define MAP_ANONYMOUS MAP_ANON 17 #endif 18 #define MAP_EMPTY (MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE) 19 20 /* 21 * We allocate a 256MB page file on disk to hold the "physical memory". 22 * We'll mmap individual file pages where we need them to simulate 23 * the page translation of a real MMU. To make the simulation more 24 * faithful, we map the vx32 sandboxed address space starting at 0, 25 * so that kernel 0 = user 0, so that pointers can be shared. 26 * Plan 9 assumes this, and while it's not a ton of work to break that 27 * assumption, it was easier not to. 28 * 29 * This value may be changed with the -m switch. 30 */ 31 int memsize = (256<<20); 32 33 static int pagefile; 34 static char* pagebase; 35 36 static Uspace uspace[16]; 37 static Uspace *ulist[nelem(uspace)]; 38 int nuspace = 1; 39 40 #ifdef __i386__ 41 #define BIT32 0 42 #define HINT nil 43 #elif defined(__amd64__) 44 #ifdef linux 45 #define BIT32 MAP_32BIT 46 #define HINT nil 47 #elif defined(__FreeBSD__) 48 #define BIT32 MAP_FIXED 49 #define HINT (caddr_t)0x40000000 50 #endif 51 #endif 52 53 int 54 isuaddr(void *v) 55 { 56 uchar *p; 57 uchar *uzero; 58 59 p = v; 60 uzero = up->pmmu.uzero; 61 return uzero <= p && p < uzero+USTKTOP; 62 } 63 64 /* 65 * Allocate a process-sized mapping with nothing there. 66 * The point is to reserve the space so that 67 * nothing else ends up there later. 68 */ 69 static void* 70 mapzero(void) 71 { 72 int fd, bit32; 73 void *v; 74 void *hint; 75 76 bit32 = BIT32; 77 hint = HINT; 78 79 /* First try mmaping /dev/zero. Some OS'es don't allow this. */ 80 if((fd = open("/dev/zero", O_RDONLY)) >= 0){ 81 v = mmap(hint, USTKTOP, PROT_NONE, bit32|MAP_PRIVATE, fd, 0); 82 if(v != MAP_FAILED) { 83 if((uint32_t)(uintptr)v != (uintptr)v) { 84 iprint("mmap returned 64-bit pointer %p\n", v); 85 panic("mmap"); 86 } 87 return v; 88 } 89 } 90 91 /* Next try an anonymous map. */ 92 v = mmap(hint, USTKTOP, PROT_NONE, bit32|MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); 93 if(v != MAP_FAILED) { 94 if((uint32_t)(uintptr)v != (uintptr)v) { 95 iprint("mmap returned 64-bit pointer %p\n", v); 96 panic("mmap"); 97 } 98 return v; 99 } 100 101 return nil; 102 } 103 104 void 105 mmuinit(void) 106 { 107 char tmp[] = "/var/tmp/9vx.pages.XXXXXX"; 108 void *v; 109 int i; 110 111 if((pagefile = mkstemp(tmp)) < 0) 112 panic("mkstemp: %r"); 113 if(ftruncate(pagefile, memsize) < 0) 114 panic("ftruncate pagefile: %r"); 115 unlink(tmp); /* "remove on close" */ 116 117 /* Map pages for direct access at pagebase, wherever that is */ 118 /* MAP_SHARED means write the changes back to the file */ 119 v = mmap(nil, memsize, PROT_READ|PROT_WRITE, 120 MAP_SHARED, pagefile, 0); 121 if(v == MAP_FAILED) 122 panic("mmap pagefile: %r"); 123 pagebase = v; 124 125 if(nuspace <= 0) 126 nuspace = 1; 127 if(nuspace > nelem(uspace)) 128 nuspace = nelem(uspace); 129 for(i=0; i<nuspace; i++){ 130 uspace[i].uzero = mapzero(); 131 if(uspace[i].uzero == nil) 132 panic("mmap address space %d", i); 133 ulist[i] = &uspace[i]; 134 } 135 136 conf.mem[0].base = 0; 137 conf.mem[0].npage = memsize / BY2PG; 138 139 palloc.mem[0].base = 0; 140 palloc.mem[0].npage = memsize / BY2PG; 141 } 142 143 /* 144 * Temporary page mappings are easy again: 145 * everything is mapped at PAGEBASE. 146 */ 147 void* 148 tmpmap(Page *pg) 149 { 150 assert(pg->pa < memsize); 151 return pagebase + pg->pa; 152 } 153 154 void 155 tmpunmap(void *v) 156 { 157 assert(pagebase <= (char*)v && (char*)v < pagebase + memsize); 158 } 159 160 KMap* 161 kmap(Page *p) 162 { 163 return (KMap*)tmpmap(p); 164 } 165 166 void 167 kunmap(KMap *k) 168 { 169 } 170 171 /* 172 * Flush the current address space. 173 */ 174 static void 175 mmapflush(Uspace *us) 176 { 177 m->flushmmu = 0; 178 179 /* Nothing mapped? */ 180 if(us == nil || us->lo > us->hi || us->uzero == nil) 181 return; 182 183 #ifdef __FreeBSD__ 184 if(__FreeBSD__ < 7){ 185 /* 186 * On FreeBSD, we need to be able to use mincore to 187 * tell whether a page is mapped, so we have to remap 188 * something with no pages here. 189 */ 190 if(mmap(us->uzero, us->hi+BY2PG, PROT_NONE, 191 MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) == MAP_FAILED) 192 panic("mmapflush mmap: %r"); 193 us->lo = 0x80000000UL; 194 us->hi = 0; 195 return; 196 } 197 #endif 198 199 /* Clear only as much as got mapped. */ 200 if(mprotect(us->uzero, us->hi+BY2PG, PROT_NONE) < 0) 201 panic("mmapflush mprotect: %r"); 202 us->lo = 0x80000000UL; 203 us->hi = 0; 204 } 205 206 /* 207 * Update the "MMU" in response to a user fault. 208 * pa may have PTEWRITE set. 209 */ 210 void 211 putmmu(ulong va, ulong pa, Page *p) 212 { 213 int prot; 214 Uspace *us; 215 216 if(tracemmu || (pa&~(PTEWRITE|PTEVALID)) != p->pa) 217 iprint("putmmu va %lux pa %lux p->pa %lux\n", va, pa, p->pa); 218 219 assert(p->pa < memsize && pa < memsize); 220 assert(up); 221 us = up->pmmu.us; 222 assert(us); 223 224 /* Map the page */ 225 prot = PROT_READ; 226 if(pa&PTEWRITE) 227 prot |= PROT_WRITE; 228 pa &= ~(BY2PG-1); 229 va &= ~(BY2PG-1); 230 if(mmap(us->uzero+va, BY2PG, prot, MAP_FIXED|MAP_SHARED, 231 pagefile, pa) == MAP_FAILED) 232 panic("putmmu"); 233 234 /* Record high and low address range for quick unmap. */ 235 if(us->lo > va) 236 us->lo = va; 237 if(us->hi < va) 238 us->hi = va; 239 // printlinuxmaps(); 240 } 241 242 /* 243 * The memory maps have changed for up. Flush all cached state. 244 */ 245 void 246 flushmmu(void) 247 { 248 if(tracemmu) 249 iprint("flushmmu\n"); 250 251 if(up){ 252 vxproc_flush(up->pmmu.vxproc); 253 mmapflush(up->pmmu.us); 254 } 255 } 256 257 void 258 usespace(Uspace *us) 259 { 260 int i; 261 262 for(i=0; i<nuspace; i++) 263 if(ulist[i] == us){ 264 while(i > 0){ 265 ulist[i] = ulist[i-1]; 266 i--; 267 } 268 ulist[0] = us; 269 break; 270 } 271 } 272 273 Uspace* 274 getspace(Proc *p) 275 { 276 Uspace *us; 277 278 us = ulist[nuspace-1]; 279 if(us->p){ 280 if(tracemmu) 281 iprint("^^^^^^^^^^ %ld %s [evict %d]\n", us->p->pid, us->p->text, us - uspace); 282 mmapflush(us); 283 } 284 us->p = p; 285 p->pmmu.vxmm.base = us->uzero; 286 p->pmmu.uzero = us->uzero; 287 p->pmmu.us = us; 288 usespace(us); 289 return us; 290 } 291 292 void 293 takespace(Proc *p, Uspace *us) 294 { 295 usespace(us); 296 if(us->p == p) 297 return; 298 if(tracemmu){ 299 if(us->p) 300 iprint("^^^^^^^^^^ %ld %s [steal %d]\n", us->p->pid, us->p->text, us - uspace); 301 } 302 us->p = p; 303 mmapflush(us); 304 } 305 306 void 307 putspace(Uspace *us) 308 { 309 int i; 310 311 mmapflush(us); 312 us->p->pmmu.us = nil; 313 us->p->pmmu.uzero = nil; 314 us->p->pmmu.vxmm.base = nil; 315 us->p = nil; 316 for(i=0; i<nuspace; i++) 317 if(ulist[i] == us){ 318 while(++i < nuspace) 319 ulist[i-1] = ulist[i]; 320 ulist[i-1] = us; 321 break; 322 } 323 } 324 325 /* 326 * Called when scheduler has decided to run proc p. 327 * Prepare to run proc p. 328 */ 329 void 330 mmuswitch(Proc *p) 331 { 332 /* 333 * Switch the address space, but only if it's not the 334 * one we were just in. Also, kprocs don't count -- 335 * only the guys on cpu0 do. 336 */ 337 if(p->kp) 338 return; 339 340 if(tracemmu) 341 iprint("mmuswitch %ld %s\n", p->pid, p->text); 342 343 if(p->pmmu.us && p->pmmu.us->p == p){ 344 if(tracemmu) iprint("---------- %ld %s [%d]\n", 345 p->pid, p->text, p->pmmu.us - uspace); 346 usespace(p->pmmu.us); 347 if(!p->newtlb && !m->flushmmu){ 348 usespace(p->pmmu.us); 349 return; 350 } 351 mmapflush(p->pmmu.us); 352 p->newtlb = 0; 353 return; 354 } 355 356 if(p->pmmu.us == nil) 357 getspace(p); 358 else 359 takespace(p, p->pmmu.us); 360 if(tracemmu) iprint("========== %ld %s [%d]\n", 361 p->pid, p->text, p->pmmu.us - uspace); 362 } 363 364 /* 365 * Called when proc p is dying. 366 */ 367 void 368 mmurelease(Proc *p) 369 { 370 if(p->kp) 371 return; 372 if(tracemmu) 373 iprint("mmurelease %ld %s\n", p->pid, p->text); 374 if(p->pmmu.vxproc) 375 vxproc_flush(p->pmmu.vxproc); 376 if(p->pmmu.us){ 377 if(tracemmu) 378 iprint("^^^^^^^^^^ %ld %s [release %d]\n", p->pid, p->text, p->pmmu.us - uspace); 379 putspace(p->pmmu.us); 380 if(m->flushmmu) 381 mmapflush(p->pmmu.us); 382 } 383 } 384 385 void 386 printlinuxmaps(void) 387 { 388 char buf[100]; 389 sprint(buf, "cat /proc/%d/maps", getpid()); 390 system(buf); 391 } 392 393 void 394 mmusize(int size) 395 { 396 static int set = 0; 397 if(!set && size){ 398 memsize = (size << 20); 399 } 400 }