sysproc.c (23448B)
1 #define WANT_M 2 #include "u.h" 3 #include "tos.h" 4 #include "lib.h" 5 #include "mem.h" 6 #include "dat.h" 7 #include "fns.h" 8 #include "error.h" 9 10 #include "a.out.h" 11 12 int shargs(char*, int, char**); 13 14 extern void checkpages(void); 15 extern void checkpagerefs(void); 16 17 long 18 sysr1(uint32 *x) 19 { 20 vx32sysr1(); 21 return 0; 22 } 23 24 long 25 sysrfork(uint32 *arg) 26 { 27 Proc *p; 28 int n, i; 29 Fgrp *ofg; 30 Pgrp *opg; 31 Rgrp *org; 32 Egrp *oeg; 33 ulong pid, flag; 34 Mach *wm; 35 36 flag = arg[0]; 37 /* Check flags before we commit */ 38 if((flag & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG)) 39 error(Ebadarg); 40 if((flag & (RFNAMEG|RFCNAMEG)) == (RFNAMEG|RFCNAMEG)) 41 error(Ebadarg); 42 if((flag & (RFENVG|RFCENVG)) == (RFENVG|RFCENVG)) 43 error(Ebadarg); 44 45 if((flag&RFPROC) == 0) { 46 if(flag & (RFMEM|RFNOWAIT)) 47 error(Ebadarg); 48 if(flag & (RFFDG|RFCFDG)) { 49 ofg = up->fgrp; 50 if(flag & RFFDG) 51 up->fgrp = dupfgrp(ofg); 52 else 53 up->fgrp = dupfgrp(nil); 54 closefgrp(ofg); 55 } 56 if(flag & (RFNAMEG|RFCNAMEG)) { 57 opg = up->pgrp; 58 up->pgrp = newpgrp(); 59 if(flag & RFNAMEG) 60 pgrpcpy(up->pgrp, opg); 61 /* inherit noattach */ 62 up->pgrp->noattach = opg->noattach; 63 closepgrp(opg); 64 } 65 if(flag & RFNOMNT) 66 up->pgrp->noattach = 1; 67 if(flag & RFREND) { 68 org = up->rgrp; 69 up->rgrp = newrgrp(); 70 closergrp(org); 71 } 72 if(flag & (RFENVG|RFCENVG)) { 73 oeg = up->egrp; 74 up->egrp = smalloc(sizeof(Egrp)); 75 up->egrp->ref.ref = 1; 76 if(flag & RFENVG) 77 envcpy(up->egrp, oeg); 78 closeegrp(oeg); 79 } 80 if(flag & RFNOTEG) 81 up->noteid = incref(¬eidalloc); 82 return 0; 83 } 84 85 p = newproc(); 86 87 p->fpsave = up->fpsave; 88 p->scallnr = up->scallnr; 89 p->s = up->s; 90 p->nerrlab = 0; 91 p->slash = up->slash; 92 p->dot = up->dot; 93 incref(&p->dot->ref); 94 95 memmove(p->note, up->note, sizeof(p->note)); 96 p->privatemem = up->privatemem; 97 p->noswap = up->noswap; 98 p->nnote = up->nnote; 99 p->notified = 0; 100 p->lastnote = up->lastnote; 101 p->notify = up->notify; 102 p->ureg = up->ureg; 103 p->dbgreg = 0; 104 105 /* Make a new set of memory segments */ 106 n = flag & RFMEM; 107 qlock(&p->seglock); 108 if(waserror()){ 109 qunlock(&p->seglock); 110 nexterror(); 111 } 112 for(i = 0; i < NSEG; i++) 113 if(up->seg[i]) 114 p->seg[i] = dupseg(up->seg, i, n); 115 qunlock(&p->seglock); 116 poperror(); 117 118 /* File descriptors */ 119 if(flag & (RFFDG|RFCFDG)) { 120 if(flag & RFFDG) 121 p->fgrp = dupfgrp(up->fgrp); 122 else 123 p->fgrp = dupfgrp(nil); 124 } 125 else { 126 p->fgrp = up->fgrp; 127 incref(&p->fgrp->ref); 128 } 129 130 /* Process groups */ 131 if(flag & (RFNAMEG|RFCNAMEG)) { 132 p->pgrp = newpgrp(); 133 if(flag & RFNAMEG) 134 pgrpcpy(p->pgrp, up->pgrp); 135 /* inherit noattach */ 136 p->pgrp->noattach = up->pgrp->noattach; 137 } 138 else { 139 p->pgrp = up->pgrp; 140 incref(&p->pgrp->ref); 141 } 142 if(flag & RFNOMNT) 143 up->pgrp->noattach = 1; 144 145 if(flag & RFREND) 146 p->rgrp = newrgrp(); 147 else { 148 incref(&up->rgrp->ref); 149 p->rgrp = up->rgrp; 150 } 151 152 /* Environment group */ 153 if(flag & (RFENVG|RFCENVG)) { 154 p->egrp = smalloc(sizeof(Egrp)); 155 p->egrp->ref.ref = 1; 156 if(flag & RFENVG) 157 envcpy(p->egrp, up->egrp); 158 } 159 else { 160 p->egrp = up->egrp; 161 incref(&p->egrp->ref); 162 } 163 p->hang = up->hang; 164 p->procmode = up->procmode; 165 166 /* Craft a return frame which will cause the child to pop out of 167 * the scheduler in user mode with the return register zero 168 */ 169 forkchild(p, up->dbgreg); 170 171 p->parent = up; 172 p->parentpid = up->pid; 173 if(flag&RFNOWAIT) 174 p->parentpid = 0; 175 else { 176 lock(&up->exl); 177 up->nchild++; 178 unlock(&up->exl); 179 } 180 if((flag&RFNOTEG) == 0) 181 p->noteid = up->noteid; 182 183 p->fpstate = up->fpstate; 184 pid = p->pid; 185 memset(p->time, 0, sizeof(p->time)); 186 p->time[TReal] = msec(); 187 188 kstrdup(&p->text, up->text); 189 kstrdup(&p->user, up->user); 190 /* 191 * since the bss/data segments are now shareable, 192 * any mmu info about this process is now stale 193 * (i.e. has bad properties) and has to be discarded. 194 */ 195 flushmmu(); 196 p->basepri = up->basepri; 197 p->priority = up->basepri; 198 p->fixedpri = up->fixedpri; 199 p->mp = up->mp; 200 wm = up->wired; 201 if(wm) 202 procwired(p, wm->machno); 203 ready(p); 204 sched(); 205 return pid; 206 } 207 208 static uint32 209 l2be(uint32 l) 210 { 211 uchar *cp; 212 213 cp = (uchar*)&l; 214 return (cp[0]<<24) | (cp[1]<<16) | (cp[2]<<8) | cp[3]; 215 } 216 217 static char Echanged[] = "exec arguments changed underfoot"; 218 219 long 220 sysexec(uint32 *arg) 221 { 222 char *volatile elem, *volatile file, *ufile; 223 Chan *volatile tc; 224 225 /* 226 * Open the file, remembering the final element and the full name. 227 */ 228 file = nil; 229 elem = nil; 230 tc = nil; 231 if(waserror()){ 232 if(file) 233 free(file); 234 if(elem) 235 free(elem); 236 if(tc) 237 cclose(tc); 238 nexterror(); 239 } 240 241 ufile = uvalidaddr(arg[0], 1, 0); 242 file = validnamedup(ufile, 1); 243 tc = namec(file, Aopen, OEXEC, 0); 244 kstrdup((char**)&elem, up->genbuf); 245 246 /* 247 * Read the header. If it's a #!, fill in progarg[] with info and repeat. 248 */ 249 int i, n, nprogarg; 250 char *progarg[sizeof(Exec)/2+1]; 251 char *prog, *p; 252 char line[sizeof(Exec)+1]; 253 Exec exec; 254 255 nprogarg = 0; 256 n = devtab[tc->type]->read(tc, &exec, sizeof(Exec), 0); 257 if(n < 2) 258 error(Ebadexec); 259 p = (char*)&exec; 260 if(p[0] == '#' && p[1] == '!'){ 261 memmove(line, p, n); 262 nprogarg = shargs(line, n, progarg); 263 if(nprogarg == 0) 264 error(Ebadexec); 265 266 /* The original file becomes an extra arg after #! line */ 267 progarg[nprogarg++] = file; 268 269 /* 270 * Take the #! $0 as a file to open, and replace 271 * $0 with the original path's name. 272 */ 273 prog = progarg[0]; 274 progarg[0] = elem; 275 cclose(tc); 276 tc = nil; /* in case namec errors out */ 277 tc = namec(prog, Aopen, OEXEC, 0); 278 n = devtab[tc->type]->read(tc, &exec, sizeof(Exec), 0); 279 if(n < 2) 280 error(Ebadexec); 281 } 282 283 /* 284 * #! has had its chance, now we need a real binary 285 */ 286 uint32 magic, entry, text, etext, data, edata, bss, ebss; 287 288 magic = l2be(exec.magic); 289 if(n != sizeof(Exec) || l2be(exec.magic) != AOUT_MAGIC) 290 error(Ebadexec); 291 292 entry = l2be(exec.entry); 293 text = l2be(exec.text); 294 data = l2be(exec.data); 295 bss = l2be(exec.bss); 296 etext = ROUND(UTZERO+sizeof(Exec)+text, BY2PG); 297 edata = ROUND(etext + data, BY2PG); 298 ebss = ROUND(etext + data + bss, BY2PG); 299 300 //iprint("entry %#lux text %#lux data %#lux bss %#lux\n", entry, text, data, bss); 301 //iprint("etext %#lux edata %#lux ebss %#lux\n", etext, edata, ebss); 302 303 if(entry < UTZERO+sizeof(Exec) || entry >= UTZERO+sizeof(Exec)+text) 304 error(Ebadexec); 305 306 /* many overflow possibilities */ 307 if(text >= USTKTOP || data >= USTKTOP || bss >= USTKTOP 308 || etext >= USTKTOP || edata >= USTKTOP || ebss >= USTKTOP 309 || etext >= USTKTOP || edata < etext || ebss < edata) 310 error(Ebadexec); 311 312 /* 313 * Copy argv into new stack segment temporarily mapped elsewhere. 314 * Be careful: multithreaded program could be changing argv during this. 315 * Pass 1: count number of arguments, string bytes. 316 */ 317 int nargv, strbytes; 318 uint32 argp, ssize, spage; 319 320 strbytes = 0; 321 for(i=0; i<nprogarg; i++) 322 strbytes += strlen(progarg[i]) + 1; 323 324 argp = arg[1]; 325 for(nargv=0;; nargv++, argp += BY2WD){ 326 uint32 a; 327 char *str; 328 329 a = *(uint32*)uvalidaddr(argp, BY2WD, 0); 330 if(a == 0) 331 break; 332 str = uvalidaddr(a, 1, 0); 333 n = ((char*)vmemchr(str, 0, 0x7FFFFFFF) - str) + 1; 334 if(nprogarg > 0 && nargv == 0) 335 continue; /* going to skip argv[0] on #! */ 336 strbytes += n; 337 } 338 if(nargv == 0) 339 error("exec missing argv"); 340 341 /* 342 * Skip over argv[0] if using #!. Waited until now so that 343 * string would still be checked for validity during loop. 344 */ 345 if(nprogarg > 0){ 346 nargv--; 347 arg[1] += BY2WD; 348 } 349 350 ssize = BY2WD*((nprogarg+nargv)+1) + ROUND(strbytes, BY2WD) + sizeof(Tos); 351 352 /* 353 * 8-byte align SP for those (e.g. sparc) that need it. 354 * execregs() will subtract another 4 bytes for argc. 355 */ 356 if((ssize+4) & 7) 357 ssize += 4; 358 spage = (ssize+(BY2PG-1)) >> PGSHIFT; 359 360 /* 361 * Pass 2: build the stack segment, being careful not to assume 362 * that the counts from pass 1 are still valid. 363 */ 364 if(spage > TSTKSIZ) 365 error(Enovmem); 366 367 qlock(&up->seglock); 368 if(waserror()){ 369 if(up->seg[ESEG]){ 370 putseg(up->seg[ESEG]); 371 up->seg[ESEG] = nil; 372 } 373 qunlock(&up->seglock); 374 nexterror(); 375 } 376 up->seg[ESEG] = newseg(SG_STACK, TSTKTOP-USTKSIZE, USTKSIZE/BY2PG); 377 flushmmu(); // Needed for Plan 9 VX XXX really? 378 379 /* 380 * Top-of-stack structure. 381 */ 382 uchar *uzero; 383 uzero = up->pmmu.uzero; 384 Tos *tos; 385 uint32 utos; 386 utos = USTKTOP - sizeof(Tos); 387 tos = (Tos*)(uzero + utos + TSTKTOP - USTKTOP); 388 tos->cyclefreq = m->cyclefreq; 389 cycles((uvlong*)&tos->pcycles); 390 tos->pcycles = -tos->pcycles; 391 tos->kcycles = tos->pcycles; 392 tos->clock = 0; 393 394 /* 395 * Argument pointers and strings, together. 396 */ 397 char *bp, *ep; 398 uint32 *targp; 399 uint32 ustrp, uargp; 400 401 ustrp = utos - ROUND(strbytes, BY2WD); 402 uargp = ustrp - BY2WD*((nprogarg+nargv)+1); 403 bp = (char*)(uzero + ustrp + TSTKTOP - USTKTOP); 404 ep = bp + strbytes; 405 p = bp; 406 targp = (uint32*)(uzero + uargp + TSTKTOP - USTKTOP); 407 408 /* #! args are trusted */ 409 for(i=0; i<nprogarg; i++){ 410 n = strlen(progarg[i]) + 1; 411 if(n > ep - p) 412 error(Echanged); 413 memmove(p, progarg[i], n); 414 p += n; 415 *targp++ = ustrp; 416 ustrp += n; 417 } 418 419 /* the rest are not */ 420 argp = arg[1]; 421 for(i=0; i<nargv; i++){ 422 uint32 a; 423 char *str; 424 425 a = *(uint32*)uvalidaddr(argp, BY2WD, 0); 426 argp += BY2WD; 427 428 str = uvalidaddr(a, 1, 0); 429 n = ((char*)vmemchr(str, 0, 0x7FFFFFFF) - str) + 1; 430 if(n > ep - p) 431 error(Echanged); 432 memmove(p, str, n); 433 p += n; 434 *targp++ = ustrp; 435 ustrp += n; 436 } 437 438 if(*(uint32*)uvalidaddr(argp, BY2WD, 0) != 0) 439 error(Echanged); 440 *targp = 0; 441 442 /* 443 * But wait, there's more: prepare an arg copy for up->args 444 * using the copy we just made in the temporary segment. 445 */ 446 char *args; 447 int nargs; 448 449 n = p - bp; /* includes NUL on last arg, so must be > 0 */ 450 if(n <= 0) /* nprogarg+nargv > 0; checked above */ 451 error(Egreg); 452 if(n > 128) 453 n = 128; 454 args = smalloc(n); 455 if(waserror()){ 456 free(args); 457 nexterror(); 458 } 459 memmove(args, bp, n); 460 /* find beginning of UTF character boundary to place final NUL */ 461 while(n > 0 && (args[n-1]&0xC0) == 0x80) 462 n--; 463 args[n-1] = '\0'; 464 nargs = n; 465 466 /* 467 * Now we're ready to commit. 468 */ 469 free(up->text); 470 up->text = elem; 471 free(up->args); 472 up->args = args; 473 up->nargs = n; 474 elem = nil; 475 poperror(); /* args */ 476 477 /* 478 * Free old memory. Special segments maintained across exec. 479 */ 480 Segment *s; 481 for(i = SSEG; i <= BSEG; i++) { 482 putseg(up->seg[i]); 483 up->seg[i] = nil; /* in case of error */ 484 } 485 for(i = BSEG+1; i< NSEG; i++) { 486 s = up->seg[i]; 487 if(s && (s->type&SG_CEXEC)) { 488 putseg(s); 489 up->seg[i] = nil; 490 } 491 } 492 493 /* 494 * Close on exec 495 */ 496 Fgrp *f; 497 f = up->fgrp; 498 for(i=0; i<=f->maxfd; i++) 499 fdclose(i, CCEXEC); 500 501 /* Text. Shared. Attaches to cache image if possible */ 502 /* attachimage returns a locked cache image */ 503 Image *img; 504 Segment *ts; 505 img = attachimage(SG_TEXT|SG_RONLY, tc, UTZERO, (etext-UTZERO)>>PGSHIFT); 506 ts = img->s; 507 up->seg[TSEG] = ts; 508 ts->flushme = 1; 509 ts->fstart = 0; 510 ts->flen = sizeof(Exec)+text; 511 unlock(&img->ref.lk); 512 513 /* Data. Shared. */ 514 s = newseg(SG_DATA, etext, (edata-etext)>>PGSHIFT); 515 up->seg[DSEG] = s; 516 517 /* Attached by hand */ 518 incref(&img->ref); 519 s->image = img; 520 s->fstart = ts->fstart+ts->flen; 521 s->flen = data; 522 523 /* BSS. Zero fill on demand */ 524 up->seg[BSEG] = newseg(SG_BSS, edata, (ebss-edata)>>PGSHIFT); 525 526 /* 527 * Move the stack 528 */ 529 s = up->seg[ESEG]; 530 up->seg[ESEG] = 0; 531 up->seg[SSEG] = s; 532 qunlock(&up->seglock); 533 poperror(); /* seglock */ 534 535 s->base = USTKTOP-USTKSIZE; 536 s->top = USTKTOP; 537 relocateseg(s, USTKTOP-TSTKTOP); 538 539 /* 540 * '/' processes are higher priority (hack to make /ip more responsive). 541 */ 542 if(devtab[tc->type]->dc == L'/') 543 up->basepri = PriRoot; 544 up->priority = up->basepri; 545 poperror(); /* tc, elem, file */ 546 cclose(tc); 547 free(file); 548 // elem is now up->text 549 550 /* 551 * At this point, the mmu contains info about the old address 552 * space and needs to be flushed 553 */ 554 flushmmu(); 555 qlock(&up->debug); 556 up->nnote = 0; 557 up->notify = 0; 558 up->notified = 0; 559 up->privatemem = 0; 560 procsetup(up); 561 qunlock(&up->debug); 562 if(up->hang) 563 up->procctl = Proc_stopme; 564 565 return execregs(entry, USTKTOP - uargp, nprogarg+nargv); 566 } 567 568 int 569 shargs(char *s, int n, char **ap) 570 { 571 int i; 572 573 s += 2; 574 n -= 2; /* skip #! */ 575 for(i=0; s[i]!='\n'; i++) 576 if(i == n-1) 577 return 0; 578 s[i] = 0; 579 *ap = 0; 580 i = 0; 581 for(;;) { 582 while(*s==' ' || *s=='\t') 583 s++; 584 if(*s == 0) 585 break; 586 i++; 587 *ap++ = s; 588 *ap = 0; 589 while(*s && *s!=' ' && *s!='\t') 590 s++; 591 if(*s == 0) 592 break; 593 else 594 *s++ = 0; 595 } 596 return i; 597 } 598 599 int 600 return0(void *v) 601 { 602 return 0; 603 } 604 605 long 606 syssleep(uint32 *arg) 607 { 608 609 int n; 610 611 n = arg[0]; 612 if(n <= 0) { 613 yield(); 614 return 0; 615 } 616 if(n < TK2MS(1)) 617 n = TK2MS(1); 618 tsleep(&up->sleep, return0, 0, n); 619 return 0; 620 } 621 622 long 623 sysalarm(uint32 *arg) 624 { 625 return procalarm(arg[0]); 626 } 627 628 long 629 sysexits(uint32 *arg) 630 { 631 char *status; 632 char *inval = "invalid exit string"; 633 char buf[ERRMAX]; 634 635 if(arg[0]){ 636 if(waserror()) 637 status = inval; 638 else{ 639 status = uvalidaddr(arg[0], 1, 0); 640 if(vmemchr(status, 0, ERRMAX) == 0){ 641 memmove(buf, status, ERRMAX); 642 buf[ERRMAX-1] = 0; 643 status = buf; 644 } 645 poperror(); 646 } 647 648 }else 649 status = nil; 650 pexit(status, 1); 651 return 0; /* not reached */ 652 } 653 654 long 655 sys_wait(uint32 *arg) 656 { 657 int pid; 658 Waitmsg w; 659 OWaitmsg *ow; 660 661 if(arg[0] == 0) 662 return pwait(nil); 663 664 ow = uvalidaddr(arg[0], sizeof(OWaitmsg), 1); 665 evenaddr(arg[0]); 666 pid = pwait(&w); 667 if(pid >= 0){ 668 readnum(0, ow->pid, NUMSIZE, w.pid, NUMSIZE); 669 readnum(0, ow->time+TUser*NUMSIZE, NUMSIZE, w.time[TUser], NUMSIZE); 670 readnum(0, ow->time+TSys*NUMSIZE, NUMSIZE, w.time[TSys], NUMSIZE); 671 readnum(0, ow->time+TReal*NUMSIZE, NUMSIZE, w.time[TReal], NUMSIZE); 672 strncpy(ow->msg, w.msg, sizeof(ow->msg)); 673 ow->msg[sizeof(ow->msg)-1] = '\0'; 674 } 675 return pid; 676 } 677 678 long 679 sysawait(uint32 *arg) 680 { 681 int i; 682 int pid; 683 Waitmsg w; 684 uint32 n; 685 char *buf; 686 687 n = arg[1]; 688 buf = uvalidaddr(arg[0], n, 1); 689 pid = pwait(&w); 690 if(pid < 0) 691 return -1; 692 i = snprint(buf, n, "%d %lud %lud %lud %q", 693 w.pid, 694 w.time[TUser], w.time[TSys], w.time[TReal], 695 w.msg); 696 697 return i; 698 } 699 700 void 701 werrstr(char *fmt, ...) 702 { 703 va_list va; 704 705 if(up == nil) 706 return; 707 708 va_start(va, fmt); 709 vseprint(up->syserrstr, up->syserrstr+ERRMAX, fmt, va); 710 va_end(va); 711 } 712 713 static long 714 generrstr(uint32 addr, uint nbuf) 715 { 716 char tmp[ERRMAX]; 717 char *buf; 718 719 if(nbuf == 0) 720 error(Ebadarg); 721 buf = uvalidaddr(addr, nbuf, 1); 722 if(nbuf > sizeof tmp) 723 nbuf = sizeof tmp; 724 memmove(tmp, buf, nbuf); 725 726 /* make sure it's NUL-terminated */ 727 tmp[nbuf-1] = '\0'; 728 memmove(buf, up->syserrstr, nbuf); 729 buf[nbuf-1] = '\0'; 730 memmove(up->syserrstr, tmp, nbuf); 731 return 0; 732 } 733 734 long 735 syserrstr(uint32 *arg) 736 { 737 return generrstr(arg[0], arg[1]); 738 } 739 740 /* compatibility for old binaries */ 741 long 742 sys_errstr(uint32 *arg) 743 { 744 return generrstr(arg[0], 64); 745 } 746 747 long 748 sysnotify(uint32 *arg) 749 { 750 if(arg[0] != 0) 751 uvalidaddr(arg[0], 1, 0); 752 up->notify = arg[0]; /* checked again when used */ 753 return 0; 754 } 755 756 long 757 sysnoted(uint32 *arg) 758 { 759 if(arg[0]!=NRSTR && !up->notified) 760 error(Egreg); 761 return 0; 762 } 763 764 long 765 syssegbrk(uint32 *arg) 766 { 767 int i; 768 uint32 addr; 769 Segment *s; 770 771 addr = arg[0]; 772 for(i = 0; i < NSEG; i++) { 773 s = up->seg[i]; 774 if(s == 0 || addr < s->base || addr >= s->top) 775 continue; 776 switch(s->type&SG_TYPE) { 777 case SG_TEXT: 778 case SG_DATA: 779 case SG_STACK: 780 error(Ebadarg); 781 default: 782 return ibrk(arg[1], i); 783 } 784 } 785 786 error(Ebadarg); 787 return 0; /* not reached */ 788 } 789 790 long 791 syssegattach(uint32 *arg) 792 { 793 return segattach(up, arg[0], uvalidaddr(arg[1], 1, 0), arg[2], arg[3]); 794 } 795 796 long 797 syssegdetach(uint32 *arg) 798 { 799 int i; 800 uint32 addr; 801 Segment *s; 802 803 qlock(&up->seglock); 804 if(waserror()){ 805 qunlock(&up->seglock); 806 nexterror(); 807 } 808 809 s = 0; 810 addr = arg[0]; 811 for(i = 0; i < NSEG; i++) 812 if((s = up->seg[i])) { 813 qlock(&s->lk); 814 if((addr >= s->base && addr < s->top) || 815 (s->top == s->base && addr == s->base)) 816 goto found; 817 qunlock(&s->lk); 818 } 819 820 error(Ebadarg); 821 822 found: 823 /* 824 * Check we are not detaching the initial stack segment. 825 */ 826 if(s == up->seg[SSEG]){ 827 qunlock(&s->lk); 828 error(Ebadarg); 829 } 830 up->seg[i] = 0; 831 qunlock(&s->lk); 832 putseg(s); 833 qunlock(&up->seglock); 834 poperror(); 835 836 /* Ensure we flush any entries from the lost segment */ 837 flushmmu(); 838 return 0; 839 } 840 841 long 842 syssegfree(uint32 *arg) 843 { 844 Segment *s; 845 uint32 from, to; 846 847 from = arg[0]; 848 s = seg(up, from, 1); 849 if(s == nil) 850 error(Ebadarg); 851 to = (from + arg[1]) & ~(BY2PG-1); 852 from = PGROUND(from); 853 854 if(to > s->top) { 855 qunlock(&s->lk); 856 error(Ebadarg); 857 } 858 859 mfreeseg(s, from, (to - from) / BY2PG); 860 qunlock(&s->lk); 861 flushmmu(); 862 863 return 0; 864 } 865 866 /* For binary compatibility */ 867 long 868 sysbrk_(uint32 *arg) 869 { 870 return ibrk(arg[0], BSEG); 871 } 872 873 long 874 sysrendezvous(uint32 *arg) 875 { 876 uintptr tag, val; 877 Proc *p, **l; 878 879 tag = arg[0]; 880 l = &REND(up->rgrp, tag); 881 up->rendval = ~(uintptr)0; 882 883 lock(&up->rgrp->ref.lk); 884 for(p = *l; p; p = p->rendhash) { 885 if(p->rendtag == tag) { 886 *l = p->rendhash; 887 val = p->rendval; 888 p->rendval = arg[1]; 889 890 while(p->mach != 0) 891 ; 892 ready(p); 893 unlock(&up->rgrp->ref.lk); 894 return val; 895 } 896 l = &p->rendhash; 897 } 898 899 /* Going to sleep here */ 900 up->rendtag = tag; 901 up->rendval = arg[1]; 902 up->rendhash = *l; 903 *l = up; 904 up->state = Rendezvous; 905 unlock(&up->rgrp->ref.lk); 906 907 sched(); 908 909 return up->rendval; 910 } 911 912 /* 913 * The implementation of semaphores is complicated by needing 914 * to avoid rescheduling in syssemrelease, so that it is safe 915 * to call from real-time processes. This means syssemrelease 916 * cannot acquire any qlocks, only spin locks. 917 * 918 * Semacquire and semrelease must both manipulate the semaphore 919 * wait list. Lock-free linked lists only exist in theory, not 920 * in practice, so the wait list is protected by a spin lock. 921 * 922 * The semaphore value *addr is stored in user memory, so it 923 * cannot be read or written while holding spin locks. 924 * 925 * Thus, we can access the list only when holding the lock, and 926 * we can access the semaphore only when not holding the lock. 927 * This makes things interesting. Note that sleep's condition function 928 * is called while holding two locks - r and up->rlock - so it cannot 929 * access the semaphore value either. 930 * 931 * An acquirer announces its intention to try for the semaphore 932 * by putting a Sema structure onto the wait list and then 933 * setting Sema.waiting. After one last check of semaphore, 934 * the acquirer sleeps until Sema.waiting==0. A releaser of n 935 * must wake up n acquirers who have Sema.waiting set. It does 936 * this by clearing Sema.waiting and then calling wakeup. 937 * 938 * There are three interesting races here. 939 940 * The first is that in this particular sleep/wakeup usage, a single 941 * wakeup can rouse a process from two consecutive sleeps! 942 * The ordering is: 943 * 944 * (a) set Sema.waiting = 1 945 * (a) call sleep 946 * (b) set Sema.waiting = 0 947 * (a) check Sema.waiting inside sleep, return w/o sleeping 948 * (a) try for semaphore, fail 949 * (a) set Sema.waiting = 1 950 * (a) call sleep 951 * (b) call wakeup(a) 952 * (a) wake up again 953 * 954 * This is okay - semacquire will just go around the loop 955 * again. It does mean that at the top of the for(;;) loop in 956 * semacquire, phore.waiting might already be set to 1. 957 * 958 * The second is that a releaser might wake an acquirer who is 959 * interrupted before he can acquire the lock. Since 960 * release(n) issues only n wakeup calls -- only n can be used 961 * anyway -- if the interrupted process is not going to use his 962 * wakeup call he must pass it on to another acquirer. 963 * 964 * The third race is similar to the second but more subtle. An 965 * acquirer sets waiting=1 and then does a final canacquire() 966 * before going to sleep. The opposite order would result in 967 * missing wakeups that happen between canacquire and 968 * waiting=1. (In fact, the whole point of Sema.waiting is to 969 * avoid missing wakeups between canacquire() and sleep().) But 970 * there can be spurious wakeups between a successful 971 * canacquire() and the following semdequeue(). This wakeup is 972 * not useful to the acquirer, since he has already acquired 973 * the semaphore. Like in the previous case, though, the 974 * acquirer must pass the wakeup call along. 975 * 976 * This is all rather subtle. The code below has been verified 977 * with the spin model /sys/src/9/port/semaphore.p. The 978 * original code anticipated the second race but not the first 979 * or third, which were caught only with spin. The first race 980 * is mentioned in /sys/doc/sleep.ps, but I'd forgotten about it. 981 * It was lucky that my abstract model of sleep/wakeup still managed 982 * to preserve that behavior. 983 * 984 * I remain slightly concerned about memory coherence 985 * outside of locks. The spin model does not take 986 * queued processor writes into account so we have to 987 * think hard. The only variables accessed outside locks 988 * are the semaphore value itself and the boolean flag 989 * Sema.waiting. The value is only accessed with cmpswap, 990 * whose job description includes doing the right thing as 991 * far as memory coherence across processors. That leaves 992 * Sema.waiting. To handle it, we call coherence() before each 993 * read and after each write. - rsc 994 */ 995 996 /* Add semaphore p with addr a to list in seg. */ 997 static void 998 semqueue(Segment *s, long *a, Sema *p) 999 { 1000 memset(p, 0, sizeof *p); 1001 p->addr = a; 1002 lock(&s->sema.rendez.lk); /* uses s->sema.Rendez.Lock, but no one else is */ 1003 p->next = &s->sema; 1004 p->prev = s->sema.prev; 1005 p->next->prev = p; 1006 p->prev->next = p; 1007 unlock(&s->sema.rendez.lk); 1008 } 1009 1010 /* Remove semaphore p from list in seg. */ 1011 static void 1012 semdequeue(Segment *s, Sema *p) 1013 { 1014 lock(&s->sema.rendez.lk); 1015 p->next->prev = p->prev; 1016 p->prev->next = p->next; 1017 unlock(&s->sema.rendez.lk); 1018 } 1019 1020 /* Wake up n waiters with addr a on list in seg. */ 1021 static void 1022 semwakeup(Segment *s, long *a, long n) 1023 { 1024 Sema *p; 1025 1026 lock(&s->sema.rendez.lk); 1027 for(p=s->sema.next; p!=&s->sema && n>0; p=p->next){ 1028 if(p->addr == a && p->waiting){ 1029 p->waiting = 0; 1030 coherence(); 1031 wakeup(&p->rendez); 1032 n--; 1033 } 1034 } 1035 unlock(&s->sema.rendez.lk); 1036 } 1037 1038 /* Add delta to semaphore and wake up waiters as appropriate. */ 1039 static long 1040 semrelease(Segment *s, long *addr, long delta) 1041 { 1042 long value; 1043 1044 do 1045 value = *addr; 1046 while(!cmpswap(addr, value, value+delta)); 1047 semwakeup(s, addr, delta); 1048 return value+delta; 1049 } 1050 1051 /* Try to acquire semaphore using compare-and-swap */ 1052 static int 1053 canacquire(long *addr) 1054 { 1055 long value; 1056 1057 while((value=*addr) > 0) 1058 if(cmpswap(addr, value, value-1)) 1059 return 1; 1060 return 0; 1061 } 1062 1063 /* Should we wake up? */ 1064 static int 1065 semawoke(void *p) 1066 { 1067 coherence(); 1068 return !((Sema*)p)->waiting; 1069 } 1070 1071 /* Acquire semaphore (subtract 1). */ 1072 static int 1073 semacquire(Segment *s, long *addr, int block) 1074 { 1075 int acquired; 1076 Sema phore; 1077 1078 if(canacquire(addr)) 1079 return 1; 1080 if(!block) 1081 return 0; 1082 1083 acquired = 0; 1084 semqueue(s, addr, &phore); 1085 for(;;){ 1086 phore.waiting = 1; 1087 coherence(); 1088 if(canacquire(addr)){ 1089 acquired = 1; 1090 break; 1091 } 1092 if(waserror()) 1093 break; 1094 sleep(&phore.rendez, semawoke, &phore); 1095 poperror(); 1096 } 1097 semdequeue(s, &phore); 1098 coherence(); /* not strictly necessary due to lock in semdequeue */ 1099 if(!phore.waiting) 1100 semwakeup(s, addr, 1); 1101 if(!acquired) 1102 nexterror(); 1103 return 1; 1104 } 1105 1106 long 1107 syssemacquire(uint32 *arg) 1108 { 1109 int block; 1110 long *addr; 1111 Segment *s; 1112 1113 addr = uvalidaddr(arg[0], sizeof(long), 1); 1114 evenaddr(arg[0]); 1115 block = arg[1]; 1116 1117 if((s = seg(up, arg[0], 0)) == nil) 1118 error(Ebadarg); 1119 if(*addr < 0) 1120 error(Ebadarg); 1121 return semacquire(s, addr, block); 1122 } 1123 1124 long 1125 syssemrelease(uint32 *arg) 1126 { 1127 long *addr, delta; 1128 Segment *s; 1129 1130 addr = uvalidaddr(arg[0], sizeof(long), 1); 1131 evenaddr(arg[0]); 1132 delta = arg[1]; 1133 1134 if((s = seg(up, arg[0], 0)) == nil) 1135 error(Ebadarg); 1136 if(delta < 0 || *addr < 0) 1137 error(Ebadarg); 1138 return semrelease(s, addr, arg[1]); 1139 }