vx32

Local 9vx git repository for patches.
git clone git://r-36.net/vx32
Log | Files | Refs

sysproc.c (23448B)


      1 #define	WANT_M
      2 #include	"u.h"
      3 #include	"tos.h"
      4 #include	"lib.h"
      5 #include	"mem.h"
      6 #include	"dat.h"
      7 #include	"fns.h"
      8 #include	"error.h"
      9 
     10 #include	"a.out.h"
     11 
     12 int	shargs(char*, int, char**);
     13 
     14 extern void checkpages(void);
     15 extern void checkpagerefs(void);
     16 
     17 long
     18 sysr1(uint32 *x)
     19 {
     20 	vx32sysr1();
     21 	return 0;
     22 }
     23 
     24 long
     25 sysrfork(uint32 *arg)
     26 {
     27 	Proc *p;
     28 	int n, i;
     29 	Fgrp *ofg;
     30 	Pgrp *opg;
     31 	Rgrp *org;
     32 	Egrp *oeg;
     33 	ulong pid, flag;
     34 	Mach *wm;
     35 
     36 	flag = arg[0];
     37 	/* Check flags before we commit */
     38 	if((flag & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
     39 		error(Ebadarg);
     40 	if((flag & (RFNAMEG|RFCNAMEG)) == (RFNAMEG|RFCNAMEG))
     41 		error(Ebadarg);
     42 	if((flag & (RFENVG|RFCENVG)) == (RFENVG|RFCENVG))
     43 		error(Ebadarg);
     44 
     45 	if((flag&RFPROC) == 0) {
     46 		if(flag & (RFMEM|RFNOWAIT))
     47 			error(Ebadarg);
     48 		if(flag & (RFFDG|RFCFDG)) {
     49 			ofg = up->fgrp;
     50 			if(flag & RFFDG)
     51 				up->fgrp = dupfgrp(ofg);
     52 			else
     53 				up->fgrp = dupfgrp(nil);
     54 			closefgrp(ofg);
     55 		}
     56 		if(flag & (RFNAMEG|RFCNAMEG)) {
     57 			opg = up->pgrp;
     58 			up->pgrp = newpgrp();
     59 			if(flag & RFNAMEG)
     60 				pgrpcpy(up->pgrp, opg);
     61 			/* inherit noattach */
     62 			up->pgrp->noattach = opg->noattach;
     63 			closepgrp(opg);
     64 		}
     65 		if(flag & RFNOMNT)
     66 			up->pgrp->noattach = 1;
     67 		if(flag & RFREND) {
     68 			org = up->rgrp;
     69 			up->rgrp = newrgrp();
     70 			closergrp(org);
     71 		}
     72 		if(flag & (RFENVG|RFCENVG)) {
     73 			oeg = up->egrp;
     74 			up->egrp = smalloc(sizeof(Egrp));
     75 			up->egrp->ref.ref = 1;
     76 			if(flag & RFENVG)
     77 				envcpy(up->egrp, oeg);
     78 			closeegrp(oeg);
     79 		}
     80 		if(flag & RFNOTEG)
     81 			up->noteid = incref(&noteidalloc);
     82 		return 0;
     83 	}
     84 
     85 	p = newproc();
     86 
     87 	p->fpsave = up->fpsave;
     88 	p->scallnr = up->scallnr;
     89 	p->s = up->s;
     90 	p->nerrlab = 0;
     91 	p->slash = up->slash;
     92 	p->dot = up->dot;
     93 	incref(&p->dot->ref);
     94 
     95 	memmove(p->note, up->note, sizeof(p->note));
     96 	p->privatemem = up->privatemem;
     97 	p->noswap = up->noswap;
     98 	p->nnote = up->nnote;
     99 	p->notified = 0;
    100 	p->lastnote = up->lastnote;
    101 	p->notify = up->notify;
    102 	p->ureg = up->ureg;
    103 	p->dbgreg = 0;
    104 
    105 	/* Make a new set of memory segments */
    106 	n = flag & RFMEM;
    107 	qlock(&p->seglock);
    108 	if(waserror()){
    109 		qunlock(&p->seglock);
    110 		nexterror();
    111 	}
    112 	for(i = 0; i < NSEG; i++)
    113 		if(up->seg[i])
    114 			p->seg[i] = dupseg(up->seg, i, n);
    115 	qunlock(&p->seglock);
    116 	poperror();
    117 
    118 	/* File descriptors */
    119 	if(flag & (RFFDG|RFCFDG)) {
    120 		if(flag & RFFDG)
    121 			p->fgrp = dupfgrp(up->fgrp);
    122 		else
    123 			p->fgrp = dupfgrp(nil);
    124 	}
    125 	else {
    126 		p->fgrp = up->fgrp;
    127 		incref(&p->fgrp->ref);
    128 	}
    129 
    130 	/* Process groups */
    131 	if(flag & (RFNAMEG|RFCNAMEG)) {
    132 		p->pgrp = newpgrp();
    133 		if(flag & RFNAMEG)
    134 			pgrpcpy(p->pgrp, up->pgrp);
    135 		/* inherit noattach */
    136 		p->pgrp->noattach = up->pgrp->noattach;
    137 	}
    138 	else {
    139 		p->pgrp = up->pgrp;
    140 		incref(&p->pgrp->ref);
    141 	}
    142 	if(flag & RFNOMNT)
    143 		up->pgrp->noattach = 1;
    144 
    145 	if(flag & RFREND)
    146 		p->rgrp = newrgrp();
    147 	else {
    148 		incref(&up->rgrp->ref);
    149 		p->rgrp = up->rgrp;
    150 	}
    151 
    152 	/* Environment group */
    153 	if(flag & (RFENVG|RFCENVG)) {
    154 		p->egrp = smalloc(sizeof(Egrp));
    155 		p->egrp->ref.ref = 1;
    156 		if(flag & RFENVG)
    157 			envcpy(p->egrp, up->egrp);
    158 	}
    159 	else {
    160 		p->egrp = up->egrp;
    161 		incref(&p->egrp->ref);
    162 	}
    163 	p->hang = up->hang;
    164 	p->procmode = up->procmode;
    165 
    166 	/* Craft a return frame which will cause the child to pop out of
    167 	 * the scheduler in user mode with the return register zero
    168 	 */
    169 	forkchild(p, up->dbgreg);
    170 
    171 	p->parent = up;
    172 	p->parentpid = up->pid;
    173 	if(flag&RFNOWAIT)
    174 		p->parentpid = 0;
    175 	else {
    176 		lock(&up->exl);
    177 		up->nchild++;
    178 		unlock(&up->exl);
    179 	}
    180 	if((flag&RFNOTEG) == 0)
    181 		p->noteid = up->noteid;
    182 
    183 	p->fpstate = up->fpstate;
    184 	pid = p->pid;
    185 	memset(p->time, 0, sizeof(p->time));
    186 	p->time[TReal] = msec();
    187 
    188 	kstrdup(&p->text, up->text);
    189 	kstrdup(&p->user, up->user);
    190 	/*
    191 	 *  since the bss/data segments are now shareable,
    192 	 *  any mmu info about this process is now stale
    193 	 *  (i.e. has bad properties) and has to be discarded.
    194 	 */
    195 	flushmmu();
    196 	p->basepri = up->basepri;
    197 	p->priority = up->basepri;
    198 	p->fixedpri = up->fixedpri;
    199 	p->mp = up->mp;
    200 	wm = up->wired;
    201 	if(wm)
    202 		procwired(p, wm->machno);
    203 	ready(p);
    204 	sched();
    205 	return pid;
    206 }
    207 
    208 static uint32
    209 l2be(uint32 l)
    210 {
    211 	uchar *cp;
    212 
    213 	cp = (uchar*)&l;
    214 	return (cp[0]<<24) | (cp[1]<<16) | (cp[2]<<8) | cp[3];
    215 }
    216 
    217 static char Echanged[] = "exec arguments changed underfoot";
    218 
    219 long
    220 sysexec(uint32 *arg)
    221 {
    222 	char *volatile elem, *volatile file, *ufile;
    223 	Chan *volatile tc;
    224 
    225 	/*
    226 	 * Open the file, remembering the final element and the full name.
    227 	 */
    228 	file = nil;
    229 	elem = nil;
    230 	tc = nil;
    231 	if(waserror()){
    232 		if(file)
    233 			free(file);
    234 		if(elem)
    235 			free(elem);
    236 		if(tc)
    237 			cclose(tc);
    238 		nexterror();
    239 	}
    240 
    241 	ufile = uvalidaddr(arg[0], 1, 0);
    242 	file = validnamedup(ufile, 1);
    243 	tc = namec(file, Aopen, OEXEC, 0);
    244 	kstrdup((char**)&elem, up->genbuf);
    245 
    246 	/*
    247 	 * Read the header.  If it's a #!, fill in progarg[] with info and repeat.
    248 	 */
    249 	int i, n, nprogarg;
    250 	char *progarg[sizeof(Exec)/2+1];
    251 	char *prog, *p;
    252 	char line[sizeof(Exec)+1];
    253 	Exec exec;
    254 
    255 	nprogarg = 0;
    256 	n = devtab[tc->type]->read(tc, &exec, sizeof(Exec), 0);
    257 	if(n < 2)
    258 		error(Ebadexec);
    259 	p = (char*)&exec;
    260 	if(p[0] == '#' && p[1] == '!'){
    261 		memmove(line, p, n);
    262 		nprogarg = shargs(line, n, progarg);
    263 		if(nprogarg == 0)
    264 			error(Ebadexec);
    265 		
    266 		/* The original file becomes an extra arg after #! line */
    267 		progarg[nprogarg++] = file;
    268 		
    269 		/*
    270 		 * Take the #! $0 as a file to open, and replace
    271 		 * $0 with the original path's name.
    272 		 */
    273 		prog = progarg[0];
    274 		progarg[0] = elem;
    275 		cclose(tc);
    276 		tc = nil;	/* in case namec errors out */
    277 		tc = namec(prog, Aopen, OEXEC, 0);
    278 		n = devtab[tc->type]->read(tc, &exec, sizeof(Exec), 0);
    279 		if(n < 2)
    280 			error(Ebadexec);
    281 	}
    282 
    283 	/* 
    284 	 * #! has had its chance, now we need a real binary
    285 	 */
    286 	uint32 magic, entry, text, etext, data, edata, bss, ebss;
    287 
    288 	magic = l2be(exec.magic);
    289 	if(n != sizeof(Exec) || l2be(exec.magic) != AOUT_MAGIC)
    290 		error(Ebadexec);
    291 
    292 	entry = l2be(exec.entry);
    293 	text = l2be(exec.text);
    294 	data = l2be(exec.data);
    295 	bss = l2be(exec.bss);
    296 	etext = ROUND(UTZERO+sizeof(Exec)+text, BY2PG);
    297 	edata = ROUND(etext + data, BY2PG);
    298 	ebss = ROUND(etext + data + bss, BY2PG);
    299 	
    300 //iprint("entry %#lux text %#lux data %#lux bss %#lux\n", entry, text, data, bss);
    301 //iprint("etext %#lux edata %#lux ebss %#lux\n", etext, edata, ebss);
    302 
    303 	if(entry < UTZERO+sizeof(Exec) || entry >= UTZERO+sizeof(Exec)+text)
    304 		error(Ebadexec);
    305 	
    306 	/* many overflow possibilities */
    307 	if(text >= USTKTOP || data >= USTKTOP || bss >= USTKTOP
    308 	|| etext >= USTKTOP || edata >= USTKTOP || ebss >= USTKTOP
    309 	|| etext >= USTKTOP || edata < etext || ebss < edata)
    310 		error(Ebadexec);
    311 
    312 	/*
    313 	 * Copy argv into new stack segment temporarily mapped elsewhere.
    314 	 * Be careful: multithreaded program could be changing argv during this.
    315 	 * Pass 1: count number of arguments, string bytes.
    316 	 */
    317 	int nargv, strbytes;
    318 	uint32 argp, ssize, spage;
    319 
    320 	strbytes = 0;
    321 	for(i=0; i<nprogarg; i++)
    322 		strbytes += strlen(progarg[i]) + 1;
    323 
    324 	argp = arg[1];
    325 	for(nargv=0;; nargv++, argp += BY2WD){
    326 		uint32 a;
    327 		char *str;
    328 
    329 		a = *(uint32*)uvalidaddr(argp, BY2WD, 0);
    330 		if(a == 0)
    331 			break;
    332 		str = uvalidaddr(a, 1, 0);
    333 		n = ((char*)vmemchr(str, 0, 0x7FFFFFFF) - str) + 1;
    334 		if(nprogarg > 0 && nargv == 0)
    335 			continue;	/* going to skip argv[0] on #! */
    336 		strbytes += n;
    337 	}
    338 	if(nargv == 0)
    339 		error("exec missing argv");
    340 
    341 	/* 
    342 	 * Skip over argv[0] if using #!.  Waited until now so that
    343 	 * string would still be checked for validity during loop.
    344 	 */
    345 	if(nprogarg > 0){
    346 		nargv--;
    347 		arg[1] += BY2WD;
    348 	}
    349 
    350 	ssize = BY2WD*((nprogarg+nargv)+1) + ROUND(strbytes, BY2WD) + sizeof(Tos);
    351 
    352 	/*
    353 	 * 8-byte align SP for those (e.g. sparc) that need it.
    354 	 * execregs() will subtract another 4 bytes for argc.
    355 	 */
    356 	if((ssize+4) & 7)
    357 		ssize += 4;
    358 	spage = (ssize+(BY2PG-1)) >> PGSHIFT;
    359 
    360 	/*
    361 	 * Pass 2: build the stack segment, being careful not to assume
    362 	 * that the counts from pass 1 are still valid.
    363 	 */
    364 	if(spage > TSTKSIZ)
    365 		error(Enovmem);
    366 
    367 	qlock(&up->seglock);
    368 	if(waserror()){
    369 		if(up->seg[ESEG]){
    370 			putseg(up->seg[ESEG]);
    371 			up->seg[ESEG] = nil;
    372 		}
    373 		qunlock(&up->seglock);
    374 		nexterror();
    375 	}
    376 	up->seg[ESEG] = newseg(SG_STACK, TSTKTOP-USTKSIZE, USTKSIZE/BY2PG);
    377 	flushmmu();	// Needed for Plan 9 VX  XXX really?
    378 
    379 	/*
    380 	 * Top-of-stack structure.
    381 	 */
    382 	uchar *uzero;
    383 	uzero = up->pmmu.uzero;
    384 	Tos *tos;
    385 	uint32 utos;
    386 	utos = USTKTOP - sizeof(Tos);
    387 	tos = (Tos*)(uzero + utos + TSTKTOP - USTKTOP);
    388 	tos->cyclefreq = m->cyclefreq;
    389 	cycles((uvlong*)&tos->pcycles);
    390 	tos->pcycles = -tos->pcycles;
    391 	tos->kcycles = tos->pcycles;
    392 	tos->clock = 0;
    393 
    394 	/*
    395 	 * Argument pointers and strings, together.
    396 	 */
    397 	char *bp, *ep;
    398 	uint32 *targp;
    399 	uint32 ustrp, uargp;
    400 
    401 	ustrp = utos - ROUND(strbytes, BY2WD);
    402 	uargp = ustrp - BY2WD*((nprogarg+nargv)+1);
    403 	bp = (char*)(uzero + ustrp + TSTKTOP - USTKTOP);
    404 	ep = bp + strbytes;
    405 	p = bp;
    406 	targp = (uint32*)(uzero + uargp + TSTKTOP - USTKTOP);
    407 	
    408 	/* #! args are trusted */
    409 	for(i=0; i<nprogarg; i++){
    410 		n = strlen(progarg[i]) + 1;
    411 		if(n  > ep - p)
    412 			error(Echanged);
    413 		memmove(p, progarg[i], n);
    414 		p += n;
    415 		*targp++ = ustrp;
    416 		ustrp += n;
    417 	}
    418 	
    419 	/* the rest are not */
    420 	argp = arg[1];
    421 	for(i=0; i<nargv; i++){
    422 		uint32 a;
    423 		char *str;
    424 		
    425 		a = *(uint32*)uvalidaddr(argp, BY2WD, 0);
    426 		argp += BY2WD;
    427 		
    428 		str = uvalidaddr(a, 1, 0);
    429 		n = ((char*)vmemchr(str, 0, 0x7FFFFFFF) - str) + 1;
    430 		if(n  > ep - p)
    431 			error(Echanged);
    432 		memmove(p, str, n);
    433 		p += n;
    434 		*targp++ = ustrp;
    435 		ustrp += n;
    436 	}
    437 
    438 	if(*(uint32*)uvalidaddr(argp, BY2WD, 0) != 0)
    439 		error(Echanged);	
    440 	*targp = 0;
    441 
    442 	/*
    443 	 * But wait, there's more: prepare an arg copy for up->args
    444 	 * using the copy we just made in the temporary segment.
    445 	 */
    446 	char *args;
    447 	int nargs;
    448 
    449 	n = p - bp;	/* includes NUL on last arg, so must be > 0 */
    450 	if(n <= 0)	/* nprogarg+nargv > 0; checked above */
    451 		error(Egreg);
    452 	if(n > 128)
    453 		n = 128;
    454 	args = smalloc(n);
    455 	if(waserror()){
    456 		free(args);
    457 		nexterror();
    458 	}
    459 	memmove(args, bp, n);
    460 	/* find beginning of UTF character boundary to place final NUL */
    461 	while(n > 0 && (args[n-1]&0xC0) == 0x80)
    462 		n--;
    463 	args[n-1] = '\0';
    464 	nargs = n;
    465 
    466 	/*
    467 	 * Now we're ready to commit.
    468 	 */
    469 	free(up->text);
    470 	up->text = elem;
    471 	free(up->args);
    472 	up->args = args;
    473 	up->nargs = n;
    474 	elem = nil;
    475 	poperror();	/* args */
    476 
    477 	/*
    478 	 * Free old memory.  Special segments maintained across exec.
    479 	 */
    480 	Segment *s;
    481 	for(i = SSEG; i <= BSEG; i++) {
    482 		putseg(up->seg[i]);
    483 		up->seg[i] = nil;	/* in case of error */
    484 	}
    485 	for(i = BSEG+1; i< NSEG; i++) {
    486 		s = up->seg[i];
    487 		if(s && (s->type&SG_CEXEC)) {
    488 			putseg(s);
    489 			up->seg[i] = nil;
    490 		}
    491 	}
    492 	
    493 	/*
    494 	 * Close on exec
    495 	 */
    496 	Fgrp *f;
    497 	f = up->fgrp;
    498 	for(i=0; i<=f->maxfd; i++)
    499 		fdclose(i, CCEXEC);
    500 
    501 	/* Text.  Shared. Attaches to cache image if possible */
    502 	/* attachimage returns a locked cache image */
    503 	Image *img;
    504 	Segment *ts;
    505 	img = attachimage(SG_TEXT|SG_RONLY, tc, UTZERO, (etext-UTZERO)>>PGSHIFT);
    506 	ts = img->s;
    507 	up->seg[TSEG] = ts;
    508 	ts->flushme = 1;
    509 	ts->fstart = 0;
    510 	ts->flen = sizeof(Exec)+text;
    511 	unlock(&img->ref.lk);
    512 
    513 	/* Data. Shared. */
    514 	s = newseg(SG_DATA, etext, (edata-etext)>>PGSHIFT);
    515 	up->seg[DSEG] = s;
    516 
    517 	/* Attached by hand */
    518 	incref(&img->ref);
    519 	s->image = img;
    520 	s->fstart = ts->fstart+ts->flen;
    521 	s->flen = data;
    522 
    523 	/* BSS. Zero fill on demand */
    524 	up->seg[BSEG] = newseg(SG_BSS, edata, (ebss-edata)>>PGSHIFT);
    525 
    526 	/*
    527 	 * Move the stack
    528 	 */
    529 	s = up->seg[ESEG];
    530 	up->seg[ESEG] = 0;
    531 	up->seg[SSEG] = s;
    532 	qunlock(&up->seglock);
    533 	poperror();	/* seglock */
    534 
    535 	s->base = USTKTOP-USTKSIZE;
    536 	s->top = USTKTOP;
    537 	relocateseg(s, USTKTOP-TSTKTOP);
    538 
    539 	/*
    540 	 *  '/' processes are higher priority (hack to make /ip more responsive).
    541 	 */
    542 	if(devtab[tc->type]->dc == L'/')
    543 		up->basepri = PriRoot;
    544 	up->priority = up->basepri;
    545 	poperror();	/* tc, elem, file */
    546 	cclose(tc);
    547 	free(file);
    548 	// elem is now up->text
    549 
    550 	/*
    551 	 *  At this point, the mmu contains info about the old address
    552 	 *  space and needs to be flushed
    553 	 */
    554 	flushmmu();
    555 	qlock(&up->debug);
    556 	up->nnote = 0;
    557 	up->notify = 0;
    558 	up->notified = 0;
    559 	up->privatemem = 0;
    560 	procsetup(up);
    561 	qunlock(&up->debug);
    562 	if(up->hang)
    563 		up->procctl = Proc_stopme;
    564 
    565 	return execregs(entry, USTKTOP - uargp, nprogarg+nargv);
    566 }
    567 
    568 int
    569 shargs(char *s, int n, char **ap)
    570 {
    571 	int i;
    572 
    573 	s += 2;
    574 	n -= 2;		/* skip #! */
    575 	for(i=0; s[i]!='\n'; i++)
    576 		if(i == n-1)
    577 			return 0;
    578 	s[i] = 0;
    579 	*ap = 0;
    580 	i = 0;
    581 	for(;;) {
    582 		while(*s==' ' || *s=='\t')
    583 			s++;
    584 		if(*s == 0)
    585 			break;
    586 		i++;
    587 		*ap++ = s;
    588 		*ap = 0;
    589 		while(*s && *s!=' ' && *s!='\t')
    590 			s++;
    591 		if(*s == 0)
    592 			break;
    593 		else
    594 			*s++ = 0;
    595 	}
    596 	return i;
    597 }
    598 
    599 int
    600 return0(void *v)
    601 {
    602 	return 0;
    603 }
    604 
    605 long
    606 syssleep(uint32 *arg)
    607 {
    608 
    609 	int n;
    610 
    611 	n = arg[0];
    612 	if(n <= 0) {
    613 		yield();
    614 		return 0;
    615 	}
    616 	if(n < TK2MS(1))
    617 		n = TK2MS(1);
    618 	tsleep(&up->sleep, return0, 0, n);
    619 	return 0;
    620 }
    621 
    622 long
    623 sysalarm(uint32 *arg)
    624 {
    625 	return procalarm(arg[0]);
    626 }
    627 
    628 long
    629 sysexits(uint32 *arg)
    630 {
    631 	char *status;
    632 	char *inval = "invalid exit string";
    633 	char buf[ERRMAX];
    634 
    635 	if(arg[0]){
    636 		if(waserror())
    637 			status = inval;
    638 		else{
    639 			status = uvalidaddr(arg[0], 1, 0);
    640 			if(vmemchr(status, 0, ERRMAX) == 0){
    641 				memmove(buf, status, ERRMAX);
    642 				buf[ERRMAX-1] = 0;
    643 				status = buf;
    644 			}
    645 			poperror();
    646 		}
    647 
    648 	}else
    649 		status = nil;
    650 	pexit(status, 1);
    651 	return 0;		/* not reached */
    652 }
    653 
    654 long
    655 sys_wait(uint32 *arg)
    656 {
    657 	int pid;
    658 	Waitmsg w;
    659 	OWaitmsg *ow;
    660 
    661 	if(arg[0] == 0)
    662 		return pwait(nil);
    663 
    664 	ow = uvalidaddr(arg[0], sizeof(OWaitmsg), 1);
    665 	evenaddr(arg[0]);
    666 	pid = pwait(&w);
    667 	if(pid >= 0){
    668 		readnum(0, ow->pid, NUMSIZE, w.pid, NUMSIZE);
    669 		readnum(0, ow->time+TUser*NUMSIZE, NUMSIZE, w.time[TUser], NUMSIZE);
    670 		readnum(0, ow->time+TSys*NUMSIZE, NUMSIZE, w.time[TSys], NUMSIZE);
    671 		readnum(0, ow->time+TReal*NUMSIZE, NUMSIZE, w.time[TReal], NUMSIZE);
    672 		strncpy(ow->msg, w.msg, sizeof(ow->msg));
    673 		ow->msg[sizeof(ow->msg)-1] = '\0';
    674 	}
    675 	return pid;
    676 }
    677 
    678 long
    679 sysawait(uint32 *arg)
    680 {
    681 	int i;
    682 	int pid;
    683 	Waitmsg w;
    684 	uint32 n;
    685 	char *buf;
    686 
    687 	n = arg[1];
    688 	buf = uvalidaddr(arg[0], n, 1);
    689 	pid = pwait(&w);
    690 	if(pid < 0)
    691 		return -1;
    692 	i = snprint(buf, n, "%d %lud %lud %lud %q",
    693 		w.pid,
    694 		w.time[TUser], w.time[TSys], w.time[TReal],
    695 		w.msg);
    696 
    697 	return i;
    698 }
    699 
    700 void
    701 werrstr(char *fmt, ...)
    702 {
    703 	va_list va;
    704 
    705 	if(up == nil)
    706 		return;
    707 
    708 	va_start(va, fmt);
    709 	vseprint(up->syserrstr, up->syserrstr+ERRMAX, fmt, va);
    710 	va_end(va);
    711 }
    712 
    713 static long
    714 generrstr(uint32 addr, uint nbuf)
    715 {
    716 	char tmp[ERRMAX];
    717 	char *buf;
    718 
    719 	if(nbuf == 0)
    720 		error(Ebadarg);
    721 	buf = uvalidaddr(addr, nbuf, 1);
    722 	if(nbuf > sizeof tmp)
    723 		nbuf = sizeof tmp;
    724 	memmove(tmp, buf, nbuf);
    725 
    726 	/* make sure it's NUL-terminated */
    727 	tmp[nbuf-1] = '\0';
    728 	memmove(buf, up->syserrstr, nbuf);
    729 	buf[nbuf-1] = '\0';
    730 	memmove(up->syserrstr, tmp, nbuf);
    731 	return 0;
    732 }
    733 
    734 long
    735 syserrstr(uint32 *arg)
    736 {
    737 	return generrstr(arg[0], arg[1]);
    738 }
    739 
    740 /* compatibility for old binaries */
    741 long
    742 sys_errstr(uint32 *arg)
    743 {
    744 	return generrstr(arg[0], 64);
    745 }
    746 
    747 long
    748 sysnotify(uint32 *arg)
    749 {
    750 	if(arg[0] != 0)
    751 		uvalidaddr(arg[0], 1, 0);
    752 	up->notify = arg[0];	/* checked again when used */
    753 	return 0;
    754 }
    755 
    756 long
    757 sysnoted(uint32 *arg)
    758 {
    759 	if(arg[0]!=NRSTR && !up->notified)
    760 		error(Egreg);
    761 	return 0;
    762 }
    763 
    764 long
    765 syssegbrk(uint32 *arg)
    766 {
    767 	int i;
    768 	uint32 addr;
    769 	Segment *s;
    770 
    771 	addr = arg[0];
    772 	for(i = 0; i < NSEG; i++) {
    773 		s = up->seg[i];
    774 		if(s == 0 || addr < s->base || addr >= s->top)
    775 			continue;
    776 		switch(s->type&SG_TYPE) {
    777 		case SG_TEXT:
    778 		case SG_DATA:
    779 		case SG_STACK:
    780 			error(Ebadarg);
    781 		default:
    782 			return ibrk(arg[1], i);
    783 		}
    784 	}
    785 
    786 	error(Ebadarg);
    787 	return 0;		/* not reached */
    788 }
    789 
    790 long
    791 syssegattach(uint32 *arg)
    792 {
    793 	return segattach(up, arg[0], uvalidaddr(arg[1], 1, 0), arg[2], arg[3]);
    794 }
    795 
    796 long
    797 syssegdetach(uint32 *arg)
    798 {
    799 	int i;
    800 	uint32 addr;
    801 	Segment *s;
    802 
    803 	qlock(&up->seglock);
    804 	if(waserror()){
    805 		qunlock(&up->seglock);
    806 		nexterror();
    807 	}
    808 
    809 	s = 0;
    810 	addr = arg[0];
    811 	for(i = 0; i < NSEG; i++)
    812 		if((s = up->seg[i])) {
    813 			qlock(&s->lk);
    814 			if((addr >= s->base && addr < s->top) ||
    815 			   (s->top == s->base && addr == s->base))
    816 				goto found;
    817 			qunlock(&s->lk);
    818 		}
    819 
    820 	error(Ebadarg);
    821 
    822 found:
    823 	/*
    824 	 * Check we are not detaching the initial stack segment.
    825 	 */
    826 	if(s == up->seg[SSEG]){
    827 		qunlock(&s->lk);
    828 		error(Ebadarg);
    829 	}
    830 	up->seg[i] = 0;
    831 	qunlock(&s->lk);
    832 	putseg(s);
    833 	qunlock(&up->seglock);
    834 	poperror();
    835 
    836 	/* Ensure we flush any entries from the lost segment */
    837 	flushmmu();
    838 	return 0;
    839 }
    840 
    841 long
    842 syssegfree(uint32 *arg)
    843 {
    844 	Segment *s;
    845 	uint32 from, to;
    846 
    847 	from = arg[0];
    848 	s = seg(up, from, 1);
    849 	if(s == nil)
    850 		error(Ebadarg);
    851 	to = (from + arg[1]) & ~(BY2PG-1);
    852 	from = PGROUND(from);
    853 
    854 	if(to > s->top) {
    855 		qunlock(&s->lk);
    856 		error(Ebadarg);
    857 	}
    858 
    859 	mfreeseg(s, from, (to - from) / BY2PG);
    860 	qunlock(&s->lk);
    861 	flushmmu();
    862 
    863 	return 0;
    864 }
    865 
    866 /* For binary compatibility */
    867 long
    868 sysbrk_(uint32 *arg)
    869 {
    870 	return ibrk(arg[0], BSEG);
    871 }
    872 
    873 long
    874 sysrendezvous(uint32 *arg)
    875 {
    876 	uintptr tag, val;
    877 	Proc *p, **l;
    878 
    879 	tag = arg[0];
    880 	l = &REND(up->rgrp, tag);
    881 	up->rendval = ~(uintptr)0;
    882 
    883 	lock(&up->rgrp->ref.lk);
    884 	for(p = *l; p; p = p->rendhash) {
    885 		if(p->rendtag == tag) {
    886 			*l = p->rendhash;
    887 			val = p->rendval;
    888 			p->rendval = arg[1];
    889 
    890 			while(p->mach != 0)
    891 				;
    892 			ready(p);
    893 			unlock(&up->rgrp->ref.lk);
    894 			return val;
    895 		}
    896 		l = &p->rendhash;
    897 	}
    898 
    899 	/* Going to sleep here */
    900 	up->rendtag = tag;
    901 	up->rendval = arg[1];
    902 	up->rendhash = *l;
    903 	*l = up;
    904 	up->state = Rendezvous;
    905 	unlock(&up->rgrp->ref.lk);
    906 
    907 	sched();
    908 
    909 	return up->rendval;
    910 }
    911 
    912 /*
    913  * The implementation of semaphores is complicated by needing
    914  * to avoid rescheduling in syssemrelease, so that it is safe
    915  * to call from real-time processes.  This means syssemrelease
    916  * cannot acquire any qlocks, only spin locks.
    917  * 
    918  * Semacquire and semrelease must both manipulate the semaphore
    919  * wait list.  Lock-free linked lists only exist in theory, not
    920  * in practice, so the wait list is protected by a spin lock.
    921  * 
    922  * The semaphore value *addr is stored in user memory, so it
    923  * cannot be read or written while holding spin locks.
    924  * 
    925  * Thus, we can access the list only when holding the lock, and
    926  * we can access the semaphore only when not holding the lock.
    927  * This makes things interesting.  Note that sleep's condition function
    928  * is called while holding two locks - r and up->rlock - so it cannot
    929  * access the semaphore value either.
    930  * 
    931  * An acquirer announces its intention to try for the semaphore
    932  * by putting a Sema structure onto the wait list and then
    933  * setting Sema.waiting.  After one last check of semaphore,
    934  * the acquirer sleeps until Sema.waiting==0.  A releaser of n
    935  * must wake up n acquirers who have Sema.waiting set.  It does
    936  * this by clearing Sema.waiting and then calling wakeup.
    937  * 
    938  * There are three interesting races here.  
    939  *
    940  * The first is that in this particular sleep/wakeup usage, a single
    941  * wakeup can rouse a process from two consecutive sleeps!  
    942  * The ordering is:
    943  * 
    944  * 	(a) set Sema.waiting = 1
    945  * 	(a) call sleep
    946  * 	(b) set Sema.waiting = 0
    947  * 	(a) check Sema.waiting inside sleep, return w/o sleeping
    948  * 	(a) try for semaphore, fail
    949  * 	(a) set Sema.waiting = 1
    950  * 	(a) call sleep
    951  * 	(b) call wakeup(a)
    952  * 	(a) wake up again
    953  * 
    954  * This is okay - semacquire will just go around the loop
    955  * again.  It does mean that at the top of the for(;;) loop in
    956  * semacquire, phore.waiting might already be set to 1.
    957  * 
    958  * The second is that a releaser might wake an acquirer who is
    959  * interrupted before he can acquire the lock.  Since
    960  * release(n) issues only n wakeup calls -- only n can be used
    961  * anyway -- if the interrupted process is not going to use his
    962  * wakeup call he must pass it on to another acquirer.
    963  * 
    964  * The third race is similar to the second but more subtle.  An
    965  * acquirer sets waiting=1 and then does a final canacquire()
    966  * before going to sleep.  The opposite order would result in
    967  * missing wakeups that happen between canacquire and
    968  * waiting=1.  (In fact, the whole point of Sema.waiting is to
    969  * avoid missing wakeups between canacquire() and sleep().) But
    970  * there can be spurious wakeups between a successful
    971  * canacquire() and the following semdequeue().  This wakeup is
    972  * not useful to the acquirer, since he has already acquired
    973  * the semaphore.  Like in the previous case, though, the
    974  * acquirer must pass the wakeup call along.
    975  * 
    976  * This is all rather subtle.  The code below has been verified
    977  * with the spin model /sys/src/9/port/semaphore.p.  The
    978  * original code anticipated the second race but not the first
    979  * or third, which were caught only with spin.  The first race
    980  * is mentioned in /sys/doc/sleep.ps, but I'd forgotten about it.
    981  * It was lucky that my abstract model of sleep/wakeup still managed
    982  * to preserve that behavior.
    983  *
    984  * I remain slightly concerned about memory coherence
    985  * outside of locks.  The spin model does not take 
    986  * queued processor writes into account so we have to
    987  * think hard.  The only variables accessed outside locks
    988  * are the semaphore value itself and the boolean flag
    989  * Sema.waiting.  The value is only accessed with cmpswap,
    990  * whose job description includes doing the right thing as
    991  * far as memory coherence across processors.  That leaves
    992  * Sema.waiting.  To handle it, we call coherence() before each
    993  * read and after each write.		- rsc
    994  */
    995 
    996 /* Add semaphore p with addr a to list in seg. */
    997 static void
    998 semqueue(Segment *s, long *a, Sema *p)
    999 {
   1000 	memset(p, 0, sizeof *p);
   1001 	p->addr = a;
   1002 	lock(&s->sema.rendez.lk);	/* uses s->sema.Rendez.Lock, but no one else is */
   1003 	p->next = &s->sema;
   1004 	p->prev = s->sema.prev;
   1005 	p->next->prev = p;
   1006 	p->prev->next = p;
   1007 	unlock(&s->sema.rendez.lk);
   1008 }
   1009 
   1010 /* Remove semaphore p from list in seg. */
   1011 static void
   1012 semdequeue(Segment *s, Sema *p)
   1013 {
   1014 	lock(&s->sema.rendez.lk);
   1015 	p->next->prev = p->prev;
   1016 	p->prev->next = p->next;
   1017 	unlock(&s->sema.rendez.lk);
   1018 }
   1019 
   1020 /* Wake up n waiters with addr a on list in seg. */
   1021 static void
   1022 semwakeup(Segment *s, long *a, long n)
   1023 {
   1024 	Sema *p;
   1025 	
   1026 	lock(&s->sema.rendez.lk);
   1027 	for(p=s->sema.next; p!=&s->sema && n>0; p=p->next){
   1028 		if(p->addr == a && p->waiting){
   1029 			p->waiting = 0;
   1030 			coherence();
   1031 			wakeup(&p->rendez);
   1032 			n--;
   1033 		}
   1034 	}
   1035 	unlock(&s->sema.rendez.lk);
   1036 }
   1037 
   1038 /* Add delta to semaphore and wake up waiters as appropriate. */
   1039 static long
   1040 semrelease(Segment *s, long *addr, long delta)
   1041 {
   1042 	long value;
   1043 
   1044 	do
   1045 		value = *addr;
   1046 	while(!cmpswap(addr, value, value+delta));
   1047 	semwakeup(s, addr, delta);
   1048 	return value+delta;
   1049 }
   1050 
   1051 /* Try to acquire semaphore using compare-and-swap */
   1052 static int
   1053 canacquire(long *addr)
   1054 {
   1055 	long value;
   1056 	
   1057 	while((value=*addr) > 0)
   1058 		if(cmpswap(addr, value, value-1))
   1059 			return 1;
   1060 	return 0;
   1061 }		
   1062 
   1063 /* Should we wake up? */
   1064 static int
   1065 semawoke(void *p)
   1066 {
   1067 	coherence();
   1068 	return !((Sema*)p)->waiting;
   1069 }
   1070 
   1071 /* Acquire semaphore (subtract 1). */
   1072 static int
   1073 semacquire(Segment *s, long *addr, int block)
   1074 {
   1075 	int acquired;
   1076 	Sema phore;
   1077 
   1078 	if(canacquire(addr))
   1079 		return 1;
   1080 	if(!block)
   1081 		return 0;
   1082 
   1083 	acquired = 0;
   1084 	semqueue(s, addr, &phore);
   1085 	for(;;){
   1086 		phore.waiting = 1;
   1087 		coherence();
   1088 		if(canacquire(addr)){
   1089 			acquired = 1;
   1090 			break;
   1091 		}
   1092 		if(waserror())
   1093 			break;
   1094 		sleep(&phore.rendez, semawoke, &phore);
   1095 		poperror();
   1096 	}
   1097 	semdequeue(s, &phore);
   1098 	coherence();	/* not strictly necessary due to lock in semdequeue */
   1099 	if(!phore.waiting)
   1100 		semwakeup(s, addr, 1);
   1101 	if(!acquired)
   1102 		nexterror();
   1103 	return 1;
   1104 }
   1105 
   1106 long
   1107 syssemacquire(uint32 *arg)
   1108 {
   1109 	int block;
   1110 	long *addr;
   1111 	Segment *s;
   1112 
   1113 	addr = uvalidaddr(arg[0], sizeof(long), 1);
   1114 	evenaddr(arg[0]);
   1115 	block = arg[1];
   1116 	
   1117 	if((s = seg(up, arg[0], 0)) == nil)
   1118 		error(Ebadarg);
   1119 	if(*addr < 0)
   1120 		error(Ebadarg);
   1121 	return semacquire(s, addr, block);
   1122 }
   1123 
   1124 long
   1125 syssemrelease(uint32 *arg)
   1126 {
   1127 	long *addr, delta;
   1128 	Segment *s;
   1129 
   1130 	addr = uvalidaddr(arg[0], sizeof(long), 1);
   1131 	evenaddr(arg[0]);
   1132 	delta = arg[1];
   1133 
   1134 	if((s = seg(up, arg[0], 0)) == nil)
   1135 		error(Ebadarg);
   1136 	if(delta < 0 || *addr < 0)
   1137 		error(Ebadarg);
   1138 	return semrelease(s, addr, arg[1]);
   1139 }