proc.c (29466B)
1 #define WANT_M 2 #include "u.h" 3 #include "lib.h" 4 #include "mem.h" 5 #include "dat.h" 6 #include "fns.h" 7 #include "error.h" 8 #include "trace.h" 9 10 int schedgain = 30; /* units in seconds */ 11 int nrdy; 12 Ref noteidalloc; 13 14 void updatecpu(Proc*); 15 int reprioritize(Proc*); 16 17 ulong delayedscheds; /* statistics */ 18 long skipscheds; 19 long preempts; 20 ulong load; 21 22 static Ref pidalloc; 23 24 static struct Procalloc 25 { 26 Lock lk; 27 Proc* ht[128]; 28 Proc* arena; 29 Proc* free; 30 } procalloc; 31 32 enum 33 { 34 Q=10, 35 DQ=4, 36 Scaling=2, 37 }; 38 39 Schedq runq[Nrq]; 40 ulong runvec; 41 42 char *statename[] = 43 { /* BUG: generate automatically */ 44 "Dead", 45 "Moribund", 46 "Ready", 47 "Scheding", 48 "Running", 49 "Queueing", 50 "QueueingR", 51 "QueueingW", 52 "Wakeme", 53 "Broken", 54 "Stopped", 55 "Rendez", 56 "Waitrelease", 57 }; 58 59 static void pidhash(Proc*); 60 static void pidunhash(Proc*); 61 static void rebalance(void); 62 63 /* 64 * Always splhi()'ed. 65 */ 66 void 67 schedinit(void) /* never returns */ 68 { 69 70 setlabel(&m->sched); 71 if(traceprocs) // Plan 9 VX 72 print("schedinit %p %p %s\n", m, up, up ? up->text : ""); 73 if(up) { 74 m->proc = 0; 75 switch(up->state) { 76 case Running: 77 ready(up); 78 break; 79 case Moribund: 80 up->state = Dead; 81 82 /* 83 * Holding locks from pexit: 84 * procalloc 85 * palloc 86 */ 87 mmurelease(up); 88 89 up->qnext = procalloc.free; 90 procalloc.free = up; 91 92 unlock(&palloc.lk); 93 unlock(&procalloc.lk); 94 break; 95 } 96 up->mach = nil; 97 updatecpu(up); 98 up = nil; 99 } 100 sched(); 101 } 102 103 /* 104 * If changing this routine, look also at sleep(). It 105 * contains a copy of the guts of sched(). 106 */ 107 void 108 sched(void) 109 { 110 Proc *p; 111 112 if(traceprocs) // Plan 9 VX 113 print("sched %p %p [%s]\n", m, up, up ? up->text : ""); 114 if(m->ilockdepth) 115 panic("cpu%d: ilockdepth %d, last lock %#p at %#p, sched called from %#p", 116 m->machno, 117 m->ilockdepth, 118 up? up->lastilock: nil, 119 (up && up->lastilock)? up->lastilock->pc: 0, 120 getcallerpc(&p+2)); 121 if(up){ 122 /* 123 * Delay the sched until the process gives up the locks 124 * it is holding. This avoids dumb lock loops. 125 * Don't delay if the process is Moribund. 126 * It called sched to die. 127 * But do sched eventually. This avoids a missing unlock 128 * from hanging the entire kernel. 129 * But don't reschedule procs holding palloc or procalloc. 130 * Those are far too important to be holding while asleep. 131 * 132 * This test is not exact. There can still be a few instructions 133 * in the middle of taslock when a process holds a lock 134 * but Lock.p has not yet been initialized. 135 */ 136 if(up->nlocks.ref) 137 if(up->state != Moribund) 138 if(up->delaysched < 20 139 || palloc.lk.p == up 140 || procalloc.lk.p == up){ 141 up->delaysched++; 142 delayedscheds++; 143 return; 144 } 145 up->delaysched = 0; 146 147 splhi(); 148 149 /* statistics */ 150 m->cs++; 151 152 procsave(up); 153 if(setlabel(&up->sched)){ 154 if(traceprocs) 155 print("sched %p %p: awake\n", m, up); 156 procrestore(up); 157 spllo(); 158 return; 159 } 160 if(traceprocs) 161 print("sched %p %p: entering scheduler\n", m, up); 162 gotolabel(&m->sched); 163 } 164 if(traceprocs) 165 print("sched %p %p: runproc", m, up); 166 p = runproc(); 167 if(1){ 168 updatecpu(p); 169 p->priority = reprioritize(p); 170 } 171 if(p != m->readied) 172 m->schedticks = msec() + HZ/10; 173 m->readied = 0; 174 up = p; 175 up->state = Running; 176 up->mach = MACHP(m->machno); 177 m->proc = up; 178 if(traceprocs) 179 print("run %p %p [%s]\n", m, up, up->text); 180 mmuswitch(up); 181 gotolabel(&up->sched); 182 } 183 184 int 185 anyready(void) 186 { 187 return runvec; 188 } 189 190 int 191 anyhigher(void) 192 { 193 return runvec & ~((1<<(up->priority+1))-1); 194 } 195 196 /* 197 * here once per clock tick to see if we should resched 198 */ 199 void 200 hzsched(void) 201 { 202 /* once a second, rebalance will reprioritize ready procs */ 203 if(m->machno == 0) 204 rebalance(); 205 206 /* unless preempted, get to run for at least 100ms */ 207 if(anyhigher() 208 || (!up->fixedpri && msec() > m->schedticks && anyready())){ 209 m->readied = nil; /* avoid cooperative scheduling */ 210 up->delaysched++; 211 } 212 } 213 214 /* 215 * here at the end of non-clock interrupts to see if we should preempt the 216 * current process. Returns 1 if preempted, 0 otherwise. 217 */ 218 int 219 preempted(void) 220 { 221 if(up && up->state == Running) 222 if(up->preempted == 0) 223 if(anyhigher()) 224 if(!active.exiting){ 225 m->readied = nil; /* avoid cooperative scheduling */ 226 up->preempted = 1; 227 sched(); 228 splhi(); 229 up->preempted = 0; 230 return 1; 231 } 232 return 0; 233 } 234 235 /* 236 * Update the cpu time average for this particular process, 237 * which is about to change from up -> not up or vice versa. 238 * p->lastupdate is the last time an updatecpu happened. 239 * 240 * The cpu time average is a decaying average that lasts 241 * about D clock ticks. D is chosen to be approximately 242 * the cpu time of a cpu-intensive "quick job". A job has to run 243 * for approximately D clock ticks before we home in on its 244 * actual cpu usage. Thus if you manage to get in and get out 245 * quickly, you won't be penalized during your burst. Once you 246 * start using your share of the cpu for more than about D 247 * clock ticks though, your p->cpu hits 1000 (1.0) and you end up 248 * below all the other quick jobs. Interactive tasks, because 249 * they basically always use less than their fair share of cpu, 250 * will be rewarded. 251 * 252 * If the process has not been running, then we want to 253 * apply the filter 254 * 255 * cpu = cpu * (D-1)/D 256 * 257 * n times, yielding 258 * 259 * cpu = cpu * ((D-1)/D)^n 260 * 261 * but D is big enough that this is approximately 262 * 263 * cpu = cpu * (D-n)/D 264 * 265 * so we use that instead. 266 * 267 * If the process has been running, we apply the filter to 268 * 1 - cpu, yielding a similar equation. Note that cpu is 269 * stored in fixed point (* 1000). 270 * 271 * Updatecpu must be called before changing up, in order 272 * to maintain accurate cpu usage statistics. It can be called 273 * at any time to bring the stats for a given proc up-to-date. 274 */ 275 void 276 updatecpu(Proc *p) 277 { 278 int n, t, ocpu; 279 int D = schedgain*HZ*Scaling; 280 281 if(p->edf) 282 return; 283 284 t = msec()*Scaling + Scaling/2; 285 n = t - p->lastupdate; 286 p->lastupdate = t; 287 288 if(n == 0) 289 return; 290 if(n > D) 291 n = D; 292 293 ocpu = p->cpu; 294 if(p != up) 295 p->cpu = (ocpu*(D-n))/D; 296 else{ 297 t = 1000 - ocpu; 298 t = (t*(D-n))/D; 299 p->cpu = 1000 - t; 300 } 301 302 //iprint("pid %d %s for %d cpu %d -> %d\n", p->pid,p==up?"active":"inactive",n, ocpu,p->cpu); 303 } 304 305 /* 306 * On average, p has used p->cpu of a cpu recently. 307 * Its fair share is conf.nmach/m->load of a cpu. If it has been getting 308 * too much, penalize it. If it has been getting not enough, reward it. 309 * I don't think you can get much more than your fair share that 310 * often, so most of the queues are for using less. Having a priority 311 * of 3 means you're just right. Having a higher priority (up to p->basepri) 312 * means you're not using as much as you could. 313 */ 314 int 315 reprioritize(Proc *p) 316 { 317 int fairshare, n, load, ratio; 318 319 load = MACHP(0)->load; 320 if(load == 0) 321 return p->basepri; 322 323 /* 324 * fairshare = 1.000 * conf.nproc * 1.000/load, 325 * except the decimal point is moved three places 326 * on both load and fairshare. 327 */ 328 fairshare = (conf.nmach*1000*1000)/load; 329 n = p->cpu; 330 if(n == 0) 331 n = 1; 332 ratio = (fairshare+n/2) / n; 333 if(ratio > p->basepri) 334 ratio = p->basepri; 335 if(ratio < 0) 336 panic("reprioritize"); 337 //iprint("pid %d cpu %d load %d fair %d pri %d\n", p->pid, p->cpu, load, fairshare, ratio); 338 return ratio; 339 } 340 341 /* 342 * add a process to a scheduling queue 343 */ 344 void 345 queueproc(Schedq *rq, Proc *p) 346 { 347 int pri; 348 349 pri = rq - runq; 350 lock(&runq->lk); 351 p->priority = pri; 352 p->rnext = 0; 353 if(rq->tail) 354 rq->tail->rnext = p; 355 else 356 rq->head = p; 357 rq->tail = p; 358 rq->n++; 359 nrdy++; 360 runvec |= 1<<pri; 361 unlock(&runq->lk); 362 } 363 364 /* 365 * try to remove a process from a scheduling queue (called splhi) 366 */ 367 Proc* 368 dequeueproc(Schedq *rq, Proc *tp) 369 { 370 Proc *l, *p; 371 372 if(!canlock(&runq->lk)) 373 return nil; 374 375 /* 376 * the queue may have changed before we locked runq, 377 * refind the target process. 378 */ 379 l = 0; 380 for(p = rq->head; p; p = p->rnext){ 381 if(p == tp) 382 break; 383 l = p; 384 } 385 386 /* 387 * p->mach==0 only when process state is saved 388 */ 389 if(p == 0 || p->mach){ 390 unlock(&runq->lk); 391 return nil; 392 } 393 if(p->rnext == 0) 394 rq->tail = l; 395 if(l) 396 l->rnext = p->rnext; 397 else 398 rq->head = p->rnext; 399 if(rq->head == nil) 400 runvec &= ~(1<<(rq-runq)); 401 rq->n--; 402 nrdy--; 403 if(p->state != Ready) 404 print("dequeueproc %s %lud %s\n", p->text, p->pid, statename[p->state]); 405 406 unlock(&runq->lk); 407 return p; 408 } 409 410 /* 411 * ready(p) picks a new priority for a process and sticks it in the 412 * runq for that priority. 413 */ 414 void 415 _ready(Proc *p) 416 { 417 int s, pri; 418 Schedq *rq; 419 void (*pt)(Proc*, int, vlong); 420 421 s = splhi(); 422 if(0){ 423 splx(s); 424 return; 425 } 426 427 if(up != p) 428 m->readied = p; /* group scheduling */ 429 430 updatecpu(p); 431 pri = reprioritize(p); 432 p->priority = pri; 433 rq = &runq[pri]; 434 p->state = Ready; 435 queueproc(rq, p); 436 pt = proctrace; 437 if(pt) 438 pt(p, SReady, 0); 439 splx(s); 440 } 441 442 /* 443 * yield the processor and drop our priority 444 */ 445 void 446 yield(void) 447 { 448 if(anyready()){ 449 /* pretend we just used 1/2 tick */ 450 up->lastupdate -= Scaling/2; 451 sched(); 452 } 453 } 454 455 /* 456 * recalculate priorities once a second. We need to do this 457 * since priorities will otherwise only be recalculated when 458 * the running process blocks. 459 */ 460 ulong balancetime; 461 462 static void 463 rebalance(void) 464 { 465 int pri, npri, t, x; 466 Schedq *rq; 467 Proc *p; 468 469 t = msec(); 470 if(t - balancetime < HZ) 471 return; 472 balancetime = t; 473 474 for(pri=0, rq=runq; pri<Npriq; pri++, rq++){ 475 another: 476 p = rq->head; 477 if(p == nil) 478 continue; 479 if(p->mp != MACHP(m->machno)) 480 continue; 481 if(pri == p->basepri) 482 continue; 483 updatecpu(p); 484 npri = reprioritize(p); 485 if(npri != pri){ 486 x = splhi(); 487 p = dequeueproc(rq, p); 488 if(p) 489 queueproc(&runq[npri], p); 490 splx(x); 491 goto another; 492 } 493 } 494 } 495 496 497 /* 498 * pick a process to run 499 */ 500 Proc* 501 _runproc(void) 502 { 503 Schedq *rq; 504 Proc *p; 505 ulong start, now; 506 int i; 507 void (*pt)(Proc*, int, vlong); 508 509 start = perfticks(); 510 511 /* cooperative scheduling until the clock ticks */ 512 if((p=m->readied) && p->mach==0 && p->state==Ready 513 && runq[Nrq-1].head == nil && runq[Nrq-2].head == nil){ 514 skipscheds++; 515 rq = &runq[p->priority]; 516 goto found; 517 } 518 519 preempts++; 520 521 loop: 522 /* 523 * find a process that last ran on this processor (affinity), 524 * or one that hasn't moved in a while (load balancing). Every 525 * time around the loop affinity goes down. 526 */ 527 spllo(); 528 for(i = 0;; i++){ 529 /* 530 * find the highest priority target process that this 531 * processor can run given affinity constraints. 532 * 533 */ 534 for(rq = &runq[Nrq-1]; rq >= runq; rq--){ 535 for(p = rq->head; p; p = p->rnext){ 536 if(p->mp == nil || p->mp == MACHP(m->machno) 537 || (!p->wired && i > 0)) 538 goto found; 539 } 540 } 541 542 /* waste time or halt the CPU */ 543 idlehands(); 544 545 /* remember how much time we're here */ 546 now = perfticks(); 547 m->perf.inidle += now-start; 548 start = now; 549 } 550 551 found: 552 splhi(); 553 p = dequeueproc(rq, p); 554 if(p == nil) 555 goto loop; 556 557 p->state = Scheding; 558 p->mp = MACHP(m->machno); 559 560 pt = proctrace; 561 if(pt) 562 pt(p, SRun, 0); 563 return p; 564 } 565 566 int 567 canpage(Proc *p) 568 { 569 int ok = 0; 570 571 splhi(); 572 lock(&runq->lk); 573 /* Only reliable way to see if we are Running */ 574 if(p->mach == 0) { 575 p->newtlb = 1; 576 ok = 1; 577 } 578 unlock(&runq->lk); 579 spllo(); 580 581 return ok; 582 } 583 584 Proc* 585 newproc(void) 586 { 587 char msg[64]; 588 Proc *p; 589 590 lock(&procalloc.lk); 591 for(;;) { 592 if((p = procalloc.free)) 593 break; 594 595 snprint(msg, sizeof msg, "no procs; %s forking", 596 up? up->text: "kernel"); 597 unlock(&procalloc.lk); 598 resrcwait(msg); 599 lock(&procalloc.lk); 600 } 601 procalloc.free = p->qnext; 602 unlock(&procalloc.lk); 603 604 p->state = Scheding; 605 p->psstate = "New"; 606 p->mach = 0; 607 p->qnext = 0; 608 p->nchild = 0; 609 p->nwait = 0; 610 p->waitq = 0; 611 p->parent = 0; 612 p->pgrp = 0; 613 p->egrp = 0; 614 p->fgrp = 0; 615 p->rgrp = 0; 616 p->pdbg = 0; 617 p->fpstate = FPinit; 618 p->kp = 0; 619 if(up && up->procctl == Proc_tracesyscall) 620 p->procctl = Proc_tracesyscall; 621 else 622 p->procctl = 0; 623 p->syscalltrace = 0; 624 p->notepending = 0; 625 p->ureg = 0; 626 p->privatemem = 0; 627 p->noswap = 0; 628 p->errstr = p->errbuf0; 629 p->syserrstr = p->errbuf1; 630 p->errbuf0[0] = '\0'; 631 p->errbuf1[0] = '\0'; 632 p->nlocks.ref = 0; 633 p->delaysched = 0; 634 p->trace = 0; 635 kstrdup(&p->user, "*nouser"); 636 kstrdup(&p->text, "*notext"); 637 kstrdup(&p->args, ""); 638 p->nargs = 0; 639 p->setargs = 0; 640 memset(p->seg, 0, sizeof p->seg); 641 p->pid = incref(&pidalloc); 642 pidhash(p); 643 p->noteid = incref(¬eidalloc); 644 if(p->pid==0 || p->noteid==0) 645 panic("pidalloc"); 646 if(p->kstack == 0) 647 p->kstack = smalloc(KSTACK); 648 649 /* sched params */ 650 p->mp = 0; 651 p->wired = 0; 652 procpriority(p, PriNormal, 0); 653 p->cpu = 0; 654 p->lastupdate = msec()*Scaling; 655 p->edf = nil; 656 657 vxnewproc(p); 658 return p; 659 } 660 661 /* 662 * wire this proc to a machine 663 */ 664 void 665 procwired(Proc *p, int bm) 666 { 667 Proc *pp; 668 int i; 669 char nwired[MAXMACH]; 670 Mach *wm; 671 672 if(bm < 0){ 673 /* pick a machine to wire to */ 674 memset(nwired, 0, sizeof(nwired)); 675 p->wired = 0; 676 pp = proctab(0); 677 for(i=0; i<conf.nproc; i++, pp++){ 678 wm = pp->wired; 679 if(wm && pp->pid) 680 nwired[wm->machno]++; 681 } 682 bm = 0; 683 for(i=0; i<conf.nmach; i++) 684 if(nwired[i] < nwired[bm]) 685 bm = i; 686 } else { 687 /* use the virtual machine requested */ 688 bm = bm % conf.nmach; 689 } 690 691 p->wired = MACHP(bm); 692 p->mp = p->wired; 693 } 694 695 void 696 procpriority(Proc *p, int pri, int fixed) 697 { 698 if(pri >= Npriq) 699 pri = Npriq - 1; 700 else if(pri < 0) 701 pri = 0; 702 p->basepri = pri; 703 p->priority = pri; 704 if(fixed){ 705 p->fixedpri = 1; 706 } else { 707 p->fixedpri = 0; 708 } 709 } 710 711 void 712 procinit0(void) /* bad planning - clashes with devproc.c */ 713 { 714 Proc *p; 715 int i; 716 717 procalloc.free = xalloc(conf.nproc*sizeof(Proc)); 718 if(procalloc.free == nil){ 719 xsummary(); 720 panic("cannot allocate %lud procs (%ludMB)\n", conf.nproc, conf.nproc*sizeof(Proc)/(1024*1024)); 721 } 722 procalloc.arena = procalloc.free; 723 724 p = procalloc.free; 725 for(i=0; i<conf.nproc-1; i++,p++) 726 p->qnext = p+1; 727 p->qnext = 0; 728 } 729 730 /* 731 * sleep if a condition is not true. Another process will 732 * awaken us after it sets the condition. When we awaken 733 * the condition may no longer be true. 734 * 735 * we lock both the process and the rendezvous to keep r->p 736 * and p->r synchronized. 737 */ 738 void 739 sleep(Rendez *r, int (*f)(void*), void *arg) 740 { 741 int s; 742 void (*pt)(Proc*, int, vlong); 743 744 s = splhi(); 745 746 if(up->nlocks.ref) 747 print("process %lud sleeps with %lud locks held, last lock %#p locked at pc %#lux, sleep called from %#p\n", 748 up->pid, up->nlocks.ref, up->lastlock, up->lastlock->pc, getcallerpc(&r)); 749 lock(&r->lk); 750 lock(&up->rlock); 751 if(r->p){ 752 print("double sleep called from %#p, %lud %lud\n", getcallerpc(&r), r->p->pid, up->pid); 753 dumpstack(); 754 } 755 756 /* 757 * Wakeup only knows there may be something to do by testing 758 * r->p in order to get something to lock on. 759 * Flush that information out to memory in case the sleep is 760 * committed. 761 */ 762 r->p = up; 763 764 if((*f)(arg) || up->notepending){ 765 /* 766 * if condition happened or a note is pending 767 * never mind 768 */ 769 if(traceprocs) 770 print("sleep %p %p: already happened\n", m, up); 771 r->p = nil; 772 unlock(&up->rlock); 773 unlock(&r->lk); 774 } else { 775 /* 776 * now we are committed to 777 * change state and call scheduler 778 */ 779 pt = proctrace; 780 if(pt) 781 pt(up, SSleep, 0); 782 up->state = Wakeme; 783 up->r = r; 784 785 /* statistics */ 786 m->cs++; 787 788 procsave(up); 789 if(setlabel(&up->sched)) { 790 if(traceprocs) 791 print("sleep %p %p: awake\n", m, up); 792 /* 793 * here when the process is awakened 794 */ 795 procrestore(up); 796 spllo(); 797 } else { 798 if(traceprocs) 799 print("sleep %p %p: sleeping\n", m, up); 800 /* 801 * here to go to sleep (i.e. stop Running) 802 */ 803 unlock(&up->rlock); 804 unlock(&r->lk); 805 gotolabel(&m->sched); 806 } 807 } 808 809 if(up->notepending) { 810 up->notepending = 0; 811 splx(s); 812 if(up->procctl == Proc_exitme && up->closingfgrp) 813 forceclosefgrp(); 814 error(Eintr); 815 } 816 817 splx(s); 818 } 819 820 static int 821 tfn(void *arg) 822 { 823 return up->trend == nil || up->tfn(arg); 824 } 825 826 void 827 twakeup(Ureg *ureg, Timer *t) 828 { 829 Proc *p; 830 Rendez *trend; 831 832 p = t->ta; 833 trend = p->trend; 834 p->trend = 0; 835 if(trend) 836 wakeup(trend); 837 } 838 839 void 840 tsleep(Rendez *r, int (*fn)(void*), void *arg, ulong ms) 841 { 842 if (up->timer.tt){ 843 print("tsleep: timer active: mode %d, tf %#p\n", up->timer.tmode, up->timer.tf); 844 timerdel(&up->timer); 845 } 846 up->timer.tns = MS2NS(ms); 847 up->timer.tf = twakeup; 848 up->timer.tmode = Trelative; 849 up->timer.ta = up; 850 up->trend = r; 851 up->tfn = fn; 852 timeradd(&up->timer); 853 854 if(waserror()){ 855 timerdel(&up->timer); 856 nexterror(); 857 } 858 sleep(r, tfn, arg); 859 if (up->timer.tt) 860 timerdel(&up->timer); 861 up->timer.twhen = 0; 862 poperror(); 863 } 864 865 /* 866 * Expects that only one process can call wakeup for any given Rendez. 867 * We hold both locks to ensure that r->p and p->r remain consistent. 868 * Richard Miller has a better solution that doesn't require both to 869 * be held simultaneously, but I'm a paranoid - presotto. 870 */ 871 Proc* 872 wakeup(Rendez *r) 873 { 874 Proc *p; 875 int s; 876 877 s = splhi(); 878 879 lock(&r->lk); 880 p = r->p; 881 882 if(p != nil){ 883 lock(&p->rlock); 884 if(p->state != Wakeme || p->r != r){ 885 iprint("%p %p %d\n", p->r, r, p->state); 886 panic("wakeup: state"); 887 } 888 r->p = nil; 889 p->r = nil; 890 ready(p); 891 unlock(&p->rlock); 892 } 893 unlock(&r->lk); 894 895 splx(s); 896 897 return p; 898 } 899 900 /* 901 * if waking a sleeping process, this routine must hold both 902 * p->rlock and r->lock. However, it can't know them in 903 * the same order as wakeup causing a possible lock ordering 904 * deadlock. We break the deadlock by giving up the p->rlock 905 * lock if we can't get the r->lock and retrying. 906 */ 907 int 908 postnote(Proc *p, int dolock, char *n, int flag) 909 { 910 int s, ret; 911 Rendez *r; 912 Proc *d, **l; 913 914 if(dolock) 915 qlock(&p->debug); 916 917 if(flag != NUser && (p->notify == 0 || p->notified)) 918 p->nnote = 0; 919 920 ret = 0; 921 if(p->nnote < NNOTE) { 922 strcpy(p->note[p->nnote].msg, n); 923 p->note[p->nnote++].flag = flag; 924 ret = 1; 925 } 926 p->notepending = 1; 927 if(dolock) 928 qunlock(&p->debug); 929 930 /* this loop is to avoid lock ordering problems. */ 931 for(;;){ 932 s = splhi(); 933 lock(&p->rlock); 934 r = p->r; 935 936 /* waiting for a wakeup? */ 937 if(r == nil) 938 break; /* no */ 939 940 /* try for the second lock */ 941 if(canlock(&r->lk)){ 942 if(p->state != Wakeme || r->p != p) 943 panic("postnote: state %d %d %d", r->p != p, p->r != r, p->state); 944 p->r = nil; 945 r->p = nil; 946 ready(p); 947 unlock(&r->lk); 948 break; 949 } 950 951 /* give other process time to get out of critical section and try again */ 952 unlock(&p->rlock); 953 splx(s); 954 sched(); 955 } 956 unlock(&p->rlock); 957 splx(s); 958 959 if(p->state != Rendezvous) 960 return ret; 961 962 /* Try and pull out of a rendezvous */ 963 lock(&p->rgrp->ref.lk); 964 if(p->state == Rendezvous) { 965 p->rendval = ~0; 966 l = &REND(p->rgrp, p->rendtag); 967 for(d = *l; d; d = d->rendhash) { 968 if(d == p) { 969 *l = p->rendhash; 970 break; 971 } 972 l = &d->rendhash; 973 } 974 ready(p); 975 } 976 unlock(&p->rgrp->ref.lk); 977 return ret; 978 } 979 980 /* 981 * weird thing: keep at most NBROKEN around 982 */ 983 #define NBROKEN 4 984 struct 985 { 986 QLock lk; 987 int n; 988 Proc *p[NBROKEN]; 989 }broken; 990 991 void 992 addbroken(Proc *p) 993 { 994 qlock(&broken.lk); 995 if(broken.n == NBROKEN) { 996 ready(broken.p[0]); 997 memmove(&broken.p[0], &broken.p[1], sizeof(Proc*)*(NBROKEN-1)); 998 --broken.n; 999 } 1000 broken.p[broken.n++] = p; 1001 qunlock(&broken.lk); 1002 1003 p->state = Broken; 1004 p->psstate = 0; 1005 sched(); 1006 } 1007 1008 void 1009 unbreak(Proc *p) 1010 { 1011 int b; 1012 1013 qlock(&broken.lk); 1014 for(b=0; b < broken.n; b++) 1015 if(broken.p[b] == p) { 1016 broken.n--; 1017 memmove(&broken.p[b], &broken.p[b+1], 1018 sizeof(Proc*)*(NBROKEN-(b+1))); 1019 ready(p); 1020 break; 1021 } 1022 qunlock(&broken.lk); 1023 } 1024 1025 int 1026 freebroken(void) 1027 { 1028 int i, n; 1029 1030 qlock(&broken.lk); 1031 n = broken.n; 1032 for(i=0; i<n; i++) { 1033 ready(broken.p[i]); 1034 broken.p[i] = 0; 1035 } 1036 broken.n = 0; 1037 qunlock(&broken.lk); 1038 return n; 1039 } 1040 1041 void 1042 pexit(char *exitstr, int freemem) 1043 { 1044 Proc *p; 1045 Segment **s, **es; 1046 long utime, stime; 1047 Waitq *wq, *f, *next; 1048 Fgrp *fgrp; 1049 Egrp *egrp; 1050 Rgrp *rgrp; 1051 Pgrp *pgrp; 1052 Chan *dot; 1053 void (*pt)(Proc*, int, vlong); 1054 1055 if(up->syscalltrace) 1056 free(up->syscalltrace); 1057 up->alarm = 0; 1058 if (up->timer.tt) 1059 timerdel(&up->timer); 1060 pt = proctrace; 1061 if(pt) 1062 pt(up, SDead, 0); 1063 1064 /* nil out all the resources under lock (free later) */ 1065 qlock(&up->debug); 1066 fgrp = up->fgrp; 1067 up->fgrp = nil; 1068 egrp = up->egrp; 1069 up->egrp = nil; 1070 rgrp = up->rgrp; 1071 up->rgrp = nil; 1072 pgrp = up->pgrp; 1073 up->pgrp = nil; 1074 dot = up->dot; 1075 up->dot = nil; 1076 qunlock(&up->debug); 1077 1078 if(fgrp) 1079 closefgrp(fgrp); 1080 if(egrp) 1081 closeegrp(egrp); 1082 if(rgrp) 1083 closergrp(rgrp); 1084 if(dot) 1085 cclose(dot); 1086 if(pgrp) 1087 closepgrp(pgrp); 1088 1089 /* 1090 * if not a kernel process and have a parent, 1091 * do some housekeeping. 1092 */ 1093 if(up->kp == 0) { 1094 p = up->parent; 1095 if(p == 0) { 1096 if(exitstr == 0) 1097 exitstr = "unknown"; 1098 panic("boot process died: %s", exitstr); 1099 } 1100 1101 while(waserror()) 1102 ; 1103 1104 wq = smalloc(sizeof(Waitq)); 1105 poperror(); 1106 1107 wq->w.pid = up->pid; 1108 utime = up->time[TUser] + up->time[TCUser]; 1109 stime = up->time[TSys] + up->time[TCSys]; 1110 wq->w.time[TUser] = tk2ms(utime); 1111 wq->w.time[TSys] = tk2ms(stime); 1112 wq->w.time[TReal] = tk2ms(msec() - up->time[TReal]); 1113 if(exitstr && exitstr[0]) 1114 snprint(wq->w.msg, sizeof(wq->w.msg), "%s %lud: %s", up->text, up->pid, exitstr); 1115 else 1116 wq->w.msg[0] = '\0'; 1117 1118 lock(&p->exl); 1119 /* 1120 * Check that parent is still alive. 1121 */ 1122 if(p->pid == up->parentpid && p->state != Broken) { 1123 p->nchild--; 1124 p->time[TCUser] += utime; 1125 p->time[TCSys] += stime; 1126 /* 1127 * If there would be more than 128 wait records 1128 * processes for my parent, then don't leave a wait 1129 * record behind. This helps prevent badly written 1130 * daemon processes from accumulating lots of wait 1131 * records. 1132 */ 1133 if(p->nwait < 128) { 1134 wq->next = p->waitq; 1135 p->waitq = wq; 1136 p->nwait++; 1137 wq = nil; 1138 wakeup(&p->waitr); 1139 } 1140 } 1141 unlock(&p->exl); 1142 if(wq) 1143 free(wq); 1144 } 1145 1146 if(!freemem) 1147 addbroken(up); 1148 1149 qlock(&up->seglock); 1150 es = &up->seg[NSEG]; 1151 for(s = up->seg; s < es; s++) { 1152 if(*s) { 1153 putseg(*s); 1154 *s = 0; 1155 } 1156 } 1157 qunlock(&up->seglock); 1158 1159 lock(&up->exl); /* Prevent my children from leaving waits */ 1160 pidunhash(up); 1161 up->pid = 0; 1162 wakeup(&up->waitr); 1163 unlock(&up->exl); 1164 1165 for(f = up->waitq; f; f = next) { 1166 next = f->next; 1167 free(f); 1168 } 1169 1170 /* release debuggers */ 1171 qlock(&up->debug); 1172 if(up->pdbg) { 1173 wakeup(&up->pdbg->sleep); 1174 up->pdbg = 0; 1175 } 1176 qunlock(&up->debug); 1177 1178 /* Sched must not loop for these locks */ 1179 lock(&procalloc.lk); 1180 lock(&palloc.lk); 1181 1182 up->state = Moribund; 1183 sched(); 1184 panic("pexit"); 1185 } 1186 1187 int 1188 haswaitq(void *x) 1189 { 1190 Proc *p; 1191 1192 p = (Proc *)x; 1193 return p->waitq != 0; 1194 } 1195 1196 ulong 1197 pwait(Waitmsg *w) 1198 { 1199 ulong cpid; 1200 Waitq *wq; 1201 1202 if(!canqlock(&up->qwaitr)) 1203 error(Einuse); 1204 1205 if(waserror()) { 1206 qunlock(&up->qwaitr); 1207 nexterror(); 1208 } 1209 1210 lock(&up->exl); 1211 if(up->nchild == 0 && up->waitq == 0) { 1212 unlock(&up->exl); 1213 error(Enochild); 1214 } 1215 unlock(&up->exl); 1216 1217 sleep(&up->waitr, haswaitq, up); 1218 1219 lock(&up->exl); 1220 wq = up->waitq; 1221 up->waitq = wq->next; 1222 up->nwait--; 1223 unlock(&up->exl); 1224 1225 qunlock(&up->qwaitr); 1226 poperror(); 1227 1228 if(w) 1229 memmove(w, &wq->w, sizeof(Waitmsg)); 1230 cpid = wq->w.pid; 1231 free(wq); 1232 return cpid; 1233 } 1234 1235 Proc* 1236 proctab(int i) 1237 { 1238 return &procalloc.arena[i]; 1239 } 1240 1241 void 1242 dumpaproc(Proc *p) 1243 { 1244 ulong bss; 1245 char *s; 1246 1247 if(p == 0) 1248 return; 1249 1250 bss = 0; 1251 if(p->seg[BSEG]) 1252 bss = p->seg[BSEG]->top; 1253 1254 s = p->psstate; 1255 if(s == 0) 1256 s = statename[p->state]; 1257 print("%3lud:%10s pc %8lux dbgpc %8lux %8s (%s) ut %ld st %ld bss %lux qpc %lux nl %lud nd %lud lpc %lux pri %lud\n", 1258 p->pid, p->text, p->pc, dbgpc(p), s, statename[p->state], 1259 p->time[0], p->time[1], bss, p->qpc, p->nlocks.ref, p->delaysched, p->lastlock ? p->lastlock->pc : 0, p->priority); 1260 } 1261 1262 void 1263 procdump(void) 1264 { 1265 int i; 1266 Proc *p; 1267 1268 if(up) 1269 print("up %lud\n", up->pid); 1270 else 1271 print("no current process\n"); 1272 for(i=0; i<conf.nproc; i++) { 1273 p = &procalloc.arena[i]; 1274 if(p->state == Dead) 1275 continue; 1276 1277 dumpaproc(p); 1278 } 1279 } 1280 1281 /* 1282 * wait till all processes have flushed their mmu 1283 * state about segement s 1284 */ 1285 void 1286 procflushseg(Segment *s) 1287 { 1288 int i, ns, nm, nwait; 1289 Proc *p; 1290 1291 /* 1292 * tell all processes with this 1293 * segment to flush their mmu's 1294 */ 1295 nwait = 0; 1296 for(i=0; i<conf.nproc; i++) { 1297 p = &procalloc.arena[i]; 1298 if(p->state == Dead) 1299 continue; 1300 for(ns = 0; ns < NSEG; ns++) 1301 if(p->seg[ns] == s){ 1302 p->newtlb = 1; 1303 for(nm = 0; nm < conf.nmach; nm++){ 1304 if(MACHP(nm)->proc == p){ 1305 MACHP(nm)->flushmmu = 1; 1306 nwait++; 1307 } 1308 } 1309 break; 1310 } 1311 } 1312 1313 if(nwait == 0) 1314 return; 1315 1316 /* 1317 * wait for all processors to take a clock interrupt 1318 * and flush their mmu's 1319 */ 1320 for(nm = 0; nm < conf.nmach; nm++) 1321 if(MACHP(nm) != m) 1322 while(MACHP(nm)->flushmmu) 1323 sched(); 1324 } 1325 1326 void 1327 scheddump(void) 1328 { 1329 Proc *p; 1330 Schedq *rq; 1331 1332 for(rq = &runq[Nrq-1]; rq >= runq; rq--){ 1333 if(rq->head == 0) 1334 continue; 1335 print("rq%ld:", rq-runq); 1336 for(p = rq->head; p; p = p->rnext) 1337 print(" %lud(%lud)", p->pid, msec() - p->readytime); 1338 print("\n"); 1339 delay(150); 1340 } 1341 print("nrdy %d\n", nrdy); 1342 } 1343 1344 void 1345 kproc(char *name, void (*func)(void *), void *arg) 1346 { 1347 Proc *p; 1348 static Pgrp *kpgrp; 1349 1350 extern int tracekdev; 1351 if(tracekdev) 1352 iprint("kproc %s\n", name); 1353 1354 p = newproc(); 1355 p->psstate = 0; 1356 p->procmode = 0640; 1357 p->kp = 1; 1358 p->noswap = 1; 1359 1360 if(up){ 1361 p->fpsave = up->fpsave; 1362 p->scallnr = up->scallnr; 1363 p->s = up->s; 1364 p->slash = up->slash; 1365 p->dot = up->dot; 1366 if(p->dot) 1367 incref(&p->dot->ref); 1368 1369 memmove(p->note, up->note, sizeof(p->note)); 1370 p->nnote = up->nnote; 1371 p->lastnote = up->lastnote; 1372 p->notify = up->notify; 1373 } 1374 1375 p->notified = 0; 1376 p->ureg = 0; 1377 p->dbgreg = 0; 1378 p->nerrlab = 0; 1379 1380 procpriority(p, PriKproc, 0); 1381 1382 kprocchild(p, func, arg); 1383 1384 kstrdup(&p->user, eve); 1385 kstrdup(&p->text, name); 1386 if(kpgrp == 0) 1387 kpgrp = newpgrp(); 1388 p->pgrp = kpgrp; 1389 incref(&kpgrp->ref); 1390 1391 memset(p->time, 0, sizeof(p->time)); 1392 p->time[TReal] = msec(); 1393 ready(p); 1394 } 1395 1396 /* 1397 * called splhi() by notify(). See comment in notify for the 1398 * reasoning. 1399 */ 1400 void 1401 procctl(Proc *p) 1402 { 1403 char *state; 1404 ulong s; 1405 1406 switch(p->procctl) { 1407 case Proc_exitbig: 1408 spllo(); 1409 pexit("Killed: Insufficient physical memory", 1); 1410 1411 case Proc_exitme: 1412 spllo(); /* pexit has locks in it */ 1413 pexit("Killed", 1); 1414 1415 case Proc_traceme: 1416 if(p->nnote == 0) 1417 return; 1418 /* No break */ 1419 1420 case Proc_stopme: 1421 p->procctl = 0; 1422 state = p->psstate; 1423 p->psstate = "Stopped"; 1424 /* free a waiting debugger */ 1425 s = spllo(); 1426 qlock(&p->debug); 1427 if(p->pdbg) { 1428 wakeup(&p->pdbg->sleep); 1429 p->pdbg = 0; 1430 } 1431 qunlock(&p->debug); 1432 splhi(); 1433 p->state = Stopped; 1434 sched(); 1435 p->psstate = state; 1436 splx(s); 1437 return; 1438 } 1439 } 1440 1441 #include "errstr.h" 1442 1443 void 1444 error(char *err) 1445 { 1446 spllo(); 1447 1448 assert(up->nerrlab < NERR); 1449 kstrcpy(up->errstr, err, ERRMAX); 1450 setlabel(&up->errlab[NERR-1]); 1451 nexterror(); 1452 } 1453 1454 void 1455 nexterror(void) 1456 { 1457 gotolabel(&up->errlab[--up->nerrlab]); 1458 } 1459 1460 void 1461 exhausted(char *resource) 1462 { 1463 char buf[ERRMAX]; 1464 1465 sprint(buf, "no free %s", resource); 1466 iprint("%s\n", buf); 1467 error(buf); 1468 } 1469 1470 void 1471 killbig(char *why) 1472 { 1473 int i; 1474 Segment *s; 1475 ulong l, max; 1476 Proc *p, *ep, *kp; 1477 1478 max = 0; 1479 kp = 0; 1480 ep = procalloc.arena+conf.nproc; 1481 for(p = procalloc.arena; p < ep; p++) { 1482 if(p->state == Dead || p->kp) 1483 continue; 1484 l = 0; 1485 for(i=1; i<NSEG; i++) { 1486 s = p->seg[i]; 1487 if(s != 0) 1488 l += s->top - s->base; 1489 } 1490 if(l > max && ((p->procmode&0222) || strcmp(eve, p->user)!=0)) { 1491 kp = p; 1492 max = l; 1493 } 1494 } 1495 1496 print("%lud: %s killed: %s\n", kp->pid, kp->text, why); 1497 for(p = procalloc.arena; p < ep; p++) { 1498 if(p->state == Dead || p->kp) 1499 continue; 1500 if(p != kp && p->seg[BSEG] && p->seg[BSEG] == kp->seg[BSEG]) 1501 p->procctl = Proc_exitbig; 1502 } 1503 kp->procctl = Proc_exitbig; 1504 for(i = 0; i < NSEG; i++) { 1505 s = kp->seg[i]; 1506 if(s != 0 && canqlock(&s->lk)) { 1507 mfreeseg(s, s->base, (s->top - s->base)/BY2PG); 1508 qunlock(&s->lk); 1509 } 1510 } 1511 } 1512 1513 /* 1514 * change ownership to 'new' of all processes owned by 'old'. Used when 1515 * eve changes. 1516 */ 1517 void 1518 renameuser(char *old, char *new) 1519 { 1520 Proc *p, *ep; 1521 1522 ep = procalloc.arena+conf.nproc; 1523 for(p = procalloc.arena; p < ep; p++) 1524 if(p->user!=nil && strcmp(old, p->user)==0) 1525 kstrdup(&p->user, new); 1526 } 1527 1528 /* 1529 * time accounting called by clock() splhi'd 1530 */ 1531 void 1532 accounttime(void) 1533 { 1534 Proc *p; 1535 ulong n, per; 1536 static ulong nrun; 1537 1538 p = m->proc; 1539 if(p) { 1540 nrun++; 1541 p->time[p->insyscall]++; 1542 } 1543 1544 /* calculate decaying duty cycles */ 1545 n = perfticks(); 1546 per = n - m->perf.last; 1547 m->perf.last = n; 1548 per = (m->perf.period*(HZ-1) + per)/HZ; 1549 if(per != 0) 1550 m->perf.period = per; 1551 1552 m->perf.avg_inidle = (m->perf.avg_inidle*(HZ-1)+m->perf.inidle)/HZ; 1553 m->perf.inidle = 0; 1554 1555 m->perf.avg_inintr = (m->perf.avg_inintr*(HZ-1)+m->perf.inintr)/HZ; 1556 m->perf.inintr = 0; 1557 1558 /* only one processor gets to compute system load averages */ 1559 if(m->machno != 0) 1560 return; 1561 1562 /* 1563 * calculate decaying load average. 1564 * if we decay by (n-1)/n then it takes 1565 * n clock ticks to go from load L to .36 L once 1566 * things quiet down. it takes about 5 n clock 1567 * ticks to go to zero. so using HZ means this is 1568 * approximately the load over the last second, 1569 * with a tail lasting about 5 seconds. 1570 */ 1571 n = nrun; 1572 nrun = 0; 1573 n = (nrdy+n)*1000; 1574 m->load = (m->load*(HZ-1)+n)/HZ; 1575 } 1576 1577 static void 1578 pidhash(Proc *p) 1579 { 1580 int h; 1581 1582 h = p->pid % nelem(procalloc.ht); 1583 lock(&procalloc.lk); 1584 p->pidhash = procalloc.ht[h]; 1585 procalloc.ht[h] = p; 1586 unlock(&procalloc.lk); 1587 } 1588 1589 static void 1590 pidunhash(Proc *p) 1591 { 1592 int h; 1593 Proc **l; 1594 1595 h = p->pid % nelem(procalloc.ht); 1596 lock(&procalloc.lk); 1597 for(l = &procalloc.ht[h]; *l != nil; l = &(*l)->pidhash) 1598 if(*l == p){ 1599 *l = p->pidhash; 1600 break; 1601 } 1602 unlock(&procalloc.lk); 1603 } 1604 1605 int 1606 procindex(ulong pid) 1607 { 1608 Proc *p; 1609 int h; 1610 int s; 1611 1612 s = -1; 1613 h = pid % nelem(procalloc.ht); 1614 lock(&procalloc.lk); 1615 for(p = procalloc.ht[h]; p != nil; p = p->pidhash) 1616 if(p->pid == pid){ 1617 s = p - procalloc.arena; 1618 break; 1619 } 1620 unlock(&procalloc.lk); 1621 return s; 1622 }