commit 42b0c4ecc2ef6e78f0b2057d3f01cc658d0c6df0
parent 783915cde9c6a7bf0f7c9c259e336f381ec8545e
Author: Russ Cox <rsc@swtch.com>
Date: Tue, 1 Jul 2008 16:53:53 -0400
9vx: add pager from Plan 9 to flush memory
Diffstat:
11 files changed, 455 insertions(+), 52 deletions(-)
diff --git a/src/9vx/Makefrag b/src/9vx/Makefrag
@@ -101,6 +101,7 @@ PLAN9_A_OBJS = \
sdscsi.o \
segment.o \
strecpy.o \
+ swap.o \
sysfile.o \
sysproc.o \
thwack.o \
diff --git a/src/9vx/a/dat.h b/src/9vx/a/dat.h
@@ -193,6 +193,7 @@ struct Mach
int tlbfault;
int tlbpurge;
int pfault;
+ int new;
int cs;
int syscall;
int load;
diff --git a/src/9vx/a/page.c b/src/9vx/a/page.c
@@ -129,7 +129,7 @@ newpage(int clear, Segment **s, ulong va)
color = getpgcolor(va);
hw = swapalloc.highwater;
for(;;) {
- if(palloc.freecount > hw)
+ if(palloc.freecount >= hw)
break;
if(up->kp && palloc.freecount > 0)
break;
diff --git a/src/9vx/a/proc.c b/src/9vx/a/proc.c
@@ -1310,9 +1310,9 @@ procflushseg(Segment *s)
* wait for all processors to take a clock interrupt
* and flush their mmu's
*/
- for(nm = 0; nm < conf.nmach; nm++)
+ for(nm = 0; nm < conf.nmach && nm < 1; nm++)
if(MACHP(nm) != m)
- while(MACHP(nm)->flushmmu)
+ while(MACHP(nm)->flushmmu && MACHP(nm)->proc != nil)
sched();
}
diff --git a/src/9vx/a/swap.c b/src/9vx/a/swap.c
@@ -0,0 +1,408 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+static int canflush(Proc*, Segment*);	/* forward declarations of the file-local helpers defined below */
+static void executeio(void);
+static int needpages(void *v);
+static void pageout(Proc*, Segment*);
+static void pagepte(int, Page**);
+static void pager(void *v);
+
+ Image swapimage;	/* image that paged-out pages are cached under (see pagepte); swapimage.c is the swap channel */
+static Page **iolist;	/* pages queued by pagepte and written by executeio; holds conf.nswppo entries */
+static int ioptr;	/* number of entries currently queued in iolist */
+
+/*
+ * swapinit: allocate the swap map (one byte per swap page, conf.nswap
+ * bytes) and the pageout I/O list (conf.nswppo entries), mark every
+ * swap page free, and flag swapimage as having no text.
+ * Panics if either allocation fails.
+ */
+void
+swapinit(void)
+{
+	uchar *map;
+
+	map = xalloc(conf.nswap);
+	swapalloc.swmap = map;
+	swapalloc.alloc = map;
+	swapalloc.last = map;
+	swapalloc.top = map + conf.nswap;
+	swapalloc.free = conf.nswap;
+
+	iolist = xalloc(conf.nswppo*sizeof(Page*));
+
+	/* both allocations are attempted before either result is checked */
+	if(map == 0 || iolist == 0)
+		panic("swapinit: not enough memory");
+
+	swapimage.notext = 1;
+}
+
+/*
+ * newswap: reserve one free swap page.
+ * Returns its byte address (slot index * BY2PG), or ~0 when
+ * swapalloc.free is zero.  Panics if the free count is nonzero
+ * but no zero byte exists between swapalloc.last and swapalloc.top.
+ */
+ulong
+newswap(void)
+{
+	uchar *slot;
+	ulong daddr;
+
+	lock(&swapalloc.lk);
+	if(swapalloc.free != 0){
+		/* the search starts at the last allocation point, not at the map base */
+		slot = memchr(swapalloc.last, 0, swapalloc.top-swapalloc.last);
+		if(slot == 0)
+			panic("inconsistent swap");
+
+		*slot = 1;
+		swapalloc.last = slot;
+		swapalloc.free--;
+		daddr = (slot-swapalloc.swmap) * BY2PG;
+	}else
+		daddr = ~0;
+	unlock(&swapalloc.lk);
+	return daddr;
+}
+
+/*
+ * putswap: drop one reference to a swap page.
+ * p is not dereferenced; its value is used as a swap address and
+ * divided by BY2PG to index the swap map (presumably it is the
+ * tagged address that pagepte stores in a pte slot).
+ * When the count reaches zero the slot is counted as free again and
+ * swapalloc.last is pulled back if the slot lies below it.
+ * Panics if the count after the decrement is 254 or more.
+ */
+void
+putswap(Page *p)
+{
+	uchar *cnt;
+
+	lock(&swapalloc.lk);
+	cnt = swapalloc.swmap + ((ulong)p)/BY2PG;
+	*cnt -= 1;
+	if(*cnt == 0){
+		swapalloc.free++;
+		if(swapalloc.last > cnt)
+			swapalloc.last = cnt;
+	}else if(*cnt >= 254)
+		panic("putswap %lux == %ud", p, *cnt);
+	unlock(&swapalloc.lk);
+}
+
+/*
+ * dupswap: add one reference to the swap page addressed by p.
+ * p is used as a swap address (divided by BY2PG), not dereferenced.
+ * Panics if the one-byte count wraps around to zero.
+ */
+void
+dupswap(Page *p)
+{
+	uchar *cnt;
+
+	lock(&swapalloc.lk);
+	cnt = &swapalloc.swmap[((ulong)p)/BY2PG];
+	(*cnt)++;
+	if(*cnt == 0)
+		panic("dupswap");
+	unlock(&swapalloc.lk);
+}
+
+/*
+ * swapcount: return the reference count recorded in the swap map
+ * for the swap page at byte address daddr.  Reads without taking
+ * swapalloc.lk.
+ */
+int
+swapcount(ulong daddr)
+{
+	ulong slot;
+
+	slot = daddr/BY2PG;
+	return swapalloc.swmap[slot];
+}
+
+/*
+ * kickpager: make sure the pager is running and awake.
+ * The first call creates the "pager" kernel process; every later
+ * call wakes whoever sleeps on swapalloc.r.
+ *
+ * The started flag is set before kproc() is called.  If it were set
+ * afterwards, a second caller arriving while the first is still
+ * inside kproc() would see started == 0 and create a second pager.
+ * NOTE(review): the flag is a plain int, so two CPUs entering at the
+ * same instant can still race; confirm whether callers serialize.
+ */
+void
+kickpager(void)
+{
+	static int started;
+
+	if(started){
+		wakeup(&swapalloc.r);
+		return;
+	}
+	started = 1;
+	kproc("pager", pager, 0);
+}
+/*
+ * pager: body of the "pager" process that kickpager creates with kproc.
+ * Sleeps on swapalloc.r until needpages() reports a shortage, then
+ * cycles through the process table calling pageout() on each process's
+ * segments until needpages() is false again.  The scan position p is
+ * not reset between wakeups.  The argument is only handed on to
+ * needpages().  The function loops forever and does not return.
+ */
+static void
+pager(void *junk)
+{
+ int i;
+ Segment *s;
+ Proc *p, *ep;
+
+ if(waserror())
+ panic("pager: os error\n");
+
+ p = proctab(0);	/* proctab(0) is used as the base of an array of conf.nproc Procs */
+ ep = &p[conf.nproc];
+
+loop:
+ up->psstate = "Idle";
+ sleep(&swapalloc.r, needpages, 0);
+print("uh oh. someone woke the pager\n");	/* printed every time the sleep above returns */
+
+ while(needpages(junk)) {
+
+ if(swapimage.c) {	/* a swap channel has been installed (see setswapchan) */
+ p++;	/* move on to the next process, wrapping at the end of the table */
+ if(p >= ep)
+ p = proctab(0);
+
+ if(p->state == Dead || p->noswap)
+ continue;
+
+ if(!canqlock(&p->seglock))
+ continue; /* process changing its segments */
+
+ for(i = 0; i < NSEG; i++) {
+ if(!needpages(junk)){	/* shortage is over: release the lock and go back to sleep */
+ qunlock(&p->seglock);
+ goto loop;
+ }
+
+ if((s = p->seg[i])) {
+ switch(s->type&SG_TYPE) {
+ default:
+ break;
+ case SG_TEXT:	/* text: pageout only, executeio is not called on this path */
+ pageout(p, s);
+ break;
+ case SG_DATA:
+ case SG_BSS:
+ case SG_STACK:
+ case SG_SHARED:
+ up->psstate = "Pageout";
+ pageout(p, s);
+ if(ioptr != 0) {	/* pageout queued pages in iolist: write them now */
+ up->psstate = "I/O";
+ executeio();
+ }
+ break;
+ }
+ }
+ }
+ qunlock(&p->seglock);
+ }
+ else {
+ print("out of physical memory; no swap configured\n");
+ if(!cpuserver || freebroken() == 0)	/* killbig is skipped only when cpuserver is set and freebroken() returned nonzero */
+ killbig("out of memory");
+
+ /* Emulate the old system if no swap channel */
+ tsleep(&up->sleep, return0, 0, 5000);
+ wakeup(&palloc.r);	/* then wake anything sleeping on palloc.r */
+ }
+ }
+ goto loop;
+}
+/*
+ * pageout: try to evict pages belonging to segment s of process p.
+ * Returns without doing anything if s->lk cannot be taken without
+ * blocking, if s->steal is set, or if canflush() refuses.
+ * Otherwise it walks every Pte in s->map: entries already paged out
+ * are skipped, entries with PG_REF set have the bit cleared and are
+ * skipped for this pass, and the rest are handed to pagepte().
+ * The walk stops once ioptr reaches conf.nswppo.
+ * canflush() increments s->ref.ref; every path after that call ends
+ * with putseg(s), presumably to drop that reference again.
+ */
+static void
+pageout(Proc *p, Segment *s)
+{
+ int type, i, size;
+ Pte *l;
+ Page **pg, *entry;
+
+ if(!canqlock(&s->lk)) /* We cannot afford to wait, we will surely deadlock */
+ return;
+
+ if(s->steal) { /* Protected by /dev/proc */
+ qunlock(&s->lk);	/* no putseg here: canflush has not been called yet */
+ return;
+ }
+
+ if(!canflush(p, s)) { /* Able to invalidate all tlbs with references */
+ qunlock(&s->lk);
+ putseg(s);
+ return;
+ }
+
+ if(waserror()) {	/* error raised during the walk below: unlock, release and give up */
+ qunlock(&s->lk);
+ putseg(s);
+ return;
+ }
+
+ /* Pass through the pte tables looking for memory pages to swap out */
+ type = s->type&SG_TYPE;
+ size = s->mapsize;
+ for(i = 0; i < size; i++) {
+ l = s->map[i];
+ if(l == 0)
+ continue;
+ for(pg = l->first; pg < l->last; pg++) {
+ entry = *pg;
+ if(pagedout(entry))
+ continue;
+
+ if(entry->modref & PG_REF) {	/* clear the bit and leave the page in memory for now */
+ entry->modref &= ~PG_REF;
+ continue;
+ }
+
+ pagepte(type, pg);
+
+ if(ioptr >= conf.nswppo)	/* iolist is full */
+ goto out;
+ }
+ }
+out:
+ poperror();
+ qunlock(&s->lk);
+ putseg(s);
+}
+
+/*
+ * canflush: take a reference on segment s and report whether every
+ * process using it can be paged.
+ * If the reference count was 1 before the increment, only p is
+ * asked (via canpage).  Otherwise every live process that has s in
+ * its segment table is asked, and the first refusal returns 0.
+ * The reference is taken on every path, including failure.
+ */
+static int
+canflush(Proc *p, Segment *s)
+{
+	int n, shared;
+	Proc *q, *eq;
+
+	lock(&s->ref.lk);
+	shared = s->ref.ref != 1;
+	s->ref.ref++;
+	unlock(&s->ref.lk);
+
+	/* Easy case: the segment had a single user */
+	if(!shared)
+		return canpage(p);
+
+	/*
+	 * Hard case: make sure all processes which have tlb entries
+	 * for this segment will be flushed if it gets paged out
+	 */
+	q = proctab(0);
+	eq = &q[conf.nproc];
+	for(; q < eq; q++){
+		if(q->state == Dead)
+			continue;
+		for(n = 0; n < NSEG; n++)
+			if(q->seg[n] == s && !canpage(q))
+				return 0;
+	}
+	return 1;
+}
+/*
+ * pagepte: evict the page referenced by the pte slot *pg.
+ * type is the segment type (s->type&SG_TYPE in the caller).
+ * SG_TEXT pages are released with putpage and the slot is zeroed.
+ * SG_DATA, SG_BSS, SG_STACK and SG_SHARED pages get a swap address
+ * from newswap(), are cached under swapimage at that address, have
+ * their slot replaced by the address tagged with PG_ONSWAP, and are
+ * appended to iolist for executeio to write.
+ * Any other type, or a failed newswap(), leaves the slot unchanged.
+ */
+static void
+pagepte(int type, Page **pg)
+{
+ ulong daddr;
+ Page *outp;
+
+ outp = *pg;
+ switch(type) {
+ case SG_TEXT: /* Revert to demand load */
+ putpage(outp);
+ *pg = 0;
+ break;
+
+ case SG_DATA:
+ case SG_BSS:
+ case SG_STACK:
+ case SG_SHARED:
+ /*
+ * get a new swap address and clear any pages
+ * referring to it from the cache
+ */
+ daddr = newswap();
+ if(daddr == ~0)	/* no free swap page: leave this page in memory */
+ break;
+ cachedel(&swapimage, daddr);
+
+ lock(&outp->lk);
+
+ /* forget anything that it used to cache */
+ uncachepage(outp);
+
+ /*
+ * incr the reference count to make sure it sticks around while
+ * being written
+ */
+ outp->ref++;	/* executeio decrements this again after the write */
+
+ /*
+ * enter it into the cache so that a fault happening
+ * during the write will grab the page from the cache
+ * rather than one partially written to the disk
+ */
+ outp->daddr = daddr;
+ cachepage(outp, &swapimage);
+ *pg = (Page*)(daddr|PG_ONSWAP);	/* the slot now holds a tagged swap address, not a Page pointer */
+ unlock(&outp->lk);
+
+ /* Add page to IO transaction list */
+ iolist[ioptr++] = outp;
+ break;
+ }
+}
+/*
+ * pagersummary: print one line with three figures:
+ * palloc.user-palloc.freecount over palloc.user (labelled "memory"),
+ * conf.nswap-swapalloc.free over conf.nswap (labelled "swap"),
+ * and the current iolist length ioptr.
+ */
+void
+pagersummary(void)
+{
+ print("%lud/%lud memory %lud/%lud swap %d iolist\n",
+ palloc.user-palloc.freecount,
+ palloc.user, conf.nswap-swapalloc.free, conf.nswap,
+ ioptr);
+}
+/*
+ * executeio: write every page queued in iolist to the swap channel
+ * (swapimage.c), one BY2PG-sized write per page at offset out->daddr.
+ * After each write the extra reference taken in pagepte is dropped
+ * and the page is handed to putpage.  ioptr is reset to 0 at the end.
+ * A write that returns anything but BY2PG calls nexterror(), which
+ * presumably lands in the waserror() branch and panics; a write that
+ * raises an error itself ends in the same panic.
+ */
+static void
+executeio(void)
+{
+ Page *out;
+ int i, n;
+ Chan *c;
+ char *kaddr;
+ KMap *k;
+
+ c = swapimage.c;
+
+ for(i = 0; i < ioptr; i++) {
+ if(ioptr > conf.nswppo)	/* sanity check: iolist holds only conf.nswppo entries */
+ panic("executeio: ioptr %d > %d\n", ioptr, conf.nswppo);
+ out = iolist[i];
+ k = kmap(out);	/* map the page so its contents can be passed to write */
+ kaddr = (char*)VA(k);
+
+ if(waserror())
+ panic("executeio: page out I/O error");
+
+ n = devtab[c->type]->write(c, kaddr, BY2PG, out->daddr);
+ if(n != BY2PG)	/* short write is treated like an error */
+ nexterror();
+
+ kunmap(k);
+ poperror();
+
+ /* Free up the page after I/O */
+ lock(&out->lk);
+ out->ref--;	/* undo the outp->ref++ done in pagepte */
+ unlock(&out->lk);
+ putpage(out);
+ }
+ ioptr = 0;
+}
+
+/*
+ * needpages: sleep condition for the pager.
+ * Returns 1 while palloc.freecount is below swapalloc.headroom,
+ * 0 otherwise.  The argument is ignored.
+ */
+static int
+needpages(void *v)
+{
+	if(palloc.freecount < swapalloc.headroom)
+		return 1;
+	return 0;
+}
+/*
+ * setswapchan: install c as the swap channel (swapimage.c).
+ * If a channel is already installed and any swap page is in use
+ * (swapalloc.free != conf.nswap), c is closed and Einuse is raised;
+ * otherwise the old channel is closed first.
+ * For a device whose dc is not 'M', c is stat'ed and, when its length
+ * is smaller than conf.nswap*BY2PG, conf.nswap, swapalloc.top and
+ * swapalloc.free are shrunk to fit.  A failed stat closes c and
+ * raises an error.  error() is assumed not to return.
+ * NOTE(review): conf.nswap*BY2PG is computed in the type of conf.nswap
+ * and could wrap for a very large swap size; confirm the field width.
+ */
+void
+setswapchan(Chan *c)
+{
+ uchar dirbuf[sizeof(Dir)+100];
+ Dir d;
+ int n;
+
+ if(swapimage.c) {
+ if(swapalloc.free != conf.nswap){	/* some swap pages are still allocated */
+ cclose(c);
+ error(Einuse);
+ }
+ cclose(swapimage.c);
+ }
+
+ /*
+ * if this isn't a file, set the swap space
+ * to be at most the size of the partition
+ */
+ if(devtab[c->type]->dc != L'M'){
+ n = devtab[c->type]->stat(c, dirbuf, sizeof dirbuf);
+ if(n <= 0){
+ cclose(c);
+ error("stat failed in setswapchan");
+ }
+ convM2D(dirbuf, n, &d, nil);	/* decode the stat buffer into d; the return value is not checked */
+ if(d.length < conf.nswap*BY2PG){
+ conf.nswap = d.length/BY2PG;
+ swapalloc.top = &swapalloc.swmap[conf.nswap];
+ swapalloc.free = conf.nswap;
+ }
+ }
+
+ swapimage.c = c;
+}
+
+/*
+ * swapfull: report whether swap is nearly exhausted.
+ * Returns 1 when fewer than conf.nswap/10 swap pages are free,
+ * 0 otherwise.  Reads without taking swapalloc.lk.
+ */
+int
+swapfull(void)
+{
+	if(swapalloc.free < conf.nswap/10)
+		return 1;
+	return 0;
+}
diff --git a/src/9vx/a/swap.ed b/src/9vx/a/swap.ed
@@ -0,0 +1,9 @@
+,s;(void\*);(void *v);g
+,s;lock(\&swapalloc);lock(\&swapalloc.lk);g
+,s;s->ref ==;s->ref.ref ==;g
+,s;s->ref++;s->ref.ref++;g
+,s;(s = p->seg\[i\]);(&);g
+,s;lock(s);lock(\&s->ref.lk);g
+,s;lock(out);lock(\&out->lk);g
+,s;lock(outp);lock(\&outp->lk);g
+g/swopen/d
diff --git a/src/9vx/main.c b/src/9vx/main.c
@@ -742,6 +742,7 @@ newmach(void)
panic("out of processors");
mm = mallocz(sizeof *mm, 1);
mm->machno = i;
+ mm->new = 1;
machp[i] = mm;
conf.nmach++;
diff --git a/src/9vx/mmu.c b/src/9vx/mmu.c
@@ -141,6 +141,8 @@ static Proc *mmup;
static void
mmapflush(void)
{
+ m->flushmmu = 0;
+
/* Nothing mapped? */
if(mmup == nil || mmup->pmmu.lo > mmup->pmmu.hi)
return;
@@ -229,12 +231,13 @@ mmuswitch(Proc *p)
* one we were just in. Also, kprocs don't count --
* only the guys on cpu0 do.
*/
- if(!p->kp && mmup != p){
+ if(!p->kp && (mmup != p || p->newtlb || m->flushmmu)){
if(0) print("^^^^^^^^^^ %ld %s\n========== %ld %s\n",
mmup ? mmup->pid : 0, mmup? mmup->text : "",
p->pid, p->text);
/* No vxproc_flush - vxproc cache is okay */
mmapflush();
+ p->newtlb = 0;
mmup = p;
}
}
@@ -249,7 +252,7 @@ mmurelease(Proc *p)
return;
if(p->pmmu.vxproc)
vxproc_flush(p->pmmu.vxproc);
- if(p == mmup){
+ if(p == mmup || m->flushmmu){
mmapflush();
mmup = nil;
}
diff --git a/src/9vx/sched.c b/src/9vx/sched.c
@@ -41,11 +41,17 @@ idlehands(void)
plock(&idling);
nbad = 0;
while(!idlewakeup){
+ if(traceprocs)
+ iprint("cpu%d: idlehands\n", m->machno);
psleep(&idling);
+ if(traceprocs)
+ iprint("cpu%d: busy hands\n", m->machno);
if(!idlewakeup && ++nbad%1000 == 0)
iprint("idlehands spurious wakeup\n");
}
idlewakeup = 0;
+ if(traceprocs)
+ iprint("cpu%d: idlehands returning\n", m->machno);
punlock(&idling);
}
@@ -96,8 +102,14 @@ ready(Proc *p)
* kick off a new one.
*/
kprocq.n++;
- if(kprocq.n > nrunproc)
+ if(kprocq.n > nrunproc){
+ if(traceprocs)
+ iprint("create new cpu: kprocq.n=%d nrunproc=%d\n", kprocq.n, nrunproc);
+ nrunproc++;
newmach();
+ }
+ if(traceprocs)
+ iprint("cpu%d: ready %ld %s; wakeup kproc cpus\n", m->machno, p->pid, p->text);
pwakeup(&run);
unlock(&kprocq.lk);
punlock(&run);
@@ -120,19 +132,29 @@ runproc(void)
nbad = 0;
plock(&run);
lock(&kprocq.lk); /* redundant but fine */
+ if(m->new){
+ nrunproc--;
+ m->new = 0;
+ }
while((p = kprocq.head) == nil){
nrunproc++;
unlock(&kprocq.lk);
+ if(traceprocs)
+ iprint("cpu%d: runproc psleep %d %d\n", m->machno, kprocq.n, nrunproc);
psleep(&run);
lock(&kprocq.lk);
if(kprocq.head == nil && ++nbad%1000 == 0)
iprint("cpu%d: runproc spurious wakeup\n", m->machno);
+ if(traceprocs)
+ iprint("cpu%d: runproc awake\n", m->machno);
nrunproc--;
}
kprocq.head = p->rnext;
if(kprocq.head == 0)
kprocq.tail = nil;
kprocq.n--;
+ if(traceprocs)
+ iprint("cpu%d: runproc %ld %s [%d %d]\n", m->machno, p->pid, p->text, kprocq.n, nrunproc);
unlock(&kprocq.lk);
punlock(&run);
return p;
diff --git a/src/9vx/stub.c b/src/9vx/stub.c
@@ -134,51 +134,6 @@ splx(int s)
/*
- * Swap
- */
-Image swapimage;
-
-int
-swapfull(void)
-{
- return 0;
-}
-
-void
-kickpager(void)
-{
-}
-
-void
-setswapchan(Chan *c)
-{
-}
-
-void
-pagersummary(void)
-{
-}
-
-void
-putswap(Page *p)
-{
- panic("putswap");
-}
-
-int
-swapcount(ulong daddr)
-{
- return 0;
-}
-
-void
-dupswap(Page *p)
-{
- panic("dupswap");
-}
-
-
-/*
* Floating point.
*/
void
@@ -542,6 +497,9 @@ panic(char *fmt, ...)
buf[n] = '\n';
write(2, buf, n+1);
if(doabort){
+#ifndef __APPLE__
+ abort();
+#endif
for(;;)
microdelay(1000000);
}
diff --git a/src/9vx/vx32.c b/src/9vx/vx32.c
@@ -44,7 +44,7 @@ static vxmem thevxmem;
void
vx32sysr1(void)
{
-// traceprocs = !traceprocs;
+ traceprocs = !traceprocs;
// vx32_debugxlate = traceprocs;
tracesyscalls = !tracesyscalls;
}