commit 0687238f4aa7b3150cc097cb757f16e762f4cfc6
parent 09a0ae1864ae46cd1220469a1104bca6df770805
Author: Russ Cox <rsc@swtch.com>
Date: Fri, 4 Jul 2008 02:21:42 -0400
9vx: introduce multiple user address spaces
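Until now every user process shared one mapping window at the global
uzero, so each switch to a different process unmapped and remapped its
pages. Reserve a small pool of user address-space windows instead
(struct Uspace, up to 16, chosen with the new -U flag) and keep them in
most-recently-used order: mmuswitch attaches a process to a window via
getspace/takespace, putmmu and mmapflush work on that window, and
mmurelease gives it back with putspace. The global uzero becomes
per-process (up->pmmu.uzero), and each proc now carries its own
vxmem/vxmmap so vx32 sees the right mapping base.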
Diffstat:
7 files changed, 238 insertions(+), 117 deletions(-)
diff --git a/src/9vx/a/dat.h b/src/9vx/a/dat.h
@@ -1,4 +1,5 @@
#include <ucontext.h>
+#include "libvx32/vx32.h"
typedef struct BIOS32si BIOS32si;
typedef struct Conf Conf;
@@ -114,12 +115,14 @@ struct Conf
* MMU stuff in proc
*/
#define NCOLOR 1
+typedef struct Uspace Uspace;
struct PMMU
{
- ulong lo; // Plan 9 VX
- ulong hi; // Plan 9 VX
struct vxproc *vxproc; // Plan 9 VX
- struct vxmmap *vxmm; // Plan 9 VX
+ struct vxmem vxmem;
+ struct vxmmap vxmm; // Plan 9 VX
+ Uspace *us;
+ uchar *uzero;
};
/*
@@ -348,7 +351,6 @@ struct DevConf
// Plan 9 VX
extern int traceprocs;
extern int tracesyscalls;
-extern uchar *uzero;
extern int doabort;
/* Pthreads-based sleep and wakeup. */
@@ -361,3 +363,11 @@ struct Psleep
Pwaiter *waiter;
};
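+
+/*
+ * One reserved user address-space window.  A proc's pages are
+ * mapped at uzero while it owns the window; lo and hi record the
+ * mapped range so the window can be flushed quickly.
+ */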
+struct Uspace
+{
+ Proc *p; // proc currently mapped
+ uchar *uzero;
+ ulong lo;
+ ulong hi;
+};
+
diff --git a/src/9vx/a/fault.c b/src/9vx/a/fault.c
@@ -343,7 +343,7 @@ okaddr(ulong addr, ulong len, int write)
continue;
}
qunlock(&s->lk);
- return uzero+addr0;
+ return up->pmmu.uzero+addr0;
}
}
pprint("suicide: invalid address 0x%lux/%lud in sys call pc=0x%lux\n", addr, len, userpc());
@@ -400,7 +400,7 @@ vmemchr(void *s, int c, int n)
a += m_;
n -= m_;
if(isuaddr(a))
- uvalidaddr(a-uzero, 1, 0);
+ uvalidaddr(a-up->pmmu.uzero, 1, 0);
}
/* fits in one page */
diff --git a/src/9vx/a/sysproc.c b/src/9vx/a/sysproc.c
@@ -379,6 +379,8 @@ sysexec(ulong *arg)
/*
* Top-of-stack structure.
*/
+ uchar *uzero;
+ uzero = up->pmmu.uzero;
Tos *tos;
ulong utos;
utos = USTKTOP - sizeof(Tos);
diff --git a/src/9vx/main.c b/src/9vx/main.c
@@ -109,6 +109,9 @@ main(int argc, char **argv)
case 'S':
tracesyscalls++;
break;
+ case 'U':
+ nuspace = atoi(EARGF(usage()));
+ break;
case 'X':
vx32_debugxlate++;
break;
@@ -419,7 +422,9 @@ showexec(ulong sp)
{
ulong *a, *argv;
int i, n;
+ uchar *uzero;
+ uzero = up->pmmu.uzero;
iprint("showexec %p\n", sp);
if(sp >= USTKTOP || sp < USTKTOP-USTKSIZE)
panic("showexec: bad sp");
@@ -510,6 +515,7 @@ sigsegv(int signo, siginfo_t *info, void *v)
int read;
ulong addr, eip, esp;
ucontext_t *uc;
+ uchar *uzero;
if(m == nil)
panic("sigsegv: m == nil");
@@ -518,6 +524,8 @@ sigsegv(int signo, siginfo_t *info, void *v)
if(up == nil)
panic("sigsegv: up == nil");
+ uzero = up->pmmu.uzero;
+
uc = v;
#if defined(__APPLE__)
mcontext_t mc;
diff --git a/src/9vx/mmu.c b/src/9vx/mmu.c
@@ -30,14 +30,19 @@ int tracemmu;
static int pagefile;
static char* pagebase;
-uchar *uzero;
+
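+/*
+ * Pool of user address-space windows.  ulist keeps them in
+ * most-recently-used order; nuspace (the -U flag) sets how
+ * many are actually used.
+ */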
+static Uspace uspace[16];
+static Uspace *ulist[nelem(uspace)];
+int nuspace = 1;
int
isuaddr(void *v)
{
uchar *p;
+ uchar *uzero;
p = v;
+ uzero = up->pmmu.uzero;
return uzero <= p && p < uzero+USTKTOP;
}
@@ -46,7 +51,7 @@ isuaddr(void *v)
* The point is to reserve the space so that
* nothing else ends up there later.
*/
-static void
+static void*
mapzero(void)
{
int fd;
@@ -55,20 +60,16 @@ mapzero(void)
/* First try mmaping /dev/zero. Some OS'es don't allow this. */
if((fd = open("/dev/zero", O_RDONLY)) >= 0){
v = mmap(nil, USTKTOP, PROT_NONE, MAP_PRIVATE, fd, 0);
- if(v != MAP_FAILED){
- uzero = v;
- return;
- }
+ if(v != MAP_FAILED)
+ return v;
}
/* Next try an anonymous map. */
v = mmap(nil, USTKTOP, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
- if(v != MAP_FAILED){
- uzero = v;
- return;
- }
-
- panic("mapzero: cannot reserve process address space");
+ if(v != MAP_FAILED)
+ return v;
+
+ return nil;
}
void
@@ -76,8 +77,8 @@ mmuinit(void)
{
char tmp[] = "/var/tmp/9vx.pages.XXXXXX";
void *v;
-
- mapzero();
+ int i;
+
if((pagefile = mkstemp(tmp)) < 0)
panic("mkstemp: %r");
if(ftruncate(pagefile, MEMSIZE) < 0)
@@ -92,6 +93,17 @@ mmuinit(void)
panic("mmap pagefile: %r");
pagebase = v;
+ if(nuspace <= 0)
+ nuspace = 1;
+ if(nuspace > nelem(uspace))
+ nuspace = nelem(uspace);
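+ /* Reserve one address-space window for each Uspace. */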
+ for(i=0; i<nuspace; i++){
+ uspace[i].uzero = mapzero();
+ if(uspace[i].uzero == nil)
+ panic("mmap address space %d", i);
+ ulist[i] = &uspace[i];
+ }
+
conf.mem[0].base = 0;
conf.mem[0].npage = MEMSIZE / BY2PG;
@@ -128,23 +140,15 @@ kunmap(KMap *k)
}
/*
- * The process whose address space we've got mapped.
- * We cache our own copy so that entering the scheduler
- * and coming back out running the same process doesn't
- * cause unnecessary unmapping and remapping.
- */
-static Proc *mmup;
-
-/*
* Flush the current address space.
*/
static void
-mmapflush(void)
+mmapflush(Uspace *us)
{
m->flushmmu = 0;
/* Nothing mapped? */
- if(mmup == nil || mmup->pmmu.lo > mmup->pmmu.hi)
+ if(us == nil || us->lo > us->hi || us->uzero == nil)
return;
#ifdef __FreeBSD__
@@ -154,20 +158,20 @@ mmapflush(void)
* tell whether a page is mapped, so we have to remap
* something with no pages here.
*/
- if(mmap(uzero, mmup->pmmu.hi+BY2PG, PROT_NONE,
+ if(mmap(us->uzero, us->hi+BY2PG, PROT_NONE,
MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) == MAP_FAILED)
panic("mmapflush mmap: %r");
- mmup->pmmu.lo = 0x80000000UL;
- mmup->pmmu.hi = 0;
+ us->lo = 0x80000000UL;
+ us->hi = 0;
return;
}
#endif
/* Clear only as much as got mapped. */
- if(mprotect(uzero, mmup->pmmu.hi+BY2PG, PROT_NONE) < 0)
+ if(mprotect(us->uzero, us->hi+BY2PG, PROT_NONE) < 0)
panic("mmapflush mprotect: %r");
- mmup->pmmu.lo = 0x80000000UL;
- mmup->pmmu.hi = 0;
+ us->lo = 0x80000000UL;
+ us->hi = 0;
}
/*
@@ -178,13 +182,15 @@ void
putmmu(ulong va, ulong pa, Page *p)
{
int prot;
- PMMU *pmmu;
+ Uspace *us;
if(tracemmu || (pa&~(PTEWRITE|PTEVALID)) != p->pa)
print("putmmu va %lux pa %lux p->pa %lux\n", va, pa, p->pa);
assert(p->pa < MEMSIZE && pa < MEMSIZE);
assert(up);
+ us = up->pmmu.us;
+ assert(us);
/* Map the page */
prot = PROT_READ;
@@ -192,21 +198,20 @@ putmmu(ulong va, ulong pa, Page *p)
prot |= PROT_WRITE;
pa &= ~(BY2PG-1);
va &= ~(BY2PG-1);
- if(mmap(uzero+va, BY2PG, prot, MAP_FIXED|MAP_SHARED,
+ if(mmap(us->uzero+va, BY2PG, prot, MAP_FIXED|MAP_SHARED,
pagefile, pa) == MAP_FAILED)
panic("putmmu");
/* Record high and low address range for quick unmap. */
- pmmu = &up->pmmu;
- if(pmmu->lo > va)
- pmmu->lo = va;
- if(pmmu->hi < va)
- pmmu->hi = va;
+ if(us->lo > va)
+ us->lo = va;
+ if(us->hi < va)
+ us->hi = va;
// printlinuxmaps();
}
/*
- * The memory maps have changed. Flush all cached state.
+ * The memory maps have changed for up. Flush all cached state.
*/
void
flushmmu(void)
@@ -214,9 +219,78 @@ flushmmu(void)
if(tracemmu)
print("flushmmu\n");
- if(up)
+ if(up){
vxproc_flush(up->pmmu.vxproc);
- mmapflush();
+ mmapflush(up->pmmu.us);
+ }
+}
+
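+/*
+ * Note that us is in use: move it to the front of ulist,
+ * so it is the last window getspace will evict.
+ */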
+void
+usespace(Uspace *us)
+{
+ int i;
+
+ for(i=0; i<nuspace; i++)
+ if(ulist[i] == us){
+ while(i > 0){
+ ulist[i] = ulist[i-1];
+ i--;
+ }
+ ulist[0] = us;
+ break;
+ }
+}
+
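+/*
+ * Find a window for p: take the least recently used one
+ * (the tail of ulist), evicting whatever proc still holds it.
+ */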
+Uspace*
+getspace(Proc *p)
+{
+ Uspace *us;
+
+ us = ulist[nuspace-1];
+ if(us->p){
+ if(tracemmu)
+ print("^^^^^^^^^^ %ld %s [evict %d]\n", us->p->pid, us->p->text, us - uspace);
+ mmapflush(us);
+ }
+ us->p = p;
+ p->pmmu.vxmm.base = us->uzero;
+ p->pmmu.uzero = us->uzero;
+ p->pmmu.us = us;
+ usespace(us);
+ return us;
+}
+
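+/*
+ * Reclaim the window p used before.  If another proc stole it
+ * in the meantime, take it back and flush its stale mappings.
+ */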
+void
+takespace(Proc *p, Uspace *us)
+{
+ usespace(us);
+ if(us->p == p)
+ return;
+ if(tracemmu){
+ if(us->p)
+ print("^^^^^^^^^^ %ld %s [steal %d]\n", us->p->pid, us->p->text, us - uspace);
+ }
+ us->p = p;
+ mmapflush(us);
+}
+
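+/*
+ * Release the window: unmap everything, detach its proc, and
+ * move it to the tail of ulist so getspace reuses it first.
+ */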
+void
+putspace(Uspace *us)
+{
+ int i;
+
+ mmapflush(us);
+ us->p->pmmu.us = nil;
+ us->p->pmmu.uzero = nil;
+ us->p->pmmu.vxmm.base = nil;
+ us->p = nil;
+ for(i=0; i<nuspace; i++)
+ if(ulist[i] == us){
+ while(++i < nuspace)
+ ulist[i-1] = ulist[i];
+ ulist[i-1] = us;
+ break;
+ }
}
/*
@@ -231,15 +305,31 @@ mmuswitch(Proc *p)
* one we were just in. Also, kprocs don't count --
* only the guys on cpu0 do.
*/
- if(!p->kp && (mmup != p || p->newtlb || m->flushmmu)){
- if(0) print("^^^^^^^^^^ %ld %s\n========== %ld %s\n",
- mmup ? mmup->pid : 0, mmup? mmup->text : "",
- p->pid, p->text);
- /* No vxproc_flush - vxproc cache is okay */
- mmapflush();
+ if(p->kp)
+ return;
+
+ if(tracemmu)
+ print("mmuswitch %ld %s\n", p->pid, p->text);
+
+ if(p->pmmu.us && p->pmmu.us->p == p){
+ if(tracemmu) print("---------- %ld %s [%d]\n",
+ p->pid, p->text, p->pmmu.us - uspace);
+ usespace(p->pmmu.us);
+ if(!p->newtlb && !m->flushmmu){
+ usespace(p->pmmu.us);
+ return;
+ }
+ mmapflush(p->pmmu.us);
p->newtlb = 0;
- mmup = p;
+ return;
}
+
+ if(p->pmmu.us == nil)
+ getspace(p);
+ else
+ takespace(p, p->pmmu.us);
+ if(tracemmu) print("========== %ld %s [%d]\n",
+ p->pid, p->text, p->pmmu.us - uspace);
}
/*
@@ -250,11 +340,16 @@ mmurelease(Proc *p)
{
if(p->kp)
return;
+ if(tracemmu)
+ print("mmurelease %ld %s\n", p->pid, p->text);
if(p->pmmu.vxproc)
vxproc_flush(p->pmmu.vxproc);
- if(p == mmup || m->flushmmu){
- mmapflush();
- mmup = nil;
+ if(p->pmmu.us){
+ if(tracemmu)
+ print("^^^^^^^^^^ %ld %s [release %d]\n", p->pid, p->text, p->pmmu.us - uspace);
+ putspace(p->pmmu.us);
+ if(m->flushmmu)
+ mmapflush(p->pmmu.us);
}
}
diff --git a/src/9vx/trap.c b/src/9vx/trap.c
@@ -30,7 +30,7 @@ kexit(Ureg *ureg)
Tos *tos;
/* precise time accounting, kernel exit */
- tos = (Tos*)(uzero+USTKTOP-sizeof(Tos));
+ tos = (Tos*)(up->pmmu.uzero+USTKTOP-sizeof(Tos));
cycles(&t);
tos->kcycles += t - up->kentry;
tos->pcycles = up->pcycles;
@@ -90,7 +90,7 @@ trap(Ureg *ureg)
case VXTRAP_SOFT+0x40: /* int $0x40 - system call */
if(tracesyscalls){
- ulong *sp = (ulong*)(uzero + ureg->usp);
+ ulong *sp = (ulong*)(up->pmmu.uzero + ureg->usp);
print("%d [%s] %s %#lux %08lux %08lux %08lux %08lux\n",
up->pid, up->text,
sysctab[ureg->ax], sp[0], sp[1], sp[2], sp[3]);
@@ -262,7 +262,7 @@ syscall(Ureg *ureg)
up->psstate = 0;
if(scallnr == NOTED)
- noted(ureg, *(ulong*)(uzero + sp+BY2WD));
+ noted(ureg, *(ulong*)(up->pmmu.uzero + sp+BY2WD));
if(scallnr!=RFORK && (up->procctl || up->nnote)){
splhi();
@@ -335,6 +335,8 @@ notify(Ureg* ureg)
pexit("Suicide", 0);
}
+ uchar *uzero;
+ uzero = up->pmmu.uzero;
upureg = (void*)(uzero + sp);
memmove(upureg, ureg, sizeof(Ureg));
*(ulong*)(uzero + sp-BY2WD) = up->ureg; /* word under Ureg is old up->ureg */
@@ -383,6 +385,8 @@ noted(Ureg* ureg, ulong arg0)
pexit("Suicide", 0);
}
+ uchar *uzero;
+ uzero = up->pmmu.uzero;
oureg = up->ureg;
nureg = (Ureg*)(uzero + up->ureg);
@@ -442,11 +446,11 @@ execregs(ulong entry, ulong ssize, ulong nargs)
up->fpstate = FPinit;
fpoff();
- sp = (ulong*)(uzero + USTKTOP - ssize);
+ sp = (ulong*)(up->pmmu.uzero + USTKTOP - ssize);
*--sp = nargs;
ureg = up->dbgreg;
- ureg->usp = (uchar*)sp - uzero;
+ ureg->usp = (uchar*)sp - up->pmmu.uzero;
//showexec(ureg->usp);
ureg->pc = entry;
return USTKTOP-sizeof(Tos); /* address of kernel/user shared data */
diff --git a/src/9vx/vx32.c b/src/9vx/vx32.c
@@ -17,7 +17,6 @@
#include "u.h"
#include <pthread.h>
#include <sys/mman.h>
-#include "libvx32/vx32.h"
#include "lib.h"
#include "mem.h"
#include "dat.h"
@@ -50,47 +49,6 @@ vx32sysr1(void)
}
/*
- * Vxnewproc is called at the end of newproc
- * to fill in vx32-specific entries in the Proc struct
- * before it gets used.
- */
-void
-vxnewproc(Proc *p)
-{
- PMMU *pm;
-
- pm = &p->pmmu;
-
- /*
- * Kernel procs don't need vxprocs; if this proc
- * already has one, take it away. Also, give
- * kernel procs very large stacks so they can call
- * into non-thread-friendly routines like x11
- * and getgrgid.
- */
- if(p->kp){
- if(pm->vxproc){
- pm->vxproc->mem = nil;
- vxproc_free(pm->vxproc);
- pm->vxproc = nil;
- }
- free(p->kstack);
- p->kstack = nil;
- p->kstack = smalloc(512*1024);
- return;
- }
-
- pm->lo = 0x80000000UL;
- pm->hi = 0;
- if(pm->vxproc == nil){
- pm->vxproc = vxproc_alloc();
- if(pm->vxproc == nil)
- panic("vxproc_alloc");
- pm->vxproc->mem = &thevxmem;
- }
-}
-
-/*
* Vx32 hooks to read, write, map, unmap, and check permissions
* on user memory. Normally these are more involved, but we're
* using the processor to do everything.
@@ -98,29 +56,21 @@ vxnewproc(Proc *p)
static ssize_t
vmread(vxmem *vm, void *data, uint32_t addr, uint32_t len)
{
- memmove(data, uzero+addr, len);
+ memmove(data, vm->mapped->base+addr, len);
return len;
}
static ssize_t
vmwrite(vxmem *vm, const void *data, uint32_t addr, uint32_t len)
{
- memmove(uzero+addr, data, len);
+ memmove(vm->mapped->base+addr, data, len);
return len;
}
-static vxmmap thevxmmap =
-{
- 1,
- (void*)-1, /* to be filled in with user0 */
- USTKTOP,
-};
-
static vxmmap*
vmmap(vxmem *vm, uint32_t flags)
{
- thevxmmap.base = uzero;
- return &thevxmmap;
+ return vm->mapped;
}
static void
@@ -131,6 +81,14 @@ vmunmap(vxmem *vm, vxmmap *mm)
static int
vmcheckperm(vxmem *vm, uint32_t addr, uint32_t len, uint32_t perm, uint32_t *out_faultva)
{
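+ /* Refuse ranges that start beyond USTKTOP or wrap around or run past it. */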
+ if(addr >= USTKTOP){
+ *out_faultva = addr;
+ return 0;
+ }
+ if(addr+len < addr || addr+len > USTKTOP){
+ *out_faultva = USTKTOP;
+ return 0;
+ }
/* All is allowed - handle faults as they happen. */
return 1;
}
@@ -164,6 +122,50 @@ static vxmem thevxmem =
vmfree,
};
+/*
+ * Vxnewproc is called at the end of newproc
+ * to fill in vx32-specific entries in the Proc struct
+ * before it gets used.
+ */
+void
+vxnewproc(Proc *p)
+{
+ PMMU *pm;
+
+ pm = &p->pmmu;
+
+ /*
+ * Kernel procs don't need vxprocs; if this proc
+ * already has one, take it away. Also, give
+ * kernel procs very large stacks so they can call
+ * into non-thread-friendly routines like x11
+ * and getgrgid.
+ */
+ if(p->kp){
+ if(pm->vxproc){
+ // vxunmap(p);
+ assert(pm->uzero == nil);
+ pm->vxproc->mem = nil;
+ vxproc_free(pm->vxproc);
+ pm->vxproc = nil;
+ }
+ free(p->kstack);
+ p->kstack = nil;
+ p->kstack = smalloc(512*1024);
+ return;
+ }
+
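+ /*
+ * Each proc now has its own vxmem and vxmmap, so the mapping
+ * base can follow whichever Uspace the proc is given
+ * (getspace fills in vxmm.base).
+ */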
+ if(pm->vxproc == nil){
+ pm->vxproc = vxproc_alloc();
+ if(pm->vxproc == nil)
+ panic("vxproc_alloc");
+ pm->vxproc->mem = &pm->vxmem;
+ pm->vxmem = thevxmem;
+ pm->vxmem.mapped = &pm->vxmm;
+ memset(&pm->vxmm, 0, sizeof pm->vxmm);
+ }
+}
+
static void
setclock(int start)
{
@@ -224,7 +226,7 @@ touser(void *initsp)
* Optimization: try to fault in code page and stack
* page right now, since we're likely to need them.
*/
- if(up->pmmu.hi == 0){
+ if(up->pmmu.us->hi == 0){
fault(vp->cpu->eip, 1);
fault(vp->cpu->reg[ESP], 0);
}
@@ -267,11 +269,11 @@ touser(void *initsp)
addr = (uchar*)vp->cpu->trapva;
if(traceprocs)
print("fault %p read=%d\n", addr, read);
- if(isuaddr(addr) && fault(addr - uzero, read) >= 0)
+ if(isuaddr(addr) && fault(addr - up->pmmu.uzero, read) >= 0)
continue;
print("%ld %s: unhandled fault va=%lux [%lux] eip=%lux\n",
up->pid, up->text,
- addr - uzero, vp->cpu->trapva, vp->cpu->eip);
+ addr - up->pmmu.uzero, vp->cpu->trapva, vp->cpu->eip);
proc2ureg(vp, &u);
dumpregs(&u);
if(doabort)