commit b0d887c52d091ae3c62316cfc54ff178f64ab850
parent a700164cb1d15543535ee17fbff2f91fe4b4f595
Author: John (EBo) David <ebo@users.sourceforge.net>
Date:   Mon, 21 Jun 2010 03:17:15 -0500
attempting merge
--HG--
branch : yy-int-branch
Diffstat:
74 files changed, 28454 insertions(+), 87 deletions(-)
diff --git a/.hgignore b/.hgignore
@@ -25,6 +25,8 @@ src/vxa/bz2/*ebz2
 src/vxlinux/vxlinux
 src/9vx/9vx
 src/9vx/bootcode.S
+src/9vx/fossil.S
+src/9vx/venti.S
 src/9vx/data2s
 src/9vx/a/errstr.h
 src/9vx/kerndate.h
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
@@ -3,3 +3,9 @@ The following people have contributed source code to vx32.
 Bryan Ford <baford@pdos.csail.mit.edu>
 Michael Teichgräber <mt4swm@googlemail.com>
 Russ Cox <rsc@swtch.com>
+Jesus Galan Lopez <yiyu.jgl@gmail.com>
+Tuly Gray
+Devon H. O'Dell
+Ron Minnich
+Erik Quanstrom
+Brian L. Stuart
diff --git a/doc/9vx.1 b/doc/9vx.1
@@ -0,0 +1,127 @@
+.TH 9VX 1
+.SH NAME
+9vx, 9vx-tap \- Plan 9 port to the virtual execution environment vx32
+.SH SYNOPSIS
+.B 9vx
+[
+.I option ...
+]
+[
+.I -p 9vx.ini
+]
+[
+.I -r root
+]
+[
+.I -u user
+]
+.PP
+.B 9vx-tap
+[
+.I option ...
+]
+[
+.I -p 9vx.ini
+]
+[
+.I -r root
+]
+[
+.I -u user
+]
+.SH DESCRIPTION
+Plan 9 VX (or
+.I 9vx
+for short) is a port of the Plan 9 operating system to run on top of commodity operating systems, allowing the use of both Plan 9 and the host system simultaneously. To run user programs,
+.I 9vx
+creates an appropriate address space in a window within its own address space and invokes vx32 to simulate user mode execution. Some hardware devices are replaced by virtual versions, depending on the options given to
+.I 9vx.
+.I 9vx-tap
+is a shell script that sets up a tap device with tunctl(1), launches
+.I 9vx,
+and removes the tap device when finished.
+.PP
+Options can be passed to
+.I 9vx
+as command line arguments or in a configuration file with the
+.I -p
+option (see below). If no
+.I root
+argument is present, the current directory or
+.I /usr/local/9vx
+is used.
+Alternatively, a file system can be specified in the 9vx.ini file.
+If a
+.I user
+is not specified, the current user in the host operating system will be used.
+Other options are:
+.nr xx \w'\fL-m\f2name\ \ '
+.TP \n(xxu
+.BI -b
+Run /boot/boot instead of bootscript
+.TP
+.BI -f
+Do not fork at init
+.TP
+.BI -g
+Do not start the gui
+.TP
+.BI -i
+Run rc instead of init
+.TP
+.BI -t
+Use tty for input/output
+.TP
+.BI -n " [ tap ] [ device ]"
+Create virtual ethernet devices. The
+.I tap
+option indicates that
+.I device
+is a tap device. Otherwise, the virtual device will use pcap(3) to intercept packets going to
+.I device,
+and will therefore need root privileges. If a host
+.I device
+is not specified, pcap will use the first one available, and tap will use the
+.I tap0
+device. More than one virtual ethernet device can be used. In absence of virtual devices, the network stack of the host system will be used.
+.TP
+.BI -m " macaddress"
+Use the hardware address
+.I macaddress
+for the last given virtual network device.
+.SS 9vx.ini configuration files
+Configuration parameters can also be given to
+.I 9vx
+in the configuration file specified with the
+.I -p
+command line option.
+The file name
+.L -
+means the standard input.
+The file
+.I 9vx.ini
+has to contain a list of
+.I parameter=value
+pairs in a similar fashion to plan9.ini(8). Available options are
+.I bootboot,
+.I nofork,
+.I nogui,
+.I initrc,
+.I usetty,
+.I net,
+.I macaddr,
+.I localroot
+and
+.I user.
+Other options will be passed to the boot process as environment variables.
+.SH BUGS
+The menu system of plan9.ini(8) is not supported in
+.I 9vx.ini
+files.
+.P
+.I 9vx
+is not as stable as native Plan 9 systems.
+.SH "SEE ALSO"
+.br
+Bryan Ford and Russ Cox,
+``Vx32: Lightweight User-level Sandboxing on the x86''.
diff --git a/src/9vx/9vx-tap b/src/9vx/9vx-tap
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+USERID=`whoami`
+
+# Create the tap device with tunctl; give up if none could be created
+IFACE=`sudo tunctl -b -u "$USERID"` || exit 1
+# or openvpn
+#IFACE=tap0
+#sudo openvpn --mktun --dev $IFACE --user $USERID
+
+# Bring the tap device up
+sudo /sbin/ifconfig "$IFACE" 0.0.0.0 up
+
+# Add it to the bridge
+sudo /usr/sbin/brctl addif br0 "$IFACE"
+
+# Launch 9vx (use -f to not fork); "$@" passes each extra argument through intact
+9vx -f -n tap "$IFACE" "$@"
+
+# Bring the tap device down and disconnect from br0
+sudo /sbin/ifconfig "$IFACE" down
+sudo /usr/sbin/brctl delif br0 "$IFACE"
+
+# Remove the tap device with tunctl (portable redirection; "&>" is not POSIX sh)
+sudo tunctl -d "$IFACE" >/dev/null 2>&1
+# or openvpn
+#sudo openvpn --rmtun --dev $IFACE
diff --git a/src/9vx/LICENSE b/src/9vx/LICENSE
@@ -4,6 +4,8 @@ Plan 9 from Bell Labs distribution, which carries this license.
 The local changes are Copyright (c) 2006-2008 Russ Cox and
 are distributed as contributions under the terms of this license.
 
+Other contributors are listed in the AUTHORS file.
+
 
 ===================================================================
 
diff --git a/src/9vx/Makefrag b/src/9vx/Makefrag
@@ -29,13 +29,12 @@ PLAN9_OBJS = \
 		devaudio.o \
 		devaudio-$(PLAN9AUDIO).o \
 		devfs-posix.o \
-		devip.o \
-		devip-posix.o \
 		devmntloop.o \
 		devmouse.o \
 		devram.o \
 		devtab.o \
 		factotum.o \
+		fossil.o \
 		kprocdev.o \
 		label.o \
 		main.o \
@@ -47,6 +46,7 @@ PLAN9_OBJS = \
 		time.o \
 		trap.o \
 		tty.o \
+		venti.o \
 		vx32.o \
 	)
 
@@ -58,7 +58,6 @@ PLAN9_A_OBJS = \
 	$(addprefix 9vx/a/, \
 		allocb.o \
 		auth.o \
-		bo.o \
 		chan.o \
 		classmask.o \
 		cleanname.o \
@@ -91,6 +90,7 @@ PLAN9_A_OBJS = \
 		page.o \
 		parse.o \
 		parseip.o \
+		part.o \
 		pgrp.o \
 		print.o \
 		proc.o \
@@ -111,6 +111,48 @@ PLAN9_A_OBJS = \
 		utf.o \
 	)
 
+PLAN9_IP_OBJS = \
+	$(addprefix 9vx/,\
+		devip.o \
+		devip-posix.o \
+		etherpcap.o \
+		ethertap.o \
+		vether.o \
+	) \
+	$(addprefix 9vx/a/,\
+		devaoe.o \
+		devether.o \
+		netif.o \
+		sdaoe.o \
+	) \
+	$(addprefix 9vx/a/ip/,\
+		arp.o \
+		chandial.o \
+		devip.o \
+		esp.o \
+		ethermedium.o \
+		gre.o \
+		icmp.o \
+		icmp6.o \
+		il.o \
+		inferno.o \
+		ip.o \
+		ipaux.o \
+		ipifc.o \
+		ipmux.o \
+		iproute.o \
+		ipv6.o \
+		loopbackmedium.o \
+		netdevmedium.o \
+		netlog.o \
+		nullmedium.o \
+		pktmedium.o \
+		ptclbsum.o \
+		tcp.o \
+		udp.o \
+	)
+PLAN9_IP_LIBS = -lpcap
+
 PLAN9_nogui_OBJS = \
 	$(addprefix 9vx/,\
 		nogui.o \
@@ -142,6 +184,7 @@ PLAN9_GUI_LIBS = $(PLAN9_$(PLAN9GUI)_LIBS)
 PLAN9_DEPS = \
 	$(PLAN9_OBJS) \
 	$(PLAN9_A_OBJS) \
+	$(PLAN9_IP_OBJS) \
 	$(PLAN9_GUI_OBJS) \
 	9vx/libsec/libsec.a \
 	9vx/libmemlayer/libmemlayer.a \
@@ -150,7 +193,7 @@ PLAN9_DEPS = \
 	libvx32/libvx32.a \
 
 9vx/9vx: $(PLAN9_DEPS)
-	$(HOST_CC) -o $@ $(PLAN9_DEPS) $(PLAN9_GUI_LIBS) -lpthread
+	$(HOST_CC) -o $@ $(PLAN9_DEPS) $(PLAN9_GUI_LIBS) $(PLAN9_IP_LIBS) -lpthread
 
 9vx/a/%.o: 9vx/a/%.c
 	$(HOST_CC) $(HOST_CFLAGS) -I. -I9vx -I9vx/a -Wall -Wno-missing-braces -c -o $@ $<
@@ -176,6 +219,12 @@ PLAN9_DEPS = \
 9vx/factotum.S: 9vx/data2s 9vx/factotum.9
 	./9vx/data2s factotum < 9vx/factotum.9 >$@_ && mv $@_ $@
 
+9vx/fossil.S: 9vx/data2s 9vx/fossil.9
+	./9vx/data2s fossil < 9vx/fossil.9 >$@_ && mv $@_ $@
+
+9vx/venti.S: 9vx/data2s 9vx/venti.9
+	./9vx/data2s venti < 9vx/venti.9 > $@_ && mv $@_ $@
+
 9vx/a/errstr.h: 9vx/a/error.h
 	sed 's/extern //; s!;.*/\* ! = "!; s! \*\/!";!' 9vx/a/error.h >9vx/a/errstr.h
 
@@ -199,7 +248,10 @@ CLEAN_FILES += \
 	9vx/a/errstr.h \
 	9vx/9vx \
 	9vx/data2s \
-	9vx/bootcode.S
+	9vx/bootcode.S \
+	9vx/factotum.S \
+	9vx/fossil.S \
+	9vx/venti.S
 
 include 9vx/libdraw/Makefrag
 include 9vx/libmemlayer/Makefrag
diff --git a/src/9vx/a/aoe.h b/src/9vx/a/aoe.h
@@ -0,0 +1,84 @@
+enum {	/* values stored in the Aoehdr cmd field */
+	ACata,		/* used by devaoe.c for ata read/write frames */
+	ACconfig,	/* used by devaoe.c for discovery (config query) frames */
+};
+
+enum {	/* NOTE(review): presumably the config-query sub-commands carried in Aoeqc.verccmd -- confirm against the users of these names */
+	AQCread,
+	AQCtest,
+	AQCprefix,
+	AQCset,
+	AQCfset,
+};
+
+enum {	/* values of Aoehdr.error; the numbering lines up with the message table in devaoe.c's aoeerror() */
+	AEcmd	= 1,	/* bad command code */
+	AEarg,		/* bad argument param */
+	AEdev,		/* device unavailable */
+	AEcfg,		/* config string present */
+	AEver,		/* unsupported version */
+};
+
+enum {
+	Aoetype	= 0x88a2,	/* written into the ethernet type field of every frame */
+	Aoesectsz = 512,	/* sector size in bytes; scnt fields count units of this */
+	Szaoeata	= 24+12,	/* bytes of Aoeata put on the wire (common header + ata part), assuming Eaddrlen is 6 */
+	Szaoeqc	= 24+8,		/* bytes of Aoeqc put on the wire (common header + config part), assuming Eaddrlen is 6 */
+	Aoever	= 1,		/* protocol version, placed in the high nibble of verflag */
+
+	AFerr	= 1<<2,		/* verflag bit: the error field is valid */
+	AFrsp	= 1<<3,		/* verflag bit -- NOTE(review): presumably marks a response; confirm */
+
+	AAFwrite= 1,		/* aflag bit: request is a write */
+	AAFext	= 1<<6,		/* aflag bit: extended (48-bit lba) command */
+};
+
+typedef struct {	/* header common to every aoe frame; all fields are byte arrays, so no padding is expected */
+	uchar	dst[Eaddrlen];	/* destination ethernet address */
+	uchar	src[Eaddrlen];	/* source ethernet address */
+	uchar	type[2];	/* ethernet type, set to Aoetype */
+	uchar	verflag;	/* version in the high nibble, AF* flags below it */
+	uchar	error;		/* AE* code, meaningful when AFerr is set */
+	uchar	major[2];	/* target major address */
+	uchar	minor;		/* target minor address */
+	uchar	cmd;		/* AC* command */
+	uchar	tag[4];		/* initiator's tag for matching responses to requests */
+} Aoehdr;
+
+typedef struct {	/* ata command frame: the Aoehdr fields followed by the ata registers */
+	uchar	dst[Eaddrlen];
+	uchar	src[Eaddrlen];
+	uchar	type[2];
+	uchar	verflag;
+	uchar	error;
+	uchar	major[2];
+	uchar	minor;
+	uchar	cmd;
+	uchar	tag[4];
+	uchar	aflag;		/* AAF* flags */
+	uchar	errfeat;
+	uchar	scnt;		/* sector count, in units of Aoesectsz */
+	uchar	cmdstat;	/* ata command on requests */
+	uchar	lba[6];		/* 48-bit lba, least significant byte first */
+	uchar	res[2];
+} Aoeata;
+
+typedef struct {	/* config query frame: the Aoehdr fields followed by the config part */
+	uchar	dst[Eaddrlen];
+	uchar	src[Eaddrlen];
+	uchar	type[2];
+	uchar	verflag;
+	uchar	error;
+	uchar	major[2];
+	uchar	minor;
+	uchar	cmd;
+	uchar	tag[4];
+	uchar	bufcnt[2];
+	uchar	fwver[2];
+	uchar	scnt;
+	uchar	verccmd;	/* NOTE(review): presumably version plus an AQC* sub-command -- confirm */
+	uchar	cslen[2];	/* NOTE(review): presumably the length of the config string that follows -- confirm */
+} Aoeqc;
+
+extern char Echange[];
+extern char Enotup[];
diff --git a/src/9vx/a/chan.c b/src/9vx/a/chan.c
@@ -28,7 +28,7 @@ struct Elemlist
 {
 	char	*aname;	/* original name */
 	char	*name;	/* copy of name, so '/' can be overwritten */
-	int	nelems;
+	uint	nelems;
 	char	**elems;
 	int	*off;
 	int	mustbedir;
diff --git a/src/9vx/a/devaoe.c b/src/9vx/a/devaoe.c
@@ -0,0 +1,2575 @@
+/*
+ *	© 2005-8 coraid
+ *	aoe storage initiator
+ */
+
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "ureg.h"
+#include "error.h"
+#include "netif.h"
+#include "etherif.h"
+#include "ip/ip.h"
+#include "aoe.h"
+
+#define	WAKEUP(x)	wakeup(&((x)->rend))	/* wake whoever sleeps on x's Rendez */
+#define SLEEP(a,b,c)	sleep(&((a)->rend), b, c)	/* sleep on a's Rendez; (a) is parenthesized, as in WAKEUP, so any pointer expression is safe */
+
+//#pragma	varargck argpos	eventlog	1
+
+#define dprint(...)	if(debug) eventlog(__VA_ARGS__); else USED(debug);
+#define uprint(...)	snprint(up->genbuf, sizeof up->genbuf, __VA_ARGS__);
+
+enum {
+	Maxunits	= 0xff,
+	Maxframes	= 128,
+	Maxmtu		= 100000,
+	Ndevlink	= 6,	/* size of Aoedev.dltab */
+	Nea		= 6,	/* ethernet addresses remembered per Devlink (eatab) */
+	Nnetlink	= 6,	/* size of the netlinks tables */
+};
+
+#define TYPE(q)		((ulong)(q).path & 0xf)	/* file type: qid path bits 0-3 */
+#define UNIT(q)		(((ulong)(q).path>>4) & 0xff)	/* unit number: bits 4-11 */
+#define L(q)		(((ulong)(q).path>>12) & 0xf)	/* devlink index: bits 12-15 */
+#define QID(u, t) 	((u)<<4 | (t))
+#define Q3(l, u, t)	((l)<<12 | QID(u, t))	/* l goes above the 8 unit bits so that L() and UNIT() recover what was packed */
+#define UP(d)		((d)->flag & Dup)
+
+#define	Ticks		msec()
+#define	Ms2tk(t)	(((t)*HZ)/1000)
+#define	Tk2ms(t)	(((t)*1000)/HZ)
+
+enum {	/* file types held in the low four bits of a qid path (see TYPE), followed by assorted constants */
+	Qzero,
+	Qtopdir		= 1,
+	Qtopbase,
+	Qtopctl		= Qtopbase,
+	Qtoplog,
+	Qtopend,
+
+	Qunitdir,
+	Qunitbase,
+	Qctl		= Qunitbase,
+	Qdata,
+	Qconfig,
+	Qident,
+
+	Qdevlinkdir,
+	Qdevlinkbase,
+	Qdevlink	= Qdevlinkbase,
+	Qdevlinkend,
+
+	Qtopfiles	= Qtopend-Qtopbase,	/* number of plain files in the top directory */
+	Qdevlinkfiles	= Qdevlinkend-Qdevlinkbase,
+
+	Eventlen 	= 256,	/* bytes per event log slot: one length byte plus text */
+	Nevents 	= 64,	/* slots in the event log ring */
+
+	Fread		= 0,
+	Fwrite,
+	Tfree		= -1,	/* Frame.tag value of an unused frame */
+	Tmgmt,		/* 0; newtag never hands out this value or Tfree */
+
+	/* round trip bounds, timeouts, in ticks */
+	Rtmax		= Ms2tk(320),
+	Rtmin		= Ms2tk(20),
+	Srbtimeout	= 45*HZ,
+
+	Dbcnt		= 1024,	/* byte count the sweeper falls back to after repeated jumbo losses */
+
+	Crd		= 0x20,	/* NOTE(review): these look like ata opcodes; atarw builds the same values as 0x20|writebit|extbit */
+	Crdext		= 0x24,
+	Cwr		= 0x30,
+	Cwrext		= 0x34,
+	Cid		= 0xec,
+};
+
+enum {	/* direction argument handed to rw() */
+	Read,
+	Write,
+};
+
+/*
+ * unified set of flags
+ * a Netlink + Aoedev must both be jumbo capable
+ * to send jumbograms to that interface.
+ */
+enum {	/* bits of the uchar flag fields in Aoedev, Devlink and Netlink; keep in step with flagname[] */
+	/* sync with ahci.h */
+	Dllba 	= 1<<0,	/* atarw issues extended (48-bit lba) commands when set */
+	Dsmart	= 1<<1,
+	Dpower	= 1<<2,
+	Dnop	= 1<<3,
+	Datapi	= 1<<4,
+	Datapi16= 1<<5,
+
+	/* aoe specific */
+	Dup	= 1<<6,	/* device or link is usable; tested by UP() */
+	Djumbo	= 1<<7,
+};
+
+static char *flagname[] = {	/* printable names; entry i names flag bit 1<<i (see pflag) */
+	"llba",
+	"smart",
+	"power",
+	"nop",
+	"atapi",
+	"atapi16",
+
+	"up",
+	"jumbo",
+};
+
+typedef struct {	/* one local network interface used to reach aoe targets */
+	uchar	flag;
+	uchar	lostjumbo;	/* bumped by the sweeper when a large frame times out */
+	int	datamtu;
+
+	Chan	*cc;		/* nil marks an unused slot (see discover, topctlread) */
+	Chan	*dc;		/* channel frames are written to with bwrite */
+	Chan	*mtu;		/* open early to prevent bind issues. */
+	char	path[Maxpath];
+	uchar	ea[Eaddrlen];	/* copied into the source address of outgoing frames */
+} Netlink;
+
+typedef struct {	/* path from one Netlink to a device, with the target addresses seen on it */
+	Netlink	*nl;
+	int	nea;		/* entries of eatab in use */
+	ulong	eaidx;		/* round-robin counter used by pickea */
+	uchar	eatab[Nea][Eaddrlen];
+	int	datamtu;
+	ulong	npkt;		/* frames sent on this link */
+	ulong	resent;		/* of those, how many were retransmissions */
+	uchar	flag;
+
+	ulong	rttavg;		/* smoothed round trip time, maintained by rtupdate */
+	ulong	mintimer;	/* floor applied to round trip samples in rtupdate */
+} Devlink;
+
+typedef struct Srb Srb;
+struct Srb {	/* one queued i/o request; atarw carves it into frames */
+	Rendez	rend;		/* the requester sleeps here until srbready */
+	Srb	*next;		/* link in the device's head/tail queue */
+	ulong	ticksent;	/* creation time; hset gives up once Srbtimeout has passed */
+	ulong	len;		/* bytes not yet handed to a frame */
+	vlong	sector;		/* next sector to transfer */
+	short	write;		/* non-zero for writes */
+	short	nout;		/* frames currently outstanding for this request */
+	char	*error;		/* set by srberror; nil while all is well */
+	void	*dp;		/* current position in the data buffer */
+	void	*data;		/* start of the data buffer */
+};
+
+typedef struct {	/* one outstanding (or free) request frame of a device */
+	int	tag;		/* Tfree when the slot is unused, otherwise the tag sent on the wire */
+	ulong	bcnt;		/* bytes covered by this frame */
+	ulong	dlen;		/* payload bytes sent after the header (0 for reads) */
+	vlong	lba;
+	ulong	ticksent;
+	int	nhdr;		/* valid bytes in hdr */
+	uchar	hdr[ETHERMINTU];
+	void	*dp;		/* payload position inside the owning srb's buffer */
+	Devlink	*dl;		/* link chosen by hset for the latest transmission */
+	Netlink	*nl;
+	int	eaidx;		/* index into dl->eatab of the destination address used */
+	Srb	*srb;		/* owning request, nil once completed or failed */
+} Frame;
+
+typedef struct Aoedev Aoedev;
+struct Aoedev {	/* one aoe target (major.minor) known to the initiator */
+	QLock	qlock;
+	Aoedev	*next;		/* link in the devs list */
+
+	ulong	vers;		/* compared with the qid version in unitread; mismatch yields Echange */
+
+	int	ndl;		/* number of devlinks in use */
+	ulong	dlidx;		/* round-robin counter used by pickdevlink */
+	Devlink	*dl;
+	Devlink	dltab[Ndevlink];
+
+	ushort	fwver;
+	uchar	flag;
+	int	nopen;		/* opens of the data file */
+	int	major;
+	int	minor;
+	int	unit;
+	int	lasttag;	/* high half of the most recent tag (see newtag) */
+	int	nframes;
+	Frame	*frames;
+	vlong	bsize;		/* device size in bytes as exposed through the data file */
+	vlong	realbsize;
+
+	uint	maxbcnt;	/* largest byte count placed in one frame */
+	uint	maxmtu;
+	ulong	lostjumbo;
+	ushort	nout;		/* frames outstanding */
+	ushort	maxout;		/* current window: freeframe refuses once nout reaches it */
+	ulong	lastwadj;	/* time of the last window adjustment */
+	Srb	*head;		/* queue of requests not yet started */
+	Srb	*tail;
+	Srb	*inprocess;	/* request currently being split into frames */
+
+	char	serial[20+1];
+	char	firmware[8+1];
+	char	model[40+1];
+	int	nconfig;	/* valid bytes in config */
+	uchar	config[1024];
+	uchar	ident[512];
+};
+
+//#pragma	varargck type	"æ"	Aoedev*
+
+static struct {	/* event log: a ring of Nevents slots of Eventlen bytes, each a length byte followed by text */
+	Lock	lk;
+	QLock	qlock;
+	Rendez	rend;
+	char	buf[Eventlen*Nevents];
+	char	*rp;	/* oldest unread slot; its length byte is 0 when the log is empty */
+	char	*wp;	/* slot the next event is written to */
+} events;
+
+static struct {	/* list of all known devices, linked through Aoedev.next */
+	RWlock	rwlock;
+	int	nd;
+	Aoedev	*d;
+} devs;
+
+static struct {	/* table of bound network interfaces; a slot is in use when its nl[i].cc is non-nil */
+	Lock	lk;
+	int	reader[Nnetlink];	/* reader is running. */
+	Rendez	rendez[Nnetlink];	/* confirm exit. */
+	Netlink	nl[Nnetlink];
+} netlinks;
+
+extern	Dev 	aoedevtab;
+static	Ref 	units;
+static	Ref	drivevers;
+static	int	debug;
+static	int	autodiscover	= 1;
+static	int	rediscover;
+	char 	Enotup[] 	= "aoe device is down";
+	char	Echange[]	= "media or partition has changed";
+
+static Srb*
+srballoc(ulong sz)	/* allocate an Srb with sz bytes of data space directly behind it */
+{
+	Srb *srb;
+
+	srb = smalloc(sizeof *srb+sz);	/* was an unchecked malloc; smalloc is already used unchecked by eventlogread -- NOTE(review): confirm it zero-fills like malloc here, the other fields rely on it */
+	srb->dp = srb->data = srb+1;
+	srb->ticksent = Ticks;
+	return srb;
+}
+
+static Srb*
+srbkalloc(void *db, ulong dummy)	/* allocate an Srb that transfers straight to/from the caller's buffer db; dummy is unused */
+{
+	Srb *srb;
+
+	srb = smalloc(sizeof *srb);	/* was an unchecked malloc; NOTE(review): confirm smalloc zero-fills like malloc here */
+	srb->dp = srb->data = db;
+	srb->ticksent = Ticks;
+	return srb;
+}
+
+#define srbfree(srb) free(srb)
+
+static void
+srberror(Srb *srb, char *s)	/* fail a request: record the error, drop one outstanding frame, wake the sleeper */
+{
+	srb->error = s;
+	srb->nout--;
+	WAKEUP(srb);	/* srbready is true as soon as error is set */
+}
+
+static void
+frameerror(Aoedev *d, Frame *f, char *s)	/* release frame f and fail the request it belongs to with error s */
+{
+	Srb *srb;
+
+	srb = f->srb;
+	if(f->tag == Tfree)	/* frame is not in use: nothing to fail */
+		return;
+	f->srb = nil;
+	f->tag = Tfree;		/* don't get fooled by way-slow responses */
+	if(!srb)
+		return;
+	srberror(srb, s);
+	d->nout--;
+}
+
+static char*
+unitname(Aoedev *d)	/* format "major.minor" into up->genbuf and return that buffer */
+{
+	uprint("%d.%d", d->major, d->minor);
+	return up->genbuf;	/* shared per-process scratch: the next uprint overwrites it */
+}
+
+static long
+eventlogread(void *a, long n)	/* copy the oldest logged event into a (at most n bytes) and consume it; returns bytes copied, 0 if the log is empty */
+{
+	int len;
+	char *p, *buf;
+
+	buf = smalloc(Eventlen);
+	QLOCK(&events);
+	LOCK(&events);
+	p = events.rp;
+	len = *(uchar*)p;	/* slot length byte; read unsigned so events longer than 127 bytes are not seen as negative */
+	if(len == 0){
+		n = 0;
+		UNLOCK(&events);
+	} else {
+		if(n > len)
+			n = len;
+		/* can't move directly into pageable space with events lock held */
+		memmove(buf, p+1, n);
+		*p = 0;		/* mark the slot empty */
+		events.rp = p += Eventlen;
+		if(p >= events.buf + sizeof events.buf)
+			events.rp = events.buf;	/* wrap around the ring */
+		UNLOCK(&events);
+
+		/* the concern here is page faults in memmove below */
+		if(waserror()){
+			free(buf);
+			QUNLOCK(&events);
+			nexterror();
+		}
+		memmove(a, buf, n);
+		poperror();
+	}
+	free(buf);
+	QUNLOCK(&events);
+	return n;
+}
+
+static int
+eventlog(char *fmt, ...)	/* format one event into the next ring slot, overwriting the oldest if the ring is full; returns the text length */
+{
+	int dragrp, n;
+	char *p;
+	va_list arg;
+
+	LOCK(&events);
+	p = events.wp;
+	dragrp = *p++;	/* non-zero length byte: this slot still holds an unread event */
+	va_start(arg, fmt);	/* NOTE(review): there is no matching va_end */
+	n = vsnprint(p, Eventlen-1, fmt, arg);
+	*--p = n;	/* store the length in the slot's first byte */
+	p = events.wp += Eventlen;
+	if(p >= events.buf + sizeof events.buf)
+		p = events.wp = events.buf;
+	if(dragrp)
+		events.rp = p;	/* the oldest event was overwritten, so the read pointer moves along with wp */
+	UNLOCK(&events);
+	WAKEUP(&events);
+	return n;
+}
+
+static int
+eventcount(void)	/* number of unread events currently held in the log ring */
+{
+	int n;
+
+	LOCK(&events);
+	if(*events.rp == 0)
+		n = 0;	/* length byte 0 at rp: log is empty */
+	else if(events.wp <= events.rp)	/* wrapped, or full when wp == rp */
+		n = sizeof events.buf - (events.rp - events.wp);	/* bytes, like the other branch; the old code subtracted bytes from Nevents */
+	else
+		n = events.wp - events.rp;
+	UNLOCK(&events);
+	return n/Eventlen;	/* bytes -> slots */
+}
+
+static int
+tsince(int tag)
+{
+	int delta;
+
+	/* a tag carries a 16-bit tick stamp in its low half; a negative difference means the stamp wrapped */
+	delta = (int)(Ticks & 0xffff) - (tag & 0xffff);
+	if(delta < 0)
+		delta += 1<<16;
+	return delta;
+}
+
+static int
+newtag(Aoedev *d)	/* build a fresh frame tag: per-device counter in the high 16 bits, current ticks in the low 16 */
+{
+	int t;
+
+	do {
+		t = ++d->lasttag << 16;
+		t |= Ticks & 0xffff;	/* tsince later recovers the send time from these bits */
+	} while (t == Tfree || t == Tmgmt);	/* never hand out the two reserved values */
+	return t;
+}
+
+static void
+downdev(Aoedev *d, char *err)	/* mark the device down and fail every frame; err is only logged, requests fail with Enotup */
+{
+	Frame *f, *e;
+
+	d->flag &= ~Dup;
+	f = d->frames;
+	e = f + d->nframes;
+	for(; f < e; f->tag = Tfree, f->srb = nil, f++)	/* the step also resets frames frameerror returned early on */
+		frameerror(d, f, Enotup);
+	d->inprocess = nil;
+	eventlog("%æ: removed; %s\n", d, err);
+}
+
+static Block*
+allocfb(Frame *f)	/* build a Block holding the frame's header followed by its payload, ready to be written */
+{
+	int len;
+	Block *b;
+
+	len = f->nhdr + f->dlen;
+	if(len < ETHERMINTU)
+		len = ETHERMINTU;	/* never send less than ETHERMINTU bytes */
+	b = allocb(len);
+	memmove(b->wp, f->hdr, f->nhdr);
+	if(f->dlen)
+		memmove(b->wp + f->nhdr, f->dp, f->dlen);
+	b->wp += len;
+	return b;
+}
+
+static void
+putlba(Aoeata *a, vlong lba)
+{
+	int i;
+	uchar *out;
+
+	/*
+	 * store the low 48 bits of lba, least significant byte first.
+	 */
+	out = a->lba;
+	for(i = 0; i < 6; i++)
+		out[i] = lba >> (8*i);
+}
+
+static Devlink*
+pickdevlink(Aoedev *d)	/* round-robin over the device's links; returns the next one that is up, or 0 if none is */
+{
+	ulong i, n;
+	Devlink *l;
+
+	for(i = 0; i < d->ndl; i++){	/* at most one full turn around the table */
+		n = d->dlidx++ % d->ndl;
+		l = d->dl + n;
+		if(l && l->flag & Dup)
+			return l;
+	}
+	return 0;
+}
+
+static int
+pickea(Devlink *l)
+{
+	/* no link, or a link with no known addresses */
+	if(l == 0 || l->nea == 0)
+		return -1;
+	/* round-robin over the link's address table */
+	return l->eaidx++ % l->nea;
+}
+
+static int
+hset(Aoedev *d, Frame *f, Aoehdr *h, int cmd)	/* pick a link and address, fill in the common header and give f a new tag; returns the tag, or -1 on failure */
+{
+	int i;
+	Devlink *l;
+
+	if(f->srb)
+	if((long)(Ticks-f->srb->ticksent) > Srbtimeout){	/* the request has been around too long: fail it */
+		eventlog("%æ: srb timeout\n", d);
+		frameerror(d, f, Etimedout);
+		return -1;
+	}
+	l = pickdevlink(d);
+	i = pickea(l);
+	if(i == -1){	/* no usable link or address: take the whole device down */
+		downdev(d, "resend fails; no netlink/ea");
+		return -1;
+	}
+	memmove(h->dst, l->eatab[i], Eaddrlen);
+	memmove(h->src, l->nl->ea, sizeof h->src);
+	hnputs(h->type, Aoetype);
+	h->verflag = Aoever << 4;
+	h->error = 0;
+	hnputs(h->major, d->major);
+	h->minor = d->minor;
+	h->cmd = cmd;
+
+	hnputl(h->tag, f->tag = newtag(d));
+	f->dl = l;
+	f->nl = l->nl;
+	f->eaidx = i;
+	f->ticksent = Ticks;
+
+	return f->tag;
+}
+
+static int
+resend(Aoedev *d, Frame *f)	/* retransmit frame f with a new tag, possibly over a different link; returns 0, or -1 on failure */
+{
+	ulong n;
+	Aoeata *a;
+
+	a = (Aoeata*)f->hdr;
+	if(hset(d, f, (Aoehdr*)a, a->cmd) == -1)
+		return -1;
+	n = f->bcnt;
+	if(n > d->maxbcnt){
+		n = d->maxbcnt;		/* mtu mismatch (jumbo fail?) */
+		if(f->dlen > n)
+			f->dlen = n;
+	}
+	a->scnt = n / Aoesectsz;
+	f->dl->resent++;
+	f->dl->npkt++;
+	if(waserror())
+		/* should remove the netlink */
+		return -1;
+	devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
+	poperror();
+	return 0;
+}
+
+static void
+discover(int major, int minor)	/* broadcast a config query for major.minor on every bound interface */
+{
+	Aoehdr *h;
+	Block *b;
+	Netlink *nl, *e;
+
+	nl = netlinks.nl;
+	e = nl + nelem(netlinks.nl);
+	for(; nl < e; nl++){
+		if(nl->cc == nil)	/* unused slot */
+			continue;
+		b = allocb(ETHERMINTU);
+		if(waserror()){
+			freeb(b);
+			nexterror();
+		}
+		b->wp = b->rp + ETHERMINTU;
+		memset(b->rp, 0, ETHERMINTU);
+		h = (Aoehdr*)b->rp;
+		memset(h->dst, 0xff, sizeof h->dst);	/* all-ones destination address */
+		memmove(h->src, nl->ea, sizeof h->src);
+		hnputs(h->type, Aoetype);
+		h->verflag = Aoever << 4;
+		hnputs(h->major, major);
+		h->minor = minor;
+		h->cmd = ACconfig;
+		poperror();
+		devtab[nl->dc->type]->bwrite(nl->dc, b, 0);	/* outside the waserror bracket, so an error here does not free b a second time */
+	}
+}
+
+/*
+ * Check all frames on device and resend any frames that have been
+ * outstanding for 200% of the device round trip time average.
+ */
+static void
+aoesweepproc(void *dummy)	/* kernel process body: never returns; one pass over all devices roughly every Nms milliseconds */
+{
+	ulong i, tx, timeout, nbc;
+	vlong starttick, left;	/* left is signed so that an overlong pass cannot wrap into a huge sleep */
+	enum { Nms = 100, Nbcms = 30*1000, };
+	uchar *ea;
+	Aoeata *a;
+	Aoedev *d;
+	Devlink *l;
+	Frame *f, *e;
+
+	nbc = Nbcms/Nms;
+loop:
+	if(nbc-- == 0){	/* every Nbcms/Nms passes: optional re-discovery broadcast */
+		if(rediscover && !waserror()){
+			discover(0xffff, 0xff);
+			poperror();
+		}
+		nbc = Nbcms/Nms;
+	}
+	starttick = Ticks;
+	RLOCK(&devs);
+	for(d = devs.d; d; d = d->next){
+		if(!CANQLOCK(d))	/* busy device: skip it rather than block the sweep */
+			continue;
+		if(!UP(d)){
+			QUNLOCK(d);
+			continue;
+		}
+		tx = 0;
+		f = d->frames;
+		e = f + d->nframes;
+		for (; f < e; f++){
+			if(f->tag == Tfree)
+				continue;
+			l = f->dl;
+			timeout = l->rttavg << 1;	/* twice the average round trip */
+			i = tsince(f->tag);
+			if(i < timeout)
+				continue;
+			if(d->nout == d->maxout){	/* timed out with a full window: shrink the window */
+				if(d->maxout > 1)
+					d->maxout--;
+				d->lastwadj = Ticks;
+			}
+			a = (Aoeata*)f->hdr;
+			if(a->scnt > Dbcnt / Aoesectsz &&
+			   ++f->nl->lostjumbo > (d->nframes << 1)){	/* too many large frames lost: fall back to Dbcnt-byte frames */
+				ea = f->dl->eatab[f->eaidx];
+				eventlog("%æ: jumbo failure on %s:%E; lba%lld\n",
+					d, f->nl->path, ea, f->lba);
+				d->maxbcnt = Dbcnt;
+				d->flag &= ~Djumbo;
+			}
+			resend(d, f);
+			if(tx++ == 0){	/* first resend of this pass: back the link's rtt estimate off, capped at Rtmax */
+				if((l->rttavg <<= 1) > Rtmax)
+					l->rttavg = Rtmax;
+				eventlog("%æ: rtt %ldms\n", d, Tk2ms(l->rttavg));
+			}
+		}
+		if(d->nout == d->maxout && d->maxout < d->nframes &&
+		   TK2MS(Ticks-d->lastwadj) > 10*1000){	/* window full and untouched for 10s: grow it by one */
+			d->maxout++;
+			d->lastwadj = Ticks;
+		}
+		QUNLOCK(d);
+	}
+	RUNLOCK(&devs);
+	left = Nms - TK2MS(Ticks - starttick);	/* what remains of this Nms period; negative if the pass overran */
+	if(left > 0)
+		tsleep(&up->sleep, return0, 0, left);
+	goto loop;
+}
+
+static int
+fmtaoe(Fmt *f)	/* print-verb handler installed for 'æ' in aoeinit: formats an Aoedev* as "aoeMAJOR.MINOR" */
+{
+	char buf[16];
+	Aoedev *d;
+
+	d = va_arg(f->args, Aoedev*);
+	snprint(buf, sizeof buf, "aoe%d.%d", d->major, d->minor);
+	return fmtstrcpy(f, buf);
+}
+
+static void netbind(char *path);
+
+static void
+aoecfg(void)	/* would bind the interfaces named by an "aoeif" setting; currently disabled by the unconditional return below */
+{
+	int n, i;
+	char *p, *f[32], buf[24];
+
+	if(1)	/* the getconf lookup is commented out, so nothing below runs (n and f are never set) */
+//	if((p = getconf("aoeif")) == nil || (n = tokenize(p, f, nelem(f))) < 1)
+		return;
+	/* goo! */
+	for(i = 0; i < n; i++){
+		p = f[i];
+		if(strncmp(p, "ether", 5) == 0)
+			snprint(buf, sizeof buf, "#l%c/ether%c", p[5], p[5]);
+		else if(strncmp(p, "#l", 2) == 0)
+			snprint(buf, sizeof buf, "#l%c/ether%c", p[2], p[2]);
+		else
+			continue;
+		if(!waserror()){	/* a failed bind of one interface is ignored */
+			netbind(buf);
+			poperror();
+		}
+	}
+}
+
+static void
+aoeinit(void)	/* one-time setup: install the 'æ' verb, reset the event ring, start the sweeper process */
+{
+	static int init;
+	static QLock l;
+
+	if(!canqlock(&l))	/* NOTE(review): a caller that loses this race returns without waiting for setup to finish */
+		return;
+	if(init == 0){
+		fmtinstall(L'æ', fmtaoe);
+		events.rp = events.wp = events.buf;
+		kproc("aoesweep", aoesweepproc, nil);
+		aoecfg();
+		init = 1;
+	}
+	qunlock(&l);
+}
+
+static Chan*
+aoeattach(char *spec)	/* attach to the device root; any non-empty spec is rejected with Enonexist */
+{
+	Chan *c;
+
+	if(*spec)
+		error(Enonexist);
+	aoeinit();
+	c = devattach(L'æ', spec);
+	mkqid(&c->qid, Qzero, 0, QTDIR);
+	return c;
+}
+
+static Aoedev*
+unitseq(ulong unit)	/* return the device at position unit in the devs list (0-based), or nil when the list is shorter */
+{
+	int i;
+	Aoedev *d;
+
+	i = 0;
+	RLOCK(&devs);
+	for(d = devs.d; d; d = d->next)
+		if(i++ == unit)
+			break;
+	RUNLOCK(&devs);
+	return d;
+}
+
+static Aoedev*
+unit2dev(ulong unit)	/* find the device whose unit number is unit; raises an error instead of returning nil */
+{
+	Aoedev *d;
+
+	RLOCK(&devs);
+	for(d = devs.d; d; d = d->next)
+		if(d->unit == unit){
+			RUNLOCK(&devs);
+			return d;
+		}
+	RUNLOCK(&devs);
+	error("unit lookup failure");
+	return nil;
+}
+
+static int
+unitgen(Chan *c, ulong type, Dir *dp)	/* fill dp for file `type` of the unit named by c's qid; returns 1, or -1 for an unknown type */
+{
+	int perm, t;
+	ulong vers;
+	vlong size;
+	char *p;
+	Aoedev *d;
+	Qid q;
+
+	d = unit2dev(UNIT(c->qid));
+	perm = 0644;
+	size = 0;
+	vers = d->vers;
+	t = QTFILE;
+
+	switch(type){
+	default:
+		return -1;
+	case Qctl:
+		p = "ctl";
+		break;
+	case Qdata:
+		p = "data";
+		perm = 0640;
+		if(UP(d))	/* sizes are only reported while the device is up */
+			size = d->bsize;
+		break;
+	case Qconfig:
+		p = "config";
+		if(UP(d))
+			size = d->nconfig;
+		break;
+	case Qident:
+		p = "ident";
+		if(UP(d))
+			size = sizeof d->ident;
+		break;
+	case Qdevlinkdir:
+		p = "devlink";
+		t = QTDIR;
+		perm = 0555;
+		break;
+	}
+	mkqid(&q, QID(UNIT(c->qid), type), vers, t);
+	devdir(c, q, p, size, eve, perm, dp);
+	return 1;
+}
+
+static int
+topgen(Chan *c, ulong type, Dir *d)	/* fill d for one of the top-level files (ctl, log); returns 1, or -1 for an unknown type */
+{
+	int perm;
+	vlong size;
+	char *p;
+	Qid q;
+
+	perm = 0444;
+	size = 0;
+	switch(type){
+	default:
+		return -1;
+	case Qtopctl:
+		p = "ctl";
+		perm = 0644;
+		break;
+	case Qtoplog:
+		p = "log";
+		size = eventcount();	/* the reported length is the number of pending events */
+		break;
+	}
+	mkqid(&q, type, 0, QTFILE);
+	devdir(c, q, p, size, eve, perm, d);
+	return 1;
+}
+
+static int
+aoegen(Chan *c, char *d0, Dirtab *d1, int d2, int s, Dir *dp)	/* directory generator passed to devwalk/devstat/devopen/devdirread: fill dp for entry s of c; 1 on success, -1 when there is no such entry */
+{
+	int i;
+	Aoedev *d;
+	Qid q;
+
+	if(c->qid.path == 0){	/* device root: its only entry is the "aoe" directory */
+		switch(s){
+		case DEVDOTDOT:
+			q.path = 0;
+			q.type = QTDIR;
+			devdir(c, q, "#æ", 0, eve, 0555, dp);
+			break;
+		case 0:
+			q.path = Qtopdir;
+			q.type = QTDIR;
+			devdir(c, q, "aoe", 0, eve, 0555, dp);
+			break;
+		default:
+			return -1;
+		}
+		return 1;
+	}
+
+	switch(TYPE(c->qid)){
+	default:
+		return -1;
+	case Qtopdir:
+		if(s == DEVDOTDOT){
+			mkqid(&q, Qzero, 0, QTDIR);
+			devdir(c, q, "aoe", 0, eve, 0555, dp);
+			return 1;
+		}
+		if(s < Qtopfiles)	/* the plain files come first ... */
+			return topgen(c, Qtopbase + s, dp);
+		s -= Qtopfiles;		/* ... followed by one directory per device */
+		if((d = unitseq(s)) == 0)
+			return -1;
+		mkqid(&q, QID(d->unit, Qunitdir), 0, QTDIR);
+		devdir(c, q, unitname(d), 0, eve, 0555, dp);
+		return 1;
+	case Qtopctl:
+	case Qtoplog:
+		return topgen(c, TYPE(c->qid), dp);
+	case Qunitdir:
+		if(s == DEVDOTDOT){
+			mkqid(&q, QID(0, Qtopdir), 0, QTDIR);
+			uprint("%uld", UNIT(c->qid));
+			devdir(c, q, up->genbuf, 0, eve, 0555, dp);
+			return 1;
+		}
+		return unitgen(c, Qunitbase+s, dp);
+	case Qctl:
+	case Qdata:
+	case Qconfig:
+	case Qident:
+		return unitgen(c, TYPE(c->qid), dp);
+	case Qdevlinkdir:
+		i = UNIT(c->qid);
+		if(s == DEVDOTDOT){
+			mkqid(&q, QID(i, Qunitdir), 0, QTDIR);
+			devdir(c, q, "devlink", 0, eve, 0555, dp);
+			return 1;
+		}
+		if(i >= units.ref)
+			return -1;
+		d = unit2dev(i);
+		if(s >= d->ndl)	/* one numbered file per devlink */
+			return -1;
+		uprint("%d", s);
+		mkqid(&q, Q3(s, i, Qdevlink), 0, QTFILE);
+		devdir(c, q, up->genbuf, 0, eve, 0755, dp);
+		return 1;
+	case Qdevlink:
+		uprint("%d", s);
+		mkqid(&q, Q3(s, UNIT(c->qid), Qdevlink), 0, QTFILE);
+		devdir(c, q, up->genbuf, 0, eve, 0755, dp);
+		return 1;
+	}
+}
+
+static Walkqid*
+aoewalk(Chan *c, Chan *nc, char **name, int nname)	/* walk entry point: defers to devwalk with aoegen as the generator */
+{
+	return devwalk(c, nc, name, nname, nil, 0, aoegen);
+}
+
+static int
+aoestat(Chan *c, uchar *db, int n)	/* stat entry point: defers to devstat with aoegen as the generator */
+{
+	return devstat(c, db, n, nil, 0, aoegen);
+}
+
+static Chan*
+aoeopen(Chan *c, int omode)	/* open entry point; data files additionally require the device to be up and are counted in nopen */
+{
+	Aoedev *d;
+
+	if(TYPE(c->qid) != Qdata)
+		return devopen(c, omode, 0, 0, aoegen);
+
+	d = unit2dev(UNIT(c->qid));
+	QLOCK(d);
+	if(waserror()){	/* release the device lock on any error raised below */
+		QUNLOCK(d);
+		nexterror();
+	}
+	if(!UP(d))
+		error(Enotup);
+	c = devopen(c, omode, 0, 0, aoegen);
+	d->nopen++;
+	poperror();
+	QUNLOCK(d);
+	return c;
+}
+
+static void
+aoeclose(Chan *c)	/* close entry point; only opened data files need any work */
+{
+	Aoedev *d;
+
+	if(TYPE(c->qid) != Qdata || (c->flag&COPEN) == 0)
+		return;
+
+	d = unit2dev(UNIT(c->qid));
+	QLOCK(d);
+	if(--d->nopen == 0 && !waserror()){	/* last close: send a config query for this device; errors from it are swallowed */
+		discover(d->major, d->minor);
+		poperror();
+	}
+	QUNLOCK(d);
+}
+
+static void
+atarw(Aoedev *d, Frame *f)	/* carve the next piece (at most maxbcnt bytes) off d->inprocess, build its ata frame in f and send it */
+{
+	ulong bcnt;
+	char extbit, writebit;
+	Aoeata *ah;
+	Srb *srb;
+
+	extbit = 0x4;
+	writebit = 0x10;
+
+	srb = d->inprocess;
+	bcnt = d->maxbcnt;
+	if(bcnt > srb->len)
+		bcnt = srb->len;
+	f->nhdr = Szaoeata;
+	memset(f->hdr, 0, f->nhdr);
+	ah = (Aoeata*)f->hdr;
+	if(hset(d, f, (Aoehdr*)ah, ACata) == -1)
+		return;
+	f->dp = srb->dp;
+	f->bcnt = bcnt;
+	f->lba = srb->sector;
+	f->srb = srb;
+
+	ah->scnt = bcnt / Aoesectsz;
+	putlba(ah, f->lba);
+	if(d->flag & Dllba)
+		ah->aflag |= AAFext;
+	else {
+		extbit = 0;
+		ah->lba[3] &= 0x0f;
+		ah->lba[3] |= 0xe0;	/* LBA bit+obsolete 0xa0 */
+	}
+	if(srb->write){
+		ah->aflag |= AAFwrite;
+		f->dlen = bcnt;	/* writes carry their data behind the header */
+	}else{
+		writebit = 0;
+		f->dlen = 0;
+	}
+	ah->cmdstat = 0x20 | writebit | extbit;	/* yields 0x20, 0x24, 0x30 or 0x34, i.e. the values of Crd, Crdext, Cwr, Cwrext */
+
+	/* mark tracking fields and load out */
+	srb->nout++;
+	srb->dp = (uchar*)srb->dp + bcnt;
+	srb->len -= bcnt;
+	srb->sector += bcnt / Aoesectsz;
+	if(srb->len == 0)	/* request fully handed out to frames */
+		d->inprocess = nil;
+	d->nout++;
+	f->dl->npkt++;
+	if(waserror()){
+		f->tag = Tfree;
+		d->inprocess = nil;
+		nexterror();
+	}
+	devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
+	poperror();
+}
+
+static char*
+aoeerror(Aoehdr *h)	/* return a message for the error carried in h, or 0 when the error flag is not set */
+{
+	int n;
+	static char *errs[] = {
+		"aoe protocol error: unknown",
+		"aoe protocol error: bad command code",
+		"aoe protocol error: bad argument param",
+		"aoe protocol error: device unavailable",
+		"aoe protocol error: config string present",
+		"aoe protocol error: unsupported version",
+	};
+
+	if((h->verflag & AFerr) == 0)
+		return 0;
+	n = h->error;
+	if(n >= nelem(errs))	/* was >, which let n == nelem(errs) index one past the table */
+		n = 0;
+	return errs[n];
+}
+
+static void
+rtupdate(Devlink *l, int rtt)	/* fold one round trip sample into the link's average; a negative rtt also adjusts mintimer */
+{
+	int n;
+
+	n = rtt;
+	if(rtt < 0){	/* negative sample: use its magnitude, clamped to [Rtmin, Rtmax], and move mintimer halfway toward it */
+		n = -rtt;
+		if(n < Rtmin)
+			n = Rtmin;
+		else if(n > Rtmax)
+			n = Rtmax;
+		l->mintimer += (n - l->mintimer) >> 1;
+	} else if(n < l->mintimer)	/* ordinary sample: clamp to [mintimer, Rtmax] */
+		n = l->mintimer;
+	else if(n > Rtmax)
+		n = Rtmax;
+
+	/* g == .25; cf. Congestion Avoidance and Control, Jacobson&Karels; 1988 */
+	n -= l->rttavg;
+	l->rttavg += n >> 2;
+}
+
+static int
+srbready(void *v)
+{
+	Srb *srb = v;
+
+	/* done on error, or when nothing is in flight and nothing remains to be sent */
+	return srb->error != nil || (srb->nout == 0 && srb->len == 0);
+}
+
+static Frame*
+getframe(Aoedev *d, int tag)
+{
+	int i;
+
+	/* linear scan of the device's frame table for the first frame carrying tag */
+	for(i = 0; i < d->nframes; i++){
+		if(d->frames[i].tag == tag)
+			return &d->frames[i];
+	}
+	return nil;
+}
+
+static Frame*
+freeframe(Aoedev *d)
+{
+	if(d->nout >= d->maxout)
+		return nil;	/* window is full: hand out nothing */
+	return getframe(d, Tfree);
+}
+
+static void
+work(Aoedev *d)	/* keep sending pieces of queued requests while free frames and window space remain */
+{
+	Frame *f;
+
+	while(f = freeframe(d)) {
+		if(d->inprocess == nil){	/* nothing half-sent: take the next request off the queue */
+			if(d->head == nil)
+				return;
+			d->inprocess = d->head;
+			d->head = d->head->next;
+			if(d->head == nil)
+				d->tail = nil;
+		}
+		atarw(d, f);
+	}
+}
+
+static void
+strategy(Aoedev *d, Srb *srb)	/* queue srb on the device, start as much i/o as possible, then sleep until srbready(srb) */
+{
+	QLOCK(d);
+	if(waserror()){
+		QUNLOCK(d);
+		nexterror();
+	}
+	srb->next = nil;
+	if(d->tail)
+		d->tail->next = srb;
+	d->tail = srb;
+	if(d->head == nil)
+		d->head = srb;
+	work(d);
+	poperror();
+	QUNLOCK(d);
+
+	while(waserror())	/* an error raised out of the sleep lands here and simply sleeps again */
+		;
+	SLEEP(srb, srbready, srb);
+	poperror();
+}
+
+#define iskaddr(a)	(!up || (uintptr)(a) > up->pmmu.uzero+USTKTOP)
+
+static long
+rw(Aoedev *d, int write, uchar *db, long len, uvlong off)	/* transfer len bytes at byte offset off between db and the device, Srbsz bytes at a time; returns bytes transferred (0 at or past the end) */
+{
+	long n, nlen, copy;
+	enum { Srbsz = 1<<19, };
+	Srb *srb;
+
+	if((off|len) & (Aoesectsz-1))
+		error("offset and length must be sector multiple.\n");
+	if(off >= d->bsize)
+		return 0;
+	if(off + len > d->bsize)	/* trim a transfer that would run past the end */
+		len = d->bsize - off;
+	copy = 0;
+	if(iskaddr(db)){
+		panic("iskaddr %p\n", db);	/* format used to contain a second %p with no matching argument */
+		srb = srbkalloc(db, len);
+		copy = 1;	/* despite the name: the srb points straight at db, so the memmoves below are skipped */
+	}else
+		srb = srballoc(Srbsz <= len? Srbsz: len);
+	if(waserror()){
+		srbfree(srb);
+		nexterror();
+	}
+	srb->write = write;
+	for(nlen = len; nlen; nlen -= n){
+		if(!UP(d))
+			error(Eio);
+		srb->sector = off / Aoesectsz;
+		srb->dp = srb->data;	/* reuse the same srb and buffer for every piece */
+		n = nlen;
+		if(n > Srbsz)
+			n = Srbsz;
+		srb->len = n;
+		if(write && !copy)
+			memmove(srb->data, db, n);
+		strategy(d, srb);
+		if(srb->error)
+			error(srb->error);
+		if(!write && !copy)
+			memmove(db, srb->data, n);
+		db += n;
+		off += n;
+	}
+	poperror();
+	srbfree(srb);
+	return len;
+}
+
+static long
+readmem(ulong off, void *dst, long n, void *src, long size)
+{
+	/* nothing to read at or past the end of the size-byte source */
+	if(off >= size)
+		return 0;
+	n = off + n > size ? size - off : n;
+	memmove(dst, (uchar*)src + off, n);
+	return n;
+}
+
+static char*
+pflag(char *s, char *e, uchar f)	/* append the names of the bits set in f, then a newline, to the buffer [s, e); returns the new end */
+{
+	uchar i;
+
+	for(i = 0; i < nelem(flagname); i++)
+		if(f & 1 << i)	/* flagname[i] names bit 1<<i */
+			s = seprint(s, e, "%s ", flagname[i]);
+	return seprint(s, e, "\n");
+}
+
+static int
+pstat(Aoedev *d, char *db, int len, int off)	/* render the device status text and copy the part selected by off/len into db; returns bytes copied */
+{
+	int i;
+	char *state, *s, *p, *e;
+
+	s = p = smalloc(1024);	/* was an unchecked malloc; smalloc is what eventlogread already uses without a nil check */
+	e = p + 1024;
+
+	state = "down";
+	if(UP(d))
+		state = "up";
+
+	p = seprint(p, e,
+		"state: %s\n"	"nopen: %d\n"	"nout: %d\n"
+		"nmaxout: %d\n"	"nframes: %d\n"	"maxbcnt: %d [maxmtu %d]\n"
+		"fw: %.4ux\n"
+		"model: %s\n"	"serial: %s\n"	"firmware: %s\n",
+		state,		d->nopen,	d->nout,
+		d->maxout, 	d->nframes,	d->maxbcnt, d->maxmtu,
+		d->fwver,
+		d->model, 	d->serial, 	d->firmware);
+	p = seprint(p, e, "flag: ");
+	p = pflag(p, e, d->flag);
+
+	if(p - s < len)	/* never hand out more than was formatted */
+		len = p - s;
+	i = readstr(off, db, len, s);
+	free(s);
+	return i;
+}
+
+static long
+unitread(Chan *c, void *db, long len, vlong off)	/* read from one of a unit's files (ctl, data, config, ident) */
+{
+	Aoedev *d;
+
+	d = unit2dev(UNIT(c->qid));
+	if(d->vers != c->qid.vers)	/* device version changed since this qid was made */
+		error(Echange);
+	switch(TYPE(c->qid)){
+	default:
+		error(Ebadarg);
+	case Qctl:
+		return pstat(d, db, len, off);
+	case Qdata:
+		return rw(d, Read, db, len, off);
+	case Qconfig:
+		if(!UP(d))
+			error(Enotup);
+		return readmem(off, db, len, d->config, d->nconfig);
+	case Qident:
+		if(!UP(d))
+			error(Enotup);
+		return readmem(off, db, len, d->ident, sizeof d->ident);
+	}
+}
+
+static int
+devlinkread(Chan *c, void *db, int len, int off)	/* render the status of the devlink named by c's qid, including its netlink; returns bytes copied, 0 for a stale index */
+{
+	int i;
+	char *s, *p, *e;
+	Aoedev *d;
+	Devlink *l;
+
+	d = unit2dev(UNIT(c->qid));
+	i = L(c->qid);
+	if(i >= d->ndl)
+		return 0;
+	l = d->dl + i;
+
+	s = p = smalloc(1024);	/* was an unchecked malloc; smalloc is what eventlogread already uses without a nil check */
+	e = s + 1024;
+
+	p = seprint(p, e, "addr: ");
+	for(i = 0; i < l->nea; i++)
+		p = seprint(p, e, "%E ", l->eatab[i]);
+	p = seprint(p, e, "\n");
+	p = seprint(p, e, "npkt: %uld\n", l->npkt);
+	p = seprint(p, e, "resent: %uld\n", l->resent);
+	p = seprint(p, e, "flag: "); p = pflag(p, e, l->flag);
+	p = seprint(p, e, "rttavg: %uld\n", Tk2ms(l->rttavg));
+	p = seprint(p, e, "mintimer: %uld\n", Tk2ms(l->mintimer));
+
+	p = seprint(p, e, "nl path: %s\n", l->nl->path);
+	p = seprint(p, e, "nl ea: %E\n", l->nl->ea);
+	p = seprint(p, e, "nl flag: "); p = pflag(p, e, l->nl->flag);	/* the netlink's own flags; this used to repeat l->flag */
+	p = seprint(p, e, "nl lostjumbo: %d\n", l->nl->lostjumbo);
+	p = seprint(p, e, "nl datamtu: %d\n", l->nl->datamtu);
+
+	if(p - s < len)	/* never hand out more than was formatted */
+		len = p - s;
+	i = readstr(off, db, len, s);
+	free(s);
+	return i;
+}
+
+static long
+topctlread(Chan *d0, void *db, int len, int off)
+{
+	int i;
+	char *s, *p, *e;
+	Netlink *n;
+
+	s = p = malloc(1024);
+	e = s + 1024;
+
+	p = seprint(p, e, "debug: %d\n", debug);
+	p = seprint(p, e, "autodiscover: %d\n", autodiscover);
+	p = seprint(p, e, "rediscover: %d\n", rediscover);
+
+	for(i = 0; i < Nnetlink; i++){
+		n = netlinks.nl+i;
+		if(n->cc == 0)
+			continue;
+		p = seprint(p, e, "if%d path: %s\n", i, n->path);
+		p = seprint(p, e, "if%d ea: %E\n", i, n->ea);
+		p = seprint(p, e, "if%d flag: ", i); p = pflag(p, e, n->flag);
+		p = seprint(p, e, "if%d lostjumbo: %d\n", i, n->lostjumbo);
+		p = seprint(p, e, "if%d datamtu: %d\n", i, n->datamtu);
+	}
+
+	if(p - s < len)
+		len = p - s;
+	i = readstr(off, db, len, s);
+	free(s);
+	return i;
+}
+
+static long
+aoeread(Chan *c, void *db, long n, vlong off)
+{
+	switch(TYPE(c->qid)){
+	default:
+		error(Eperm);
+	case Qzero:
+	case Qtopdir:
+	case Qunitdir:
+	case Qdevlinkdir:
+		return devdirread(c, db, n, 0, 0, aoegen);
+	case Qtopctl:
+		return topctlread(c, db, n, off);
+	case Qtoplog:
+		return eventlogread(db, n);
+	case Qctl:
+	case Qdata:
+	case Qconfig:
+	case Qident:
+		return unitread(c, db, n, off);
+	case Qdevlink:
+		return devlinkread(c, db, n, off);
+	}
+}
+
+static long
+configwrite(Aoedev *d, void *db, long len)
+{
+	char *s;
+	Aoeqc *ch;
+	Frame *f;
+	Srb *srb;
+	/* send the len bytes at db as a config-set request; d->config is updated only after it completes without error */
+	if(!UP(d))
+		error(Enotup);
+	if(len > sizeof d->config)
+		error(Etoobig);
+	srb = srballoc(len);
+	s = malloc(len);
+	memmove(s, db, len);
+	if(waserror()){
+		srbfree(srb);
+		free(s);
+		nexterror();
+	}
+	for (;;) {
+		QLOCK(d);
+		if(waserror()){
+			QUNLOCK(d);
+			nexterror();
+		}
+		f = freeframe(d);
+		if(f != nil)
+			break;
+		poperror();
+		QUNLOCK(d);
+		if(waserror())
+			nexterror();
+		tsleep(&up->sleep, return0, 0, 100);
+		poperror();
+	}
+	f->nhdr = Szaoeqc;
+	memset(f->hdr, 0, f->nhdr);
+	ch = (Aoeqc*)f->hdr;
+	if(hset(d, f, (Aoehdr*)ch, ACconfig) == -1)
+		error(Enotup);	/* two error labels are live: unwinding drops the qlock, frees srb and s (Enotup presumed apt - confirm) */
+	f->srb = srb;
+	f->dp = s;
+	ch->verccmd = AQCfset;
+	hnputs(ch->cslen, len);
+	d->nout++;
+	srb->nout++;
+	f->dl->npkt++;
+	f->dlen = len;
+	/*
+	 * these refer to qlock & waserror in the above for loop.
+	 * there's still the first waserror outstanding.
+	 */
+	poperror();
+	QUNLOCK(d);
+
+	devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
+	SLEEP(srb, srbready, srb);
+	if(srb->error)
+		error(srb->error);
+
+	QLOCK(d);
+	if(waserror()){
+		QUNLOCK(d);
+		nexterror();
+	}
+	memmove(d->config, s, len);
+	d->nconfig = len;
+	poperror();
+	QUNLOCK(d);
+
+	poperror();			/* pop first waserror */
+
+	srbfree(srb);
+	memmove(db, s, len);
+	free(s);
+	return len;
+}
+
+static int
+getmtu(Chan *m)
+{
+	int n, mtu;
+	char buf[36];
+
+	mtu = 1514;
+	if(m == nil || waserror())
+		return mtu;
+	n = devtab[m->type]->read(m, buf, sizeof buf - 1, 0);
+	poperror();
+	if(n > 12){
+		buf[n] = 0;
+		mtu = strtoul(buf + 12, 0, 0);
+	}
+	return mtu;
+}
+
+static int
+devmaxdata(Aoedev *d)
+{
+	int i, m, mtu;
+	Devlink *l;
+	Netlink *n;
+
+	mtu = 100000;
+	for(i = 0; i < d->ndl; i++){
+		l = d->dl + i;
+		n = l->nl;
+		if((l->flag & Dup) == 0 || (n->flag & Dup) == 0)
+			continue;
+		m = getmtu(n->mtu);
+		if(m > l->datamtu)
+			m = l->datamtu;
+		if(m < mtu)
+			mtu = m;
+	}
+	if(mtu == 100000)
+		mtu = 1514;
+	mtu -= Szaoeata;
+	mtu -= mtu % Aoesectsz;
+	return mtu;
+}
+
+static int
+toggle(char *s, int init)
+{
+	if(s != nil)	/* explicit argument: only the word "on" enables */
+		return strcmp(s, "on") == 0;
+	return init ^ 1;	/* no argument: flip bit 0 of the current value */
+}
+
+static void ataident(Aoedev*);
+
+static long
+unitctlwrite(Aoedev *d, void *db, long n)
+{
+	uint maxbcnt, m;
+	uvlong bsize;
+	enum {
+		Failio,
+		Ident,
+		Jumbo,
+		Maxbno,
+		Mtu,
+		Setsize,
+	};
+	Cmdbuf *cb;
+	Cmdtab *ct;
+	static Cmdtab cmds[] = {
+		{Failio, 	"failio", 	1 },
+		{Ident, 	"identify", 	1 },
+		{Jumbo, 	"jumbo", 	0 },
+		{Maxbno,	"maxbno",	0 },
+		{Mtu,		"mtu",		0 },
+		{Setsize, 	"setsize", 	0 },
+	};
+	/* parse one control command from db and apply it to unit d under its qlock; returns n */
+	cb = parsecmd(db, n);
+	QLOCK(d);
+	if(waserror()){
+		QUNLOCK(d);
+		free(cb);
+		nexterror();
+	}
+	ct = lookupcmd(cb, cmds, nelem(cmds));
+	switch(ct->index){
+	case Failio:
+		downdev(d, "i/o failure");
+		break;
+	case Ident:
+		ataident(d);
+		break;
+	case Jumbo:
+		m = 0;
+		if(d->flag & Djumbo)
+			m = 1;
+		m = toggle(cb->f[1], m);	/* keep the result: "jumbo [on|off]" must actually change the flag */
+		if(m)
+			d->flag |= Djumbo;
+		else
+			d->flag &= ~Djumbo;
+		break;
+	case Maxbno:
+	case Mtu:
+		maxbcnt = devmaxdata(d);
+		if(cb->nf > 2)
+			error(Ecmdargs);
+		if(cb->nf == 2){
+			m = strtoul(cb->f[1], 0, 0);
+			if(ct->index == Maxbno)
+				m *= Aoesectsz;
+			else{
+				m -= Szaoeata;
+				m &= ~(Aoesectsz-1);
+			}
+			if(m == 0 || m > maxbcnt)
+				cmderror(cb, "invalid mtu");
+			maxbcnt = m;
+			d->maxmtu = m;
+		} else
+			d->maxmtu = Maxmtu;
+		d->maxbcnt = maxbcnt;
+		break;
+	case Setsize:
+		bsize = d->realbsize;
+		if(cb->nf > 2)
+			error(Ecmdargs);
+		if(cb->nf == 2){
+			bsize = strtoull(cb->f[1], 0, 0);
+			if(bsize % Aoesectsz)
+				cmderror(cb, "disk size must be sector aligned");
+		}
+		d->bsize = bsize;
+		break;
+	default:
+		cmderror(cb, "unknown aoe control message");
+	}
+	poperror();
+	QUNLOCK(d);
+	free(cb);
+	return n;
+}
+
+static long
+unitwrite(Chan *c, void *db, long n, vlong off)
+{
+	long rv;
+	char *buf;
+	Aoedev *d;
+
+	d = unit2dev(UNIT(c->qid));
+	switch(TYPE(c->qid)){
+	default:
+		error(Ebadarg);
+	case Qctl:
+		return unitctlwrite(d, db, n);
+	case Qident:
+		error(Eperm);
+	case Qdata:
+		return rw(d, Write, db, n, off);
+	case Qconfig:
+		if(off + n > sizeof d->config)
+			error(Etoobig);
+		buf = malloc(sizeof d->config);
+		if(waserror()){
+			free(buf);
+			nexterror();
+		}
+		memmove(buf, d->config, d->nconfig);
+		memmove(buf + off, db, n);
+		rv = configwrite(d, buf, n + off);
+		poperror();
+		free(buf);
+		return rv;
+	}
+}
+
+static Netlink*
+addnet(char *path, Chan *cc, Chan *dc, Chan *mtu, uchar *ea)
+{
+	Netlink *nl, *e;
+	/* claim the first netlinks.nl slot whose cc is unset and fill it in; raises an error when all are taken */
+	LOCK(&netlinks);
+	if(waserror()){
+		UNLOCK(&netlinks);
+		nexterror();
+	}
+	nl = netlinks.nl;
+	e = nl + nelem(netlinks.nl);
+	for(; nl < e && nl->cc; nl++)
+		continue;
+	if(nl == e)
+		error("out of netlink structures");
+	nl->cc = cc;
+	nl->dc = dc;
+	nl->mtu = mtu;
+	kstrcpy(nl->path, path, sizeof nl->path);	/* unlike strncpy, always leaves path NUL-terminated */
+	memmove(nl->ea, ea, sizeof nl->ea);
+	poperror();
+	nl->flag |= Dup;
+	UNLOCK(&netlinks);
+	return nl;
+}
+
+static int
+newunit(void)
+{
+	int unit;
+
+	/* hand out the next unit number, or -1 once Maxunits are in use */
+	LOCK(&units);
+	unit = -1;
+	if(units.ref != Maxunits)
+		unit = units.ref++;
+	UNLOCK(&units);
+	return unit;
+}
+
+static int
+dropunit(void)
+{
+	int x;
+
+	LOCK(&units);
+	x = --units.ref;
+	UNLOCK(&units);
+	return x;
+}
+
+/*
+ * always allocate max frames.  maxout may change.
+ */
+static Aoedev*
+newdev(long major, long minor, int n)
+{
+	Aoedev *d;
+	Frame *f, *e;
+	/* build a device with Maxframes frames (n of them usable) and a fresh unit number; raises an error on failure */
+	d = malloc(sizeof *d);
+	f = malloc(sizeof *f*Maxframes);
+	if(!d || !f) {
+		free(d);
+		free(f);
+		error("aoe device allocation failure");
+	}
+	d->nframes = n;
+	d->frames = f;
+	for (e = f + Maxframes; f < e; f++)
+		f->tag = Tfree;
+	d->maxout = n;
+	d->major = major;
+	d->minor = minor;
+	d->maxbcnt = Dbcnt;
+	d->flag = Djumbo;
+	d->maxmtu = Maxmtu;
+	d->unit = newunit();		/* bzzt.  inaccurate if units removed */
+	if(d->unit == -1){
+		free(d->frames);	/* frames first: d must still be valid when d->frames is read */
+		free(d);
+		error("too many units");
+	}
+	d->dl = d->dltab;
+	return d;
+}
+
+static Aoedev*
+mm2dev(int major, int minor)
+{
+	Aoedev *d;
+
+	RLOCK(&devs);
+	for(d = devs.d; d; d = d->next)
+		if(d->major == major && d->minor == minor){
+			RUNLOCK(&devs);
+			return d;
+		}
+	RUNLOCK(&devs);
+	eventlog("mm2dev: %d.%d not found\n", major, minor);
+	return nil;
+}
+
+/* Find the device in our list.  If not known, add it */
+static Aoedev*
+getdev(long major, long minor, int n)
+{
+	Aoedev *d;
+
+	if(major == 0xffff || minor == 0xff)
+		return 0;
+	WLOCK(&devs);
+	if(waserror()){
+		WUNLOCK(&devs);
+		nexterror();
+	}
+	for(d = devs.d; d; d = d->next)
+		if(d->major == major && d->minor == minor)
+			break;
+	if(d == nil) {
+		d = newdev(major, minor, n);
+		d->next = devs.d;
+		devs.d = d;
+	}
+	poperror();
+	WUNLOCK(&devs);
+	return d;
+}
+
+static ushort
+gbit16(void *a)
+{
+	uchar *b;
+
+	b = a;	/* little-endian: byte 0 is the low half */
+	return b[0] | b[1] << 8;
+}
+
+static ulong
+gbit32(void *a)
+{
+	ulong j;
+	uchar *i;
+	/* byte-wise little-endian 32-bit load: i[0] is the least significant byte */
+	i = a;
+	j  = (ulong)i[3] << 24;	/* widen first: a promoted int would overflow into its sign bit */
+	j |= (ulong)i[2] << 16;
+	j |= (ulong)i[1] << 8;
+	j |= i[0];
+	return j;
+}
+
+static uvlong
+gbit64(void *a)
+{
+	uvlong hi;
+
+	hi = gbit32((uchar*)a + 4);	/* upper word sits in bytes 4..7 */
+	return hi << 32 | gbit32(a);
+}
+
+static void
+ataident(Aoedev *d)
+{
+	Aoeata *a;
+	Block *b;
+	Frame *f;
+
+	f = freeframe(d);
+	if(f == nil)
+		return;
+	f->nhdr = Szaoeata;
+	memset(f->hdr, 0, f->nhdr);
+	a = (Aoeata*)f->hdr;
+	if(hset(d, f, (Aoehdr*)a, ACata) == -1)
+		return;
+	f->srb = srbkalloc(0, 0);
+	a->cmdstat = Cid;	/* ata 6, page 110 */
+	a->scnt = 1;
+	a->lba[3] = 0xa0;
+	d->nout++;
+	f->dl->npkt++;
+	f->bcnt = 512;
+	f->dlen = 0;
+	b = allocfb(f);
+	devtab[f->nl->dc->type]->bwrite(f->nl->dc, b, 0);
+}
+
+static int
+newdlea(Devlink *l, uchar *ea)
+{
+	int i;
+	uchar *t;
+
+	for(i = 0; i < Nea; i++){
+		t = l->eatab[i];
+		if(i == l->nea){
+			memmove(t, ea, Eaddrlen);
+			return l->nea++;
+		}
+		if(memcmp(t, ea, Eaddrlen) == 0)
+			return i;
+	}
+	return -1;
+}
+
+static Devlink*
+newdevlink(Aoedev *d, Netlink *n, Aoeqc *c)
+{
+	int i;
+	Devlink *l;
+
+	for(i = 0; i < Ndevlink; i++){
+		l = d->dl + i;
+		if(i == d->ndl){
+			d->ndl++;
+			newdlea(l, c->src);
+			l->datamtu = c->scnt*Aoesectsz;
+			l->nl = n;
+			l->flag |= Dup;
+			l->mintimer = Rtmin;
+			l->rttavg = Rtmax;
+			return l;
+		}
+		if(l->nl == n){
+			newdlea(l, c->src);
+			l->datamtu = c->scnt*Aoesectsz;
+			l->flag |= Dup;
+			return l;
+		}
+	}
+	eventlog("%æ: out of links: %s:%E to %E\n", d, n->path, n->ea, c->src);
+	return 0;
+}
+
+static void
+errrsp(Block *b, char *s)
+{
+	int n;
+	Aoedev *d;
+	Aoehdr *h;
+	Frame *f;
+
+	h = (Aoehdr*)b->rp;
+	n = nhgetl(h->tag);
+	if(n == Tmgmt || n == Tfree)
+		return;
+	d = mm2dev(nhgets(h->major), h->minor);
+	if(d == 0)
+		return;
+	if(f = getframe(d, n))
+		frameerror(d, f, s);
+}
+
+static void
+qcfgrsp(Block *b, Netlink *nl)
+{
+	int major, cmd, cslen, blen;
+	unsigned n;
+	Aoedev *d;
+	Aoeqc *ch;
+	Devlink *l;
+	Frame *f;
+
+	ch = (Aoeqc*)b->rp;
+	major = nhgets(ch->major);
+	n = nhgetl(ch->tag);
+	if(n != Tmgmt){
+		d = mm2dev(major, ch->minor);
+		if(d == nil)
+			return;
+		QLOCK(d);
+		f = getframe(d, n);
+		if(f == nil){
+			QUNLOCK(d);
+			eventlog("%æ: unknown response tag %ux\n", d, n);
+			return;
+		}
+		cslen = nhgets(ch->cslen);
+		blen = BLEN(b) - Szaoeqc;
+		if(cslen < blen)
+			eventlog("%æ: cfgrsp: tag %.8ux oversized %d %d\n",
+				d, n, cslen, blen);
+		if(cslen > blen){
+			eventlog("%æ: cfgrsp: tag %.8ux runt %d %d\n",
+				d, n, cslen, blen);
+			cslen = blen;
+		}
+		memmove(f->dp, ch + 1, cslen);
+		f->srb->nout--;
+		WAKEUP(f->srb);
+		d->nout--;
+		f->srb = nil;
+		f->tag = Tfree;
+		QUNLOCK(d);
+		return;
+	}
+
+	cmd = ch->verccmd & 0xf;
+	if(cmd != 0){
+		eventlog("aoe%d.%d: cfgrsp: bad command %d\n", major, ch->minor, cmd);
+		return;
+	}
+	n = nhgets(ch->bufcnt);
+	if(n > Maxframes)
+		n = Maxframes;
+
+	if(waserror()){
+		eventlog("getdev: %d.%d ignored: %s\n", major, ch->minor, up->errstr);
+		return;
+	}
+	d = getdev(major, ch->minor, n);
+	poperror();
+	if(d == 0)
+		return;
+
+	QLOCK(d);
+	*up->errstr = 0;
+	if(waserror()){
+		QUNLOCK(d);
+		eventlog("%æ: %s\n", d, up->errstr);
+		nexterror();
+	}
+
+	l = newdevlink(d, nl, ch);		/* add this interface. */
+
+	d->fwver = nhgets(ch->fwver);
+	n = nhgets(ch->cslen);
+	if(n > sizeof d->config)
+		n = sizeof d->config;
+	d->nconfig = n;
+	memmove(d->config, ch + 1, n);
+
+	/* manually set mtu may be reset lower if conditions warrant */
+	if(l){
+		n = devmaxdata(d);
+		if(!(d->flag & Djumbo))
+			n = Dbcnt;
+		if(n > d->maxmtu)
+			n = d->maxmtu;
+		if(n != d->maxbcnt){
+			eventlog("%æ: setting %d byte mtu on %s:%E\n",
+				d, n, nl->path, nl->ea);
+			d->maxbcnt = n;
+		}
+	}
+	if(d->nopen == 0)
+		ataident(d);
+	poperror();
+	QUNLOCK(d);
+}
+
+static void
+idmove(char *p, ushort *a, unsigned n)
+{
+	int i;
+	char *op, *e;
+	/* copy n bytes of identify text (high byte of each word first) into p, NUL-terminate, strip blanks at both ends; p must hold n+1 bytes */
+	op = p;
+	for(i = 0; i < n / 2; i++){
+		*p++ = a[i] >> 8;
+		*p++ = a[i];
+	}
+	*p = 0;
+	while(p > op && *--p == ' ')
+		*p = 0;
+	e = p;		/* last byte kept (or op when nothing is left) */
+	for(p = op; *p == ' '; p++)
+		;
+	if(p > op)	/* leading blanks: slide the text and its NUL at e+1 down to op */
+		memmove(op, p, e - p + 2);
+}
+
+static vlong
+aoeidentify(Aoedev *d, ushort *id)
+{
+	int i;
+	vlong s;
+
+	d->flag &= ~(Dllba|Dpower|Dsmart|Dnop|Dup);
+
+	i = gbit16(id+83) | gbit16(id+86);
+	if(i & (1<<10)){
+		d->flag |= Dllba;
+		s = gbit64(id+100);
+	}else
+		s = gbit32(id+60);
+
+	i = gbit16(id+83);
+	if((i>>14) == 1) {
+		if(i & (1<<3))
+			d->flag  |= Dpower;
+		i = gbit16(id+82);
+		if(i & 1)
+			d->flag  |= Dsmart;
+		if(i & (1<<14))
+			d->flag  |= Dnop;
+	}
+//	eventlog("%æ up\n", d);
+	d->flag |= Dup;
+	memmove(d->ident, id, sizeof d->ident);
+	return s;
+}
+
+static void
+newvers(Aoedev *d)
+{
+	LOCK(&drivevers);
+	d->vers = drivevers.ref++;
+	UNLOCK(&drivevers);
+}
+
+static int
+identify(Aoedev *d, ushort *id)
+{
+	vlong osectors, s;
+	uchar oserial[21];
+
+	s = aoeidentify(d, id);
+	if(s == -1)
+		return -1;
+	osectors = d->realbsize;
+	memmove(oserial, d->serial, sizeof d->serial);
+
+	idmove(d->serial, id+10, 20);
+	idmove(d->firmware, id+23, 8);
+	idmove(d->model, id+27, 40);
+
+	s *= Aoesectsz;
+	if(osectors != s || memcmp(oserial, d->serial, sizeof oserial)){
+		d->bsize = s;
+		d->realbsize = s;
+//		d->mediachange = 1;
+		newvers(d);
+	}
+	return 0;
+}
+
+static void
+atarsp(Block *b)
+{
+	unsigned n;
+	short major;	/* NOTE(review): nhgets presumably yields 16 unsigned bits; majors >= 0x8000 would go negative here - confirm against Aoedev.major */
+	Aoeata *ahin, *ahout;
+	Aoedev *d;
+	Frame *f;
+	Srb *srb;
+	/* match an ATA response in b to its outstanding frame on the device and complete, continue or fail that request */
+	ahin = (Aoeata*)b->rp;
+	major = nhgets(ahin->major);
+	d = mm2dev(major, ahin->minor);
+	if(d == nil)
+		return;
+	QLOCK(d);
+	if(waserror()){
+		QUNLOCK(d);
+		nexterror();
+	}
+	n = nhgetl(ahin->tag);
+	f = getframe(d, n);
+	if(f == nil){
+		dprint("%æ: unexpected response; tag %ux\n", d, n);
+		goto bail;
+	}
+	rtupdate(f->dl, tsince(f->tag));
+	ahout = (Aoeata*)f->hdr;
+	srb = f->srb;
+
+	if(ahin->cmdstat & 0xa9){
+		eventlog("%æ: ata error cmd %.2ux stat %.2ux\n",
+			d, ahout->cmdstat, ahin->cmdstat);
+		if(srb)
+			srb->error = Eio;
+	} else {
+		n = ahout->scnt * Aoesectsz;	/* byte count of the request as it was sent */
+		switch(ahout->cmdstat){
+		case Crd:
+		case Crdext:
+			if(BLEN(b) - Szaoeata < n){
+				eventlog("%æ: runt read blen %ld expect %d\n",
+					d, BLEN(b), n);
+				goto bail;
+			}
+			memmove(f->dp, b->rp + Szaoeata, n);	/* fall through: a read then advances the frame like a write */
+		case Cwr:
+		case Cwrext:
+			if(n > Dbcnt)
+				f->nl->lostjumbo = 0;
+			if(f->bcnt -= n){	/* bytes still outstanding: advance lba/dp and send the next piece */
+				f->lba += n / Aoesectsz;
+				f->dp = (uchar*)f->dp + n;
+				resend(d, f);
+				goto bail;
+			}
+			break;
+		case Cid:
+			if(BLEN(b) - Szaoeata < 512){
+				eventlog("%æ: runt identify blen %ld expect %d\n",
+					d, BLEN(b), n);
+				goto bail;
+			}
+			identify(d, (ushort*)(b->rp + Szaoeata));
+			break;
+		default:
+			eventlog("%æ: unknown ata command %.2ux \n",
+				d, ahout->cmdstat);
+		}
+	}
+
+	if(srb && --srb->nout == 0 && srb->len == 0)
+		WAKEUP(srb);
+	f->srb = nil;
+	f->tag = Tfree;	/* frame is free for reuse */
+	d->nout--;
+
+	work(d);
+bail:
+	poperror();
+	QUNLOCK(d);
+}
+
+static void
+netrdaoeproc(void *v)
+{
+	int idx;
+	char name[Maxpath+1], *s;
+	Aoehdr *h;
+	Block *b;
+	Netlink *nl;
+
+	nl = (Netlink*)v;
+	idx = nl - netlinks.nl;
+	netlinks.reader[idx] = 1;
+	kstrcpy(name, nl->path, Maxpath);
+
+	if(waserror()){
+		eventlog("netrdaoe@%s: exiting: %s\n", name, up->errstr);
+		netlinks.reader[idx] = 0;
+		wakeup(netlinks.rendez + idx);
+		pexit(up->errstr, 1);
+	}
+	if(autodiscover)
+		discover(0xffff, 0xff);
+	for (;;) {
+		if(!(nl->flag & Dup))
+			error("netlink is down");
+		if(nl->dc == nil)
+			panic("netrdaoe: nl->dc == nil");
+		b = devtab[nl->dc->type]->bread(nl->dc, 1<<16, 0);
+		if(b == nil)
+			error("network read");
+		h = (Aoehdr*)b->rp;
+		if(h->verflag & AFrsp)
+			if(s = aoeerror(h)){
+				eventlog("%s: %s\n", nl->path, s);
+				errrsp(b, s);
+			}else if(h->cmd == ACata)
+				atarsp(b);
+			else if(h->cmd == ACconfig)
+				qcfgrsp(b, nl);
+			else if((h->cmd & 0xf0) == 0){
+				eventlog("%s: unknown cmd %d\n",
+					nl->path, h->cmd);
+				errrsp(b, "unknown command");
+			}
+		freeb(b);
+	}
+}
+
+static void
+getaddr(char *path, uchar *ea)
+{
+	int n;
+	char buf[2*Eaddrlen+1];
+	Chan *c;
+
+	uprint("%s/addr", path);
+	c = namec(up->genbuf, Aopen, OREAD, 0);
+	if(waserror()) {
+		cclose(c);
+		nexterror();
+	}
+	if(c == nil)
+		panic("æ: getaddr: c == nil");
+	n = devtab[c->type]->read(c, buf, sizeof buf-1, 0);
+	poperror();
+	cclose(c);
+	buf[n] = 0;
+	if(parseether(ea, buf) < 0)
+		error("parseether failure");
+}
+
+static void
+netbind(char *path)
+{
+	char addr[Maxpath];
+	uchar ea[2*Eaddrlen+1];
+	Chan *dc, *cc, *mtu;
+	Netlink *nl;
+
+	snprint(addr, sizeof addr, "%s!0x%x", path, Aoetype);
+	dc = chandial(addr, nil, nil, &cc);
+	snprint(addr, sizeof addr, "%s/mtu", path);
+	if(waserror())
+		mtu = nil;
+	else {
+		mtu = namec(addr, Aopen, OREAD, 0);
+		poperror();
+	}
+
+	if(waserror()){
+		cclose(dc);
+		cclose(cc);
+		if(mtu)
+			cclose(mtu);
+		nexterror();
+	}
+	if(dc == nil  || cc == nil)
+		error(Enonexist);
+	getaddr(path, ea);
+	nl = addnet(path, cc, dc, mtu, ea);
+	snprint(addr, sizeof addr, "netrdaoe@%s", path);
+	kproc(addr, netrdaoeproc, nl);
+	poperror();
+}
+
+static int	/* sleep predicate: nonzero while the int at v is nonzero */
+unbound(void *v)
+{
+	return *(int*)v != 0;	/* NOTE(review): netunbind passes netlinks.reader+idx, which netrdaoeproc holds at 1 while running and clears on exit, so this looks inverted for "reader is down" - confirm before changing */
+}
+
+static void
+netunbind(char *path)
+{
+	int i, idx;
+	Aoedev *d, *p, *next;
+	Chan *dc, *cc;
+	Devlink *l;
+	Frame *f;
+	Netlink *n, *e;
+	/* detach the netlink bound at path: mark it down, reroute its frames, close its channels, drop devices left with no links */
+	n = netlinks.nl;
+	e = n + nelem(netlinks.nl);
+
+	LOCK(&netlinks);
+	for(; n < e; n++)
+		if(n->dc && strcmp(n->path, path) == 0)
+			break;
+	UNLOCK(&netlinks);
+	if(n == e)
+		error("device not bound");
+
+	/*
+	 * hunt down devices using this interface; disable
+	 * this also terminates the reader.
+	 */
+	idx = n - netlinks.nl;
+	WLOCK(&devs);
+	for(d = devs.d; d; d = d->next){
+		QLOCK(d);
+		for(i = 0; i < d->ndl; i++){
+			l = d->dl + i;
+			if(l->nl == n)
+				l->flag &= ~Dup;
+		}
+		QUNLOCK(d);
+	}
+	n->flag &= ~Dup;
+	WUNLOCK(&devs);
+
+	/* confirm reader is down. */
+	while(waserror())
+		;
+	sleep(netlinks.rendez + idx, unbound, netlinks.reader + idx);
+	poperror();
+
+	/* reschedule packets. */
+	WLOCK(&devs);
+	for(d = devs.d; d; d = d->next){
+		QLOCK(d);
+		for(i = 0; i < d->nframes; i++){
+			f = d->frames + i;
+			if(f->tag != Tfree && f->nl == n)
+				resend(d, f);
+		}
+		QUNLOCK(d);
+	}
+	WUNLOCK(&devs);
+
+	/* squeeze devlink pool.  (we assert nobody is using them now) */
+	WLOCK(&devs);
+	for(d = devs.d; d; d = d->next){
+		QLOCK(d);
+		for(i = 0; i < d->ndl; i++){
+			l = d->dl + i;
+			if(l->nl == n)
+				memmove(l, l + 1, sizeof *l * (--d->ndl - i));
+		}
+		QUNLOCK(d);
+	}
+	WUNLOCK(&devs);
+
+	/* close device link. */
+	LOCK(&netlinks);
+	dc = n->dc;
+	cc = n->cc;
+	if(n->mtu)
+		cclose(n->mtu);
+	memset(n, 0, sizeof *n);
+	UNLOCK(&netlinks);
+
+	cclose(dc);
+	cclose(cc);
+
+	/* squeeze orphan devices */
+	WLOCK(&devs);
+	for(p = d = devs.d; d; d = next){
+		next = d->next;
+		if(d->ndl > 0){
+			p = d;		/* p trails as the last device kept so far */
+			continue;
+		}
+		QLOCK(d);
+		downdev(d, "orphan");
+		QUNLOCK(d);
+		if(d != devs.d)	/* test d, not p: a kept head must stay on the list */
+			p->next = next;
+		else{
+			devs.d = next;
+			p = devs.d;
+		}
+		free(d->frames);
+		free(d);
+		dropunit();
+	}
+	WUNLOCK(&devs);
+}
+
+static void
+strtoss(char *f, ushort *shelf, ushort *slot)
+{
+	ulong sh, sl;
+	char *s;
+	/* parse "shelf[.slot]" from f; a nil f or a missing slot leaves the defaults 0xffff / 0xff */
+	*shelf = 0xffff;
+	*slot = 0xff;
+	if(!f)
+		return;
+	*shelf = sh = strtol(f, &s, 0);
+	if(s == f || sh > 0xffff)
+		error("bad shelf");
+	f = s;
+	if(*f++ == '.'){
+		*slot = sl = strtol(f, &s, 0);	/* range-check the full value, not the truncated ushort */
+		if(s == f || sl > 0xff)
+			error("bad slot");
+	}else
+		*slot = 0xff;
+}
+
+static void
+discoverstr(char *f)
+{
+	ushort shelf, slot;
+
+	strtoss(f, &shelf, &slot);
+	discover(shelf, slot);
+}
+
+static void
+removedev(Aoedev *d)
+{
+	int i;
+	Aoedev *p;
+
+	WLOCK(&devs);
+	p = 0;
+	if(d != devs.d)
+	for(p = devs.d; p; p = p->next)
+		if(p->next == d)
+			break;
+	QLOCK(d);
+	d->flag &= ~Dup;
+	newvers(d);
+	d->ndl = 0;
+	QUNLOCK(d);
+	for(i = 0; i < d->nframes; i++)
+		frameerror(d, d->frames+i, Enotup);
+
+	if(p)
+		p->next = d->next;
+	else
+		devs.d = d->next;
+	free(d->frames);
+	free(d);
+	dropunit();
+	WUNLOCK(&devs);
+}
+
+
+static void
+aoeremove(Chan *c)
+{
+	switch(TYPE(c->qid)){
+	default:
+	case Qzero:
+	case Qtopdir:
+	case Qtoplog:
+	case Qtopctl:
+	case Qctl:
+	case Qdata:
+	case Qconfig:
+	case Qident:
+		error(Eperm);
+	case Qunitdir:
+		removedev(unit2dev(UNIT(c->qid)));
+		break;
+	}
+}
+
+static void
+removestr(char *f)
+{
+	ushort shelf, slot;
+	Aoedev *d;
+
+	strtoss(f, &shelf, &slot);
+	WLOCK(&devs);
+	for(d = devs.d; d; d = d->next)
+		if(shelf == d->major && slot == d->minor){
+			WUNLOCK(&devs);	/* BOTCH */
+			removedev(d);
+			return;
+		}
+	WUNLOCK(&devs);
+	error("device not bound");
+}
+
+static long
+topctlwrite(void *db, long n)
+{
+	enum {
+		Autodiscover,
+		Bind,
+		Debug,
+		Discover,
+		Closewait,
+		Rediscover,
+		Remove,
+		Unbind,
+	};
+	char *f;
+	Cmdbuf *cb;
+	Cmdtab *ct;
+	static Cmdtab cmds[] = {
+		{ Autodiscover,	"autodiscover",	0	},
+		{ Bind, 	"bind", 	2	},
+		{ Debug, 	"debug", 	0	},
+		{ Discover, 	"discover", 	0	},
+		{ Rediscover,	"rediscover",	0	},
+		{ Remove,	"remove",	2	},
+		{ Unbind,	"unbind",	2	},
+	};
+
+	cb = parsecmd(db, n);
+	if(waserror()){
+		free(cb);
+		nexterror();
+	}
+	ct = lookupcmd(cb, cmds, nelem(cmds));
+	f = cb->f[1];
+	switch(ct->index){
+	case Autodiscover:
+		autodiscover = toggle(f, autodiscover);
+		break;
+	case Bind:
+		netbind(f);
+		break;
+	case Debug:
+		debug = toggle(f, debug);
+		break;
+	case Discover:
+		discoverstr(f);
+		break;
+	case Rediscover:
+		rediscover = toggle(f, rediscover);
+		break;
+	case Remove:
+		removestr(f);	/* depricated */
+		break;
+	case Unbind:
+		netunbind(f);
+		break;
+	default:
+		cmderror(cb, "unknown aoe control message");
+	}
+	poperror();
+	free(cb);
+	return n;
+}
+
+static long
+aoewrite(Chan *c, void *db, long n, vlong off)
+{
+	switch(TYPE(c->qid)){
+	default:
+	case Qzero:
+	case Qtopdir:
+	case Qunitdir:
+	case Qtoplog:
+		error(Eperm);
+	case Qtopctl:
+		return topctlwrite(db, n);
+	case Qctl:
+	case Qdata:
+	case Qconfig:
+	case Qident:
+		return unitwrite(c, db, n, off);
+	}
+}
+
+Dev aoedevtab = {
+	L'æ',
+	"aoe",
+
+	devreset,
+	devinit,
+	devshutdown,
+	aoeattach,
+	aoewalk,
+	aoestat,
+	aoeopen,
+	devcreate,
+	aoeclose,
+	aoeread,
+	devbread,
+	aoewrite,
+	devbwrite,
+	aoeremove,
+	devwstat,
+	devpower,
+	devconfig,
+};
diff --git a/src/9vx/a/devcons.c b/src/9vx/a/devcons.c
@@ -784,6 +784,7 @@ consread(Chan *c, void *buf, long n, vlong off)
 		while(!qcanread(lineq)){
 			if(qread(kbdq, &ch, 1) == 0)
 				continue;
+			//XXX TODO: startup blocks here
 			send = 0;
 			if(ch == 0){
 				/* flush output on rawoff -> rawon */
diff --git a/src/9vx/a/devether.c b/src/9vx/a/devether.c
@@ -0,0 +1,542 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "ureg.h"
+#include "error.h"
+#include "netif.h"
+
+#include "etherif.h"
+
+#define MEMSIZE (256<<20)	// same as ../mmu.c:29 (TODO: var)
+
+static Ether *etherxx[MaxEther];
+
+Chan*
+etherattach(char* spec)
+{
+	ulong ctlrno;
+	char *p;
+	Chan *chan;
+
+	ctlrno = 0;
+	if(spec && *spec){
+		ctlrno = strtoul(spec, &p, 0);
+		if((ctlrno == 0 && p == spec) || *p || (ctlrno >= MaxEther))
+			error(Ebadarg);
+	}
+	if(etherxx[ctlrno] == 0)
+		error(Enodev);
+
+	chan = devattach('l', spec);
+	if(waserror()){
+		chanfree(chan);
+		nexterror();
+	}
+	chan->dev = ctlrno;
+	if(etherxx[ctlrno]->attach)
+		etherxx[ctlrno]->attach(etherxx[ctlrno]);
+	poperror();
+	return chan;
+}
+
+static Walkqid*	/* walk is delegated to the netif of controller chan->dev */
+etherwalk(Chan* chan, Chan* nchan, char** name, int nname)
+{
+	return netifwalk(&etherxx[chan->dev]->ni, chan, nchan, name, nname);
+}
+
+static int	/* stat is delegated to the netif of controller chan->dev */
+etherstat(Chan* chan, uchar* dp, int n)
+{
+	return netifstat(&etherxx[chan->dev]->ni, chan, dp, n);
+}
+
+static Chan*	/* open is delegated to the netif of controller chan->dev */
+etheropen(Chan* chan, int omode)
+{
+	return netifopen(&etherxx[chan->dev]->ni, chan, omode);
+}
+
+static void
+ethercreate(Chan* ch, char* c, int i, ulong ul)
+{
+}
+
+static void	/* close is delegated to the netif of controller chan->dev */
+etherclose(Chan* chan)
+{
+	netifclose(&etherxx[chan->dev]->ni, chan);
+}
+
+static long
+etherread(Chan* chan, void* buf, long n, vlong off)
+{
+	Ether *ether;
+	ulong offset = off;
+	/* read from controller chan->dev: driver ifstat for the ifstats file, netifread for everything else */
+	ether = etherxx[chan->dev];
+	if((chan->qid.type & QTDIR) == 0 && ether->ifstat){
+		/*
+		 * With some controllers it is necessary to reach
+		 * into the chip to extract statistics.
+		 */
+		if(NETTYPE(chan->qid.path) == Nifstatqid)
+			return ether->ifstat(ether, buf, n, offset);
+		else if(NETTYPE(chan->qid.path) == Nstatqid)
+			ether->ifstat(ether, buf, 0, offset);	/* zero-length call; result unused, the data comes from netifread below */
+	}
+
+	return netifread(&ether->ni, chan, buf, n, offset);
+}
+
+static Block*	/* block read is delegated to the netif of controller chan->dev */
+etherbread(Chan* chan, long n, ulong offset)
+{
+	return netifbread(&etherxx[chan->dev]->ni, chan, n, offset);
+}
+
+static int	/* wstat is delegated to the netif of controller chan->dev */
+etherwstat(Chan* chan, uchar* dp, int n)
+{
+	return netifwstat(&etherxx[chan->dev]->ni, chan, dp, n);
+}
+
+static void
+etherrtrace(Netfile* f, Etherpkt* pkt, int len)
+{
+	int i, n;
+	Block *bp;
+
+	if(qwindow(f->in) <= 0)
+		return;
+	if(len > 58)
+		n = 58;
+	else
+		n = len;
+	bp = iallocb(64);
+	if(bp == nil)
+		return;
+	memmove(bp->wp, pkt->d, n);
+	i = TK2MS(MACHP(0)->tscticks);
+	bp->wp[58] = len>>8;
+	bp->wp[59] = len;
+	bp->wp[60] = i>>24;
+	bp->wp[61] = i>>16;
+	bp->wp[62] = i>>8;
+	bp->wp[63] = i;
+	bp->wp += 64;
+	qpass(f->in, bp);
+}
+
+Block*
+etheriq(Ether* ether, Block* bp, int fromwire)
+{
+	Etherpkt *pkt;
+	ushort type;
+	int len, multi, tome, fromme;
+	Netfile **ep, *f, **fp, *fx;
+	Block *xbp;
+	/* deliver bp to each connection that wants it; returns bp when !fromwire (caller keeps it), else 0 after passing or freeing it */
+	ether->ni.inpackets++;
+
+	pkt = (Etherpkt*)bp->rp;
+	len = BLEN(bp);
+	type = (pkt->type[0]<<8)|pkt->type[1];
+	fx = 0;
+	ep = &ether->ni.f[Ntypes];
+
+	multi = pkt->d[0] & 1;
+	/* check for valid multicast addresses */
+	if(multi && memcmp(pkt->d, ether->ni.bcast, sizeof(pkt->d)) != 0 && ether->ni.prom == 0){
+		if(!activemulti(&ether->ni, pkt->d, sizeof(pkt->d))){
+			if(fromwire){
+				freeb(bp);
+				bp = 0;
+			}
+			return bp;
+		}
+	}
+
+	/* is it for me? */
+	tome = memcmp(pkt->d, ether->ea, sizeof(pkt->d)) == 0;
+	fromme = memcmp(pkt->s, ether->ea, sizeof(pkt->s)) == 0;
+	// if(tome||fromme)
+	//	iprint("XXX PACK: %2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux -> %2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux%s[%d]%s\n",
+	//	pkt->s[0], pkt->s[1], pkt->s[2],pkt->s[3], pkt->s[4], pkt->s[5],
+	//	pkt->d[0], pkt->d[1], pkt->d[2],pkt->d[3], pkt->d[4], pkt->d[5],
+	//	(tome ? " <<--" : ""), len, (fromme ? " -->>" : ""));
+	/*
+	 * Multiplex the packet to all the connections which want it.
+	 * If the packet is not to be used subsequently (fromwire != 0),
+	 * attempt to simply pass it into one of the connections, thereby
+	 * saving a copy of the data (usual case hopefully).
+	 */
+	for(fp = ether->ni.f; fp < ep; fp++){
+		if((f = *fp) != nil)
+		if(f->type == type || f->type < 0)
+		if(tome || multi || f->prom){
+			/* Don't want to hear bridged packets */
+			if(f->bridge && !fromwire && !fromme)
+				continue;
+			if(!f->headersonly){
+				if(fromwire && fx == 0)
+					fx = f;		/* first taker gets the original block after the loop */
+				else if((xbp = iallocb(len)) != nil){
+					memmove(xbp->wp, pkt, len);
+					xbp->wp += len;
+					if(qpass(f->in, xbp) < 0)
+						ether->ni.soverflows++;
+				}
+				else
+					ether->ni.soverflows++;
+			}
+			else
+				etherrtrace(f, pkt, len);
+		}
+	}
+
+	if(fx){
+		if(qpass(fx->in, bp) < 0)
+			ether->ni.soverflows++;
+		return 0;
+	}
+	if(fromwire){
+		freeb(bp);
+		return 0;
+	}
+
+	return bp;
+}
+
+static int
+etheroq(Ether* ether, Block* bp)
+{
+	int len, loopback, s;
+	Etherpkt *pkt;
+
+	ether->ni.outpackets++;
+
+	/*
+	 * Check if the packet has to be placed back onto the input queue,
+	 * i.e. if it's a loopback or broadcast packet or the interface is
+	 * in promiscuous mode.
+	 * If it's a loopback packet indicate to etheriq that the data isn't
+	 * needed and return, etheriq will pass-on or free the block.
+	 * To enable bridging to work, only packets that were originated
+	 * by this interface are fed back.
+	 */
+	pkt = (Etherpkt*)bp->rp;
+	len = BLEN(bp);
+	loopback = memcmp(pkt->d, ether->ea, sizeof(pkt->d)) == 0;
+	if(loopback || memcmp(pkt->d, ether->ni.bcast, sizeof(pkt->d)) == 0 || ether->ni.prom){
+		s = splhi();
+		etheriq(ether, bp, 0);
+		splx(s);
+	}
+
+	if(!loopback){
+		qbwrite(ether->oq, bp);
+		if(ether->transmit != nil)
+			ether->transmit(ether);
+	} else
+		freeb(bp);
+
+	return len;
+}
+
+static long
+etherwrite(Chan* chan, void* buf, long n, vlong v)
+{
+	Ether *ether;
+	Block *bp;
+	int nn, onoff;
+	Cmdbuf *cb;
+	/* non-data files take control text (netif first, then "nonblocking", then the driver); the data file queues one packet */
+	ether = etherxx[chan->dev];
+	if(NETTYPE(chan->qid.path) != Ndataqid) {
+		nn = netifwrite(&ether->ni, chan, buf, n);
+		if(nn >= 0)
+			return nn;
+		cb = parsecmd(buf, n);
+		if(cb->f[0] && strcmp(cb->f[0], "nonblocking") == 0){
+			if(cb->nf <= 1)
+				onoff = 1;
+			else
+				onoff = atoi(cb->f[1]);
+			qnoblock(ether->oq, onoff);
+			free(cb);
+			return n;
+		}
+		free(cb);
+		if(ether->ctl!=nil)
+			return ether->ctl(ether,buf,n);
+
+		error(Ebadctl);
+	}
+
+	if(n > ether->maxmtu)
+		error(Etoobig);
+	if(n < ether->minmtu)
+		error(Etoosmall);
+
+	bp = allocb(n);
+	if(waserror()){
+		freeb(bp);
+		nexterror();
+	}
+	memmove(bp->rp, buf, n);
+	memmove(bp->rp+Eaddrlen, ether->ea, Eaddrlen);	/* overwrite the bytes at offset Eaddrlen with our own address */
+	poperror();
+	bp->wp += n;
+
+	return etheroq(ether, bp);
+}
+
+static long
+etherbwrite(Chan* chan, Block* bp, ulong u)
+{
+	Ether *ether;
+	long n;
+
+	n = BLEN(bp);
+	if(NETTYPE(chan->qid.path) != Ndataqid){
+		if(waserror()) {
+			freeb(bp);
+			nexterror();
+		}
+		n = etherwrite(chan, bp->rp, n, 0);
+		poperror();
+		freeb(bp);
+		return n;
+	}
+	ether = etherxx[chan->dev];
+
+	if(n > ether->maxmtu){
+		freeb(bp);
+		error(Etoobig);
+	}
+	if(n < ether->minmtu){
+		freeb(bp);
+		error(Etoosmall);
+	}
+
+	return etheroq(ether, bp);
+}
+
+static struct {
+	char*	type;
+	int	(*reset)(Ether*);
+} cards[MaxEther+1];
+
+void
+addethercard(char* t, int (*r)(Ether*))
+{
+	static int ncard;
+
+	if(ncard == MaxEther)
+		panic("too many ether cards");
+	cards[ncard].type = t;
+	cards[ncard].reset = r;
+	ncard++;
+}
+
+int
+parseether(uchar *to, char *from)
+{
+	char pair[4];
+	uchar *end;
+
+	/* read Eaddrlen two-digit hex bytes, each optionally followed by ':'; -1 if the text ends early */
+	for(end = to + Eaddrlen; to < end; to++){
+		if(from[0] == 0)
+			return -1;
+		if(from[1] == 0)
+			return -1;
+		pair[0] = from[0];
+		pair[1] = from[1];
+		pair[2] = 0;
+		*to = strtoul(pair, 0, 16);
+		from += 2;
+		if(*from == ':')
+			from++;
+	}
+	return 0;
+}
+
+static Ether*
+etherprobe(int cardno, int ctlrno)
+{
+	int i, lg;
+	ulong mb, bsz;
+	Ether *ether;
+	char buf[128], name[32];
+	/* allocate and reset controller ctlrno using cards[cardno] (card 0 when cardno < 0); nil if the card is absent or its reset fails */
+	ether = malloc(sizeof(Ether));
+	memset(ether, 0, sizeof(Ether));
+	ether->ctlrno = ctlrno;
+	ether->tbdf = BUSUNKNOWN;
+	ether->ni.mbps = 100;
+	ether->minmtu = ETHERMINTU;
+	ether->maxmtu = ETHERMAXTU;
+
+	if(cardno < 0){
+		for(cardno = 0; cards[cardno].type; cardno++){
+			for(i = 0; i < ether->isac.nopt; i++){
+				if(strncmp(ether->isac.opt[i], "ea=", 3))
+					continue;
+				if(parseether(ether->ea, &ether->isac.opt[i][3]))
+					memset(ether->ea, 0, Eaddrlen);
+			}
+			break;
+		}
+	}
+
+	if(cardno >= MaxEther || cards[cardno].type == nil){
+		free(ether);
+		return nil;
+	}
+	if(cards[cardno].reset(ether) < 0){
+		free(ether);
+		return nil;
+	}
+
+	/*
+	 * IRQ2 doesn't really exist, it's used to gang the interrupt
+	 * controllers together. A device set to IRQ2 will appear on
+	 * the second interrupt controller as IRQ9.
+	 */
+	if(ether->isac.irq == 2)
+		ether->isac.irq = 9;
+	snprint(name, sizeof(name), "ether%d", ctlrno);
+
+	i = sprint(buf, "#l%d: %s: %dMbps port 0x%luX irq %d",
+		ctlrno, cards[cardno].type, ether->ni.mbps, ether->isac.port, ether->isac.irq);
+	if(ether->isac.mem)
+		i += sprint(buf+i, " addr 0x%luX", ether->isac.mem);
+	if(ether->isac.size)
+		i += sprint(buf+i, " size 0x%luX", ether->isac.size);
+	i += sprint(buf+i, ": %2.2ux%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux",
+		ether->ea[0], ether->ea[1], ether->ea[2],
+		ether->ea[3], ether->ea[4], ether->ea[5]);
+	sprint(buf+i, "\n");
+	print("%s", buf);	/* buf holds the card type string; never use it as the format */
+
+	/* compute log10(ether->ni.mbps) into lg */
+	for(lg = 0, mb = ether->ni.mbps; mb >= 10; lg++)
+		mb /= 10;
+	if (lg > 0)
+		lg--;
+	if (lg > 14)			/* 2^(14+17) = 2³¹ */
+		lg = 14;
+	/* allocate larger output queues for higher-speed interfaces */
+	bsz = 1UL << (lg + 17);		/* 2¹⁷ = 128K, bsz = 2ⁿ × 128K */
+	while (bsz > MEMSIZE && bsz >= 128*1024)
+		bsz /= 2;
+
+	netifinit(&ether->ni, name, Ntypes, bsz);
+	while (ether->oq == nil && bsz >= 128*1024) {
+		bsz /= 2;
+		ether->oq = qopen(bsz, Qmsg, 0, 0);
+		ether->ni.limit = bsz;
+	}
+	if(ether->oq == nil)
+		panic("etherreset %s", name);
+	ether->ni.alen = Eaddrlen;
+	memmove(ether->ni.addr, ether->ea, Eaddrlen);
+	memset(ether->ni.bcast, 0xFF, Eaddrlen);
+
+	// iprint("XXX EADDR: %2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux\n",
+	// ether->ea[0], ether->ea[1], ether->ea[2],ether->ea[3], ether->ea[4], ether->ea[5]);
+
+	return ether;
+}
+
+static void
+etherreset(void)
+{
+	Ether *ether;
+	int cardno, ctlrno;
+
+	for(ctlrno = 0; ctlrno < MaxEther; ctlrno++){
+		if((ether = etherprobe(-1, ctlrno)) == nil)
+			continue;
+		etherxx[ctlrno] = ether;
+	}
+
+	cardno = ctlrno = 0;
+	while(cards[cardno].type != nil && ctlrno < MaxEther){
+		if(etherxx[ctlrno] != nil){
+			ctlrno++;
+			continue;
+		}
+		if((ether = etherprobe(cardno, ctlrno)) == nil){
+			cardno++;
+			continue;
+		}
+		etherxx[ctlrno] = ether;
+		ctlrno++;
+	}
+}
+
+static void
+ethershutdown(void)
+{
+	Ether *ether;
+	int i;
+
+	for(i = 0; i < MaxEther; i++){
+		ether = etherxx[i];
+		if(ether == nil)
+			continue;
+		if(ether->shutdown == nil) {
+			print("#l%d: no shutdown fuction\n", i);
+			continue;
+		}
+		(*ether->shutdown)(ether);
+	}
+}
+
+
+#define POLY 0xedb88320
+
+/* bit-at-a-time, LSB-first 32 bit crc (polynomial POLY, seed all ones) for ethers; returned uncomplemented */
+ulong
+ethercrc(uchar *p, int len)
+{
+	int bit;
+	ulong crc, b;
+
+	crc = 0xffffffff;
+	while(len-- > 0){
+		for(b = *p++, bit = 0; bit < 8; bit++, b >>= 1)
+			if((crc ^ b) & 1)
+				crc = (crc>>1) ^ POLY;
+			else
+				crc = crc>>1;
+	}
+	return crc;
+}
+
+Dev etherdevtab = {
+	'l',
+	"ether",
+
+	etherreset,
+	devinit,
+	ethershutdown,
+	etherattach,
+	etherwalk,
+	etherstat,
+	etheropen,
+	ethercreate,
+	etherclose,
+	etherread,
+	etherbread,
+	etherwrite,
+	etherbwrite,
+	devremove,
+	etherwstat,
+};
diff --git a/src/9vx/a/devsd.c b/src/9vx/a/devsd.c
@@ -72,7 +72,7 @@ enum {
 					 ((p)<<PartSHIFT)|((t)<<TypeSHIFT))
 
 
-static void
+void
 sdaddpart(SDunit* unit, char* name, uvlong start, uvlong end)
 {
 	SDpart *pp;
@@ -135,6 +135,19 @@ sdaddpart(SDunit* unit, char* name, uvlong start, uvlong end)
 	pp->valid = 1;
 }
 
+SDpart*
+sdfindpart(SDunit *unit, char *name)
+{
+	int n;
+
+	/* return the first of unit's npart partitions whose perm.name equals name */
+	for(n = 0; n < unit->npart; n++)
+		if(strcmp(name, unit->part[n].perm.name) == 0)
+			return &unit->part[n];
+	/* nothing by that name */
+	return nil;
+}
+
 static void
 sddelpart(SDunit* unit, char* name)
 {
@@ -198,6 +211,7 @@ sdinitpart(SDunit* unit)
 	if(unit->sectors){
 		sdincvers(unit);
 		sdaddpart(unit, "data", 0, unit->sectors);
+		partition(unit);
 #if 0
 		/*
 		 * Use partitions passed from boot program,
diff --git a/src/9vx/a/dosfs.h b/src/9vx/a/dosfs.h
@@ -0,0 +1,62 @@
+typedef struct Dosboot	Dosboot;
+typedef struct Dos	Dos;
+typedef struct Dosdir	Dosdir;
+typedef struct Dosfile	Dosfile;
+typedef struct Dospart	Dospart;
+/* One partition table entry: 16 bytes, every field held as raw uchar bytes. */
+struct Dospart
+{
+	uchar flag;		/* active flag */
+	uchar shead;		/* starting head */
+	uchar scs[2];		/* starting cylinder/sector */
+	uchar type;		/* partition type */
+	uchar ehead;		/* ending head */
+	uchar ecs[2];		/* ending cylinder/sector */
+	uchar start[4];		/* starting sector */
+	uchar len[4];		/* length in sectors */
+};
+/* partition type codes — presumably the values carried in Dospart.type; confirm against the partition parser */
+#define FAT12	0x01
+#define FAT16	0x04
+#define EXTEND	0x05
+#define FATHUGE	0x06
+#define FAT32	0x0b
+#define FAT32X	0x0c
+#define EXTHUGE	0x0f
+#define DMDDO	0x54
+#define PLAN9	0x39
+#define LEXTEND 0x85
+/* Per-file DOS state; fs.h embeds it in File as the `dos` union member. */
+struct Dosfile{
+	Dos	*dos;		/* owning dos file system */
+	char	name[8];	/* with ext[3]: presumably the 8.3 name — confirm padding/termination */
+	char	ext[3];
+	uchar	attr;
+	long	length;
+	long	pstart;		/* physical start cluster address */
+	long	pcurrent;	/* physical current cluster address */
+	long	lcurrent;	/* logical current cluster address */
+	long	offset;
+};
+/* Per-volume DOS parameters; fs.h embeds it in Fs as the `dos` union member. */
+struct Dos{
+	long	start;		/* start of file system */
+	int	sectsize;	/* in bytes */
+	int	clustsize;	/* in sectors */
+	int	clustbytes;	/* in bytes */
+	int	nresrv;		/* sectors */
+	int	nfats;		/* usually 2 */
+	int	rootsize;	/* number of entries */
+	int	volsize;	/* in sectors */
+	int	mediadesc;
+	int	fatsize;	/* in sectors */
+	int	fatclusters;
+	int	fatbits;	/* 12 or 16 */
+	long	fataddr;	/* sector number */
+	long	rootaddr;
+	long	rootclust;
+	long	dataaddr;
+	long	freeptr;
+};
+
+extern int	dosinit(Fs*);
diff --git a/src/9vx/a/etherif.h b/src/9vx/a/etherif.h
@@ -0,0 +1,39 @@
+enum {
+	MaxEther	= 48,	/* number of etherxx[] slots walked by etherreset and ethershutdown */
+	Ntypes		= 8,	/* passed to netifinit as its third argument */
+};
+/* Per-interface state; etherreset stores one pointer per probed interface in etherxx[]. */
+typedef struct Ether Ether;
+struct Ether {
+	ISAConf isac;
+
+	int	ctlrno;
+	int	tbdf;			/* type+busno+devno+funcno */
+	int	minmtu;
+	int 	maxmtu;
+	uchar	ea[Eaddrlen];		/* copied into ni.addr when the interface is set up */
+
+	void	(*attach)(Ether*);	/* filled in by reset routine */
+	void	(*detach)(Ether*);
+	void	(*transmit)(Ether*);
+	void	(*interrupt)(Ureg*, void*);
+	long	(*ifstat)(Ether*, void*, long, ulong);
+	long 	(*ctl)(Ether*, void*, long); /* custom ctl messages */
+	void	(*power)(Ether*, int);	/* power on/off */
+	void	(*shutdown)(Ether*);	/* shutdown hardware before reboot */
+	void	*ctlr;
+
+	Queue*	oq;			/* opened with qopen(bsz, Qmsg, 0, 0) at setup */
+
+	Netif	ni;			/* initialised by netifinit; alen, addr, bcast, limit set at setup */
+};
+/* functions exported alongside Ether */
+extern Block* etheriq(Ether*, Block*, int);
+extern void addethercard(char*, int(*)(Ether*));
+extern ulong ethercrc(uchar*, int);
+extern int parseether(uchar*, char*);
+/* index and rounding helpers; PREV, HOWMANY and ROUNDUP expand an argument more than once, so avoid side effects */
+#define NEXT(x, l)	(((uint)(x)+1)%(l))
+#define PREV(x, l)	(((x) == 0) ? (l)-1: (x)-1)
+#define	HOWMANY(x, y)	(((x)+((y)-1))/(y))
+#define ROUNDUP(x, y)	(HOWMANY((x), (y))*(y))
diff --git a/src/9vx/a/fns.ed b/src/9vx/a/fns.ed
@@ -16,4 +16,54 @@ int	tailkmesg(char*, int);
 void	trap(Ureg*);
 void	uartecho(char*, int);
 void	uartinit(int);
+
+#define GSHORT(p)	(((p)[1]<<8)|(p)[0])
+#define GLONG(p)	((GSHORT(p+2)<<16)|GSHORT(p))
+
+void	__plock(Psleep*);
+void	__punlock(Psleep*);
+void	__pwakeup(Psleep*);
+void	__psleep(Psleep*);
+
+extern int tracelock;
+
+#define lockfngen(type)	__ ## type
+
+#define lockgen(type, arg) 								\
+	do {										\
+		if (tracelock) {							\
+			iprint("%s %p %s %d\n", (#type), (arg), __FILE__, __LINE__);	\
+			lockfngen(type)((arg));						\
+		} else {								\
+			lockfngen(type)((arg));						\
+		}									\
+	} while (0)
+
+#define qlock(x)	lockgen(qlock, (x))
+#define qunlock(x)	lockgen(qunlock, (x))
+#define rlock(x)	lockgen(rlock, (x))
+#define runlock(x)	lockgen(runlock, (x))
+#define wlock(x)	lockgen(wlock, (x))
+#define wunlock(x)	lockgen(wunlock, (x))
+#define plock(x)	lockgen(plock, (x))
+#define punlock(x)	lockgen(punlock, (x))
+#define pwakeup(x)	lockgen(pwakeup, (x))
+#define psleep(x)	lockgen(psleep, (x))
+// #define lock(x)		lockgen(lock, (x))
+// #define unlock(x)	lockgen(unlock, (x))
+#define lock(x) __lock(x)
+#define unlock(x) __unlock(x)
+#define canqlock	__canqlock
+#define canrlock	__canrlock
+
+#define	LOCK(x)		lock(&((x)->lk))
+#define	UNLOCK(x)	unlock(&((x)->lk))
+#define CANQLOCK(x)	canqlock(&((x)->qlock))
+#define	QLOCK(x)	qlock(&((x)->qlock))
+#define	QUNLOCK(x)	qunlock(&((x)->qlock))
+#define CANRLOCK(x)	canrlock(&((x)->rwlock))
+#define	RLOCK(x)	rlock(&((x)->rwlock))
+#define	RUNLOCK(x)	runlock(&((x)->rwlock))
+#define	WLOCK(x)	wlock(&((x)->rwlock))
+#define	WUNLOCK(x)	wunlock(&((x)->rwlock))
 .
diff --git a/src/9vx/a/fns.h b/src/9vx/a/fns.h
@@ -167,8 +167,53 @@ void	*uvalidaddr(ulong addr, ulong len, int write);
 int	isuaddr(void*);
 void	setsigsegv(int invx32);
 
-void	plock(Psleep*);
-void	punlock(Psleep*);
-void	pwakeup(Psleep*);
-void	psleep(Psleep*);
+#define GSHORT(p)	(((p)[1]<<8)|(p)[0])
+#define GLONG(p)	((GSHORT(p+2)<<16)|GSHORT(p))
+
+void	__plock(Psleep*);
+void	__punlock(Psleep*);
+void	__pwakeup(Psleep*);
+void	__psleep(Psleep*);
+
+extern int tracelock;
+
+#define lockfngen(type)	__ ## type
+
+#define lockgen(type, arg) 								\
+	do {										\
+		if (tracelock) {							\
+			iprint("%s %p %s %d\n", (#type), (arg), __FILE__, __LINE__);	\
+			lockfngen(type)((arg));						\
+		} else {								\
+			lockfngen(type)((arg));						\
+		}									\
+	} while (0)
+
+#define qlock(x)	lockgen(qlock, (x))
+#define qunlock(x)	lockgen(qunlock, (x))
+#define rlock(x)	lockgen(rlock, (x))
+#define runlock(x)	lockgen(runlock, (x))
+#define wlock(x)	lockgen(wlock, (x))
+#define wunlock(x)	lockgen(wunlock, (x))
+#define plock(x)	lockgen(plock, (x))
+#define punlock(x)	lockgen(punlock, (x))
+#define pwakeup(x)	lockgen(pwakeup, (x))
+#define psleep(x)	lockgen(psleep, (x))
+// #define lock(x)		lockgen(lock, (x))
+// #define unlock(x)	lockgen(unlock, (x))
+#define lock(x) __lock(x)
+#define unlock(x) __unlock(x)
+#define canqlock	__canqlock
+#define canrlock	__canrlock
+
+#define	LOCK(x)		lock(&((x)->lk))
+#define	UNLOCK(x)	unlock(&((x)->lk))
+#define CANQLOCK(x)	canqlock(&((x)->qlock))
+#define	QLOCK(x)	qlock(&((x)->qlock))
+#define	QUNLOCK(x)	qunlock(&((x)->qlock))
+#define CANRLOCK(x)	canrlock(&((x)->rwlock))
+#define	RLOCK(x)	rlock(&((x)->rwlock))
+#define	RUNLOCK(x)	runlock(&((x)->rwlock))
+#define	WLOCK(x)	wlock(&((x)->rwlock))
+#define	WUNLOCK(x)	wunlock(&((x)->rwlock))
 
diff --git a/src/9vx/a/fs.h b/src/9vx/a/fs.h
@@ -0,0 +1,38 @@
+typedef struct File File;
+typedef struct Fs Fs;
+
+#include "dosfs.h"
+#include "kfs.h"
+/* A file: per-filesystem state overlaid in an anonymous union, plus a pointer to its Fs and a path string. */
+struct File{
+	union{
+		Dosfile	dos;
+		Kfsfile	kfs;
+		int walked;
+	};
+	Fs	*fs;
+	char	*path;
+};
+/* A file system instance: dos or kfs state in an anonymous union, then the operations used to reach it. */
+struct Fs{
+	union {
+		Dos dos;
+		Kfs kfs;
+	};
+	int	dev;				/* device id */
+	long	(*diskread)(Fs*, void*, long);	/* disk read routine */
+	vlong	(*diskseek)(Fs*, vlong);	/* disk seek routine */
+	long	(*read)(File*, void*, long);
+	int	(*walk)(File*, char*);
+	File	root;
+};
+
+/*
+extern int chatty;
+extern int dotini(Fs*);
+extern int fswalk(Fs*, char*, File*);
+extern int fsread(File*, void*, long);
+extern int fsboot(Fs*, char*, Boot*);
+*/
+/* true when x, converted to ulong, is below 0x80000000; x is parenthesized so an argument like p+1 is cast as a whole */
+#define BADPTR(x) ((ulong)(x) < 0x80000000)
diff --git a/src/9vx/a/ip.ed b/src/9vx/a/ip.ed
@@ -0,0 +1,2297 @@
+diff -e ip.orig/arp.c ip/arp.c
+643c
+	QUNLOCK(arp);
+.
+613,614c
+	RUNLOCK(ifc);
+	QLOCK(arp);
+.
+609c
+	QUNLOCK(arp);	/* for icmpns */
+.
+589c
+		if((a->rxtsrem <= 0) || !(CANRLOCK(ifc)) || (a->ifcid != ifc->ifcid)){
+.
+574c
+	QLOCK(arp);
+.
+557c
+		QUNLOCK(arp);
+.
+554c
+		QLOCK(arp);
+.
+511c
+		QUNLOCK(arp);
+.
+481c
+		QLOCK(arp);
+.
+444c
+		QUNLOCK(arp);
+.
+426c
+		QLOCK(arp);
+.
+398c
+	QUNLOCK(arp);
+.
+380c
+					RUNLOCK(ifc);
+.
+375c
+					RLOCK(ifc);
+.
+372c
+						RUNLOCK(ifc);
+.
+366c
+			QUNLOCK(arp);
+.
+337c
+	QLOCK(arp);
+.
+292c
+	QUNLOCK(arp);
+.
+260c
+	QUNLOCK(arp);
+.
+258c
+arprelease(Arp *arp, Arpent* ae)
+.
+250c
+	QUNLOCK(arp);
+.
+219c
+	QLOCK(arp);
+.
+50c
+int 	ReTransTimer = RETRANS_TIMER;
+.
+48c
+#define haship(s) ((ulong)((s)[IPaddrlen-1])%NHASH)
+.
+36c
+	QLock	qlock;
+.
+14d
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/chandial.c ip/chandial.c
+6,7c
+#include	"error.h"
+#include	"ip/ip.h"
+.
+2c
+#include	"lib.h"
+.
+diff -e ip.orig/devip.c ip/devip.c
+1430c
+	QUNLOCK(c);
+.
+1418c
+		QUNLOCK(c);
+.
+1404,1411c
+		QUNLOCK(c);
+.
+1399c
+	QLOCK(c);
+.
+1349c
+	QUNLOCK(c);
+.
+1326,1328d
+1322,1323d
+1318c
+			QUNLOCK(c);
+.
+1310c
+		if(CANQLOCK(c)){
+.
+1294c
+			QLOCK(c);
+.
+1185c
+		QUNLOCK(c);
+.
+1130c
+			QUNLOCK(c);
+.
+1128c
+		QLOCK(c);
+.
+1033c
+	QLOCK(c);
+.
+1029c
+		QLOCK(c);
+.
+1027c
+	QUNLOCK(c);
+.
+980c
+	QLOCK(c);
+.
+976c
+		QLOCK(c);
+.
+974c
+	QUNLOCK(c);
+.
+831c
+	QUNLOCK(p);
+.
+820,826c
+	QUNLOCK(p);
+.
+793c
+	QLOCK(p);
+.
+765c
+	QUNLOCK(p);
+.
+760c
+			QUNLOCK(p);
+.
+748c
+	QLOCK(p);
+.
+582c
+	QUNLOCK(cv);
+.
+561c
+		QUNLOCK(cv);
+.
+558c
+	QLOCK(cv);
+.
+516c
+ipremove(Chan* _)
+.
+510c
+ipcreate(Chan* _, char* __, int ___, ulong ____)
+.
+494c
+			QUNLOCK(cv);
+.
+487c
+			QLOCK(cv);
+.
+470c
+		QUNLOCK(cv);
+.
+468c
+		QLOCK(cv);
+.
+447,448c
+		QUNLOCK(cv);
+		QUNLOCK(p);
+.
+431,432c
+			QUNLOCK(cv);
+			QUNLOCK(p);
+.
+429c
+		QLOCK(cv);
+.
+427c
+		QLOCK(p);
+.
+415c
+		QUNLOCK(p);
+.
+411c
+			QUNLOCK(p);
+.
+409c
+		QLOCK(p);
+.
+174c
+ipgen(Chan *c, char* __ch, Dirtab* __dt, int __i, int s, Dir *dp)
+.
+50c
+#define QID(p, c, y) 	( ((uint)(p)<<(Shiftproto)) | ((uint)(c)<<Shiftconv) | (y) )
+.
+6,7c
+#include	"error.h"
+#include	"ip/ip.h"
+.
+2c
+#include	"lib.h"
+.
+diff -e ip.orig/esp.c ip/esp.c
+1106a
+
+
+#ifdef notdef
+enum {
+	RC4forward= 10*1024*1024,	/* maximum skip forward */
+	RC4back = 100*1024,	/* maximum look back */
+};
+
+typedef struct Esprc4 Esprc4;
+struct Esprc4
+{
+	ulong	cseq;		/* current byte sequence number */
+	RC4state current;
+
+	int	ovalid;		/* old is valid */
+	ulong	lgseq;		/* last good sequence */
+	ulong	oseq;		/* old byte sequence number */
+	RC4state old;
+};
+
+static void rc4espinit(Espcb *ecb, char *name, uchar *k, int n);
+
+static int
+rc4cipher(Espcb *ecb, uchar *p, int n)
+{
+	Esprc4 *esprc4;
+	RC4state tmpstate;
+	ulong seq;
+	long d, dd;
+
+	if(n < 4)
+		return 0;
+
+	esprc4 = ecb->espstate;
+	if(ecb->incoming) {
+		seq = nhgetl(p);
+		p += 4;
+		n -= 4;
+		d = seq-esprc4->cseq;
+		if(d == 0) {
+			rc4(&esprc4->current, p, n);
+			esprc4->cseq += n;
+			if(esprc4->ovalid) {
+				dd = esprc4->cseq - esprc4->lgseq;
+				if(dd > RC4back)
+					esprc4->ovalid = 0;
+			}
+		} else if(d > 0) {
+print("esp rc4cipher: missing packet: %uld %ld\n", seq, d); /* this link is hosed */
+			if(d > RC4forward) {
+				strcpy(up->errstr, "rc4cipher: skipped too much");
+				return 0;
+			}
+			esprc4->lgseq = seq;
+			if(!esprc4->ovalid) {
+				esprc4->ovalid = 1;
+				esprc4->oseq = esprc4->cseq;
+				memmove(&esprc4->old, &esprc4->current,
+					sizeof(RC4state));
+			}
+			rc4skip(&esprc4->current, d);
+			rc4(&esprc4->current, p, n);
+			esprc4->cseq = seq+n;
+		} else {
+print("esp rc4cipher: reordered packet: %uld %ld\n", seq, d);
+			dd = seq - esprc4->oseq;
+			if(!esprc4->ovalid || -d > RC4back || dd < 0) {
+				strcpy(up->errstr, "rc4cipher: too far back");
+				return 0;
+			}
+			memmove(&tmpstate, &esprc4->old, sizeof(RC4state));
+			rc4skip(&tmpstate, dd);
+			rc4(&tmpstate, p, n);
+			return 1;
+		}
+
+		/* move old state up */
+		if(esprc4->ovalid) {
+			dd = esprc4->cseq - RC4back - esprc4->oseq;
+			if(dd > 0) {
+				rc4skip(&esprc4->old, dd);
+				esprc4->oseq += dd;
+			}
+		}
+	} else {
+		hnputl(p, esprc4->cseq);
+		p += 4;
+		n -= 4;
+		rc4(&esprc4->current, p, n);
+		esprc4->cseq += n;
+	}
+	return 1;
+}
+
+static void
+rc4espinit(Espcb *ecb, char *name, uchar *k, int n)
+{
+	Esprc4 *esprc4;
+
+	/* bits to bytes */
+	n = (n+7)>>3;
+	esprc4 = smalloc(sizeof(Esprc4));
+	memset(esprc4, 0, sizeof(Esprc4));
+	setupRC4state(&esprc4->current, k, n);
+	ecb->espalg = name;
+	ecb->espblklen = 4;
+	ecb->espivlen = 4;
+	ecb->cipher = rc4cipher;
+	ecb->espstate = esprc4;
+}
+#endif
+.
+1056,1081d
+1048,1050c
+	ecb->espblklen = 8;
+	ecb->espivlen = 8;
+.
+1045c
+	for(i=0; i<8; i++)
+.
+1040,1042c
+	/* bits to bytes */
+	n = (n+7)>>3;
+	if(n > 8)
+		n = 8;
+.
+1037c
+	uchar key[8], ivec[8];
+.
+1035c
+desespinit(Espcb *ecb, char *name, uchar *k, int n)
+.
+1019,1033d
+1013,1014c
+		memmove(p, ds->ivec, 8);
+		for(p += 8; p < ep; p += 8){
+			pp = p;
+			ip = ds->ivec;
+			for(eip = ip+8; ip < eip; )
+				*pp++ ^= *ip++;
+			block_cipher(ds->expanded, p, 0);
+			memmove(ds->ivec, p, 8);
+		}
+.
+1010,1011c
+		memmove(ds->ivec, p, 8);
+		p += 8;
+		while(p < ep){
+			memmove(tmp, p, 8);
+			block_cipher(ds->expanded, p, 1);
+			tp = tmp;
+			ip = ds->ivec;
+			for(eip = ip+8; ip < eip; ){
+				*p++ ^= *ip;
+				*ip++ = *tp++;
+			}
+		}
+.
+1008a
+	ep = p + n;
+.
+1006a
+	uchar tmp[8];
+	uchar *pp, *tp, *ip, *eip, *ep;
+.
+999,1003d
+993c
+	ecb->ahlen = 12;
+.
+990c
+	klen >>= 3;		/* convert to bytes */
+
+.
+986c
+md5ahinit(Espcb *ecb, char *name, uchar *key, int klen)
+.
+979c
+	seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+.
+968c
+	digest = md5(opad, 64, nil, nil);
+.
+966c
+	digest = md5(ipad, 64, nil, nil);
+.
+959,962c
+	for(i=0; i<64; i++){
+		ipad[i] = 0x36;
+		opad[i] = 0x5c;
+	}
+	ipad[64] = opad[64] = 0;
+	for(i=0; i<klen; i++){
+.
+957a
+	uchar innerhash[MD5dlen];
+.
+956d
+954a
+	uchar ipad[65], opad[65];
+.
+796,952c
+void
+.
+790c
+	ecb->ahlen = 12;
+.
+786c
+	klen >>= 8;		/* convert to bytes */
+.
+782c
+shaahinit(Espcb *ecb, char *name, uchar *key, int klen)
+.
+775c
+	seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+.
+772a
+	int r;
+.
+771d
+764c
+	digest = sha1(opad, 64, nil, nil);
+.
+762c
+	digest = sha1(ipad, 64, nil, nil);
+.
+755,758c
+	for(i=0; i<64; i++){
+		ipad[i] = 0x36;
+		opad[i] = 0x5c;
+	}
+	ipad[64] = opad[64] = 0;
+	for(i=0; i<klen; i++){
+.
+753a
+	uchar innerhash[SHA1dlen];
+.
+752d
+750a
+	uchar ipad[65], opad[65];
+.
+743,748c
+void
+.
+735c
+nullahinit(Espcb *ecb, char *name, uchar* _, int __)
+.
+729c
+nullauth(Espcb* _, uchar* __, int ___, uchar* ____)
+.
+720c
+nullespinit(Espcb *ecb, char *name, uchar* _, int __)
+.
+714c
+nullcipher(Espcb* _, uchar* __, int ___)
+.
+708,712d
+647c
+	QUNLOCK(c);
+.
+642c
+	QLOCK(c);
+.
+632c
+	QUNLOCK(c);
+.
+627c
+	QLOCK(c);
+.
+606c
+	QUNLOCK(esp);
+.
+600,601c
+	spi = nhgets(h->espspi);
+	QLOCK(esp);
+	c = convlookup(esp, spi);
+.
+597,598c
+	h = (Esp4hdr*)(bp->rp);
+.
+595c
+	ulong spi;
+.
+593a
+	Esp4hdr *h;
+.
+590d
+568c
+	QUNLOCK(c);
+.
+565c
+		qpass(c->rq, bp);
+.
+560,561c
+		netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", raddr,
+			laddr, spi);
+.
+557,558d
+547c
+	bp->rp += hdrlen + ecb->espivlen;
+.
+539,541c
+		QUNLOCK(c);
+		netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%d\n",
+			raddr, laddr, spi);
+.
+535c
+	et = (Esptail*)(bp->rp + hdrlen + payload);
+.
+523,529c
+	if(!ecb->cipher(ecb, bp->rp + hdrlen, payload)) {
+		QUNLOCK(c);
+print("esp: cipher failed %I -> %I!%ld: %s\n", raddr, laddr, spi, up->errstr);
+		netlog(f, Logesp, "esp: cipher failed %I -> %I!%d: %s\n", raddr,
+			laddr, spi, up->errstr);
+.
+517,519c
+		QUNLOCK(c);
+		netlog(f, Logesp, "esp: bad length %I -> %I!%d payload=%d BLEN=%d\n",
+			raddr, laddr, spi, payload, BLEN(bp));
+.
+515c
+	payload = BLEN(bp) - hdrlen - ecb->ahlen;
+.
+507,510c
+		QUNLOCK(c);
+print("esp: bad auth %I -> %I!%ld\n", raddr, laddr, spi);
+		netlog(f, Logesp, "esp: bad auth %I -> %I!%d\n", raddr,
+			laddr, spi);
+.
+502,505c
+	espspi = version == V4? ((Esp4hdr*)bp->rp)->espspi:
+				((Esp6hdr*)bp->rp)->espspi;
+.
+493,496c
+	if(BLEN(bp) < hdrlen + ecb->espivlen + Esptaillen + ecb->ahlen) {
+		QUNLOCK(c);
+		netlog(f, Logesp, "esp: short block %I -> %I!%d\n", raddr,
+			laddr, spi);
+.
+485,486c
+	QLOCK(c);
+	QUNLOCK(esp);
+.
+477,479c
+		QUNLOCK(esp);
+		netlog(f, Logesp, "esp: no conv %I -> %I!%d\n", raddr,
+			laddr, spi);
+.
+475c
+	c = convlookup(esp, spi);
+.
+473c
+	if (version == V4) {
+		eh4 = (Esp4hdr*)bp->rp;
+		spi = nhgetl(eh4->espspi);
+		v4tov6(raddr, eh4->espsrc);
+		v4tov6(laddr, eh4->espdst);
+	} else {
+		eh6 = (Esp6hdr*)bp->rp;
+		spi = nhgetl(eh6->espspi);
+		ipmove(raddr, eh6->src);
+		ipmove(laddr, eh6->dst);
+	}
+
+	QLOCK(esp);
+.
+471d
+464,466c
+	bp = pullupblock(bp, hdrlen + Esptaillen);
+.
+462a
+	if (bp == nil || BLEN(bp) == 0) {
+		/* get enough to identify the IP version */
+		bp = pullupblock(bp, IP4HDR);
+		if(bp == nil) {
+			netlog(f, Logesp, "esp: short packet\n");
+			return;
+		}
+	}
+	eh4 = (Esp4hdr*)bp->rp;
+	version = ((eh4->vihl & 0xf0) == IP_VER4? V4: V6);
+	hdrlen = version == V4? Esp4hdrlen: Esp6hdrlen;
+.
+459,460c
+	uchar *auth, *espspi;
+	ulong spi;
+	int payload, nexthdr, version, hdrlen;
+.
+457c
+	uchar raddr[IPaddrlen], laddr[IPaddrlen];
+.
+453,454c
+	Esp4hdr *eh4;
+	Esp6hdr *eh6;
+	Esptail *et;
+	Userhdr *uh;
+.
+451c
+espiput(Proto *esp, Ipifc* _, Block *bp)
+.
+446,449d
+440c
+	if (version == V4)
+.
+438c
+	QUNLOCK(c);
+.
+434,435c
+	ecb->auth(ecb, bp->rp + iphdrlen, (hdrlen - iphdrlen) +
+.
+429,431d
+425a
+		hnputl(eh6->espspi, ecb->spi);
+		hnputl(eh6->espseq, ++ecb->seq);
+.
+424d
+420,422d
+414a
+		hnputl(eh4->espspi, ecb->spi);
+		hnputl(eh4->espseq, ++ecb->seq);
+.
+411,413c
+	/* fill in head */
+	if (version == V4) {
+.
+407,409c
+	ecb->cipher(ecb, bp->rp + hdrlen, payload + pad + Esptaillen);
+	auth = bp->rp + hdrlen + payload + pad + Esptaillen;
+.
+401c
+	eh4 = (Esp4hdr *)bp->rp;
+	eh6 = (Esp6hdr *)bp->rp;
+	et = (Esptail*)(bp->rp + hdrlen + payload + pad);
+.
+383,384c
+	bp = padblock(bp, hdrlen + ecb->espivlen);
+.
+370c
+			QUNLOCK(c);
+.
+363c
+	QLOCK(c);
+.
+358c
+	version = ipvers(c);
+	iphdrlen = version == V4? IP4HDR: IP6HDR;
+	hdrlen =   version == V4? Esp4hdrlen: Esp6hdrlen;
+
+.
+356c
+	Espcb *ecb;
+	Block *bp;
+	int nexthdr, payload, pad, align, version, hdrlen, iphdrlen;
+	uchar *auth;
+.
+353d
+347,349d
+299,344d
+284,297d
+274c
+ipvers(Conv *c)
+.
+221c
+			QUNLOCK(c->p);
+.
+215c
+			QLOCK(c->p);
+.
+207,210c
+		parseip(c->raddr, argv[1]);
+.
+192c
+	char *p, *pp;
+	char *e = nil;
+.
+182,186c
+	"null",			0,	nullahinit,
+	"hmac_sha1_96",		128,	shaahinit,	/* rfc2404 */
+//	"aes_xcbc_mac_96",	128,	aesahinit,	/* rfc3566 */
+	"hmac_md5_96",		128,	md5ahinit,	/* rfc2403 */
+	nil,			0,	nil,
+.
+170,177c
+	"null",			0,	nullespinit,
+//	"des3_cbc",		192,	des3espinit,	/* rfc2451 */
+//	"aes_128_cbc",		128,	aescbcespinit,	/* rfc3602 */
+//	"aes_ctr",		128,	aesctrespinit,	/* rfc3686 */
+	"des_56_cbc",		64,	desespinit,	/* rfc2405, deprecated */
+//	"rc4_128",		128,	rc4espinit,	/* gone in rfc4305 */
+	nil,			0,	nil,
+.
+163,166c
+static	void nullahinit(Espcb*, char*, uchar *key, int keylen);
+static	void shaahinit(Espcb*, char*, uchar *key, int keylen);
+static	void md5ahinit(Espcb*, char*, uchar *key, int keylen);
+.
+157,161c
+static	void nullespinit(Espcb*, char*, uchar *key, int keylen);
+static	void desespinit(Espcb *ecb, char *name, uchar *k, int n);
+.
+150c
+	void	(*init)(Espcb*, char* name, uchar *key, int keylen);
+.
+143d
+137d
+131d
+127c
+	int	header;		/* user user level header */
+.
+96,107d
+86,87c
+	/* Ip6hdr; */
+	uchar	vcf[4];		/* version:4, traffic class:8, flow label:20 */
+	uchar	ploadlen[2];	/* payload length: packet length - 40 */
+	uchar	proto;		/* next header type */
+	uchar	ttl;		/* hop limit */
+	uchar	src[IPaddrlen];
+	uchar	dst[IPaddrlen];
+
+	/* Esphdr; */
+	uchar	espspi[4];	/* Security parameter index */
+	uchar	espseq[4];	/* Sequence number */
+.
+80c
+	/* Esphdr; */
+	uchar	espspi[4];	/* Security parameter index */
+	uchar	espseq[4];	/* Sequence number */
+.
+58,64c
+ * tunnel-mode layout:		IP | ESP | TCP/UDP | user data.
+ * transport-mode layout is:	ESP | IP | TCP/UDP | user data.
+.
+54d
+42,47d
+32,35c
+enum
+{
+.
+30a
+typedef struct Esppriv Esppriv;
+typedef struct Espcb Espcb;
+typedef struct Algorithm Algorithm;
+.
+26,28d
+20,23c
+typedef struct Esphdr Esphdr;
+.
+14c
+#include	"error.h"
+.
+10c
+#include	"lib.h"
+.
+6,7c
+ * TODO: update to match rfc4303.
+.
+3,4d
+diff -e ip.orig/ethermedium.c ip/ethermedium.c
+536c
+	if((sflag = ipv6anylocal(ifc, ipsrc)) != 0)
+.
+429c
+etherremmulti(Ipifc *ifc, uchar *a, uchar *_)
+.
+407c
+etheraddmulti(Ipifc *ifc, uchar *a, uchar *_)
+.
+401c
+		RUNLOCK(ifc);
+.
+392c
+			RUNLOCK(ifc);
+.
+387c
+		if(!CANRLOCK(ifc)){
+.
+362c
+		RUNLOCK(ifc);
+.
+353c
+			RUNLOCK(ifc);
+.
+348c
+		if(!CANRLOCK(ifc)){
+.
+269c
+ *  called by ipoput with a single block to write with ifc RLOCK'd
+.
+123a
+
+.
+8c
+#include "netif.h"
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/gre.c ip/gre.c
+968c
+	gre->ptclsize = 0;
+.
+919,948d
+894,916c
+	return "unknown control request";
+.
+885,892d
+881,883c
+		else if(strcmp(f[0], "cooked") == 0){
+			gpriv->raw = 0;
+			return nil;
+.
+696,879c
+	gpriv = c->p->priv;
+	if(n == 1){
+		if(strcmp(f[0], "raw") == 0){
+			gpriv->raw = 1;
+			return nil;
+.
+694c
+	GREpriv *gpriv;
+.
+691,692c
+char*
+grectl(Conv *c, char **f, int n)
+.
+681,688c
+	return snprint(buf, len, "gre: len %lud\n", gpriv->lenerr);
+.
+675,679d
+659,660c
+	if(qlen(c->rq) > 64*1024)
+		freeblist(bp);
+.
+651d
+648d
+645c
+		freeblist(bp);
+.
+643c
+	len = nhgets(ghp->len) - GRE_IPONLY;
+.
+639a
+	QUNLOCK(gre);
+
+.
+633,636c
+	if(*p == nil) {
+		QUNLOCK(gre);
+		freeblist(bp);
+.
+590,629c
+		if(c->rport == eproto && 
+			(gpriv->raw || ipcmp(c->raddr, raddr) == 0))
+.
+587d
+553,585c
+	/* Look for a conversation structure for this port and address */
+	c = nil;
+	for(p = gre->conv; *p; p++) {
+.
+547,551c
+	v4tov6(raddr, ghp->src);
+	eproto = nhgets(ghp->eproto);
+	QLOCK(gre);
+.
+536,545c
+	gpriv = gre->priv;
+	ghp = (GREhdr*)(bp->rp);
+.
+534d
+531,532c
+	ushort eproto;
+	uchar raddr[IPaddrlen];
+.
+336,529c
+	int len;
+	GREhdr *ghp;
+.
+334c
+greiput(Proto *gre, Ipifc* __, Block *bp)
+.
+328,329d
+325,326c
+	ghp->proto = IP_GREPROTO;
+	ghp->frag[0] = 0;
+	ghp->frag[1] = 0;
+.
+322c
+		hnputs(ghp->eproto, c->rport);
+.
+318,320c
+				findlocalip(c->p->f, c->laddr, raddr); /* pick interface closest to dest */
+			memmove(ghp->src, c->laddr + IPv4off, IPv4addrlen);
+.
+314,315c
+			memmove(ghp->dst, c->raddr + IPv4off, IPv4addrlen);
+		v4tov6(laddr, ghp->src);
+.
+311,312c
+	if(!((GREpriv*)c->p->priv)->raw){
+		v4tov6(raddr, ghp->dst);
+.
+308,309c
+	ghp = (GREhdr *)(bp->rp);
+	ghp->vihl = IP_VER4;
+.
+295,297d
+287,289c
+	Conv *c = x;
+	GREhdr *ghp;
+.
+283a
+int drop;
+
+.
+281c
+	c->lport = 0;
+	c->rport = 0;
+.
+247,278c
+	qclose(c->rq);
+	qclose(c->wq);
+	qclose(c->eq);
+.
+241c
+	return "pktifc does not support announce";
+.
+239c
+greannounce(Conv* _, char** __, int ___)
+.
+218,235c
+	USED(c);
+	return snprint(state, n, "%s\n", "Datagram");
+.
+211c
+	c->rq = qopen(64*1024, Qmsg, 0, c);
+.
+199c
+	QUNLOCK(p);
+.
+184c
+	QLOCK(p);
+.
+138,171c
+static char*
+.
+136d
+71,134d
+68c
+	ulong		csumerr;		/* checksum errors */
+	ulong		lenerr;			/* short packet */
+.
+66c
+struct GREpriv
+{
+	int		raw;			/* Raw GRE mode */
+
+.
+63c
+} GREhdr;
+.
+54c
+	uchar	Unused;	
+.
+46,47c
+typedef struct GREhdr
+{
+.
+21,43d
+13c
+enum
+{
+.
+9c
+#include "error.h"
+.
+5c
+#include "lib.h"
+.
+diff -e ip.orig/icmp.c ip/icmp.c
+350c
+	if(iplen > n || ((uint)iplen % 1)){
+.
+339,341c
+	netlog(icmp->f, Logicmp, "icmpiput %d %d\n", p->type, p->code);
+.
+324c
+icmpiput(Proto *icmp, Ipifc* __, Block *bp)
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/icmp6.c ip/icmp6.c
+781c
+			bp->rp -= sizeof(IPICMP);
+.
+770c
+			bp->rp += sizeof(IPICMP);
+.
+762c
+		bp->rp -= sizeof(IPICMP);
+.
+750c
+		bp->rp += sizeof(IPICMP);
+.
+711c
+	RUNLOCK(ifc);
+.
+707c
+			RUNLOCK(ifc);
+.
+700c
+		RUNLOCK(ifc);
+.
+698c
+	RLOCK(ifc);
+.
+666c
+			sz = sizeof(IPICMP) + 8;
+.
+661c
+			if(pktsz - sizeof(Ip6hdr) < 8) {
+.
+649c
+			sz = sizeof(IPICMP) + 8;
+.
+641c
+			if(pktsz - sizeof(Ip6hdr) < 16) {
+.
+575c
+	if(iplen > n - IP6HDR || ((uint)iplen % 1) != 0) {
+.
+568c
+	if(n < sizeof(IPICMP)) {
+.
+546c
+	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+.
+537c
+		netlog(f, Logicmp, "icmppkttoobig6 fail -> s%I d%I\n",
+.
+534c
+		netlog(f, Logicmp, "send icmppkttoobig6 -> s%I d%I\n",
+.
+518c
+	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+.
+506c
+	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+.
+498c
+		netlog(f, Logicmp, "icmpttlexceeded6 fail -> s%I d%I\n",
+.
+495c
+		netlog(f, Logicmp, "send icmpttlexceeded6 -> s%I d%I\n",
+.
+479c
+	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+.
+471c
+	RUNLOCK(ifc);
+.
+457c
+	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+.
+445c
+		netlog(f, Logicmp, "icmphostunr fail -> s%I d%I\n",
+.
+442c
+		netlog(f, Logicmp, "send icmphostunr -> s%I d%I\n",
+.
+440c
+	RLOCK(ifc);
+.
+425c
+	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+.
+397c
+	nbp = newIPICMP(sizeof(Ndpkt));
+.
+375c
+		nbp->wp -= sizeof(Ndpkt) - sizeof(NdiscC);
+.
+354c
+	nbp = newIPICMP(sizeof(Ndpkt));
+.
+260c
+	if(blocklen(bp) < sizeof(IPICMP)){
+.
+257c
+		bp = padblock(bp, sizeof(Ip6hdr));
+.
+122c
+	QLock	qlock;
+.
+109,110d
+106d
+101a
+
+.
+99,100c
+	/* ICMPpkt; */
+	uchar	type;
+	uchar	code;
+	uchar	cksum[2];
+	uchar	icmpid[2];
+	uchar	seq[2];
+
+.
+97c
+struct Ndpkt
+{
+	/* NdiscC; */
+	/* IPICMP; */
+	/* Ip6hdr; */
+	uchar	vcf[4];		/* version:4, traffic class:8, flow label:20 */
+	uchar	ploadlen[2];	/* payload length: packet length - 40 */
+	uchar	proto;		/* next header type */
+	uchar	ttl;		/* hop limit */
+	uchar	src[IPaddrlen];
+	uchar	dst[IPaddrlen];
+.
+94d
+91,92c
+	/* ICMPpkt; */
+	uchar	type;
+	uchar	code;
+	uchar	cksum[2];
+	uchar	icmpid[2];
+	uchar	seq[2];
+
+.
+89c
+struct NdiscC
+{
+	/* IPICMP; */
+	/* Ip6hdr; */
+	uchar	vcf[4];		/* version:4, traffic class:8, flow label:20 */
+	uchar	ploadlen[2];	/* payload length: packet length - 40 */
+	uchar	proto;		/* next header type */
+	uchar	ttl;		/* hop limit */
+	uchar	src[IPaddrlen];
+	uchar	dst[IPaddrlen];
+.
+85,86c
+	/* Ip6hdr; */
+	uchar	vcf[4];		/* version:4, traffic class:8, flow label:20 */
+	uchar	ploadlen[2];	/* payload length: packet length - 40 */
+	uchar	proto;		/* next header type */
+	uchar	ttl;		/* hop limit */
+	uchar	src[IPaddrlen];
+	uchar	dst[IPaddrlen];
+
+	/* ICMPpkt; */
+	uchar	type;
+	uchar	code;
+	uchar	cksum[2];
+	uchar	icmpid[2];
+	uchar	seq[2];
+.
+75,82c
+struct ICMPpkt {
+	uchar	type;
+	uchar	code;
+	uchar	cksum[2];
+	uchar	icmpid[2];
+	uchar	seq[2];
+};
+.
+70c
+typedef struct ICMPpkt ICMPpkt;
+.
+9c
+#include "error.h"
+.
+5c
+#include "lib.h"
+.
+diff -e ip.orig/igmp.c ip/igmp.c
+217c
+		mp = Mediacopymulti(m);
+.
+177c
+igmpiput(Media *m, Ipifc *, Block *bp)
+.
+123c
+	byte ip[IPaddrlen];
+.
+97,99c
+	bp->wp += sizeof(IGMPpkt);
+	memset(bp->rp, 0, sizeof(IGMPpkt));
+	hnputl(p->src, Mediagetaddr(m));
+.
+87c
+igmpsendreport(Media *m, byte *addr)
+.
+68c
+	Lock lk;
+
+.
+60c
+	Media		*m;
+.
+51,52d
+43,48c
+	byte	vertype;	/* version and type */
+	byte	unused;
+	byte	igmpcksum[2];		/* checksum of igmp portion */
+	byte	group[IPaddrlen];	/* multicast group */
+.
+31,40c
+	byte	vihl;		/* Version and header length */
+	byte	tos;		/* Type of service */
+	byte	len[2];		/* packet length (including headers) */
+	byte	id[2];		/* Identification */
+	byte	frag[2];	/* Fragment information */
+	byte	Unused;	
+	byte	proto;		/* Protocol */
+	byte	cksum[2];	/* checksum of ip portion */
+	byte	src[IPaddrlen];		/* Ip source */
+	byte	dst[IPaddrlen];		/* Ip destination */
+.
+27a
+typedef char byte;
+
+.
+10c
+#include "error.h"
+.
+6c
+#include "lib.h"
+.
+1,4d
+diff -e ip.orig/inferno.c ip/inferno.c
+28a
+
+Medium tripmedium =
+{
+	"trip",
+};
+.
+25c
+bootpread(char* _, ulong __, int ___)
+.
+23a
+char*
+bootp(Ipifc* _)
+{
+	return "unimplmented";
+}
+
+.
+17a
+Chan*
+commonfdtochan(int fd, int mode, int a, int b)
+{
+	return fdtochan(fd, mode, a, b);
+}
+
+.
+6c
+#include	"error.h"
+#include	"ip.h"
+.
+2c
+#include	"lib.h"
+.
+diff -e ip.orig/ip.c ip/ip.c
+522,524c
+	if(bp->base+sizeof(Ipfrag) >= bp->rp){
+		bp = padblock(bp, sizeof(Ipfrag));
+		bp->rp += sizeof(Ipfrag);
+.
+466,467c
+	for(i = 0; i < Nstats; i++)
+		p = seprint(p, e, "%s: %lud\n", statnames[i], ip->stats[i]);
+.
+383c
+			freeb(bp);
+.
+381a
+		Conv conv;
+
+.
+322d
+320d
+301c
+	RUNLOCK(ifc);
+.
+213c
+		RUNLOCK(ifc);
+.
+211d
+196,199c
+	medialen = ifc->maxtu - ifc->m->hsize;
+.
+189c
+		RUNLOCK(ifc);
+.
+186c
+	if(!CANRLOCK(ifc))
+.
+11a
+/* MIB II counters */
+enum
+{
+	Forwarding,
+	DefaultTTL,
+	InReceives,
+	InHdrErrors,
+	InAddrErrors,
+	ForwDatagrams,
+	InUnknownProtos,
+	InDiscards,
+	InDelivers,
+	OutRequests,
+	OutDiscards,
+	OutNoRoutes,
+	ReasmTimeout,
+	ReasmReqds,
+	ReasmOKs,
+	ReasmFails,
+	FragOKs,
+	FragFails,
+	FragCreates,
+
+	Nstats,
+};
+
+struct Fragment4
+{
+	Block*	blist;
+	Fragment4*	next;
+	ulong 	src;
+	ulong 	dst;
+	ushort	id;
+	ulong 	age;
+};
+
+struct Fragment6
+{
+	Block*	blist;
+	Fragment6*	next;
+	uchar 	src[IPaddrlen];
+	uchar 	dst[IPaddrlen];
+	uint	id;
+	ulong 	age;
+};
+
+struct Ipfrag
+{
+	ushort	foff;
+	ushort	flen;
+};
+
+/* an instance of IP */
+struct IP
+{
+	ulong		stats[Nstats];
+
+	QLock		fraglock4;
+	Fragment4*	flisthead4;
+	Fragment4*	fragfree4;
+	Ref		id4;
+
+	QLock		fraglock6;
+	Fragment6*	flisthead6;
+	Fragment6*	fragfree6;
+	Ref		id6;
+
+	int		iprouting;	/* true if we route like a gateway */
+};
+
+.
+9a
+typedef struct Fragment4	Fragment4;
+typedef struct Fragment6	Fragment6;
+typedef struct Ipfrag		Ipfrag;
+
+.
+6c
+#include	"error.h"
+.
+2c
+#include	"lib.h"
+.
+diff -e ip.orig/ip.h ip/ip.h
+732a
+Chan*		commonfdtochan(int, int, int, int);
+.
+727a
+extern char*	bootp(Ipifc*);
+.
+676a
+extern Medium	tripmedium;
+.
+669c
+#define	NOW	msec()
+.
+578c
+/*	RouteTree; */
+	Route*	right;
+	Route*	left;
+	Route*	mid;
+	uchar	depth;
+	uchar	type;
+	uchar	ifcid;		/* must match ifc->id */
+	Ipifc	*ifc;
+	char	tag[4];
+	int	ref;
+.
+516,517d
+491a
+	Logilmsg=	1<<8,
+.
+488a
+	Logil=		1<<4,
+.
+423c
+	RWlock	rwlock;
+
+	Conv	*conv;		/* link to its conversation structure */
+.
+386c
+	QLock		qlock;
+
+.
+374c
+	Lock	lk;
+
+.
+312c
+	RWlock	rwlock;
+.
+173c
+	QLock	qlock;
+.
+153a
+typedef struct Ip4hdr		Ip4hdr;
+.
+79,152d
+41c
+	Maxincall=	5,
+.
+30,35d
+8,9d
+2,3d
+diff -e ip.orig/ipaux.c ip/ipaux.c
+366c
+	UNLOCK(ht);
+.
+363c
+		UNLOCK(ht);
+.
+352c
+			UNLOCK(ht);
+.
+340c
+			UNLOCK(ht);
+.
+328c
+			UNLOCK(ht);
+.
+316c
+			UNLOCK(ht);
+.
+309c
+	LOCK(ht);
+.
+290c
+	UNLOCK(ht);
+.
+282c
+	LOCK(ht);
+.
+272c
+	UNLOCK(ht);
+.
+269c
+	LOCK(ht);
+.
+241c
+	return (ulong)(sa[IPaddrlen-1]<<24 ^ sp<< 16 ^ da[IPaddrlen-1]<<8 ^ dp) % Nhash;
+.
+6c
+#include	"error.h"
+.
+2c
+#include	"lib.h"
+.
+diff -e ip.orig/ipifc.c ip/ipifc.c
+1575c
+			RUNLOCK(nifc);
+.
+1565c
+				RUNLOCK(nifc);
+.
+1562c
+			RLOCK(nifc);
+.
+1555c
+			RUNLOCK(nifc);
+.
+1541c
+				RUNLOCK(nifc);
+.
+1538c
+			RLOCK(nifc);
+.
+1518d
+1511d
+1498c
+		WUNLOCK(ifc);
+.
+1494c
+		WLOCK(ifc);
+.
+1491c
+			WUNLOCK(ifc);
+.
+1455c
+		WUNLOCK(ifc);
+.
+1451c
+		WLOCK(ifc);
+.
+1448c
+			WUNLOCK(ifc);
+.
+1301c
+	QUNLOCK(f->ipifc);
+.
+1265,1266c
+				if((atypel > atype && atype < atyper) ||
+				   (atypel < atype && atype > atyper)){
+.
+1232,1234c
+	QLOCK(f->ipifc);
+.
+1154c
+	    (isv6mcast(addr) && (addr[1] & 0xF) <= Link_local_scop))
+.
+1054c
+	QUNLOCK(f->self);
+.
+1040c
+	QLOCK(f->self);
+.
+1021c
+	QUNLOCK(f->self);
+.
+951c
+	QLOCK(f->self);
+.
+888c
+	QUNLOCK(f->self);
+.
+839c
+	QLOCK(f->self);
+.
+689c
+	WUNLOCK(ifc);
+.
+683c
+	WLOCK(ifc);
+.
+680c
+		WUNLOCK(ifc);
+.
+619c
+	WUNLOCK(ifc);
+.
+604c
+	WLOCK(ifc);
+.
+539c
+ *  always called with ifc WLOCK'd
+.
+531c
+	WUNLOCK(ifc);
+.
+417c
+	WLOCK(ifc);
+.
+319c
+	c->sq = qopen(2*QMAX, 0, 0, 0);
+.
+306c
+	RUNLOCK(ifc);
+.
+299c
+		RUNLOCK(ifc);
+.
+294c
+	if(!CANRLOCK(ifc)){
+.
+266c
+	RUNLOCK(ifc);
+.
+259c
+	RLOCK(ifc);
+.
+244c
+	RUNLOCK(ifc);
+.
+238c
+	RLOCK(ifc);
+.
+212c
+	WUNLOCK(ifc);
+.
+181c
+	WLOCK(ifc);
+.
+178c
+		WUNLOCK(ifc);
+.
+162c
+	WUNLOCK(ifc);
+.
+124c
+		WUNLOCK(ifc);
+.
+120c
+		WUNLOCK(ifc);
+.
+118c
+	WLOCK(ifc);
+.
+58c
+#define hashipa(a) ( (ulong)(((a)[IPaddrlen-2]<<8) | (a)[IPaddrlen-1])%NHASH )
+.
+39c
+	QLock	qlock;
+.
+18c
+	QMAX		= 64*1024-1,
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/ipmux.c ip/ipmux.c
+811c
+	RUNLOCK(f);
+.
+809c
+	RLOCK(f);
+.
+742c
+	RUNLOCK(f);
+.
+680c
+	RLOCK(f);
+.
+631,633c
+	WLOCK(f);
+	i = (Ipmux *)c->p->priv;
+	ipmuxremove(&i, r->chain);
+	WUNLOCK(f);
+.
+617a
+	Ipmux *i;
+.
+610c
+ipmuxannounce(Conv* _, char** __, int ___)
+.
+583c
+	WUNLOCK(f);
+.
+581c
+	WLOCK(f);
+.
+9c
+#include "error.h"
+.
+5c
+#include "lib.h"
+.
+diff -e ip.orig/iproute.c ip/iproute.c
+469c
+				while((p = f->queue) != nil) {
+.
+425c
+				while((p = f->queue) != nil) {
+.
+359c
+		while((p = f->queue) != nil) {
+.
+313c
+		while((p = f->queue) != nil) {
+.
+213,214c
+	dl = 0; if((l = p->left) != nil) dl = l->depth;
+	dr = 0; if((r = p->right) != nil) dr = r->depth;
+.
+6c
+#include	"error.h"
+.
+2c
+#include	"lib.h"
+.
+diff -e ip.orig/ipv6.c ip/ipv6.c
+506,508c
+	if(bp->base+sizeof(Ipfrag) >= bp->rp){
+		bp = padblock(bp, sizeof(Ipfrag));
+		bp->rp += sizeof(Ipfrag);
+.
+218c
+	RUNLOCK(ifc);
+.
+122c
+		RUNLOCK(ifc);
+.
+110c
+		RUNLOCK(ifc);
+.
+106c
+	if(!CANRLOCK(ifc))
+.
+29a
+/* MIB II counters */
+enum
+{
+	Forwarding,
+	DefaultTTL,
+	InReceives,
+	InHdrErrors,
+	InAddrErrors,
+	ForwDatagrams,
+	InUnknownProtos,
+	InDiscards,
+	InDelivers,
+	OutRequests,
+	OutDiscards,
+	OutNoRoutes,
+	ReasmTimeout,
+	ReasmReqds,
+	ReasmOKs,
+	ReasmFails,
+	FragOKs,
+	FragFails,
+	FragCreates,
+
+	Nstats,
+};
+
+static char *statnames[] =
+{
+[Forwarding]	"Forwarding",
+[DefaultTTL]	"DefaultTTL",
+[InReceives]	"InReceives",
+[InHdrErrors]	"InHdrErrors",
+[InAddrErrors]	"InAddrErrors",
+[ForwDatagrams]	"ForwDatagrams",
+[InUnknownProtos]	"InUnknownProtos",
+[InDiscards]	"InDiscards",
+[InDelivers]	"InDelivers",
+[OutRequests]	"OutRequests",
+[OutDiscards]	"OutDiscards",
+[OutNoRoutes]	"OutNoRoutes",
+[ReasmTimeout]	"ReasmTimeout",
+[ReasmReqds]	"ReasmReqds",
+[ReasmOKs]	"ReasmOKs",
+[ReasmFails]	"ReasmFails",
+[FragOKs]	"FragOKs",
+[FragFails]	"FragFails",
+[FragCreates]	"FragCreates",
+};
+
+struct Fragment4
+{
+	Block*	blist;
+	Fragment4*	next;
+	ulong 	src;
+	ulong 	dst;
+	ushort	id;
+	ulong 	age;
+};
+
+struct Fragment6
+{
+	Block*	blist;
+	Fragment6*	next;
+	uchar 	src[IPaddrlen];
+	uchar 	dst[IPaddrlen];
+	uint	id;
+	ulong 	age;
+};
+
+struct Ipfrag
+{
+	ushort	foff;
+	ushort	flen;
+};
+
+/* an instance of IP */
+struct IP
+{
+	ulong		stats[Nstats];
+
+	QLock		fraglock4;
+	Fragment4*	flisthead4;
+	Fragment4*	fragfree4;
+	Ref		id4;
+
+	QLock		fraglock6;
+	Fragment6*	flisthead6;
+	Fragment6*	fragfree6;
+	Ref		id6;
+
+	int		iprouting;	/* true if we route like a gateway */
+};
+
+.
+22a
+typedef struct	Fragment4	Fragment4;
+typedef struct	Fragment6	Fragment6;
+typedef struct	Ipfrag	Ipfrag;
+
+.
+6c
+#include	"error.h"
+.
+2c
+#include	"lib.h"
+.
+diff -e ip.orig/ipv6.h ip/ipv6.h
+145c
+struct	Routinghdr {
+.
+134c
+struct	Opthdr {
+.
+130,131c
+	uchar	vcf[4];		/* version:4, traffic class:8, flow label:20 */
+	uchar	ploadlen[2];	/* payload length: packet length - 40 */
+	uchar	proto;		/* next header type */
+	uchar	ttl;		/* hop limit */
+	uchar	src[IPaddrlen];
+	uchar	dst[IPaddrlen];
+.
+120,128d
+81c
+	IP6HDR		= 20,		/* sizeof(Ip6hdr) */
+.
+26a
+#undef ESP
+
+.
+diff -e ip.orig/loopbackmedium.c ip/loopbackmedium.c
+99c
+		RUNLOCK(ifc);
+.
+92c
+			RUNLOCK(ifc);
+.
+87c
+		if(!CANRLOCK(ifc)){
+.
+58c
+loopbackbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
+.
+26c
+loopbackbind(Ipifc *ifc, int _, char** __)
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/netdevmedium.c ip/netdevmedium.c
+144c
+		RUNLOCK(ifc);
+.
+136c
+			RUNLOCK(ifc);
+.
+131c
+		if(!CANRLOCK(ifc)){
+.
+85c
+netdevbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/netlog.c ip/netlog.c
+260c
+	wakeup(&f->alog->rendez);
+.
+258c
+	UNLOCK(f->alog);
+.
+242c
+	LOCK(f->alog);
+.
+228c
+	char buf[128], *t, *fp;
+.
+185c
+	set = 1;
+.
+160c
+	QUNLOCK(f->alog);
+.
+157c
+		sleep(&f->alog->rendez, netlogready, f);
+.
+155c
+			UNLOCK(f->alog);
+.
+146c
+			UNLOCK(f->alog);
+.
+134c
+		LOCK(f->alog);
+.
+129c
+		QUNLOCK(f->alog);
+.
+127c
+	QLOCK(f->alog);
+.
+122c
+netlogread(Fs *f, void *a, ulong _, long n)
+.
+109c
+	UNLOCK(f->alog);
+.
+101c
+		UNLOCK(f->alog);
+.
+99c
+	LOCK(f->alog);
+.
+92c
+	UNLOCK(f->alog);
+.
+82c
+		UNLOCK(f->alog);
+.
+80c
+	LOCK(f->alog);
+.
+28,29c
+	QLock	qlock;
+	Rendez	rendez;
+.
+17c
+	Lock	lk;
+.
+6,7c
+#include	"error.h"
+#include	"ip/ip.h"
+.
+2c
+#include	"lib.h"
+.
+diff -e ip.orig/nullmedium.c ip/nullmedium.c
+22c
+nullbwrite(Ipifc* _, Block* __, int ___, uchar* ____)
+.
+17c
+nullunbind(Ipifc* _)
+.
+11c
+nullbind(Ipifc* _, int __, char** ___)
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/pktmedium.c ip/pktmedium.c
+51c
+pktbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
+.
+43c
+pktunbind(Ipifc* _)
+.
+36d
+34c
+pktbind(Ipifc* _, int argc, char **argv)
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/ptclbsum.c ip/ptclbsum.c
+68c
+	while((hisum = losum>>16))
+.
+6c
+#include	"error.h"
+.
+2c
+#include	"lib.h"
+.
+diff -e ip.orig/rudp.c ip/rudp.c
+693c
+	rudp->nc = 16;
+.
+11c
+#include	"error.h"
+.
+7c
+#include	"lib.h"
+.
+diff -e ip.orig/tcp.c ip/tcp.c
+3171c
+		QUNLOCK(c);
+.
+3154c
+		if(!CANQLOCK(c))
+.
+3127c
+		p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
+.
+3101c
+/* called with c QLOCKed */
+.
+3085c
+	QUNLOCK(tcp);
+.
+3080c
+			QUNLOCK(s);
+.
+3073,3074c
+			QLOCK(s);
+			QUNLOCK(tcp);
+.
+3064c
+	QLOCK(tcp);
+.
+2871,2873d
+2869c
+	if(seg->mss != 0 && seg->mss < tcb->mss)
+.
+2859d
+2842c
+	QUNLOCK(s);
+.
+2830c
+		netlog(s->p->f, Logtcprxmt, "timeout rexmit 0x%lux %d/%d\n", tcb->snd.una, tcb->timer.start, NOW);
+.
+2817c
+	QLOCK(s);
+.
+2814c
+		QUNLOCK(s);
+.
+2768c
+tcpsetchecksum(Conv *s, char **f, int _)
+.
+2737c
+	QUNLOCK(s);
+.
+2728c
+	QLOCK(s);
+.
+2725c
+		QUNLOCK(s);
+.
+2641c
+			QLOCK(s);
+.
+2638,2639c
+		if((uint)(msgs%4) == 1){
+			QUNLOCK(s);
+.
+2563c
+			netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr %lux nxt %lux\n",
+.
+2421c
+	QUNLOCK(s);
+.
+2417c
+	QUNLOCK(s);
+.
+2351c
+				QUNLOCK(s);
+.
+2189c
+		QUNLOCK(s);
+.
+2172,2174d
+2144c
+		QUNLOCK(s);
+.
+2095,2096c
+	QLOCK(s);
+	QUNLOCK(tcp);
+.
+2092c
+		QUNLOCK(s);
+.
+2072c
+			QUNLOCK(tcp);
+.
+2064c
+			QUNLOCK(tcp);
+.
+2053c
+		QUNLOCK(tcp);
+.
+2050,2051c
+		netlog(f, Logtcp, "iphtlook failed\n");
+.
+2045c
+	QLOCK(tcp);
+.
+1942c
+tcpiput(Proto *tcp, Ipifc* _, Block *bp)
+.
+1862c
+		netlog(s->p->f, Logtcp, "rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind);
+.
+1817c
+		netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %d advwin %d\n",
+.
+1685,1686d
+1683c
+	if(lp->mss != 0 && lp->mss < tcb->mss)
+.
+1626c
+		netlog(s->p->f, Logtcp, "tcpincoming s %I,%ux/%I,%ux d %I,%ux/%I,%ux v %d/%d\n",
+.
+1562c
+	QUNLOCK(tcp);
+.
+1529c
+	if(!CANQLOCK(tcp))
+.
+1421,1422d
+1334c
+ *  called with s QLOCKed
+.
+1245,1246d
+1231,1232d
+1210,1211d
+1208c
+			if(optlen == MSS_LENGTH)
+.
+995d
+873c
+ *  called with s QLOCKed
+.
+861,862d
+805d
+609c
+	QUNLOCK(s);
+.
+603c
+	QLOCK(s);
+.
+600c
+		QUNLOCK(s);
+.
+583,584d
+569c
+	QUNLOCK(s);
+.
+551c
+	QLOCK(s);
+.
+548c
+		QUNLOCK(s);
+.
+352c
+	ulong	stats[Nstats];
+.
+317d
+293d
+231c
+	ulong	window;			/* Recevive window */
+.
+229c
+	ushort	mss;			/* Mean segment size */
+.
+193c
+ *  the QLOCK in the Conv locks this structure
+.
+49,50c
+	DEF_MSS		= 1460,		/* Default mean segment */
+	DEF_MSS6	= 1280,		/* Default mean segment (min) for v6 */
+.
+44c
+	MSS_LENGTH	= 4,		/* Mean segment size */
+.
+6c
+#include	"error.h"
+.
+2c
+#include	"lib.h"
+.
+diff -e ip.orig/udp.c ip/udp.c
+590,591c
+	return snprint(buf, len, "InDatagrams: %lud\nNoPorts: %lud\nInErrors: %lud\nOutDatagrams: %lud\n",
+.
+580c
+	QUNLOCK(udp);
+.
+575c
+			QUNLOCK(s);
+.
+571,572c
+			QLOCK(s);
+			QUNLOCK(udp);
+.
+562c
+	QLOCK(udp);
+.
+510c
+	QUNLOCK(c);
+.
+502c
+		QUNLOCK(c);
+.
+475c
+		QUNLOCK(c);
+.
+456,457c
+	QLOCK(c);
+	QUNLOCK(udp);
+.
+447c
+				QUNLOCK(udp);
+.
+410c
+		QUNLOCK(udp);
+.
+404c
+	QLOCK(udp);
+.
+197c
+	netlog(c->p->f, Logudp, "udp: kick\n");
+.
+103c
+	QLock	qlock;
+.
+78c
+	ulong	udpOutDatagrams;
+.
+75c
+	ulong	udpInDatagrams;
+.
+6c
+#include	"error.h"
+.
+2c
+#include	"lib.h"
+.
diff --git a/src/9vx/a/ip/arp.c b/src/9vx/a/ip/arp.c
@@ -0,0 +1,684 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+
+/*
+ *  address resolution tables: sizes and entry states
+ */
+enum
+{
+	NHASH		= (1<<6),	/* buckets in Arp.hash; see haship() */
+	NCACHE		= 256,		/* entries in Arp.cache */
+
+	AOK		= 1,		/* mac is known; index of "OK" in arpstate[] */
+	AWAIT		= 2,		/* resolution pending; index of "WAIT" in arpstate[] */
+};
+
+char *arpstate[] =
+{
+	"UNUSED",
+	"OK",
+	"WAIT",
+};
+
+/*
+ *  one per Fs; allocated by arpinit()
+ */
+struct Arp
+{
+	QLock	qlock;		/* held by the functions in this file while they modify the tables */
+	Fs	*f;		/* back pointer, set in arpinit() */
+	Arpent	*hash[NHASH];	/* chains linked through Arpent.hash, indexed by haship() */
+	Arpent	cache[NCACHE];	/* all entries; newarp6() recycles the one with the smallest utime */
+	Arpent	*rxmt;		/* re-transmit chain, linked through Arpent.nextrxt */
+	Proc	*rxmitp;	/* neighbor solicitation re-transmit proc */
+	Rendez	rxmtq;		/* rxmitproc() sleeps here */
+	Block 	*dropf, *dropl;	/* first and last block queued for icmphostunr() in rxmitsols() */
+};
+
+char *Ebadarp = "bad arp";
+
+#define haship(s) ((ulong)((s)[IPaddrlen-1])%NHASH)
+
+int 	ReTransTimer = RETRANS_TIMER;
+
+static void 	rxmitproc(void *v);
+/* allocate the Arp state for f and start the "rxmitproc" kproc that services it. */
+void
+arpinit(Fs *f)
+{
+	f->arp = smalloc(sizeof(Arp));
+	f->arp->f = f;
+	f->arp->rxmt = nil;
+	f->arp->dropf = f->arp->dropl = nil;
+	kproc("rxmitproc", rxmitproc, f->arp);
+}
+
+/*
+ *  create a new arp entry for an ip address by recycling the cache slot with the smallest utime.
+ */
+static Arpent*
+newarp6(Arp *arp, uchar *ip, Ipifc *ifc, int addrxt)	/* addrxt != 0: also queue on arp->rxmt; callers in this file hold arp's qlock */
+{
+	uint t;
+	Block *next, *xp;
+	Arpent *a, *e, *f, **l;
+	Medium *m = ifc->m;
+	int empty;
+
+	/* find oldest entry */
+	e = &arp->cache[NCACHE];
+	a = arp->cache;
+	t = a->utime;
+	for(f = a; f < e; f++){
+		if(f->utime < t){
+			t = f->utime;
+			a = f;
+		}
+	}
+
+	/* dump waiting packets */
+	xp = a->hold;
+	a->hold = nil;
+
+	if(isv4(a->ip)){	/* the recycled entry was v4: just free what it held */
+		while(xp){
+			next = xp->list;
+			freeblist(xp);
+			xp = next;
+		}
+	}
+	else { /* queue icmp unreachable for rxmitproc later on, w/o arp lock */
+		if(xp){
+			if(arp->dropl == nil) 
+				arp->dropf = xp;
+			else
+				arp->dropl->list = xp;
+
+			for(next = xp->list; next; next = next->list)	/* walk to the last held block */
+				xp = next;
+			arp->dropl = xp;
+			wakeup(&arp->rxmtq);
+		}
+	}
+
+	/* take out of current chain */
+	l = &arp->hash[haship(a->ip)];
+	for(f = *l; f; f = f->hash){
+		if(f == a){
+			*l = a->hash;
+			break;
+		}
+		l = &f->hash;
+	}
+
+	/* insert into new chain */
+	l = &arp->hash[haship(ip)];
+	a->hash = *l;
+	*l = a;
+
+	memmove(a->ip, ip, sizeof(a->ip));
+	a->utime = NOW;
+	a->ctime = 0;
+	a->type = m;
+
+	a->rtime = NOW + ReTransTimer;
+	a->rxtsrem = MAX_MULTICAST_SOLICIT;
+	a->ifc = ifc;
+	a->ifcid = ifc->ifcid;
+
+	/* put to the end of re-transmit chain; addrxt is 0 when isv4(a->ip) */
+	if(!ipismulticast(a->ip) && addrxt){
+		l = &arp->rxmt;
+		empty = (*l==nil);
+
+		for(f = *l; f; f = f->nextrxt){	/* unlink a if it is already queued */
+			if(f == a){
+				*l = a->nextrxt;
+				break;
+			}
+			l = &f->nextrxt;
+		}
+		for(f = *l; f; f = f->nextrxt){	/* advance l to the tail link */
+			l = &f->nextrxt;
+		}
+		*l = a;
+		if(empty) 
+			wakeup(&arp->rxmtq);
+	}
+
+	a->nextrxt = nil;	/* NOTE(review): if a was still on arp->rxmt and the branch above was skipped, this looks like it cuts the chain after a — confirm */
+
+	return a;
+}
+
+/* reset a to the unused state and unlink it from its hash and re-transmit chains. */
+/* called with arp qlocked; a->hold is cleared here, not freed. */
+void
+cleanarpent(Arp *arp, Arpent *a)
+{
+	Arpent *f, **l;
+
+	a->utime = 0;
+	a->ctime = 0;
+	a->type = 0;
+	a->state = 0;
+	
+	/* take out of current chain */
+	l = &arp->hash[haship(a->ip)];
+	for(f = *l; f; f = f->hash){
+		if(f == a){
+			*l = a->hash;
+			break;
+		}
+		l = &f->hash;
+	}
+
+	/* take out of re-transmit chain */
+	l = &arp->rxmt;
+	for(f = *l; f; f = f->nextrxt){
+		if(f == a){
+			*l = a->nextrxt;
+			break;
+		}
+		l = &f->nextrxt;
+	}
+	a->nextrxt = nil;
+	a->hash = nil;
+	a->hold = nil;
+	a->last = nil;
+	a->ifc = nil;
+}
+
+/*
+ *  fill in the media address if we have it and return nil with arp
+ *  unlocked.  Otherwise append bp (if not nil) to the entry's hold list
+ *  and return the Arpent for ip, in state AWAIT, with arp still qlocked;
+ *  arprelease() and arpresolve() are the functions here that unlock it.
+ */
+Arpent*
+arpget(Arp *arp, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *mac)
+{
+	int hash;
+	Arpent *a;
+	Medium *type = ifc->m;
+	uchar v6ip[IPaddrlen];
+
+	if(version == V4){	/* entries are keyed by IPaddrlen-byte addresses */
+		v4tov6(v6ip, ip);
+		ip = v6ip;
+	}
+
+	QLOCK(arp);
+	hash = haship(ip);
+	for(a = arp->hash[hash]; a; a = a->hash){	/* match on both address and medium */
+		if(memcmp(ip, a->ip, sizeof(a->ip)) == 0)
+		if(type == a->type)
+			break;
+	}
+
+	if(a == nil){
+		a = newarp6(arp, ip, ifc, (version != V4));	/* only non-V4 entries go on the re-transmit chain */
+		a->state = AWAIT;
+	}
+	a->utime = NOW;
+	if(a->state == AWAIT){
+		if(bp != nil){
+			if(a->hold)
+				a->last->list = bp;
+			else
+				a->hold = bp;
+			a->last = bp;
+			bp->list = nil; 
+		}
+		return a;		/* return with arp qlocked */
+	}
+
+	memmove(mac, a->mac, a->type->maclen);
+
+	/* remove old entries */
+	if(NOW - a->ctime > 15*60*1000)	/* mac was already copied out above, so this lookup still succeeds */
+		cleanarpent(arp, a);
+
+	QUNLOCK(arp);
+	return nil;
+}
+
+/*
+ * called with arp locked (as left by arpget); only drops the lock, ae is unused
+ */
+void
+arprelease(Arp *arp, Arpent* ae)
+{
+	QUNLOCK(arp);
+}
+
+/*
+ * Copy the mac address into the Arpent and mark it AOK.  Return the
+ * list of blocks that were waiting to get sent to this mac address.
+ *
+ * called with arp locked; unlocks arp before returning
+ */
+Block*
+arpresolve(Arp *arp, Arpent *a, Medium *type, uchar *mac)
+{
+	Block *bp;
+	Arpent *f, **l;
+
+	if(!isv4(a->ip)){	/* resolved: take a off the re-transmit chain */
+		l = &arp->rxmt;
+		for(f = *l; f; f = f->nextrxt){
+			if(f == a){
+				*l = a->nextrxt;
+				break;
+			}
+			l = &f->nextrxt;
+		}
+	}
+
+	memmove(a->mac, mac, type->maclen);
+	a->type = type;
+	a->state = AOK;
+	a->utime = NOW;
+	bp = a->hold;	/* hand the held blocks to the caller */
+	a->hold = nil;
+	QUNLOCK(arp);
+
+	return bp;
+}
+/* enter the binding ip -> mac into fs's arp table and write out any blocks held for ip. */
+void
+arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, int refresh)	/* n: mac length, must be 6; refresh != 0: only update an existing entry */
+{
+	Arp *arp;
+	Route *r;
+	Arpent *a, *f, **l;
+	Ipifc *ifc;
+	Medium *type;
+	Block *bp, *next;
+	uchar v6ip[IPaddrlen];
+
+	arp = fs->arp;
+
+	if(n != 6){
+//		print("arp: len = %d\n", n);
+		return;
+	}
+
+	switch(version){
+	case V4:
+		r = v4lookup(fs, ip, nil);
+		v4tov6(v6ip, ip);
+		ip = v6ip;
+		break;
+	case V6:
+		r = v6lookup(fs, ip, nil);
+		break;
+	default:
+		panic("arpenter: version %d", version);
+		return;	/* to suppress warnings */
+	}
+
+	if(r == nil){
+//		print("arp: no route for entry\n");
+		return;
+	}
+
+	ifc = r->ifc;	/* the interface and medium come from the route to ip */
+	type = ifc->m;
+
+	QLOCK(arp);
+	for(a = arp->hash[haship(ip)]; a; a = a->hash){
+		if(a->type != type || (a->state != AWAIT && a->state != AOK))
+			continue;
+
+		if(ipcmp(a->ip, ip) == 0){
+			a->state = AOK;
+			memmove(a->mac, mac, type->maclen);
+
+			if(version == V6){
+				/* take out of re-transmit chain */
+				l = &arp->rxmt;
+				for(f = *l; f; f = f->nextrxt){
+					if(f == a){
+						*l = a->nextrxt;
+						break;
+					}
+					l = &f->nextrxt;
+				}
+			}
+
+			a->ifc = ifc;
+			a->ifcid = ifc->ifcid;
+			bp = a->hold;
+			a->hold = nil;
+			if(version == V4)
+				ip += IPv4off;	/* bwrite below is given the v4 part of the address */
+			a->utime = NOW;
+			a->ctime = a->utime;
+			QUNLOCK(arp);	/* the held blocks are sent without the arp lock */
+
+			while(bp){
+				next = bp->list;
+				if(ifc != nil){
+					if(waserror()){
+						RUNLOCK(ifc);
+						nexterror();
+					}
+					RLOCK(ifc);
+					if(ifc->m != nil)
+						ifc->m->bwrite(ifc, bp, version, ip);
+					else
+						freeb(bp);
+					RUNLOCK(ifc);
+					poperror();
+				} else
+					freeb(bp);
+				bp = next;
+			}
+			return;
+		}
+	}
+
+	if(refresh == 0){	/* no existing entry matched: create one */
+		a = newarp6(arp, ip, ifc, 0);
+		a->state = AOK;
+		a->type = type;
+		a->ctime = NOW;
+		memmove(a->mac, mac, type->maclen);
+	}
+
+	QUNLOCK(arp);
+}
+/* execute a command written as text: "flush", "add [medium] ip mac" or "del ip"; returns len. */
+int
+arpwrite(Fs *fs, char *s, int len)	/* raises an error (Ebadarp, Ebadarg, Ebadip, ...) on a malformed command */
+{
+	int n;
+	Route *r;
+	Arp *arp;
+	Block *bp;
+	Arpent *a, *fl, **l;
+	Medium *m;
+	char *f[4], buf[256];
+	uchar ip[IPaddrlen], mac[MAClen];
+
+	arp = fs->arp;
+
+	if(len == 0)
+		error(Ebadarp);
+	if(len >= sizeof(buf))	/* longer input is truncated to fit buf */
+		len = sizeof(buf)-1;
+	strncpy(buf, s, len);
+	buf[len] = 0;
+	if(len > 0 && buf[len-1] == '\n')	/* drop one trailing newline */
+		buf[len-1] = 0;
+
+	n = getfields(buf, f, 4, 1, " ");
+	if(strcmp(f[0], "flush") == 0){
+		QLOCK(arp);
+		for(a = arp->cache; a < &arp->cache[NCACHE]; a++){
+			memset(a->ip, 0, sizeof(a->ip));
+			memset(a->mac, 0, sizeof(a->mac));
+			a->hash = nil;
+			a->state = 0;
+			a->utime = 0;
+			while(a->hold != nil){
+				bp = a->hold->list;
+				freeblist(a->hold);
+				a->hold = bp;
+			}
+		}
+		memset(arp->hash, 0, sizeof(arp->hash));
+		/* clear all pkts on these lists (rxmt, dropf/l) */
+		arp->rxmt = nil;
+		arp->dropf = nil;
+		arp->dropl = nil;
+		QUNLOCK(arp);
+	} else if(strcmp(f[0], "add") == 0){
+		switch(n){
+		default:
+			error(Ebadarg);
+		case 3:	/* add ip mac: the medium comes from the route to ip */
+			if (parseip(ip, f[1]) == -1)
+				error(Ebadip);
+			if(isv4(ip))
+				r = v4lookup(fs, ip+IPv4off, nil);
+			else
+				r = v6lookup(fs, ip, nil);
+			if(r == nil)
+				error("Destination unreachable");
+			m = r->ifc->m;
+			n = parsemac(mac, f[2], m->maclen);
+			break;
+		case 4:	/* add medium ip mac */
+			m = ipfindmedium(f[1]);
+			if(m == nil)
+				error(Ebadarp);
+			if (parseip(ip, f[2]) == -1)
+				error(Ebadip);
+			n = parsemac(mac, f[3], m->maclen);
+			break;
+		}
+
+		if(m->ares == nil)
+			error(Ebadarp);
+
+		m->ares(fs, V6, ip, mac, n, 0);	/* n is now the value returned by parsemac */
+	} else if(strcmp(f[0], "del") == 0){
+		if(n != 2)
+			error(Ebadarg);
+
+		if (parseip(ip, f[1]) == -1)
+			error(Ebadip);
+		QLOCK(arp);
+
+		l = &arp->hash[haship(ip)];
+		for(a = *l; a; a = a->hash){	/* unlink the first entry with this address, whatever its medium */
+			if(memcmp(ip, a->ip, sizeof(a->ip)) == 0){
+				*l = a->hash;
+				break;
+			}
+			l = &a->hash;
+		}
+	
+		if(a){
+			/* take out of re-transmit chain */
+			l = &arp->rxmt;
+			for(fl = *l; fl; fl = fl->nextrxt){
+				if(fl == a){
+					*l = a->nextrxt;
+					break;
+				}
+				l = &fl->nextrxt;
+			}
+
+			a->nextrxt = nil;
+			a->hash = nil;
+			a->hold = nil;	/* any blocks still on hold are not freed here */
+			a->last = nil;
+			a->ifc = nil;
+			memset(a->ip, 0, sizeof(a->ip));
+			memset(a->mac, 0, sizeof(a->mac));
+		}
+		QUNLOCK(arp);
+	} else
+		error(Ebadarp);
+
+	return len;
+}
+
+enum
+{
+	Alinelen=	90,	/* sum of the field widths in aformat: 6+1+8+1+40+1+32+1 */
+};
+
+char *aformat = "%-6.6s %-8.8s %-40.40I %-32.32s\n";	/* medium name, state, ip, mac; used by arpread() */
+/* print the n bytes at mac into p as hex, "%2.2ux" per byte; writes nothing at all when n <= 0. */
+static void
+convmac(char *p, uchar *mac, int n)
+{
+	while(n-- > 0)
+		p += sprint(p, "%2.2ux", *mac++);
+}
+/* format the in-use cache entries into p, one aformat line each; returns the number of bytes written. */
+int
+arpread(Arp *arp, char *p, ulong offset, int len)	/* offset, len: byte offset and size of the read */
+{
+	Arpent *a;
+	int n;
+	char mac[2*MAClen+1];
+
+	if(offset % Alinelen)	/* reads must start on a line boundary */
+		return 0;
+
+	offset = offset/Alinelen;	/* from here on, offset and len count lines */
+	len = len/Alinelen;
+
+	n = 0;
+	for(a = arp->cache; len > 0 && a < &arp->cache[NCACHE]; a++){
+		if(a->state == 0)
+			continue;
+		if(offset > 0){	/* skip the entries before the requested line */
+			offset--;
+			continue;
+		}
+		len--;
+		QLOCK(arp);
+		convmac(mac, a->mac, a->type->maclen);
+		n += sprint(p+n, aformat, a->type->name, arpstate[a->state], a->ip, mac);
+		QUNLOCK(arp);
+	}
+
+	return n;
+}
+/* send at most one neighbor solicitation from arp->rxmt, then icmphostunr every block on the drop list. */
+extern int
+rxmitsols(Arp *arp)	/* returns 0 if arp->rxmt was empty on entry, else an rtime - NOW interval used by rxmitproc */
+{
+	uint sflag;
+	Block *next, *xp;
+	Arpent *a, *b, **l;
+	Fs *f;
+	uchar ipsrc[IPaddrlen];
+	Ipifc *ifc = nil;
+	long nrxt;
+
+	QLOCK(arp);
+	f = arp->f;
+
+	a = arp->rxmt;
+	if(a==nil){
+		nrxt = 0;
+		goto dodrops; 		/* return nrxt; */
+	}
+	nrxt = a->rtime - NOW;
+	if(nrxt > 3*ReTransTimer/4) 
+		goto dodrops; 		/* return nrxt; */
+
+	for(; a; a = a->nextrxt){	/* NOTE(review): cleanarpent() below sets a->nextrxt to nil, so this seems to stop after one cleaned entry — confirm */
+		ifc = a->ifc;
+		assert(ifc != nil);
+		if((a->rxtsrem <= 0) || !(CANRLOCK(ifc)) || (a->ifcid != ifc->ifcid)){	/* NOTE(review): if CANRLOCK succeeds but ifcid differs, the read lock appears to stay held — confirm */
+			xp = a->hold;
+			a->hold = nil;
+
+			if(xp){
+				if(arp->dropl == nil) 
+					arp->dropf = xp;
+				else
+					arp->dropl->list = xp;	/* NOTE(review): dropl is not advanced to the new tail here, unlike in newarp6() — confirm */
+			}
+
+			cleanarpent(arp, a);
+		}
+		else
+			break;	/* a is usable and ifc is now read-locked */
+	}
+	if(a == nil)
+		goto dodrops;
+
+
+	QUNLOCK(arp);	/* for icmpns */
+	if((sflag = ipv6anylocal(ifc, ipsrc)) != SRC_UNSPEC) 
+		icmpns(f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac); 
+
+	RUNLOCK(ifc);
+	QLOCK(arp);
+
+	/* put to the end of re-transmit chain */
+	l = &arp->rxmt;
+	for(b = *l; b; b = b->nextrxt){
+		if(b == a){
+			*l = a->nextrxt;
+			break;
+		}
+		l = &b->nextrxt;
+	}
+	for(b = *l; b; b = b->nextrxt){
+		l = &b->nextrxt;
+	}
+	*l = a;
+	a->rxtsrem--;
+	a->nextrxt = nil;
+	a->rtime = NOW + ReTransTimer;
+
+	a = arp->rxmt;	/* recompute the interval from the new head of the chain */
+	if(a==nil)
+		nrxt = 0;
+	else 
+		nrxt = a->rtime - NOW;
+
+dodrops:
+	xp = arp->dropf;	/* detach the whole drop list while still holding the lock */
+	arp->dropf = nil;
+	arp->dropl = nil;
+	QUNLOCK(arp);
+
+	for(; xp; xp = next){
+		next = xp->list;
+		icmphostunr(f, ifc, xp, Icmp6_adr_unreach, 1);	/* ifc is still nil when an early goto dodrops was taken */
+	}
+
+	return nrxt;
+
+}
+/* sleep condition used by rxmitproc: true when the re-transmit chain or the drop list is non-empty. */
+static int
+rxready(void *v)
+{
+	Arp *arp = (Arp *) v;
+	int x;
+
+	x = ((arp->rxmt != nil) || (arp->dropf != nil));
+
+	return x;
+}
+/* body of the "rxmitproc" kproc started by arpinit: call rxmitsols forever, sleeping between calls. */
+static void
+rxmitproc(void *v)
+{
+	Arp *arp = v;
+	long wakeupat;
+
+	arp->rxmitp = up;
+	//print("arp rxmitproc started\n");
+	if(waserror()){	/* on any error raised below, deregister and exit */
+		arp->rxmitp = 0;
+		pexit("hangup", 1);
+	}
+	for(;;){
+		wakeupat = rxmitsols(arp);
+		if(wakeupat == 0) 
+			sleep(&arp->rxmtq, rxready, v); 
+		else if(wakeupat > ReTransTimer/4) 
+			tsleep(&arp->rxmtq, return0, 0, wakeupat); 
+	}	/* any other value of wakeupat loops again without sleeping */
+}
+
diff --git a/src/9vx/a/ip/chandial.c b/src/9vx/a/ip/chandial.c
@@ -0,0 +1,124 @@
+#include	"u.h"
+#include	"lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"error.h"
+#include	"ip/ip.h"
+
+typedef struct DS DS;
+static Chan*	call(char*, char*, DS*);
+static void	_dial_string_parse(char*, DS*);
+
+enum
+{
+	Maxstring=	128,	/* size of DS.buf; longer dial strings are truncated */
+};
+/* a parsed dial string plus the extra arguments of chandial(); filled in by _dial_string_parse() */
+struct DS
+{
+	char	buf[Maxstring];			/* copy of the dial string, split in place */
+	char	*netdir;			/* points into buf, or 0 if none was given */
+	char	*proto;				/* points into buf, or "net" */
+	char	*rem;				/* points into buf: text after the first '!' */
+	char	*local;				/* other args */
+	char	*dir;				/* if set, receives the conversation directory name */
+	Chan	**ctlp;				/* if set, receives the open clone/ctl channel */
+};
+
+/*
+ *  dial dest and return the open data channel; the dialstring is of the form '[/net/]proto!dest'
+ */
+Chan*
+chandial(char *dest, char *local, char *dir, Chan **ctlp)
+{
+	DS ds;
+	char clone[Maxpath];
+
+	ds.local = local;
+	ds.dir = dir;
+	ds.ctlp = ctlp;
+
+	_dial_string_parse(dest, &ds);
+	if(ds.netdir == 0)	/* default network directory */
+		ds.netdir = "/net";
+
+	/* no connection server, don't translate */
+	snprint(clone, sizeof(clone), "%s/%s/clone", ds.netdir, ds.proto);
+	return call(clone, ds.rem, &ds);
+}
+/* open clone, write "connect dest [local]" to it and return the conversation's open data channel. */
+static Chan*
+call(char *clone, char *dest, DS *ds)	/* clone is modified: its last path element is cut off */
+{
+	int n;
+	Chan *dchan, *cchan;
+	char name[Maxpath], data[Maxpath], *p;
+
+	cchan = namec(clone, Aopen, ORDWR, 0);
+
+	/* get directory name */
+	if(waserror()){
+		cclose(cchan);
+		nexterror();
+	}
+	n = devtab[cchan->type]->read(cchan, name, sizeof(name)-1, 0);
+	name[n] = 0;
+	for(p = name; *p == ' '; p++)
+		;
+	sprint(name, "%lud", strtoul(p, 0, 0));	/* rewrite what was read as a plain decimal number */
+	p = strrchr(clone, '/');
+	*p = 0;	/* clone now names the protocol directory */
+	if(ds->dir)
+		snprint(ds->dir, Maxpath, "%s/%s", clone, name);	/* assumes ds->dir has room for Maxpath bytes — TODO confirm with callers */
+	snprint(data, sizeof(data), "%s/%s/data", clone, name);
+
+	/* connect */
+	if(ds->local)
+		snprint(name, sizeof(name), "connect %s %s", dest, ds->local);
+	else
+		snprint(name, sizeof(name), "connect %s", dest);
+	devtab[cchan->type]->write(cchan, name, strlen(name), 0);
+
+	/* open data connection */
+	dchan = namec(data, Aopen, ORDWR, 0);
+	if(ds->ctlp)
+		*ds->ctlp = cchan;	/* the caller takes over the control channel */
+	else
+		cclose(cchan);
+	poperror();
+	return dchan;
+
+}
+
+/*
+ *  parse a dial string of the form '[netdir/]proto!rem' into ds; the fields point into ds->buf
+ */
+static void
+_dial_string_parse(char *str, DS *ds)
+{
+	char *p, *p2;
+
+	strncpy(ds->buf, str, Maxstring);
+	ds->buf[Maxstring-1] = 0;	/* strncpy does not terminate a truncated copy */
+
+	p = strchr(ds->buf, '!');
+	if(p == 0) {
+		ds->netdir = 0;
+		ds->proto = "net";
+		ds->rem = ds->buf;
+	} else {
+		for(p2 = p; p2 > ds->buf && *p2 != '/'; p2--)	/* last '/' before the '!', never scanning below buf */
+			;
+		if((*ds->buf != '/' && *ds->buf != '#') || *p2 != '/'){
+			ds->netdir = 0;	/* no network directory, e.g. "tcp!host" or "#x!host" */
+			ds->proto = ds->buf;
+		} else {
+			*p2++ = 0;	/* split "netdir/proto" at that '/' */
+			ds->netdir = ds->buf;
+			ds->proto = p2;
+		}
+		*p = 0;
+		ds->rem = p + 1;
+	}
+}
diff --git a/src/9vx/a/ip/devip.c b/src/9vx/a/ip/devip.c
@@ -0,0 +1,1439 @@
+#include	"u.h"
+#include	"lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"error.h"
+#include	"ip/ip.h"
+
+enum	/* file types, and the layout of a qid path as built by QID(): proto | conv | type */
+{
+	Qtopdir=	1,		/* top level directory */
+	Qtopbase,
+	Qarp=		Qtopbase,
+	Qbootp,
+	Qndb,
+	Qiproute,
+	Qipselftab,
+	Qlog,
+
+	Qprotodir,			/* directory for a protocol */
+	Qprotobase,
+	Qclone=		Qprotobase,
+	Qstats,
+
+	Qconvdir,			/* directory for a conversation */
+	Qconvbase,
+	Qctl=		Qconvbase,
+	Qdata,
+	Qerr,
+	Qlisten,
+	Qlocal,
+	Qremote,
+	Qstatus,
+	Qsnoop,
+
+	Logtype=	5,		/* bits used for the file type, see TYPE() */
+	Masktype=	(1<<Logtype)-1,
+	Logconv=	12,		/* bits used for the conversation index, see CONV() */
+	Maskconv=	(1<<Logconv)-1,
+	Shiftconv=	Logtype,
+	Logproto=	8,		/* bits used for the protocol index, see PROTO() */
+	Maskproto=	(1<<Logproto)-1,
+	Shiftproto=	Logtype + Logconv,
+
+	Nfs=		128,		/* size of ipfs[] */
+};
+#define TYPE(x) 	( ((ulong)(x).path) & Masktype )
+#define CONV(x) 	( (((ulong)(x).path) >> Shiftconv) & Maskconv )
+#define PROTO(x) 	( (((ulong)(x).path) >> Shiftproto) & Maskproto )
+#define QID(p, c, y) 	( ((uint)(p)<<(Shiftproto)) | ((uint)(c)<<Shiftconv) | (y) )
+
+static char network[] = "network";
+
+QLock	fslock;
+Fs	*ipfs[Nfs];	/* attached fs's */
+Queue	*qlog;
+
+extern	void nullmediumlink(void);
+extern	void pktmediumlink(void);
+	long ndbwrite(Fs *f, char *a, ulong off, int n);
+/* fill dp for file type i inside the conversation directory of c; returns 1, or -1 if there is no such file. */
+static int
+ip3gen(Chan *c, int i, Dir *dp)
+{
+	Qid q;
+	Conv *cv;
+	char *p;
+
+	cv = ipfs[c->dev]->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+	if(cv->owner == nil)	/* a conversation without an owner is given to eve */
+		kstrdup(&cv->owner, eve);
+	mkqid(&q, QID(PROTO(c->qid), CONV(c->qid), i), 0, QTFILE);
+
+	switch(i) {
+	default:
+		return -1;
+	case Qctl:
+		devdir(c, q, "ctl", 0, cv->owner, cv->perm, dp);
+		return 1;
+	case Qdata:
+		devdir(c, q, "data", qlen(cv->rq), cv->owner, cv->perm, dp);
+		return 1;
+	case Qerr:
+		devdir(c, q, "err", qlen(cv->eq), cv->owner, cv->perm, dp);
+		return 1;
+	case Qlisten:
+		devdir(c, q, "listen", 0, cv->owner, cv->perm, dp);
+		return 1;
+	case Qlocal:
+		p = "local";
+		break;
+	case Qremote:
+		p = "remote";
+		break;
+	case Qsnoop:
+		if(strcmp(cv->p->name, "ipifc") != 0)	/* snoop exists only under the ipifc protocol */
+			return -1;
+		devdir(c, q, "snoop", qlen(cv->sq), cv->owner, 0400, dp);
+		return 1;
+	case Qstatus:
+		p = "status";
+		break;
+	}
+	devdir(c, q, p, 0, cv->owner, 0444, dp);	/* local, remote and status are read-only */
+	return 1;
+}
+/* fill dp for file type i (clone or stats) inside a protocol directory; returns 1, or -1 for any other i. */
+static int
+ip2gen(Chan *c, int i, Dir *dp)
+{
+	Qid q;
+
+	switch(i) {
+	case Qclone:
+		mkqid(&q, QID(PROTO(c->qid), 0, Qclone), 0, QTFILE);
+		devdir(c, q, "clone", 0, network, 0666, dp);
+		return 1;
+	case Qstats:
+		mkqid(&q, QID(PROTO(c->qid), 0, Qstats), 0, QTFILE);
+		devdir(c, q, "stats", 0, network, 0444, dp);
+		return 1;
+	}
+	return -1;
+}
+/* fill dp for top-level file type i (arp, bootp, ndb, iproute, ipselftab, log); returns 1, or -1 for any other i. */
+static int
+ip1gen(Chan *c, int i, Dir *dp)
+{
+	Qid q;
+	char *p;
+	int prot;
+	int len = 0;
+	Fs *f;
+	extern ulong	kerndate;
+
+	f = ipfs[c->dev];
+
+	prot = 0666;	/* default mode; some cases below override it */
+	mkqid(&q, QID(0, 0, i), 0, QTFILE);
+	switch(i) {
+	default:
+		return -1;
+	case Qarp:
+		p = "arp";
+		prot = 0664;
+		break;
+	case Qbootp:
+		p = "bootp";
+		break;
+	case Qndb:
+		p = "ndb";
+		len = strlen(f->ndb);	/* ndb is the only file here reported with a length */
+		q.vers = f->ndbvers;
+		break;
+	case Qiproute:
+		p = "iproute";
+		prot = 0664;
+		break;
+	case Qipselftab:
+		p = "ipselftab";
+		prot = 0444;
+		break;
+	case Qlog:
+		p = "log";
+		break;
+	}
+	devdir(c, q, p, len, network, prot, dp);
+	if(i == Qndb && f->ndbmtime > kerndate)	/* report ndb's own mtime when it is later than kerndate */
+		dp->mtime = f->ndbmtime;
+	return 1;
+}
+/* directory generator passed to devwalk and devstat: fill dp for entry s relative to c, by the type of c. */
+static int
+ipgen(Chan *c, char* __ch, Dirtab* __dt, int __i, int s, Dir *dp)	/* the three __ parameters are unused */
+{
+	Qid q;
+	Conv *cv;
+	Fs *f;
+
+	f = ipfs[c->dev];
+
+	switch(TYPE(c->qid)) {
+	case Qtopdir:
+		if(s == DEVDOTDOT){
+			mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
+			sprint(up->genbuf, "#I%lud", c->dev);
+			devdir(c, q, up->genbuf, 0, network, 0555, dp);
+			return 1;
+		}
+		if(s < f->np) {	/* the protocol directories come first */
+			if(f->p[s]->connect == nil)
+				return 0;	/* protocol with no user interface */
+			mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
+			devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
+			return 1;
+		}
+		s -= f->np;	/* then the top-level files */
+		return ip1gen(c, s+Qtopbase, dp);
+	case Qarp:
+	case Qbootp:
+	case Qndb:
+	case Qlog:
+	case Qiproute:
+	case Qipselftab:
+		return ip1gen(c, TYPE(c->qid), dp);
+	case Qprotodir:
+		if(s == DEVDOTDOT){
+			mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
+			sprint(up->genbuf, "#I%lud", c->dev);
+			devdir(c, q, up->genbuf, 0, network, 0555, dp);
+			return 1;
+		}
+		if(s < f->p[PROTO(c->qid)]->ac) {	/* the conversation directories come first, named by index */
+			cv = f->p[PROTO(c->qid)]->conv[s];
+			sprint(up->genbuf, "%d", s);
+			mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
+			devdir(c, q, up->genbuf, 0, cv->owner, 0555, dp);
+			return 1;
+		}
+		s -= f->p[PROTO(c->qid)]->ac;	/* then clone and stats */
+		return ip2gen(c, s+Qprotobase, dp);
+	case Qclone:
+	case Qstats:
+		return ip2gen(c, TYPE(c->qid), dp);
+	case Qconvdir:
+		if(s == DEVDOTDOT){
+			s = PROTO(c->qid);
+			mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
+			devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
+			return 1;
+		}
+		return ip3gen(c, s+Qconvbase, dp);
+	case Qctl:
+	case Qdata:
+	case Qerr:
+	case Qlisten:
+	case Qlocal:
+	case Qremote:
+	case Qstatus:
+	case Qsnoop:
+		return ip3gen(c, TYPE(c->qid), dp);
+	}
+	return -1;
+}
+/* link in the null and pkt media and install eipfmt for the i, I, E, V and M print verbs. */
+static void
+ipreset(void)
+{
+	nullmediumlink();
+	pktmediumlink();
+
+	fmtinstall('i', eipfmt);
+	fmtinstall('I', eipfmt);
+	fmtinstall('E', eipfmt);
+	fmtinstall('V', eipfmt);
+	fmtinstall('M', eipfmt);
+}
+/* return the Fs for stack number dev, creating and initializing it on first use; nil if dev is not in 0..Nfs-1. */
+static Fs*
+ipgetfs(int dev)
+{
+	extern void (*ipprotoinit[])(Fs*);
+	Fs *f;
+	int i;
+
+	if(dev < 0 || dev >= Nfs)	/* a negative dev would index below ipfs[] */
+		return nil;
+
+	qlock(&fslock);	/* serializes creation of a new Fs */
+	if(ipfs[dev] == nil){
+		f = smalloc(sizeof(Fs));
+		ip_init(f);
+		arpinit(f);
+		netloginit(f);
+		for(i = 0; ipprotoinit[i]; i++)	/* ipprotoinit[] ends with a nil entry */
+			ipprotoinit[i](f);
+		f->dev = dev;
+		ipfs[dev] = f;
+	}
+	qunlock(&fslock);
+
+	return ipfs[dev];
+}
+/* allocate an IPaux holding a copy of owner and tag; tag is blank-padded or truncated to sizeof(a->tag). */
+IPaux*
+newipaux(char *owner, char *tag)
+{
+	IPaux *a;
+	int n;
+
+	a = smalloc(sizeof(*a));
+	kstrdup(&a->owner, owner);
+	memset(a->tag, ' ', sizeof(a->tag));
+	n = strlen(tag);
+	if(n > sizeof(a->tag))
+		n = sizeof(a->tag);
+	memmove(a->tag, tag, n);	/* no terminating NUL is stored in a->tag by this copy */
+	return a;
+}
+
+#define ATTACHER(c) (((IPaux*)((c)->aux))->owner)
+/* attach to IP stack number spec; raises "bad specification" unless the number is in 0..Nfs-1. */
+static Chan*
+ipattach(char* spec)
+{
+	Chan *c;
+	int dev;
+
+	dev = atoi(spec);
+	if(dev < 0 || dev >= Nfs)	/* atoi can return a negative number, which would index below ipfs[] */
+		error("bad specification");
+
+	ipgetfs(dev);	/* make sure ipfs[dev] exists before the channel refers to it */
+	c = devattach('I', spec);
+	mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
+	c->dev = dev;
+
+	c->aux = newipaux(commonuser(), "none");
+
+	return c;
+}
+/* walk with devwalk and ipgen; a channel cloned by the walk gets its own IPaux copied from c's. */
+static Walkqid*
+ipwalk(Chan* c, Chan *nc, char **name, int nname)
+{
+	IPaux *a = c->aux;
+	Walkqid* w;
+
+	w = devwalk(c, nc, name, nname, nil, 0, ipgen);
+	if(w != nil && w->clone != nil)
+		w->clone->aux = newipaux(a->owner, a->tag);
+	return w;
+}
+
+/* stat c through devstat, using ipgen as the generator. */
+static int
+ipstat(Chan* c, uchar* db, int n)
+{
+	return devstat(c, db, n, nil, 0, ipgen);
+}
+/* sleep condition used by ipopen for Qlisten: true once the conversation has a queued incoming call. */
+static int
+incoming(void* arg)
+{
+	Conv *conv;
+
+	conv = arg;
+	return conv->incall != nil;
+}
+/* open mode -> permission bits that ipopen compares against cv->perm */
+static int m2p[] = {
+	[OREAD]		4,
+	[OWRITE]	2,
+	[ORDWR]		6
+};
+/* open c with mode omode: checks permissions by file type and, for clone and listen, retargets c at a ctl file. */
+static Chan*
+ipopen(Chan* c, int omode)
+{
+	Conv *cv, *nc;
+	Proto *p;
+	int perm;
+	Fs *f;
+
+	perm = m2p[omode&3];	/* NOTE(review): m2p only has initializers for OREAD, OWRITE and ORDWR — confirm omode&3 cannot select a fourth slot */
+
+	f = ipfs[c->dev];
+
+	switch(TYPE(c->qid)) {
+	default:
+		break;
+	case Qndb:
+		if(omode & (OWRITE|OTRUNC) && !iseve())
+			error(Eperm);
+		if((omode & (OWRITE|OTRUNC)) == (OWRITE|OTRUNC))	/* a truncating write-open empties the ndb text */
+			f->ndb[0] = 0;
+		break;
+	case Qlog:
+		netlogopen(f);
+		break;
+	case Qiproute:
+	case Qarp:
+		if(omode != OREAD && !iseve())	/* only eve may open these for anything but reading */
+			error(Eperm);
+		break;
+	case Qtopdir:
+	case Qprotodir:
+	case Qconvdir:
+	case Qstatus:
+	case Qremote:
+	case Qlocal:
+	case Qstats:
+	case Qbootp:
+	case Qipselftab:
+		if(omode != OREAD)
+			error(Eperm);
+		break;
+	case Qsnoop:
+		if(omode != OREAD)
+			error(Eperm);
+		p = f->p[PROTO(c->qid)];
+		cv = p->conv[CONV(c->qid)];
+		if(strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
+			error(Eperm);
+		incref(&cv->snoopers);
+		break;
+	case Qclone:	/* get a conversation from Fsprotoclone and turn c into its ctl file */
+		p = f->p[PROTO(c->qid)];
+		QLOCK(p);
+		if(waserror()){
+			QUNLOCK(p);
+			nexterror();
+		}
+		cv = Fsprotoclone(p, ATTACHER(c));
+		QUNLOCK(p);
+		poperror();
+		if(cv == nil) {
+			error(Enodev);
+			break;
+		}
+		mkqid(&c->qid, QID(p->x, cv->x, Qctl), 0, QTFILE);
+		break;
+	case Qdata:
+	case Qctl:
+	case Qerr:
+		p = f->p[PROTO(c->qid)];
+		QLOCK(p);
+		cv = p->conv[CONV(c->qid)];
+		QLOCK(cv);
+		if(waserror()) {
+			QUNLOCK(cv);
+			QUNLOCK(p);
+			nexterror();
+		}
+		if((perm & (cv->perm>>6)) != perm) {
+			if(strcmp(ATTACHER(c), cv->owner) != 0)
+				error(Eperm);
+		 	if((perm & cv->perm) != perm)
+				error(Eperm);
+
+		}
+		cv->inuse++;
+		if(cv->inuse == 1){	/* the first opener becomes the owner, with mode 0660 */
+			kstrdup(&cv->owner, ATTACHER(c));
+			cv->perm = 0660;
+		}
+		QUNLOCK(cv);
+		QUNLOCK(p);
+		poperror();
+		break;
+	case Qlisten:	/* wait for an incoming call, then turn c into the new conversation's ctl file */
+		cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+		if((perm & (cv->perm>>6)) != perm) {
+			if(strcmp(ATTACHER(c), cv->owner) != 0)
+				error(Eperm);
+		 	if((perm & cv->perm) != perm)
+				error(Eperm);
+
+		}
+
+		if(cv->state != Announced)
+			error("not announced");
+
+		if(waserror()){
+			closeconv(cv);
+			nexterror();
+		}
+		QLOCK(cv);
+		cv->inuse++;	/* hold a reference while waiting; closeconv() below gives it back */
+		QUNLOCK(cv);
+
+		nc = nil;
+		while(nc == nil) {
+			/* give up if we got a hangup */
+			if(qisclosed(cv->rq))
+				error("listen hungup");
+
+			qlock(&cv->listenq);
+			if(waserror()) {
+				qunlock(&cv->listenq);
+				nexterror();
+			}
+
+			/* wait for a connect */
+			sleep(&cv->listenr, incoming, cv);
+
+			QLOCK(cv);
+			nc = cv->incall;
+			if(nc != nil){
+				cv->incall = nc->next;
+				mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
+				kstrdup(&cv->owner, ATTACHER(c));
+			}
+			QUNLOCK(cv);
+
+			qunlock(&cv->listenq);
+			poperror();
+		}
+		closeconv(cv);
+		poperror();
+		break;
+	}
+	c->mode = openmode(omode);
+	c->flag |= COPEN;
+	c->offset = 0;
+	return c;
+}
+
+/*
+ *  files cannot be created in the ip device: always raises Eperm.
+ *  the parameters are unused.
+ */
+static void
+ipcreate(Chan* _, char* __, int ___, ulong ____)
+{
+	error(Eperm);
+}
+
+/*
+ *  files cannot be removed from the ip device: always raises Eperm.
+ */
+static void
+ipremove(Chan* _)
+{
+	error(Eperm);
+}
+
+/*
+ *  wstat is accepted only on a conversation's ctl and data files;
+ *  any other file raises Eperm.  eve or the conversation's owner
+ *  may install a new owner (when the decoded uid is non-empty) and
+ *  new permission bits (mode&0777).  returns the result of convM2D.
+ */
+static int
+ipwstat(Chan *c, uchar *dp, int n)
+{
+	Dir dir;
+	Conv *conv;
+	Fs *fs;
+
+	fs = ipfs[c->dev];
+	if(TYPE(c->qid) != Qctl && TYPE(c->qid) != Qdata)
+		error(Eperm);
+
+	n = convM2D(dp, n, &dir, nil);
+	if(n <= 0)
+		return n;
+
+	conv = fs->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+	if(!iseve() && strcmp(ATTACHER(c), conv->owner) != 0)
+		error(Eperm);
+	if(dir.uid[0])
+		kstrdup(&conv->owner, dir.uid);
+	conv->perm = dir.mode & 0777;
+	return n;
+}
+
+/*
+ *  drop one reference to cv.  when the count reaches zero: close
+ *  every queued incoming call, reset the owner to `network' and the
+ *  mode to 0660, leave all multicast groups, clear r and rgen, call
+ *  the protocol's close routine and mark the conversation Idle.
+ */
+void
+closeconv(Conv *cv)
+{
+	Conv *nc;
+	Ipmulti *mp;
+
+	QLOCK(cv);
+
+	/* still referenced by someone else: nothing more to do */
+	if(--cv->inuse > 0) {
+		QUNLOCK(cv);
+		return;
+	}
+
+	/* close all incoming calls since no listen will ever happen */
+	for(nc = cv->incall; nc; nc = cv->incall){
+		cv->incall = nc->next;
+		closeconv(nc);
+	}
+	cv->incall = nil;
+
+	kstrdup(&cv->owner, network);
+	cv->perm = 0660;
+
+	/* loops until cv->multi is empty; presumably ipifcremmulti unlinks the entry */
+	while((mp = cv->multi) != nil)
+		ipifcremmulti(cv, mp->ma, mp->ia);
+
+	cv->r = nil;
+	cv->rgen = 0;
+	cv->p->close(cv);
+	cv->state = Idle;
+	QUNLOCK(cv);
+}
+
+/*
+ *  close a channel on the ip device.  if the channel was actually
+ *  opened, undo what ipopen did for its file type (log reference,
+ *  conversation reference or snooper count); in every case free
+ *  the channel's IPaux and the owner string it holds.
+ */
+static void
+ipclose(Chan* c)
+{
+	Fs *fs;
+	IPaux *aux;
+
+	fs = ipfs[c->dev];
+	if(c->flag & COPEN){
+		switch(TYPE(c->qid)) {
+		case Qlog:
+			netlogclose(fs);
+			break;
+		case Qdata:
+		case Qctl:
+		case Qerr:
+			closeconv(fs->p[PROTO(c->qid)]->conv[CONV(c->qid)]);
+			break;
+		case Qsnoop:
+			decref(&fs->p[PROTO(c->qid)]->conv[CONV(c->qid)]->snoopers);
+			break;
+		default:
+			break;
+		}
+	}
+	aux = c->aux;
+	free(aux->owner);
+	free(aux);
+}
+
+/*
+ *  size of the scratch buffer ipread allocates to format the
+ *  remote, local, status and stats files
+ */
+enum
+{
+	Statelen=	32*1024,
+};
+
+/*
+ *  read from a file of the ip device.  directories are generated
+ *  with ipgen; text files are formatted into a temporary buffer and
+ *  returned through readstr at the requested offset; data, err and
+ *  snoop read from the conversation's rq, eq and sq queues.
+ *  returns the number of bytes read; unknown file types raise Eperm.
+ */
+static long
+ipread(Chan *ch, void *a, long n, vlong off)
+{
+	Conv *c;
+	Proto *x;
+	char *buf, *p;
+	long rv;
+	Fs *f;
+	ulong offset = off;	/* the vlong offset is narrowed to ulong here */
+
+	f = ipfs[ch->dev];
+
+	p = a;
+	switch(TYPE(ch->qid)) {
+	default:
+		/* relies on error() not returning; otherwise this would fall through */
+		error(Eperm);
+	case Qtopdir:
+	case Qprotodir:
+	case Qconvdir:
+		return devdirread(ch, a, n, 0, 0, ipgen);
+	case Qarp:
+		return arpread(f->arp, a, offset, n);
+ 	case Qbootp:
+ 		return bootpread(a, offset, n);
+ 	case Qndb:
+		return readstr(offset, a, n, f->ndb);
+	case Qiproute:
+		return routeread(f, a, offset, n);
+	case Qipselftab:
+		return ipselftabread(f, a, offset, n);
+	case Qlog:
+		return netlogread(f, a, offset, n);
+	case Qctl:
+		/* reading ctl yields the conversation number in decimal */
+		buf = smalloc(16);
+		sprint(buf, "%lud", CONV(ch->qid));
+		rv = readstr(offset, p, n, buf);
+		free(buf);
+		return rv;
+	case Qremote:
+		/* "addr!port\n" unless the protocol supplies its own formatter */
+		buf = smalloc(Statelen);
+		x = f->p[PROTO(ch->qid)];
+		c = x->conv[CONV(ch->qid)];
+		if(x->remote == nil) {
+			sprint(buf, "%I!%d\n", c->raddr, c->rport);
+		} else {
+			(*x->remote)(c, buf, Statelen-2);
+		}
+		rv = readstr(offset, p, n, buf);
+		free(buf);
+		return rv;
+	case Qlocal:
+		/* same as Qremote, for the local address and port */
+		buf = smalloc(Statelen);
+		x = f->p[PROTO(ch->qid)];
+		c = x->conv[CONV(ch->qid)];
+		if(x->local == nil) {
+			sprint(buf, "%I!%d\n", c->laddr, c->lport);
+		} else {
+			(*x->local)(c, buf, Statelen-2);
+		}
+		rv = readstr(offset, p, n, buf);
+		free(buf);
+		return rv;
+	case Qstatus:
+		/* the protocol's state routine is called unconditionally */
+		buf = smalloc(Statelen);
+		x = f->p[PROTO(ch->qid)];
+		c = x->conv[CONV(ch->qid)];
+		(*x->state)(c, buf, Statelen-2);
+		rv = readstr(offset, p, n, buf);
+		free(buf);
+		return rv;
+	case Qdata:
+		c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
+		return qread(c->rq, a, n);
+	case Qerr:
+		c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
+		return qread(c->eq, a, n);
+	case Qsnoop:
+		c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
+		return qread(c->sq, a, n);
+	case Qstats:
+		/* the stats routine is optional */
+		x = f->p[PROTO(ch->qid)];
+		if(x->stats == nil)
+			error("stats not implemented");
+		buf = smalloc(Statelen);
+		(*x->stats)(x, buf, Statelen);
+		rv = readstr(offset, p, n, buf);
+		free(buf);
+		return rv;
+	}
+}
+
+/*
+ *  block read: a data file returns a block straight from the
+ *  conversation's read queue; every other file type is handed
+ *  to devbread.
+ */
+static Block*
+ipbread(Chan* ch, long n, ulong offset)
+{
+	Conv *conv;
+
+	if(TYPE(ch->qid) != Qdata)
+		return devbread(ch, n, offset);
+
+	conv = ipfs[ch->dev]->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
+	return qbread(conv->rq, n);
+}
+
+/*
+ *  set local address to be that of the ifc closest to remote address.
+ *  the choice is made by findlocalip, which is given the
+ *  conversation's Fs, its laddr to fill in and its raddr.
+ */
+static void
+setladdr(Conv* c)
+{
+	findlocalip(c->p->f, c->laddr, c->raddr);
+}
+
+/*
+ *  give c the local port lport, unless another Connected or
+ *  Announced conversation of the same protocol already has the
+ *  same raddr,rport,laddr,lport quad.  the protocol is locked
+ *  during the scan.  returns nil on success, else an error string.
+ */
+char*
+setluniqueport(Conv* c, int lport)
+{
+	Proto *pr;
+	Conv *other;
+	char *err;
+	int i;
+
+	pr = c->p;
+	err = nil;
+
+	QLOCK(pr);
+	/* the conv table is filled from the front; a nil slot ends it */
+	for(i = 0; i < pr->nc && (other = pr->conv[i]) != nil; i++){
+		if(other == c)
+			continue;
+		if(other->state != Connected && other->state != Announced)
+			continue;
+		if(other->lport != lport || other->rport != c->rport)
+			continue;
+		if(ipcmp(other->raddr, c->raddr) != 0 || ipcmp(other->laddr, c->laddr) != 0)
+			continue;
+		err = "address in use";
+		break;
+	}
+	if(err == nil)
+		c->lport = lport;
+	QUNLOCK(pr);
+	return err;
+}
+
+/*
+ *  does any conversation of protocol p have lport as its
+ *  local port?  the scan stops at the first empty slot.
+ */
+static int
+lportinuse(Proto *p, ushort lport)
+{
+	Conv **cp, **end;
+
+	end = p->conv + p->nc;
+	for(cp = p->conv; cp < end && *cp != nil; cp++)
+		if((*cp)->lport == lport)
+			return 1;
+	return 0;
+}
+
+/*
+ *  choose an unused local port for c and install it.
+ *  returns nil on success or "no ports available".
+ *  the protocol is locked while probing.
+ */
+char *
+setlport(Conv* c)
+{
+	Proto *pr;
+	int tries, candidate, found;
+
+	pr = c->p;
+	candidate = 0;
+	found = 0;
+
+	QLOCK(pr);
+	if(c->restricted){
+		/* Restricted ports cycle between 600 and 1024. */
+		for(tries = 0; tries < 1024-600 && !found; tries++){
+			if(pr->nextrport >= 1024 || pr->nextrport < 600)
+				pr->nextrport = 600;
+			candidate = pr->nextrport++;
+			found = !lportinuse(pr, candidate);
+		}
+	}else{
+		/*
+		 * Unrestricted ports are picked at random between
+		 * 2^15 and 2^16.  With at most 4*Nchan = 4096 ports
+		 * in use, a single probe succeeds with probability
+		 * at least 1 - 4096/2^15 = 87%, so 64 failures in a
+		 * row mean a bug (or a once in 10^58 event, which is
+		 * less likely than a venti collision).
+		 */
+		for(tries = 0; tries < 64 && !found; tries++){
+			candidate = (1<<15) + nrand(1<<15);
+			found = !lportinuse(pr, candidate);
+		}
+	}
+	if(found)
+		c->lport = candidate;
+	QUNLOCK(pr);
+
+	return found ? nil : "no ports available";
+}
+
+/*
+ *  set a local address and port from a string of the form
+ *	[address!]port[!r]
+ *  str is modified in place (the first '!' is overwritten with NUL).
+ *  announcing is non-zero when called on behalf of an announce.
+ *  returns nil on success or an error string; a non-eve caller
+ *  announcing port "*" raises Eperm through error() instead.
+ */
+char*
+setladdrport(Conv* c, char* str, int announcing)
+{
+	char *p;
+	char *rv;
+	ushort lport;
+	uchar addr[IPaddrlen];
+
+	/*
+	 *  ignore restricted part if it exists.  it's
+	 *  meaningless on local ports.
+	 */
+	p = strchr(str, '!');
+	if(p != nil){
+		*p++ = 0;
+		/* "port!r": what preceded the '!' was the port, not an address */
+		if(strcmp(p, "r") == 0)
+			p = nil;
+	}
+
+	c->lport = 0;
+	if(p == nil){
+		/* no address given: wildcard when announcing, else pick one */
+		if(announcing)
+			ipmove(c->laddr, IPnoaddr);
+		else
+			setladdr(c);
+		p = str;
+	} else {
+		/* str is the address part, p the port part */
+		if(strcmp(str, "*") == 0)
+			ipmove(c->laddr, IPnoaddr);
+		else {
+			if(parseip(addr, str) == -1)
+				return Ebadip;
+			if(ipforme(c->p->f, addr))
+				ipmove(c->laddr, addr);
+			else
+				return "not a local IP address";
+		}
+	}
+
+	/* one process can get all connections */
+	if(announcing && strcmp(p, "*") == 0){
+		if(!iseve())
+			error(Eperm);
+		return setluniqueport(c, 0);
+	}
+
+	/*
+	 * NOTE(review): lport is a ushort, so the atoi result is
+	 * truncated to its width and (assuming ushort is unsigned)
+	 * `<= 0' is only true for 0 - confirm out-of-range ports
+	 * are acceptable here.
+	 */
+	lport = atoi(p);
+	if(lport <= 0)
+		rv = setlport(c);
+	else
+		rv = setluniqueport(c, lport);
+	return rv;
+}
+
+/*
+ *  parse "address!port[!r]" into c's remote address and port.
+ *  str is split in place at the first '!'.  a "!r" after the port
+ *  marks the conversation as restricted.  returns nil on success
+ *  or an error string.
+ */
+static char*
+setraddrport(Conv* c, char* str)
+{
+	char *port, *rest;
+
+	port = strchr(str, '!');
+	if(port == nil)
+		return "malformed address";
+	*port++ = 0;
+	if(parseip(c->raddr, str) == -1)
+		return Ebadip;
+	c->rport = atoi(port);
+
+	rest = strchr(port, '!');
+	if(rest != nil && strstr(rest, "!r") != nil)
+		c->restricted = 1;
+	return nil;
+}
+
+/*
+ *  called by protocol connect routine to set addresses.
+ *  argv[1] is the remote address; the optional argv[2] is the
+ *  local address/port, otherwise both are chosen here.  also
+ *  records whether the conversation is V4 or V6.
+ *  returns nil on success or an error string.
+ */
+char*
+Fsstdconnect(Conv *c, char *argv[], int argc)
+{
+	char *err;
+	int bothv4;
+
+	if(argc != 2 && argc != 3)
+		return "bad args to connect";
+
+	err = setraddrport(c, argv[1]);
+	if(err != nil)
+		return err;
+	if(argc == 2){
+		setladdr(c);
+		err = setlport(c);
+	}else
+		err = setladdrport(c, argv[2], 0);
+	if(err != nil)
+		return err;
+
+	/* V4 when both ends carry the v4 prefix, or the remote is unset */
+	bothv4 = memcmp(c->raddr, v4prefix, IPv4off) == 0
+		&& memcmp(c->laddr, v4prefix, IPv4off) == 0;
+	if(bothv4 || ipcmp(c->raddr, IPnoaddr) == 0)
+		c->ipversion = V4;
+	else
+		c->ipversion = V6;
+
+	return nil;
+}
+/*
+ *  initiate connection and sleep till it's set up
+ */
+
+/* sleep condition: true once the Conv passed as a is Connected */
+static int
+connected(void* a)
+{
+	Conv *conv;
+
+	conv = a;
+	return conv->state == Connected;
+}
+/*
+ *  handle a "connect" ctl message.  called from ipwrite with c
+ *  locked; the lock is released while sleeping and held again on
+ *  return and whenever an error is raised.  the conversation must be
+ *  in state 0; it is put in Connecting, the protocol's connect
+ *  routine is run, and the caller sleeps until the state becomes
+ *  Connected.  a message left in c->cerr is then raised as an error.
+ */
+static void
+connectctlmsg(Proto *x, Conv *c, Cmdbuf *cb)
+{
+	char *p;
+
+	if(c->state != 0)
+		error(Econinuse);
+	c->state = Connecting;
+	c->cerr[0] = '\0';
+	/* note: the state is already Connecting if either error below is raised */
+	if(x->connect == nil)
+		error("connect not supported");
+	p = x->connect(c, cb->f, cb->nf);
+	if(p != nil)
+		error(p);
+
+	/* drop the lock for the sleep; retake it before propagating an error */
+	QUNLOCK(c);
+	if(waserror()){
+		QLOCK(c);
+		nexterror();
+	}
+	sleep(&c->cr, connected, c);
+	QLOCK(c);
+	poperror();
+
+	if(c->cerr[0] != '\0')
+		error(c->cerr);
+}
+
+/*
+ *  called by protocol announce routine to set addresses.
+ *  the remote address and port are cleared; argv[1] gives the
+ *  local address/port.  returns nil on success or an error string.
+ */
+char*
+Fsstdannounce(Conv* c, char* argv[], int argc)
+{
+	memset(c->raddr, 0, sizeof(c->raddr));
+	c->rport = 0;
+	if(argc != 2)
+		return "bad args to announce";
+	return setladdrport(c, argv[1], 1);
+}
+
+/*
+ *  initiate announcement and sleep till it's set up
+ */
+
+/* sleep condition: true once the Conv passed as a is Announced */
+static int
+announced(void* a)
+{
+	Conv *conv;
+
+	conv = a;
+	return conv->state == Announced;
+}
+/*
+ *  handle an "announce" ctl message.  called from ipwrite with c
+ *  locked; the lock is released while sleeping and held again on
+ *  return and whenever an error is raised.  the conversation must be
+ *  in state 0; it is put in Announcing, the protocol's announce
+ *  routine is run, and the caller sleeps until the state becomes
+ *  Announced.  a message left in c->cerr is then raised as an error.
+ */
+static void
+announcectlmsg(Proto *x, Conv *c, Cmdbuf *cb)
+{
+	char *p;
+
+	if(c->state != 0)
+		error(Econinuse);
+	c->state = Announcing;
+	c->cerr[0] = '\0';
+	/* note: the state is already Announcing if either error below is raised */
+	if(x->announce == nil)
+		error("announce not supported");
+	p = x->announce(c, cb->f, cb->nf);
+	if(p != nil)
+		error(p);
+
+	/* drop the lock for the sleep; retake it before propagating an error */
+	QUNLOCK(c);
+	if(waserror()){
+		QLOCK(c);
+		nexterror();
+	}
+	sleep(&c->cr, announced, c);
+	QLOCK(c);
+	poperror();
+
+	if(c->cerr[0] != '\0')
+		error(c->cerr);
+}
+
+/*
+ *  called by protocol bind routine to set addresses.
+ *  argv[1] gives the local address/port.  returns nil on
+ *  success or an error string.
+ */
+char*
+Fsstdbind(Conv* c, char* argv[], int argc)
+{
+	if(argc == 2)
+		return setladdrport(c, argv[1], 0);
+	return "bad args to bind";
+}
+
+/*
+ *  handle a "bind" ctl message: use the protocol's bind routine
+ *  when it has one, Fsstdbind otherwise, and raise any error
+ *  string that comes back.
+ */
+static void
+bindctlmsg(Proto *x, Conv *c, Cmdbuf *cb)
+{
+	char *err;
+
+	if(x->bind != nil)
+		err = x->bind(c, cb->f, cb->nf);
+	else
+		err = Fsstdbind(c, cb->f, cb->nf);
+	if(err != nil)
+		error(err);
+}
+
+/*
+ *  handle a "tos" ctl message: set the conversation's tos from
+ *  the first argument, or to 0 when none is given.
+ */
+static void
+tosctlmsg(Conv *c, Cmdbuf *cb)
+{
+	int tos;
+
+	tos = 0;
+	if(cb->nf >= 2)
+		tos = atoi(cb->f[1]);
+	c->tos = tos;
+}
+
+/*
+ *  handle a "ttl" ctl message: set the conversation's ttl from
+ *  the first argument, or to MAXTTL when none is given.
+ */
+static void
+ttlctlmsg(Conv *c, Cmdbuf *cb)
+{
+	int ttl;
+
+	ttl = MAXTTL;
+	if(cb->nf >= 2)
+		ttl = atoi(cb->f[1]);
+	c->ttl = ttl;
+}
+
+/*
+ *  write to a file of the ip device.  data goes to the
+ *  conversation's write queue; arp, iproute, log and ndb are passed
+ *  to their own handlers; a write to ctl is parsed as a command
+ *  and executed with the conversation locked.  returns the number
+ *  of bytes consumed; unknown file types raise Eperm.
+ */
+static long
+ipwrite(Chan* ch, void *v, long n, vlong off)
+{
+	Conv *c;
+	Proto *x;
+	char *p;
+	Cmdbuf *cb;
+	uchar ia[IPaddrlen], ma[IPaddrlen];
+	Fs *f;
+	char *a;
+	ulong offset = off;	/* the vlong offset is narrowed to ulong here */
+
+	a = v;
+	f = ipfs[ch->dev];
+
+	switch(TYPE(ch->qid)){
+	default:
+		/* relies on error() not returning; otherwise this would fall through */
+		error(Eperm);
+	case Qdata:
+		x = f->p[PROTO(ch->qid)];
+		c = x->conv[CONV(ch->qid)];
+
+		/* a conversation without a write queue cannot be written */
+		if(c->wq == nil)
+			error(Eperm);
+
+		qwrite(c->wq, a, n);
+		break;
+	case Qarp:
+		return arpwrite(f, a, n);
+	case Qiproute:
+		return routewrite(f, ch, a, n);
+	case Qlog:
+		netlogctl(f, a, n);
+		return n;
+	case Qndb:
+		return ndbwrite(f, a, offset, n);
+		break;
+	case Qctl:
+		x = f->p[PROTO(ch->qid)];
+		c = x->conv[CONV(ch->qid)];
+		cb = parsecmd(a, n);
+
+		/* on any error below: unlock the conversation and free the command */
+		QLOCK(c);
+		if(waserror()) {
+			QUNLOCK(c);
+			free(cb);
+			nexterror();
+		}
+		if(cb->nf < 1)
+			error("short control request");
+		if(strcmp(cb->f[0], "connect") == 0)
+			connectctlmsg(x, c, cb);
+		else if(strcmp(cb->f[0], "announce") == 0)
+			announcectlmsg(x, c, cb);
+		else if(strcmp(cb->f[0], "bind") == 0)
+			bindctlmsg(x, c, cb);
+		else if(strcmp(cb->f[0], "ttl") == 0)
+			ttlctlmsg(c, cb);
+		else if(strcmp(cb->f[0], "tos") == 0)
+			tosctlmsg(c, cb);
+		else if(strcmp(cb->f[0], "ignoreadvice") == 0)
+			c->ignoreadvice = 1;
+		else if(strcmp(cb->f[0], "addmulti") == 0){
+			/*
+			 * addmulti ifcaddr		join c->raddr on that interface
+			 * addmulti ifcaddr mcastaddr	join mcastaddr on that interface
+			 */
+			if(cb->nf < 2)
+				error("addmulti needs interface address");
+			if(cb->nf == 2){
+				if(!ipismulticast(c->raddr))
+					error("addmulti for a non multicast address");
+				if (parseip(ia, cb->f[1]) == -1)
+					error(Ebadip);
+				ipifcaddmulti(c, c->raddr, ia);
+			} else {
+				if (parseip(ia, cb->f[1]) == -1 ||
+				    parseip(ma, cb->f[2]) == -1)
+					error(Ebadip);
+				if(!ipismulticast(ma))
+					error("addmulti for a non multicast address");
+				ipifcaddmulti(c, ma, ia);
+			}
+		} else if(strcmp(cb->f[0], "remmulti") == 0){
+			/* remmulti ifcaddr: the group is always c->raddr */
+			if(cb->nf < 2)
+				error("remmulti needs interface address");
+			if(!ipismulticast(c->raddr))
+				error("remmulti for a non multicast address");
+			if (parseip(ia, cb->f[1]) == -1)
+				error(Ebadip);
+			ipifcremmulti(c, c->raddr, ia);
+		} else if(strcmp(cb->f[0], "maxfragsize") == 0){
+			if(cb->nf < 2)
+				error("maxfragsize needs size");
+
+			/* base 0: strtol accepts decimal, octal and hex */
+			c->maxfragsize = (int)strtol(cb->f[1], nil, 0);
+			
+		} else if(x->ctl != nil) {
+			/* anything else is offered to the protocol's own ctl routine */
+			p = x->ctl(c, cb->f, cb->nf);
+			if(p != nil)
+				error(p);
+		} else
+			error("unknown control request");
+		QUNLOCK(c);
+		free(cb);
+		poperror();
+	}
+	return n;
+}
+
+/*
+ *  block write: a data file queues the block (flattened to a
+ *  single block first if it is a list) on the conversation's
+ *  write queue and returns its length; every other file type
+ *  is handed to devbwrite.
+ */
+static long
+ipbwrite(Chan* ch, Block* bp, ulong offset)
+{
+	Conv *conv;
+	int len;
+
+	if(TYPE(ch->qid) != Qdata)
+		return devbwrite(ch, bp, offset);
+
+	conv = ipfs[ch->dev]->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
+	if(conv->wq == nil)
+		error(Eperm);
+
+	if(bp->next)
+		bp = concatblock(bp);
+	/* take the length before the queue takes the block */
+	len = BLEN(bp);
+	qbwrite(conv->wq, bp);
+	return len;
+}
+
+/*
+ *  device table entry for the ip device: device character 'I',
+ *  name "ip", followed by the driver's entry points.  the generic
+ *  devinit and devshutdown are used for init and shutdown.
+ */
+Dev ipdevtab = {
+	'I',
+	"ip",
+
+	ipreset,
+	devinit,
+	devshutdown,
+	ipattach,
+	ipwalk,
+	ipstat,
+	ipopen,
+	ipcreate,
+	ipclose,
+	ipread,
+	ipbread,
+	ipwrite,
+	ipbwrite,
+	ipremove,
+	ipwstat,
+};
+
+/*
+ *  register protocol p with stack f: claim the next protocol
+ *  slot, enter p in the ip-protocol-number table when it has a
+ *  number, and allocate its (initially empty) conversation table.
+ *  returns 0 on success, -1 if the protocol table is full or the
+ *  protocol number is already taken; panics if out of memory.
+ */
+int
+Fsproto(Fs *f, Proto *p)
+{
+	int slot;
+
+	if(f->np >= Maxproto)
+		return -1;
+
+	p->f = f;
+
+	if(p->ipproto > 0){
+		if(f->t2p[p->ipproto] != nil)
+			return -1;
+		f->t2p[p->ipproto] = p;
+	}
+
+	slot = f->np;
+	p->qid.type = QTDIR;
+	p->qid.path = QID(slot, 0, Qprotodir);
+	p->conv = malloc(sizeof(Conv*)*(p->nc+1));
+	if(p->conv == nil)
+		panic("Fsproto");
+
+	p->x = slot;
+	p->nextrport = 600;
+	f->p[slot] = p;
+	f->np++;
+
+	return 0;
+}
+
+/*
+ *  return true if this protocol is
+ *  built in, i.e. if stack f has a Proto registered
+ *  in t2p for ip protocol number proto
+ */
+int
+Fsbuiltinproto(Fs* f, uchar proto)
+{
+	return f->t2p[proto] != nil;
+}
+
+/*
+ *  called with protocol locked
+ *
+ *  find or create a free conversation of protocol p and hand it to
+ *  user.  the table is scanned from the front: an empty slot gets a
+ *  freshly allocated Conv, otherwise the first existing Conv that
+ *  can be locked and is unused by both processes and the protocol
+ *  is recycled.  if the table is exhausted, the protocol's gc routine
+ *  (if any) is given a chance and the scan is retried.
+ *  returns the conversation with inuse == 1 and unlocked, or nil if
+ *  none could be found; raises Enomem if allocation fails.
+ */
+Conv*
+Fsprotoclone(Proto *p, char *user)
+{
+	Conv *c, **pp, **ep;
+
+retry:
+	c = nil;
+	ep = &p->conv[p->nc];
+	for(pp = p->conv; pp < ep; pp++) {
+		c = *pp;
+		if(c == nil){
+			/* empty slot: build a new Conv; it leaves the loop locked */
+			c = malloc(sizeof(Conv));
+			if(c == nil)
+				error(Enomem);
+			QLOCK(c);
+			c->p = p;
+			c->x = pp - p->conv;
+			if(p->ptclsize != 0){
+				c->ptcl = malloc(p->ptclsize);
+				if(c->ptcl == nil) {
+					free(c);
+					error(Enomem);
+				}
+			}
+			*pp = c;
+			p->ac++;
+			c->eq = qopen(1024, Qmsg, 0, 0);
+			(*p->create)(c);
+			break;
+		}
+		if(CANQLOCK(c)){
+			/*
+			 *  make sure both processes and protocol
+			 *  are done with this Conv
+			 */
+			if(c->inuse == 0 && (p->inuse == nil || (*p->inuse)(c) == 0))
+				break;
+
+			QUNLOCK(c);
+		}
+	}
+	/* ran off the end: every slot is taken by a busy conversation */
+	if(pp >= ep) {
+		if(p->gc != nil && (*p->gc)(p))
+			goto retry;
+		return nil;
+	}
+
+	/* c is locked here; reset it to a pristine Idle conversation */
+	c->inuse = 1;
+	kstrdup(&c->owner, user);
+	c->perm = 0660;
+	c->state = Idle;
+	ipmove(c->laddr, IPnoaddr);
+	ipmove(c->raddr, IPnoaddr);
+	c->r = nil;
+	c->rgen = 0;
+	c->lport = 0;
+	c->rport = 0;
+	c->restricted = 0;
+	c->maxfragsize = 0;
+	c->ttl = MAXTTL;
+	qreopen(c->rq);
+	qreopen(c->wq);
+	qreopen(c->eq);
+
+	QUNLOCK(c);
+	return c;
+}
+
+/*
+ *  report the outcome of a connect or announce: a non-empty msg
+ *  is saved in c->cerr, an Announcing conversation becomes
+ *  Announced and a Connecting one Connected, and whoever sleeps
+ *  on c->cr is woken.  always returns 0.
+ */
+int
+Fsconnected(Conv* c, char* msg)
+{
+	if(msg != nil && *msg != '\0')
+		strncpy(c->cerr, msg, ERRMAX-1);
+
+	if(c->state == Announcing)
+		c->state = Announced;
+	else if(c->state == Connecting)
+		c->state = Connected;
+
+	wakeup(&c->cr);
+	return 0;
+}
+
+/*
+ *  protocol that receives packets with ip protocol number proto:
+ *  f->ipmux when it is set, else the t2p entry (which may be nil).
+ */
+Proto*
+Fsrcvpcol(Fs* f, uchar proto)
+{
+	if(f->ipmux == nil)
+		return f->t2p[proto];
+	return f->ipmux;
+}
+
+/*
+ *  like Fsrcvpcol but ignoring f->ipmux: returns the t2p entry
+ *  for ip protocol number proto, or nil if there is none.
+ */
+Proto*
+Fsrcvpcolx(Fs *f, uchar proto)
+{
+	return f->t2p[proto];
+}
+
+/*
+ *  called with protocol locked
+ *
+ *  queue a new incoming call on conversation c.  a fresh
+ *  conversation owned by `network' is cloned, given the supplied
+ *  addresses, ports and ip version, marked Connected and appended to
+ *  c's incall list; the listener sleeping on c->listenr is then woken.
+ *  returns the new conversation, or nil if Maxincall calls are
+ *  already queued or no conversation is available.
+ */
+Conv*
+Fsnewcall(Conv *c, uchar *raddr, ushort rport, uchar *laddr, ushort lport, uchar version)
+{
+	Conv *nc;
+	Conv **l;
+	int i;
+
+	QLOCK(c);
+	/* count the queued calls; l ends up at the list's tail link */
+	i = 0;
+	for(l = &c->incall; *l; l = &(*l)->next)
+		i++;
+	if(i >= Maxincall) {
+		QUNLOCK(c);
+		return nil;
+	}
+
+	/* find a free conversation */
+	nc = Fsprotoclone(c->p, network);
+	if(nc == nil) {
+		QUNLOCK(c);
+		return nil;
+	}
+	ipmove(nc->raddr, raddr);
+	nc->rport = rport;
+	ipmove(nc->laddr, laddr);
+	nc->lport = lport;
+	nc->next = nil;
+	*l = nc;
+	nc->state = Connected;
+	nc->ipversion = version;
+
+	QUNLOCK(c);
+
+	wakeup(&c->listenr);
+
+	return nc;
+}
+
+/*
+ *  write n bytes from a into the ndb string of f at offset off and
+ *  terminate the string right after them.  off may not lie beyond
+ *  the current end of the string and the result, including its
+ *  terminator, must fit in f->ndb; otherwise Eio is raised.
+ *  bumps the ndb version and modification time.  returns n.
+ */
+long
+ndbwrite(Fs *f, char *a, ulong off, int n)
+{
+	/* n is used as a memmove length: never let a negative count through */
+	if(n < 0)
+		error(Eio);
+	if(off > strlen(f->ndb))
+		error(Eio);
+	/*
+	 * off < sizeof(f->ndb) here, so the subtraction cannot wrap;
+	 * comparing this way also avoids overflow in off+n.
+	 */
+	if(n >= sizeof(f->ndb) - off)
+		error(Eio);
+	memmove(f->ndb+off, a, n);
+	f->ndb[off+n] = 0;
+	f->ndbvers++;
+	f->ndbmtime = seconds();
+	return n;
+}
+
+/*
+ *  number of conversations to allow: Nchans, or four times that
+ *  on a cpu server with at least 128MB of memory.
+ */
+ulong
+scalednconv(void)
+{
+	ulong n;
+
+	n = Nchans;
+	if(cpuserver && conf.npage*BY2PG >= 128*MB)
+		n = Nchans*4;
+	return n;
+}
diff --git a/src/9vx/a/ip/eipconvtest.c b/src/9vx/a/ip/eipconvtest.c
@@ -0,0 +1,152 @@
+#include <u.h>
+#include <libc.h>
+
+/*
+ *  flag or'ed into the prefixvals entries that are valid
+ *  contiguous-ones mask bytes
+ */
+enum
+{
+	Isprefix= 16,
+};
+
+/*
+ *  indexed by a mask byte: for bytes made of leading one bits
+ *  followed by zero bits, the number of one bits (0-8) or'ed with
+ *  Isprefix; every other entry is 0.
+ */
+uchar prefixvals[256] =
+{
+[0x00] 0 | Isprefix,
+[0x80] 1 | Isprefix,
+[0xC0] 2 | Isprefix,
+[0xE0] 3 | Isprefix,
+[0xF0] 4 | Isprefix,
+[0xF8] 5 | Isprefix,
+[0xFC] 6 | Isprefix,
+[0xFE] 7 | Isprefix,
+[0xFF] 8 | Isprefix,
+};
+
+/*
+ *  first 12 bytes of a 16-byte address that holds an IPv4 address:
+ *  ten zero bytes followed by 0xff 0xff.  eipconv compares only
+ *  those 12 bytes.
+ */
+uchar v4prefix[16] = {
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0xff, 0xff,
+	0, 0, 0, 0
+};
+
+/*
+ *  store the low 32 bits of v at p as four bytes,
+ *  most significant byte first
+ */
+void
+hnputl(void *p, ulong v)
+{
+	uchar *out;
+	int shift;
+
+	out = p;
+	for(shift = 24; shift >= 0; shift -= 8)
+		*out++ = v>>shift;
+}
+
+/*
+ *  print-format routine for network addresses.  the verb in f->chr
+ *  selects the format of the single pointer argument taken from arg:
+ *	E	6-byte ethernet address, 12 hex digits
+ *	I	16-byte ip address; dotted quad if it has the v4 prefix,
+ *		otherwise colon-separated hex with the longest run of
+ *		zero groups replaced by "::"
+ *	i	ip address given as 4 ulongs, printed as for I
+ *	V	4-byte v4 address, dotted quad
+ *	M	16-byte mask; "/n" if it is n one bits followed by
+ *		zeros, otherwise printed as for I
+ *  the text is emitted through strconv.  returns sizeof(uchar*)
+ *  (presumably the size of the argument consumed - confirm against
+ *  the Fconv interface).
+ */
+int
+eipconv(va_list *arg, Fconv *f)
+{
+	/* room for 8 groups of 4 hex digits, 7 colons and a NUL */
+	char buf[8*5];
+	static char *efmt = "%.2lux%.2lux%.2lux%.2lux%.2lux%.2lux";
+	static char *ifmt = "%d.%d.%d.%d";
+	uchar *p, ip[16];
+	ulong *lp;
+	ushort s;
+	int i, j, n, eln, eli;
+
+	switch(f->chr) {
+	case 'E':		/* Ethernet address */
+		p = va_arg(*arg, uchar*);
+		sprint(buf, efmt, p[0], p[1], p[2], p[3], p[4], p[5]);
+		break;
+	case 'I':		/* Ip address */
+		p = va_arg(*arg, uchar*);
+common:
+		/* also reached from 'i' and 'M' with p pointing at 16 bytes */
+		if(memcmp(p, v4prefix, 12) == 0)
+			sprint(buf, ifmt, p[12], p[13], p[14], p[15]);
+		else {
+			/* find longest elision */
+			/* eli: byte index where the run starts; eln: its length in bytes */
+			eln = eli = -1;
+			for(i = 0; i < 16; i += 2){
+				for(j = i; j < 16; j += 2)
+					if(p[j] != 0 || p[j+1] != 0)
+						break;
+				if(j > i && j - i > eln){
+					eli = i;
+					eln = j - i;
+				}
+			}
+
+			/* print with possible elision */
+			n = 0;
+			for(i = 0; i < 16; i += 2){
+				if(i == eli){
+					/* skip the zero run; the group after it needs no ':' */
+					n += sprint(buf+n, "::");
+					i += eln;
+					if(i >= 16)
+						break;
+				} else if(i != 0)
+					n += sprint(buf+n, ":");
+				s = (p[i]<<8) + p[i+1];
+				n += sprint(buf+n, "%ux", s);
+			}
+		}
+		break;
+	case 'i':		/* v6 address as 4 longs */
+		/* convert to 16 bytes, each long most significant byte first */
+		lp = va_arg(*arg, ulong*);
+		for(i = 0; i < 4; i++)
+			hnputl(ip+4*i, *lp++);
+		p = ip;
+		goto common;
+	case 'V':		/* v4 ip address */
+		p = va_arg(*arg, uchar*);
+		sprint(buf, ifmt, p[0], p[1], p[2], p[3]);
+		break;
+	case 'M':		/* ip mask */
+		p = va_arg(*arg, uchar*);
+
+		/* look for a prefix mask */
+		/* i: index of the first byte that is not all ones */
+		for(i = 0; i < 16; i++)
+			if(p[i] != 0xff)
+				break;
+		if(i < 16){
+			/* that byte must be ones-then-zeros and all later bytes 0 */
+			if((prefixvals[p[i]] & Isprefix) == 0)
+				goto common;
+			for(j = i+1; j < 16; j++)
+				if(p[j] != 0)
+					goto common;
+			n = 8*i + (prefixvals[p[i]] & ~Isprefix);
+		} else
+			n = 8*16;
+
+		/* got one, use /xx format */
+		sprint(buf, "/%d", n);
+		break;
+	default:
+		strcpy(buf, "(eipconv)");
+	}
+	strconv(buf, f);
+	return sizeof(uchar*);
+}
+
+/*
+ *  16-byte test inputs; main prints each one with both %I and %M.
+ *  they cover a v4-mapped address, all-ones, prefix masks of
+ *  several lengths, all-zeros and addresses with zero runs.
+ */
+uchar testvec[11][16] =
+{
+ { 0,0,0,0, 0,0,0,0, 0,0,0xff,0xff, 1,3,4,5, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, },
+ { 0xff,0xff,0x80,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xc0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xe0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xf0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xff,0xf8,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, },
+ { 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0,0,0,0, 0,0x11,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0,0,0,0x11, 0,0,0,0, 0,0,0,0, 0,0,0,0x12, },
+};
+
+/*
+ *  install eipconv for the I and M verbs and print every
+ *  test vector in both formats, one per line
+ */
+void
+main(void)
+{
+	uchar (*vec)[16];
+
+	fmtinstall('I', eipconv);
+	fmtinstall('M', eipconv);
+	for(vec = testvec; vec < testvec+11; vec++)
+		print("%I\n%M\n", *vec, *vec);
+	exits(0);
+}
diff --git a/src/9vx/a/ip/esp.c b/src/9vx/a/ip/esp.c
@@ -0,0 +1,951 @@
+/*
+ * Encapsulating Security Payload for IPsec for IPv4, rfc1827.
+ *	currently only implements tunnel mode.
+ * TODO: update to match rfc4303.
+ */
+#include	"u.h"
+#include	"lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"error.h"
+
+#include	"ip.h"
+#include	"ipv6.h"
+#include	"libsec.h"
+
+/* short names for the structures declared below */
+typedef struct Esphdr Esphdr;
+typedef struct Esp4hdr Esp4hdr;
+typedef struct Esp6hdr Esp6hdr;
+typedef struct Esptail Esptail;
+typedef struct Userhdr Userhdr;
+typedef struct Esppriv Esppriv;
+typedef struct Espcb Espcb;
+typedef struct Algorithm Algorithm;
+
+/* protocol number and fixed sizes, in bytes, of the ESP framing */
+enum
+{
+	IP_ESPPROTO	= 50,	/* IP v4 and v6 protocol number */
+	Esp4hdrlen	= IP4HDR + 8,	/* ipv4 header + spi + sequence number */
+	Esp6hdrlen	= IP6HDR + 8,	/* ipv6 header + spi + sequence number */
+
+	Esptaillen	= 2,	/* does not include pad or auth data */
+	Userhdrlen	= 4,	/* user-visible header size - if enabled */
+};
+
+/* the ESP header proper: two 4-byte fields */
+struct Esphdr
+{
+	uchar	espspi[4];	/* Security parameter index */
+	uchar	espseq[4];	/* Sequence number */
+};
+
+/*
+ * tunnel-mode layout:		IP | ESP | TCP/UDP | user data.
+ * transport-mode layout is:	ESP | IP | TCP/UDP | user data.
+ *
+ * Esp4hdr overlays the start of an IPv4 ESP packet: the ip header
+ * immediately followed by the fields of Esphdr.
+ */
+struct Esp4hdr
+{
+	/* ipv4 header */
+	uchar	vihl;		/* Version and header length */
+	uchar	tos;		/* Type of service */
+	uchar	length[2];	/* packet length */
+	uchar	id[2];		/* Identification */
+	uchar	frag[2];	/* Fragment information */
+	uchar	Unused;
+	uchar	espproto;	/* Protocol */
+	uchar	espplen[2];	/* Header plus data length */
+	uchar	espsrc[4];	/* Ip source */
+	uchar	espdst[4];	/* Ip destination */
+
+	/* Esphdr; */
+	uchar	espspi[4];	/* Security parameter index */
+	uchar	espseq[4];	/* Sequence number */
+};
+
+/*
+ * tunnel-mode layout
+ *
+ * Esp6hdr overlays the start of an IPv6 ESP packet: the ip header
+ * immediately followed by the fields of Esphdr.
+ */
+struct Esp6hdr
+{
+	/* Ip6hdr; */
+	uchar	vcf[4];		/* version:4, traffic class:8, flow label:20 */
+	uchar	ploadlen[2];	/* payload length: packet length - 40 */
+	uchar	proto;		/* next header type */
+	uchar	ttl;		/* hop limit */
+	uchar	src[IPaddrlen];
+	uchar	dst[IPaddrlen];
+
+	/* Esphdr; */
+	uchar	espspi[4];	/* Security parameter index */
+	uchar	espseq[4];	/* Sequence number */
+};
+
+/* trailer that follows the padded payload (Esptaillen bytes) */
+struct Esptail
+{
+	uchar	pad;		/* number of pad bytes before this trailer */
+	uchar	nexthdr;	/* protocol of the enclosed payload */
+};
+
+/* header as seen by the user (Userhdrlen bytes), used when Espcb.header is set */
+struct Userhdr
+{
+	uchar	nexthdr;	/* next protocol */
+	uchar	unused[3];
+};
+
+/* per-protocol counters, reported by espstats */
+struct Esppriv
+{
+	ulong	in;
+	ulong	inerrors;
+};
+
+/*
+ *  protocol specific part of Conv
+ */
+struct Espcb
+{
+	int	incoming;	/* set by espconnect when the remote spi is "*" */
+	int	header;		/* use user level header */
+	ulong	spi;
+	ulong	seq;		/* last seq sent */
+	ulong	window;		/* for replay attacks */
+	char	*espalg;
+	void	*espstate;	/* other state for esp */
+	int	espivlen;	/* in bytes */
+	int	espblklen;
+	int	(*cipher)(Espcb*, uchar *buf, int len);
+	char	*ahalg;
+	void	*ahstate;	/* other state for ah */
+	int	ahlen;		/* auth data length in bytes */
+	int	ahblklen;
+	int	(*auth)(Espcb*, uchar *buf, int len, uchar *hash);
+};
+
+/* one entry of the espalg and ahalg tables */
+struct Algorithm
+{
+	char 	*name;
+	int	keylen;		/* in bits */
+	void	(*init)(Espcb*, char* name, uchar *key, int keylen);
+};
+
+/* forward declarations of routines defined later in this file */
+static	Conv* convlookup(Proto *esp, ulong spi);
+static	char *setalg(Espcb *ecb, char **f, int n, Algorithm *alg);
+static	void espkick(void *x);
+
+/* init routines named in the espalg table below */
+static	void nullespinit(Espcb*, char*, uchar *key, int keylen);
+static	void desespinit(Espcb *ecb, char *name, uchar *k, int n);
+
+/* init routines named in the ahalg table below */
+static	void nullahinit(Espcb*, char*, uchar *key, int keylen);
+static	void shaahinit(Espcb*, char*, uchar *key, int keylen);
+static	void md5ahinit(Espcb*, char*, uchar *key, int keylen);
+
+/*
+ *  encryption algorithms selectable with the "esp" ctl message
+ *  (see espctl): name, key length in bits, init routine.
+ *  terminated by a nil entry; commented-out rows are not available.
+ */
+static Algorithm espalg[] =
+{
+	"null",			0,	nullespinit,
+//	"des3_cbc",		192,	des3espinit,	/* rfc2451 */
+//	"aes_128_cbc",		128,	aescbcespinit,	/* rfc3602 */
+//	"aes_ctr",		128,	aesctrespinit,	/* rfc3686 */
+	"des_56_cbc",		64,	desespinit,	/* rfc2405, deprecated */
+//	"rc4_128",		128,	rc4espinit,	/* gone in rfc4305 */
+	nil,			0,	nil,
+};
+
+/*
+ *  authentication algorithms selectable with the "ah" ctl message
+ *  (see espctl): name, key length in bits, init routine.
+ *  terminated by a nil entry; commented-out rows are not available.
+ */
+static Algorithm ahalg[] =
+{
+	"null",			0,	nullahinit,
+	"hmac_sha1_96",		128,	shaahinit,	/* rfc2404 */
+//	"aes_xcbc_mac_96",	128,	aesahinit,	/* rfc3566 */
+	"hmac_md5_96",		128,	md5ahinit,	/* rfc2403 */
+	nil,			0,	nil,
+};
+
+/*
+ *  connect routine of the esp protocol.  argv[1] has the form
+ *  "address!spi".  an spi of "*" makes this the receiving side:
+ *  an unused spi is picked at random and the write queue is hung
+ *  up.  a numeric spi makes this the sending side and the read
+ *  queue is hung up.  both the cipher and the authenticator start
+ *  out as "null".  the outcome is reported through Fsconnected;
+ *  returns nil on success or an error string.
+ */
+static char*
+espconnect(Conv *c, char **argv, int argc)
+{
+	char *p, *pp;
+	char *e = nil;
+	ulong spi;
+	Espcb *ecb = (Espcb*)c->ptcl;
+
+	switch(argc) {
+	default:
+		e = "bad args to connect";
+		break;
+	case 2:
+		p = strchr(argv[1], '!');
+		if(p == nil){
+			e = "malformed address";
+			break;
+		}
+		*p++ = 0;
+		/* reject an unparsable address instead of using whatever parseip left */
+		if(parseip(c->raddr, argv[1]) == -1){
+			e = Ebadip;
+			break;
+		}
+		findlocalip(c->p->f, c->laddr, c->raddr);
+		ecb->incoming = 0;
+		ecb->seq = 0;
+		if(strcmp(p, "*") == 0) {
+			/* the protocol lock keeps the spi lookup and choice atomic */
+			QLOCK(c->p);
+			for(;;) {
+				spi = nrand(1<<16) + 256;
+				if(convlookup(c->p, spi) == nil)
+					break;
+			}
+			QUNLOCK(c->p);
+			ecb->spi = spi;
+			ecb->incoming = 1;
+			qhangup(c->wq, nil);
+		} else {
+			spi = strtoul(p, &pp, 10);
+			if(pp == p) {
+				e = "malformed address";
+				break;
+			}
+			ecb->spi = spi;
+			qhangup(c->rq, nil);
+		}
+		nullespinit(ecb, "null", nil, 0);
+		nullahinit(ecb, "null", nil, 0);
+	}
+	Fsconnected(c, e);
+
+	return e;
+}
+
+
+/*
+ *  state routine: writes "Open\n" into state if the conversation
+ *  is in use, "Closed\n" otherwise (at most n bytes).  returns
+ *  snprint's result.
+ */
+static int
+espstate(Conv *c, char *state, int n)
+{
+	return snprint(state, n, "%s", c->inuse?"Open\n":"Closed\n");
+}
+
+/*
+ *  create routine: give a new conversation its 64K read queue and
+ *  a 64K write queue whose kick routine is espkick, called with
+ *  the conversation as argument.
+ */
+static void
+espcreate(Conv *c)
+{
+	c->rq = qopen(64*1024, Qmsg, 0, 0);
+	c->wq = qopen(64*1024, Qkick, espkick, c);
+}
+
+/*
+ *  close routine: close the three queues, forget both addresses,
+ *  release the cipher and authenticator state and zero the
+ *  protocol control block.
+ */
+static void
+espclose(Conv *c)
+{
+	Espcb *cb;
+
+	cb = (Espcb*)c->ptcl;
+
+	qclose(c->rq);
+	qclose(c->wq);
+	qclose(c->eq);
+
+	ipmove(c->laddr, IPnoaddr);
+	ipmove(c->raddr, IPnoaddr);
+
+	free(cb->espstate);
+	free(cb->ahstate);
+	memset(cb, 0, sizeof(*cb));
+}
+
+/*
+ *  ip version of a conversation: V4 if the remote address is
+ *  unset or if both addresses carry the v4 prefix, V6 otherwise.
+ */
+static int
+ipvers(Conv *c)
+{
+	if(ipcmp(c->raddr, IPnoaddr) == 0)
+		return V4;
+	if(memcmp(c->raddr, v4prefix, IPv4off) != 0)
+		return V6;
+	if(memcmp(c->laddr, v4prefix, IPv4off) != 0)
+		return V6;
+	return V4;
+}
+
+/*
+ *  kick routine of the write queue (installed by espcreate; x is
+ *  the conversation).  takes one block from c->wq, strips the user
+ *  header if enabled, makes room for the ip+esp header in front and
+ *  for padding, trailer and auth data behind, runs the cipher and
+ *  the authenticator over it and hands it to ipoput4 or ipoput6.
+ */
+static void
+espkick(void *x)
+{
+	Conv *c = x;
+	Esp4hdr *eh4;
+	Esp6hdr *eh6;
+	Esptail *et;
+	Userhdr *uh;
+	Espcb *ecb;
+	Block *bp;
+	int nexthdr, payload, pad, align, version, hdrlen, iphdrlen;
+	uchar *auth;
+
+	version = ipvers(c);
+	iphdrlen = version == V4? IP4HDR: IP6HDR;
+	hdrlen =   version == V4? Esp4hdrlen: Esp6hdrlen;
+
+	bp = qget(c->wq);
+	if(bp == nil)
+		return;
+
+	QLOCK(c);
+	ecb = c->ptcl;
+
+	if(ecb->header) {
+		/* make sure the message has a User header */
+		bp = pullupblock(bp, Userhdrlen);
+		if(bp == nil) {
+			QUNLOCK(c);
+			return;
+		}
+		uh = (Userhdr*)bp->rp;
+		nexthdr = uh->nexthdr;
+		bp->rp += Userhdrlen;
+	} else {
+		nexthdr = 0;	/* what should this be? */
+	}
+
+	/* payload counts the iv as well as the user data */
+	payload = BLEN(bp) + ecb->espivlen;
+
+	/* Make space to fit ip header */
+	bp = padblock(bp, hdrlen + ecb->espivlen);
+
+	/* pad so that payload+pad+Esptaillen is a multiple of align */
+	align = 4;
+	if(ecb->espblklen > align)
+		align = ecb->espblklen;
+	/* NOTE(review): divides by ahblklen; assumes the auth init routine set it non-zero */
+	if(align % ecb->ahblklen != 0)
+		panic("espkick: ahblklen is important after all");
+	pad = (align-1) - (payload + Esptaillen-1)%align;
+
+	/*
+	 * Make space for tail
+	 * this is done by calling padblock with a negative size
+	 * Padblock does not change bp->wp!
+	 */
+	bp = padblock(bp, -(pad+Esptaillen+ecb->ahlen));
+	bp->wp += pad+Esptaillen+ecb->ahlen;
+
+	/* both header views alias the start of the block; version picks one */
+	eh4 = (Esp4hdr *)bp->rp;
+	eh6 = (Esp6hdr *)bp->rp;
+	et = (Esptail*)(bp->rp + hdrlen + payload + pad);
+
+	/* fill in tail */
+	et->pad = pad;
+	et->nexthdr = nexthdr;
+
+	/* the cipher covers iv, data, pad and trailer; auth data follows them */
+	ecb->cipher(ecb, bp->rp + hdrlen, payload + pad + Esptaillen);
+	auth = bp->rp + hdrlen + payload + pad + Esptaillen;
+
+	/* fill in head */
+	if (version == V4) {
+		eh4->vihl = IP_VER4;
+		hnputl(eh4->espspi, ecb->spi);
+		hnputl(eh4->espseq, ++ecb->seq);
+		v6tov4(eh4->espsrc, c->laddr);
+		v6tov4(eh4->espdst, c->raddr);
+		eh4->espproto = IP_ESPPROTO;
+		eh4->frag[0] = 0;
+		eh4->frag[1] = 0;
+	} else {
+		eh6->vcf[0] = IP_VER6;
+		hnputl(eh6->espspi, ecb->spi);
+		hnputl(eh6->espseq, ++ecb->seq);
+		ipmove(eh6->src, c->laddr);
+		ipmove(eh6->dst, c->raddr);
+		eh6->proto = IP_ESPPROTO;
+	}
+
+	/* authenticate from the esp header (spi) through the trailer */
+	ecb->auth(ecb, bp->rp + iphdrlen, (hdrlen - iphdrlen) +
+		payload + pad + Esptaillen, auth);
+
+	QUNLOCK(c);
+	/* print("esp: pass down: %uld\n", BLEN(bp)); */
+	if (version == V4)
+		ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+	else
+		ipoput6(c->p->f, bp, 0, c->ttl, c->tos, c);
+}
+
+/*
+ *  input routine: bp is a received packet starting at its ip header.
+ *  the conversation is found by spi; the packet is authenticated,
+ *  decrypted, stripped of headers, padding and trailer, optionally
+ *  prefixed with a user header, and passed to the conversation's
+ *  read queue.  packets that fail any check are logged and freed.
+ *
+ *  NOTE(review): spi is a ulong but several netlog formats below
+ *  print it with %d, and one prints BLEN(bp) with %d - confirm the
+ *  verbs match the argument sizes on this platform.
+ */
+void
+espiput(Proto *esp, Ipifc* _, Block *bp)
+{
+	Esp4hdr *eh4;
+	Esp6hdr *eh6;
+	Esptail *et;
+	Userhdr *uh;
+	Conv *c;
+	Espcb *ecb;
+	uchar raddr[IPaddrlen], laddr[IPaddrlen];
+	Fs *f;
+	uchar *auth, *espspi;
+	ulong spi;
+	int payload, nexthdr, version, hdrlen;
+
+	f = esp->f;
+	if (bp == nil || BLEN(bp) == 0) {
+		/* get enough to identify the IP version */
+		bp = pullupblock(bp, IP4HDR);
+		if(bp == nil) {
+			netlog(f, Logesp, "esp: short packet\n");
+			return;
+		}
+	}
+	/* vihl is the first byte of either header, so this works for v6 too */
+	eh4 = (Esp4hdr*)bp->rp;
+	version = ((eh4->vihl & 0xf0) == IP_VER4? V4: V6);
+	hdrlen = version == V4? Esp4hdrlen: Esp6hdrlen;
+
+	bp = pullupblock(bp, hdrlen + Esptaillen);
+	if(bp == nil) {
+		netlog(f, Logesp, "esp: short packet\n");
+		return;
+	}
+
+	/* the sender's source is our remote, its destination our local */
+	if (version == V4) {
+		eh4 = (Esp4hdr*)bp->rp;
+		spi = nhgetl(eh4->espspi);
+		v4tov6(raddr, eh4->espsrc);
+		v4tov6(laddr, eh4->espdst);
+	} else {
+		eh6 = (Esp6hdr*)bp->rp;
+		spi = nhgetl(eh6->espspi);
+		ipmove(raddr, eh6->src);
+		ipmove(laddr, eh6->dst);
+	}
+
+	QLOCK(esp);
+	/* Look for a conversation structure for this port */
+	c = convlookup(esp, spi);
+	if(c == nil) {
+		QUNLOCK(esp);
+		netlog(f, Logesp, "esp: no conv %I -> %I!%d\n", raddr,
+			laddr, spi);
+		icmpnoconv(f, bp);
+		freeblist(bp);
+		return;
+	}
+
+	/* take the conversation lock before letting go of the protocol */
+	QLOCK(c);
+	QUNLOCK(esp);
+
+	ecb = c->ptcl;
+	/* too hard to do decryption/authentication on block lists */
+	if(bp->next)
+		bp = concatblock(bp);
+
+	if(BLEN(bp) < hdrlen + ecb->espivlen + Esptaillen + ecb->ahlen) {
+		QUNLOCK(c);
+		netlog(f, Logesp, "esp: short block %I -> %I!%d\n", raddr,
+			laddr, spi);
+		freeb(bp);
+		return;
+	}
+
+	/* the auth data is the last ahlen bytes; it covers spi up to itself */
+	auth = bp->wp - ecb->ahlen;
+	espspi = version == V4? ((Esp4hdr*)bp->rp)->espspi:
+				((Esp6hdr*)bp->rp)->espspi;
+	if(!ecb->auth(ecb, espspi, auth - espspi, auth)) {
+		QUNLOCK(c);
+print("esp: bad auth %I -> %I!%ld\n", raddr, laddr, spi);
+		netlog(f, Logesp, "esp: bad auth %I -> %I!%d\n", raddr,
+			laddr, spi);
+		freeb(bp);
+		return;
+	}
+
+	/* what remains between header and auth data: iv, data, pad, trailer */
+	payload = BLEN(bp) - hdrlen - ecb->ahlen;
+	/* NOTE(review): divides by espblklen; assumes the cipher init routine set it non-zero */
+	if(payload <= 0 || payload % 4 != 0 || payload % ecb->espblklen != 0) {
+		QUNLOCK(c);
+		netlog(f, Logesp, "esp: bad length %I -> %I!%d payload=%d BLEN=%d\n",
+			raddr, laddr, spi, payload, BLEN(bp));
+		freeb(bp);
+		return;
+	}
+	if(!ecb->cipher(ecb, bp->rp + hdrlen, payload)) {
+		QUNLOCK(c);
+print("esp: cipher failed %I -> %I!%ld: %s\n", raddr, laddr, spi, up->errstr);
+		netlog(f, Logesp, "esp: cipher failed %I -> %I!%d: %s\n", raddr,
+			laddr, spi, up->errstr);
+		freeb(bp);
+		return;
+	}
+
+	/* the trailer sits at the end; it says how much padding precedes it */
+	payload -= Esptaillen;
+	et = (Esptail*)(bp->rp + hdrlen + payload);
+	payload -= et->pad + ecb->espivlen;
+	nexthdr = et->nexthdr;
+	if(payload <= 0) {
+		QUNLOCK(c);
+		netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%d\n",
+			raddr, laddr, spi);
+		freeb(bp);
+		return;
+	}
+
+	/* trim packet */
+	bp->rp += hdrlen + ecb->espivlen;
+	bp->wp = bp->rp + payload;
+	if(ecb->header) {
+		/* assume Userhdrlen < Esp4hdrlen < Esp6hdrlen */
+		/* so the user header fits in the space of the header just skipped */
+		bp->rp -= Userhdrlen;
+		uh = (Userhdr*)bp->rp;
+		memset(uh, 0, Userhdrlen);
+		uh->nexthdr = nexthdr;
+	}
+
+	if(qfull(c->rq)){
+		netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", raddr,
+			laddr, spi);
+		freeblist(bp);
+	}else {
+//		print("esp: pass up: %uld\n", BLEN(bp));
+		qpass(c->rq, bp);
+	}
+
+	QUNLOCK(c);
+}
+
+/*
+ *  handle a control request written to an esp conversation.
+ *  f[0] selects the request: "esp" and "ah" install an algorithm
+ *  through setalg, "header" and "noheader" set ecb->header.
+ *  returns nil on success or an error string.
+ */
+char*
+espctl(Conv *c, char **f, int n)
+{
+	Espcb *ecb;
+	char *req;
+
+	ecb = c->ptcl;
+	req = f[0];
+	if(strcmp(req, "esp") == 0)
+		return setalg(ecb, f, n, espalg);
+	if(strcmp(req, "ah") == 0)
+		return setalg(ecb, f, n, ahalg);
+	if(strcmp(req, "header") == 0){
+		ecb->header = 1;
+		return nil;
+	}
+	if(strcmp(req, "noheader") == 0){
+		ecb->header = 0;
+		return nil;
+	}
+	return "unknown control request";
+}
+
+/*
+ *  advise hook for the esp protocol: bp is treated as starting
+ *  with an Esp4hdr.  the conversation whose spi matches the one
+ *  in that header has both of its queues hung up with msg.
+ *  bp is always freed.
+ */
+void
+espadvise(Proto *esp, Block *bp, char *msg)
+{
+	Esp4hdr *h;
+	Conv *c;
+	ulong spi;
+
+	h = (Esp4hdr*)(bp->rp);
+
+	/*
+	 *  espiput reads this field with nhgetl and convlookup compares
+	 *  the whole ulong, so a 16-bit nhgets here could never match
+	 *  an spi that does not fit in its first two bytes.
+	 */
+	spi = nhgetl(h->espspi);
+	QLOCK(esp);
+	c = convlookup(esp, spi);
+	if(c != nil) {
+		qhangup(c->rq, msg);
+		qhangup(c->wq, msg);
+	}
+	QUNLOCK(esp);
+	freeblist(bp);
+}
+
+/*
+ *  format the protocol's two counters (in, inerrors) into buf;
+ *  returns whatever snprint returns.
+ */
+int
+espstats(Proto *esp, char *buf, int len)
+{
+	Esppriv *priv = esp->priv;
+
+	return snprint(buf, len, "%lud %lud\n", priv->in, priv->inerrors);
+}
+
+/*
+ *  format the local address of c into buf.  for an incoming
+ *  conversation the spi is appended after a '!'.
+ */
+static int
+esplocal(Conv *c, char *buf, int len)
+{
+	Espcb *cb;
+	int nw;
+
+	cb = c->ptcl;
+	QLOCK(c);
+	if(!cb->incoming)
+		nw = snprint(buf, len, "%I\n", c->laddr);
+	else
+		nw = snprint(buf, len, "%I!%uld\n", c->laddr, cb->spi);
+	QUNLOCK(c);
+	return nw;
+}
+
+/*
+ *  format the remote address of c into buf.  for an outgoing
+ *  (not incoming) conversation the spi is appended after a '!'.
+ */
+static int
+espremote(Conv *c, char *buf, int len)
+{
+	Espcb *cb;
+	int nw;
+
+	cb = c->ptcl;
+	QLOCK(c);
+	if(!cb->incoming)
+		nw = snprint(buf, len, "%I!%uld\n", c->raddr, cb->spi);
+	else
+		nw = snprint(buf, len, "%I\n", c->raddr);
+	QUNLOCK(c);
+	return nw;
+}
+
+/*
+ *  walk the nil-terminated conversation table of esp and return
+ *  the first incoming conversation whose spi equals spi, or nil.
+ */
+static	Conv*
+convlookup(Proto *esp, ulong spi)
+{
+	Conv **cp;
+	Espcb *cb;
+
+	cp = esp->conv;
+	while(*cp != nil){
+		cb = (*cp)->ptcl;
+		if(cb->incoming && cb->spi == spi)
+			return *cp;
+		cp++;
+	}
+	return nil;
+}
+
+/*
+ *  parse an "esp" or "ah" control request and install the algorithm.
+ *  f[1] names an entry in the table alg (terminated by a nil name)
+ *  and f[2] is the key as a string of hex digits.  digits are packed
+ *  starting from the end of the string, so the last two digits form
+ *  key[0].  a short string leaves the remaining key bytes zero and
+ *  surplus leading digits are ignored.
+ *  f[2] is overwritten with the digit values while parsing.
+ *  returns nil on success or an error string.
+ */
+static char *
+setalg(Espcb *ecb, char **f, int n, Algorithm *alg)
+{
+	uchar *key;
+	int c, i, nbyte, nchar;
+
+	if(n < 2)
+		return "bad format";
+	for(; alg->name; alg++)
+		if(strcmp(f[1], alg->name) == 0)
+			break;
+	if(alg->name == nil)
+		return "unknown algorithm";
+
+	if(n != 3)
+		return "bad format";
+	nbyte = (alg->keylen + 7) >> 3;
+	nchar = strlen(f[2]);
+
+	/* convert each hex digit to its value in place, rejecting anything else */
+	for(i=0; i<nchar; i++) {
+		c = f[2][i];
+		if(c >= '0' && c <= '9')
+			f[2][i] -= '0';
+		else if(c >= 'a' && c <= 'f')
+			f[2][i] -= 'a'-10;
+		else if(c >= 'A' && c <= 'F')
+			f[2][i] -= 'A'-10;
+		else
+			return "bad character in key";
+	}
+
+	key = smalloc(nbyte);
+	memset(key, 0, nbyte);	/* the loop below only ORs bits in */
+
+	/*
+	 *  i counts hex digits, two per key byte, so the limit is on
+	 *  i/2.  the old bound (i*2 < nbyte) stopped after nbyte/2
+	 *  digits and left three quarters of the key zero.
+	 */
+	for(i=0; i<nchar && i/2<nbyte; i++) {
+		c = f[2][nchar-i-1];
+		if(i&1)
+			c <<= 4;
+		key[i>>1] |= c;
+	}
+
+	alg->init(ecb, alg->name, key, alg->keylen);
+	free(key);
+	return nil;
+}
+
+/*
+ *  cipher routine installed by nullespinit: ignores its
+ *  arguments, leaves the data untouched and reports success (1).
+ */
+static int
+nullcipher(Espcb* _, uchar* __, int ___)
+{
+	return 1;
+}
+
+/*
+ *  set up ecb for the null esp algorithm: block length 1,
+ *  no iv, and a cipher routine that does nothing.
+ *  the key and key length are ignored.
+ */
+static void
+nullespinit(Espcb *ecb, char *name, uchar* _, int __)
+{
+	ecb->cipher = nullcipher;
+	ecb->espivlen = 0;
+	ecb->espblklen = 1;
+	ecb->espalg = name;
+}
+
+/*
+ *  authentication routine installed by nullahinit: ignores its
+ *  arguments and always reports success (1).
+ */
+static int
+nullauth(Espcb* _, uchar* __, int ___, uchar* ____)
+{
+	return 1;
+}
+
+/*
+ *  set up ecb for the null authentication algorithm: block
+ *  length 1, zero-length authenticator, and an auth routine
+ *  that accepts everything.  the key is ignored.
+ */
+static void
+nullahinit(Espcb *ecb, char *name, uchar* _, int __)
+{
+	ecb->auth = nullauth;
+	ecb->ahlen = 0;
+	ecb->ahblklen = 1;
+	ecb->ahalg = name;
+}
+
+/*
+ *  keyed sha1 over the tlen bytes at t, result in hash.
+ *  the key is xor'ed into two 64-byte pads (0x36 for the inner
+ *  hash, 0x5c for the outer one); the inner digest is then hashed
+ *  again behind the outer pad.  the pads are fixed-size arrays
+ *  indexed up to klen, so klen has to fit in them.
+ */
+void
+seanq_hmac_sha1(uchar hash[SHA1dlen], uchar *t, long tlen, uchar *key, long klen)
+{
+	uchar ipad[65], opad[65];
+	uchar innerhash[SHA1dlen];
+	DigestState *digest;
+	int i;
+
+	memset(ipad, 0x36, 64);
+	memset(opad, 0x5c, 64);
+	ipad[64] = 0;
+	opad[64] = 0;
+	for(i=0; i<klen; i++){
+		ipad[i] ^= key[i];
+		opad[i] ^= key[i];
+	}
+
+	/* inner hash: pad followed by the text */
+	digest = sha1(ipad, 64, nil, nil);
+	sha1(t, tlen, innerhash, digest);
+
+	/* outer hash: pad followed by the inner digest */
+	digest = sha1(opad, 64, nil, nil);
+	sha1(innerhash, SHA1dlen, hash, digest);
+}
+
+/*
+ *  compute the keyed sha1 of t (16-byte key in ecb->ahstate)
+ *  and compare its first ecb->ahlen bytes with auth.
+ *  auth is overwritten with the computed value either way.
+ *  returns 1 if they matched, 0 otherwise.
+ */
+static int
+shaauth(Espcb *ecb, uchar *t, int tlen, uchar *auth)
+{
+	uchar digest[SHA1dlen];
+	int ok;
+
+	memset(digest, 0, SHA1dlen);
+	seanq_hmac_sha1(digest, t, tlen, (uchar*)ecb->ahstate, 16);
+	ok = 1;
+	if(memcmp(auth, digest, ecb->ahlen) != 0)
+		ok = 0;
+	memmove(auth, digest, ecb->ahlen);
+	return ok;
+}
+
+/*
+ *  set up ecb for keyed-sha1 authentication with a 12-byte
+ *  authenticator.  klen is the key length in bits and must be
+ *  128; a private copy of the key is kept in ecb->ahstate.
+ */
+static void
+shaahinit(Espcb *ecb, char *name, uchar *key, int klen)
+{
+	if(klen != 128)
+		panic("shaahinit: bad keylen");
+	/*
+	 *  convert to bytes.  this used to shift by 8, which gave 0,
+	 *  so no key was copied although shaauth reads 16 bytes
+	 *  from ahstate.
+	 */
+	klen >>= 3;
+
+	ecb->ahalg = name;
+	ecb->ahblklen = 1;
+	ecb->ahlen = 12;
+	ecb->auth = shaauth;
+	ecb->ahstate = smalloc(klen);
+	memmove(ecb->ahstate, key, klen);
+}
+
+/*
+ *  keyed md5 over the tlen bytes at t, result in hash.
+ *  same construction as seanq_hmac_sha1: the key is xor'ed into
+ *  two 64-byte pads (0x36 inner, 0x5c outer) and the inner digest
+ *  is hashed again behind the outer pad.  klen has to fit in the
+ *  fixed-size pad arrays.
+ */
+void
+seanq_hmac_md5(uchar hash[MD5dlen], uchar *t, long tlen, uchar *key, long klen)
+{
+	uchar ipad[65], opad[65];
+	uchar innerhash[MD5dlen];
+	DigestState *digest;
+	int i;
+
+	memset(ipad, 0x36, 64);
+	memset(opad, 0x5c, 64);
+	ipad[64] = 0;
+	opad[64] = 0;
+	for(i=0; i<klen; i++){
+		ipad[i] ^= key[i];
+		opad[i] ^= key[i];
+	}
+
+	/* inner hash: pad followed by the text */
+	digest = md5(ipad, 64, nil, nil);
+	md5(t, tlen, innerhash, digest);
+
+	/* outer hash: pad followed by the inner digest */
+	digest = md5(opad, 64, nil, nil);
+	md5(innerhash, MD5dlen, hash, digest);
+}
+
+/*
+ *  compute the keyed md5 of t (16-byte key in ecb->ahstate)
+ *  and compare its first ecb->ahlen bytes with auth.
+ *  auth is overwritten with the computed value either way.
+ *  returns 1 if they matched, 0 otherwise.
+ */
+static int
+md5auth(Espcb *ecb, uchar *t, int tlen, uchar *auth)
+{
+	uchar digest[MD5dlen];
+	int ok;
+
+	memset(digest, 0, MD5dlen);
+	seanq_hmac_md5(digest, t, tlen, (uchar*)ecb->ahstate, 16);
+	ok = 1;
+	if(memcmp(auth, digest, ecb->ahlen) != 0)
+		ok = 0;
+	memmove(auth, digest, ecb->ahlen);
+	return ok;
+}
+
+/*
+ *  set up ecb for keyed-md5 authentication with a 12-byte
+ *  authenticator.  klen is the key length in bits and must be
+ *  128; a private copy of the key is kept in ecb->ahstate.
+ */
+static void
+md5ahinit(Espcb *ecb, char *name, uchar *key, int klen)
+{
+	int nbytes;
+
+	if(klen != 128)
+		panic("md5ahinit: bad keylen");
+	nbytes = klen >> 3;		/* bits to bytes */
+
+	ecb->auth = md5auth;
+	ecb->ahlen = 12;
+	ecb->ahblklen = 1;
+	ecb->ahalg = name;
+	ecb->ahstate = smalloc(nbytes);
+	memmove(ecb->ahstate, key, nbytes);
+}
+
+/*
+ *  des cipher routine for esp, chaining 8-byte blocks through
+ *  ds->ivec.  p points at n bytes laid out as an 8-byte iv
+ *  followed by the data blocks; the data is transformed in place.
+ *  NOTE(review): the last argument of block_cipher is 1 on the
+ *  incoming path and 0 on the outgoing one, presumably
+ *  decrypt/encrypt; confirm against its definition.
+ *  always returns 1.
+ */
+static int
+descipher(Espcb *ecb, uchar *p, int n)
+{
+	uchar tmp[8];
+	uchar *pp, *tp, *ip, *eip, *ep;
+	DESstate *ds = ecb->espstate;
+
+	ep = p + n;
+	if(ecb->incoming) {
+		/* the iv travels at the front of the payload */
+		memmove(ds->ivec, p, 8);
+		p += 8;
+		while(p < ep){
+			/* keep the input block: it chains into the next one */
+			memmove(tmp, p, 8);
+			block_cipher(ds->expanded, p, 1);
+			tp = tmp;
+			ip = ds->ivec;
+			/* xor with the previous block and advance p past this one */
+			for(eip = ip+8; ip < eip; ){
+				*p++ ^= *ip;
+				*ip++ = *tp++;
+			}
+		}
+	} else {
+		/* send the current iv in front of the data */
+		memmove(p, ds->ivec, 8);
+		for(p += 8; p < ep; p += 8){
+			pp = p;
+			ip = ds->ivec;
+			/* xor the block with the previous output before ciphering */
+			for(eip = ip+8; ip < eip; )
+				*pp++ ^= *ip++;
+			block_cipher(ds->expanded, p, 0);
+			/* this output becomes the chaining value for the next block */
+			memmove(ds->ivec, p, 8);
+		}
+	}
+	return 1;
+}
+
+/*
+ *  set up ecb for des: 8-byte blocks with an 8-byte iv.
+ *  n is the key length in bits; at most 8 key bytes are taken
+ *  from k and a shorter key is zero padded.  the initial iv is
+ *  filled from nrand.
+ */
+static void
+desespinit(Espcb *ecb, char *name, uchar *k, int n)
+{
+	uchar key[8], ivec[8];
+	int i, nbytes;
+
+	nbytes = (n+7)>>3;	/* bits to bytes */
+	if(nbytes > 8)
+		nbytes = 8;
+	memset(key, 0, sizeof(key));
+	memmove(key, k, nbytes);
+
+	i = 0;
+	while(i < 8)
+		ivec[i++] = nrand(256);
+
+	ecb->cipher = descipher;
+	ecb->espivlen = 8;
+	ecb->espblklen = 8;
+	ecb->espalg = name;
+	ecb->espstate = smalloc(sizeof(DESstate));
+	setupDESstate(ecb->espstate, key, ivec);
+}
+
+/*
+ *  allocate the esp Proto, fill in its entry points and
+ *  parameters, and hand it to Fsproto for fs.
+ */
+void
+espinit(Fs *fs)
+{
+	Proto *esp;
+
+	esp = smalloc(sizeof(Proto));
+	esp->priv = smalloc(sizeof(Esppriv));
+
+	/* identity and sizes */
+	esp->name = "esp";
+	esp->ipproto = IP_ESPPROTO;
+	esp->nc = Nchans;
+	esp->ptclsize = sizeof(Espcb);
+
+	/* conversation life cycle */
+	esp->create = espcreate;
+	esp->connect = espconnect;
+	esp->announce = nil;
+	esp->close = espclose;
+
+	/* data and control */
+	esp->rcv = espiput;
+	esp->ctl = espctl;
+	esp->advise = espadvise;
+
+	/* status files */
+	esp->state = espstate;
+	esp->stats = espstats;
+	esp->local = esplocal;
+	esp->remote = espremote;
+
+	Fsproto(fs, esp);
+}
+
+
+#ifdef notdef
+/*
+ *  limits, in bytes of key stream, on how far rc4cipher will
+ *  follow a sequence number away from the current position.
+ *  (this code is compiled out by the enclosing #ifdef notdef.)
+ */
+enum {
+	RC4forward= 10*1024*1024,	/* maximum skip forward */
+	RC4back = 100*1024,	/* maximum look back */
+};
+
+/*
+ *  per-conversation rc4 state: the current key stream position
+ *  plus an older snapshot used for packets that arrive late.
+ */
+typedef struct Esprc4 Esprc4;
+struct Esprc4
+{
+	ulong	cseq;		/* current byte sequence number */
+	RC4state current;
+
+	int	ovalid;		/* old is valid */
+	ulong	lgseq;		/* last good sequence */
+	ulong	oseq;		/* old byte sequence number */
+	RC4state old;
+};
+
+static void rc4espinit(Espcb *ecb, char *name, uchar *k, int n);
+
+/*
+ *  rc4 cipher routine for esp (compiled out by #ifdef notdef).
+ *  the first 4 bytes of the payload carry the byte sequence
+ *  number of the key stream position; the rest is run through rc4.
+ *  outgoing: write the current position and cipher the data.
+ *  incoming: compare the carried position with the expected one
+ *  and either continue, skip forward, or use the saved old state
+ *  for a late packet.
+ *  returns 1 on success, 0 on failure; two of the failure paths
+ *  set up->errstr.
+ */
+static int
+rc4cipher(Espcb *ecb, uchar *p, int n)
+{
+	Esprc4 *esprc4;
+	RC4state tmpstate;
+	ulong seq;
+	long d, dd;
+
+	/* need at least the 4-byte sequence number */
+	if(n < 4)
+		return 0;
+
+	esprc4 = ecb->espstate;
+	if(ecb->incoming) {
+		seq = nhgetl(p);
+		p += 4;
+		n -= 4;
+		/* d: distance of this packet from the expected position */
+		d = seq-esprc4->cseq;
+		if(d == 0) {
+			/* in order: continue the key stream */
+			rc4(&esprc4->current, p, n);
+			esprc4->cseq += n;
+			if(esprc4->ovalid) {
+				/* drop the old snapshot once we are RC4back past the last gap */
+				dd = esprc4->cseq - esprc4->lgseq;
+				if(dd > RC4back)
+					esprc4->ovalid = 0;
+			}
+		} else if(d > 0) {
+print("esp rc4cipher: missing packet: %uld %ld\n", seq, d); /* this link is hosed */
+			if(d > RC4forward) {
+				strcpy(up->errstr, "rc4cipher: skipped too much");
+				return 0;
+			}
+			esprc4->lgseq = seq;
+			/* snapshot the state before the gap so late packets can still be handled */
+			if(!esprc4->ovalid) {
+				esprc4->ovalid = 1;
+				esprc4->oseq = esprc4->cseq;
+				memmove(&esprc4->old, &esprc4->current,
+					sizeof(RC4state));
+			}
+			rc4skip(&esprc4->current, d);
+			rc4(&esprc4->current, p, n);
+			esprc4->cseq = seq+n;
+		} else {
+print("esp rc4cipher: reordered packet: %uld %ld\n", seq, d);
+			/* dd: offset of this packet from the old snapshot */
+			dd = seq - esprc4->oseq;
+			if(!esprc4->ovalid || -d > RC4back || dd < 0) {
+				strcpy(up->errstr, "rc4cipher: too far back");
+				return 0;
+			}
+			/* work on a copy so the snapshot itself is not advanced */
+			memmove(&tmpstate, &esprc4->old, sizeof(RC4state));
+			rc4skip(&tmpstate, dd);
+			rc4(&tmpstate, p, n);
+			return 1;
+		}
+
+		/* move old state up */
+		if(esprc4->ovalid) {
+			dd = esprc4->cseq - RC4back - esprc4->oseq;
+			if(dd > 0) {
+				rc4skip(&esprc4->old, dd);
+				esprc4->oseq += dd;
+			}
+		}
+	} else {
+		/* outgoing: stamp the position, then cipher the rest */
+		hnputl(p, esprc4->cseq);
+		p += 4;
+		n -= 4;
+		rc4(&esprc4->current, p, n);
+		esprc4->cseq += n;
+	}
+	return 1;
+}
+
+/*
+ *  set up ecb for rc4 (compiled out by #ifdef notdef):
+ *  4-byte blocks and a 4-byte iv field, with a zeroed Esprc4
+ *  whose current state is keyed from k.  n is the key length
+ *  in bits.
+ */
+static void
+rc4espinit(Espcb *ecb, char *name, uchar *k, int n)
+{
+	Esprc4 *st;
+	int nbytes;
+
+	nbytes = (n+7)>>3;	/* bits to bytes */
+	st = smalloc(sizeof(Esprc4));
+	memset(st, 0, sizeof(Esprc4));
+	setupRC4state(&st->current, k, nbytes);
+
+	ecb->espstate = st;
+	ecb->cipher = rc4cipher;
+	ecb->espivlen = 4;
+	ecb->espblklen = 4;
+	ecb->espalg = name;
+}
+#endif
diff --git a/src/9vx/a/ip/ethermedium.c b/src/9vx/a/ip/ethermedium.c
@@ -0,0 +1,766 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "netif.h"
+#include "ip.h"
+#include "ipv6.h"
+
+/*
+ *  14-byte ethernet header that etherbwrite puts in front of
+ *  each outgoing packet
+ */
+typedef struct Etherhdr Etherhdr;
+struct Etherhdr
+{
+	uchar	d[6];	/* destination mac address */
+	uchar	s[6];	/* source mac address */
+	uchar	t[2];	/* ether type, e.g. 0x0800 for v4, 0x86DD for v6 */
+};
+
+/* all-ones ip address; recvarp refuses to enter it in the arp table */
+static uchar ipbroadcast[IPaddrlen] = {
+	0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,
+};
+
+/* all-ones mac address; recvarp refuses to enter it in the arp table */
+static uchar etherbroadcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+static void	etherread4(void *a);
+static void	etherread6(void *a);
+static void	etherbind(Ipifc *ifc, int argc, char **argv);
+static void	etherunbind(Ipifc *ifc);
+static void	etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
+static void	etheraddmulti(Ipifc *ifc, uchar *a, uchar *ia);
+static void	etherremmulti(Ipifc *ifc, uchar *a, uchar *ia);
+static Block*	multicastarp(Fs *f, Arpent *a, Medium*, uchar *mac);
+static void	sendarp(Ipifc *ifc, Arpent *a);
+static void	sendgarp(Ipifc *ifc, uchar*);
+static int	multicastea(uchar *ea, uchar *ip);
+static void	recvarpproc(void*);
+static void	resolveaddr6(Ipifc *ifc, Arpent *a);
+static void	etherpref2addr(uchar *pref, uchar *ea);
+
+/*
+ *  medium description for ordinary ethernet: 14-byte header,
+ *  frames of 60 to 1514 bytes, 6-byte mac addresses.
+ *  registered by ethermediumlink.
+ */
+Medium ethermedium =
+{
+.name=		"ether",
+.hsize=		14,
+.mintu=		60,
+.maxtu=		1514,
+.maclen=	6,
+.bind=		etherbind,
+.unbind=	etherunbind,
+.bwrite=	etherbwrite,
+.addmulti=	etheraddmulti,
+.remmulti=	etherremmulti,
+.ares=		arpenter,
+.areg=		sendgarp,
+.pref2addr=	etherpref2addr,
+};
+
+/*
+ *  same as ethermedium except for the name and a maximum
+ *  frame size of 9014 bytes.  registered by ethermediumlink.
+ */
+Medium gbemedium =
+{
+.name=		"gbe",
+.hsize=		14,
+.mintu=		60,
+.maxtu=		9014,
+.maclen=	6,
+.bind=		etherbind,
+.unbind=	etherunbind,
+.bwrite=	etherbwrite,
+.addmulti=	etheraddmulti,
+.remmulti=	etherremmulti,
+.ares=		arpenter,
+.areg=		sendgarp,
+.pref2addr=	etherpref2addr,
+};
+
+/*
+ *  per-interface state kept in ifc->arg: the channels opened by
+ *  etherbind and the three helper processes, each of which
+ *  stores its own Proc here when it starts and clears it when
+ *  it exits so that etherunbind can wait for them.
+ */
+typedef struct	Etherrock Etherrock;
+struct Etherrock
+{
+	Fs	*f;		/* file system we belong to */
+	Proc	*arpp;		/* arp process */
+	Proc	*read4p;	/* reading process (v4)*/
+	Proc	*read6p;	/* reading process (v6)*/
+	Chan	*mchan4;	/* Data channel for v4 */
+	Chan	*achan;		/* Arp channel */
+	Chan	*cchan4;	/* Control channel for v4 */
+	Chan	*mchan6;	/* Data channel for v6 */
+	Chan	*cchan6;	/* Control channel for v6 */
+};
+
+/*
+ *  ethernet arp request
+ */
+enum
+{
+	ARPREQUEST	= 1,	/* value of op in a request */
+	ARPREPLY	= 2,	/* value of op in a reply */
+};
+
+/*
+ *  an arp packet as sent on the wire, ethernet header included.
+ *  multi-byte fields are written with hnputs and read with nhgets.
+ */
+typedef struct Etherarp Etherarp;
+struct Etherarp
+{
+	uchar	d[6];		/* ethernet destination */
+	uchar	s[6];		/* ethernet source */
+	uchar	type[2];	/* ether type (ETARP) */
+	uchar	hrd[2];		/* hardware type; this file always sends 1 */
+	uchar	pro[2];		/* protocol type (ETIP4) */
+	uchar	hln;		/* length of a hardware address (6) */
+	uchar	pln;		/* length of a protocol address (4) */
+	uchar	op[2];		/* ARPREQUEST or ARPREPLY */
+	uchar	sha[6];		/* sender hardware address */
+	uchar	spa[4];		/* sender protocol (v4) address */
+	uchar	tha[6];		/* target hardware address */
+	uchar	tpa[4];		/* target protocol (v4) address */
+};
+
+static char *nbmsg = "nonblocking";
+
+/*
+ *  called to bind an IP ifc to an ethernet device
+ *  called with ifc wlock'd
+ *
+ *  argv[2] names the ethernet device.  opens the v4, arp and v6
+ *  conversations on it, reads the mac address and speed from its
+ *  stats file, stores the channels in a new Etherrock hung off
+ *  ifc->arg and starts the three helper processes.
+ *  on error every channel opened so far is closed and the error
+ *  is passed on.
+ */
+
+static void
+etherbind(Ipifc *ifc, int argc, char **argv)
+{
+	Chan *mchan4, *cchan4, *achan, *mchan6, *cchan6, *schan;
+	char addr[Maxpath];	//char addr[2*KNAMELEN];
+	char dir[Maxpath];	//char dir[2*KNAMELEN];
+	char *buf;
+	int n;
+	char *ptr;
+	Etherrock *er;
+
+	/* argv[2] is used below, so at least three arguments are needed */
+	if(argc < 3)
+		error(Ebadarg);
+
+	mchan4 = cchan4 = achan = mchan6 = cchan6 = nil;
+	buf = nil;
+	if(waserror()){
+		if(mchan4 != nil)
+			cclose(mchan4);
+		if(cchan4 != nil)
+			cclose(cchan4);
+		if(achan != nil)
+			cclose(achan);
+		if(mchan6 != nil)
+			cclose(mchan6);
+		if(cchan6 != nil)
+			cclose(cchan6);
+		if(buf != nil)
+			free(buf);
+		nexterror();
+	}
+
+	/*
+	 *  open ipv4 conversation
+	 *
+	 *  the dial will fail if the type is already open on
+	 *  this device.
+	 */
+	snprint(addr, sizeof(addr), "%s!0x800", argv[2]);	/* ETIP4 */
+	mchan4 = chandial(addr, nil, dir, &cchan4);
+
+	/*
+	 *  make it non-blocking
+	 */
+	devtab[cchan4->type]->write(cchan4, nbmsg, strlen(nbmsg), 0);
+
+	/*
+	 *  get mac address and speed
+	 */
+	snprint(addr, sizeof(addr), "%s/stats", argv[2]);
+	buf = smalloc(512);
+	schan = namec(addr, Aopen, OREAD, 0);
+	if(waserror()){
+		cclose(schan);
+		nexterror();
+	}
+	/* read at most 511 bytes so there is room for the terminator */
+	n = devtab[schan->type]->read(schan, buf, 511, 0);
+	cclose(schan);
+	poperror();
+	buf[n] = 0;
+
+	ptr = strstr(buf, "addr: ");
+	if(!ptr)
+		error(Eio);
+	ptr += 6;
+	parsemac(ifc->mac, ptr, 6);
+
+	/* default to 100 if the stats file does not give a speed */
+	ptr = strstr(buf, "mbps: ");
+	if(ptr){
+		ptr += 6;
+		ifc->mbps = atoi(ptr);
+	} else
+		ifc->mbps = 100;
+
+	/*
+	 *  open arp conversation
+	 */
+	snprint(addr, sizeof(addr), "%s!0x806", argv[2]);	/* ETARP */
+	achan = chandial(addr, nil, nil, nil);
+
+	/*
+	 *  open ipv6 conversation
+	 *
+	 *  the dial will fail if the type is already open on
+	 *  this device.
+	 */
+	snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]);	/* ETIP6 */
+	mchan6 = chandial(addr, nil, dir, &cchan6);
+
+	/*
+	 *  make it non-blocking
+	 */
+	devtab[cchan6->type]->write(cchan6, nbmsg, strlen(nbmsg), 0);
+
+	er = smalloc(sizeof(*er));
+	er->mchan4 = mchan4;
+	er->cchan4 = cchan4;
+	er->achan = achan;
+	er->mchan6 = mchan6;
+	er->cchan6 = cchan6;
+	er->f = ifc->conv->p->f;
+	ifc->arg = er;
+
+	free(buf);
+	poperror();
+
+	/* each helper records itself in the Etherrock when it starts */
+	kproc("etherread4", etherread4, ifc);
+	kproc("recvarpproc", recvarpproc, ifc);
+	kproc("etherread6", etherread6, ifc);
+}
+
+/*
+ *  called with ifc wlock'd
+ *
+ *  post an "unbind" note to each helper process recorded in the
+ *  Etherrock, wait until all three have cleared their slot, then
+ *  close the channels and free the rock.
+ */
+static void
+etherunbind(Ipifc *ifc)
+{
+	Etherrock *rock;
+
+	rock = ifc->arg;
+	if(rock->read4p != 0)
+		postnote(rock->read4p, 1, "unbind", 0);
+	if(rock->read6p != 0)
+		postnote(rock->read6p, 1, "unbind", 0);
+	if(rock->arpp != 0)
+		postnote(rock->arpp, 1, "unbind", 0);
+
+	/* wait for readers to die */
+	for(;;){
+		if(rock->arpp == 0 && rock->read4p == 0 && rock->read6p == 0)
+			break;
+		tsleep(&up->sleep, return0, 0, 300);
+	}
+
+	if(rock->mchan4)
+		cclose(rock->mchan4);
+	if(rock->achan)
+		cclose(rock->achan);
+	if(rock->cchan4)
+		cclose(rock->cchan4);
+	if(rock->mchan6)
+		cclose(rock->mchan6);
+	if(rock->cchan6)
+		cclose(rock->cchan6);
+
+	free(rock);
+}
+
+/*
+ *  called by ipoput with a single block to write with ifc RLOCK'd
+ *
+ *  looks up the mac address for ip.  if arpget hands back an arp
+ *  entry, the address is first tried as broadcast/multicast; when
+ *  that yields no packet either, an arp request (v4) or neighbor
+ *  solicitation (v6) is started and nothing is written now.
+ *  otherwise the ethernet header is prepended and the block is
+ *  written to the v4 or v6 data channel.
+ *  NOTE(review): presumably arpget queues bp on the entry when it
+ *  returns one; that is not visible here, confirm in arp.c.
+ */
+static void
+etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip)
+{
+	Etherhdr *eh;
+	Arpent *a;
+	uchar mac[6];
+	Etherrock *er = ifc->arg;
+
+	/* get mac address of destination */
+	a = arpget(er->f->arp, bp, version, ifc, ip, mac);
+	if(a){
+		/* check for broadcast or multicast */
+		bp = multicastarp(er->f, a, ifc->m, mac);
+		if(bp==nil){
+			/* sendarp and resolveaddr6 both call arprelease on a */
+			switch(version){
+			case V4:
+				sendarp(ifc, a);
+				break;
+			case V6:
+				resolveaddr6(ifc, a);
+				break;
+			default:
+				panic("etherbwrite: version %d", version);
+			}
+			return;
+		}
+	}
+
+	/* make it a single block with space for the ether header */
+	bp = padblock(bp, ifc->m->hsize);
+	if(bp->next)
+		bp = concatblock(bp);
+	/* bring short frames up to the interface minimum */
+	if(BLEN(bp) < ifc->mintu)
+		bp = adjustblock(bp, ifc->mintu);
+	eh = (Etherhdr*)bp->rp;
+
+	/* copy in mac addresses and ether type */
+	memmove(eh->s, ifc->mac, sizeof(eh->s));
+	memmove(eh->d, mac, sizeof(eh->d));
+
+ 	switch(version){
+	case V4:
+		eh->t[0] = 0x08;
+		eh->t[1] = 0x00;
+		devtab[er->mchan4->type]->bwrite(er->mchan4, bp, 0);
+		break;
+	case V6:
+		eh->t[0] = 0x86;
+		eh->t[1] = 0xDD;
+		devtab[er->mchan6->type]->bwrite(er->mchan6, bp, 0);
+		break;
+	default:
+		panic("etherbwrite2: version %d", version);
+	}
+	ifc->out++;
+}
+
+
+/*
+ *  process to read from the ethernet
+ *
+ *  started by etherbind with the Ipifc as argument.  loops
+ *  reading blocks from the v4 data channel, strips the medium
+ *  header and passes them to ipiput4.  any error ends the
+ *  process after clearing er->read4p.
+ */
+static void
+etherread4(void *a)
+{
+	Ipifc *ifc;
+	Block *bp;
+	Etherrock *er;
+
+	ifc = a;
+	er = ifc->arg;
+	er->read4p = up;	/* hide identity under a rock for unbind */
+	if(waserror()){
+		/* etherunbind polls this slot to see that we are gone */
+		er->read4p = 0;
+		pexit("hangup", 1);
+	}
+	for(;;){
+		bp = devtab[er->mchan4->type]->bread(er->mchan4, ifc->maxtu, 0);
+		/* drop the packet if the read lock can't be had right now */
+		if(!CANRLOCK(ifc)){
+			freeb(bp);
+			continue;
+		}
+		/* make sure the lock is released if input processing errors */
+		if(waserror()){
+			RUNLOCK(ifc);
+			nexterror();
+		}
+		ifc->in++;
+		bp->rp += ifc->m->hsize;
+		/* no addresses on the interface: nothing to deliver to */
+		if(ifc->lifc == nil)
+			freeb(bp);
+		else
+			ipiput4(er->f, ifc, bp);
+		RUNLOCK(ifc);
+		poperror();
+	}
+}
+
+
+/*
+ *  process to read from the ethernet, IPv6
+ *
+ *  started by etherbind with the Ipifc as argument.  loops
+ *  reading blocks from the v6 data channel, strips the medium
+ *  header and passes them to ipiput6.  any error ends the
+ *  process after clearing er->read6p.
+ */
+static void
+etherread6(void *a)
+{
+	Ipifc *ifc;
+	Block *bp;
+	Etherrock *er;
+
+	ifc = a;
+	er = ifc->arg;
+	er->read6p = up;	/* hide identity under a rock for unbind */
+	if(waserror()){
+		/* etherunbind polls this slot to see that we are gone */
+		er->read6p = 0;
+		pexit("hangup", 1);
+	}
+	for(;;){
+		bp = devtab[er->mchan6->type]->bread(er->mchan6, ifc->maxtu, 0);
+		/* drop the packet if the read lock can't be had right now */
+		if(!CANRLOCK(ifc)){
+			freeb(bp);
+			continue;
+		}
+		/* make sure the lock is released if input processing errors */
+		if(waserror()){
+			RUNLOCK(ifc);
+			nexterror();
+		}
+		ifc->in++;
+		bp->rp += ifc->m->hsize;
+		/* no addresses on the interface: nothing to deliver to */
+		if(ifc->lifc == nil)
+			freeb(bp);
+		else
+			ipiput6(er->f, ifc, bp);
+		RUNLOCK(ifc);
+		poperror();
+	}
+}
+
+/*
+ *  map the multicast ip address a to an ethernet address and
+ *  write an "addmulti" request for it to the control channel of
+ *  the matching ip version.  panics if a is not multicast.
+ */
+static void
+etheraddmulti(Ipifc *ifc, uchar *a, uchar *_)
+{
+	uchar mac[6];
+	char buf[64];
+	Etherrock *er;
+	Chan *ctl;
+	int version;
+
+	er = ifc->arg;
+	version = multicastea(mac, a);
+	sprint(buf, "addmulti %E", mac);
+	if(version == V4)
+		ctl = er->cchan4;
+	else if(version == V6)
+		ctl = er->cchan6;
+	else{
+		panic("etheraddmulti: version %d", version);
+		return;
+	}
+	devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
+}
+
+/*
+ *  map the multicast ip address a to an ethernet address and
+ *  write a "remmulti" request for it to the control channel of
+ *  the matching ip version.  panics if a is not multicast.
+ */
+static void
+etherremmulti(Ipifc *ifc, uchar *a, uchar *_)
+{
+	uchar mac[6];
+	char buf[64];
+	Etherrock *er;
+	Chan *ctl;
+	int version;
+
+	er = ifc->arg;
+	version = multicastea(mac, a);
+	sprint(buf, "remmulti %E", mac);
+	if(version == V4)
+		ctl = er->cchan4;
+	else if(version == V6)
+		ctl = er->cchan6;
+	else{
+		panic("etherremmulti: version %d", version);
+		return;
+	}
+	devtab[ctl->type]->write(ctl, buf, strlen(buf), 0);
+}
+
+/*
+ *  send an ethernet arp
+ *  (only v4, v6 uses the neighbor discovery, rfc1970)
+ *
+ *  broadcasts a request for a->ip unless one went out less than
+ *  a second ago.  all held packets but the last are dropped.
+ *  the arp entry is released on every path.
+ */
+static void
+sendarp(Ipifc *ifc, Arpent *a)
+{
+	int len;
+	Block *b;
+	Etherarp *req;
+	Etherrock *er;
+
+	er = ifc->arg;
+
+	/* don't do anything if it's been less than a second since the last */
+	if(NOW - a->ctime < 1000){
+		arprelease(er->f->arp, a);
+		return;
+	}
+
+	/* remove all but the last message */
+	for(b = a->hold; b != nil && b != a->last; b = a->hold){
+		a->hold = b->list;
+		freeblist(b);
+	}
+
+	/* try to keep it around for a second more */
+	a->ctime = NOW;
+	arprelease(er->f->arp, a);
+
+	/* pad the request out to the medium's minimum frame size */
+	len = sizeof(Etherarp);
+	if(len < a->type->mintu)
+		len = a->type->mintu;
+	b = allocb(len);
+	memset(b->rp, 0, len);
+	req = (Etherarp*)b->rp;
+
+	/* arp payload */
+	memmove(req->tpa, a->ip+IPv4off, sizeof(req->tpa));
+	ipv4local(ifc, req->spa);
+	memmove(req->sha, ifc->mac, sizeof(req->sha));
+
+	/* ethernet header */
+	memset(req->d, 0xff, sizeof(req->d));		/* ethernet broadcast */
+	memmove(req->s, ifc->mac, sizeof(req->s));
+	hnputs(req->type, ETARP);
+
+	hnputs(req->hrd, 1);
+	hnputs(req->pro, ETIP4);
+	req->hln = sizeof(req->sha);
+	req->pln = sizeof(req->spa);
+	hnputs(req->op, ARPREQUEST);
+	b->wp += len;
+
+	devtab[er->achan->type]->bwrite(er->achan, b, 0);
+}
+
+/*
+ *  v6 counterpart of sendarp: send a neighbor solicitation for
+ *  a->ip through icmpns unless one went out less than
+ *  ReTransTimer ago or the retransmit budget (a->rxtsrem) is
+ *  used up.  all held packets but the last are dropped.
+ *  the arp entry is released on every path.
+ */
+static void
+resolveaddr6(Ipifc *ifc, Arpent *a)
+{
+	int sflag;
+	Block *b;
+	Etherrock *er;
+	uchar ipsrc[IPaddrlen];
+
+	er = ifc->arg;
+
+	/* don't do anything if it's been less than a second since the last */
+	if(NOW - a->ctime < ReTransTimer){
+		arprelease(er->f->arp, a);
+		return;
+	}
+
+	/* remove all but the last message */
+	for(b = a->hold; b != nil && b != a->last; b = a->hold){
+		a->hold = b->list;
+		freeblist(b);
+	}
+
+	/* try to keep it around for a second more */
+	a->ctime = NOW;
+	a->rtime = NOW + ReTransTimer;
+
+	/* out of retransmissions: give up without soliciting again */
+	if(a->rxtsrem <= 0) {
+		arprelease(er->f->arp, a);
+		return;
+	}
+	a->rxtsrem--;
+	arprelease(er->f->arp, a);
+
+	sflag = ipv6anylocal(ifc, ipsrc);
+	if(sflag != 0)
+		icmpns(er->f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
+}
+
+/*
+ *  send a gratuitous arp to refresh arp caches
+ *
+ *  a broadcast request in which both the sender and the target
+ *  protocol address are ip.  nothing is sent for IPnoaddr.
+ */
+static void
+sendgarp(Ipifc *ifc, uchar *ip)
+{
+	int len;
+	Block *b;
+	Etherarp *req;
+	Etherrock *er;
+
+	er = ifc->arg;
+
+	/* don't arp for our initial non address */
+	if(ipcmp(ip, IPnoaddr) == 0)
+		return;
+
+	/* pad the request out to the medium's minimum frame size */
+	len = sizeof(Etherarp);
+	if(len < ifc->m->mintu)
+		len = ifc->m->mintu;
+	b = allocb(len);
+	memset(b->rp, 0, len);
+	req = (Etherarp*)b->rp;
+
+	/* arp payload */
+	memmove(req->tpa, ip+IPv4off, sizeof(req->tpa));
+	memmove(req->spa, ip+IPv4off, sizeof(req->spa));
+	memmove(req->sha, ifc->mac, sizeof(req->sha));
+
+	/* ethernet header */
+	memset(req->d, 0xff, sizeof(req->d));		/* ethernet broadcast */
+	memmove(req->s, ifc->mac, sizeof(req->s));
+	hnputs(req->type, ETARP);
+
+	hnputs(req->hrd, 1);
+	hnputs(req->pro, ETIP4);
+	req->hln = sizeof(req->sha);
+	req->pln = sizeof(req->spa);
+	hnputs(req->op, ARPREQUEST);
+	b->wp += len;
+
+	devtab[er->achan->type]->bwrite(er->achan, b, 0);
+}
+
+/*
+ *  read one packet from the arp channel and act on it.
+ *  replies: enter the sender in the arp table unless it claims
+ *  one of our addresses with a different mac, or is a broadcast
+ *  address.
+ *  requests: ignored until the interface has an address;
+ *  otherwise the sender is refreshed in the arp table and, if
+ *  the target is one of our addresses or one we proxy for, a
+ *  reply is written back.
+ *  the packet read is freed before returning.
+ */
+static void
+recvarp(Ipifc *ifc)
+{
+	int n;
+	Block *ebp, *rbp;
+	Etherarp *e, *r;
+	uchar ip[IPaddrlen];
+	static uchar eprinted[4];	/* last address complained about */
+	Etherrock *er = ifc->arg;
+
+	ebp = devtab[er->achan->type]->bread(er->achan, ifc->maxtu, 0);
+	if(ebp == nil)
+		return;
+
+	e = (Etherarp*)ebp->rp;
+	switch(nhgets(e->op)) {
+	default:
+		break;
+
+	case ARPREPLY:
+		/* check for machine using my ip address */
+		v4tov6(ip, e->spa);
+		if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
+				print("arprep: 0x%E/0x%E also has ip addr %V\n",
+					e->s, e->sha, e->spa);
+				break;
+			}
+		}
+
+		/* make sure we're not entering broadcast addresses */
+		if(ipcmp(ip, ipbroadcast) == 0 ||
+			!memcmp(e->sha, etherbroadcast, sizeof(e->sha))){
+			/* spa is a 4-byte v4 address, so it is printed with %V like everywhere else here */
+			print("arprep: 0x%E/0x%E cannot register broadcast address %V\n",
+				e->s, e->sha, e->spa);
+			break;
+		}
+
+		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 0);
+		break;
+
+	case ARPREQUEST:
+		/* don't answer arps till we know who we are */
+		if(ifc->lifc == 0)
+			break;
+
+		/* check for machine using my ip or ether address */
+		v4tov6(ip, e->spa);
+		if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
+				if (memcmp(eprinted, e->spa, sizeof(e->spa))){
+					/* print only once */
+					print("arpreq: 0x%E also has ip addr %V\n", e->sha, e->spa);
+					memmove(eprinted, e->spa, sizeof(e->spa));
+				}
+			}
+		} else {
+			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) == 0){
+				print("arpreq: %V also has ether addr %E\n", e->spa, e->sha);
+				break;
+			}
+		}
+
+		/* refresh what we know about sender */
+		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 1);
+
+		/* answer only requests for our address or systems we're proxying for */
+		v4tov6(ip, e->tpa);
+		if(!iplocalonifc(ifc, ip))
+		if(!ipproxyifc(er->f, ifc, ip))
+			break;
+
+		n = sizeof(Etherarp);
+		if(n < ifc->mintu)
+			n = ifc->mintu;
+		rbp = allocb(n);
+		r = (Etherarp*)rbp->rp;
+		/*
+		 *  clear the whole frame, padding included, as sendarp and
+		 *  sendgarp do; clearing only sizeof(Etherarp) would send
+		 *  whatever the fresh block held in the padding bytes.
+		 */
+		memset(r, 0, n);
+		hnputs(r->type, ETARP);
+		hnputs(r->hrd, 1);
+		hnputs(r->pro, ETIP4);
+		r->hln = sizeof(r->sha);
+		r->pln = sizeof(r->spa);
+		hnputs(r->op, ARPREPLY);
+		memmove(r->tha, e->sha, sizeof(r->tha));
+		memmove(r->tpa, e->spa, sizeof(r->tpa));
+		memmove(r->sha, ifc->mac, sizeof(r->sha));
+		memmove(r->spa, e->tpa, sizeof(r->spa));
+		memmove(r->d, e->sha, sizeof(r->d));
+		memmove(r->s, ifc->mac, sizeof(r->s));
+		rbp->wp += n;
+
+		devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
+	}
+	freeb(ebp);
+}
+
+/*
+ *  process started by etherbind to handle incoming arp packets.
+ *  records itself in er->arpp so etherunbind can post it a note,
+ *  then calls recvarp forever; any error clears the slot and
+ *  ends the process.
+ */
+static void
+recvarpproc(void *v)
+{
+	Ipifc *ifc = v;
+	Etherrock *er = ifc->arg;
+
+	er->arpp = up;
+	if(waserror()){
+		/* etherunbind polls this slot to see that we are gone */
+		er->arpp = 0;
+		pexit("hangup", 1);
+	}
+	for(;;)
+		recvarp(ifc);
+}
+
+/*
+ *  if ip is a multicast address, fill ea with the matching
+ *  ethernet multicast address: 01:00:5e plus the low 23 bits of
+ *  the address for v4, 33:33 plus the last four bytes for v6.
+ *  returns the result of ipismulticast (V4, V6, or another value
+ *  when ip is not multicast, in which case ea is left alone).
+ */
+static int
+multicastea(uchar *ea, uchar *ip)
+{
+	int version;
+
+	version = ipismulticast(ip);
+	if(version == V4){
+		ea[0] = 0x01;
+		ea[1] = 0x00;
+		ea[2] = 0x5e;
+		ea[3] = ip[13] & 0x7f;
+		ea[4] = ip[14];
+		ea[5] = ip[15];
+	}else if(version == V6){
+		ea[0] = 0x33;
+		ea[1] = 0x33;
+		ea[2] = ip[12];
+		ea[3] = ip[13];
+		ea[4] = ip[14];
+		ea[5] = ip[15];
+	}
+	return version;
+}
+
+/*
+ *  fill in an arp entry for broadcast or multicast
+ *  addresses.  Return the first queued packet for the
+ *  IP address.
+ *
+ *  returns nil, leaving the entry to ordinary arp, when a->ip is
+ *  a unicast address for us or is neither broadcast nor multicast.
+ */
+static Block*
+multicastarp(Fs *f, Arpent *a, Medium *medium, uchar *mac)
+{
+	int kind, version;
+
+	/* is it broadcast? */
+	kind = ipforme(f, a->ip);
+	if(kind == Runi)
+		return nil;
+	if(kind == Rbcast){
+		memset(mac, 0xff, 6);
+		return arpresolve(f->arp, a, medium, mac);
+	}
+
+	/* if multicast, fill in mac */
+	version = multicastea(mac, a->ip);
+	if(version == V4 || version == V6)
+		return arpresolve(f->arp, a, medium, mac);
+
+	/* let arp take care of it */
+	return nil;
+}
+
+/*
+ *  register the two ethernet media defined in this file
+ */
+void
+ethermediumlink(void)
+{
+	/* the first argument had been mangled from "&ethermedium" into a non-ascii character */
+	addipmedium(&ethermedium);
+	addipmedium(&gbemedium);
+}
+
+
+/*
+ *  fill bytes 8-15 of the v6 address pref from the 6-byte
+ *  ethernet address ea: the first three bytes of ea (with bit
+ *  0x2 set in the first), then FF FE, then the last three.
+ */
+static void
+etherpref2addr(uchar *pref, uchar *ea)
+{
+	int i;
+
+	pref[8] = ea[0] | 0x2;
+	for(i = 1; i < 3; i++)
+		pref[8+i] = ea[i];
+	pref[11] = 0xFF;
+	pref[12] = 0xFE;
+	for(i = 3; i < 6; i++)
+		pref[10+i] = ea[i];
+}
diff --git a/src/9vx/a/ip/gre.c b/src/9vx/a/ip/gre.c
@@ -0,0 +1,283 @@
+/*
+ * Generic Routing Encapsulation over IPv4, rfc1702
+ */
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+/*
+ *  sizes and protocol number used by the gre code below.
+ *  GRErxms, GREtickms and GREmaxxmit are not referenced in the
+ *  part of the file shown here.
+ */
+enum
+{
+	GRE_IPONLY	= 12,		/* size of ip header */
+	GRE_IPPLUSGRE	= 12,		/* minimum size of GRE header */
+	IP_GREPROTO	= 47,
+
+	GRErxms		= 200,
+	GREtickms	= 100,
+	GREmaxxmit	= 10,
+};
+
+/*
+ *  a gre packet as handled here: v4 ip header fields followed
+ *  by the first four bytes of the gre header
+ */
+typedef struct GREhdr
+{
+	/* ip header */
+	uchar	vihl;		/* Version and header length */
+	uchar	tos;		/* Type of service */
+	uchar	len[2];		/* packet length (including headers) */
+	uchar	id[2];		/* Identification */
+	uchar	frag[2];	/* Fragment information */
+	uchar	Unused;	/* same offset as ttl in the Icmp header layout; not set in this file */
+	uchar	proto;		/* Protocol */
+	uchar	cksum[2];	/* checksum */
+	uchar	src[4];		/* Ip source */
+	uchar	dst[4];		/* Ip destination */
+
+	/* gre header */
+	uchar	flags[2];
+	uchar	eproto[2];	/* encapsulation protocol */
+} GREhdr;
+
+/*
+ *  per-protocol private data, allocated in greinit.
+ *  raw is switched by the "raw" and "cooked" control requests.
+ */
+typedef struct GREpriv GREpriv;
+struct GREpriv
+{
+	int		raw;			/* Raw GRE mode */
+
+	/* non-MIB stats */
+	ulong		csumerr;		/* checksum errors */
+	ulong		lenerr;			/* short packet */
+};
+
+static void grekick(void *x, Block *bp);
+
+/*
+ *  connect a gre conversation.  after the standard connect,
+ *  refuse if another conversation already has the same remote
+ *  address and rport; in that case c's addresses are reset to
+ *  IPnoaddr.  returns nil on success or an error string.
+ */
+static char*
+greconnect(Conv *c, char **argv, int argc)
+{
+	Proto *p;
+	char *err;
+	Conv *tc;
+	int i;
+
+	err = Fsstdconnect(c, argv, argc);
+	if(err != nil)
+		return err;
+
+	/* make sure noone's already connected to this other sys */
+	p = c->p;
+	QLOCK(p);
+	for(i = 0; i < p->nc; i++){
+		tc = p->conv[i];
+		if(tc == nil)
+			break;
+		if(tc == c)
+			continue;
+		if(tc->rport != c->rport || ipcmp(tc->raddr, c->raddr) != 0)
+			continue;
+		err = "already connected to that addr/proto";
+		ipmove(c->laddr, IPnoaddr);
+		ipmove(c->raddr, IPnoaddr);
+		break;
+	}
+	QUNLOCK(p);
+
+	if(err != nil)
+		return err;
+	Fsconnected(c, nil);
+	return nil;
+}
+
+/*
+ *  set up the queues of a new conversation: a 64k message-mode
+ *  read queue and a write queue that bypasses queueing and
+ *  calls grekick directly
+ */
+static void
+grecreate(Conv *c)
+{
+	c->rq = qopen(64*1024, Qmsg, 0, c);
+	c->wq = qbypass(grekick, c);
+}
+
+/*
+ *  state of a gre conversation: always the string "Datagram"
+ */
+static int
+grestate(Conv *c, char *state, int n)
+{
+	USED(c);
+	return snprint(state, n, "%s\n", "Datagram");
+}
+
+/*
+ *  announce is not supported; always returns an error string
+ */
+static char*
+greannounce(Conv* _, char** __, int ___)
+{
+	return "pktifc does not support announce";
+}
+
+/*
+ *  close a conversation: close its three queues and reset its
+ *  addresses and ports
+ */
+static void
+greclose(Conv *c)
+{
+	qclose(c->rq);
+	qclose(c->wq);
+	qclose(c->eq);
+	ipmove(c->laddr, IPnoaddr);
+	ipmove(c->raddr, IPnoaddr);
+	c->lport = 0;
+	c->rport = 0;
+}
+
+int drop;
+
+/*
+ *  output routine installed on c->wq by grecreate.  makes room
+ *  in front of the gre header for the ip part, fills it in and
+ *  hands the block to ipoput4.  unless the protocol is in raw
+ *  mode, source and destination fields that convert to v4prefix
+ *  (presumably meaning unset) are taken from the conversation
+ *  and eproto is set from c->rport.
+ */
+static void
+grekick(void *x, Block *bp)
+{
+	Conv *c;
+	GREhdr *gh;
+	GREpriv *gpriv;
+	uchar src[IPaddrlen], dst[IPaddrlen];
+
+	c = x;
+	if(bp == nil)
+		return;
+
+	/* Make space to fit ip header (gre header already there) */
+	bp = padblock(bp, GRE_IPONLY);
+	if(bp == nil)
+		return;
+
+	/* make sure the message has a GRE header */
+	bp = pullupblock(bp, GRE_IPONLY+GRE_IPPLUSGRE);
+	if(bp == nil)
+		return;
+
+	gh = (GREhdr*)bp->rp;
+	gh->vihl = IP_VER4;
+
+	gpriv = c->p->priv;
+	if(gpriv->raw == 0){
+		v4tov6(dst, gh->dst);
+		if(ipcmp(dst, v4prefix) == 0)
+			memmove(gh->dst, c->raddr + IPv4off, IPv4addrlen);
+		v4tov6(src, gh->src);
+		if(ipcmp(src, v4prefix) == 0){
+			/* pick interface closest to dest */
+			if(ipcmp(c->laddr, IPnoaddr) == 0)
+				findlocalip(c->p->f, c->laddr, dst);
+			memmove(gh->src, c->laddr + IPv4off, IPv4addrlen);
+		}
+		hnputs(gh->eproto, c->rport);
+	}
+
+	gh->frag[0] = 0;
+	gh->frag[1] = 0;
+	gh->proto = IP_GREPROTO;
+
+	ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
+}
+
+/*
+ *  input routine for gre packets.  finds the in-use conversation
+ *  whose rport equals the packet's eproto (and, unless in raw
+ *  mode, whose remote address equals the packet's source),
+ *  trims the packet to the length given in its header minus
+ *  GRE_IPONLY, and passes it up as a single block.
+ *  packets with no conversation, a too-short length, or a read
+ *  queue holding more than 64k are dropped.
+ */
+static void
+greiput(Proto *gre, Ipifc* __, Block *bp)
+{
+	int len;
+	GREhdr *gh;
+	Conv *c, **cp;
+	ushort eproto;
+	uchar raddr[IPaddrlen];
+	GREpriv *gpriv;
+
+	gpriv = gre->priv;
+	gh = (GREhdr*)(bp->rp);
+
+	v4tov6(raddr, gh->src);
+	eproto = nhgets(gh->eproto);
+
+	/* Look for a conversation structure for this port and address */
+	QLOCK(gre);
+	for(cp = gre->conv; (c = *cp) != nil; cp++){
+		if(c->inuse == 0)
+			continue;
+		if(c->rport != eproto)
+			continue;
+		if(gpriv->raw || ipcmp(c->raddr, raddr) == 0)
+			break;
+	}
+	QUNLOCK(gre);
+
+	if(c == nil){
+		freeblist(bp);
+		return;
+	}
+
+	/*
+	 * Trim the packet down to data size
+	 */
+	len = nhgets(gh->len) - GRE_IPONLY;
+	if(len < GRE_IPPLUSGRE){
+		freeblist(bp);
+		return;
+	}
+	bp = trimblock(bp, GRE_IPONLY, len);
+	if(bp == nil){
+		gpriv->lenerr++;
+		return;
+	}
+
+	if(qlen(c->rq) > 64*1024){
+		freeblist(bp);
+		return;
+	}
+
+	/*
+	 *  Can't delimit packet so pull it all into one block.
+	 */
+	bp = concatblock(bp);
+	if(bp == 0)
+		panic("greiput");
+	qpass(c->rq, bp);
+}
+
+/*
+ *  format the protocol's short-packet counter into buf;
+ *  returns whatever snprint returns.
+ */
+int
+grestats(Proto *gre, char *buf, int len)
+{
+	GREpriv *priv = gre->priv;
+
+	return snprint(buf, len, "gre: len %lud\n", priv->lenerr);
+}
+
+/*
+ *  handle a control request: the single word "raw" or "cooked"
+ *  switches raw mode on or off for the whole protocol.
+ *  returns nil on success or an error string.
+ */
+char*
+grectl(Conv *c, char **f, int n)
+{
+	GREpriv *priv;
+
+	priv = c->p->priv;
+	if(n != 1)
+		return "unknown control request";
+	if(strcmp(f[0], "raw") == 0){
+		priv->raw = 1;
+		return nil;
+	}
+	if(strcmp(f[0], "cooked") == 0){
+		priv->raw = 0;
+		return nil;
+	}
+	return "unknown control request";
+}
+
+/*
+ *  allocate the gre Proto, fill in its entry points and
+ *  parameters, and hand it to Fsproto for fs.
+ */
+void
+greinit(Fs *fs)
+{
+	Proto *gre;
+
+	gre = smalloc(sizeof(Proto));
+	gre->priv = smalloc(sizeof(GREpriv));
+
+	/* identity and sizes */
+	gre->name = "gre";
+	gre->ipproto = IP_GREPROTO;
+	gre->nc = 64;
+	gre->ptclsize = 0;
+
+	/* conversation life cycle */
+	gre->create = grecreate;
+	gre->connect = greconnect;
+	gre->announce = greannounce;
+	gre->close = greclose;
+
+	/* data and control */
+	gre->rcv = greiput;
+	gre->ctl = grectl;
+	gre->advise = nil;
+
+	/* status files */
+	gre->state = grestate;
+	gre->stats = grestats;
+
+	Fsproto(fs, gre);
+}
diff --git a/src/9vx/a/ip/icmp.c b/src/9vx/a/ip/icmp.c
@@ -0,0 +1,490 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+/*
+ * Packet layout used throughout this file: the 20-byte IPv4 header
+ * (ICMP_IPSIZE) immediately followed by the 8-byte ICMP header
+ * (ICMP_HDRSIZE) and the first byte of the ICMP payload.
+ * Multi-byte fields are byte arrays read and written with
+ * nhgets/hnputs.
+ */
+typedef struct Icmp {
+	uchar	vihl;		/* Version and header length */
+	uchar	tos;		/* Type of service */
+	uchar	length[2];	/* packet length */
+	uchar	id[2];		/* Identification */
+	uchar	frag[2];	/* Fragment information */
+	uchar	ttl;		/* Time to live */
+	uchar	proto;		/* Protocol */
+	uchar	ipcksum[2];	/* Header checksum */
+	uchar	src[4];		/* Ip source */
+	uchar	dst[4];		/* Ip destination */
+	uchar	type;		/* ICMP message type, see packet types below */
+	uchar	code;		/* type-specific code */
+	uchar	cksum[2];	/* ICMP checksum */
+	uchar	icmpid[2];	/* identifier; compared with a Conv's lport */
+	uchar	seq[2];		/* sequence number */
+	uchar	data[1];	/* start of the ICMP payload */
+} Icmp;
+
+/*
+ * Values of Icmp.type.  Maxtype is the largest type number for which
+ * per-type counters (Icmppriv.in/out) and a name (icmpnames) exist.
+ */
+enum {			/* Packet Types */
+	EchoReply	= 0,
+	Unreachable	= 3,
+	SrcQuench	= 4,
+	Redirect	= 5,
+	EchoRequest	= 8,
+	TimeExceed	= 11,
+	InParmProblem	= 12,
+	Timestamp	= 13,
+	TimestampReply	= 14,
+	InfoRequest	= 15,
+	InfoReply	= 16,
+	AddrMaskRequest = 17,
+	AddrMaskReply   = 18,
+
+	Maxtype		= 18,
+};
+
+/*
+ * An Unreachable or TimeExceed message must carry at least this many
+ * bytes after its own IP and ICMP headers before icmpiput will call
+ * another protocol's advise routine.
+ */
+enum
+{
+	MinAdvise	= 24,	/* minimum needed for us to advise another protocol */ 
+};
+
+/*
+ * Printable names indexed by ICMP type, used by icmpstats.  Type
+ * numbers without an initializer here stay nil and are printed by
+ * number instead.
+ */
+char *icmpnames[Maxtype+1] =
+{
+[EchoReply]		"EchoReply",
+[Unreachable]		"Unreachable",
+[SrcQuench]		"SrcQuench",
+[Redirect]		"Redirect",
+[EchoRequest]		"EchoRequest",
+[TimeExceed]		"TimeExceed",
+[InParmProblem]		"InParmProblem",
+[Timestamp]		"Timestamp",
+[TimestampReply]	"TimestampReply",
+[InfoRequest]		"InfoRequest",
+[InfoReply]		"InfoReply",
+[AddrMaskRequest]	"AddrMaskRequest",
+/* NOTE(review): this name carries two trailing blanks, which show up in the stats output */
+[AddrMaskReply  ]	"AddrMaskReply  ",
+};
+
+/* protocol number and header sizes used when building and parsing packets */
+enum {
+	IP_ICMPPROTO	= 1,	/* value stored in Icmp.proto */
+	ICMP_IPSIZE	= 20,	/* bytes of IPv4 header in front of the ICMP header */
+	ICMP_HDRSIZE	= 8,	/* bytes of ICMP header (type through seq) */
+};
+
+/* indices into Icmppriv.stats; Nstats is the number of counters */
+enum
+{
+	InMsgs,
+	InErrors,
+	OutMsgs,
+	CsumErrs,
+	LenErrs,
+	HlenErrs,
+
+	Nstats,
+};
+
+/* names printed by icmpstats, indexed like Icmppriv.stats */
+static char *statnames[Nstats] =
+{
+[InMsgs]	"InMsgs",
+[InErrors]	"InErrors",
+[OutMsgs]	"OutMsgs",
+[CsumErrs]	"CsumErrs",
+[LenErrs]	"LenErrs",
+[HlenErrs]	"HlenErrs",
+};
+
+/*
+ * Per-protocol private data hung off Proto.priv by icmpinit: the
+ * general counters plus one received/sent counter per ICMP type.
+ */
+typedef struct Icmppriv Icmppriv;
+struct Icmppriv
+{
+	ulong	stats[Nstats];	/* indexed by InMsgs ... HlenErrs */
+
+	/* message counts */
+	ulong	in[Maxtype+1];	/* received, indexed by Icmp.type */
+	ulong	out[Maxtype+1];	/* sent, indexed by Icmp.type */
+};
+
+static void icmpkick(void *x, Block*);
+
+/*
+ * Give a new ICMP conversation its queues: a bypass write queue bound
+ * to icmpkick and a 64K message-mode (Qmsg) read queue.
+ */
+static void
+icmpcreate(Conv *c)
+{
+	c->wq = qbypass(icmpkick, c);
+	c->rq = qopen(64*1024, Qmsg, 0, c);
+}
+
+/*
+ * Connect entry point: let Fsstdconnect parse the arguments, and on
+ * success mark the conversation connected (with no error).  Returns
+ * Fsstdconnect's error string, or nil.
+ */
+extern char*
+icmpconnect(Conv *c, char **argv, int argc)
+{
+	char *err;
+
+	if((err = Fsstdconnect(c, argv, argc)) != nil)
+		return err;
+
+	Fsconnected(c, nil);
+	return nil;
+}
+
+/*
+ * Write the one-line state of a conversation into state (at most n
+ * bytes): the fixed word "Datagram" and the lengths of the read and
+ * write queues (0 for a queue that does not exist).
+ */
+extern int
+icmpstate(Conv *c, char *state, int n)
+{
+	int nin, nout;
+
+	nin = 0;
+	if(c->rq)
+		nin = qlen(c->rq);
+	nout = 0;
+	if(c->wq)
+		nout = qlen(c->wq);
+
+	return snprint(state, n, "%s qin %d qout %d\n", "Datagram", nin, nout);
+}
+
+/*
+ * Announce entry point: let Fsstdannounce parse the arguments, and on
+ * success mark the conversation connected.  Returns Fsstdannounce's
+ * error string, or nil.
+ */
+extern char*
+icmpannounce(Conv *c, char **argv, int argc)
+{
+	char *err;
+
+	if((err = Fsstdannounce(c, argv, argc)) != nil)
+		return err;
+
+	Fsconnected(c, nil);
+	return nil;
+}
+
+/*
+ * Close a conversation: close both queues, then forget the local port
+ * and reset both addresses to IPnoaddr.
+ */
+extern void
+icmpclose(Conv *c)
+{
+	qclose(c->rq);
+	qclose(c->wq);
+
+	c->lport = 0;
+	ipmove(c->raddr, IPnoaddr);
+	ipmove(c->laddr, IPnoaddr);
+}
+
+/*
+ * Output routine bound to a conversation's write queue.  bp must
+ * already hold room for the IP header followed by the ICMP header
+ * (anything shorter is dropped).  The addresses, protocol and ICMP id
+ * come from the conversation; the ICMP checksum is computed last,
+ * over everything after the IP header, and the block goes to ipoput4.
+ */
+static void
+icmpkick(void *x, Block *bp)
+{
+	Conv *conv;
+	Icmp *hdr;
+	Icmppriv *priv;
+	int len;
+
+	if(bp == nil)
+		return;
+
+	len = blocklen(bp);
+	if(len < ICMP_IPSIZE + ICMP_HDRSIZE){
+		freeblist(bp);
+		return;
+	}
+
+	conv = x;
+	priv = conv->p->priv;
+	hdr = (Icmp*)bp->rp;
+
+	/* IP part that is filled in here */
+	hdr->vihl = IP_VER4;
+	hdr->proto = IP_ICMPPROTO;
+	v6tov4(hdr->src, conv->laddr);
+	v6tov4(hdr->dst, conv->raddr);
+
+	/* ICMP part: the id is always the conversation's local port */
+	hnputs(hdr->icmpid, conv->lport);
+
+	if(hdr->type <= Maxtype)
+		priv->out[hdr->type]++;
+	priv->stats[OutMsgs]++;
+
+	/* checksum with the checksum field itself zeroed */
+	memset(hdr->cksum, 0, sizeof(hdr->cksum));
+	hnputs(hdr->cksum, ptclcsum(bp, ICMP_IPSIZE, len - ICMP_IPSIZE));
+
+	ipoput4(conv->p->f, bp, 0, conv->ttl, conv->tos, nil);
+}
+
+/*
+ * Send an ICMP TimeExceed (code 0) about the packet in bp back to that
+ * packet's IPv4 source.  ia is converted with v6tov4 and used as the
+ * source address of the reply.  The reply quotes the first
+ * ICMP_IPSIZE+8 bytes of the offending packet.  bp is only read here;
+ * it is not freed.
+ */
+extern void
+icmpttlexceeded(Fs *f, uchar *ia, Block *bp)
+{
+	int	n;
+	Icmp	*orig, *rep;
+	Block	*out;
+
+	orig = (Icmp*)bp->rp;
+	netlog(f, Logicmp, "sending icmpttlexceeded -> %V\n", orig->src);
+
+	/* our IP + ICMP headers, then the quoted IP header + 8 bytes */
+	n = ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8;
+	out = allocb(n);
+	out->wp += n;
+	rep = (Icmp*)out->rp;
+
+	/* IP part */
+	rep->vihl = IP_VER4;
+	rep->proto = IP_ICMPPROTO;
+	v6tov4(rep->src, ia);
+	memmove(rep->dst, orig->src, sizeof(rep->dst));
+
+	/* ICMP part */
+	rep->type = TimeExceed;
+	rep->code = 0;
+	hnputs(rep->icmpid, 0);
+	hnputs(rep->seq, 0);
+	memmove(rep->data, bp->rp, ICMP_IPSIZE + 8);
+
+	memset(rep->cksum, 0, sizeof(rep->cksum));
+	hnputs(rep->cksum, ptclcsum(out, ICMP_IPSIZE, blocklen(out) - ICMP_IPSIZE));
+
+	ipoput4(f, out, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * Send an ICMP Unreachable with the given code about the packet in bp,
+ * from that packet's destination address back to its source.  seq is
+ * stored in the reply's seq field (icmpcantfrag passes the MTU there).
+ *
+ * Nothing is sent unless ipforme reports the packet's destination as
+ * unicast (Runi), nor when ipforme knows the source address and it is
+ * not unicast.  bp is only read here; it is not freed.
+ */
+static void
+icmpunreachable(Fs *f, Block *bp, int code, int seq)
+{
+	int	kind, n;
+	uchar	ip6[IPaddrlen];
+	Icmp	*orig, *rep;
+	Block	*out;
+
+	orig = (Icmp*)bp->rp;
+
+	/* only do this for unicast sources and destinations */
+	v4tov6(ip6, orig->dst);
+	kind = ipforme(f, ip6);
+	if(!(kind & Runi))
+		return;
+	v4tov6(ip6, orig->src);
+	kind = ipforme(f, ip6);
+	if(kind != 0 && !(kind & Runi))
+		return;
+
+	netlog(f, Logicmp, "sending icmpnoconv -> %V\n", orig->src);
+
+	/* our IP + ICMP headers, then the quoted IP header + 8 bytes */
+	n = ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8;
+	out = allocb(n);
+	out->wp += n;
+	rep = (Icmp*)out->rp;
+
+	/* IP part: addresses of the offending packet, swapped */
+	rep->vihl = IP_VER4;
+	rep->proto = IP_ICMPPROTO;
+	memmove(rep->src, orig->dst, sizeof(rep->src));
+	memmove(rep->dst, orig->src, sizeof(rep->dst));
+
+	/* ICMP part */
+	rep->type = Unreachable;
+	rep->code = code;
+	hnputs(rep->icmpid, 0);
+	hnputs(rep->seq, seq);
+	memmove(rep->data, bp->rp, ICMP_IPSIZE + 8);
+
+	memset(rep->cksum, 0, sizeof(rep->cksum));
+	hnputs(rep->cksum, ptclcsum(out, ICMP_IPSIZE, blocklen(out) - ICMP_IPSIZE));
+
+	ipoput4(f, out, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * Report the packet in bp as undeliverable: sends an ICMP Unreachable
+ * with code 3 (named "port unreachable" in unreachcode) and a zero
+ * seq field.  bp is left to the caller.
+ */
+extern void
+icmpnoconv(Fs *f, Block *bp)
+{
+	icmpunreachable(f, bp, 3, 0);
+}
+
+/*
+ * Report that the packet in bp could not be fragmented: sends an ICMP
+ * Unreachable with code 4 (named "fragmentation needed and DF set" in
+ * unreachcode), carrying mtu in the reply's seq field.  bp is left to
+ * the caller.
+ */
+extern void
+icmpcantfrag(Fs *f, Block *bp, int mtu)
+{
+	icmpunreachable(f, bp, 4, mtu);
+}
+
+/*
+ * Deliver a received ICMP packet to the first conversation whose
+ * local port equals the packet's ICMP id and whose remote address
+ * equals the packet's source.  The block list is concatenated into
+ * one block before being passed to the read queue.  bp is consumed:
+ * it is freed when no conversation matches.
+ */
+static void
+goticmpkt(Proto *icmp, Block *bp)
+{
+	Conv	**cp, *s;
+	Icmp	*h;
+	uchar	from[IPaddrlen];
+	ushort	id;
+
+	h = (Icmp*)bp->rp;
+	id = nhgets(h->icmpid);
+	v4tov6(from, h->src);
+
+	for(cp = icmp->conv; (s = *cp) != nil; cp++){
+		if(s->lport != id || ipcmp(s->raddr, from) != 0)
+			continue;
+		bp = concatblock(bp);
+		if(bp != nil)
+			qpass(s->rq, bp);
+		return;
+	}
+	freeblist(bp);
+}
+
+/*
+ * Turn the echo request in bp into an echo reply in place: swap the
+ * IP source and destination, set the type to EchoReply and recompute
+ * the ICMP checksum.  Returns bp.
+ */
+static Block *
+mkechoreply(Block *bp)
+{
+	Icmp	*h;
+	uchar	peer[4];
+
+	h = (Icmp*)bp->rp;
+	h->vihl = IP_VER4;
+	h->type = EchoReply;
+
+	/* swap source and destination */
+	memmove(peer, h->src, sizeof(peer));
+	memmove(h->src, h->dst, sizeof(h->src));
+	memmove(h->dst, peer, sizeof(h->dst));
+
+	memset(h->cksum, 0, sizeof(h->cksum));
+	hnputs(h->cksum, ptclcsum(bp, ICMP_IPSIZE, blocklen(bp) - ICMP_IPSIZE));
+	return bp;
+}
+
+/*
+ * Messages for the code field of an Unreachable packet, indexed by
+ * code.  icmpiput substitutes entry 1 for any code above 5.
+ */
+static char *unreachcode[] =
+{
+[0]	"net unreachable",
+[1]	"host unreachable",
+[2]	"protocol unreachable",
+[3]	"port unreachable",
+[4]	"fragmentation needed and DF set",
+[5]	"source route failed",
+};
+
+/*
+ * Receive routine for ICMP (installed as Proto.rcv by icmpinit).
+ *
+ * bp holds the IPv4 header followed by the ICMP message.  The packet
+ * is dropped, with the matching error counters bumped, when the block
+ * is shorter than the two headers, when the IP length field is larger
+ * than the block or too small to cover both headers, or when the ICMP
+ * checksum does not verify.
+ *
+ * Otherwise:
+ *	EchoRequest is turned into an EchoReply in place and sent back;
+ *	Unreachable and TimeExceed (code 0) are handed to the advise
+ *		routine of the protocol named in the quoted IP header,
+ *		when that protocol has one (advise then owns bp);
+ *	everything else is queued to a matching conversation by
+ *		goticmpkt.
+ *
+ * bp is consumed on every path.
+ */
+static void
+icmpiput(Proto *icmp, Ipifc* __, Block *bp)
+{
+	int	n, iplen;
+	Icmp	*p;
+	Block	*r;
+	Proto	*pr;
+	char	*msg;
+	char	m2[128];
+	Icmppriv *ipriv;
+
+	ipriv = icmp->priv;
+
+	ipriv->stats[InMsgs]++;
+
+	p = (Icmp *)bp->rp;
+	n = blocklen(bp);
+	if(n < ICMP_IPSIZE+ICMP_HDRSIZE){
+		ipriv->stats[InErrors]++;
+		ipriv->stats[HlenErrs]++;
+		netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
+		goto raise;
+	}
+	/* type and code are only looked at once they are known to be there */
+	netlog(icmp->f, Logicmp, "icmpiput %d %d\n", p->type, p->code);
+
+	/*
+	 * The IP length must fit in the block and must itself cover both
+	 * headers; otherwise the checksum below would be taken over a
+	 * truncated (or negative-length) ICMP message.
+	 */
+	iplen = nhgets(p->length);
+	if(iplen > n || iplen < ICMP_IPSIZE+ICMP_HDRSIZE){
+		ipriv->stats[LenErrs]++;
+		ipriv->stats[InErrors]++;
+		netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
+		goto raise;
+	}
+	if(ptclcsum(bp, ICMP_IPSIZE, iplen - ICMP_IPSIZE)){
+		ipriv->stats[InErrors]++;
+		ipriv->stats[CsumErrs]++;
+		netlog(icmp->f, Logicmp, "icmp checksum error\n");
+		goto raise;
+	}
+	if(p->type <= Maxtype)
+		ipriv->in[p->type]++;
+
+	switch(p->type) {
+	case EchoRequest:
+		/* drop anything in the block beyond the IP length */
+		if (iplen < n)
+			bp = trimblock(bp, 0, iplen);
+		r = mkechoreply(bp);
+		ipriv->out[EchoReply]++;
+		ipoput4(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
+		break;
+	case Unreachable:
+		if(p->code > 5)
+			msg = unreachcode[1];
+		else
+			msg = unreachcode[p->code];
+
+		/* step over our own headers to the quoted packet */
+		bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
+		if(blocklen(bp) < MinAdvise){
+			ipriv->stats[LenErrs]++;
+			goto raise;
+		}
+		p = (Icmp *)bp->rp;
+		pr = Fsrcvpcolx(icmp->f, p->proto);
+		if(pr != nil && pr->advise != nil) {
+			(*pr->advise)(pr, bp, msg);
+			return;
+		}
+
+		/* nobody to advise: restore rp and deliver as an ordinary packet */
+		bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
+		goticmpkt(icmp, bp);
+		break;
+	case TimeExceed:
+		if(p->code == 0){
+			sprint(m2, "ttl exceeded at %V", p->src);
+
+			/* step over our own headers to the quoted packet */
+			bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
+			if(blocklen(bp) < MinAdvise){
+				ipriv->stats[LenErrs]++;
+				goto raise;
+			}
+			p = (Icmp *)bp->rp;
+			pr = Fsrcvpcolx(icmp->f, p->proto);
+			if(pr != nil && pr->advise != nil) {
+				(*pr->advise)(pr, bp, m2);
+				return;
+			}
+			bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
+		}
+
+		goticmpkt(icmp, bp);
+		break;
+	default:
+		goticmpkt(icmp, bp);
+		break;
+	}
+	return;
+
+raise:
+	freeblist(bp);
+}
+
+/*
+ * Advise routine for ICMP itself.  bp starts at a quoted packet that
+ * this host sent; the first conversation whose local port equals the
+ * quoted ICMP id and whose remote address equals the quoted
+ * destination has both of its queues hung up with msg.  bp is always
+ * freed.
+ */
+void
+icmpadvise(Proto *icmp, Block *bp, char *msg)
+{
+	Conv	**cp, *s;
+	Icmp	*h;
+	uchar	to[IPaddrlen];
+	ushort	id;
+
+	h = (Icmp*)bp->rp;
+	id = nhgets(h->icmpid);
+	v4tov6(to, h->dst);
+
+	for(cp = icmp->conv; (s = *cp) != nil; cp++){
+		if(s->lport != id || ipcmp(s->raddr, to) != 0)
+			continue;
+		qhangup(s->rq, msg);
+		qhangup(s->wq, msg);
+		break;
+	}
+	freeblist(bp);
+}
+
+/*
+ * Format the ICMP statistics into buf (len bytes): first every
+ * general counter as "name: value", then for every type up to Maxtype
+ * the received and sent counts, labelled by name when icmpnames has
+ * one and by number otherwise.  Returns the number of bytes written.
+ */
+int
+icmpstats(Proto *icmp, char *buf, int len)
+{
+	Icmppriv *priv = icmp->priv;
+	char *s, *end;
+	int t;
+
+	s = buf;
+	end = buf + len;
+
+	for(t = 0; t < Nstats; t++)
+		s = seprint(s, end, "%s: %lud\n", statnames[t], priv->stats[t]);
+
+	for(t = 0; t <= Maxtype; t++){
+		if(icmpnames[t] == nil){
+			s = seprint(s, end, "%d: %lud %lud\n", t, priv->in[t], priv->out[t]);
+			continue;
+		}
+		s = seprint(s, end, "%s: %lud %lud\n", icmpnames[t], priv->in[t], priv->out[t]);
+	}
+	return s - buf;
+}
+	
+/*
+ * Allocate the ICMP protocol descriptor together with its private
+ * counters, fill in the protocol entry points, and hand the
+ * descriptor to Fsproto for the given Fs.
+ */
+void
+icmpinit(Fs *fs)
+{
+	Proto *p;
+
+	p = smalloc(sizeof(Proto));
+	p->priv = smalloc(sizeof(Icmppriv));
+
+	/* identity and sizing */
+	p->name = "icmp";
+	p->ipproto = IP_ICMPPROTO;
+	p->nc = 128;
+	p->ptclsize = 0;
+
+	/* conversation management */
+	p->create = icmpcreate;
+	p->connect = icmpconnect;
+	p->announce = icmpannounce;
+	p->close = icmpclose;
+	p->ctl = nil;
+	p->gc = nil;
+
+	/* packet input and reporting */
+	p->rcv = icmpiput;
+	p->advise = icmpadvise;
+	p->state = icmpstate;
+	p->stats = icmpstats;
+
+	Fsproto(fs, p);
+}
diff --git a/src/9vx/a/ip/icmp6.c b/src/9vx/a/ip/icmp6.c
@@ -0,0 +1,946 @@
+/*
+ * Internet Control Message Protocol for IPv6
+ */
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+#include "ip.h"
+#include "ipv6.h"
+
+/* indices into Icmppriv6.stats; Nstats6 is the number of counters */
+enum
+{
+	InMsgs6,
+	InErrors6,
+	OutMsgs6,
+	CsumErrs6,
+	LenErrs6,
+	HlenErrs6,
+	HoplimErrs6,
+	IcmpCodeErrs6,
+	TargetErrs6,
+	OptlenErrs6,
+	AddrmxpErrs6,
+	RouterAddrErrs6,
+
+	Nstats6,
+};
+
+/*
+ * Bytes icmpkick6 pulls up at the front of a user write when the
+ * conversation is in "headers" mode: 8 bytes that are skipped, then
+ * the local and the remote address.
+ */
+enum {
+	ICMP_USEAD6	= 40,
+};
+
+/*
+ * Flag bits of a neighbor advertisement; icmpna stores them in the
+ * first byte of the icmpid field.
+ */
+enum {
+	Oflag	= 1<<5,
+	Sflag	= 1<<6,
+	Rflag	= 1<<7,
+};
+
+/*
+ * Message types.  The names without a V6 suffix carry the same
+ * numbers as the ICMPv4 types in icmp.c; note that SrcQuench and
+ * ParamProblemV6 share the value 4.  Maxtype6 bounds the per-type
+ * counters and the name table.
+ */
+enum {
+	/* ICMPv6 types */
+	EchoReply	= 0,
+	UnreachableV6	= 1,
+	PacketTooBigV6	= 2,
+	TimeExceedV6	= 3,
+	SrcQuench	= 4,
+	ParamProblemV6	= 4,
+	Redirect	= 5,
+	EchoRequest	= 8,
+	TimeExceed	= 11,
+	InParmProblem	= 12,
+	Timestamp	= 13,
+	TimestampReply	= 14,
+	InfoRequest	= 15,
+	InfoReply	= 16,
+	AddrMaskRequest = 17,
+	AddrMaskReply   = 18,
+	EchoRequestV6	= 128,
+	EchoReplyV6	= 129,
+	RouterSolicit	= 133,
+	RouterAdvert	= 134,
+	NbrSolicit	= 135,
+	NbrAdvert	= 136,
+	RedirectV6	= 137,
+
+	Maxtype6	= 137,
+};
+
+/*
+ * Packet layouts.  Each structure below repeats the fields of the one
+ * before it and appends its own, so a pointer to a received packet
+ * can be cast to whichever view is needed:
+ *	ICMPpkt	the bare ICMP header;
+ *	IPICMP	IPv6 header + ICMP header;
+ *	NdiscC	IPICMP + neighbor discovery target address;
+ *	Ndpkt	NdiscC + one link-layer address option.
+ */
+typedef struct ICMPpkt ICMPpkt;
+typedef struct IPICMP IPICMP;
+typedef struct Ndpkt Ndpkt;
+typedef struct NdiscC NdiscC;
+
+struct ICMPpkt {
+	uchar	type;
+	uchar	code;
+	uchar	cksum[2];
+	uchar	icmpid[2];
+	uchar	seq[2];
+};
+
+struct IPICMP {
+	/* Ip6hdr; */
+	uchar	vcf[4];		/* version:4, traffic class:8, flow label:20 */
+	uchar	ploadlen[2];	/* payload length: packet length - 40 */
+	uchar	proto;		/* next header type */
+	uchar	ttl;		/* hop limit */
+	uchar	src[IPaddrlen];
+	uchar	dst[IPaddrlen];
+
+	/* ICMPpkt; */
+	uchar	type;
+	uchar	code;
+	uchar	cksum[2];
+	uchar	icmpid[2];
+	uchar	seq[2];
+};
+
+struct NdiscC
+{
+	/* IPICMP; */
+	/* Ip6hdr; */
+	uchar	vcf[4];		/* version:4, traffic class:8, flow label:20 */
+	uchar	ploadlen[2];	/* payload length: packet length - 40 */
+	uchar	proto;		/* next header type */
+	uchar	ttl;		/* hop limit */
+	uchar	src[IPaddrlen];
+	uchar	dst[IPaddrlen];
+
+	/* ICMPpkt; */
+	uchar	type;
+	uchar	code;
+	uchar	cksum[2];
+	uchar	icmpid[2];
+	uchar	seq[2];
+
+	uchar	target[IPaddrlen];	/* address being solicited or advertised */
+};
+
+struct Ndpkt
+{
+	/* NdiscC; */
+	/* IPICMP; */
+	/* Ip6hdr; */
+	uchar	vcf[4];		/* version:4, traffic class:8, flow label:20 */
+	uchar	ploadlen[2];	/* payload length: packet length - 40 */
+	uchar	proto;		/* next header type */
+	uchar	ttl;		/* hop limit */
+	uchar	src[IPaddrlen];
+	uchar	dst[IPaddrlen];
+
+	/* ICMPpkt; */
+	uchar	type;
+	uchar	code;
+	uchar	cksum[2];
+	uchar	icmpid[2];
+	uchar	seq[2];
+
+	uchar	target[IPaddrlen];	/* address being solicited or advertised */
+
+	uchar	otype;		/* option type (SRC_LLADDR or TARGET_LLADDR here) */
+	uchar	olen;		/* length in units of 8 octets(incl type, code),
+				 * 1 for IEEE 802 addresses */
+	uchar	lnaddr[6];	/* link-layer address */
+};
+
+/*
+ * Per-protocol private data hung off Proto.priv by icmp6init: the
+ * general counters plus one received/sent counter per message type.
+ */
+typedef struct Icmppriv6
+{
+	ulong	stats[Nstats6];	/* indexed by InMsgs6 ... RouterAddrErrs6 */
+
+	/* message counts */
+	ulong	in[Maxtype6+1];	/* received, indexed by type */
+	ulong	out[Maxtype6+1];	/* sent, indexed by type */
+} Icmppriv6;
+
+/*
+ * Per-conversation control block (Conv.ptcl; icmp6init sets ptclsize
+ * to its size).
+ */
+typedef struct Icmpcb6
+{
+	QLock	qlock;
+	uchar	headers;	/* set to 6 by the "headers" ctl request, see icmpkick6 */
+} Icmpcb6;
+
+/*
+ * Printable names indexed by message type, used by icmpstats6; types
+ * without an entry stay nil and are not reported.  ParamProblemV6 has
+ * no entry of its own because it shares index 4 with SrcQuench.
+ */
+char *icmpnames6[Maxtype6+1] =
+{
+[EchoReply]		"EchoReply",
+[UnreachableV6]		"UnreachableV6",
+[PacketTooBigV6]	"PacketTooBigV6",
+[TimeExceedV6]		"TimeExceedV6",
+[SrcQuench]		"SrcQuench",
+[Redirect]		"Redirect",
+[EchoRequest]		"EchoRequest",
+[TimeExceed]		"TimeExceed",
+[InParmProblem]		"InParmProblem",
+[Timestamp]		"Timestamp",
+[TimestampReply]	"TimestampReply",
+[InfoRequest]		"InfoRequest",
+[InfoReply]		"InfoReply",
+[AddrMaskRequest]	"AddrMaskRequest",
+[AddrMaskReply]		"AddrMaskReply",
+[EchoRequestV6]		"EchoRequestV6",
+[EchoReplyV6]		"EchoReplyV6",
+[RouterSolicit]		"RouterSolicit",
+[RouterAdvert]		"RouterAdvert",
+[NbrSolicit]		"NbrSolicit",
+[NbrAdvert]		"NbrAdvert",
+[RedirectV6]		"RedirectV6",
+};
+
+/* names printed by icmpstats6, indexed like Icmppriv6.stats */
+static char *statnames6[Nstats6] =
+{
+[InMsgs6]	"InMsgs",
+[InErrors6]	"InErrors",
+[OutMsgs6]	"OutMsgs",
+[CsumErrs6]	"CsumErrs",
+[LenErrs6]	"LenErrs",
+[HlenErrs6]	"HlenErrs",
+[HoplimErrs6]	"HoplimErrs",
+[IcmpCodeErrs6]	"IcmpCodeErrs",
+[TargetErrs6]	"TargetErrs",
+[OptlenErrs6]	"OptlenErrs",
+[AddrmxpErrs6]	"AddrmxpErrs",
+[RouterAddrErrs6]	"RouterAddrErrs",
+};
+
+/*
+ * Messages for the code field of an UnreachableV6 packet, indexed by
+ * the Icmp6_* code constants (declared outside this file).  icmpiput6
+ * uses the Icmp6_unknown entry for codes beyond the table.
+ */
+static char *unreachcode[] =
+{
+[Icmp6_no_route]	"no route to destination",
+[Icmp6_ad_prohib]	"comm with destination administratively prohibited",
+[Icmp6_out_src_scope]	"beyond scope of source address",
+[Icmp6_adr_unreach]	"address unreachable",
+[Icmp6_port_unreach]	"port unreachable",
+[Icmp6_gress_src_fail]	"source address failed ingress/egress policy",
+[Icmp6_rej_route]	"reject route to destination",
+[Icmp6_unknown]		"icmp unreachable: unknown code",
+};
+
+static void icmpkick6(void *x, Block *bp);
+
+/*
+ * Give a new ICMPv6 conversation its queues: a bypass write queue
+ * bound to icmpkick6 and a 64K message-mode (Qmsg) read queue.
+ */
+static void
+icmpcreate6(Conv *c)
+{
+	c->wq = qbypass(icmpkick6, c);
+	c->rq = qopen(64*1024, Qmsg, 0, c);
+}
+
+/*
+ * Compute and store the ICMPv6 checksum of the packet in bp.
+ *
+ * Instead of building a separate pseudoheader, the IPv6 header itself
+ * is rewritten to serve as one: vcf is zeroed, ploadlen is set from
+ * the block length, and ICMPv6 is parked in the ttl byte with proto
+ * zero while the sum is taken over the whole block.  src and dst must
+ * therefore already be final.  On return proto is ICMPv6 again, but
+ * vcf is still zero and ttl still holds ICMPv6, so every caller in
+ * this file sets vcf[0] (and, where it matters, ttl) afterwards.
+ */
+static void
+set_cksum(Block *bp)
+{
+	IPICMP *p = (IPICMP *)(bp->rp);
+
+	hnputl(p->vcf, 0);  	/* borrow IP header as pseudoheader */
+	hnputs(p->ploadlen, blocklen(bp) - IP6HDR);
+	p->proto = 0;
+	p->ttl = ICMPv6;	/* ttl gets set later */
+	/* the checksum field must be zero while the sum is computed */
+	hnputs(p->cksum, 0);
+	hnputs(p->cksum, ptclcsum(bp, 0, blocklen(bp)));
+	p->proto = ICMPv6;
+}
+
+/*
+ * Allocate a block holding packetlen zeroed bytes, with wp already
+ * advanced past them, ready to be filled in as an outgoing packet.
+ */
+static Block *
+newIPICMP(int packetlen)
+{
+	Block *b;
+
+	b = allocb(packetlen);
+	memset(b->rp, 0, packetlen);
+	b->wp += packetlen;
+	return b;
+}
+
+/*
+ * Advise routine for ICMPv6 itself.  bp starts at a quoted packet;
+ * the first conversation whose local port equals the quoted ICMP id
+ * and whose remote address equals the quoted destination has both of
+ * its queues hung up with msg.  bp is always freed.
+ */
+void
+icmpadvise6(Proto *icmp, Block *bp, char *msg)
+{
+	Conv **cp, *s;
+	IPICMP *h;
+	ushort id;
+
+	h = (IPICMP*)bp->rp;
+	id = nhgets(h->icmpid);
+
+	for(cp = icmp->conv; (s = *cp) != nil; cp++){
+		if(s->lport != id || ipcmp(s->raddr, h->dst) != 0)
+			continue;
+		qhangup(s->rq, msg);
+		qhangup(s->wq, msg);
+		break;
+	}
+	freeblist(bp);
+}
+
+/*
+ * Output routine bound to a conversation's write queue.
+ *
+ * In the default mode bp must start with room for the IPv6 header
+ * followed by the ICMP header; the addresses and the ICMP id are
+ * taken from the conversation.  In "headers" mode (icb->headers == 6,
+ * set by icmpctl6) the write instead starts with ICMP_USEAD6 bytes:
+ * 8 bytes that are skipped, then the local and the remote address;
+ * these are stripped and room for an IPv6 header is put back in front
+ * with padblock, and the ICMP id is left as the user wrote it.
+ *
+ * Blocks shorter than an IPICMP are dropped.  The checksum is set,
+ * the version nibble written, the per-type output counter bumped and
+ * the packet handed to ipoput6.
+ */
+static void
+icmpkick6(void *x, Block *bp)
+{
+	uchar laddr[IPaddrlen], raddr[IPaddrlen];	/* only set and used in headers mode */
+	Conv *c = x;
+	IPICMP *p;
+	Icmppriv6 *ipriv = c->p->priv;
+	Icmpcb6 *icb = (Icmpcb6*)c->ptcl;
+
+	if(bp == nil)
+		return;
+
+	if(icb->headers==6) {
+		/* get user specified addresses */
+		bp = pullupblock(bp, ICMP_USEAD6);
+		/* presumably pullupblock has already freed the list on failure -- TODO confirm */
+		if(bp == nil)
+			return;
+		bp->rp += 8;
+		ipmove(laddr, bp->rp);
+		bp->rp += IPaddrlen;
+		ipmove(raddr, bp->rp);
+		bp->rp += IPaddrlen;
+		bp = padblock(bp, sizeof(Ip6hdr));
+	}
+
+	if(blocklen(bp) < sizeof(IPICMP)){
+		freeblist(bp);
+		return;
+	}
+	p = (IPICMP *)(bp->rp);
+	if(icb->headers == 6) {
+		ipmove(p->dst, raddr);
+		ipmove(p->src, laddr);
+	} else {
+		ipmove(p->dst, c->raddr);
+		ipmove(p->src, c->laddr);
+		hnputs(p->icmpid, c->lport);
+	}
+
+	/* set_cksum zeroes vcf, so the version nibble is written after it */
+	set_cksum(bp);
+	p->vcf[0] = 0x06 << 4;
+	if(p->type <= Maxtype6)
+		ipriv->out[p->type]++;
+	ipoput6(c->p->f, bp, 0, c->ttl, c->tos, nil);
+}
+
+/*
+ * Handle a control request on an ICMPv6 conversation.  The only
+ * request understood is the single word "headers", which sets the
+ * conversation's headers field to 6 (see icmpkick6).  Returns nil on
+ * success or an error string for anything else.
+ */
+char*
+icmpctl6(Conv *c, char **argv, int argc)
+{
+	Icmpcb6 *icb = (Icmpcb6*)c->ptcl;
+
+	if(argc != 1 || strcmp(argv[0], "headers") != 0)
+		return "unknown control request";
+
+	icb->headers = 6;
+	return nil;
+}
+
+/*
+ * Deliver a received ICMPv6 packet to the first matching conversation.
+ * With muxkey == 0 a conversation matches when its local port equals
+ * the packet's ICMP id and its remote address equals the packet's
+ * source; with a non-zero muxkey the local port is compared with
+ * muxkey and the remote address with the packet's destination.
+ * bp is consumed: it is freed when no conversation matches.
+ */
+static void
+goticmpkt6(Proto *icmp, Block *bp, int muxkey)
+{
+	Conv **cp, *s;
+	IPICMP *h;
+	uchar *peer;
+	ushort key;
+
+	h = (IPICMP*)bp->rp;
+	if(muxkey != 0){
+		key = muxkey;
+		peer = h->dst;
+	}else{
+		key = nhgets(h->icmpid);
+		peer = h->src;
+	}
+
+	for(cp = icmp->conv; (s = *cp) != nil; cp++){
+		if(s->lport != key || ipcmp(s->raddr, peer) != 0)
+			continue;
+		bp = concatblock(bp);
+		if(bp != nil)
+			qpass(s->rq, bp);
+		return;
+	}
+
+	freeblist(bp);
+}
+
+/*
+ * Turn the echo request in bp into an echo reply in place.  The reply
+ * goes to the request's source.  Its own source is the request's
+ * destination unless that was multicast, in which case
+ * ipv6anylocal(ifc, ...) is asked to supply one; if it cannot, nil is
+ * returned and bp is left for the caller to free.  Otherwise the type
+ * is set to EchoReplyV6, the checksum recomputed and bp returned.
+ */
+static Block *
+mkechoreply6(Block *bp, Ipifc *ifc)
+{
+	uchar peer[IPaddrlen];
+	IPICMP *h;
+
+	h = (IPICMP*)bp->rp;
+	ipmove(peer, h->src);
+
+	if(isv6mcast(h->dst)){
+		if(!ipv6anylocal(ifc, h->src))
+			return nil;
+	}else
+		ipmove(h->src, h->dst);
+
+	ipmove(h->dst, peer);
+	h->type = EchoReplyV6;
+	set_cksum(bp);
+	return bp;
+}
+
+/*
+ * Send an ICMPv6 neighbor solicitation for targ.
+ *	suni == SRC_UNSPEC: the source is the unspecified address and
+ *		no link-layer option is sent (the packet is cut back to
+ *		an NdiscC);
+ *	otherwise: the source is src and a SRC_LLADDR option carrying
+ *		mac is appended.
+ *	tuni == TARG_UNI: sent to targ itself (neighbor reachability);
+ *	otherwise: sent to the multicast address that ipv62smcast
+ *		derives from targ (address resolution).
+ */
+extern void
+icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac)
+{
+	Proto *icmp = f->t2p[ICMPv6];
+	Icmppriv6 *priv = icmp->priv;
+	Block *out;
+	Ndpkt *pkt;
+
+	out = newIPICMP(sizeof(Ndpkt));
+	pkt = (Ndpkt*)out->rp;
+
+	pkt->type = NbrSolicit;
+	pkt->code = 0;
+	memmove(pkt->target, targ, IPaddrlen);
+
+	if(suni == SRC_UNSPEC){
+		memmove(pkt->src, v6Unspecified, IPaddrlen);
+		/* no source, so no source link-layer option either */
+		out->wp -= sizeof(Ndpkt) - sizeof(NdiscC);
+	}else{
+		memmove(pkt->src, src, IPaddrlen);
+		pkt->otype = SRC_LLADDR;
+		pkt->olen = 1;		/* 1+1+6 = 8 = 1 8-octet */
+		memmove(pkt->lnaddr, mac, sizeof(pkt->lnaddr));
+	}
+
+	if(tuni == TARG_UNI)
+		memmove(pkt->dst, targ, IPaddrlen);
+	else
+		ipv62smcast(pkt->dst, targ);
+
+	/* set_cksum clobbers ttl and vcf, so they are written after it */
+	set_cksum(out);
+	pkt->ttl = HOP_LIMIT;
+	pkt->vcf[0] = 0x06 << 4;
+
+	priv->out[NbrSolicit]++;
+	netlog(f, Logicmp, "sending neighbor solicitation %I\n", targ);
+	ipoput6(f, out, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * Send an ICMPv6 neighbor advertisement for targ from src to dst,
+ * with a TARGET_LLADDR option carrying mac.  flags holds the R/S/O
+ * bits (Rflag, Sflag, Oflag) and is stored in the first byte of the
+ * icmpid field.
+ */
+extern void
+icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags)
+{
+	Proto *icmp = f->t2p[ICMPv6];
+	Icmppriv6 *priv = icmp->priv;
+	Block *out;
+	Ndpkt *pkt;
+
+	out = newIPICMP(sizeof(Ndpkt));
+	pkt = (Ndpkt*)out->rp;
+
+	/* IPv6 addresses */
+	memmove(pkt->src, src, IPaddrlen);
+	memmove(pkt->dst, dst, IPaddrlen);
+
+	/* advertisement proper */
+	pkt->type = NbrAdvert;
+	pkt->code = 0;
+	pkt->icmpid[0] = flags;
+	memmove(pkt->target, targ, IPaddrlen);
+
+	/* target link-layer address option */
+	pkt->otype = TARGET_LLADDR;
+	pkt->olen = 1;
+	memmove(pkt->lnaddr, mac, sizeof(pkt->lnaddr));
+
+	/* set_cksum clobbers ttl and vcf, so they are written after it */
+	set_cksum(out);
+	pkt->ttl = HOP_LIMIT;
+	pkt->vcf[0] = 0x06 << 4;
+
+	priv->out[NbrAdvert]++;
+	netlog(f, Logicmp, "sending neighbor advertisement %I\n", src);
+	ipoput6(f, out, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * Send an ICMPv6 Unreachable with the given code about the packet in
+ * bp back to that packet's source.  The reply quotes as much of the
+ * offending packet as fits in v6MINTU.  Nothing is sent when the
+ * offending source is multicast or when ipv6anylocal cannot supply a
+ * source address on ifc.
+ *
+ * free selects both delivery and ownership:
+ *	free != 0: the reply is fed to ipiput6 on ifc and bp is freed
+ *		here;
+ *	free == 0: the reply is sent with ipoput6 and bp stays with
+ *		the caller.
+ *
+ * ifc is read-locked from the ipv6anylocal call until the reply has
+ * been handed on, and is unlocked again on every path that locked it.
+ * (Earlier the multicast path unlocked a lock it had never taken and
+ * freed bp regardless of free, and both free == 0 paths returned with
+ * the lock still held.)
+ */
+extern void
+icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free)
+{
+	int osz = BLEN(bp);
+	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+	Block *nbp;
+	IPICMP *np;
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
+	Icmppriv6 *ipriv = icmp->priv;
+
+	p = (Ip6hdr *)bp->rp;
+
+	/* never answer a multicast source; ifc is not locked yet */
+	if(isv6mcast(p->src))
+		goto done;
+
+	nbp = newIPICMP(sz);
+	np = (IPICMP *)nbp->rp;
+
+	RLOCK(ifc);
+	if(!ipv6anylocal(ifc, np->src)){
+		netlog(f, Logicmp, "icmphostunr fail -> s%I d%I\n",
+			p->src, p->dst);
+		RUNLOCK(ifc);
+		freeblist(nbp);
+		goto done;
+	}
+	netlog(f, Logicmp, "send icmphostunr -> s%I d%I\n",
+		p->src, p->dst);
+
+	memmove(np->dst, p->src, IPaddrlen);
+	np->type = UnreachableV6;
+	np->code = code;
+	/* quote the offending packet after our own headers */
+	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	/* set_cksum clobbers ttl and vcf, so they are written after it */
+	set_cksum(nbp);
+	np->ttl = HOP_LIMIT;
+	np->vcf[0] = 0x06 << 4;
+	ipriv->out[UnreachableV6]++;
+
+	if(free)
+		ipiput6(f, ifc, nbp);
+	else
+		ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+	RUNLOCK(ifc);
+
+done:
+	if(free)
+		freeblist(bp);
+}
+
+/*
+ * Send an ICMPv6 Time Exceeded (code 0) about the packet in bp back
+ * to that packet's source.  The reply quotes as much of the offending
+ * packet as fits in v6MINTU.  Nothing is sent when the offending
+ * source is multicast or when ipv6anylocal cannot supply a source
+ * address on ifc; in the latter case the partly built reply is freed
+ * (it used to be leaked).  bp is only read here; it is not freed.
+ */
+extern void
+icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp)
+{
+	int osz = BLEN(bp);
+	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+	Block *nbp;
+	IPICMP *np;
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
+	Icmppriv6 *ipriv = icmp->priv;
+
+	p = (Ip6hdr *)bp->rp;
+
+	if(isv6mcast(p->src))
+		return;
+
+	nbp = newIPICMP(sz);
+	np = (IPICMP *) nbp->rp;
+
+	if(!ipv6anylocal(ifc, np->src)){
+		netlog(f, Logicmp, "icmpttlexceeded6 fail -> s%I d%I\n",
+			p->src, p->dst);
+		freeblist(nbp);
+		return;
+	}
+	netlog(f, Logicmp, "send icmpttlexceeded6 -> s%I d%I\n",
+		p->src, p->dst);
+
+	memmove(np->dst, p->src, IPaddrlen);
+	np->type = TimeExceedV6;
+	np->code = 0;
+	/* quote the offending packet after our own headers */
+	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	/* set_cksum clobbers ttl and vcf, so they are written after it */
+	set_cksum(nbp);
+	np->ttl = HOP_LIMIT;
+	np->vcf[0] = 0x06 << 4;
+	ipriv->out[TimeExceedV6]++;
+	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * Send an ICMPv6 Packet Too Big about the packet in bp back to that
+ * packet's source.  The four bytes after the checksum (icmpid and seq
+ * together) carry ifc->maxtu - ifc->m->hsize.  The reply quotes as
+ * much of the offending packet as fits in v6MINTU.  Nothing is sent
+ * when the offending source is multicast or when ipv6anylocal cannot
+ * supply a source address on ifc; in the latter case the partly built
+ * reply is freed (it used to be leaked).  bp is only read here; it is
+ * not freed.
+ */
+extern void
+icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp)
+{
+	int osz = BLEN(bp);
+	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+	Block *nbp;
+	IPICMP *np;
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
+	Icmppriv6 *ipriv = icmp->priv;
+
+	p = (Ip6hdr *)bp->rp;
+
+	if(isv6mcast(p->src))
+		return;
+
+	nbp = newIPICMP(sz);
+	np = (IPICMP *)nbp->rp;
+
+	if(!ipv6anylocal(ifc, np->src)){
+		netlog(f, Logicmp, "icmppkttoobig6 fail -> s%I d%I\n",
+			p->src, p->dst);
+		freeblist(nbp);
+		return;
+	}
+	netlog(f, Logicmp, "send icmppkttoobig6 -> s%I d%I\n",
+		p->src, p->dst);
+
+	memmove(np->dst, p->src, IPaddrlen);
+	np->type = PacketTooBigV6;
+	np->code = 0;
+	/* a 32-bit value: deliberately spans icmpid and seq */
+	hnputl(np->icmpid, ifc->maxtu - ifc->m->hsize);
+	/* quote the offending packet after our own headers */
+	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	/* set_cksum clobbers ttl and vcf, so they are written after it */
+	set_cksum(nbp);
+	np->ttl = HOP_LIMIT;
+	np->vcf[0] = 0x06 << 4;
+	ipriv->out[PacketTooBigV6]++;
+	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * RFC 2461, pages 39-40, pages 57-58.
+ *
+ * Validate a received ICMPv6 packet.  Returns 1 when the packet is
+ * acceptable and 0 otherwise; every rejection bumps InErrors6 and
+ * usually a more specific counter as well.  bp is never freed here.
+ *
+ * Checks, in order: the block holds at least an IPICMP; the payload
+ * length fits in the block; the next header is ICMPv6 (extension
+ * headers are not handled); the checksum verifies; and, for the
+ * neighbor discovery types, the hop limit is HOP_LIMIT, the code is 0
+ * and the type-specific rules in the switch below hold.
+ *
+ * Side effect: the first four bytes of the header (vcf) are zeroed
+ * for the checksum and are not restored.  ttl and proto are swapped
+ * around for the checksum and put back only when it verifies.
+ */
+static int
+valid(Proto *icmp, Ipifc *ifc, Block *bp, Icmppriv6 *ipriv)
+{
+	int sz, osz, unsp, n, ttl, iplen;
+	int pktsz = BLEN(bp);
+	uchar *packet = bp->rp;
+	IPICMP *p = (IPICMP *) packet;
+	Ndpkt *np;
+
+	USED(ifc);
+	n = blocklen(bp);
+	if(n < sizeof(IPICMP)) {
+		ipriv->stats[HlenErrs6]++;
+		netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
+		goto err;
+	}
+
+	iplen = nhgets(p->ploadlen);
+	/* NOTE(review): iplen % 1 is always 0, so only the first test can fire */
+	if(iplen > n - IP6HDR || ((uint)iplen % 1) != 0) {
+		ipriv->stats[LenErrs6]++;
+		netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
+		goto err;
+	}
+
+	/* Rather than construct explicit pseudoheader, overwrite IPv6 header */
+	if(p->proto != ICMPv6) {
+		/* This code assumes no extension headers!!! */
+		netlog(icmp->f, Logicmp, "icmp error: extension header\n");
+		goto err;
+	}
+	/* same pseudoheader arrangement as set_cksum: vcf 0, proto 0, ICMPv6 in ttl */
+	memset(packet, 0, 4);
+	ttl = p->ttl;
+	p->ttl = p->proto;
+	p->proto = 0;
+	if(ptclcsum(bp, 0, iplen + IP6HDR)) {
+		ipriv->stats[CsumErrs6]++;
+		netlog(icmp->f, Logicmp, "icmp checksum error\n");
+		goto err;
+	}
+	p->proto = p->ttl;
+	p->ttl = ttl;
+
+	/* additional tests for some pkt types */
+	if (p->type == NbrSolicit   || p->type == NbrAdvert ||
+	    p->type == RouterAdvert || p->type == RouterSolicit ||
+	    p->type == RedirectV6) {
+		if(p->ttl != HOP_LIMIT) {
+			ipriv->stats[HoplimErrs6]++;
+			goto err;
+		}
+		if(p->code != 0) {
+			ipriv->stats[IcmpCodeErrs6]++;
+			goto err;
+		}
+
+		switch (p->type) {
+		case NbrSolicit:
+		case NbrAdvert:
+			/*
+			 * NOTE(review): only sizeof(IPICMP) bytes have been
+			 * checked so far; target and the option fields are
+			 * read without a further length check here.
+			 */
+			np = (Ndpkt*) p;
+			if(isv6mcast(np->target)) {
+				ipriv->stats[TargetErrs6]++;
+				goto err;
+			}
+			if(optexsts(np) && np->olen == 0) {
+				ipriv->stats[OptlenErrs6]++;
+				goto err;
+			}
+
+			/* a solicitation from the unspecified address must go to a solicited-node multicast address and carry no option */
+			if (p->type == NbrSolicit &&
+			    ipcmp(np->src, v6Unspecified) == 0)
+				if(!issmcast(np->dst) || optexsts(np)) {
+					ipriv->stats[AddrmxpErrs6]++;
+					goto err;
+				}
+
+			/* a multicast advertisement must not have the S flag set */
+			if(p->type == NbrAdvert)
+				if(isv6mcast(np->dst) &&
+				    (nhgets(np->icmpid) & Sflag)){
+					ipriv->stats[AddrmxpErrs6]++;
+					goto err;
+				}
+			break;
+
+		case RouterAdvert:
+			if(pktsz - sizeof(Ip6hdr) < 16) {
+				ipriv->stats[HlenErrs6]++;
+				goto err;
+			}
+			if(!islinklocal(p->src)) {
+				ipriv->stats[RouterAddrErrs6]++;
+				goto err;
+			}
+			/* walk the options: byte 1 of each is its length in 8-byte units, which must not be 0 */
+			sz = sizeof(IPICMP) + 8;
+			while (sz+1 < pktsz) {
+				osz = packet[sz+1];
+				if(osz <= 0) {
+					ipriv->stats[OptlenErrs6]++;
+					goto err;
+				}
+				sz += 8*osz;
+			}
+			break;
+
+		case RouterSolicit:
+			if(pktsz - sizeof(Ip6hdr) < 8) {
+				ipriv->stats[HlenErrs6]++;
+				goto err;
+			}
+			unsp = (ipcmp(p->src, v6Unspecified) == 0);
+			/* as above, and an unspecified source may not carry a SRC_LLADDR option */
+			sz = sizeof(IPICMP) + 8;
+			while (sz+1 < pktsz) {
+				osz = packet[sz+1];
+				if(osz <= 0 ||
+				    (unsp && packet[sz] == SRC_LLADDR)) {
+					ipriv->stats[OptlenErrs6]++;
+					goto err;
+				}
+				sz += 8*osz;
+			}
+			break;
+
+		case RedirectV6:
+			/* to be filled in */
+			break;
+
+		default:
+			goto err;
+		}
+	}
+	return 1;
+err:
+	ipriv->stats[InErrors6]++;
+	return 0;
+}
+
+/*
+ * Classify target with respect to ifc, holding ifc's read lock while
+ * looking:
+ *	Tuniproxy	ipproxyifc accepts the address on ifc;
+ *	Tunitent	it is a local address of ifc marked tentative;
+ *	Tunirany	it is a local address of ifc, not tentative;
+ *	0		none of the above.
+ */
+static int
+targettype(Fs *f, Ipifc *ifc, uchar *target)
+{
+	Iplifc *lifc;
+	int kind;
+
+	kind = 0;
+	RLOCK(ifc);
+	if(ipproxyifc(f, ifc, target))
+		kind = Tuniproxy;
+	else
+		for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+			if(ipcmp(lifc->local, target) != 0)
+				continue;
+			if(lifc->tentative)
+				kind = Tunitent;
+			else
+				kind = Tunirany;
+			break;
+		}
+	RUNLOCK(ifc);
+	return kind;
+}
+
+/*
+ * Receive routine for ICMPv6 (installed as Proto.rcv by icmp6init).
+ * Packets that fail valid(), or whose type is above Maxtype6, are
+ * dropped.  Otherwise:
+ *	EchoRequestV6	is answered in place with an EchoReplyV6;
+ *	UnreachableV6 and TimeExceedV6 (code 0) are handed to the
+ *			advise routine of the protocol named in the
+ *			quoted header, when that protocol has one
+ *			(advise then owns bp);
+ *	RouterAdvert and RouterSolicit are queued to the conversation
+ *			whose local port equals the message type;
+ *	NbrSolicit	may enter the sender in the ARP table and is
+ *			answered with icmpna when the target is one
+ *			of ours (or proxied);
+ *	NbrAdvert	enters the target in the ARP table;
+ *	everything else is queued to a matching conversation by
+ *			goticmpkt6.
+ * bp is consumed on every path.
+ */
+static void
+icmpiput6(Proto *icmp, Ipifc *ipifc, Block *bp)
+{
+	int refresh = 1;
+	char *msg, m2[128];
+	uchar pktflags;
+	uchar *packet = bp->rp;
+	uchar lsrc[IPaddrlen];
+	Block *r;
+	IPICMP *p = (IPICMP *)packet;
+	Icmppriv6 *ipriv = icmp->priv;
+	Iplifc *lifc;
+	Ndpkt* np;
+	Proto *pr;
+
+	if(!valid(icmp, ipifc, bp, ipriv) || p->type > Maxtype6)
+		goto raise;
+
+	ipriv->in[p->type]++;
+
+	switch(p->type) {
+	case EchoRequestV6:
+		r = mkechoreply6(bp, ipifc);
+		if(r == nil)
+			goto raise;
+		/* count the reply under the type that is actually sent */
+		ipriv->out[EchoReplyV6]++;
+		ipoput6(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
+		break;
+
+	case UnreachableV6:
+		if(p->code >= nelem(unreachcode))
+			msg = unreachcode[Icmp6_unknown];
+		else
+			msg = unreachcode[p->code];
+
+		/* step over our own headers to the quoted packet */
+		bp->rp += sizeof(IPICMP);
+		if(blocklen(bp) < 8){
+			ipriv->stats[LenErrs6]++;
+			goto raise;
+		}
+		p = (IPICMP *)bp->rp;
+		pr = Fsrcvpcolx(icmp->f, p->proto);
+		if(pr != nil && pr->advise != nil) {
+			(*pr->advise)(pr, bp, msg);
+			return;
+		}
+
+		/* nobody to advise: restore rp and deliver as an ordinary packet */
+		bp->rp -= sizeof(IPICMP);
+		goticmpkt6(icmp, bp, 0);
+		break;
+
+	case TimeExceedV6:
+		if(p->code == 0){
+			sprint(m2, "ttl exceeded at %I", p->src);
+
+			/* step over our own headers to the quoted packet */
+			bp->rp += sizeof(IPICMP);
+			if(blocklen(bp) < 8){
+				ipriv->stats[LenErrs6]++;
+				goto raise;
+			}
+			p = (IPICMP *)bp->rp;
+			pr = Fsrcvpcolx(icmp->f, p->proto);
+			if(pr && pr->advise) {
+				(*pr->advise)(pr, bp, m2);
+				return;
+			}
+			bp->rp -= sizeof(IPICMP);
+		}
+
+		goticmpkt6(icmp, bp, 0);
+		break;
+
+	case RouterAdvert:
+	case RouterSolicit:
+		/* a non-zero muxkey: matched on lport == type and raddr == dst */
+		goticmpkt6(icmp, bp, p->type);
+		break;
+
+	case NbrSolicit:
+		np = (Ndpkt*) p;
+		pktflags = 0;
+		switch (targettype(icmp->f, ipifc, np->target)) {
+		case Tunirany:
+			pktflags |= Oflag;
+			/* fall through */
+
+		case Tuniproxy:
+			if(ipcmp(np->src, v6Unspecified) != 0) {
+				arpenter(icmp->f, V6, np->src, np->lnaddr,
+					8*np->olen-2, 0);
+				pktflags |= Sflag;
+			}
+			if(ipv6local(ipifc, lsrc))
+				icmpna(icmp->f, lsrc,
+					(ipcmp(np->src, v6Unspecified) == 0?
+						v6allnodesL: np->src),
+					np->target, ipifc->mac, pktflags);
+			break;
+
+		case Tunitent:
+			/* not clear what needs to be done. send up
+			 * an icmp mesg saying don't use this address? */
+		default:
+			break;
+		}
+		/*
+		 * The solicitation is finished with on every path.  np
+		 * points into bp, so it may only be freed once icmpna has
+		 * returned (it used to be leaked when a reply was sent).
+		 */
+		freeblist(bp);
+		break;
+
+	case NbrAdvert:
+		np = (Ndpkt*) p;
+
+		/*
+		 * if the target address matches one of the local interface
+		 * addresses and the local interface address has tentative bit
+		 * set, insert into ARP table. this is so the duplicate address
+		 * detection part of ipconfig can discover duplication through
+		 * the arp table.
+		 */
+		lifc = iplocalonifc(ipifc, np->target);
+		if(lifc && lifc->tentative)
+			refresh = 0;
+		arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2,
+			refresh);
+		freeblist(bp);
+		break;
+
+	case PacketTooBigV6:
+	default:
+		goticmpkt6(icmp, bp, 0);
+		break;
+	}
+	return;
+raise:
+	freeblist(bp);
+}
+
+/*
+ * Format the ICMPv6 statistics into buf (len bytes): first every
+ * general counter as "name: value", then the received and sent counts
+ * of every message type that has a name in icmpnames6.  Types without
+ * a name are skipped.  Returns the number of bytes written.
+ */
+int
+icmpstats6(Proto *icmp6, char *buf, int len)
+{
+	Icmppriv6 *priv = icmp6->priv;
+	char *s, *end;
+	int t;
+
+	s = buf;
+	end = buf + len;
+
+	for(t = 0; t < Nstats6; t++)
+		s = seprint(s, end, "%s: %lud\n", statnames6[t], priv->stats[t]);
+
+	for(t = 0; t <= Maxtype6; t++){
+		if(icmpnames6[t] == nil)
+			continue;
+		s = seprint(s, end, "%s: %lud %lud\n", icmpnames6[t],
+			priv->in[t], priv->out[t]);
+	}
+	return s - buf;
+}
+
+
+/* import from icmp.c */
+extern int	icmpstate(Conv *c, char *state, int n);
+extern char*	icmpannounce(Conv *c, char **argv, int argc);
+extern char*	icmpconnect(Conv *c, char **argv, int argc);
+extern void	icmpclose(Conv *c);
+
+/*
+ * Allocate the ICMPv6 protocol descriptor together with its private
+ * counters, fill in the protocol entry points (connect, announce,
+ * state and close are shared with icmp.c), and hand the descriptor to
+ * Fsproto for the given Fs.
+ */
+void
+icmp6init(Fs *fs)
+{
+	Proto *p;
+
+	p = smalloc(sizeof(Proto));
+	p->priv = smalloc(sizeof(Icmppriv6));
+
+	/* identity and sizing */
+	p->name = "icmpv6";
+	p->ipproto = ICMPv6;
+	p->nc = 16;
+	p->ptclsize = sizeof(Icmpcb6);
+
+	/* conversation management */
+	p->create = icmpcreate6;
+	p->connect = icmpconnect;
+	p->announce = icmpannounce;
+	p->close = icmpclose;
+	p->ctl = icmpctl6;
+	p->gc = nil;
+
+	/* packet input and reporting */
+	p->rcv = icmpiput6;
+	p->advise = icmpadvise6;
+	p->state = icmpstate;
+	p->stats = icmpstats6;
+
+	Fsproto(fs, p);
+}
diff --git a/src/9vx/a/ip/igmp.c b/src/9vx/a/ip/igmp.c
@@ -0,0 +1,294 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+enum
+{
+	IGMP_IPHDRSIZE	= 20,		/* size of ip header */
+	IGMP_HDRSIZE	= 8,		/* size of IGMP header */
+	IP_IGMPPROTO	= 2,		/* value stored in the ip header's proto field */
+
+	IGMPquery	= 1,		/* message types: low 4 bits of vertype */
+	IGMPreport	= 2,
+
+	MSPTICK		= 100,		/* ms igmpproc sleeps between passes over the report lists */
+	MAXTIMEOUT	= 10000/MSPTICK,	/* at most 10 secs for a response */
+};
+
+typedef struct IGMPpkt IGMPpkt;
+typedef char byte;		/* plain char: signedness is whatever the compiler gives char */
+
+struct IGMPpkt			/* an IGMP message together with the ip header in front of it */
+{
+	/* ip header */
+	byte	vihl;		/* Version and header length */
+	byte	tos;		/* Type of service */
+	byte	len[2];		/* packet length (including headers) */
+	byte	id[2];		/* Identification */
+	byte	frag[2];	/* Fragment information */
+	byte	Unused;		/* same position as ttl in Ilhdr */
+	byte	proto;		/* Protocol */
+	byte	cksum[2];	/* checksum of ip portion */
+	byte	src[IPaddrlen];		/* Ip source */
+	byte	dst[IPaddrlen];		/* Ip destination */
+	/* NOTE(review): IGMP_IPHDRSIZE is 20, which fits 4-byte src/dst; confirm IPaddrlen here */
+	/* igmp header */
+	byte	vertype;	/* version and type */
+	byte	unused;
+	byte	igmpcksum[2];		/* checksum of igmp portion */
+	byte	group[IPaddrlen];	/* multicast group */
+};
+
+/*
+ *  lists for group reports
+ */
+typedef struct IGMPrep IGMPrep;
+struct IGMPrep
+{
+	IGMPrep		*next;		/* next list on igmpalloc.reports */
+	Media		*m;		/* medium the reports go out on */
+	int		ticks;		/* igmpproc passes so far; compared with each group's timeout */
+	Multicast	*multi;		/* groups that still have to be reported */
+};
+
+typedef struct IGMP IGMP;
+struct IGMP
+{
+	Lock lk;		/* first member: the code locks with lock(&igmpalloc) */
+
+	Rendez	r;		/* igmpproc sleeps here until a report list exists */
+	IGMPrep	*reports;	/* one pending report list per medium */
+};
+
+IGMP igmpalloc;		/* pending report lists, their lock and igmpproc's rendezvous */
+
+	Proto	igmp;		/* protocol descriptor, filled in by igmpinit */
+extern	Fs	fs;
+
+static struct Stats		/* packet counters printed by igmpstats */
+{
+	ulong 	inqueries;	/* queries received (igmpiput) */
+	ulong	outqueries;	/* never incremented in this file */
+	ulong	inreports;	/* reports received (igmpiput) */
+	ulong	outreports;	/* reports sent (igmpsendreport) */
+} stats;
+
+/* Send an IGMP report for group addr on medium m, addressed to Ipallsys with a TTL of 1. */
+void
+igmpsendreport(Media *m, byte *addr)
+{
+	IGMPpkt *p;
+	Block *bp;
+	bp = allocb(sizeof(IGMPpkt));
+	if(bp == nil)
+		return;
+	p = (IGMPpkt*)bp->wp;
+	bp->wp += sizeof(IGMPpkt);
+	memset(bp->rp, 0, sizeof(IGMPpkt));	/* clear first: clearing after setting vihl wiped it */
+	p->vihl = IP_VER4;
+	hnputl(p->src, Mediagetaddr(m));
+	hnputl(p->dst, Ipallsys);
+	p->vertype = (1<<4) | IGMPreport;
+	p->proto = IP_IGMPPROTO;
+	memmove(p->group, addr, IPaddrlen);
+	hnputs(p->igmpcksum, ptclcsum(bp, IGMP_IPHDRSIZE, IGMP_HDRSIZE));
+	netlog(Logigmp, "igmpreport %I\n", p->group);
+	stats.outreports++;
+	ipoput4(bp, 0, 1, DFLTTOS, nil);	/* TTL of 1 */
+}
+
+static int
+isreport(void *a)
+{
+	USED(a);	/* condition function igmpproc passes to sleep(); the argument is not needed */
+	return igmpalloc.reports != 0;	/* true while any report list is pending */
+}
+/* Kernel process started by igmpinit: while report lists exist, send each group's
+ * report once the list's tick count reaches the group's timeout; one pass per MSPTICK ms. */
+void
+igmpproc(void *a)
+{
+	IGMPrep *rp, **lrp;
+	Multicast *mp, **lmp;
+	byte ip[IPaddrlen];
+
+	USED(a);
+
+	for(;;){
+		sleep(&igmpalloc.r, isreport, 0);	/* woken by igmpiput when it queues a list */
+		for(;;){
+			lock(&igmpalloc);
+
+			if(igmpalloc.reports == nil)
+				break;		/* leaves with the lock held; released after the loop */
+	
+			/* look for a single report */
+			lrp = &igmpalloc.reports;
+			mp = nil;
+			for(rp = *lrp; rp; rp = *lrp){
+				rp->ticks++;
+				lmp = &rp->multi;
+				for(mp = *lmp; mp; mp = *lmp){
+					if(rp->ticks >= mp->timeout){
+						*lmp = mp->next;	/* unlink the group that is due */
+						break;
+					}
+					lmp = &mp->next;
+				}
+				if(mp != nil)
+					break;		/* rp and mp identify the report to send */
+
+				if(rp->multi != nil){
+					lrp = &rp->next;
+					continue;
+				} else {
+					*lrp = rp->next;	/* nothing left to report on this medium */
+					free(rp);
+				}
+			}
+			unlock(&igmpalloc);
+
+			if(mp){
+				/* do a single report and try again */
+				hnputl(ip, mp->addr);
+				igmpsendreport(rp->m, ip);
+				free(mp);
+				continue;
+			}
+
+			tsleep(&up->sleep, return0, 0, MSPTICK);
+		}
+		unlock(&igmpalloc);
+	}
+
+}
+
+/*
+ * Handle an IGMP packet received on medium m.  A query starts a report
+ * list for m with a random timeout per group; a report from another
+ * host cancels our pending report for that group.  bp is always freed.
+ */
+void
+igmpiput(Media *m, Ipifc *, Block *bp)
+{
+	int n;
+	IGMPpkt *ghp;
+	Ipaddr group;
+	IGMPrep *rp, **lrp;
+	Multicast *mp, **lmp, *next;
+
+	ghp = (IGMPpkt*)(bp->rp);
+	n = blocklen(bp);
+	if(n < IGMP_IPHDRSIZE+IGMP_HDRSIZE){
+		netlog(Logigmp, "igmpiput: bad len\n");
+		goto error;
+	}
+	/* only look inside the packet once it is known to be long enough */
+	netlog(Logigmp, "igmpiput: %d %I\n", ghp->vertype, ghp->group);
+	if((ghp->vertype>>4) != 1){
+		netlog(Logigmp, "igmpiput: bad igmp type\n");
+		goto error;
+	}
+	if(ptclcsum(bp, IGMP_IPHDRSIZE, IGMP_HDRSIZE)){
+		netlog(Logigmp, "igmpiput: checksum error %I\n", ghp->src);
+		goto error;
+	}
+
+	group = nhgetl(ghp->group);
+	lock(&igmpalloc);
+	switch(ghp->vertype & 0xf){
+	case IGMPquery:
+		/* start reporting groups that we're a member of. */
+		stats.inqueries++;
+		for(rp = igmpalloc.reports; rp; rp = rp->next)
+			if(rp->m == m)
+				break;
+		if(rp != nil)
+			break;	/* already reporting */
+
+		mp = Mediacopymulti(m);
+		if(mp == nil)
+			break;
+
+		rp = malloc(sizeof(*rp));
+		if(rp == nil){
+			/* no report list to own the copied groups: don't leak them */
+			for(; mp; mp = next){
+				next = mp->next;
+				free(mp);
+			}
+			break;
+		}
+
+		rp->m = m;
+		rp->multi = mp;
+		rp->ticks = 0;
+		for(; mp; mp = mp->next)
+			mp->timeout = nrand(MAXTIMEOUT);
+		rp->next = igmpalloc.reports;
+		igmpalloc.reports = rp;
+		wakeup(&igmpalloc.r);
+		break;
+	case IGMPreport:
+		/* find report list for this medium */
+		stats.inreports++;
+		lrp = &igmpalloc.reports;
+		for(rp = *lrp; rp; rp = *lrp){
+			if(rp->m == m)
+				break;
+			lrp = &rp->next;
+		}
+		if(rp == nil)
+			break;
+
+		/* if someone else has reported a group, we don't have to. */
+		lmp = &rp->multi;
+		for(mp = *lmp; mp; mp = *lmp){
+			if(mp->addr == group){
+				*lmp = mp->next;
+				free(mp);
+				break;
+			}
+			lmp = &mp->next;
+		}
+		break;
+	}
+	unlock(&igmpalloc);
+
+error:
+	freeb(bp);
+}
+
+/* Format the query/report counters into buf; the counters are ulong, hence %lud. */
+int
+igmpstats(char *buf, int len)
+{
+	return snprint(buf, len, "\trcvd %lud %lud\n\tsent %lud %lud\n",
+		stats.inqueries, stats.inreports, stats.outqueries, stats.outreports);
+}
+
+/* Fill in the global igmp Proto, start the igmpproc kproc and register the protocol with fs. */
+void
+igmpinit(Fs *fs)
+{
+	igmp.name = "igmp";
+	igmp.ipproto = IP_IGMPPROTO;
+	igmp.nc = 0;
+	igmp.ptclsize = 0;
+	/* no conversations: only the receive and statistics hooks are set */
+	igmp.connect = nil;
+	igmp.announce = nil;
+	igmp.state = nil;
+	igmp.ctl = nil;
+	igmp.close = nil;
+	igmp.rcv = igmpiput;
+	igmp.stats = igmpstats;
+	igmpreportfn = igmpsendreport;
+	kproc("igmpproc", igmpproc, 0);
+	Fsproto(fs, &igmp);
+}
diff --git a/src/9vx/a/ip/il.c b/src/9vx/a/ip/il.c
@@ -0,0 +1,1408 @@
+#include	"u.h"
+#include	"lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"error.h"
+
+#include	"ip.h"
+
+enum				/* Connection state */
+{
+	Ilclosed,		/* values index ilstates[], so the order must match */
+	Ilsyncer,		/* set by ilstart for IL_CONNECT: sync sent, waiting for the reply */
+	Ilsyncee,		/* set by iliput for a call that arrived on a listener */
+	Ilestablished,
+	Illistening,		/* set by ilstart for IL_LISTEN */
+	Ilclosing,		/* close sent; ilackproc repeats it until answered or given up */
+	Ilopening,		/* only for file server */
+};
+
+char	*ilstates[] = 	/* printable names, indexed by connection state */
+{ 
+	"Closed",
+	"Syncer",
+	"Syncee",
+	"Established",
+	"Listen",
+	"Closing",
+	"Opening",		/* only for file server */
+};
+
+enum				/* Packet types */
+{
+	Ilsync,			/* values go into Ilhdr.iltype and index iltype[] */
+	Ildata,
+	Ildataquery,		/* data that also asks for a state reply (sent by ilrexmit) */
+	Ilack,
+	Ilquery,		/* answered with Ilstate */
+	Ilstate,
+	Ilclose,		/* highest valid type; larger wire values are unknown */
+};
+
+char	*iltype[] = 	/* printable names, indexed by packet type (valid up to Ilclose) */
+{	
+	"sync",
+	"data",
+	"dataquery",
+	"ack",
+	"query",
+	"state",
+	"close" 
+};
+
+enum
+{
+	Seconds		= 1000,		/* the timers below are in ms */
+	Iltickms 	= 50,		/* time base */
+	AckDelay	= 2*Iltickms,	/* max time twixt message rcvd & ack sent */
+	MaxTimeout 	= 30*Seconds,	/* max time between rexmit */
+	QueryTime	= 10*Seconds,	/* time between subsequent queries */
+	DeathTime	= 30*QueryTime,	/* ilackproc hangs up when nothing was received for this long */
+
+	MaxRexmit 	= 16,		/* max retransmissions before hangup */
+	Defaultwin	= 20,		/* initial receive window, counted in packet ids */
+
+	LogAGain	= 3,		/* Ilcb.delay and Ilcb.rate are kept scaled by AGain */
+	AGain		= 1<<LogAGain,
+	LogDGain	= 2,		/* Ilcb.mdev is kept scaled by DGain */
+	DGain		= 1<<LogDGain,
+
+	DefByteRate	= 100,		/* assume a megabit link */
+	DefRtt		= 50,		/* cross country on a great day */
+
+	Maxrq		= 64*1024,	/* read queue limit given to qopen; also the drop threshold for data */
+};
+
+enum
+{
+	Nqt=	8,	/* query slots 1..Nqt in Ilcb.qt; slot 0 mirrors the latest one */
+};
+
+typedef struct Ilcb Ilcb;
+struct Ilcb			/* Control block */
+{
+	int	state;		/* Connection state */
+	Conv	*conv;		/* owning conversation; set by ilstart and iliput */
+	QLock	ackq;		/* Unacknowledged queue */
+	Block	*unacked;	/* copies of sent packets, oldest first, linked through list */
+	Block	*unackedtail;	/* last block on unacked */
+	ulong	unackedbytes;	/* sum of the lengths of the blocks on unacked */
+	QLock	outo;		/* Out of order packet queue */
+	Block	*outoforder;	/* received data sorted by id, waiting for ilpullup */
+	ulong	next;		/* Id of next to send */
+	ulong	recvd;		/* Last packet received */
+	ulong	acksent;	/* Last packet acked */
+	ulong	start;		/* Local start id */
+	ulong	rstart;		/* Remote start id */
+	int	window;		/* Maximum receive window */
+	int	rxquery;	/* number of queries on this connection */
+	int	rxtot;		/* number of retransmits on this connection */
+	int	rexmit;		/* number of retransmits of *unacked */
+	ulong	qt[Nqt+1];	/* state table for query messages */
+	int	qtx;		/* ... index into qt */
+
+	/* if set, fasttimeout causes a connection request to terminate after 4*Iltickms */
+	int	fasttimeout;
+
+	/* timers */
+	ulong	lastxmit;	/* time of last xmit */
+	ulong	lastrecv;	/* time of last recv */
+	ulong	timeout;	/* retransmission time for *unacked */
+	ulong	acktime;	/* time to send next ack */
+	ulong	querytime;	/* time to send next query */
+
+	/* adaptive measurements */
+	int	delay;		/* Average of the fixed rtt delay */
+	int	rate;		/* Average uchar rate */
+	int	mdev;		/* Mean deviation of rtt */
+	int	maxrtt;		/* largest rtt seen */
+	ulong	rttack;		/* The ack we are waiting for */
+	int	rttlen;		/* Length of rttack packet */
+	uvlong	rttstart;	/* Time we issued rttack packet */
+};
+
+enum
+{
+	IL_IPSIZE 	= 20,	/* bytes of ip header at the front of Ilhdr */
+	IL_HDRSIZE	= 18,	/* bytes of il header: ilsum through ilack */
+	IL_LISTEN	= 0,	/* type arguments for ilstart */
+	IL_CONNECT	= 1,
+	IP_ILPROTO	= 40,	/* value stored in the ip header's proto field */
+};
+
+typedef struct Ilhdr Ilhdr;
+struct Ilhdr			/* ip header (IL_IPSIZE bytes) followed by the il header (IL_HDRSIZE bytes) */
+{
+	uchar	vihl;		/* Version and header length */
+	uchar	tos;		/* Type of service */
+	uchar	length[2];	/* packet length */
+	uchar	id[2];		/* Identification */
+	uchar	frag[2];	/* Fragment information */
+	uchar	ttl;		/* Time to live */
+	uchar	proto;		/* Protocol */
+	uchar	cksum[2];	/* Header checksum */
+	uchar	src[4];		/* Ip source */
+	uchar	dst[4];		/* Ip destination */
+	uchar	ilsum[2];	/* Checksum including header */
+	uchar	illen[2];	/* Packet length */
+	uchar	iltype;		/* Packet type */
+	uchar	ilspec;		/* Special */
+	uchar	ilsrc[2];	/* Src port */
+	uchar	ildst[2];	/* Dst port */
+	uchar	ilid[4];	/* Sequence id */
+	uchar	ilack[4];	/* Acked sequence */
+};
+
+enum				/* indices into Ilpriv.stats and statnames */
+{
+	InMsgs,
+	OutMsgs,
+	CsumErrs,		/* checksum errors */
+	HlenErrs,		/* header length error */
+	LenErrs,		/* short packet */
+	OutOfOrder,		/* out of order */
+	Retrans,		/* retransmissions */
+	DupMsg,
+	DupBytes,
+	DroppedMsgs,		/* data dropped because the read queue was full */
+
+	Nstats,			/* number of counters */
+};
+
+static char *statnames[] =	/* labels printed by ilxstats, one per counter */
+{
+[InMsgs]	"InMsgs",
+[OutMsgs]	"OutMsgs",
+[CsumErrs]	"CsumErrs",
+[HlenErrs]	"HlenErr",	/* NOTE(review): printed without the trailing s of the enum name */
+[LenErrs]	"LenErrs",
+[OutOfOrder]	"OutOfOrder",
+[Retrans]	"Retrans",
+[DupMsg]	"DupMsg",
+[DupBytes]	"DupBytes",
+[DroppedMsgs]	"DroppedMsgs",
+};
+
+typedef struct Ilpriv Ilpriv;
+struct Ilpriv			/* per-protocol private data (Proto.priv) */
+{
+	Ipht	ht;		/* conversations, looked up by iliput with iphtlook */
+
+	ulong	stats[Nstats];	/* counters printed by ilxstats */
+	/* older individual counters; of these only rexmit is updated in this file */
+	ulong	csumerr;		/* checksum errors */
+	ulong	hlenerr;		/* header length error */
+	ulong	lenerr;			/* short packet */
+	ulong	order;			/* out of order */
+	ulong	rexmit;			/* retransmissions */
+	ulong	dup;
+	ulong	dupb;
+
+	/* keeping track of the ack kproc */
+	int	ackprocstarted;
+	QLock	apl;
+};
+
+/* state for query/dataquery messages */
+
+
+void	ilrcvmsg(Conv*, Block*);
+void	ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
+void	ilackq(Ilcb*, Block*);
+void	ilprocess(Conv*, Ilhdr*, Block*);
+void	ilpullup(Conv*);
+void	ilhangup(Conv*, char*);
+void	ilfreeq(Ilcb*);
+void	ilrexmit(Ilcb*);
+void	ilbackoff(Ilcb*);
+void	ilsettimeout(Ilcb*);
+char*	ilstart(Conv*, int, int);
+void	ilackproc(void*);
+void	iloutoforder(Conv*, Ilhdr*, Block*);
+void	iliput(Proto*, Ipifc*, Block*);
+void	iladvise(Proto*, Block*, char*);
+int	ilnextqt(Ilcb*);
+void	ilcbinit(Ilcb*);
+int	later(ulong, ulong, char*);
+void	ilreject(Fs*, Ilhdr*);
+void	illocalclose(Conv *c);
+	int 	ilcksum = 1;	/* nonzero: ilkick, ilsendctl and ilreject checksum what they send */
+static 	int 	initseq = 25001;	/* not referenced anywhere else in this file */
+static	ulong	scalediv, scalemul;	/* fast ticks to ms: ticks*scalemul/scalediv; set by inittimescale */
+static	char	*etime = "connection timed out";
+
+/*
+ * Start an outgoing call on c.  Huge hack: a "!fasttimeout" suffix on the
+ * address is cut off and makes the connection attempt give up quickly.
+ */
+static char*
+ilconnect(Conv *c, char **argv, int argc)
+{
+	char *err, *suffix;
+	int quick;
+
+	suffix = nil;
+	if(argc > 1)
+		suffix = strstr(argv[1], "!fasttimeout");
+	quick = suffix != nil;
+	if(quick)
+		*suffix = 0;
+	err = Fsstdconnect(c, argv, argc);
+	if(err == nil)
+		err = ilstart(c, IL_CONNECT, quick);
+	return err;
+}
+
+/* Describe c in state[n]: state name, queue lengths, and the timing and retransmit figures. */
+static int
+ilstate(Conv *c, char *state, int n)
+{
+	Ilcb *cb = (Ilcb*)c->ptcl;
+	int qin, qout;
+
+	qin = c->rq != nil ? qlen(c->rq) : 0;
+	qout = c->wq != nil ? qlen(c->wq) : 0;
+	return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d\n",
+		ilstates[cb->state], qin, qout, cb->delay>>LogAGain, cb->rate>>LogAGain,
+		cb->mdev>>LogDGain, cb->unackedbytes, cb->rxtot, cb->rxquery, cb->maxrtt);
+}
+
+/* Report whether c is in use: true in every
+ * connection state except Ilclosed. */
+static int
+ilinuse(Conv *c)
+{
+	Ilcb *cb = (Ilcb*)c->ptcl;
+
+	return cb->state != Ilclosed;
+}
+
+/*
+ * Announce on c and put it into the listening state.
+ * Called with c locked.
+ */
+static char*
+ilannounce(Conv *c, char **argv, int argc)
+{
+	char *err;
+
+	err = Fsstdannounce(c, argv, argc);
+	if(err == nil)
+		err = ilstart(c, IL_LISTEN, 0);
+	if(err == nil)
+		Fsconnected(c, nil);
+	return err;
+}
+
+/* Mark c closed, take it out of the protocol's hash table and clear its local address and port. */
+void
+illocalclose(Conv *c)
+{
+	Ilpriv *priv = c->p->priv;
+	Ilcb *cb = (Ilcb*)c->ptcl;
+
+	cb->state = Ilclosed;
+	/* unhash before the local address and port are wiped */
+	iphtrem(&priv->ht, c);
+	c->lport = 0;
+	ipmove(c->laddr, IPnoaddr);
+}
+
+/*
+ * Local close of c: shut its queues, then either send a close and wait
+ * in Ilclosing (call up or being set up) or simply unhash a listener.
+ * Blocks still queued on the control block are released in every case.
+ * This is the Proto close hook installed by ilinit.
+ */
+static void
+ilclose(Conv *c)
+{
+	Ilcb *cb = (Ilcb*)c->ptcl;
+	int st;
+
+	qclose(c->rq);
+	qclose(c->wq);
+	qclose(c->eq);
+
+	st = cb->state;
+	if(st == Illistening)
+		illocalclose(c);
+	else if(st == Ilsyncer || st == Ilsyncee || st == Ilestablished){
+		cb->state = Ilclosing;
+		ilsettimeout(cb);
+		ilsendctl(c, nil, Ilclose, cb->next, cb->recvd, 0);
+	}
+
+	/* Ilclosing and Ilclosed need nothing more; Ilopening is likewise left alone */
+	ilfreeq(cb);
+}
+
+/*
+ * Write-queue bypass routine (see ilcreate): wrap the data in bp in an
+ * IL data header, keep a copy on the unacked queue and pass the packet
+ * to ip.  Data written while no call is up or being set up is dropped
+ * and the read queue is hung up.
+ */
+void
+ilkick(void *x, Block *bp)
+{
+	Conv *c = x;
+	Ilcb *cb;
+	Ilhdr *hdr;
+	Ilpriv *priv;
+	Fs *fs;
+	int datalen, st;
+	ulong seq, acked;
+
+	fs = c->p->f;
+	priv = c->p->priv;
+	cb = (Ilcb*)c->ptcl;
+	if(bp == nil)
+		return;
+
+	st = cb->state;
+	if(st == Ilclosed || st == Illistening || st == Ilclosing){
+		freeblist(bp);
+		qhangup(c->rq, nil);
+		return;
+	}
+
+	/* make room in front of the data for the ip and il headers */
+	datalen = blocklen(bp);
+	bp = padblock(bp, IL_IPSIZE+IL_HDRSIZE);
+	hdr = (Ilhdr*)bp->rp;
+
+	/* ip part */
+	hdr->vihl = IP_VER4;
+	hdr->proto = IP_ILPROTO;
+	hdr->frag[0] = 0;
+	hdr->frag[1] = 0;
+	v6tov4(hdr->src, c->laddr);
+	v6tov4(hdr->dst, c->raddr);
+
+	/* il part that does not depend on the sequence state */
+	hdr->iltype = Ildata;
+	hdr->ilspec = 0;
+	hnputs(hdr->illen, datalen+IL_HDRSIZE);
+	hnputs(hdr->ilsrc, c->lport);
+	hnputs(hdr->ildst, c->rport);
+	hdr->ilsum[0] = 0;
+	hdr->ilsum[1] = 0;
+
+	/* ids are handed out and the copy is queued with ackq held */
+	qlock(&cb->ackq);
+	seq = cb->next++;
+	acked = cb->recvd;
+	hnputl(hdr->ilid, seq);
+	hnputl(hdr->ilack, acked);
+	cb->acksent = acked;
+	cb->acktime = NOW + AckDelay;
+	/* Checksum of ilheader plus data (not ip & no pseudo header) */
+	if(ilcksum)
+		hnputs(hdr->ilsum, ptclcsum(bp, IL_IPSIZE, datalen+IL_HDRSIZE));
+	ilackq(cb, bp);
+	qunlock(&cb->ackq);
+
+	/* Start the round trip timer for this packet if the timer is free */
+	if(cb->rttack == 0){
+		cb->rttack = seq;
+		cb->rttstart = fastticks(nil);
+		cb->rttlen = datalen + IL_IPSIZE + IL_HDRSIZE;
+	}
+	if(later(NOW, cb->timeout, nil))
+		ilsettimeout(cb);
+	ipoput4(fs, bp, 0, c->ttl, c->tos, c);
+	priv->stats[OutMsgs]++;
+}
+
+static void
+ilcreate(Conv *c)
+{
+	c->rq = qopen(Maxrq, 0, 0, c);	/* read queue, opened with limit Maxrq */
+	c->wq = qbypass(ilkick, c);	/* writes are handed to ilkick with c as its argument */
+}
+
+/* Write one "name: count" line per IL counter into buf (len bytes); returns the bytes used. */
+int
+ilxstats(Proto *il, char *buf, int len)
+{
+	Ilpriv *priv = il->priv;
+	char *cur, *end;
+	int n;
+
+	cur = buf;
+	end = buf+len;
+	for(n = 0; n < Nstats; n++)
+		cur = seprint(cur, end, "%s: %lud\n", statnames[n], priv->stats[n]);
+	return cur - buf;
+}
+
+/* Append a private copy of bp to the tail of the unacked queue, in case the
+ * original gets lost and has to be sent again. */
+void
+ilackq(Ilcb *ic, Block *bp)
+{
+	Block *copy;
+	int len;
+
+	len = blocklen(bp);
+	copy = copyblock(bp, len);
+	copy->list = nil;
+	if(ic->unacked == nil)
+		ic->unacked = copy;
+	else
+		ic->unackedtail->list = copy;
+	ic->unackedtail = copy;
+	ic->unackedbytes += len;
+}
+
+/*
+ * Fold the round trip just measured into the delay, rate and mdev estimates; bp carried the ack.
+ */
+static
+void
+ilrttcalc(Ilcb *ic, Block *bp)
+{
+	int rtt, tt, pt, delay, rate;
+	uvlong now, ms;
+
+	now = fastticks(nil);
+	if(now < ic->rttstart)
+		return;			/* clock went backwards: no usable sample */
+	ms = ((now - ic->rttstart)*scalemul)/scalediv;	/* stay in 64 bits until scaled to ms */
+	if(ms > 120000)
+		return;			/* implausibly long: ignore the sample */
+	rtt = ms;
+	delay = ic->delay;
+	rate = ic->rate;
+	/* this block had to be transmitted after the one acked so count its size */
+	ic->rttlen += blocklen(bp)  + IL_IPSIZE + IL_HDRSIZE;
+	if(ic->rttlen < 256){
+		/* guess fixed delay as rtt of small packets */
+		delay += rtt - (delay>>LogAGain);
+		if(delay < AGain)
+			delay = AGain;
+		ic->delay = delay;
+	} else {
+		/* if packet took longer than avg rtt delay, recalc rate */
+		tt = rtt - (delay>>LogAGain);
+		if(tt > 0){
+			rate += ic->rttlen/tt - (rate>>LogAGain);
+			if(rate < AGain)
+				rate = AGain;
+			ic->rate = rate;
+		}
+	}
+	pt = ic->rttlen/(rate>>LogAGain) + (delay>>LogAGain);	/* predicted rtt */
+	ic->mdev += abs(rtt-pt) - (ic->mdev>>LogDGain);		/* mdev */
+	if(rtt > ic->maxrtt)
+		ic->maxrtt = rtt;
+}
+
+/*
+ * The peer has acknowledged everything up to ackto: finish the round
+ * trip measurement if this is the ack being timed, then free the acked
+ * packets from the head of the unacked queue.  bp carried the ack.
+ */
+void
+ilackto(Ilcb *ic, ulong ackto, Block *bp)
+{
+	Block *done;
+
+	if(ic->rttack == ackto)
+		ilrttcalc(ic, bp);
+	/* Cancel if we've passed the packet we were interested in */
+	if(ic->rttack <= ackto)
+		ic->rttack = 0;
+
+	qlock(&ic->ackq);
+	for(done = ic->unacked; done != nil; done = ic->unacked){
+		if(ackto < nhgetl(((Ilhdr*)done->rp)->ilid))
+			break;
+		ic->unacked = done->list;
+		done->list = nil;
+		ic->unackedbytes -= blocklen(done);
+		freeblist(done);
+		/* something was acked: restart the retransmit count and timer */
+		ic->rexmit = 0;
+		ilsettimeout(ic);
+	}
+	qunlock(&ic->ackq);
+}
+/* Protocol receive hook: validate an incoming IL packet, find or create its conversation, process it. */
+void
+iliput(Proto *il, Ipifc *dummy, Block *bp)
+{
+	char *st;
+	Ilcb *ic;
+	Ilhdr *ih;
+	uchar raddr[IPaddrlen];
+	uchar laddr[IPaddrlen];
+	ushort sp, dp, csum;
+	int plen, illen;
+	Conv *new, *s;
+	Ilpriv *ipriv;
+
+	ipriv = il->priv;
+	ih = (Ilhdr *)bp->rp;
+	plen = blocklen(bp);
+	if(plen < IL_IPSIZE+IL_HDRSIZE){
+		netlog(il->f, Logil, "il: hlenerr\n");
+		ipriv->stats[HlenErrs]++;
+		goto raise;
+	}
+
+	/* illen must cover the il header (ilpullup subtracts IL_HDRSIZE) and fit in the packet */
+	illen = nhgets(ih->illen);
+	if(illen < IL_HDRSIZE || illen+IL_IPSIZE > plen){
+		netlog(il->f, Logil, "il: lenerr\n");
+		ipriv->stats[LenErrs]++;
+		goto raise;
+	}
+
+	sp = nhgets(ih->ildst);
+	dp = nhgets(ih->ilsrc);
+	v4tov6(raddr, ih->src);
+	v4tov6(laddr, ih->dst);
+
+	if((csum = ptclcsum(bp, IL_IPSIZE, illen)) != 0) {
+		if(ih->iltype > Ilclose)
+			st = "?";
+		else
+			st = iltype[ih->iltype];
+		ipriv->stats[CsumErrs]++;	/* two %ux below: computed sum, then the sum field as received */
+		netlog(il->f, Logil, "il: cksum %ux %ux, pkt(%s id %lud ack %lud %I/%d->%d)\n",
+			csum, nhgets(ih->ilsum), st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
+		goto raise;
+	}
+
+	QLOCK(il);
+	s = iphtlook(&ipriv->ht, raddr, dp, laddr, sp);
+	if(s == nil){
+		if(ih->iltype == Ilsync)
+			ilreject(il->f, ih);		/* no listener */
+		QUNLOCK(il);
+		goto raise;
+	}
+
+	ic = (Ilcb*)s->ptcl;
+	if(ic->state == Illistening){
+		if(ih->iltype != Ilsync){
+			QUNLOCK(il);
+			if(ih->iltype > Ilclose)
+				st = "?";
+			else
+				st = iltype[ih->iltype];
+			ilreject(il->f, ih);		/* no channel and not sync */
+			netlog(il->f, Logil, "il: no channel, pkt(%s id %lud ack %lud %I/%ud->%ud)\n",
+				st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
+			goto raise;
+		}
+
+		new = Fsnewcall(s, raddr, dp, laddr, sp, V4);
+		if(new == nil){
+			QUNLOCK(il);
+			netlog(il->f, Logil, "il: bad newcall %I/%ud->%ud\n", raddr, sp, dp);
+			ilsendctl(s, ih, Ilclose, 0, nhgetl(ih->ilid), 0);
+			goto raise;
+		}
+		s = new;
+
+		ic = (Ilcb*)s->ptcl;
+
+		ic->conv = s;
+		ic->state = Ilsyncee;
+		ilcbinit(ic);
+		ic->rstart = nhgetl(ih->ilid);
+		iphtadd(&ipriv->ht, s);
+	}
+
+	QLOCK(s);		/* take the conversation before letting go of the protocol */
+	QUNLOCK(il);
+	if(waserror()){
+		QUNLOCK(s);
+		nexterror();
+	}
+	ilprocess(s, ih, bp);
+	QUNLOCK(s);
+	poperror();
+	return;
+raise:
+	freeblist(bp);
+}
+/* Run the IL state machine on one received packet (header h in bp); bp is consumed on every path. */
+void
+_ilprocess(Conv *s, Ilhdr *h, Block *bp)
+{
+	Ilcb *ic;
+	ulong id, ack;
+	Ilpriv *priv;
+
+	id = nhgetl(h->ilid);
+	ack = nhgetl(h->ilack);
+	ic = (Ilcb*)s->ptcl;
+	ic->lastrecv = NOW;
+	ic->querytime = NOW + QueryTime;
+	priv = s->p->priv;
+	priv->stats[InMsgs]++;
+
+	switch(ic->state) {
+	default:
+		netlog(s->p->f, Logil, "il: unknown state %d\n", ic->state);	/* fall through */
+	case Ilclosed:
+		freeblist(bp);
+		break;
+	case Ilsyncer:
+		switch(h->iltype) {
+		default:
+			break;
+		case Ilsync:
+			if(ack != ic->start)
+				ilhangup(s, "connection rejected");
+			else {
+				ic->recvd = id;
+				ic->rstart = id;
+				ilsendctl(s, nil, Ilack, ic->next, ic->recvd, 0);
+				ic->state = Ilestablished;
+				ic->fasttimeout = 0;
+				ic->rexmit = 0;
+				Fsconnected(s, nil);
+				ilpullup(s);
+			}
+			break;
+		case Ilclose:
+			if(ack == ic->start)
+				ilhangup(s, "connection rejected");
+			break;
+		}
+		freeblist(bp);
+		break;
+	case Ilsyncee:
+		switch(h->iltype) {
+		default:
+			break;
+		case Ilsync:
+			if(id != ic->rstart || ack != 0){
+				illocalclose(s);
+			} else {
+				ic->recvd = id;
+				ilsendctl(s, nil, Ilsync, ic->start, ic->recvd, 0);
+			}
+			break;
+		case Ilack:
+			if(ack == ic->start) {
+				ic->state = Ilestablished;
+				ic->fasttimeout = 0;
+				ic->rexmit = 0;
+				ilpullup(s);
+			}
+			break;
+		case Ildata:
+			if(ack == ic->start) {
+				ic->state = Ilestablished;
+				ic->fasttimeout = 0;
+				ic->rexmit = 0;
+				goto established;	/* the data also completes the handshake */
+			}
+			break;
+		case Ilclose:
+			if(ack == ic->start)
+				ilhangup(s, "remote close");
+			break;
+		}
+		freeblist(bp);
+		break;
+	case Ilestablished:
+	established:
+		switch(h->iltype) {
+		case Ilsync:
+			if(id != ic->rstart)
+				ilhangup(s, "remote close");
+			else
+				ilsendctl(s, nil, Ilack, ic->next, ic->rstart, 0);
+			freeblist(bp);
+			break;
+		case Ildata:
+			/*
+			 * avoid consuming all the mount rpc buffers in the
+			 * system.  if the input queue is too long, drop this
+			 * packet.
+			 */
+			if (s->rq && qlen(s->rq) >= Maxrq) {
+				priv->stats[DroppedMsgs]++;
+				freeblist(bp);
+				break;
+			}
+			ilackto(ic, ack, bp);
+			iloutoforder(s, h, bp);		/* queues or frees bp */
+			ilpullup(s);
+			break;
+		case Ildataquery:
+			ilackto(ic, ack, bp);
+			iloutoforder(s, h, bp);		/* queues or frees bp */
+			ilpullup(s);
+			ilsendctl(s, nil, Ilstate, ic->next, ic->recvd, h->ilspec);
+			break;
+		case Ilack:
+			ilackto(ic, ack, bp);
+			freeblist(bp);
+			break;
+		case Ilquery:
+			ilackto(ic, ack, bp);
+			ilsendctl(s, nil, Ilstate, ic->next, ic->recvd, h->ilspec);
+			freeblist(bp);
+			break;
+		case Ilstate:
+			if(ack >= ic->rttack)
+				ic->rttack = 0;
+			ilackto(ic, ack, bp);
+			if(h->ilspec > Nqt)
+				h->ilspec = 0;
+			if(ic->qt[h->ilspec] > ack){
+				ilrexmit(ic);
+				ilsettimeout(ic);
+			}
+			freeblist(bp);
+			break;
+		case Ilclose:
+			freeblist(bp);
+			if(ack < ic->start || ack > ic->next)
+				break;
+			ic->recvd = id;
+			ilsendctl(s, nil, Ilclose, ic->next, ic->recvd, 0);
+			ic->state = Ilclosing;
+			ilsettimeout(ic);
+			ilfreeq(ic);
+			break;
+		default:
+			freeblist(bp);	/* unknown type off the wire: don't leak the packet */
+			break;
+		}
+		break;
+	case Illistening:
+		freeblist(bp);
+		break;
+	case Ilclosing:
+		switch(h->iltype) {
+		case Ilclose:
+			ic->recvd = id;
+			ilsendctl(s, nil, Ilclose, ic->next, ic->recvd, 0);
+			if(ack == ic->next)
+				ilhangup(s, nil);
+			break;
+		default:
+			break;
+		}
+		freeblist(bp);
+		break;
+	}
+}
+
+/*
+ * Retransmit a copy of the oldest unacknowledged packet as a dataquery,
+ * back off the retransmit timer and count the retransmission.
+ */
+void
+ilrexmit(Ilcb *ic)
+{
+	Ilhdr *h;
+	Block *nb;
+	Conv *c;
+	ulong id;
+	Ilpriv *priv;
+
+	nb = nil;
+	qlock(&ic->ackq);
+	if(ic->unacked)
+		nb = copyblock(ic->unacked, blocklen(ic->unacked));
+	qunlock(&ic->ackq);
+	if(nb == nil)
+		return;
+
+	h = (Ilhdr*)nb->rp;
+	h->vihl = IP_VER4;
+	h->iltype = Ildataquery;
+	hnputl(h->ilack, ic->recvd);
+	h->ilspec = ilnextqt(ic);
+	h->ilsum[0] = 0;
+	h->ilsum[1] = 0;
+	hnputs(h->ilsum, ptclcsum(nb, IL_IPSIZE, nhgets(h->illen)));
+
+	c = ic->conv;
+	id = nhgetl(h->ilid);
+	netlog(c->p->f, Logil, "il: rexmit %d %ud: %d %d: %i %d/%d\n", id, ic->recvd,
+		ic->rexmit, ic->timeout,
+		c->raddr, c->lport, c->rport);
+	ilbackoff(ic);
+	ipoput4(c->p->f, nb, 0, c->ttl, c->tos, c);
+	/* statistics: Retrans is the counter ilxstats prints; rexmit is the older field */
+	ic->rxtot++;
+	priv = c->p->priv;
+	priv->stats[Retrans]++;
+	priv->rexmit++;
+}
+
+/* DEBUG: log the connection state before and after _ilprocess, which does the real work */
+void
+ilprocess(Conv *s, Ilhdr *h, Block *bp)
+{
+	Ilcb *ic;
+	char *tname;
+
+	ic = (Ilcb*)s->ptcl;
+	USED(ic);
+	/* iltype comes off the wire: don't index the name table with an unknown value */
+	tname = h->iltype > Ilclose ? "?" : iltype[h->iltype];
+	netlog(s->p->f, Logilmsg, "%11s rcv %d/%d snt %d/%d pkt(%s id %d ack %d %d->%d) ",
+		ilstates[ic->state],  ic->rstart, ic->recvd, ic->start,
+		ic->next, tname, nhgetl(h->ilid),
+		nhgetl(h->ilack), nhgets(h->ilsrc), nhgets(h->ildst));
+	_ilprocess(s, h, bp);
+	netlog(s->p->f, Logilmsg, "%11s rcv %d snt %d\n", ilstates[ic->state], ic->recvd, ic->next);
+}
+
+/*
+ * Close the connection locally and hang up both queues with msg; if the
+ * conversation was still calling out, report the failure with Fsconnected.
+ */
+void
+ilhangup(Conv *s, char *msg)
+{
+	Ilcb *cb = (Ilcb*)s->ptcl;
+	int wascalling;
+
+	netlog(s->p->f, Logil, "il: hangup! %I %d/%d: %s\n", s->raddr,
+		s->lport, s->rport, msg != nil ? msg : "no reason");
+	wascalling = cb->state == Ilsyncer;	/* sample now: illocalclose overwrites the state */
+	illocalclose(s);
+	qhangup(s->rq, msg);
+	qhangup(s->wq, msg);
+	if(wascalling)
+		Fsconnected(s, msg);
+}
+
+/*
+ * Pass in-sequence packets from the out-of-order queue up to the reader;
+ * packets already received are dropped and delivery stops at the first gap.
+ */
+void
+ilpullup(Conv *s)
+{
+	Ilcb *cb;
+	Ilhdr *hdr;
+	Ilpriv *priv;
+	Block *b;
+	ulong seq, datalen;
+
+	cb = (Ilcb*)s->ptcl;
+	if(cb->state != Ilestablished)
+		return;
+
+	qlock(&cb->outo);
+	for(;;){
+		b = cb->outoforder;
+		if(b == nil)
+			break;
+		hdr = (Ilhdr*)b->rp;
+		seq = nhgetl(hdr->ilid);
+		if(seq > cb->recvd && seq != cb->recvd+1){
+			priv = s->p->priv;
+			priv->stats[OutOfOrder]++;
+			break;
+		}
+		cb->outoforder = b->list;
+		if(seq <= cb->recvd){
+			freeblist(b);
+			continue;
+		}
+		cb->recvd = seq;
+		b->list = nil;
+		datalen = nhgets(hdr->illen)-IL_HDRSIZE;
+		b = trimblock(b, IL_IPSIZE+IL_HDRSIZE, datalen);
+		/* upper levels don't know about multiple-block messages: copy all into one (yick) */
+		b = concatblock(b);
+		if(b == 0)
+			panic("ilpullup");
+		b = packblock(b);
+		if(b == 0)
+			panic("ilpullup2");
+		qpass(s->rq, b);
+	}
+	qunlock(&cb->outo);
+}
+
+/*
+ * Put the data packet bp (header h) on the out-of-order queue, which is
+ * kept sorted by sequence id.  A packet outside the receive window, or
+ * one whose id is already queued, is freed instead.
+ */
+void
+iloutoforder(Conv *s, Ilhdr *h, Block *bp)
+{
+	Ilcb *cb;
+	Ilpriv *priv;
+	Block *q, **link;
+	ulong id, qid;
+
+	priv = s->p->priv;
+	cb = (Ilcb*)s->ptcl;
+	bp->list = nil;
+	id = nhgetl(h->ilid);
+
+	/* Window checks */
+	if(id <= cb->recvd || id > cb->recvd+cb->window) {
+		netlog(s->p->f, Logil, "il: message outside window %ud <%ud-%ud>: %i %d/%d\n",
+			id, cb->recvd, cb->recvd+cb->window, s->raddr, s->lport, s->rport);
+		freeblist(bp);
+		return;
+	}
+
+	/* Packet is acceptable: find the first queued packet whose id is not below ours */
+	qlock(&cb->outo);
+	link = &cb->outoforder;
+	for(q = *link; q != nil; q = *link){
+		qid = nhgetl(((Ilhdr*)q->rp)->ilid);
+		if(id <= qid)
+			break;
+		link = &q->list;
+	}
+
+	if(q != nil && id == qid){
+		/* this id is already on the queue */
+		priv->stats[DupMsg]++;
+		priv->stats[DupBytes] += blocklen(bp);
+		qunlock(&cb->outo);
+		freeblist(bp);
+		return;
+	}
+
+	/* splice in ahead of q, or at the tail when q is nil */
+	bp->list = q;
+	*link = bp;
+	qunlock(&cb->outo);
+}
+
+/*
+ * Send an IL control packet of the given type.  With inih set it answers inih (fields swapped);
+ * otherwise it goes out on ipc with id and ack, and the ack is recorded as sent.  ipc is required.
+ */
+void
+ilsendctl(Conv *ipc, Ilhdr *inih, int type, ulong id, ulong ack, int ilspec)
+{
+	Ilhdr *ih;
+	Ilcb *ic;
+	Block *bp;
+	int ttl, tos;
+
+	if(ipc==nil)		/* checked before ipc is first dereferenced, not after */
+		panic("ipc is nil caller is %#p", getcallerpc(&ipc));
+	if(ipc->p==nil)
+		panic("ipc->p is nil");
+
+	bp = allocb(IL_IPSIZE+IL_HDRSIZE);
+	bp->wp += IL_IPSIZE+IL_HDRSIZE;
+	ih = (Ilhdr *)(bp->rp);
+	ih->vihl = IP_VER4;
+
+	/* Ip fields */
+	ih->proto = IP_ILPROTO;
+	hnputs(ih->illen, IL_HDRSIZE);
+	ih->frag[0] = 0;
+	ih->frag[1] = 0;
+	if(inih) {
+		hnputl(ih->dst, nhgetl(inih->src));
+		hnputl(ih->src, nhgetl(inih->dst));
+		hnputs(ih->ilsrc, nhgets(inih->ildst));
+		hnputs(ih->ildst, nhgets(inih->ilsrc));
+		hnputl(ih->ilid, nhgetl(inih->ilack));
+		hnputl(ih->ilack, nhgetl(inih->ilid));
+		ttl = MAXTTL;
+		tos = DFLTTOS;
+	}
+	else {
+		v6tov4(ih->dst, ipc->raddr);
+		v6tov4(ih->src, ipc->laddr);
+		hnputs(ih->ilsrc, ipc->lport);
+		hnputs(ih->ildst, ipc->rport);
+		hnputl(ih->ilid, id);
+		hnputl(ih->ilack, ack);
+		ic = (Ilcb*)ipc->ptcl;
+		ic->acksent = ack;
+		ic->acktime = NOW;
+		ttl = ipc->ttl;
+		tos = ipc->tos;
+	}
+	ih->iltype = type;
+	ih->ilspec = ilspec;
+	ih->ilsum[0] = 0;
+	ih->ilsum[1] = 0;
+	if(ilcksum)
+		hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE));
+	netlog(ipc->p->f, Logilmsg, "ctl(%s id %d ack %d %d->%d)\n",
+		iltype[ih->iltype], nhgetl(ih->ilid), nhgetl(ih->ilack),
+		nhgets(ih->ilsrc), nhgets(ih->ildst));
+	ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
+}
+
+/*
+ * Answer inih with an Ilclose whose addresses, ports and id/ack are swapped from it; sent without a Conv.
+ */
+void
+ilreject(Fs *f, Ilhdr *inih)
+{
+	Block *reply;
+	Ilhdr *oh;
+
+	reply = allocb(IL_IPSIZE+IL_HDRSIZE);
+	oh = (Ilhdr*)reply->rp;
+	reply->wp += IL_IPSIZE+IL_HDRSIZE;
+	/* ip header */
+	oh->vihl = IP_VER4;
+	oh->proto = IP_ILPROTO;
+	oh->frag[0] = 0;
+	oh->frag[1] = 0;
+	hnputl(oh->src, nhgetl(inih->dst));
+	hnputl(oh->dst, nhgetl(inih->src));
+	/* il header */
+	oh->iltype = Ilclose;
+	oh->ilspec = 0;
+	hnputs(oh->illen, IL_HDRSIZE);
+	hnputs(oh->ilsrc, nhgets(inih->ildst));
+	hnputs(oh->ildst, nhgets(inih->ilsrc));
+	hnputl(oh->ilid, nhgetl(inih->ilack));
+	hnputl(oh->ilack, nhgetl(inih->ilid));
+	oh->ilsum[0] = 0;
+	oh->ilsum[1] = 0;
+	if(ilcksum)
+		hnputs(oh->ilsum, ptclcsum(reply, IL_IPSIZE, IL_HDRSIZE));
+	ipoput4(f, reply, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/* Retransmit deadline: smoothed delay, plus the time to send what is unacked,
+ * plus twice the mean deviation and AckDelay; at most MaxTimeout from now. */
+void
+ilsettimeout(Ilcb *ic)
+{
+	ulong ms;
+
+	ms = ic->delay>>LogAGain;
+	ms += ic->unackedbytes/(ic->rate>>LogAGain);
+	ms += ic->mdev>>(LogDGain-1);
+	ms += AckDelay;
+	ic->timeout = NOW + (ms > MaxTimeout ? MaxTimeout : ms);
+}
+
+/* Back off: the basic timeout grows by half per retransmission so far, capped at
+ * MaxTimeout (one tick if fasttimeout is set); then count this retransmission. */
+void
+ilbackoff(Ilcb *ic)
+{
+	ulong ms;
+	int n;
+
+	ms = ic->delay>>LogAGain;
+	ms += ic->unackedbytes/(ic->rate>>LogAGain);
+	ms += ic->mdev>>(LogDGain-1);
+	ms += AckDelay;
+	for(n = ic->rexmit; n > 0; n--)
+		ms += ms>>1;
+	if(ms > MaxTimeout)
+		ms = MaxTimeout;
+	if(ic->fasttimeout)
+		ms = Iltickms;
+	ic->timeout = NOW + ms;
+	ic->rexmit++;
+}
+
+/*
+ * True if t1 is after t2, or more than an hour before it.  When x is
+ * given, complain (tagged with x) if the two numbers are not within an
+ * hour of each other.
+ */
+#define Tfuture (1000*60*60)
+int
+later(ulong t1, ulong t2, char *x)
+{
+	int diff;
+
+	diff = t1 - t2;
+	if(diff <= 0 && diff >= -Tfuture)
+		return 0;
+	if(x != nil && diff > Tfuture)
+		print("%s: way future %d\n", x, diff);
+	else if(x != nil && diff < 0)
+		print("%s: way past %d\n", x, -diff);
+	return 1;
+}
+
+/*
+ * Kernel process started by ilstart, once per protocol instance (x is
+ * the Proto): every Iltickms it walks all conversations and drives
+ * their timers - delayed acks, queries, retransmit backoff and hangup.
+ */
+void
+ilackproc(void *x)
+{
+	Proto *il = x;
+	Conv **cp, *c;
+	Ilcb *cb;
+
+	for(;;){
+		tsleep(&up->sleep, return0, 0, Iltickms);
+		for(cp = il->conv; cp && *cp; cp++){
+			c = *cp;
+			cb = (Ilcb*)c->ptcl;
+			switch(cb->state){
+			case Ilclosed:
+			case Illistening:
+				break;
+			case Ilclosing:
+				if(!later(NOW, cb->timeout, "timeout0"))
+					break;
+				if(cb->rexmit > MaxRexmit){
+					ilhangup(c, nil);
+					break;
+				}
+				ilsendctl(c, nil, Ilclose, cb->next, cb->recvd, 0);
+				ilbackoff(cb);
+				break;
+			case Ilsyncee:
+			case Ilsyncer:
+				if(!later(NOW, cb->timeout, "timeout1"))
+					break;
+				if(cb->rexmit > MaxRexmit){
+					ilhangup(c, etime);
+					break;
+				}
+				ilsendctl(c, nil, Ilsync, cb->start, cb->recvd, 0);
+				ilbackoff(cb);
+				break;
+			case Ilestablished:
+				/* an ack is owed and its time has come */
+				if(cb->recvd != cb->acksent && later(NOW, cb->acktime, "acktime"))
+					ilsendctl(c, nil, Ilack, cb->next, cb->recvd, 0);
+				/* time for a query, unless nothing has been heard for DeathTime */
+				if(later(NOW, cb->querytime, "querytime")){
+					if(later(NOW, cb->lastrecv+DeathTime, "deathtime")){
+						netlog(il->f, Logil, "il: hangup: deathtime\n");
+						ilhangup(c, etime);
+						break;
+					}
+					ilsendctl(c, nil, Ilquery, cb->next, cb->recvd, ilnextqt(cb));
+					cb->querytime = NOW + QueryTime;
+				}
+
+				/* unacked data has timed out: query the peer and back off */
+				if(cb->unacked != nil && later(NOW, cb->timeout, "timeout2")){
+					if(cb->rexmit > MaxRexmit){
+						netlog(il->f, Logil, "il: hangup: too many rexmits\n");
+						ilhangup(c, etime);
+						break;
+					}
+					ilsendctl(c, nil, Ilquery, cb->next, cb->recvd, ilnextqt(cb));
+					cb->rxquery++;
+					ilbackoff(cb);
+				}
+				break;
+			}
+		}
+	}
+}
+
+/* Reset a control block for a new call: random start id, empty queues, default estimates. */
+void
+ilcbinit(Ilcb *ic)
+{
+	ic->start = nrand(0x1000000);
+	ic->next = ic->start+1;
+	ic->recvd = 0;
+	ic->window = Defaultwin;
+	ic->qtx = 1;
+	ic->unacked = nil;
+	ic->unackedbytes = 0;
+	ic->outoforder = nil;
+	ic->rexmit = 0;
+	ic->rxtot = 0;
+	ic->rxquery = 0;
+	ic->fasttimeout = 0;
+	/* estimates and timers; ilsettimeout reads the estimates, so it goes last */
+	ic->delay = DefRtt<<LogAGain;
+	ic->mdev = DefRtt<<LogDGain;
+	ic->rate = DefByteRate<<LogAGain;
+	ic->lastrecv = NOW;	/* or we'll timeout right away */
+	ic->querytime = NOW + QueryTime;
+	ilsettimeout(ic);
+}
+
+/*
+ * Begin listening (IL_LISTEN) or calling out (IL_CONNECT) on c, starting
+ * the protocol's ilackproc kproc the first time through.  Always returns
+ * nil; a conversation that is not closed is left as it is.  A nonzero
+ * fasttimeout makes the attempt give up after only a few retransmissions.
+ */
+char*
+ilstart(Conv *c, int type, int fasttimeout)
+{
+	Ilcb *cb;
+	Ilpriv *priv;
+	char name[KNAMELEN];
+
+	priv = c->p->priv;
+	if(priv->ackprocstarted == 0){
+		/* look again with apl held so that only one kproc is started */
+		qlock(&priv->apl);
+		if(priv->ackprocstarted == 0){
+			sprint(name, "#I%dilack", c->p->f->dev);
+			kproc(name, ilackproc, c->p);
+			priv->ackprocstarted = 1;
+		}
+		qunlock(&priv->apl);
+	}
+
+	cb = (Ilcb*)c->ptcl;
+	cb->conv = c;
+	if(cb->state != Ilclosed)
+		return nil;
+
+	ilcbinit(cb);
+	if(fasttimeout){
+		/* timeout if we can't connect quickly */
+		cb->fasttimeout = 1;
+		cb->timeout = NOW+Iltickms;
+		cb->rexmit = MaxRexmit - 4;
+	}
+
+	/* enter the new state and the hash table; a caller also sends the first sync */
+	if(type == IL_LISTEN){
+		cb->state = Illistening;
+		iphtadd(&priv->ht, c);
+	}else if(type == IL_CONNECT){
+		cb->state = Ilsyncer;
+		iphtadd(&priv->ht, c);
+		ilsendctl(c, nil, Ilsync, cb->start, cb->recvd, 0);
+	}else
+		netlog(c->p->f, Logil, "il: start: type %d\n", type);
+
+	return nil;
+}
+
+/* Free every block on the unacked queue and on the out-of-order queue,
+ * each with its own lock held. */
+void
+ilfreeq(Ilcb *ic)
+{
+	Block *b;
+
+	qlock(&ic->ackq);
+	while((b = ic->unacked) != nil){
+		ic->unacked = b->list;
+		freeblist(b);
+	}
+	qunlock(&ic->ackq);
+
+	qlock(&ic->outo);
+	while((b = ic->outoforder) != nil){
+		ic->outoforder = b->list;
+		freeblist(b);
+	}
+	qunlock(&ic->outo);
+}
+
+/*
+ * Protocol advise hook: bp starts with the header of a packet and msg
+ * is the complaint about it.  Find the conversation whose local port
+ * and addresses match the packet's source side and, if it is still
+ * calling out, hang it up with msg.  bp is always freed.
+ */
+void
+iladvise(Proto *il, Block *bp, char *msg)
+{
+	Ilhdr *h;
+	Conv *c, **cp;
+	uchar src[IPaddrlen], dst[IPaddrlen];
+	ushort sport;
+
+	h = (Ilhdr*)bp->rp;
+	v4tov6(src, h->src);
+	v4tov6(dst, h->dst);
+	sport = nhgets(h->ilsrc);
+
+	/* Look for a connection, unfortunately the destination port is missing */
+	c = nil;
+	QLOCK(il);
+	for(cp = il->conv; *cp; cp++){
+		if((*cp)->lport != sport)
+			continue;
+		if(ipcmp((*cp)->laddr, src) != 0 || ipcmp((*cp)->raddr, dst) != 0)
+			continue;
+		c = *cp;
+		break;
+	}
+	QUNLOCK(il);
+
+	/* only a call that is still being set up is torn down */
+	if(c != nil && ((Ilcb*)c->ptcl)->state == Ilsyncer)
+		ilhangup(c, msg);
+	freeblist(bp);
+}
+
+/*
+ * Step to the next query slot (1..Nqt, wrapping), store the highest id sent in it, return the slot.
+ */
+int
+ilnextqt(Ilcb *ic)
+{
+	int slot;
+
+	qlock(&ic->ackq);
+	slot = ic->qtx >= Nqt ? 1 : ic->qtx+1;
+	ic->qtx = slot;
+	ic->qt[slot] = ic->next-1;	/* highest xmitted packet */
+	ic->qt[0] = ic->qt[slot];	/* compatibility with old implementations */
+	qunlock(&ic->ackq);
+	return slot;
+}
+
+/* calculate the constants that convert fast ticks to ms (more or less):
+ * ms = ticks*scalemul/scalediv, and one of the two is always 1 */
+static void
+inittimescale(void)
+{
+	uvlong hz;
+
+	fastticks(&hz);
+	scalemul = 1;
+	scalediv = 1;
+	if(hz > 1000)
+		scalediv = hz/1000;
+	else
+		scalemul = 1000/hz;
+}
+
+/* Set up the tick-to-ms scale, then allocate the IL protocol descriptor and register it with f. */
+void
+ilinit(Fs *f)
+{
+	Proto *p;
+
+	inittimescale();
+	p = smalloc(sizeof(*p));
+	p->name = "il";
+	p->ipproto = IP_ILPROTO;
+	p->nc = scalednconv();
+	p->ptclsize = sizeof(Ilcb);
+	p->priv = smalloc(sizeof(Ilpriv));
+	p->create = ilcreate;
+	p->connect = ilconnect;
+	p->announce = ilannounce;
+	p->close = ilclose;
+	p->rcv = iliput;
+	p->advise = iladvise;
+	p->state = ilstate;
+	p->stats = ilxstats;
+	p->inuse = ilinuse;
+	p->ctl = nil;
+	p->gc = nil;
+	Fsproto(f, p);
+}
diff --git a/src/9vx/a/ip/inferno.c b/src/9vx/a/ip/inferno.c
@@ -0,0 +1,46 @@
+#include	"u.h"
+#include	"lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"error.h"
+#include	"ip.h"
+
+/*
+ *  some hacks for commonality twixt inferno and plan9
+ */
+
+char*
+commonuser(void)
+{
+	return up->user;	/* user name held in up (presumably the current process - confirm); returned by reference */
+}
+
+Chan*
+commonfdtochan(int fd, int mode, int a, int b)
+{
+	return fdtochan(fd, mode, a, b);	/* plain pass-through: a and b are fdtochan's third and fourth arguments */
+}
+
+char*
+commonerror(void)
+{
+	return up->errstr;	/* error string held in up; returned by reference, not copied */
+}
+
+char*
+bootp(Ipifc* _)
+{
+	return "unimplmented";	/* stub: the argument is ignored and this (misspelt) message is always returned */
+}
+
+int
+bootpread(char* _, ulong __, int ___)
+{
+	return	0;	/* stub: all three arguments are ignored; always 0 */
+}
+
+Medium tripmedium =
+{
+	"trip",		/* name; every other Medium field is left zero/nil */
+};
diff --git a/src/9vx/a/ip/ip.c b/src/9vx/a/ip/ip.c
@@ -0,0 +1,776 @@
+#include	"u.h"
+#include	"lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"error.h"
+
+#include	"ip.h"
+
+typedef struct Fragment4	Fragment4;
+typedef struct Fragment6	Fragment6;
+typedef struct Ipfrag		Ipfrag;
+
+#define BLKIPVER(xp)	(((Ip4hdr*)((xp)->rp))->vihl&0xF0)	/* version bits of the header at xp->rp, still in the high nibble (compare with IP_VER4/IP_VER6) */
+
+/* MIB II counters: indices into IP.stats[] and, in the same order, statnames[] */
+enum
+{
+	Forwarding,		/* set by iprouting(): 1 when routing, 2 when not */
+	DefaultTTL,		/* set to MAXTTL each time ipstats() runs */
+	InReceives,		/* every v4 packet entering ipiput4 */
+	InHdrErrors,		/* bad checksum, header length below 5 words, or ttl < 1 when forwarding */
+	InAddrErrors,		/* not updated by the code in ip.c shown here */
+	ForwDatagrams,		/* packets ipiput4 hands to ipoput4 for forwarding */
+	InUnknownProtos,	/* no receiver found for the header's proto */
+	InDiscards,		/* counted together with InUnknownProtos */
+	InDelivers,		/* packets given to a protocol's rcv */
+	OutRequests,		/* every call of ipoput4 */
+	OutDiscards,		/* output dropped: short/oversize, DF set, bad fragment size; also unforwardable input */
+	OutNoRoutes,		/* v4lookup found no route in ipoput4 */
+	ReasmTimeout,		/* reassembly queues found expired (age < NOW) */
+	ReasmReqds,		/* bumped when a new reassembly queue is started */
+	ReasmOKs,		/* datagrams completed by ip4reassemble */
+	ReasmFails,		/* unfragmented packet arrived while a queue with its id existed */
+	FragOKs,		/* packets ipoput4 fragmented completely */
+	FragFails,		/* fragmentation abandoned (DF, seglen < 8, data ran out) */
+	FragCreates,		/* individual fragments written */
+
+	Nstats,			/* number of counters; size of IP.stats[] */
+};
+
+struct Fragment4		/* one v4 reassembly queue */
+{
+	Block*	blist;		/* fragments received so far, kept in increasing foff order */
+	Fragment4*	next;	/* link in IP.flisthead4 (active) or IP.fragfree4 (free) */
+	ulong 	src;		/* source address as read by nhgetl */
+	ulong 	dst;		/* destination address as read by nhgetl */
+	ushort	id;		/* ip id shared by the fragments */
+	ulong 	age;		/* NOW + 30000 at allocation; the queue is stale once age < NOW */
+};
+
+struct Fragment6		/* v6 counterpart of Fragment4; only initfrag touches it in this file's visible part */
+{
+	Block*	blist;		/* presumably the queued fragments, as in Fragment4 - confirm in the v6 code */
+	Fragment6*	next;	/* free-list link set up by initfrag */
+	uchar 	src[IPaddrlen];
+	uchar 	dst[IPaddrlen];
+	uint	id;
+	ulong 	age;
+};
+
+struct Ipfrag			/* per-fragment bookkeeping kept at Block.base, reached through BKFG() */
+{
+	ushort	foff;		/* byte offset of this fragment's data within the datagram */
+	ushort	flen;		/* data bytes this fragment contributes; reduced when overlaps are trimmed */
+};
+
+/* an instance of IP */
+struct IP
+{
+	ulong		stats[Nstats];	/* MIB II counters, printed by ipstats() */
+
+	QLock		fraglock4;	/* held while the two v4 lists below are walked or changed */
+	Fragment4*	flisthead4;	/* active v4 reassembly queues, newest first */
+	Fragment4*	fragfree4;	/* unused v4 queues (array allocated by initfrag) */
+	Ref		id4;		/* incref'd by ipoput4 for the id of each locally originated packet */
+
+	QLock		fraglock6;	/* v6 equivalents; their users are outside this view */
+	Fragment6*	flisthead6;
+	Fragment6*	fragfree6;
+	Ref		id6;
+
+	int		iprouting;	/* true if we route like a gateway */
+};
+
+static char *statnames[] =	/* label for each MIB II counter, indexed by the enum above; used by ipstats() */
+{
+[Forwarding]	"Forwarding",
+[DefaultTTL]	"DefaultTTL",
+[InReceives]	"InReceives",
+[InHdrErrors]	"InHdrErrors",
+[InAddrErrors]	"InAddrErrors",
+[ForwDatagrams]	"ForwDatagrams",
+[InUnknownProtos]	"InUnknownProtos",
+[InDiscards]	"InDiscards",
+[InDelivers]	"InDelivers",
+[OutRequests]	"OutRequests",
+[OutDiscards]	"OutDiscards",
+[OutNoRoutes]	"OutNoRoutes",
+[ReasmTimeout]	"ReasmTimeout",
+[ReasmReqds]	"ReasmReqds",
+[ReasmOKs]	"ReasmOKs",
+[ReasmFails]	"ReasmFails",
+[FragOKs]	"FragOKs",
+[FragFails]	"FragFails",
+[FragCreates]	"FragCreates",
+};
+
+#define BLKIP(xp)	((Ip4hdr*)((xp)->rp))	/* the v4 header at the block's read pointer */
+/*
+ * This sleazy macro relies on the media header size being
+ * larger than sizeof(Ipfrag). ip4reassemble checks this and pads the block if it is not
+ */
+#define BKFG(xp)	((Ipfrag*)((xp)->base))
+
+ushort		ipcsum(uchar*);
+Block*		ip4reassemble(IP*, int, Block*, Ip4hdr*);
+void		ipfragfree4(IP*, Fragment4*);
+Fragment4*	ipfragallo4(IP*);
+
+/* ip_init_6: allocate the v6 parameters for stack f, fill in their defaults and store them in f->v6p */
+void
+ip_init_6(Fs *f)
+{
+	v6params *params;
+	Routerparams *rp;
+
+	params = smalloc(sizeof(v6params));
+	params->cdrouter = -1;
+	params->hp.rxmithost = 1000;	/* v6 RETRANS_TIMER */
+	rp = &params->rp;		/* router defaults follow */
+	rp->mflag = 0;			/* default not managed */
+	rp->oflag = 0;
+	rp->linkmtu = 0;		/* no mtu sent */
+	rp->reachtime = 0;
+	rp->rxmitra = 0;
+	rp->ttl = MAXTTL;
+	rp->maxraint = 600000;		/* millisecs */
+	rp->minraint = 200000;
+	rp->routerlt = 3 * rp->maxraint;
+
+	f->v6p = params;
+}
+
+/* initfrag: allocate size Fragment4 and size Fragment6 entries and chain each array into its free list */
+void
+initfrag(IP *ip, int size)
+{
+	int i;
+	Fragment4 *tab4;
+	Fragment6 *tab6;
+
+	/* v4 reassembly queues */
+	tab4 = (Fragment4*)malloc(sizeof(Fragment4) * size);
+	ip->fragfree4 = tab4;
+	if(tab4 == nil)
+		panic("initfrag");
+	for(i = 0; i < size; i++)
+		tab4[i].next = &tab4[i+1];
+	tab4[size-1].next = nil;	/* the last entry ends the list */
+
+	/* v6 reassembly queues */
+	tab6 = (Fragment6*)malloc(sizeof(Fragment6) * size);
+	ip->fragfree6 = tab6;
+	if(tab6 == nil)
+		panic("initfrag");
+	for(i = 0; i < size; i++)
+		tab6[i].next = &tab6[i+1];
+	tab6[size-1].next = nil;	/* the last entry ends the list */
+}
+
+/* ip_init: create the IP instance of stack f with 100 reassembly queues per version, then set the v6 defaults */
+void
+ip_init(Fs *f)
+{
+	IP *inst;
+
+	inst = smalloc(sizeof(IP));
+	initfrag(inst, 100);	/* fragment free lists are built before f->ip is set */
+	f->ip = inst;
+	ip_init_6(f);
+}
+
+/* iprouting: turn forwarding on or off and mirror it in the Forwarding counter (1 = on, 2 = off) */
+void
+iprouting(Fs *f, int on)
+{
+	IP *ip = f->ip;
+
+	ip->iprouting = on;
+	ip->stats[Forwarding] = on ? 1 : 2;
+}
+
+int
+ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+{
+	Ipifc *ifc;
+	uchar *gate;
+	ulong fragoff;
+	Block *xp, *nb;
+	Ip4hdr *eh, *feh;
+	int lid, len, seglen, chunk, dlen, blklen, offset, medialen;
+	Route *r, *sr;
+	IP *ip;
+	int rv = 0;
+	/* send the v4 packet whose header is at bp->rp, fragmenting it to fit the interface; returns -1 only when there is no route, otherwise 0 */
+	ip = f->ip;
+
+	/* Fill out the ip header */
+	eh = (Ip4hdr*)(bp->rp);
+
+	ip->stats[OutRequests]++;
+
+	/* Number of uchars in data and ip header to write */
+	len = blocklen(bp);
+	/* gating (ipiput4 passes 1 when forwarding): the header is already filled in, so its length field decides how much to send */
+	if(gating){
+		chunk = nhgets(eh->length);
+		if(chunk > len){
+			ip->stats[OutDiscards]++;
+			netlog(f, Logip, "short gated packet\n");
+			goto free;
+		}
+		if(chunk < len)
+			len = chunk;
+	}
+	if(len >= IP_MAX){
+		ip->stats[OutDiscards]++;
+		netlog(f, Logip, "exceeded ip max size %V\n", eh->dst);
+		goto free;
+	}
+
+	r = v4lookup(f, eh->dst, c);
+	if(r == nil){
+		ip->stats[OutNoRoutes]++;
+		netlog(f, Logip, "no interface %V\n", eh->dst);
+		rv = -1;
+		goto free;
+	}
+	/* choose the next hop (gate) and the interface to send on */
+	ifc = r->ifc;
+	if(r->type & (Rifc|Runi))
+		gate = eh->dst;
+	else
+	if(r->type & (Rbcast|Rmulti)) {
+		gate = eh->dst;
+		sr = v4lookup(f, eh->src, nil);	/* prefer the interface whose unicast self address is the source */
+		if(sr != nil && (sr->type & Runi))
+			ifc = sr->ifc;
+	}
+	else
+		gate = r->v4.gate;
+
+	if(!gating)
+		eh->vihl = IP_VER4|IP_HLEN4;
+	eh->ttl = ttl;
+	if(!gating)
+		eh->tos = tos;
+	/* keep the interface read-locked while sending; the waserror handler drops the lock if an error is raised */
+	if(!CANRLOCK(ifc))
+		goto free;
+	if(waserror()){
+		RUNLOCK(ifc);
+		nexterror();
+	}
+	if(ifc->m == nil)
+		goto raise;
+
+	/* If we dont need to fragment just send it */
+	medialen = ifc->maxtu - ifc->m->hsize;
+	if(len <= medialen) {
+		if(!gating)
+			hnputs(eh->id, incref(&ip->id4));
+		hnputs(eh->length, len);
+		if(!gating){
+			eh->frag[0] = 0;
+			eh->frag[1] = 0;
+		}
+		eh->cksum[0] = 0;
+		eh->cksum[1] = 0;
+		hnputs(eh->cksum, ipcsum(&eh->vihl));
+		ifc->m->bwrite(ifc, bp, V4, gate);	/* bp goes to the medium; it is not freed here */
+		RUNLOCK(ifc);
+		poperror();
+		return 0;
+	}
+
+if((eh->frag[0] & (IP_DF>>8)) && !gating) print("%V: DF set\n", eh->dst);
+
+	if(eh->frag[0] & (IP_DF>>8)){
+		ip->stats[FragFails]++;
+		ip->stats[OutDiscards]++;
+		icmpcantfrag(f, bp, medialen);
+		netlog(f, Logip, "%V: eh->frag[0] & (IP_DF>>8)\n", eh->dst);
+		goto raise;
+	}
+	/* payload bytes per fragment: what fits after the header, rounded down to a multiple of 8 */
+	seglen = (medialen - IP4HDR) & ~7;
+	if(seglen < 8){
+		ip->stats[FragFails]++;
+		ip->stats[OutDiscards]++;
+		netlog(f, Logip, "%V seglen < 8\n", eh->dst);
+		goto raise;
+	}
+
+	dlen = len - IP4HDR;
+	xp = bp;
+	if(gating)
+		lid = nhgets(eh->id);	/* forwarded: every fragment keeps the original id */
+	else
+		lid = incref(&ip->id4);
+	/* step xp/rp past the original header so that only payload is copied below */
+	offset = IP4HDR;
+	while(xp != nil && offset && offset >= BLEN(xp)) {
+		offset -= BLEN(xp);
+		xp = xp->next;
+	}
+	xp->rp += offset;
+
+	if(gating)
+		fragoff = nhgets(eh->frag)<<3;	/* any flag bits ride along above the byte offset and come back on the >>3 below */
+	else
+		fragoff = 0;
+	dlen += fragoff;	/* now the position at which this packet's data ends */
+	for(; fragoff < dlen; fragoff += seglen) {
+		nb = allocb(IP4HDR+seglen);
+		feh = (Ip4hdr*)(nb->rp);
+
+		memmove(nb->wp, eh, IP4HDR);	/* each fragment starts from a copy of the original header */
+		nb->wp += IP4HDR;
+
+		if((fragoff + seglen) >= dlen) {
+			seglen = dlen - fragoff;	/* final piece: shorter, and IP_MF is not added */
+			hnputs(feh->frag, fragoff>>3);
+		}
+		else
+			hnputs(feh->frag, (fragoff>>3)|IP_MF);
+
+		hnputs(feh->length, seglen + IP4HDR);
+		hnputs(feh->id, lid);
+
+		/* Copy up the data area */
+		chunk = seglen;
+		while(chunk) {
+			if(!xp) {
+				ip->stats[OutDiscards]++;
+				ip->stats[FragFails]++;
+				freeblist(nb);
+				netlog(f, Logip, "!xp: chunk %d\n", chunk);
+				goto raise;
+			}
+			blklen = chunk;
+			if(BLEN(xp) < chunk)
+				blklen = BLEN(xp);
+			memmove(nb->wp, xp->rp, blklen);
+			nb->wp += blklen;
+			xp->rp += blklen;
+			chunk -= blklen;
+			if(xp->rp == xp->wp)
+				xp = xp->next;
+		}
+
+		feh->cksum[0] = 0;
+		feh->cksum[1] = 0;
+		hnputs(feh->cksum, ipcsum(&feh->vihl));
+		ifc->m->bwrite(ifc, nb, V4, gate);
+		ip->stats[FragCreates]++;
+	}
+	ip->stats[FragOKs]++;
+raise:	/* also reached after a successful fragmentation: release the lock and the error label */
+	RUNLOCK(ifc);
+	poperror();
+free:	/* the original block list is freed here; any fragments sent were copies */
+	freeblist(bp);
+	return rv;
+}
+
+/* ipiput4: input of one v4 packet from ifc (anything else is passed to ipiput6).  After header checks the
+ * packet is forwarded when it is not for us and routing is on, else reassembled and given to its protocol. */
+void
+ipiput4(Fs *f, Ipifc *ifc, Block *bp)
+{
+	int hl, hop, tos, proto, olen, notforme;
+	Ip4hdr *h;
+	Proto *p;
+	ushort frag;
+	uchar *dp, v6dst[IPaddrlen];
+	IP *ip;
+	Route *r;
+
+	if(BLKIPVER(bp) != IP_VER4) {
+		ipiput6(f, ifc, bp);
+		return;
+	}
+
+	ip = f->ip;
+	ip->stats[InReceives]++;
+
+	/*
+	 *  Ensure we have all the header info in the first
+	 *  block.  Make life easier for other protocols by
+	 *  collecting up to the first 64 bytes in the first block.
+	 */
+	if(BLEN(bp) < 64) {
+		hl = blocklen(bp);
+		if(hl < IP4HDR)
+			hl = IP4HDR;
+		if(hl > 64)
+			hl = 64;
+		bp = pullupblock(bp, hl);
+		if(bp == nil)
+			return;
+	}
+
+	h = (Ip4hdr*)(bp->rp);
+
+	/* dump anything whose header doesn't checksum */
+	if((bp->flag & Bipck) == 0 && ipcsum(&h->vihl)) {
+		ip->stats[InHdrErrors]++;
+		netlog(f, Logip, "ip: checksum error %V\n", h->src);
+		freeblist(bp);
+		return;
+	}
+	v4tov6(v6dst, h->dst);
+	notforme = ipforme(f, v6dst) == 0;
+
+	/* Check header length and version */
+	if((h->vihl&0x0F) != IP_HLEN4) {
+		hl = (h->vihl&0xF)<<2;
+		if(hl < (IP_HLEN4<<2)) {
+			ip->stats[InHdrErrors]++;
+			netlog(f, Logip, "ip: %V bad hivl %ux\n", h->src, h->vihl);
+			freeblist(bp);
+			return;
+		}
+		/* If this is not routed strip off the options */
+		if(notforme == 0) {
+			olen = nhgets(h->length);
+			dp = bp->rp + (hl - (IP_HLEN4<<2));
+			memmove(dp, h, IP_HLEN4<<2);	/* slide the fixed header up over the options */
+			bp->rp = dp;
+			h = (Ip4hdr*)(bp->rp);
+			h->vihl = (IP_VER4|IP_HLEN4);
+			hnputs(h->length, olen-hl+(IP_HLEN4<<2));
+		}
+	}
+
+	/* route */
+	if(notforme) {
+		Conv conv;
+
+		if(!ip->iprouting){
+			freeblist(bp);	/* the whole chain: only the first 64 bytes were pulled up, freeb would leak the rest */
+			return;
+		}
+
+		/* don't forward to source's network */
+		conv.r = nil;
+		r = v4lookup(f, h->dst, &conv);
+		if(r == nil || r->ifc == ifc){
+			ip->stats[OutDiscards]++;
+			freeblist(bp);
+			return;
+		}
+
+		/* don't forward if packet has timed out */
+		hop = h->ttl;
+		if(hop < 1) {
+			ip->stats[InHdrErrors]++;
+			icmpttlexceeded(f, ifc->lifc->local, bp);
+			freeblist(bp);
+			return;
+		}
+
+		/* reassemble if the interface expects it */
+if(r->ifc == nil) panic("nil route rfc");
+		if(r->ifc->reassemble){
+			frag = nhgets(h->frag);
+			if(frag) {
+				h->tos = 0;	/* tos is borrowed to tell ip4reassemble whether more fragments follow */
+				if(frag & IP_MF)
+					h->tos = 1;
+				bp = ip4reassemble(ip, frag, bp, h);
+				if(bp == nil)
+					return;
+				h = (Ip4hdr*)(bp->rp);
+			}
+		}
+
+		ip->stats[ForwDatagrams]++;
+		tos = h->tos;
+		hop = h->ttl;
+		ipoput4(f, bp, 1, hop - 1, tos, &conv);	/* gating = 1: the existing header is kept, ttl lowered by one */
+		return;
+	}
+
+	frag = nhgets(h->frag);
+	if(frag) {
+		h->tos = 0;	/* same borrowing of tos as in the forwarding path */
+		if(frag & IP_MF)
+			h->tos = 1;
+		bp = ip4reassemble(ip, frag, bp, h);
+		if(bp == nil)
+			return;
+		h = (Ip4hdr*)(bp->rp);
+	}
+
+	/* don't let any frag info go up the stack */
+	h->frag[0] = 0;
+	h->frag[1] = 0;
+
+	proto = h->proto;
+	p = Fsrcvpcol(f, proto);
+	if(p != nil && p->rcv != nil) {
+		ip->stats[InDelivers]++;
+		(*p->rcv)(p, ifc, bp);
+		return;
+	}
+	ip->stats[InDiscards]++;
+	ip->stats[InUnknownProtos]++;
+	freeblist(bp);
+}
+
+/* ipstats: print every MIB counter as "name: value\n" into buf (at most len bytes); returns how far seprint advanced */
+int
+ipstats(Fs *f, char *buf, int len)
+{
+	int n;
+	char *cur, *end;
+	ulong *stats;
+
+	stats = f->ip->stats;
+	stats[DefaultTTL] = MAXTTL;	/* a constant, refreshed on every call */
+	cur = buf;
+	end = buf + len;
+	for(n = 0; n < Nstats; n++)
+		cur = seprint(cur, end, "%s: %lud\n", statnames[n], stats[n]);
+	return cur - buf;
+}
+
+Block*
+ip4reassemble(IP *ip, int offset, Block *bp, Ip4hdr *ih)
+{
+	int fend;
+	ushort id;
+	Fragment4 *f, *fnext;
+	ulong src, dst;
+	Block *bl, **l, *last, *prev;
+	int ovlap, len, fragsize, pktposn;
+	/* offset is the raw frag field; the caller sets ih->tos to 1 when more fragments follow.  Returns the datagram once complete, bp itself if it is no fragment, else nil (bp queued or dropped) */
+	src = nhgetl(ih->src);
+	dst = nhgetl(ih->dst);
+	id = nhgets(ih->id);
+
+	/*
+	 *  block lists are too hard, pullupblock into a single block
+	 */
+	if(bp->next){
+		bp = pullupblock(bp, blocklen(bp));	/* NOTE(review): result is used without a nil check */
+		ih = (Ip4hdr*)(bp->rp);
+	}
+
+	qlock(&ip->fraglock4);
+
+	/*
+	 *  find a reassembly queue for this fragment
+	 */
+	for(f = ip->flisthead4; f; f = fnext){
+		fnext = f->next;	/* because ipfragfree4 changes the list */
+		if(f->src == src && f->dst == dst && f->id == id)
+			break;
+		if(f->age < NOW){	/* expired queues are reaped as the search passes them */
+			ip->stats[ReasmTimeout]++;
+			ipfragfree4(ip, f);
+		}
+	}
+
+	/*
+	 *  if this isn't a fragmented packet, accept it
+	 *  and get rid of any fragments that might go
+	 *  with it.
+	 */
+	if(!ih->tos && (offset & ~(IP_MF|IP_DF)) == 0) {
+		if(f != nil) {
+			ipfragfree4(ip, f);
+			ip->stats[ReasmFails]++;
+		}
+		qunlock(&ip->fraglock4);
+		return bp;
+	}
+	/* BKFG() keeps an Ipfrag at bp->base, so make sure there is room for it in front of the data */
+	if(bp->base+sizeof(Ipfrag) >= bp->rp){
+		bp = padblock(bp, sizeof(Ipfrag));
+		bp->rp += sizeof(Ipfrag);
+	}
+
+	BKFG(bp)->foff = offset<<3;	/* foff is a ushort: the flag bits shift out of range, leaving the byte offset */
+	BKFG(bp)->flen = nhgets(ih->length)-IP4HDR;
+
+	/* First fragment allocates a reassembly queue */
+	if(f == nil) {
+		f = ipfragallo4(ip);
+		f->id = id;
+		f->src = src;
+		f->dst = dst;
+
+		f->blist = bp;
+
+		qunlock(&ip->fraglock4);
+		ip->stats[ReasmReqds]++;
+		return nil;
+	}
+
+	/*
+	 *  find the new fragment's position in the queue
+	 */
+	prev = nil;
+	l = &f->blist;
+	bl = f->blist;
+	while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+		prev = bl;
+		l = &bl->next;
+		bl = bl->next;
+	}
+
+	/* Check overlap of a previous fragment - trim away as necessary */
+	if(prev) {
+		ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+		if(ovlap > 0) {
+			if(ovlap >= BKFG(bp)->flen) {	/* new fragment lies wholly inside prev: drop it */
+				freeblist(bp);
+				qunlock(&ip->fraglock4);
+				return nil;
+			}
+			BKFG(prev)->flen -= ovlap;	/* otherwise shorten prev; the new data wins */
+		}
+	}
+
+	/* Link onto assembly queue */
+	bp->next = *l;
+	*l = bp;
+
+	/* Check to see if succeeding segments overlap */
+	if(bp->next) {
+		l = &bp->next;
+		fend = BKFG(bp)->foff + BKFG(bp)->flen;
+		/* Take completely covered segments out */
+		while(*l) {
+			ovlap = fend - BKFG(*l)->foff;
+			if(ovlap <= 0)
+				break;
+			if(ovlap < BKFG(*l)->flen) {	/* partly covered: cut the overlap off its front */
+				BKFG(*l)->flen -= ovlap;
+				BKFG(*l)->foff += ovlap;
+				/* move up ih hdrs */
+				memmove((*l)->rp + ovlap, (*l)->rp, IP4HDR);
+				(*l)->rp += ovlap;
+				break;
+			}
+			last = (*l)->next;
+			(*l)->next = nil;
+			freeblist(*l);
+			*l = last;
+		}
+	}
+
+	/*
+	 *  look for a complete packet.  if we get to a fragment
+	 *  without IP_MF set, we're done.
+	 */
+	pktposn = 0;
+	for(bl = f->blist; bl; bl = bl->next) {
+		if(BKFG(bl)->foff != pktposn)	/* a hole: not complete yet */
+			break;
+		if((BLKIP(bl)->frag[0]&(IP_MF>>8)) == 0) {
+			bl = f->blist;
+			len = nhgets(BLKIP(bl)->length);
+			bl->wp = bl->rp + len;
+
+			/* Pullup all the fragment headers and
+			 * return a complete packet
+			 */
+			for(bl = bl->next; bl; bl = bl->next) {
+				fragsize = BKFG(bl)->flen;
+				len += fragsize;
+				bl->rp += IP4HDR;
+				bl->wp = bl->rp + fragsize;
+			}
+
+			bl = f->blist;
+			f->blist = nil;	/* detach first so that ipfragfree4 does not free the datagram */
+			ipfragfree4(ip, f);
+			ih = BLKIP(bl);
+			hnputs(ih->length, len);
+			qunlock(&ip->fraglock4);
+			ip->stats[ReasmOKs]++;
+			return bl;
+		}
+		pktposn += BKFG(bl)->flen;
+	}
+	qunlock(&ip->fraglock4);
+	return nil;
+}
+
+/*
+ * ipfragfree4 - give a reassembly queue back: free its blocks, unlink it from
+ * the active list (flisthead4) and push it on the free list (fragfree4).
+ * Assume hold fraglock4.
+ */
+void
+ipfragfree4(IP *ip, Fragment4 *frag)
+{
+	Fragment4 **link;
+
+	if(frag->blist != nil)
+		freeblist(frag->blist);
+	frag->blist = nil;
+	frag->src = 0;
+	frag->id = 0;	/* dst and age keep their old values */
+
+	/* unlink from the active list if it is on it */
+	for(link = &ip->flisthead4; *link != nil; link = &(*link)->next){
+		if(*link == frag){
+			*link = frag->next;
+			break;
+		}
+	}
+
+	/* push on the free list */
+	frag->next = ip->fragfree4;
+	ip->fragfree4 = frag;
+}
+
+/*
+ * ipfragallo4 - take a reassembly queue off the free list (recycling the last entry of the
+ * active list when none is free) and put it at the head of the active list - assume hold fraglock4
+ */
+Fragment4 *
+ipfragallo4(IP *ip)
+{
+	Fragment4 *q, *tail;
+
+	while((q = ip->fragfree4) == nil){
+		/* free last entry on fraglist */
+		tail = ip->flisthead4;
+		while(tail->next != nil)
+			tail = tail->next;
+		ipfragfree4(ip, tail);
+	}
+	ip->fragfree4 = q->next;
+	q->next = ip->flisthead4;
+	ip->flisthead4 = q;
+	q->age = NOW + 30000;	/* ip4reassemble treats the queue as stale once age < NOW */
+	return q;
+}
+
+/*
+ * ipcsum: complemented 16-bit ones-complement sum over the IP header at addr; the length comes from the header's own low nibble
+ */
+ushort
+ipcsum(uchar *addr)
+{
+	int i, nbytes;
+	ulong acc;
+
+	nbytes = (addr[0]&0xf)<<2;	/* header length: 32-bit words -> bytes */
+	acc = 0;
+	for(i = 0; i < nbytes; i += 2)
+		acc += addr[i]<<8 | addr[i+1];	/* big-endian 16-bit words */
+
+	/* fold the carries back in; the second fold absorbs a carry produced by the first */
+	acc = (acc & 0xffff) + (acc >> 16);
+	acc = (acc & 0xffff) + (acc >> 16);
+
+	return (acc^0xffff);
+}
diff --git a/src/9vx/a/ip/ip.h b/src/9vx/a/ip/ip.h
@@ -0,0 +1,677 @@
+typedef struct	Conv	Conv;
+typedef struct	Fs	Fs;
+typedef union	Hwaddr	Hwaddr;
+typedef struct	IP	IP;
+typedef struct	IPaux	IPaux;
+typedef struct	Ipself	Ipself;
+typedef struct	Ipselftab	Ipselftab;
+typedef struct	Iplink	Iplink;
+typedef struct	Iplifc	Iplifc;
+typedef struct	Ipmulti	Ipmulti;
+typedef struct	Ipifc	Ipifc;
+typedef struct	Iphash	Iphash;
+typedef struct	Ipht	Ipht;
+typedef struct	Netlog	Netlog;
+typedef struct	Medium	Medium;
+typedef struct	Proto	Proto;
+typedef struct	Arpent	Arpent;
+typedef struct	Arp Arp;
+typedef struct	Route	Route;
+
+typedef struct	Routerparams	Routerparams;
+typedef struct 	Hostparams	Hostparams;
+typedef struct 	v6router	v6router;
+typedef struct	v6params	v6params;
+
+enum
+{
+	Addrlen=	64,
+	Maxproto=	20,		/* Fs.p has Maxproto+1 slots */
+	Nhash=		64,
+	Maxincall=	5,
+	Nchans=		1024,
+	MAClen=		16,		/* longest mac address */
+
+	MAXTTL=		255,		/* reported as DefaultTTL by ipstats; default v6 hop limit in ip_init_6 */
+	DFLTTOS=	0,
+
+	IPaddrlen=	16,		/* bytes in an address as the stack stores it (v4 ones are widened by v4tov6) */
+	IPv4addrlen=	4,		/* bytes in a bare v4 address, e.g. V4route.gate */
+	IPv4off=	12,		/* presumably where the v4 bytes sit inside a 16-byte address - confirm in v4tov6 */
+	IPllen=		4,		/* ulongs per address in V6route */
+
+	/* ip versions */
+	V4=		4,		/* version argument given to Medium.bwrite */
+	V6=		6,
+	IP_VER4= 	0x40,		/* as found in the high nibble of Ip4hdr.vihl */
+	IP_VER6=	0x60,
+	IP_HLEN4=	5,		/* v4: Header length in words */
+	IP_DF=		0x4000,		/* v4: Don't fragment */
+	IP_MF=		0x2000,		/* v4: More fragments */
+	IP4HDR=		20,		/* sizeof(Ip4hdr) */
+	IP_MAX=		64*1024,	/* Max. Internet packet size, v4 & v6 */
+
+	/* 2^Lroot trees in the root table */
+	Lroot=		10,
+
+	Maxpath =	64,
+};
+
+enum	/* presumably the values taken by Conv.state - confirm against devip.c */
+{
+	Idle=		0,
+	Announcing=	1,
+	Announced=	2,
+	Connecting=	3,
+	Connected=	4,
+};
+
+/* on the wire packet header; multi-byte fields are byte arrays read and written with nhgets/nhgetl/hnputs */
+typedef struct Ip4hdr		Ip4hdr;
+struct Ip4hdr
+{
+	uchar	vihl;		/* Version and header length */
+	uchar	tos;		/* Type of service */
+	uchar	length[2];	/* packet length */
+	uchar	id[2];		/* ip->identification */
+	uchar	frag[2];	/* Fragment information */
+	uchar	ttl;      	/* Time to live */
+	uchar	proto;		/* Protocol */
+	uchar	cksum[2];	/* Header checksum */
+	uchar	src[4];		/* IP source */
+	uchar	dst[4];		/* IP destination */
+};
+
+/*
+ *  one per conversation directory
+ */
+struct Conv
+{
+	QLock	qlock;
+
+	int	x;			/* conversation index */
+	Proto*	p;			/* NOTE(review): presumably the owning protocol - confirm */
+
+	int	restricted;		/* remote port is restricted */
+	uint	ttl;			/* max time to live */
+	uint	tos;			/* type of service */
+	int	ignoreadvice;		/* don't terminate connection on icmp errors */
+
+	uchar	ipversion;		/* presumably V4 or V6 - confirm */
+	uchar	laddr[IPaddrlen];	/* local IP address */
+	uchar	raddr[IPaddrlen];	/* remote IP address */
+	ushort	lport;			/* local port number */
+	ushort	rport;			/* remote port number */
+
+	char	*owner;			/* protections */
+	int	perm;
+	int	inuse;			/* opens of listen/data/ctl */
+	int	length;
+	int	state;			/* presumably one of Idle..Connected - confirm */
+
+	int	maxfragsize;		/* If set, used for fragmentation */
+
+	/* udp specific */
+	int	headers;		/* data src/dst headers in udp */
+	int	reliable;		/* true if reliable udp */
+
+	Conv*	incall;			/* calls waiting to be listened for */
+	Conv*	next;
+
+	Queue*	rq;			/* queued data waiting to be read */
+	Queue*	wq;			/* queued data waiting to be written */
+	Queue*	eq;			/* returned error packets */
+	Queue*	sq;			/* snooping queue */
+	Ref	snoopers;		/* number of processes with snoop open */
+
+	QLock	car;
+	Rendez	cr;
+	char	cerr[ERRMAX];
+
+	QLock	listenq;
+	Rendez	listenr;
+
+	Ipmulti	*multi;			/* multicast bindings for this interface */
+
+	void*	ptcl;			/* protocol specific stuff, e.g. il casts it to Ilcb */
+
+	Route	*r;			/* last route used */
+	ulong	rgen;			/* routetable generation for *r */
+};
+
+struct Medium
+{
+	char	*name;		/* e.g. "trip" for tripmedium */
+	int	hsize;		/* medium header size */
+	int	mintu;		/* default min mtu */
+	int	maxtu;		/* default max mtu */
+	int	maclen;		/* mac address length  */
+	void	(*bind)(Ipifc*, int, char**);
+	void	(*unbind)(Ipifc*);
+	void	(*bwrite)(Ipifc *ifc, Block *b, int version, uchar *ip);	/* transmit; ipoput4 passes V4 and the next-hop address */
+
+	/* for arming interfaces to receive multicast */
+	void	(*addmulti)(Ipifc *ifc, uchar *a, uchar *ia);
+	void	(*remmulti)(Ipifc *ifc, uchar *a, uchar *ia);
+
+	/* process packets written to 'data' */
+	void	(*pktin)(Fs *f, Ipifc *ifc, Block *bp);
+
+	/* routes for router boards */
+	void	(*addroute)(Ipifc *ifc, int, uchar*, uchar*, uchar*, int);
+	void	(*remroute)(Ipifc *ifc, int, uchar*, uchar*);
+	void	(*flushroutes)(Ipifc *ifc);
+
+	/* for routing multicast groups */
+	void	(*joinmulti)(Ipifc *ifc, uchar *a, uchar *ia);
+	void	(*leavemulti)(Ipifc *ifc, uchar *a, uchar *ia);
+
+	/* address resolution */
+	void	(*ares)(Fs*, int, uchar*, uchar*, int, int);	/* resolve */
+	void	(*areg)(Ipifc*, uchar*);			/* register */
+
+	/* v6 address generation */
+	void	(*pref2addr)(uchar *pref, uchar *ea);
+
+	int	unbindonclose;	/* if non-zero, unbind on last close */
+};
+
+/* logical interface associated with a physical one */
+struct Iplifc
+{
+	uchar	local[IPaddrlen];	/* ipiput4 passes ifc->lifc->local to icmpttlexceeded */
+	uchar	mask[IPaddrlen];
+	uchar	remote[IPaddrlen];
+	uchar	net[IPaddrlen];
+	uchar	tentative;	/* =1 => v6 dup disc on, =0 => confirmed unique */
+	uchar	onlink;		/* =1 => onlink, =0 offlink. */
+	uchar	autoflag;	/* v6 autonomous flag */
+	long 	validlt;	/* v6 valid lifetime */
+	long 	preflt;		/* v6 preferred lifetime */
+	long	origint;	/* time when addr was added */
+	Iplink	*link;		/* addresses linked to this lifc */
+	Iplifc	*next;		/* presumably the next logical interface on the same Ipifc (list headed by Ipifc.lifc) */
+};
+
+/* binding twixt Ipself and Iplifc */
+struct Iplink
+{
+	Ipself	*self;
+	Iplifc	*lifc;
+	Iplink	*selflink;	/* next link for this local address */
+	Iplink	*lifclink;	/* next link for this ifc */
+	ulong	expire;
+	Iplink	*next;		/* free list */
+	int	ref;		/* NOTE(review): looks like a reference count; it is managed outside this view */
+};
+
+/* rfc 2461, pp.40—43. */
+
+/* default values, one per stack (ip_init_6 fills in the stack-wide copy held in v6params.rp) */
+struct Routerparams {
+	int	mflag;		/* flag: managed address configuration */
+	int	oflag;		/* flag: other stateful configuration */
+	int 	maxraint;	/* max. router adv interval (ms) */
+	int	minraint;	/* min. router adv interval (ms) */
+	int	linkmtu;	/* mtu options */
+	int	reachtime;	/* reachable time */
+	int	rxmitra;	/* retransmit interval */
+	int	ttl;		/* cur hop count limit */
+	int	routerlt;	/* router lifetime */
+};
+
+struct Hostparams {
+	int	rxmithost;	/* ip_init_6 sets it to 1000 and labels it "v6 RETRANS_TIMER" */
+};
+
+struct Ipifc
+{
+	RWlock	rwlock;		/* presumably what CANRLOCK/RUNLOCK(ifc) in ipoput4 operate on - confirm */
+
+	Conv	*conv;		/* link to its conversation structure */
+	char	dev[64];	/* device we're attached to */
+	Medium	*m;		/* Media pointer; ipoput4 gives up on the packet when it is nil */
+	int	maxtu;		/* Maximum transfer unit */
+	int	mintu;		/* Minimum transfer unit */
+	int	mbps;		/* megabits per second */
+	void	*arg;		/* medium specific */
+	int	reassemble;	/* reassemble IP packets before forwarding */
+
+	/* these are used so that we can unbind on the fly */
+	Lock	idlock;
+	uchar	ifcid;		/* incremented each 'bind/unbind/add/remove' */
+	int	ref;		/* number of proc's using this ipifc */
+	Rendez	wait;		/* where unbinder waits for ref == 0 */
+	int	unbinding;
+
+	uchar	mac[MAClen];	/* MAC address */
+
+	Iplifc	*lifc;		/* logical interfaces on this physical one */
+
+	ulong	in, out;	/* message statistics */
+	ulong	inerr, outerr;	/* ... */
+
+	uchar	sendra6;	/* flag: send router advs on this ifc */
+	uchar	recvra6;	/* flag: recv router advs on this ifc */
+	Routerparams rp;	/* router parameters as in RFC 2461, pp.40—43.
+					used only if node is router */
+};
+
+/*
+ *  one per multicast-lifc pair used by a Conv
+ */
+struct Ipmulti
+{
+	uchar	ma[IPaddrlen];	/* presumably the multicast address (cf. the ma argument of ipifcaddmulti) */
+	uchar	ia[IPaddrlen];	/* presumably the interface address (cf. the ia argument of ipifcaddmulti) */
+	Ipmulti	*next;		/* next binding; Conv.multi heads the list */
+};
+
+/*
+ *  hash table for 2 ip addresses + 2 ports
+ */
+enum
+{
+	Nipht=		521,	/* convenient prime; number of buckets in Ipht.tab */
+
+	IPmatchexact=	0,	/* match on 4 tuple */
+	IPmatchany,		/* *!* */
+	IPmatchport,		/* *!port */
+	IPmatchaddr,		/* addr!* */
+	IPmatchpa,		/* addr!port */
+};
+struct Iphash		/* one entry of an Ipht bucket chain */
+{
+	Iphash	*next;
+	Conv	*c;
+	int	match;	/* presumably one of the IPmatch* values above - confirm in iphtadd */
+};
+struct Ipht
+{
+	Lock	lk;		/* presumably guards tab - confirm in iphtadd/iphtrem */
+
+	Iphash	*tab[Nipht];	/* bucket heads, one per hash value */
+};
+void iphtadd(Ipht*, Conv*);
+void iphtrem(Ipht*, Conv*);
+Conv* iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp);
+
+/*
+ *  one per multiplexed protocol
+ */
+struct Proto
+{
+	QLock		qlock;
+
+	char*		name;		/* protocol name */
+	int		x;		/* protocol index */
+	int		ipproto;	/* ip protocol type */
+
+	char*		(*connect)(Conv*, char**, int);
+	char*		(*announce)(Conv*, char**, int);
+	char*		(*bind)(Conv*, char**, int);
+	int		(*state)(Conv*, char*, int);
+	void		(*create)(Conv*);
+	void		(*close)(Conv*);
+	void		(*rcv)(Proto*, Ipifc*, Block*);	/* ipiput4 calls it with the receiving interface and the packet */
+	char*		(*ctl)(Conv*, char**, int);
+	void		(*advise)(Proto*, Block*, char*);
+	int		(*stats)(Proto*, char*, int);
+	int		(*local)(Conv*, char*, int);
+	int		(*remote)(Conv*, char*, int);
+	int		(*inuse)(Conv*);
+	int		(*gc)(Proto*);	/* returns true if any conversations are freed */
+
+	Fs		*f;		/* file system this proto is part of */
+	Conv		**conv;		/* array of conversations; iladvise walks it up to the first nil entry */
+	int		ptclsize;	/* size of per protocol ctl block */
+	int		nc;		/* number of conversations */
+	int		ac;
+	Qid		qid;		/* qid for protocol directory */
+	ushort		nextrport;
+
+	void		*priv;		/* protocol private data, e.g. ilinit stores an Ilpriv here */
+};
+
+
+/*
+ *  one per IP protocol stack
+ */
+struct Fs
+{
+	RWlock	rwlock;
+
+	Conv	*conv;		/* link to its conversation structure */
+	int	dev;
+
+	int	np;
+	Proto*	p[Maxproto+1];		/* list of supported protocols */
+	Proto*	t2p[256];		/* vector of all protocols */
+	Proto*	ipifc;			/* kludge for ipifcremroute & ipifcaddroute */
+	Proto*	ipmux;			/* kludge for finding an ip multiplexor */
+
+	IP	*ip;			/* IP instance, created by ip_init */
+	Ipselftab	*self;
+	Arp	*arp;
+	v6params	*v6p;		/* v6 parameters, created by ip_init_6 */
+
+	Route	*v4root[1<<Lroot];	/* v4 routing forest */
+	Route	*v6root[1<<Lroot];	/* v6 routing forest */
+	Route	*queue;			/* used as temp when reinjecting routes */
+
+	Netlog	*alog;
+
+	char	ndb[1024];		/* an ndb entry for this interface */
+	int	ndbvers;
+	long	ndbmtime;
+};
+
+/* one per default router known to host */
+struct v6router {
+	uchar	inuse;
+	Ipifc	*ifc;
+	int	ifcid;		/* presumably compared with ifc->ifcid to detect a rebound interface - confirm */
+	uchar	routeraddr[IPaddrlen];
+	long	ltorigin;
+	Routerparams	rp;
+};
+
+struct v6params
+{
+	Routerparams	rp;		/* v6 params, one copy per node now */
+	Hostparams	hp;
+	v6router	v6rlist[3];	/* max 3 default routers, currently */
+	int		cdrouter;	/* uses only v6rlist[cdrouter] if   */
+					/* cdrouter >= 0; ip_init_6 starts it at -1 */
+};
+
+
+int	Fsconnected(Conv*, char*);
+Conv*	Fsnewcall(Conv*, uchar*, ushort, uchar*, ushort, uchar);
+int	Fspcolstats(char*, int);
+int	Fsproto(Fs*, Proto*);
+int	Fsbuiltinproto(Fs*, uchar);
+Conv*	Fsprotoclone(Proto*, char*);
+Proto*	Fsrcvpcol(Fs*, uchar);
+Proto*	Fsrcvpcolx(Fs*, uchar);
+char*	Fsstdconnect(Conv*, char**, int);
+char*	Fsstdannounce(Conv*, char**, int);
+char*	Fsstdbind(Conv*, char**, int);
+ulong	scalednconv(void);
+void	closeconv(Conv*);
+/*
+ *  logging: one flag bit per subsystem, passed as the second argument of netlog()
+ */
+enum
+{
+	Logip=		1<<1,
+	Logtcp=		1<<2,
+	Logfs=		1<<3,
+	Logil=		1<<4,
+	Logicmp=	1<<5,
+	Logudp=		1<<6,
+	Logcompress=	1<<7,
+	Logilmsg=	1<<8,
+	Loggre=		1<<9,
+	Logppp=		1<<10,
+	Logtcprxmt=	1<<11,
+	Logigmp=	1<<12,
+	Logudpmsg=	1<<13,
+	Logipmsg=	1<<14,
+	Logrudp=	1<<15,
+	Logrudpmsg=	1<<16,
+	Logesp=		1<<17,
+	Logtcpwin=	1<<18,
+};
+
+void	netloginit(Fs*);
+void	netlogopen(Fs*);
+void	netlogclose(Fs*);
+void	netlogctl(Fs*, char*, int);
+long	netlogread(Fs*, void*, ulong, long);
+void	netlog(Fs*, int, char*, ...);
+void	ifcloginit(Fs*);
+long	ifclogread(Fs*, Chan *,void*, ulong, long);
+void	ifclog(Fs*, uchar *, int);
+void	ifclogopen(Fs*, Chan*);
+void	ifclogclose(Fs*, Chan*);
+
+/*
+ *  iproute.c
+ */
+typedef	struct RouteTree RouteTree;
+typedef struct Routewalk Routewalk;
+typedef struct V4route V4route;
+typedef struct V6route V6route;
+
+enum
+{
+
+	/* type bits (Route.type); ipoput4 tests Rifc|Runi and Rbcast|Rmulti to choose the next hop */
+	Rv4=		(1<<0),		/* this is a version 4 route */
+	Rifc=		(1<<1),		/* this route is a directly connected interface */
+	Rptpt=		(1<<2),		/* this route is a pt to pt interface */
+	Runi=		(1<<3),		/* a unicast self address */
+	Rbcast=		(1<<4),		/* a broadcast self address */
+	Rmulti=		(1<<5),		/* a multicast self address */
+	Rproxy=		(1<<6),		/* this route should be proxied */
+};
+
+struct Routewalk	/* NOTE(review): field meanings are not visible here; see ipwalkroutes and routeread in iproute.c */
+{
+	int	o;
+	int	h;
+	char*	p;
+	char*	e;
+	void*	state;
+	void	(*walk)(Route*, Routewalk*);	/* callback taking a route and this walk state */
+};
+
+struct	RouteTree	/* the same leading fields that struct Route repeats by hand */
+{
+	Route*	right;
+	Route*	left;
+	Route*	mid;
+	uchar	depth;
+	uchar	type;
+	uchar	ifcid;		/* must match ifc->ifcid */
+	Ipifc	*ifc;
+	char	tag[4];
+	int	ref;
+};
+
+struct V4route
+{
+	ulong	address;
+	ulong	endaddress;
+	uchar	gate[IPv4addrlen];	/* next hop ipoput4 uses when the route is neither direct nor broadcast/multicast */
+};
+
+struct V6route
+{
+	ulong	address[IPllen];
+	ulong	endaddress[IPllen];
+	uchar	gate[IPaddrlen];	/* presumably the v6 next hop, by analogy with V4route.gate - confirm */
+};
+
+struct Route
+{
+/*	RouteTree; */
+	Route*	right;
+	Route*	left;
+	Route*	mid;
+	uchar	depth;
+	uchar	type;		/* Rv4, Rifc, ... bits */
+	uchar	ifcid;		/* must match ifc->ifcid */
+	Ipifc	*ifc;		/* interface to send on; ipoput4 starts from r->ifc */
+	char	tag[4];
+	int	ref;
+
+	union {
+		V6route	v6;
+		V4route v4;
+	};
+};
+extern void	v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
+extern void	v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
+extern void	v4delroute(Fs *f, uchar *a, uchar *mask, int dolock);
+extern void	v6delroute(Fs *f, uchar *a, uchar *mask, int dolock);
+extern Route*	v4lookup(Fs *f, uchar *a, Conv *c);
+extern Route*	v6lookup(Fs *f, uchar *a, Conv *c);
+extern long	routeread(Fs *f, char*, ulong, int);
+extern long	routewrite(Fs *f, Chan*, char*, int);
+extern void	routetype(int, char*);
+extern void	ipwalkroutes(Fs*, Routewalk*);
+extern void	convroute(Route*, uchar*, uchar*, uchar*, char*, int*);
+
+/*
+ *  devip.c
+ */
+
+/*
+ *  Hanging off every ip channel's ->aux is the following structure.
+ *  It maintains the state used by devip and iproute.
+ */
+struct IPaux
+{
+	char	*owner;		/* the user that did the attach */
+	char	tag[4];		/* presumably a route tag, same size as Route.tag - confirm in iproute.c */
+};
+
+extern IPaux*	newipaux(char*, char*);
+
+/*
+ *  arp.c
+ */
+struct Arpent
+{
+	uchar	ip[IPaddrlen];
+	uchar	mac[MAClen];
+	Medium	*type;			/* media type */
+	Arpent*	hash;
+	Block*	hold;
+	Block*	last;
+	uint	ctime;			/* time entry was created or refreshed */
+	uint	utime;			/* time entry was last used */
+	uchar	state;
+	Arpent	*nextrxt;		/* re-transmit chain */
+	uint	rtime;			/* time for next retransmission */
+	uchar	rxtsrem;
+	Ipifc	*ifc;
+	uchar	ifcid;			/* must match ifc->ifcid */
+};
+
+extern void	arpinit(Fs*);
+extern int	arpread(Arp*, char*, ulong, int);
+extern int	arpwrite(Fs*, char*, int);
+extern Arpent*	arpget(Arp*, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *h);
+extern void	arprelease(Arp*, Arpent *a);
+extern Block*	arpresolve(Arp*, Arpent *a, Medium *type, uchar *mac);
+extern void	arpenter(Fs*, int version, uchar *ip, uchar *mac, int len, int norefresh);
+
+/*
+ * ipaux.c
+ */
+
+extern int	myetheraddr(uchar*, char*);
+extern vlong	parseip(uchar*, char*);
+extern vlong	parseipmask(uchar*, char*);
+extern char*	v4parseip(uchar*, char*);
+extern void	maskip(uchar *from, uchar *mask, uchar *to);
+extern int	parsemac(uchar *to, char *from, int len);
+extern uchar*	defmask(uchar*);
+extern int	isv4(uchar*);
+extern void	v4tov6(uchar *v6, uchar *v4);
+extern int	v6tov4(uchar *v4, uchar *v6);
+extern int	eipfmt(Fmt*);
+
+#define	ipmove(x, y) memmove(x, y, IPaddrlen)	/* copy one IPaddrlen-byte address from y to x */
+#define	ipcmp(x, y) ( (x)[IPaddrlen-1] != (y)[IPaddrlen-1] || memcmp(x, y, IPaddrlen) )	/* nonzero when the addresses differ; last byte is tested first; arguments are evaluated more than once */
+
+extern uchar IPv4bcast[IPaddrlen];
+extern uchar IPv4bcastobs[IPaddrlen];
+extern uchar IPv4allsys[IPaddrlen];
+extern uchar IPv4allrouter[IPaddrlen];
+extern uchar IPnoaddr[IPaddrlen];
+extern uchar v4prefix[IPaddrlen];
+extern uchar IPallbits[IPaddrlen];
+
+#define	NOW	msec()	/* calls msec() at every use; ipfragallo4 adds 30000 to it for a queue's expiry */
+
+/*
+ *  media
+ */
+extern Medium	ethermedium;
+extern Medium	nullmedium;
+extern Medium	pktmedium;
+extern Medium	tripmedium;
+
+/*
+ *  ipifc.c
+ */
+extern Medium*	ipfindmedium(char *name);
+extern void	addipmedium(Medium *med);
+extern int	ipforme(Fs*, uchar *addr);
+extern int	iptentative(Fs*, uchar *addr);
+extern int	ipisbm(uchar *);
+extern int	ipismulticast(uchar *);
+extern Ipifc*	findipifc(Fs*, uchar *remote, int type);
+extern void	findlocalip(Fs*, uchar *local, uchar *remote);
+extern int	ipv4local(Ipifc *ifc, uchar *addr);
+extern int	ipv6local(Ipifc *ifc, uchar *addr);
+extern int	ipv6anylocal(Ipifc *ifc, uchar *addr);
+extern Iplifc*	iplocalonifc(Ipifc *ifc, uchar *ip);
+extern int	ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip);
+extern int	ipismulticast(uchar *ip);
+extern int	ipisbooting(void);
+extern int	ipifccheckin(Ipifc *ifc, Medium *med);
+extern void	ipifccheckout(Ipifc *ifc);
+extern int	ipifcgrab(Ipifc *ifc);
+extern void	ipifcaddroute(Fs*, int, uchar*, uchar*, uchar*, int);
+extern void	ipifcremroute(Fs*, int, uchar*, uchar*);
+extern void	ipifcremmulti(Conv *c, uchar *ma, uchar *ia);
+extern void	ipifcaddmulti(Conv *c, uchar *ma, uchar *ia);
+extern char*	ipifcrem(Ipifc *ifc, char **argv, int argc);
+extern char*	ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp);
+extern long	ipselftabread(Fs*, char *a, ulong offset, int n);
+extern char*	ipifcadd6(Ipifc *ifc, char**argv, int argc);
+/*
+ *  ip.c
+ */
+extern void	iprouting(Fs*, int);
+extern void	icmpnoconv(Fs*, Block*);
+extern void	icmpcantfrag(Fs*, Block*, int);
+extern void	icmpttlexceeded(Fs*, uchar*, Block*);
+extern ushort	ipcsum(uchar*);
+extern void	ipiput4(Fs*, Ipifc*, Block*);
+extern void	ipiput6(Fs*, Ipifc*, Block*);
+extern int	ipoput4(Fs*, Block*, int, int, int, Conv*);
+extern int	ipoput6(Fs*, Block*, int, int, int, Conv*);
+extern int	ipstats(Fs*, char*, int);
+extern ushort	ptclbsum(uchar*, int);
+extern ushort	ptclcsum(Block*, int, int);
+extern void	ip_init(Fs*);
+extern void	update_mtucache(uchar*, ulong);
+extern ulong	restrict_mtu(uchar*, ulong);
+/*
+ * bootp.c
+ */
+extern char*	bootp(Ipifc*);
+extern int	bootpread(char*, ulong, int);
+
+/*
+ *  resolving inferno/plan9 differences
+ */
+Chan*		commonfdtochan(int, int, int, int);
+char*		commonuser(void);
+char*		commonerror(void);
+
+/*
+ * chandial.c
+ */
+extern Chan*	chandial(char*, char*, char*, Chan**);
+
+/*
+ *  global to all of the stack
+ */
+extern void	(*igmpreportfn)(Ipifc*, uchar*);
diff --git a/src/9vx/a/ip/ipaux.c b/src/9vx/a/ip/ipaux.c
@@ -0,0 +1,368 @@
+#include	"u.h"
+#include	"lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"error.h"
+#include	"ip.h"
+#include	"ipv6.h"
+
+char *v6hdrtypes[Maxhdrtype] =	/* printable names, indexed by the header-type constants below */
+{
+	[HBH]		"HopbyHop",
+	[ICMP]		"ICMP",
+	[IGMP]		"IGMP",
+	[GGP]		"GGP",
+	[IPINIP]	"IP",
+	[ST]		"ST",
+	[TCP]		"TCP",
+	[UDP]		"UDP",
+	[ISO_TP4]	"ISO_TP4",
+	[RH]		"Routinghdr",
+	[FH]		"Fraghdr",
+	[IDRP]		"IDRP",
+	[RSVP]		"RSVP",
+	[AH]		"Authhdr",
+	[ESP]		"ESP",
+	[ICMPv6]	"ICMPv6",
+	[NNH]		"Nonexthdr",
+	[ISO_IP]	"ISO_IP",
+	[IGRP]		"IGRP",
+	[OSPF]		"OSPF",
+};
+
+/*
+ *  well known IPv6 addresses
+ */
+uchar v6Unspecified[IPaddrlen] = {
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0
+};
+uchar v6loopback[IPaddrlen] = {
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0x01
+};
+
+uchar v6linklocal[IPaddrlen] = {
+	0xfe, 0x80, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0
+};
+uchar v6linklocalmask[IPaddrlen] = {
+	0xff, 0xff, 0xff, 0xff,
+	0xff, 0xff, 0xff, 0xff,
+	0, 0, 0, 0,
+	0, 0, 0, 0
+};
+int v6llpreflen = 8;	/* link-local prefix length in bytes */
+
+uchar v6multicast[IPaddrlen] = {
+	0xff, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0
+};
+uchar v6multicastmask[IPaddrlen] = {
+	0xff, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0
+};
+int v6mcpreflen = 1;	/* multicast prefix length */
+
+uchar v6allnodesN[IPaddrlen] = {
+	0xff, 0x01, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0x01
+};
+uchar v6allroutersN[IPaddrlen] = {
+	0xff, 0x01, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0x02
+};
+uchar v6allnodesNmask[IPaddrlen] = {
+	0xff, 0xff, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0
+};
+int v6aNpreflen = 2;	/* all nodes (N) prefix */
+
+uchar v6allnodesL[IPaddrlen] = {
+	0xff, 0x02, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0x01
+};
+uchar v6allroutersL[IPaddrlen] = {
+	0xff, 0x02, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0x02
+};
+uchar v6allnodesLmask[IPaddrlen] = {
+	0xff, 0xff, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0
+};
+int v6aLpreflen = 2;	/* all nodes (L) prefix */
+
+uchar v6solicitednode[IPaddrlen] = {
+	0xff, 0x02, 0, 0,
+	0, 0, 0, 0,
+	0, 0, 0, 0x01,
+	0xff, 0, 0, 0
+};
+uchar v6solicitednodemask[IPaddrlen] = {
+	0xff, 0xff, 0xff, 0xff,
+	0xff, 0xff, 0xff, 0xff,
+	0xff, 0xff, 0xff, 0xff,
+	0xff, 0x0, 0x0, 0x0
+};
+int v6snpreflen = 13;	/* solicited-node prefix length in bytes (the mask above has 13 0xff bytes) */
+
+ushort	/* complemented, folded 16-bit sum of up to len bytes of chain bp, starting offset bytes in */
+ptclcsum(Block *bp, int offset, int len)
+{
+	uchar *addr;
+	ulong losum, hisum;
+	ushort csum;
+	int odd, blocklen, x;
+
+	/* Correct to front of data area: skip whole blocks until offset falls inside bp */
+	while(bp != nil && offset && offset >= BLEN(bp)) {
+		offset -= BLEN(bp);
+		bp = bp->next;
+	}
+	if(bp == nil)
+		return 0;	/* offset lies past the end of the chain */
+
+	addr = bp->rp + offset;
+	blocklen = BLEN(bp) - offset;
+
+	if(bp->next == nil) {	/* last block: a single ptclbsum call, len clamped to what is there */
+		if(blocklen < len)
+			len = blocklen;
+		return ~ptclbsum(addr, len) & 0xffff;
+	}
+
+	losum = 0;
+	hisum = 0;
+
+	odd = 0;	/* non-zero while the byte count summed so far is odd */
+	while(len) {
+		x = blocklen;
+		if(len < x)
+			x = len;
+
+		csum = ptclbsum(addr, x);
+		if(odd)
+			hisum += csum;	/* piece began on an odd byte: kept apart, shifted a byte below */
+		else
+			losum += csum;
+		odd = (odd+x) & 1;
+		len -= x;
+
+		bp = bp->next;
+		if(bp == nil)
+			break;	/* chain ran out before len bytes were summed */
+		blocklen = BLEN(bp);
+		addr = bp->rp;
+	}
+
+	losum += hisum>>8;	/* add hisum moved by one byte position ... */
+	losum += (hisum&0xff)<<8;	/* ... its low byte going to the high half */
+	while((csum = losum>>16) != 0)	/* fold carries back into the low 16 bits */
+		losum = csum + (losum & 0xffff);
+
+	return ~losum & 0xffff;
+}
+
+enum
+{
+	Isprefix= 16,
+};
+
+#define CLASS(p) ((*(uchar*)(p))>>6)
+
+void
+ipv62smcast(uchar *smcast, uchar *a)
+{
+	int i;
+	assert(IPaddrlen == 16);
+	memmove(smcast, v6solicitednode, IPaddrlen);
+	for(i = 13; i < IPaddrlen; i++)	/* solicited-node prefix, then the last three bytes of a */
+		smcast[i] = a[i];
+}
+
+
+/*
+ *  parse a hex mac address: two characters per byte, each pair
+ *  optionally followed by ':'.  returns the number of bytes stored.
+ */
+int
+parsemac(uchar *to, char *from, int len)
+{
+	char pair[3];
+	char *s;
+	int n;
+
+	memset(to, 0, len);
+	pair[2] = '\0';
+	s = from;
+	for(n = 0; n < len && s[0] != '\0' && s[1] != '\0'; n++){
+		/* convert the next two characters as one base-16 number */
+		pair[0] = s[0];
+		pair[1] = s[1];
+		to[n] = strtoul(pair, 0, 16);
+		s += 2;
+
+		/* step over a separating colon, if any */
+		if(*s == ':')
+			s++;
+	}
+	return n;
+}
+
+/*
+ *  hash a tcp, udp, ... connection 4-tuple into [0, Nhash); shifts are done in ulong so no bit lands in int's sign bit
+ */
+ulong
+iphash(uchar *sa, ushort sp, uchar *da, ushort dp)
+{
+	return ((ulong)sa[IPaddrlen-1]<<24 ^ (ulong)sp<<16 ^ (ulong)da[IPaddrlen-1]<<8 ^ dp) % Nhash;
+}
+
+/*
+ *  enter conversation c into ht.  The entry remembers how much of
+ *  the address 4-tuple is bound, so iphtlook can tell connected
+ *  conversations from the various kinds of announcements.
+ *  The entry is allocated before the table lock is taken.
+ */
+void
+iphtadd(Ipht *ht, Conv *c)
+{
+	ulong slot;
+	Iphash *ent;
+
+	slot = iphash(c->raddr, c->rport, c->laddr, c->lport);
+	ent = smalloc(sizeof(*ent));
+
+	/* a remote address means connected; otherwise grade by laddr and lport */
+	if(ipcmp(c->raddr, IPnoaddr) != 0)
+		ent->match = IPmatchexact;
+	else if(ipcmp(c->laddr, IPnoaddr) != 0)
+		ent->match = c->lport == 0 ? IPmatchaddr : IPmatchpa;
+	else
+		ent->match = c->lport == 0 ? IPmatchany : IPmatchport;
+	ent->c = c;
+
+	/* push onto the front of the bucket's chain */
+	LOCK(ht);
+	ent->next = ht->tab[slot];
+	ht->tab[slot] = ent;
+	UNLOCK(ht);
+}
+
+void
+iphtrem(Ipht *ht, Conv *c)
+{
+	Iphash **pp, *ent;
+
+	/* unlink and free the first entry for c on its hash chain; */
+	/* nothing happens when c is not in the table */
+	pp = &ht->tab[iphash(c->raddr, c->rport, c->laddr, c->lport)];
+	LOCK(ht);
+	while((ent = *pp) != nil && ent->c != c)
+		pp = &ent->next;
+	if(ent != nil){
+		*pp = ent->next;
+		free(ent);
+	}
+	UNLOCK(ht);
+}
+
+/* look for a matching conversation (sa,sp vs raddr,rport; da,dp vs laddr,lport); nil if none.  precedence:
+ *	connected && raddr,rport,laddr,lport
+ *	announced && laddr,lport
+ *	announced && *,lport
+ *	announced && laddr,*
+ *	announced && *,*
+ */
+Conv*
+iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp)
+{
+	ulong hv;
+	Iphash *h;
+	Conv *c;
+
+	/* exact 4 pair match (connection) */
+	hv = iphash(sa, sp, da, dp);
+	LOCK(ht);	/* held across all five probes; every return path below unlocks */
+	for(h = ht->tab[hv]; h != nil; h = h->next){
+		if(h->match != IPmatchexact)
+			continue;
+		c = h->c;
+		if(sp == c->rport && dp == c->lport
+		&& ipcmp(sa, c->raddr) == 0 && ipcmp(da, c->laddr) == 0){
+			UNLOCK(ht);
+			return c;
+		}
+	}
+
+	/* match local address and port; rehash with the remote half blanked */
+	hv = iphash(IPnoaddr, 0, da, dp);
+	for(h = ht->tab[hv]; h != nil; h = h->next){
+		if(h->match != IPmatchpa)
+			continue;
+		c = h->c;
+		if(dp == c->lport && ipcmp(da, c->laddr) == 0){
+			UNLOCK(ht);
+			return c;
+		}
+	}
+
+	/* match just port */
+	hv = iphash(IPnoaddr, 0, IPnoaddr, dp);
+	for(h = ht->tab[hv]; h != nil; h = h->next){
+		if(h->match != IPmatchport)
+			continue;
+		c = h->c;
+		if(dp == c->lport){
+			UNLOCK(ht);
+			return c;
+		}
+	}
+
+	/* match local address */
+	hv = iphash(IPnoaddr, 0, da, 0);
+	for(h = ht->tab[hv]; h != nil; h = h->next){
+		if(h->match != IPmatchaddr)
+			continue;
+		c = h->c;
+		if(ipcmp(da, c->laddr) == 0){
+			UNLOCK(ht);
+			return c;
+		}
+	}
+
+	/* look for something that matches anything: the first IPmatchany entry wins */
+	hv = iphash(IPnoaddr, 0, IPnoaddr, 0);
+	for(h = ht->tab[hv]; h != nil; h = h->next){
+		if(h->match != IPmatchany)
+			continue;
+		c = h->c;
+		UNLOCK(ht);
+		return c;
+	}
+	UNLOCK(ht);
+	return nil;	/* no conversation matches */
+}
diff --git a/src/9vx/a/ip/ipifc.c b/src/9vx/a/ip/ipifc.c
@@ -0,0 +1,1654 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+
+#define DPRINT if(0)print
+
+enum {
+	Maxmedia	= 32,
+	Nself		= Maxmedia*5,
+	NHASH		= 1<<6,
+	NCACHE		= 256,
+	QMAX		= 64*1024-1,
+};
+
+Medium *media[Maxmedia] = { 0 };
+
+/*
+ *  cache of local addresses (addresses we answer to)
+ */
+struct Ipself
+{
+	uchar	a[IPaddrlen];
+	Ipself	*hnext;		/* NOTE(review): not referenced in the code visible here; hash chains use next */
+	Iplink	*link;		/* binding twixt Ipself and Ipifc */
+	ulong	expire;		/* set by ipselffree: time after which the entry may be freed */
+	uchar	type;		/* type of address */
+	int	ref;
+	Ipself	*next;		/* hash chain link; reused for the deferred-free list */
+};
+
+struct Ipselftab
+{
+	QLock	qlock;
+	int	inited;
+	int	acceptall;	/* true if an interface has the null address */
+	Ipself	*hash[NHASH];	/* hash chains */
+};
+
+/*
+ *  Multicast addresses are chained onto a Chan so that
+ *  we can remove them when the Chan is closed.
+ */
+typedef struct Ipmcast Ipmcast;
+struct Ipmcast
+{
+	Ipmcast	*next;
+	uchar	ma[IPaddrlen];	/* multicast address */
+	uchar	ia[IPaddrlen];	/* interface address */
+};
+
+/* quick hash for ip addresses */
+#define hashipa(a) ( (ulong)(((a)[IPaddrlen-2]<<8) | (a)[IPaddrlen-1])%NHASH )
+
+static char tifc[] = "ifc ";
+
+static void	addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type);
+static void	remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a);
+static char*	ipifcjoinmulti(Ipifc *ifc, char **argv, int argc);
+static char*	ipifcleavemulti(Ipifc *ifc, char **argv, int argc);
+static void	ipifcregisterproxy(Fs*, Ipifc*, uchar*);
+static char*	ipifcremlifc(Ipifc*, Iplifc*);
+
+/*
+ *  link in a new medium in the first free slot (the last slot stays nil)
+ */
+void
+addipmedium(Medium *med)
+{
+	Medium **slot;
+
+	for(slot = media; slot < &media[nelem(media)-1]; slot++)
+		if(*slot == nil){
+			*slot = med;
+			return;
+		}
+}
+
+/*
+ *  find the medium with this name; nil if there is none
+ */
+Medium*
+ipfindmedium(char *name)
+{
+	int i;
+
+	for(i = 0; media[i] != nil; i++)
+		if(strcmp(media[i]->name, name) == 0)
+			return media[i];
+	return nil;
+}
+
+/*
+ *  attach a device (or pkt driver) to the interface: argv[1] names
+ *  the medium, optional argv[2] the device.  called with c locked
+ */
+static char*
+ipifcbind(Conv *c, char **argv, int argc)
+{
+	Ipifc *ifc;
+	Medium *m;
+
+	if(argc < 2)
+		return Ebadarg;
+
+	ifc = (Ipifc*)c->ptcl;
+
+	/* bind the device to the interface */
+	m = ipfindmedium(argv[1]);
+	if(m == nil)
+		return "unknown interface type";
+
+	WLOCK(ifc);
+	if(ifc->m != nil){
+		WUNLOCK(ifc);
+		return "interface already bound";
+	}
+	if(waserror()){	/* an error raised below: drop the lock and pass it on */
+		WUNLOCK(ifc);
+		nexterror();
+	}
+
+	/* do medium specific binding */
+	(*m->bind)(ifc, argc, argv);
+
+	/* set the bound device name: argv[2] if given, else medium name plus c->x */
+	if(argc > 2)
+		strncpy(ifc->dev, argv[2], sizeof(ifc->dev));
+	else
+		snprint(ifc->dev, sizeof ifc->dev, "%s%d", m->name, c->x);
+	ifc->dev[sizeof(ifc->dev)-1] = 0;	/* strncpy may leave dev unterminated */
+
+	/* set up parameters */
+	ifc->m = m;
+	ifc->mintu = ifc->m->mintu;
+	ifc->maxtu = ifc->m->maxtu;
+	if(ifc->m->unbindonclose == 0)
+		ifc->conv->inuse++;	/* undone by ipifcunbind under the same condition */
+	ifc->rp.mflag = 0;		/* default not managed */
+	ifc->rp.oflag = 0;
+	ifc->rp.maxraint = 600000;	/* millisecs */
+	ifc->rp.minraint = 200000;
+	ifc->rp.linkmtu = 0;		/* no mtu sent */
+	ifc->rp.reachtime = 0;
+	ifc->rp.rxmitra = 0;
+	ifc->rp.ttl = MAXTTL;
+	ifc->rp.routerlt = 3 * ifc->rp.maxraint;
+
+	/* any ancillary structures (like routes) no longer pertain */
+	ifc->ifcid++;
+
+	/* reopen all the queues closed by a previous unbind; NOTE(review): unbind closes rq, wq, sq but eq, not wq, is reopened */
+	qreopen(c->rq);
+	qreopen(c->eq);
+	qreopen(c->sq);
+
+	WUNLOCK(ifc);
+	poperror();
+
+	return nil;
+}
+
+/*
+ *  detach a device from an interface, close the interface
+ *  called with ifc->conv closed.  returns nil; failures are raised with error()
+ */
+static char*
+ipifcunbind(Ipifc *ifc)
+{
+	char *err;
+
+	if(waserror()){	/* NOTE(review): handler is installed before WLOCK is taken below */
+		WUNLOCK(ifc);
+		nexterror();
+	}
+	WLOCK(ifc);
+
+	/* drop the use count taken in ipifcbind; bump ifcid as bind does to retire routes */
+	if(ifc->m != nil && ifc->m->unbindonclose == 0)
+		ifc->conv->inuse--;
+	ifc->ifcid++;
+
+	/* disassociate logical interfaces (before zeroing ifc->arg) */
+	while(ifc->lifc){
+		err = ipifcremlifc(ifc, ifc->lifc);
+		/*
+		 * note: err non-zero means lifc not found,
+		 * which can't happen in this case.
+		 */
+		if(err)
+			error(err);
+	}
+
+	/* disassociate device */
+	if(ifc->m && ifc->m->unbind)
+		(*ifc->m->unbind)(ifc);
+	memset(ifc->dev, 0, sizeof(ifc->dev));
+	ifc->arg = nil;
+	ifc->reassemble = 0;
+
+	/* close queues to stop queuing of packets */
+	qclose(ifc->conv->rq);
+	qclose(ifc->conv->wq);
+	qclose(ifc->conv->sq);
+
+	ifc->m = nil;	/* ipifcinuse reports unbound from here on */
+	WUNLOCK(ifc);
+	poperror();
+	return nil;
+}
+
+char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag"
+" %d maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt"
+" %d pktin %lud pktout %lud errin %lud errout %lud\n";
+
+char slineformat[] = "	%-40I %-10M %-40I %-12lud %-12lud\n";
+
+static int	/* format the interface status into state (at most n bytes); returns the summed snprint counts */
+ipifcstate(Conv *c, char *state, int n)
+{
+	Ipifc *ifc;
+	Iplifc *lifc;
+	int m;
+
+	ifc = (Ipifc*)c->ptcl;
+	m = snprint(state, n, sfixedformat,	/* NOTE(review): these fields are read before RLOCK is taken */
+		ifc->dev, ifc->maxtu, ifc->sendra6, ifc->recvra6,
+		ifc->rp.mflag, ifc->rp.oflag, ifc->rp.maxraint,
+		ifc->rp.minraint, ifc->rp.linkmtu, ifc->rp.reachtime,
+		ifc->rp.rxmitra, ifc->rp.ttl, ifc->rp.routerlt,
+		ifc->in, ifc->out, ifc->inerr, ifc->outerr);
+
+	RLOCK(ifc);
+	for(lifc = ifc->lifc; lifc && n > m; lifc = lifc->next)	/* one slineformat line per logical interface */
+		m += snprint(state+m, n - m, slineformat, lifc->local,
+			lifc->mask, lifc->remote, lifc->validlt, lifc->preflt);
+	if(ifc->lifc == nil)
+		m += snprint(state+m, n - m, "\n");	/* no addresses: emit an empty line instead */
+	RUNLOCK(ifc);
+	return m;
+}
+
+static int
+ipifclocal(Conv *c, char *state, int n)
+{
+	Ipifc *ifc = (Ipifc*)c->ptcl;
+	Iplifc *lifc;
+	Iplink *lp;
+	int used = 0;
+
+	/* one text line per logical interface: its local address, */
+	/* "->", then each self address linked to it; */
+	/* the result is the sum of the snprint return values */
+	RLOCK(ifc);
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+		used += snprint(state+used, n - used, "%-40.40I ->", lifc->local);
+		for(lp = lifc->link; lp != nil; lp = lp->lifclink)
+			used += snprint(state+used, n - used, " %-40.40I", lp->self->a);
+		used += snprint(state+used, n - used, "\n");
+	}
+	RUNLOCK(ifc);
+	return used;
+}
+
+static int
+ipifcinuse(Conv *c)
+{
+	/* an interface counts as in use while a medium is bound to it */
+	if(((Ipifc*)c->ptcl)->m == nil)
+		return 0;
+	return 1;
+}
+
+/*
+ *  called when a process writes to an interface's 'data'; x is the Conv (see qopen in ipifccreate)
+ */
+static void
+ipifckick(void *x)
+{
+	Conv *c = x;
+	Block *bp;
+	Ipifc *ifc;
+
+	bp = qget(c->wq);
+	if(bp == nil)
+		return;
+
+	ifc = (Ipifc*)c->ptcl;
+	if(!CANRLOCK(ifc)){	/* lock not available right now: drop the packet rather than wait */
+		freeb(bp);
+		return;
+	}
+	if(waserror()){
+		RUNLOCK(ifc);
+		nexterror();
+	}
+	if(ifc->m == nil || ifc->m->pktin == nil)
+		freeb(bp);	/* unbound, or the medium takes no injected packets */
+	else
+		(*ifc->m->pktin)(c->p->f, ifc, bp);
+	RUNLOCK(ifc);
+	poperror();
+}
+
+/*
+ *  called when a new ipifc structure is created: opens the queues and leaves the interface unbound
+ */
+static void
+ipifccreate(Conv *c)
+{
+	Ipifc *ifc;
+
+	c->rq = qopen(QMAX, 0, 0, 0);
+	c->sq = qopen(2*QMAX, 0, 0, 0);
+	c->wq = qopen(QMAX, Qkick, ipifckick, c);	/* writes to wq invoke ipifckick(c) */
+	ifc = (Ipifc*)c->ptcl;
+	ifc->conv = c;
+	ifc->unbinding = 0;
+	ifc->m = nil;	/* no medium yet; ipifcbind sets it */
+	ifc->reassemble = 0;
+}
+
+/*
+ *  called after last close of ipifc data or ctl; unbinds the interface
+ *  when its medium is marked unbindonclose.  called with c locked;
+ *  NOTE(review): the old note said "we must unlock" but nothing here does
+ */
+static void
+ipifcclose(Conv *c)
+{
+	Ipifc *ifc = (Ipifc*)c->ptcl;
+	Medium *med = ifc->m;
+
+	if(med == nil || !med->unbindonclose)
+		return;
+	ipifcunbind(ifc);
+}
+
+/*  change an interface's mtu; argv[1] must lie
+ *  within the medium's [mintu, maxtu] range  */
+char*
+ipifcsetmtu(Ipifc *ifc, char **argv, int argc)
+{
+	int want;
+
+	if(argc < 2 || ifc->m == nil)
+		return Ebadarg;
+	want = strtoul(argv[1], 0, 0);
+	if(want >= ifc->m->mintu && want <= ifc->m->maxtu){
+		ifc->maxtu = want;
+		return nil;
+	}
+	return Ebadarg;
+}
+
+/*
+ *  add an address to an interface.  argv[1] is the address, then optionally mask, remote, mtu and "proxy"
+ */
+char*
+ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp)
+{
+	int i, type, mtu, sendnbrdisc = 0;
+	uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen];
+	uchar bcast[IPaddrlen], net[IPaddrlen];
+	Iplifc *lifc, **l;
+	Fs *f;
+
+	if(ifc->m == nil)
+		return "ipifc not yet bound to device";
+
+	f = ifc->conv->p->f;
+
+	type = Rifc;
+	memset(ip, 0, IPaddrlen);
+	memset(mask, 0, IPaddrlen);
+	memset(rem, 0, IPaddrlen);
+	switch(argc){
+	case 6:
+		if(strcmp(argv[5], "proxy") == 0)
+			type |= Rproxy;
+		/* fall through */
+	case 5:
+		mtu = strtoul(argv[4], 0, 0);
+		if(mtu >= ifc->m->mintu && mtu <= ifc->m->maxtu)
+			ifc->maxtu = mtu;	/* an out-of-range mtu is ignored, not an error */
+		/* fall through */
+	case 4:
+		if (parseip(ip, argv[1]) == -1 || parseip(rem, argv[3]) == -1)
+			return Ebadip;
+		parseipmask(mask, argv[2]);
+		maskip(rem, mask, net);
+		break;
+	case 3:
+		if (parseip(ip, argv[1]) == -1)
+			return Ebadip;
+		parseipmask(mask, argv[2]);
+		maskip(ip, mask, rem);	/* no remote given: use the masked local address */
+		maskip(rem, mask, net);
+		break;
+	case 2:
+		if (parseip(ip, argv[1]) == -1)
+			return Ebadip;
+		memmove(mask, defmask(ip), IPaddrlen);	/* no mask given: take defmask's */
+		maskip(ip, mask, rem);
+		maskip(rem, mask, net);
+		break;
+	default:
+		return Ebadarg;
+	}
+	if(isv4(ip))
+		tentative = 0;	/* tentative is forced off for v4 addresses */
+	WLOCK(ifc);
+
+	/* ignore if this is already a local address for this ifc, but refresh its flags and lifetimes */
+	for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
+		if(ipcmp(lifc->local, ip) == 0) {
+			if(lifc->tentative != tentative)
+				lifc->tentative = tentative;
+			if(lifcp) {
+				lifc->onlink = lifcp->onlink;
+				lifc->autoflag = lifcp->autoflag;
+				lifc->validlt = lifcp->validlt;
+				lifc->preflt = lifcp->preflt;
+				lifc->origint = lifcp->origint;
+			}
+			goto out;
+		}
+	}
+
+	/* add the address to the list of logical ifc's for this ifc */
+	lifc = smalloc(sizeof(Iplifc));
+	ipmove(lifc->local, ip);
+	ipmove(lifc->mask, mask);
+	ipmove(lifc->remote, rem);
+	ipmove(lifc->net, net);
+	lifc->tentative = tentative;
+	if(lifcp) {
+		lifc->onlink = lifcp->onlink;
+		lifc->autoflag = lifcp->autoflag;
+		lifc->validlt = lifcp->validlt;
+		lifc->preflt = lifcp->preflt;
+		lifc->origint = lifcp->origint;
+	} else {		/* default values; ~0L is what v6addrcurr treats as never expiring */
+		lifc->onlink = lifc->autoflag = 1;
+		lifc->validlt = lifc->preflt = ~0L;
+		lifc->origint = NOW / 1000;
+	}
+	lifc->next = nil;
+
+	for(l = &ifc->lifc; *l; l = &(*l)->next)	/* append at the tail */
+		;
+	*l = lifc;
+
+	/* check for point-to-point interface */
+	if(ipcmp(ip, v6loopback)) /* skip v6 loopback, it's a special address */
+	if(ipcmp(mask, IPallbits) == 0)
+		type |= Rptpt;
+
+	/* add local routes */
+	if(isv4(ip))
+		v4addroute(f, tifc, rem+IPv4off, mask+IPv4off, rem+IPv4off, type);
+	else
+		v6addroute(f, tifc, rem, mask, rem, type);
+
+	addselfcache(f, ifc, lifc, ip, Runi);
+
+	if((type & (Rproxy|Rptpt)) == (Rproxy|Rptpt)){
+		ipifcregisterproxy(f, ifc, rem);
+		goto out;
+	}
+
+	if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0) {
+		/* add subnet directed broadcast address to the self cache */
+		for(i = 0; i < IPaddrlen; i++)
+			bcast[i] = (ip[i] & mask[i]) | ~mask[i];
+		addselfcache(f, ifc, lifc, bcast, Rbcast);
+
+		/* add subnet directed network address to the self cache */
+		for(i = 0; i < IPaddrlen; i++)
+			bcast[i] = (ip[i] & mask[i]) & mask[i];
+		addselfcache(f, ifc, lifc, bcast, Rbcast);
+
+		/* add network directed broadcast address to the self cache; mask becomes defmask(ip) from here on */
+		memmove(mask, defmask(ip), IPaddrlen);
+		for(i = 0; i < IPaddrlen; i++)
+			bcast[i] = (ip[i] & mask[i]) | ~mask[i];
+		addselfcache(f, ifc, lifc, bcast, Rbcast);
+
+		/* add network directed network address to the self cache */
+		memmove(mask, defmask(ip), IPaddrlen);
+		for(i = 0; i < IPaddrlen; i++)
+			bcast[i] = (ip[i] & mask[i]) & mask[i];
+		addselfcache(f, ifc, lifc, bcast, Rbcast);
+
+		addselfcache(f, ifc, lifc, IPv4bcast, Rbcast);
+	}
+	else {
+		if(ipcmp(ip, v6loopback) == 0) {
+			/* add node-local mcast address */
+			addselfcache(f, ifc, lifc, v6allnodesN, Rmulti);
+
+			/* add route for all node multicast */
+			v6addroute(f, tifc, v6allnodesN, v6allnodesNmask,
+				v6allnodesN, Rmulti);
+		}
+
+		/* add all nodes multicast address */
+		addselfcache(f, ifc, lifc, v6allnodesL, Rmulti);
+
+		/* add route for all nodes multicast */
+		v6addroute(f, tifc, v6allnodesL, v6allnodesLmask, v6allnodesL,
+			Rmulti);
+
+		/* add solicited-node multicast address */
+		ipv62smcast(bcast, ip);
+		addselfcache(f, ifc, lifc, bcast, Rmulti);
+
+		sendnbrdisc = 1;
+	}
+
+	/* register the address on this network for address resolution */
+	if(isv4(ip) && ifc->m->areg != nil)
+		(*ifc->m->areg)(ifc, ip);
+
+out:
+	WUNLOCK(ifc);
+	if(tentative && sendnbrdisc)	/* only a newly added tentative v6 address gets here with both set */
+		icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+	return nil;
+}
+
+/*
+ *  remove a logical interface from an ifc; returns nil, or an error string if lifc is not on ifc
+ *  always called with ifc WLOCK'd
+ */
+static char*
+ipifcremlifc(Ipifc *ifc, Iplifc *lifc)
+{
+	Iplifc **l;
+	Fs *f;
+
+	f = ifc->conv->p->f;
+
+	/*
+	 *  find address on this interface and remove from chain.
+	 *  for pt to pt we actually specify the remote address as the
+	 *  address to remove.
+	 */
+	for(l = &ifc->lifc; *l != nil && *l != lifc; l = &(*l)->next)
+		;
+	if(*l == nil)	/* also taken when lifc itself is nil */
+		return "address not on this interface";
+	*l = lifc->next;
+
+	/* disassociate any addresses; each call unlinks the head of lifc->link once its ref reaches zero */
+	while(lifc->link)
+		remselfcache(f, ifc, lifc, lifc->link->self->a);
+
+	/* remove the route for this logical interface */
+	if(isv4(lifc->local))
+		v4delroute(f, lifc->remote+IPv4off, lifc->mask+IPv4off, 1);
+	else {
+		v6delroute(f, lifc->remote, lifc->mask, 1);
+		if(ipcmp(lifc->local, v6loopback) == 0)
+			/* remove route for all node multicast */
+			v6delroute(f, v6allnodesN, v6allnodesNmask, 1);
+		else if(memcmp(lifc->local, v6linklocal, v6llpreflen) == 0)
+			/* remove route for all link multicast */
+			v6delroute(f, v6allnodesL, v6allnodesLmask, 1);
+	}
+
+	free(lifc);
+	return nil;
+}
+
+/*
+ *  remove an address from an interface: argv[1] address, argv[2] mask, optional argv[3] remote.
+ *  called with c->car locked
+ */
+char*
+ipifcrem(Ipifc *ifc, char **argv, int argc)
+{
+	char *rv;
+	uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen];
+	Iplifc *lifc;
+
+	if(argc < 3)
+		return Ebadarg;
+
+	if (parseip(ip, argv[1]) == -1)
+		return Ebadip;
+	parseipmask(mask, argv[2]);
+	if(argc < 4)
+		maskip(ip, mask, rem);	/* same default remote as ipifcadd uses */
+	else
+		if (parseip(rem, argv[3]) == -1)
+			return Ebadip;
+
+	WLOCK(ifc);
+
+	/*
+	 *  find address on this interface and remove from chain.
+	 *  for pt to pt we actually specify the remote address as the
+	 *  address to remove.
+	 */
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next) {
+		if (memcmp(ip, lifc->local, IPaddrlen) == 0
+		&& memcmp(mask, lifc->mask, IPaddrlen) == 0
+		&& memcmp(rem, lifc->remote, IPaddrlen) == 0)
+			break;
+	}
+
+	rv = ipifcremlifc(ifc, lifc);	/* lifc is nil when nothing matched; ipifcremlifc then returns its error string */
+	WUNLOCK(ifc);
+	return rv;
+}
+
+/*
+ * hand a new route to every bound interface whose medium
+ * has an addroute hook (distribution like the TRIP linecards)
+ */
+void
+ipifcaddroute(Fs *f, int vers, uchar *addr, uchar *mask, uchar *gate, int type)
+{
+	Conv **cp, **end;
+	Ipifc *ifc;
+	Medium *med;
+
+	end = &f->ipifc->conv[f->ipifc->nc];
+	for(cp = f->ipifc->conv; cp < end; cp++){
+		if(*cp == nil)
+			continue;
+		ifc = (Ipifc*)(*cp)->ptcl;
+		med = ifc->m;
+		if(med != nil && med->addroute != nil)
+			med->addroute(ifc, vers, addr, mask, gate, type);
+	}
+}
+
+void
+ipifcremroute(Fs *f, int vers, uchar *addr, uchar *mask)
+{
+	Conv **cp, **end;
+	Ipifc *ifc;
+	Medium *med;
+
+	end = &f->ipifc->conv[f->ipifc->nc];
+	for(cp = f->ipifc->conv; cp < end; cp++){
+		if(*cp == nil)
+			continue;	/* unused conversation slot */
+		ifc = (Ipifc*)(*cp)->ptcl;
+		med = ifc->m;
+		if(med != nil && med->remroute != nil)
+			med->remroute(ifc, vers, addr, mask);
+	}
+}
+
+/*
+ *  associate an address with the interface.  This wipes out any previous
+ *  addresses.  This is a macro that means, remove all the old interfaces
+ *  and add a new one.  argv is passed unchanged to ipifcadd.
+ */
+static char*
+ipifcconnect(Conv* c, char **argv, int argc)
+{
+	char *err;
+	Ipifc *ifc;
+
+	ifc = (Ipifc*)c->ptcl;
+
+	if(ifc->m == nil)
+		 return "ipifc not yet bound to device";
+
+	if(waserror()){
+		WUNLOCK(ifc);
+		nexterror();
+	}
+	WLOCK(ifc);
+	while(ifc->lifc){	/* always removes the head, so the list shrinks every pass */
+		err = ipifcremlifc(ifc, ifc->lifc);
+		if(err)
+			error(err);
+	}
+	WUNLOCK(ifc);	/* ipifcadd below takes the write lock itself */
+	poperror();
+
+	err = ipifcadd(ifc, argv, argc, 0, nil);
+	if(err)
+		return err;
+
+	Fsconnected(c, nil);
+	return nil;
+}
+
+char*	/* set router-advertisement parameters from "name value" pairs in argv[1..] */
+ipifcra6(Ipifc *ifc, char **argv, int argc)
+{
+	int i, argsleft, vmax = ifc->rp.maxraint, vmin = ifc->rp.minraint;
+
+	argsleft = argc - 1;
+	i = 1;
+
+	if(argsleft % 2 != 0)	/* every name needs a value */
+		return Ebadarg;
+
+	while (argsleft > 1) {
+		if(strcmp(argv[i], "recvra") == 0)
+			ifc->recvra6 = (atoi(argv[i+1]) != 0);
+		else if(strcmp(argv[i], "sendra") == 0)
+			ifc->sendra6 = (atoi(argv[i+1]) != 0);
+		else if(strcmp(argv[i], "mflag") == 0)
+			ifc->rp.mflag = (atoi(argv[i+1]) != 0);
+		else if(strcmp(argv[i], "oflag") == 0)
+			ifc->rp.oflag = (atoi(argv[i+1]) != 0);
+		else if(strcmp(argv[i], "maxraint") == 0)
+			ifc->rp.maxraint = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "minraint") == 0)
+			ifc->rp.minraint = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "linkmtu") == 0)
+			ifc->rp.linkmtu = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "reachtime") == 0)
+			ifc->rp.reachtime = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "rxmitra") == 0)
+			ifc->rp.rxmitra = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "ttl") == 0)
+			ifc->rp.ttl = atoi(argv[i+1]);
+		else if(strcmp(argv[i], "routerlt") == 0)
+			ifc->rp.routerlt = atoi(argv[i+1]);
+		else
+			return Ebadarg;	/* unknown name; pairs applied before it stay in effect */
+
+		argsleft -= 2;
+		i += 2;
+	}
+
+	/* consistency check: only maxraint/minraint are rolled back on failure */
+	if(ifc->rp.maxraint < ifc->rp.minraint) {
+		ifc->rp.maxraint = vmax;
+		ifc->rp.minraint = vmin;
+		return Ebadarg;
+	}
+	return nil;
+}
+
+/*
+ *  non-standard control messages, dispatched on argv[0]; returns nil or an error string.
+ *  called with c->car locked.
+ */
+static char*
+ipifcctl(Conv* c, char**argv, int argc)
+{
+	Ipifc *ifc;
+	int i;
+
+	ifc = (Ipifc*)c->ptcl;
+	if(strcmp(argv[0], "add") == 0)
+		return ipifcadd(ifc, argv, argc, 0, nil);
+	else if(strcmp(argv[0], "try") == 0)	/* like add, but passes tentative = 1 */
+		return ipifcadd(ifc, argv, argc, 1, nil);
+	else if(strcmp(argv[0], "remove") == 0)
+		return ipifcrem(ifc, argv, argc);
+	else if(strcmp(argv[0], "unbind") == 0)
+		return ipifcunbind(ifc);
+	else if(strcmp(argv[0], "joinmulti") == 0)
+		return ipifcjoinmulti(ifc, argv, argc);
+	else if(strcmp(argv[0], "leavemulti") == 0)
+		return ipifcleavemulti(ifc, argv, argc);
+	else if(strcmp(argv[0], "mtu") == 0)
+		return ipifcsetmtu(ifc, argv, argc);
+	else if(strcmp(argv[0], "reassemble") == 0){
+		ifc->reassemble = 1;
+		return nil;
+	}
+	else if(strcmp(argv[0], "iprouting") == 0){
+		i = 1;	/* default when no argument is given */
+		if(argc > 1)
+			i = atoi(argv[1]);
+		iprouting(c->p->f, i);
+		return nil;
+	}
+	else if(strcmp(argv[0], "add6") == 0)
+		return ipifcadd6(ifc, argv, argc);
+	else if(strcmp(argv[0], "ra6") == 0)
+		return ipifcra6(ifc, argv, argc);
+	return "unsupported ctl";
+}
+
+int
+ipifcstats(Proto *ipifc, char *buf, int len)
+{
+	return ipstats(ipifc->f, buf, len);	/* delegates to ipstats for the Fs that owns this protocol */
+}
+
+void	/* build the "ipifc" Proto, hook it and a fresh self table into f, and register it with Fsproto */
+ipifcinit(Fs *f)
+{
+	Proto *ipifc;
+
+	ipifc = smalloc(sizeof(Proto));
+	ipifc->name = "ipifc";
+	ipifc->connect = ipifcconnect;
+	ipifc->announce = nil;
+	ipifc->bind = ipifcbind;
+	ipifc->state = ipifcstate;
+	ipifc->create = ipifccreate;
+	ipifc->close = ipifcclose;
+	ipifc->rcv = nil;
+	ipifc->ctl = ipifcctl;
+	ipifc->advise = nil;
+	ipifc->stats = ipifcstats;
+	ipifc->inuse = ipifcinuse;
+	ipifc->local = ipifclocal;
+	ipifc->ipproto = -1;
+	ipifc->nc = Maxmedia;	/* bounds the conv[] walks elsewhere in this file */
+	ipifc->ptclsize = sizeof(Ipifc);	/* c->ptcl is cast to Ipifc* throughout */
+
+	f->ipifc = ipifc;	/* hack for ipifcremroute, findipifc, ... */
+	f->self = smalloc(sizeof(Ipselftab));	/* hack for ipforme */
+
+	Fsproto(f, ipifc);
+}
+
+/*
+ *  add to self routing cache: find or create the entry for a, then find or create its link to lifc
+ *	called with c->car locked
+ */
+static void
+addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type)
+{
+	Ipself *p;
+	Iplink *lp;
+	int h;
+
+	QLOCK(f->self);
+
+	/* see if the address already exists */
+	h = hashipa(a);
+	for(p = f->self->hash[h]; p; p = p->next)
+		if(memcmp(a, p->a, IPaddrlen) == 0)
+			break;
+
+	/* allocate a local address and add to hash chain */
+	if(p == nil){
+		p = smalloc(sizeof(*p));
+		ipmove(p->a, a);
+		p->type = type;	/* type of an existing entry is left as it was */
+		p->next = f->self->hash[h];
+		f->self->hash[h] = p;
+
+		/* if the null address, accept all packets */
+		if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
+			f->self->acceptall = 1;
+	}
+
+	/* look for a link for this lifc */
+	for(lp = p->link; lp; lp = lp->selflink)
+		if(lp->lifc == lifc)
+			break;
+
+	/* allocate a lifc-to-local link and link to both */
+	if(lp == nil){
+		lp = smalloc(sizeof(*lp));
+		lp->ref = 1;
+		lp->lifc = lifc;
+		lp->self = p;
+		lp->selflink = p->link;
+		p->link = lp;
+		lp->lifclink = lifc->link;
+		lifc->link = lp;
+
+		/* add to routing table */
+		if(isv4(a))
+			v4addroute(f, tifc, a+IPv4off, IPallbits+IPv4off,
+				a+IPv4off, type);
+		else
+			v6addroute(f, tifc, a, IPallbits, a, type);
+
+		if((type & Rmulti) && ifc->m->addmulti != nil)
+			(*ifc->m->addmulti)(ifc, a, lifc->local);
+	} else
+		lp->ref++;	/* same address already linked to this lifc: just count it */
+
+	QUNLOCK(f->self);
+}
+
+/*
+ *  These structures are unlinked from their chains while
+ *  other threads may be using them.  To avoid excessive locking,
+ *  just put them aside for a while before freeing them.
+ *	called with f->self locked
+ */
+static Iplink *freeiplink;
+static Ipself *freeipself;
+
+static void	/* park p on the deferred-free list, first reaping entries whose grace period is over */
+iplinkfree(Iplink *p)
+{
+	Iplink **l, *np;
+	ulong now = NOW;
+
+	l = &freeiplink;
+	for(np = *l; np; np = *l){
+		if((long)(now - np->expire) >= 0){	/* expired; signed difference survives wraparound of NOW */
+			*l = np->next;
+			free(np);
+			continue;
+		}
+		l = &np->next;	/* still inside its grace period: keep it */
+	}
+	p->expire = now + 5000;	/* give other threads 5 secs to get out */
+	p->next = nil;
+	*l = p;	/* l points at the tail link after the walk */
+}
+
+static void	/* park p on the deferred-free list, first reaping entries whose grace period is over */
+ipselffree(Ipself *p)
+{
+	Ipself **l, *np;
+	ulong now = NOW;
+
+	l = &freeipself;
+	for(np = *l; np; np = *l){
+		if((long)(now - np->expire) >= 0){	/* expired; signed difference survives wraparound of NOW */
+			*l = np->next;
+			free(np);
+			continue;
+		}
+		l = &np->next;	/* still inside its grace period: keep it */
+	}
+	p->expire = now + 5000;	/* give other threads 5 secs to get out */
+	p->next = nil;
+	*l = p;	/* l points at the tail link after the walk */
+}
+
+/*
+ *  Decrement reference for this address on this link.
+ *  Unlink from selftab if this is the last ref.
+ *	called with c->car locked
+ */
+static void
+remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a)
+{
+	Ipself *p, **l;
+	Iplink *link, **l_self, **l_lifc;
+
+	QLOCK(f->self);
+
+	/* find the unique selftab entry; l is left pointing at the pointer that refers to it */
+	l = &f->self->hash[hashipa(a)];
+	for(p = *l; p; p = *l){
+		if(ipcmp(p->a, a) == 0)
+			break;
+		l = &p->next;
+	}
+
+	if(p == nil)
+		goto out;
+
+	/*
+	 *  walk down links from an ifc looking for one
+	 *  that matches the selftab entry
+	 */
+	l_lifc = &lifc->link;
+	for(link = *l_lifc; link; link = *l_lifc){
+		if(link->self == p)
+			break;
+		l_lifc = &link->lifclink;
+	}
+
+	if(link == nil)
+		goto out;
+
+	/*
+	 *  walk down the links from the selftab looking for
+	 *  the one we just found
+	 */
+	l_self = &p->link;
+	for(link = *l_self; link; link = *l_self){
+		if(link == *l_lifc)
+			break;
+		l_self = &link->selflink;
+	}
+
+	if(link == nil)
+		panic("remselfcache");	/* the link is on the lifc chain but not on the self chain */
+
+	if(--(link->ref) != 0)
+		goto out;
+
+	if((p->type & Rmulti) && ifc->m->remmulti != nil)
+		(*ifc->m->remmulti)(ifc, a, lifc->local);
+
+	/* ref == 0, remove from both chains and free the link */
+	*l_lifc = link->lifclink;
+	*l_self = link->selflink;
+	iplinkfree(link);
+
+	if(p->link != nil)
+		goto out;	/* other lifcs still refer to this address */
+
+	/* remove from routing table */
+	if(isv4(a))
+		v4delroute(f, a+IPv4off, IPallbits+IPv4off, 1);
+	else
+		v6delroute(f, a, IPallbits, 1);
+
+	/* no more links, remove from hash and free */
+	*l = p->next;
+	ipselffree(p);
+
+	/* if IPnoaddr, forget */
+	if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
+		f->self->acceptall = 0;
+
+out:
+	QUNLOCK(f->self);
+}
+
+static char *stformat = "%-44.44I %2.2d %4.4s\n";
+enum
+{
+	Nstformat= 41,
+};
+
+long	/* format the self table into cp (at most n bytes), one stformat line per address; returns bytes kept */
+ipselftabread(Fs *f, char *cp, ulong offset, int n)
+{
+	int i, m, nifc, off;
+	Ipself *p;
+	Iplink *link;
+	char state[8];
+
+	m = 0;
+	off = offset;
+	QLOCK(f->self);
+	for(i = 0; i < NHASH && m < n; i++){
+		for(p = f->self->hash[i]; p != nil && m < n; p = p->next){
+			nifc = 0;
+			for(link = p->link; link; link = link->selflink)	/* count the links on this address */
+				nifc++;
+			routetype(p->type, state);
+			m += snprint(cp + m, n - m, stformat, p->a, nifc, state);
+			if(off > 0){	/* still before offset: discard what was written; NOTE(review): assumes offset falls on a line boundary */
+				off -= m;
+				m = 0;
+			}
+		}
+	}
+	QUNLOCK(f->self);
+	return m;
+}
+
+int
+iptentative(Fs *f, uchar *addr)
+{
+	Ipself *p;
+
+	/* the tentative flag of the first lifc linked to addr's self entry; */
+	/* 0 when addr is not in the self cache */
+	for(p = f->self->hash[hashipa(addr)]; p != nil; p = p->next)
+		if(ipcmp(addr, p->a) == 0)
+			return p->link->lifc->tentative;
+	return 0;
+}
+
+/*
+ *  classify addr against the self cache.  returns
+ *	0		- no match and acceptall clear
+ *	the type stored with the matching entry
+ *	(Runi, Rbcast, Rmulti, ...), or
+ *	Runi when nothing matches but acceptall is set
+ */
+int
+ipforme(Fs *f, uchar *addr)
+{
+	Ipself *p;
+
+	/* walk the hash chain selected by addr */
+	p = f->self->hash[hashipa(addr)];
+	while(p != nil){
+		if(ipcmp(addr, p->a) == 0)
+			return p->type;
+		p = p->next;
+	}
+
+	/* hack to say accept anything */
+	return f->self->acceptall ? Runi : 0;
+}
+
+/*
+ *  find the ifc on same net as the remote system.  If none,
+ *  return nil (or, for broadcast/multicast types, the first ifc that has an address).
+ */
+Ipifc*
+findipifc(Fs *f, uchar *remote, int type)
+{
+	Ipifc *ifc, *x;
+	Iplifc *lifc;
+	Conv **cp, **e;
+	uchar gnet[IPaddrlen], xmask[IPaddrlen];
+
+	x = nil;
+	memset(xmask, 0, IPaddrlen);
+
+	/* find most specific match: among matching nets keep the one whose mask compares greatest */
+	e = &f->ipifc->conv[f->ipifc->nc];
+	for(cp = f->ipifc->conv; cp < e; cp++){
+		if(*cp == 0)
+			continue;
+		ifc = (Ipifc*)(*cp)->ptcl;
+		for(lifc = ifc->lifc; lifc; lifc = lifc->next){	/* NOTE(review): list is walked without taking ifc's lock */
+			maskip(remote, lifc->mask, gnet);
+			if(ipcmp(gnet, lifc->net) == 0){
+				if(x == nil || ipcmp(lifc->mask, xmask) > 0){
+					x = ifc;
+					ipmove(xmask, lifc->mask);
+				}
+			}
+		}
+	}
+	if(x != nil)
+		return x;
+
+	/* for now for broadcast and multicast, just use first interface */
+	if(type & (Rbcast|Rmulti)){
+		for(cp = f->ipifc->conv; cp < e; cp++){
+			if(*cp == 0)
+				continue;
+			ifc = (Ipifc*)(*cp)->ptcl;
+			if(ifc->lifc != nil)
+				return ifc;
+		}
+	}
+	return nil;
+}
+
+enum {
+	unknownv6,		/* UGH */
+//	multicastv6,
+	unspecifiedv6,
+	linklocalv6,
+	globalv6,
+};
+
+/*
+ *  Classify an IPv6 address: link-local unicast addresses and
+ *  multicast addresses whose scope nibble is at most
+ *  Link_local_scop are linklocalv6, everything else is globalv6.
+ */
+int
+v6addrtype(uchar *addr)
+{
+	if(islinklocal(addr))
+		return linklocalv6;
+	if(isv6mcast(addr) && (addr[1] & 0xF) <= Link_local_scop)
+		return linklocalv6;
+	return globalv6;
+}
+
+/*
+ *  True while lifc's preferred lifetime has not run out: either
+ *  preflt is ~0L, or origint + preflt has not yet been passed by
+ *  NOW/1000.  The argument is evaluated more than once.
+ */
+#define v6addrcurr(lifc) ((lifc)->preflt == ~0L || \
+			(lifc)->origint + (lifc)->preflt >= NOW/1000)
+
+/*
+ *  Copy the "best" current IPv6 local address into local, where
+ *  global beats link local beats unspecified.  local is set to
+ *  v6Unspecified when no current address is found.  The scan
+ *  stops as soon as a global address is seen.
+ */
+static void
+findprimaryipv6(Fs *f, uchar *local)
+{
+	int best, cand;
+	Conv **cp, **end;
+	Ipifc *ifc;
+	Iplifc *lifc;
+
+	best = unspecifiedv6;
+	ipmove(local, v6Unspecified);
+
+	end = &f->ipifc->conv[f->ipifc->nc];
+	for(cp = f->ipifc->conv; cp < end; cp++){
+		if(*cp == 0)
+			continue;
+		ifc = (Ipifc*)(*cp)->ptcl;
+		for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+			cand = v6addrtype(lifc->local);
+			/* only a strictly better, still current address replaces the choice */
+			if(cand <= best || !v6addrcurr(lifc))
+				continue;
+			ipmove(local, lifc->local);
+			best = cand;
+			if(best == globalv6)
+				return;
+		}
+	}
+}
+
+/*
+ *  Copy the first configured local address into local: the first
+ *  logical interface of the first interface that has one.
+ *
+ *  When no interface has an address, local is set to
+ *  v6Unspecified so the caller never sees an unwritten buffer;
+ *  this matches what findprimaryipv6 does.
+ */
+static void
+findprimaryipv4(Fs *f, uchar *local)
+{
+	Conv **cp, **e;
+	Ipifc *ifc;
+	Iplifc *lifc;
+
+	/* default result when nothing is configured */
+	ipmove(local, v6Unspecified);
+
+	/* find first ifc local address */
+	e = &f->ipifc->conv[f->ipifc->nc];
+	for(cp = f->ipifc->conv; cp < e; cp++){
+		if(*cp == 0)
+			continue;
+		ifc = (Ipifc*)(*cp)->ptcl;
+		if((lifc = ifc->lifc) != nil){
+			ipmove(local, lifc->local);
+			return;
+		}
+	}
+}
+
+/*
+ *  find the local address 'closest' to the remote system and copy
+ *  it to local.  Nothing is returned; the result is only the
+ *  address written into local.
+ *
+ *  The route to remote is looked up first.  If one exists, an
+ *  address is chosen from the route's interface: for a v4 remote
+ *  the first logical interface whose network contains the gateway,
+ *  for a v6 remote the address whose scope best matches the
+ *  remote's, preferring addresses that are still current.  If that
+ *  yields nothing, the choice falls back to findprimaryipv4 or
+ *  findprimaryipv6.  f->ipifc is locked for the duration.
+ */
+void
+findlocalip(Fs *f, uchar *local, uchar *remote)
+{
+	int version, atype = unspecifiedv6, atypel = unknownv6;
+	int atyper, deprecated;
+	uchar gate[IPaddrlen], gnet[IPaddrlen];
+	Ipifc *ifc;
+	Iplifc *lifc;
+	Route *r;
+
+	QLOCK(f->ipifc);
+	r = v6lookup(f, remote, nil);
+	/* remote counts as v4 when it starts with the v4-in-v6 prefix */
+ 	version = (memcmp(remote, v4prefix, IPv4off) == 0)? V4: V6;
+
+	if(r != nil){
+		ifc = r->ifc;
+		/* gate is the route's gateway, widened to v6 form for v4 routes */
+		if(r->type & Rv4)
+			v4tov6(gate, r->v4.gate);
+		else {
+			ipmove(gate, r->v6.gate);
+			ipmove(local, v6Unspecified);
+		}
+
+		switch(version) {
+		case V4:
+			/* find ifc address closest to the gateway to use */
+			for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+				maskip(gate, lifc->mask, gnet);
+				if(ipcmp(gnet, lifc->net) == 0){
+					ipmove(local, lifc->local);
+					goto out;
+				}
+			}
+			break;
+		case V6:
+			/* find ifc address with scope matching the destination */
+			atyper = v6addrtype(remote);
+			deprecated = 0;
+			for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+				atypel = v6addrtype(lifc->local);
+				/* prefer appropriate scope */
+				if((atypel > atype && atype < atyper) ||
+				   (atypel < atype && atype > atyper)){
+					ipmove(local, lifc->local);
+					deprecated = !v6addrcurr(lifc);
+					atype = atypel;
+				} else if(atypel == atype){
+					/* avoid deprecated addresses */
+					if(deprecated && v6addrcurr(lifc)){
+						ipmove(local, lifc->local);
+						atype = atypel;
+						deprecated = 0;
+					}
+				}
+				/* exact scope match with a current address: done */
+				if(atype == atyper && !deprecated)
+					goto out;
+			}
+			/* accept what was found if its scope is at least the remote's */
+			if(atype >= atyper)
+				goto out;
+			break;
+		default:
+			panic("findlocalip: version %d", version);
+		}
+	}
+
+	/* no route, or nothing suitable on the route's interface */
+	switch(version){
+	case V4:
+		findprimaryipv4(f, local);
+		break;
+	case V6:
+		findprimaryipv6(f, local);
+		break;
+	default:
+		panic("findlocalip2: version %d", version);
+	}
+
+out:
+	QUNLOCK(f->ipifc);
+}
+
+/*
+ *  Copy the first v4 address bound to ifc into addr (the
+ *  IPv4addrlen bytes at offset IPv4off of the stored address).
+ *  Returns 1 on success, 0 if the interface has no v4 address.
+ */
+int
+ipv4local(Ipifc *ifc, uchar *addr)
+{
+	Iplifc *l;
+
+	l = ifc->lifc;
+	while(l != nil && !isv4(l->local))
+		l = l->next;
+	if(l == nil)
+		return 0;
+	memmove(addr, l->local+IPv4off, IPv4addrlen);
+	return 1;
+}
+
+/*
+ *  Copy the first non-tentative v6 address bound to ifc into
+ *  addr.  Returns 1 on success, 0 if there is none.
+ */
+int
+ipv6local(Ipifc *ifc, uchar *addr)
+{
+	Iplifc *l;
+
+	l = ifc->lifc;
+	while(l != nil && (isv4(l->local) || l->tentative))
+		l = l->next;
+	if(l == nil)
+		return 0;
+	ipmove(addr, l->local);
+	return 1;
+}
+
+/*
+ *  Copy the first v6 address bound to ifc into addr, tentative or
+ *  not.  Returns SRC_UNI when one was found, SRC_UNSPEC (addr
+ *  untouched) otherwise.
+ */
+int
+ipv6anylocal(Ipifc *ifc, uchar *addr)
+{
+	Iplifc *l;
+
+	l = ifc->lifc;
+	while(l != nil && isv4(l->local))
+		l = l->next;
+	if(l == nil)
+		return SRC_UNSPEC;
+	ipmove(addr, l->local);
+	return SRC_UNI;
+}
+
+/*
+ *  Return the logical interface of ifc whose local address is ip,
+ *  or nil if ip is not bound to this interface.
+ */
+Iplifc*
+iplocalonifc(Ipifc *ifc, uchar *ip)
+{
+	Iplifc *lifc;
+
+	lifc = ifc->lifc;
+	while(lifc != nil && ipcmp(ip, lifc->local) != 0)
+		lifc = lifc->next;
+	return lifc;
+}
+
+
+/*
+ *  Report whether we proxy for ip on ifc: the route to ip must
+ *  carry both Rifc and Rproxy, and ip masked by one of ifc's
+ *  logical interface masks must equal that interface's remote
+ *  address.  Returns 1 if so, 0 otherwise.
+ */
+int
+ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip)
+{
+	Route *r;
+	Iplifc *lifc;
+	uchar net[IPaddrlen];
+
+	/* the route must be a directly connected proxy route */
+	r = v6lookup(f, ip, nil);
+	if(r == nil)
+		return 0;
+	if((r->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
+		return 0;
+
+	/* and it must belong to this interface */
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+		maskip(ip, lifc->mask, net);
+		if(ipcmp(net, lifc->remote) == 0)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ *  Return V4 or V6 if ip is a multicast address of that version,
+ *  0 otherwise.  v4: first octet in [0xe0, 0xf0); v6: first
+ *  octet 0xff.
+ */
+int
+ipismulticast(uchar *ip)
+{
+	uchar first;
+
+	if(!isv4(ip))
+		return ip[0] == 0xff ? V6 : 0;
+	first = ip[IPv4off];
+	return (first >= 0xe0 && first < 0xf0) ? V4 : 0;
+}
+/*
+ *  Like ipismulticast, but the v4 broadcast address IPv4bcast
+ *  also counts: returns V4 or V6 for a broadcast or multicast
+ *  address, 0 otherwise.
+ */
+int
+ipisbm(uchar *ip)
+{
+	if(!isv4(ip))
+		return ip[0] == 0xff ? V6 : 0;
+	if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
+		return V4;
+	if(ipcmp(ip, IPv4bcast) == 0)
+		return V4;
+	return 0;
+}
+
+
+/*
+ *  add a multicast address to an interface, called with c->car locked
+ *
+ *  The (ma, ia) pair is recorded on c->multi unless it is already
+ *  there; then, on every in-use interface conversation, each
+ *  logical interface whose local address is ia gets ma entered in
+ *  the self cache as Rmulti.
+ */
+void
+ipifcaddmulti(Conv *c, uchar *ma, uchar *ia)
+{
+	Ipifc *ifc;
+	Iplifc *lifc;
+	Conv **p;
+	Ipmulti *multi, **l;
+	Fs *f;
+
+	f = c->p->f;
+
+	/* walk to the end of the list, bailing out on a duplicate */
+	for(l = &c->multi; *l; l = &(*l)->next)
+		if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
+			return;		/* it's already there */
+
+	/* l now points at the terminating link; append the new entry */
+	multi = *l = smalloc(sizeof(*multi));
+	ipmove(multi->ma, ma);
+	ipmove(multi->ia, ia);
+	multi->next = nil;
+
+	/*
+	 * NOTE(review): this loop stops at the first nil slot, while
+	 * other scans of this array run to conv[nc] and skip nil
+	 * slots - confirm the array is always densely filled.
+	 */
+	for(p = f->ipifc->conv; *p; p++){
+		if((*p)->inuse == 0)
+			continue;
+		ifc = (Ipifc*)(*p)->ptcl;
+		/* release the write lock if addselfcache raises an error */
+		if(waserror()){
+			WUNLOCK(ifc);
+			nexterror();
+		}
+		WLOCK(ifc);
+		for(lifc = ifc->lifc; lifc; lifc = lifc->next)
+			if(ipcmp(ia, lifc->local) == 0)
+				addselfcache(f, ifc, lifc, ma, Rmulti);
+		WUNLOCK(ifc);
+		poperror();
+	}
+}
+
+
+/*
+ *  remove a multicast address from an interface, called with c->car locked
+ *
+ *  The (ma, ia) pair is unlinked from c->multi; then, on every
+ *  in-use interface conversation, ma is removed from the self
+ *  cache of each logical interface whose local address is ia.
+ *  Nothing happens if the pair is not on the list.
+ */
+void
+ipifcremmulti(Conv *c, uchar *ma, uchar *ia)
+{
+	Ipmulti *multi, **l;
+	Iplifc *lifc;
+	Conv **p;
+	Ipifc *ifc;
+	Fs *f;
+
+	f = c->p->f;
+
+	/* find the link that points at the matching entry */
+	for(l = &c->multi; *l; l = &(*l)->next)
+		if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
+			break;
+
+	multi = *l;
+	if(multi == nil)
+		return; 	/* we don't have it open */
+
+	/* unlink now; the entry itself is freed at the end */
+	*l = multi->next;
+
+	for(p = f->ipifc->conv; *p; p++){
+		if((*p)->inuse == 0)
+			continue;
+
+		ifc = (Ipifc*)(*p)->ptcl;
+		/*
+		 * release the write lock if remselfcache raises an error;
+		 * NOTE(review): multi is already unlinked and is not freed
+		 * on that path.
+		 */
+		if(waserror()){
+			WUNLOCK(ifc);
+			nexterror();
+		}
+		WLOCK(ifc);
+		for(lifc = ifc->lifc; lifc; lifc = lifc->next)
+			if(ipcmp(ia, lifc->local) == 0)
+				remselfcache(f, ifc, lifc, ma);
+		WUNLOCK(ifc);
+		poperror();
+	}
+
+	free(multi);
+}
+
+/*
+ *  make lifc's join and leave multicast groups
+ *
+ *  Placeholder: the arguments are ignored and nil is always
+ *  returned.
+ */
+static char*
+ipifcjoinmulti(Ipifc *ifc, char **argv, int argc)
+{
+	return nil;
+}
+
+/*
+ *  Placeholder counterpart of ipifcjoinmulti: the arguments are
+ *  ignored and nil is always returned.
+ */
+static char*
+ipifcleavemulti(Ipifc *ifc, char **argv, int argc)
+{
+	return nil;
+}
+
+/*
+ *  Announce ip on every other interface that can proxy for it.
+ *  Each interface except ifc is scanned under its read lock for a
+ *  logical interface whose remote address equals ip masked by that
+ *  interface's mask.  For v6 the solicited-node multicast address
+ *  of ip is added to the self cache and ip is entered in the arp
+ *  table with the proxying interface's mac; for v4 the medium's
+ *  areg hook is called.  Interfaces whose medium lacks the needed
+ *  hook (addmulti for v6, areg for v4) are skipped.
+ */
+static void
+ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip)
+{
+	Conv **cp, **e;
+	Ipifc *nifc;
+	Iplifc *lifc;
+	Medium *m;
+	uchar net[IPaddrlen];
+
+	/* register the address on any network that will proxy for us */
+	e = &f->ipifc->conv[f->ipifc->nc];
+
+	if(!isv4(ip)) {				/* V6 */
+		for(cp = f->ipifc->conv; cp < e; cp++){
+			/* skip empty slots and the interface the address lives on */
+			if(*cp == nil || (nifc = (Ipifc*)(*cp)->ptcl) == ifc)
+				continue;
+			RLOCK(nifc);
+			m = nifc->m;
+			if(m == nil || m->addmulti == nil) {
+				RUNLOCK(nifc);
+				continue;
+			}
+			for(lifc = nifc->lifc; lifc; lifc = lifc->next){
+				maskip(ip, lifc->mask, net);
+				if(ipcmp(net, lifc->remote) == 0) {
+					/* add solicited-node multicast addr */
+					ipv62smcast(net, ip);
+					addselfcache(f, nifc, lifc, net, Rmulti);
+					arpenter(f, V6, ip, nifc->mac, 6, 0);
+					// (*m->addmulti)(nifc, net, ip);
+					break;
+				}
+			}
+			RUNLOCK(nifc);
+		}
+	}
+	else {					/* V4 */
+		for(cp = f->ipifc->conv; cp < e; cp++){
+			/* skip empty slots and the interface the address lives on */
+			if(*cp == nil || (nifc = (Ipifc*)(*cp)->ptcl) == ifc)
+				continue;
+			RLOCK(nifc);
+			m = nifc->m;
+			if(m == nil || m->areg == nil){
+				RUNLOCK(nifc);
+				continue;
+			}
+			for(lifc = nifc->lifc; lifc; lifc = lifc->next){
+				maskip(ip, lifc->mask, net);
+				if(ipcmp(net, lifc->remote) == 0){
+					(*m->areg)(nifc, ip);
+					break;
+				}
+			}
+			RUNLOCK(nifc);
+		}
+	}
+}
+
+
+/*
+ *  Install gate as the v6 default route, tagged "ra" (added for
+ *  new v6 mesg types).  An existing default route whose tag is
+ *  not "ra" is left alone unless force is set, so routes created
+ *  by other means win over router announcements.
+ */
+static void
+adddefroute6(Fs *f, uchar *gate, int force)
+{
+	Route *cur;
+
+	cur = v6lookup(f, v6Unspecified, nil);
+	if(cur != nil && !force){
+		if(strcmp(cur->tag, "ra") != 0)
+			return;
+	}
+
+	/* replace whatever default route is there with the new gateway */
+	v6delroute(f, v6Unspecified, v6Unspecified, 1);
+	v6addroute(f, "ra", v6Unspecified, v6Unspecified, gate, 0);
+}
+
+/*
+ *  NOTE(review): Ngates is not referenced by the code visible in
+ *  this part of the file; presumably a gateway count limit -
+ *  confirm against its users.
+ */
+enum {
+	Ngates = 3,
+};
+
+/*
+ *  Handle an "add6" style request: build an address from a v6
+ *  prefix and the interface's mac, then add it through ipifcadd.
+ *
+ *  argv[1] is the prefix; the optional arguments that follow are,
+ *  in order, prefix length (default 64), onlink flag, autoflag,
+ *  valid lifetime and preferred lifetime (both default ~0L).
+ *
+ *  Returns Ebadarg when the argument count is wrong, the prefix
+ *  is not a v6 address or is link local, the prefix length is
+ *  outside 0..64, the valid lifetime is shorter than the preferred
+ *  one, or the interface has no medium able to turn a prefix into
+ *  an address.  Otherwise returns whatever ipifcadd returns.
+ */
+char*
+ipifcadd6(Ipifc *ifc, char**argv, int argc)
+{
+	int plen = 64;
+	long origint = NOW / 1000, preflt = ~0L, validlt = ~0L;
+	char addr[40], preflen[6];
+	char *params[3];
+	uchar autoflag = 1, onlink = 1;
+	uchar prefix[IPaddrlen];
+	Iplifc *lifc;
+
+	/* each case picks up one more trailing optional argument */
+	switch(argc) {
+	case 7:
+		preflt = atoi(argv[6]);
+		/* fall through */
+	case 6:
+		validlt = atoi(argv[5]);
+		/* fall through */
+	case 5:
+		autoflag = atoi(argv[4]);
+		/* fall through */
+	case 4:
+		onlink = atoi(argv[3]);
+		/* fall through */
+	case 3:
+		plen = atoi(argv[2]);
+		/* fall through */
+	case 2:
+		break;
+	default:
+		return Ebadarg;
+	}
+
+	if (parseip(prefix, argv[1]) != 6 || validlt < preflt || plen < 0 ||
+	    plen > 64 || islinklocal(prefix))
+		return Ebadarg;
+
+	/*
+	 * check the medium before allocating anything, so that the
+	 * error return cannot leak the lifc and an unbound interface
+	 * (no medium) is rejected instead of dereferenced.
+	 */
+	if(ifc->m == nil || ifc->m->pref2addr == nil)
+		return Ebadarg;
+
+	lifc = smalloc(sizeof(Iplifc));
+	lifc->onlink = (onlink != 0);
+	lifc->autoflag = (autoflag != 0);
+	lifc->validlt = validlt;
+	lifc->preflt = preflt;
+	lifc->origint = origint;
+
+	/* issue "add" ctl msg for v6 link-local addr and prefix len */
+	ifc->m->pref2addr(prefix, ifc->mac);	/* mac -> v6 link-local addr */
+	snprint(addr, sizeof addr, "%I", prefix);
+	snprint(preflen, sizeof preflen, "/%d", plen);
+	params[0] = "add";
+	params[1] = addr;
+	params[2] = preflen;
+
+	return ipifcadd(ifc, params, 3, 0, lifc);
+}
diff --git a/src/9vx/a/ip/ipmux.c b/src/9vx/a/ip/ipmux.c
@@ -0,0 +1,842 @@
+/*
+ * IP packet filter
+ */
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+
+typedef struct Ipmuxrock  Ipmuxrock;
+typedef struct Ipmux      Ipmux;
+
+/*
+ *  IPv4 header layout used by the filter to name fields and to
+ *  compute their byte offsets within a packet.
+ */
+typedef struct Myip4hdr Myip4hdr;
+struct Myip4hdr
+{
+	uchar	vihl;		/* Version and header length */
+	uchar	tos;		/* Type of service */
+	uchar	length[2];	/* packet length */
+	uchar	id[2];		/* ip->identification */
+	uchar	frag[2];	/* Fragment information */
+	uchar	ttl;		/* Time to live */
+	uchar	proto;		/* Protocol */
+	uchar	cksum[2];	/* Header checksum */
+	uchar	src[4];		/* IP source */
+	uchar	dst[4];		/* IP destination */
+
+	uchar	data[1];	/* start of data */
+};
+/*
+ *  Null base pointer.  The code in this file only takes member
+ *  addresses through it and casts them to integers, which turns
+ *  them into byte offsets of the header fields (for example
+ *  (ulong)ipoff->dst in parseop).
+ */
+Myip4hdr *ipoff = 0;
+
+/*
+ *  Txxx: which field a filter node looks at (Ipmux.type).  The
+ *  numeric order is used by ipmuxcmp, where lesser types sort
+ *  first.
+ *
+ *  Cxxx: how ipmuxiput compares a node (Ipmux.ctype).  parsemux
+ *  picks one of the specialised forms for single values of length
+ *  1, 2 or 4 and leaves Cother for everything else.
+ */
+enum
+{
+	Tproto,
+	Tdata,
+	Tiph,
+	Tdst,
+	Tsrc,
+	Tifc,
+
+	Cother = 0,	/* generic byte-by-byte masked compare */
+	Cbyte,		/* single byte */
+	Cmbyte,		/* single byte with mask */
+	Cshort,		/* single short */
+	Cmshort,	/* single short with mask */
+	Clong,		/* single long */
+	Cmlong,		/* single long with mask */
+	Cifc,		/* single interface address */
+	Cmifc,		/* single interface address with mask */
+};
+
+/*
+ *  Printable names of the Txxx field types, indexed by type.
+ */
+char *ftname[] = 
+{
+[Tproto]	"proto",
+[Tdata]		"data",
+[Tiph]	 	"iph",
+[Tdst]		"dst",
+[Tsrc]		"src",
+[Tifc]		"ifc",
+};
+
+/*
+ *  a node in the decision tree
+ *
+ *  ipmuxiput follows yes when the packet matches this node and
+ *  no when it does not.
+ */
+struct Ipmux
+{
+	Ipmux	*yes;
+	Ipmux	*no;
+	uchar	type;		/* type of field(Txxxx) */
+	uchar	ctype;		/* type of comparison(Cxxxx) */
+	uchar	len;		/* length in bytes of item to compare */
+	uchar	n;		/* number of items val points to */
+	short	off;		/* offset of comparison */
+	short	eoff;		/* end offset of comparison */
+	uchar	skiphdr;	/* should offset start after ipheader */
+	uchar	*val;		/* n values of len bytes each */
+	uchar	*mask;		/* len bytes applied to the packet before comparing */
+	uchar	*e;		/* val+n*len*/
+
+	int	ref;		/* so we can garbage collect */
+	Conv	*conv;		/* conversation that gets the packet on a match, if any */
+};
+
+/*
+ *  someplace to hold per conversation data
+ *
+ *  chain is the conversation's own filter chain as built by
+ *  ipmuxconnect; ipmuxclose uses it to remove the filter from the
+ *  shared tree.
+ */
+struct Ipmuxrock
+{
+	Ipmux	*chain;
+};
+
+static int	ipmuxsprint(Ipmux*, int, char*, int);
+static void	ipmuxkick(void *x);
+
+/*
+ *  Return a pointer to the first character of p that is neither a
+ *  blank nor a tab.
+ */
+static char*
+skipwhite(char *p)
+{
+	for(;;){
+		if(*p != ' ' && *p != '\t')
+			return p;
+		p++;
+	}
+}
+
+/*
+ *  Split p at the first occurrence of c: the separator is
+ *  overwritten with a NUL and a pointer to the first non-white
+ *  character after it is returned.  Returns nil if c does not
+ *  occur or nothing but white space follows it (the separator is
+ *  still overwritten in the latter case).
+ */
+static char*
+follows(char *p, char c)
+{
+	char *rest;
+
+	rest = strchr(p, c);
+	if(rest == nil)
+		return nil;
+	*rest = 0;
+	rest = skipwhite(rest + 1);
+	return *rest != 0 ? rest : nil;
+}
+
+/*
+ *  Parse the operand at the start of *pp (dst, src, ifc, proto,
+ *  data[n:m] or iph[n:m]) and return a freshly allocated node
+ *  with type, len, off and skiphdr filled in; val and mask are
+ *  left nil for parsemux to fill.  Returns nil if the operand is
+ *  not recognised or its range is malformed.
+ *
+ *  For data[] and iph[] the start offset must lie in
+ *  0..64-IP4HDR, the end must not precede the start, and the
+ *  range may cover at most 255 bytes because the length is kept
+ *  in a uchar.
+ *
+ *  *pp itself is not advanced.
+ */
+static Ipmux*
+parseop(char **pp)
+{
+	char *p = *pp;
+	int type, off, end, len;
+	Ipmux *f;
+
+	p = skipwhite(p);
+	if(strncmp(p, "dst", 3) == 0){
+		type = Tdst;
+		off = (ulong)(ipoff->dst);
+		len = IPv4addrlen;
+		p += 3;
+	}
+	else if(strncmp(p, "src", 3) == 0){
+		type = Tsrc;
+		off = (ulong)(ipoff->src);
+		len = IPv4addrlen;
+		p += 3;
+	}
+	else if(strncmp(p, "ifc", 3) == 0){
+		type = Tifc;
+		off = -IPv4addrlen;
+		len = IPv4addrlen;
+		p += 3;
+	}
+	else if(strncmp(p, "proto", 5) == 0){
+		type = Tproto;
+		off = (ulong)&(ipoff->proto);
+		len = 1;
+		p += 5;
+	}
+	else if(strncmp(p, "data", 4) == 0 || strncmp(p, "iph", 3) == 0){
+		if(strncmp(p, "data", 4) == 0) {
+			type = Tdata;
+			p += 4;
+		}
+		else {
+			type = Tiph;
+			p += 3;
+		}
+		p = skipwhite(p);
+		if(*p != '[')
+			return nil;
+		p++;
+		off = strtoul(p, &p, 0);
+		if(off < 0 || off > (64-IP4HDR))
+			return nil;
+		p = skipwhite(p);
+		if(*p != ':')
+			end = off;
+		else {
+			p++;
+			p = skipwhite(p);
+			end = strtoul(p, &p, 0);
+			if(end < off)
+				return nil;
+			p = skipwhite(p);
+		}
+		if(*p != ']')
+			return nil;
+		p++;
+		len = end - off + 1;
+		/* f->len is a uchar: refuse ranges it cannot represent */
+		if(len > 0xff)
+			return nil;
+	}
+	else
+		return nil;
+
+	f = smalloc(sizeof(*f));
+	f->type = type;
+	f->len = len;
+	f->off = off;
+	f->val = nil;
+	f->mask = nil;
+	f->n = 1;
+	f->ref = 1;
+	/* only data[] offsets are relative to the end of the ip header */
+	if(type == Tdata)
+		f->skiphdr = 1;
+	else
+		f->skiphdr = 0;
+
+	return f;	
+}
+
+/*
+ *  Value of a single hexadecimal digit, upper or lower case.
+ *  Any other character counts as 0.
+ */
+static int
+htoi(char x)
+{
+	if('0' <= x && x <= '9')
+		return x - '0';
+	if('a' <= x && x <= 'f')
+		return x - 'a' + 10;
+	if('A' <= x && x <= 'F')
+		return x - 'A' + 10;
+	return 0;
+}
+
+/*
+ *  Value of the byte written as the two hex digits p[0] (high
+ *  nibble) and p[1] (low nibble).
+ */
+static int
+hextoi(char *p)
+{
+	int hi, lo;
+
+	hi = htoi(p[0]);
+	lo = htoi(p[1]);
+	return (hi<<4) | lo;
+}
+
+/*
+ *  Convert the hex string p into at most len bytes stored at v,
+ *  two digits per byte.  Conversion stops at the end of the
+ *  string; bytes of v beyond that are left untouched.  A trailing
+ *  single digit becomes the high nibble of one last byte.
+ */
+static void
+parseval(uchar *v, char *p, int len)
+{
+	while(*p && len-- > 0){
+		*v++ = hextoi(p);
+		/* odd digit count: do not step over the terminating NUL */
+		if(p[1] == 0)
+			break;
+		p += 2;
+	}
+}
+
+/*
+ *  Parse one filter of the form
+ *	operand=value[|value...][&mask]
+ *  into a node.  The operand&mask=value form printed by
+ *  ipmuxsprint is accepted as well.  The string is modified in
+ *  place.  Returns nil on a syntax error.
+ *
+ *  Without a mask the node gets an all-ones mask.  A mask is
+ *  only accepted for src, dst, ifc, data and iph operands.
+ *  Addresses are parsed with v4parseip, everything else as hex
+ *  digits.  A single value of length 1, 2 or 4 selects one of
+ *  the specialised comparison types; all other nodes are Cother.
+ */
+static Ipmux*
+parsemux(char *p)
+{
+	int n, nomask;
+	Ipmux *f;
+	char *val;
+	char *mask;
+	char *vals[20];
+	uchar *v;
+
+	/* parse operand */
+	f = parseop(&p);
+	if(f == nil)
+		return nil;
+
+	/* find value */
+	val = follows(p, '=');
+	if(val == nil)
+		goto parseerror;
+
+	/*
+	 * parse mask.  follows() above has cut the string at '=', so
+	 * a mask written after the value has to be looked for in val;
+	 * fall back to the operand part for the operand&mask=value
+	 * form.
+	 */
+	mask = follows(val, '&');
+	if(mask == nil)
+		mask = follows(p, '&');
+	if(mask != nil){
+		switch(f->type){
+		case Tsrc:
+		case Tdst:
+		case Tifc:
+			f->mask = smalloc(f->len);
+			v4parseip(f->mask, mask);
+			break;
+		case Tdata:
+		case Tiph:
+			f->mask = smalloc(f->len);
+			parseval(f->mask, mask, f->len);
+			break;
+		default:
+			goto parseerror;
+		}
+		nomask = 0;
+	} else {
+		nomask = 1;
+		f->mask = smalloc(f->len);
+		memset(f->mask, 0xff, f->len);
+	}
+
+	/* parse vals */
+	f->n = getfields(val, vals, sizeof(vals)/sizeof(char*), 1, "|");
+	if(f->n == 0)
+		goto parseerror;
+	f->val = smalloc(f->n*f->len);
+	v = f->val;
+	for(n = 0; n < f->n; n++){
+		switch(f->type){
+		case Tsrc:
+		case Tdst:
+		case Tifc:
+			v4parseip(v, vals[n]);
+			break;
+		case Tproto:
+		case Tdata:
+		case Tiph:
+			parseval(v, vals[n], f->len);
+			break;
+		}
+		v += f->len;
+	}
+
+	f->eoff = f->off + f->len;
+	f->e = f->val + f->n*f->len;
+
+	/* pick a fast comparison when there is exactly one value */
+	f->ctype = Cother;
+	if(f->n == 1){
+		switch(f->len){
+		case 1:
+			f->ctype = nomask ? Cbyte : Cmbyte;
+			break;
+		case 2:
+			f->ctype = nomask ? Cshort : Cmshort;
+			break;
+		case 4:
+			if(f->type == Tifc)
+				f->ctype = nomask ? Cifc : Cmifc;
+			else
+				f->ctype = nomask ? Clong : Cmlong;
+			break;
+		}
+	}
+	return f;
+
+parseerror:
+	if(f->mask)
+		free(f->mask);
+	if(f->val)
+		free(f->val);
+	free(f);
+	return nil;
+}
+
+/*
+ *  Order two nodes by the field they examine, ignoring their
+ *  values.  Keys, in order: field type (lesser first), effective
+ *  offset (earlier first), length (longer first), then the mask
+ *  bytes.
+ *
+ *  returns:	<0 if a is a more specific match
+ *		 0 if a and b are matching on the same fields
+ *		>0 if b is a more specific match
+ */
+static int
+ipmuxcmp(Ipmux *a, Ipmux *b)
+{
+	int d;
+
+	d = a->type - b->type;
+	if(d == 0)
+		d = (a->off+((int)a->skiphdr)*(ulong)ipoff->data) - 
+			(b->off+((int)b->skiphdr)*(ulong)ipoff->data);
+	if(d == 0)
+		d = b->len - a->len;
+	if(d != 0)
+		return d;
+
+	/*
+	 *  same bytes of the packet: a node with a mask sorts before
+	 *  one without; two masks are ordered bytewise.
+	 */
+	if(a->mask == nil || b->mask == nil){
+		if(a->mask != nil)
+			return -1;
+		if(b->mask != nil)
+			return 1;
+		return 0;
+	}
+	return memcmp(b->mask, a->mask, a->len);
+}
+
+/*
+ *  Order two nodes by their value lists; meant for nodes for
+ *  which ipmuxcmp returned 0.  Lists of different total size are
+ *  ordered by size, equal-sized ones bytewise.  Returns 0 only
+ *  when the lists are identical.
+ */
+static int
+ipmuxvalcmp(Ipmux *a, Ipmux *b)
+{
+	int alen, blen;
+
+	alen = a->len*a->n;
+	blen = b->len*b->n;
+	if(blen != alen)
+		return blen - alen;
+	return memcmp(a->val, b->val, alen);
+} 
+
+/*
+ *  Insert f into the chain at *l, which is linked through yes and
+ *  kept in ipmuxcmp order.  f goes in front of the first node it
+ *  sorts strictly before, so it ends up after nodes it compares
+ *  equal to.
+ */
+static void
+ipmuxchain(Ipmux **l, Ipmux *f)
+{
+	Ipmux *cur;
+
+	while((cur = *l) != nil && ipmuxcmp(f, cur) >= 0)
+		l = &cur->yes;
+	f->yes = *l;
+	*l = f;
+}
+
+/*
+ *  copy a tree
+ *
+ *  Every node of the copy owns its own val and mask buffers, so
+ *  either tree can be freed without affecting the other.
+ */
+static Ipmux*
+ipmuxcopy(Ipmux *f)
+{
+	Ipmux *nf;
+
+	if(f == nil)
+		return nil;
+	nf = smalloc(sizeof *nf);
+	*nf = *f;
+	nf->no = ipmuxcopy(f->no);
+	nf->yes = ipmuxcopy(f->yes);
+	nf->val = smalloc(f->n*f->len);
+	nf->e = nf->val + f->len*f->n;
+	memmove(nf->val, f->val, f->n*f->len);
+	/* the struct copy above shared the mask; give the copy its own */
+	if(f->mask != nil){
+		nf->mask = smalloc(f->len);
+		memmove(nf->mask, f->mask, f->len);
+	}
+	return nf;
+}
+
+/*
+ *  Free a single node together with its val and mask buffers.
+ *  The yes and no subtrees are not touched.
+ */
+static void
+ipmuxfree(Ipmux *f)
+{
+	if(f->val != nil)
+		free(f->val);
+	if(f->mask != nil)
+		free(f->mask);
+	free(f);
+}
+
+/*
+ *  Free a whole tree: every node reachable from f through yes
+ *  and no links, then f itself.  A nil tree is fine.
+ */
+static void
+ipmuxtreefree(Ipmux *f)
+{
+	if(f == nil)
+		return;
+	/* recurse so that nodes below the first level are freed too */
+	ipmuxtreefree(f->no);
+	ipmuxtreefree(f->yes);
+	ipmuxfree(f);
+}
+
+/*
+ *  merge two trees
+ *
+ *  Returns the root of the merged tree, which is a or b itself;
+ *  both arguments are consumed.  Four cases, decided by ipmuxcmp
+ *  and ipmuxvalcmp on the two roots:
+ *   - a sorts first: b is merged into a's yes side and a copy of
+ *     b into a's no side, since b must be tried whether or not a
+ *     matches;
+ *   - b sorts first: the mirror image;
+ *   - same field, same values: the nodes are the same test, so
+ *     their subtrees are merged, a's reference count is bumped
+ *     and b is freed;
+ *   - same field, different values: b is merged into a's no side.
+ */
+static Ipmux*
+ipmuxmerge(Ipmux *a, Ipmux *b)
+{
+	int n;
+	Ipmux *f;
+
+	if(a == nil)
+		return b;
+	if(b == nil)
+		return a;
+	n = ipmuxcmp(a, b);
+	if(n < 0){
+		f = ipmuxcopy(b);
+		a->yes = ipmuxmerge(a->yes, b);
+		a->no = ipmuxmerge(a->no, f);
+		return a;
+	}
+	if(n > 0){
+		f = ipmuxcopy(a);
+		b->yes = ipmuxmerge(b->yes, a);
+		b->no = ipmuxmerge(b->no, f);
+		return b;
+	}
+	if(ipmuxvalcmp(a, b) == 0){
+		a->yes = ipmuxmerge(a->yes, b->yes);
+		a->no = ipmuxmerge(a->no, b->no);
+		a->ref++;
+		/* NOTE(review): b->conv is dropped here - confirm intended */
+		ipmuxfree(b);
+		return a;
+	}
+	a->no = ipmuxmerge(a->no, b);
+	return a;
+}
+
+/*
+ *  remove a chain from a demux tree.  This is like merging except that
+ *  we remove instead of insert.
+ *
+ *  l points at the link holding the (sub)tree, f is the chain
+ *  still to be removed.  Returns 0 when the chain was removed
+ *  and a negative value when part of it could not be found.
+ */
+static int
+ipmuxremove(Ipmux **l, Ipmux *f)
+{
+	int n, rv;
+	Ipmux *ft;
+
+	if(f == nil)
+		return 0;		/* we've removed it all */
+	if(*l == nil)
+		return -1;
+
+	ft = *l;
+	n = ipmuxcmp(ft, f);
+	if(n < 0){
+		/* *l is matching an earlier field, descend both paths */
+		rv = ipmuxremove(&ft->yes, f);
+		rv += ipmuxremove(&ft->no, f);
+		return rv;
+	}
+	if(n > 0){
+		/* f represents an earlier field than *l, this should be impossible */
+		return -1;
+	}
+
+	/* if we get here f and *l are comparing the same fields */
+	if(ipmuxvalcmp(ft, f) != 0){
+		/* different values mean mutually exclusive */
+		return ipmuxremove(&ft->no, f);
+	}
+
+	/* we found a match */
+	if(--(ft->ref) == 0){
+		/*
+		 *  a dead node implies the whole yes side is also dead.
+		 *  since our chain is constrained to be on that side,
+		 *  we're done.
+		 */
+		ipmuxtreefree(ft->yes);
+		/* splice the no side into the place of the dead node */
+		*l = ft->no;
+		ipmuxfree(ft);
+		return 0;
+	}
+
+	/*
+	 *  free the rest of the chain.  it is constrained to match the
+	 *  yes side.
+	 */
+	return ipmuxremove(&ft->yes, f->yes);
+}
+
+/*
+ *  connection request is a semi separated list of filters
+ *  e.g. proto=17;data[0:4]=11aa22bb;ifc=135.104.9.2&255.255.255.0
+ *
+ *  there's no protection against overlapping specs.
+ *
+ *  The filters are parsed into a chain kept in canonical
+ *  (ipmuxcmp) order.  The conversation is attached to the last
+ *  node of that chain, so a packet reaches it only after passing
+ *  every filter, whatever order the filters were written in.  The
+ *  chain is remembered in the conversation and a copy of it is
+ *  merged into the protocol's decision tree.
+ *
+ *  Returns nil on success, Ebadarg for a wrong argument count or
+ *  an unparsable filter.
+ */
+static char*
+ipmuxconnect(Conv *c, char **argv, int argc)
+{
+	int i, n;
+	char *field[10];
+	Ipmux *mux, *chain;
+	Ipmuxrock *r;
+	Fs *f;
+
+	f = c->p->f;
+
+	if(argc != 2)
+		return Ebadarg;
+
+	n = getfields(argv[1], field, nelem(field), 1, ";");
+	if(n <= 0)
+		return Ebadarg;
+
+	chain = nil;
+	for(i = 0; i < n; i++){
+		mux = parsemux(field[i]);
+		if(mux == nil){
+			ipmuxtreefree(chain);
+			return Ebadarg;
+		}
+		ipmuxchain(&chain, mux);
+	}
+	if(chain == nil)
+		return Ebadarg;
+
+	/*
+	 * ipmuxchain sorts the nodes, so the last one parsed need
+	 * not be the last one tested; hang the conversation on the
+	 * tail of the chain.
+	 */
+	for(mux = chain; mux->yes != nil; mux = mux->yes)
+		;
+	mux->conv = c;
+
+	/* save a copy of the chain so we can later remove it */
+	mux = ipmuxcopy(chain);
+	r = (Ipmuxrock*)(c->ptcl);
+	r->chain = chain;
+
+	/* add the chain to the protocol demultiplexor tree */
+	WLOCK(f);
+	f->ipmux->priv = ipmuxmerge(f->ipmux->priv, mux);
+	WUNLOCK(f);
+
+	Fsconnected(c, nil);
+	return nil;
+}
+
+/*
+ *  Format the conversation's own filter chain into state (at
+ *  most n bytes) and return the number of bytes written.
+ */
+static int
+ipmuxstate(Conv *c, char *state, int n)
+{
+	Ipmux *chain;
+
+	chain = ((Ipmuxrock*)(c->ptcl))->chain;
+	return ipmuxsprint(chain, 0, state, n);
+}
+
+/*
+ *  Set up a new conversation: no filter chain yet, a 64K read
+ *  queue, and a 64K write queue that is drained by ipmuxkick.
+ */
+static void
+ipmuxcreate(Conv *c)
+{
+	Ipmuxrock *rock;
+
+	rock = (Ipmuxrock*)(c->ptcl);
+	c->rq = qopen(64*1024, Qmsg, 0, c);
+	c->wq = qopen(64*1024, Qkick, ipmuxkick, c);
+	rock->chain = nil;
+}
+
+/*
+ *  Announce is not available for ipmux: the arguments are
+ *  ignored and an error string is always returned.
+ */
+static char*
+ipmuxannounce(Conv* _, char** __, int ___)
+{
+	return "ipmux does not support announce";
+}
+
+/*
+ *  Shut a conversation down: close its queues, clear its
+ *  addresses and ports, remove its filter chain from the
+ *  protocol's decision tree and free the chain.
+ */
+static void
+ipmuxclose(Conv *c)
+{
+	Ipmux *i;
+	Ipmuxrock *r;
+	Fs *f = c->p->f;
+
+	r = (Ipmuxrock*)(c->ptcl);
+
+	qclose(c->rq);
+	qclose(c->wq);
+	qclose(c->eq);
+	ipmove(c->laddr, IPnoaddr);
+	ipmove(c->raddr, IPnoaddr);
+	c->lport = 0;
+	c->rport = 0;
+
+	WLOCK(f);
+	i = (Ipmux *)c->p->priv;
+	ipmuxremove(&i, r->chain);
+	/*
+	 * ipmuxremove may unlink and free the root node; store the
+	 * new root back so the tree never points at freed memory.
+	 */
+	c->p->priv = i;
+	WUNLOCK(f);
+	ipmuxtreefree(r->chain);
+	r->chain = nil;
+}
+
+/*
+ *  Write-queue kick routine: take one fully formed ip packet off
+ *  the conversation's write queue and hand it to ipoput6 when its
+ *  version nibble says IP_VER6, to ipoput4 otherwise.  Does
+ *  nothing if the queue is empty.
+ */
+static void
+ipmuxkick(void *x)
+{
+	Conv *c;
+	Block *bp;
+	Myip4hdr *ih4;
+
+	c = x;
+	bp = qget(c->wq);
+	if(bp == nil)
+		return;
+
+	ih4 = (Myip4hdr*)(bp->rp);
+	if((ih4->vihl & 0xF0) == IP_VER6)
+		ipoput6(c->p->f, bp, 0, ((Ip6hdr*)ih4)->ttl, 0, nil);
+	else
+		ipoput4(c->p->f, bp, 0, ih4->ttl, ih4->tos, nil);
+}
+
+/*
+ *  Input routine: run the packet through the decision tree.  At
+ *  each node the yes link is followed on a match and the no link
+ *  otherwise; the last conversation seen on the yes path wins.
+ *  A matched packet gets the receiving interface's address
+ *  prepended and is queued on that conversation.  An unmatched
+ *  packet is passed to the handler registered for its protocol
+ *  number, or freed if there is none.
+ *
+ *  Offsets of data nodes (skiphdr) are relative to the end of the
+ *  ip header, for the length check as well as for every kind of
+ *  comparison; ifc nodes always compare against the interface
+ *  address rather than packet bytes.
+ */
+static void
+ipmuxiput(Proto *p, Ipifc *ifc, Block *bp)
+{
+	int len, hl, skip;
+	Fs *f = p->f;
+	uchar *m, *h, *v, *e, *ve, *hp, *base;
+	Conv *c;
+	Ipmux *mux;
+	Myip4hdr *ip;
+	Ip6hdr *ip6;
+
+	ip = (Myip4hdr*)bp->rp;
+	hl = (ip->vihl&0x0F)<<2;
+
+	if(p->priv == nil)
+		goto nomatch;
+
+	h = bp->rp;
+	len = BLEN(bp);
+
+	/* run the v4 filter */
+	RLOCK(f);
+	c = nil;
+	mux = f->ipmux->priv;
+	while(mux != nil){
+		/* bytes of ip header in front of this node's offset */
+		skip = ((int)mux->skiphdr)*hl;
+		if(mux->eoff + skip > len){
+			mux = mux->no;
+			continue;
+		}
+		/* where the bytes to compare live */
+		if(mux->type == Tifc)
+			base = ifc->lifc->local + IPv4off;
+		else
+			base = h + mux->off + skip;
+		hp = base;
+		switch(mux->ctype){
+		case Cbyte:
+			if(*mux->val == *hp)
+				goto yes;
+			break;
+		case Cmbyte:
+			if((*hp & *mux->mask) == *mux->val)
+				goto yes;
+			break;
+		case Cshort:
+			if(*((ushort*)mux->val) == *(ushort*)hp)
+				goto yes;
+			break;
+		case Cmshort:
+			if((*(ushort*)hp & (*((ushort*)mux->mask))) == *((ushort*)mux->val))
+				goto yes;
+			break;
+		case Clong:
+			if(*((ulong*)mux->val) == *(ulong*)hp)
+				goto yes;
+			break;
+		case Cmlong:
+			if((*(ulong*)hp & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
+				goto yes;
+			break;
+		case Cifc:
+			if(*((ulong*)mux->val) == *(ulong*)(ifc->lifc->local + IPv4off))
+				goto yes;
+			break;
+		case Cmifc:
+			if((*(ulong*)(ifc->lifc->local + IPv4off) & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
+				goto yes;
+			break;
+		default:
+			/* generic case: try each value, byte by byte under the mask */
+			v = mux->val;
+			for(e = mux->e; v < e; v = ve){
+				m = mux->mask;
+				hp = base;
+				for(ve = v + mux->len; v < ve; v++){
+					if((*hp++ & *m++) != *v)
+						break;
+				}
+				if(v == ve)
+					goto yes;
+			}
+		}
+		mux = mux->no;
+		continue;
+yes:
+		if(mux->conv != nil)
+			c = mux->conv;
+		mux = mux->yes;
+	}
+	RUNLOCK(f);
+
+	if(c != nil){
+		/* tack on interface address */
+		bp = padblock(bp, IPaddrlen);
+		ipmove(bp->rp, ifc->lifc->local);
+		bp = concatblock(bp);
+		if(bp != nil)
+			if(qpass(c->rq, bp) < 0)
+				print("Q");
+		return;
+	}
+
+nomatch:
+	/* doesn't match any filter, hand it to the specific protocol handler */
+	ip = (Myip4hdr*)bp->rp;
+	if((ip->vihl & 0xF0) == IP_VER4) {
+		p = f->t2p[ip->proto];
+	} else {
+		ip6 = (Ip6hdr*)bp->rp;
+		p = f->t2p[ip6->proto];
+	}
+	if(p && p->rcv)
+		(*p->rcv)(p, ifc, bp);
+	else
+		freeblist(bp);
+	return;
+}
+
+/*
+ *  Print the tree rooted at mux into buf (at most len bytes),
+ *  one node per line, indented by level blanks:
+ *	h[first:last]&mask=value|value|...
+ *  followed by the no subtree and then the yes subtree, each one
+ *  level deeper.  A nil tree prints as an empty (indented) line.
+ *  Returns the number of bytes written.
+ */
+static int
+ipmuxsprint(Ipmux *mux, int level, char *buf, int len)
+{
+	int i, j, n, first;
+	uchar *v;
+
+	n = 0;
+	for(i = level; i > 0; i--)
+		n += snprint(buf+n, len-n, " ");
+	if(mux == nil)
+		return n + snprint(buf+n, len-n, "\n");
+
+	/* offsets of data nodes are shown relative to the packet start */
+	first = mux->off+((int)mux->skiphdr)*((int)ipoff->data);
+	n += snprint(buf+n, len-n, "h[%d:%d]&", first, first+mux->len-1);
+	for(i = 0; i < mux->len; i++)
+		n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
+	n += snprint(buf+n, len-n, "=");
+
+	v = mux->val;
+	for(j = 0; j < mux->n; j++){
+		for(i = 0; i < mux->len; i++, v++)
+			n += snprint(buf+n, len - n, "%2.2ux", *v);
+		n += snprint(buf+n, len-n, "|");
+	}
+	n += snprint(buf+n, len-n, "\n");
+
+	n += ipmuxsprint(mux->no, level+1, buf+n, len-n);
+	n += ipmuxsprint(mux->yes, level+1, buf+n, len-n);
+	return n;
+}
+
+/*
+ *  Print the protocol's whole decision tree into buf under the
+ *  read lock of the stack; returns the number of bytes written.
+ */
+static int
+ipmuxstats(Proto *p, char *buf, int len)
+{
+	Fs *f;
+	int nwritten;
+
+	f = p->f;
+	RLOCK(f);
+	nwritten = ipmuxsprint(p->priv, 0, buf, len);
+	RUNLOCK(f);
+	return nwritten;
+}
+
+/*
+ *  Create the ipmux protocol, with an empty decision tree and
+ *  room for 64 conversations, remember it in f->ipmux and
+ *  register it with the stack.
+ */
+void
+ipmuxinit(Fs *f)
+{
+	Proto *pr;
+
+	pr = smalloc(sizeof(Proto));
+
+	/* identity and sizing */
+	pr->name = "ipmux";
+	pr->ipproto = -1;
+	pr->nc = 64;
+	pr->ptclsize = sizeof(Ipmuxrock);
+	pr->priv = nil;
+
+	/* entry points */
+	pr->create = ipmuxcreate;
+	pr->connect = ipmuxconnect;
+	pr->announce = ipmuxannounce;
+	pr->close = ipmuxclose;
+	pr->state = ipmuxstate;
+	pr->stats = ipmuxstats;
+	pr->rcv = ipmuxiput;
+	pr->ctl = nil;
+	pr->advise = nil;
+
+	f->ipmux = pr;			/* hack for Fsrcvpcol */
+
+	Fsproto(f, pr);
+}
diff --git a/src/9vx/a/ip/iproute.c b/src/9vx/a/ip/iproute.c
@@ -0,0 +1,854 @@
+#include	"u.h"
+#include	"lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"error.h"
+
+#include	"ip.h"
+
+static void	walkadd(Fs*, Route**, Route*);
+static void	addnode(Fs*, Route**, Route*);
+static void	calcd(Route*);
+
+/* these are used for all instances of IP */
+static Route*	v4freelist;	/* recycled v4 nodes, chained through mid (see freeroute) */
+static Route*	v6freelist;	/* recycled v6 nodes, chained through mid */
+static RWlock	routelock;	/* taken around every tree update and walk in this file */
+static ulong	v4routegeneration, v6routegeneration;	/* bumped on add/delete; compared with Conv.rgen in the lookups */
+
+static void
+freeroute(Route *r)
+{
+	Route **head;
+
+	/* pick the free list matching the node's address family */
+	head = (r->type & Rv4) ? &v4freelist : &v6freelist;
+
+	/* drop the child links and push onto the list, chained through mid */
+	r->right = nil;
+	r->left = nil;
+	r->mid = *head;
+	*head = r;
+}
+
+static Route*
+allocroute(int type)
+{
+	Route *node, **head;
+	int size;
+
+	/* v4 and v6 nodes differ in size, so each family has its own free list */
+	if(type & Rv4){
+		head = &v4freelist;
+		size = sizeof(RouteTree) + sizeof(V4route);
+	} else {
+		head = &v6freelist;
+		size = sizeof(RouteTree) + sizeof(V6route);
+	}
+
+	/* reuse a freed node when one is available, otherwise allocate */
+	node = *head;
+	if(node == nil){
+		node = malloc(size);
+		if(node == nil)
+			panic("out of routing nodes");
+	} else
+		*head = node->mid;
+
+	memset(node, 0, size);
+	node->type = type;
+	node->ifc = nil;
+	node->ref = 1;
+	return node;
+}
+
+static void
+addqueue(Route **q, Route *r)
+{
+	Route *cell;
+
+	if(r != nil){
+		/* a fresh node serves as the list cell: mid links, left carries r */
+		cell = allocroute(r->type);
+		cell->left = r;
+		cell->mid = *q;
+		*q = cell;
+	}
+}
+
+/*
+ *   compare 2 v6 addresses, each IPllen words with word 0 most
+ *   significant; returns -1, 0 or 1
+ */
+static int
+lcmp(ulong *a, ulong *b)
+{
+	int w;
+
+	for(w = 0; w < IPllen; w++)
+		if(a[w] != b[w])
+			return a[w] > b[w] ? 1 : -1;
+
+	/* every word matched */
+	return 0;
+}
+
+/*
+ *  results of rangecompare(a, b) for 2 v4 or v6 ranges
+ */
+enum
+{
+	Rpreceeds,	/* a ends before b starts */
+	Rfollows,	/* a starts after b ends */
+	Requals,	/* same start and end */
+	Rcontains,	/* a covers b and is not equal to it */
+	Rcontained,	/* every other overlap, including a strictly inside b */
+};
+
+static int
+rangecompare(Route *a, Route *b)
+{
+	int lo, hi;
+
+	if(a->type & Rv4){
+		/* disjoint ranges */
+		if(a->v4.endaddress < b->v4.address)
+			return Rpreceeds;
+		if(a->v4.address > b->v4.endaddress)
+			return Rfollows;
+
+		/* overlapping: a covers b only if it reaches both of b's ends */
+		if(a->v4.address > b->v4.address
+		|| a->v4.endaddress < b->v4.endaddress)
+			return Rcontained;
+		if(a->v4.address == b->v4.address
+		&& a->v4.endaddress == b->v4.endaddress)
+			return Requals;
+		return Rcontains;
+	}
+
+	/* same decisions for v6, using lcmp on the word arrays */
+	if(lcmp(a->v6.endaddress, b->v6.address) < 0)
+		return Rpreceeds;
+	if(lcmp(a->v6.address, b->v6.endaddress) > 0)
+		return Rfollows;
+
+	lo = lcmp(a->v6.address, b->v6.address);
+	hi = lcmp(a->v6.endaddress, b->v6.endaddress);
+	if(lo > 0 || hi < 0)
+		return Rcontained;
+	if(lo == 0 && hi == 0)
+		return Requals;
+	return Rcontains;
+}
+
+static void
+copygate(Route *old, Route *new)	/* copy new's gateway into old; width chosen by new's family */
+{
+	if((new->type & Rv4) == 0)
+		memmove(old->v6.gate, new->v6.gate, IPaddrlen);
+	else
+		memmove(old->v4.gate, new->v4.gate, IPv4addrlen);
+}
+
+/*
+ *  re-insert p, then everything under its left and right links
+ */
+static void
+walkadd(Fs *f, Route **root, Route *p)
+{
+	Route *lsub, *rsub;
+
+	lsub = p->left;
+	rsub = p->right;
+	p->left = nil;
+	p->right = nil;
+	addnode(f, root, p);
+	if(lsub != nil)
+		walkadd(f, root, lsub);
+	if(rsub != nil)
+		walkadd(f, root, rsub);
+}
+
+/*
+ *  set p's depth to one more than its deepest child
+ */
+static void
+calcd(Route *p)
+{
+	Route *kid;
+	int deepest;
+
+	if(p == nil)
+		return;
+
+	deepest = 0;
+	if((kid = p->left) != nil)
+		deepest = kid->depth;
+	if((kid = p->right) != nil && kid->depth > deepest)
+		deepest = kid->depth;
+	if((kid = p->mid) != nil && kid->depth > deepest)
+		deepest = kid->depth;
+
+	/* a node with no children ends up at depth 1 */
+	p->depth = deepest+1;
+}
+
+/*
+ *  balance the tree at the current node (*cur must not be nil)
+ */
+static void
+balancetree(Route **cur)
+{
+	Route *p, *l, *r;
+	int dl, dr;
+
+	/*
+	 * if left and right depths differ
+	 * by more than one, do a single
+	 * rotation toward the shallow side
+	 */
+	p = *cur;
+	dl = 0; if((l = p->left) != nil) dl = l->depth;	/* a missing child counts as depth 0 */
+	dr = 0; if((r = p->right) != nil) dr = r->depth;
+
+	if(dl > dr+1) {	/* left-heavy: l becomes the subtree root, p its right child */
+		p->left = l->right;
+		l->right = p;
+		*cur = l;
+		calcd(p);	/* p is now below l, so its depth is recomputed first */
+		calcd(l);
+	} else
+	if(dr > dl+1) {	/* right-heavy: mirror image of the case above */
+		p->right = r->left;
+		r->left = p;
+		*cur = r;
+		calcd(p);
+		calcd(r);
+	} else
+		calcd(p);	/* balanced enough: only refresh the depth */
+}
+
+/*
+ *  add a new node to the tree rooted at *cur, then rebalance at *cur
+ */
+static void
+addnode(Fs *f, Route **cur, Route *new)
+{
+	Route *p;
+
+	p = *cur;
+	if(p == 0) {	/* empty slot: new becomes a leaf here */
+		*cur = new;
+		new->depth = 1;
+		return;
+	}
+
+	switch(rangecompare(new, p)){
+	case Rpreceeds:
+		addnode(f, &p->left, new);
+		break;
+	case Rfollows:
+		addnode(f, &p->right, new);
+		break;
+	case Rcontains:
+		/*
+		 *  if new node is superset
+		 *  of tree node,
+		 *  replace tree node and
+		 *  queue tree node to be
+		 *  merged into root.
+		 */
+		*cur = new;
+		new->depth = 1;
+		addqueue(&f->queue, p);	/* callers drain f->queue with walkadd after addnode returns */
+		break;
+	case Requals:
+		/*
+		 *  supersede the old entry if the old one isn't
+		 *  a local interface.
+		 */
+		if((p->type & Rifc) == 0){
+			p->type = new->type;
+			p->ifcid = -1;	/* makes the lookups' ifcid check fail so the interface is looked up again */
+			copygate(p, new);
+		} else if(new->type & Rifc)
+			p->ref++;	/* same interface route added again: count it */
+		freeroute(new);	/* the existing node stays; new is recycled */
+		break;
+	case Rcontained:
+		addnode(f, &p->mid, new);	/* narrower ranges hang off mid */
+		break;
+	}
+	
+	balancetree(cur);
+}
+
+#define	V4H(a)	(((a)&0x07ffffff)>>(32-Lroot-5))	/* root bucket for host-order v4 address a; argument parenthesized like V6H */
+
+void
+v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)	/* insert route a/mask via gate into f's v4 trees */
+{
+	Route *p;
+	ulong sa;
+	ulong m;
+	ulong ea;
+	int h, eh;
+
+	m = nhgetl(mask);
+	sa = nhgetl(a) & m;	/* first address of the range */
+	ea = sa | ~m;	/* last address of the range */
+
+	eh = V4H(ea);
+	for(h=V4H(sa); h<=eh; h++) {	/* one node per root bucket from V4H(sa) through V4H(ea) */
+		p = allocroute(Rv4 | type);
+		p->v4.address = sa;
+		p->v4.endaddress = ea;
+		memmove(p->v4.gate, gate, sizeof(p->v4.gate));
+		memmove(p->tag, tag, sizeof(p->tag));	/* NOTE(review): copies sizeof(p->tag) bytes whatever strlen(tag) is - confirm callers' tags are that long */
+
+		wlock(&routelock);
+		addnode(f, &f->v4root[h], p);
+		while((p = f->queue) != nil) {	/* re-insert subtrees that addnode displaced onto f->queue */
+			f->queue = p->mid;
+			walkadd(f, &f->v4root[h], p->left);
+			freeroute(p);	/* p is only the queue cell made by addqueue */
+		}
+		wunlock(&routelock);
+	}
+	v4routegeneration++;	/* Conv caches checked against this in v4lookup become stale */
+
+	ipifcaddroute(f, Rv4, a, mask, gate, type);
+}
+
+#define	V6H(a)	(((a)[IPllen-1] & 0x07ffffff)>>(32-Lroot-5))	/* root bucket, taken from the last word of a */
+#define ISDFLT(a, mask, tag) ((ipcmp((a),v6Unspecified)==0) && (ipcmp((mask),v6Unspecified)==0) && (strcmp((tag), "ra")!=0))	/* only used by the commented-out test below in this file */
+
+void
+v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)	/* insert route a/mask via gate into f's v6 trees */
+{
+	Route *p;
+	ulong sa[IPllen], ea[IPllen];
+	ulong x, y;
+	int h, eh;
+
+	/*
+	if(ISDFLT(a, mask, tag))
+		f->v6p->cdrouter = -1;
+	*/
+
+
+	for(h = 0; h < IPllen; h++){	/* build first (sa) and last (ea) address of the range, word by word */
+		x = nhgetl(a+4*h);
+		y = nhgetl(mask+4*h);
+		sa[h] = x & y;
+		ea[h] = x | ~y;
+	}
+
+	eh = V6H(ea);
+	for(h = V6H(sa); h <= eh; h++) {	/* one node per root bucket from V6H(sa) through V6H(ea) */
+		p = allocroute(type);	/* type is passed without Rv4, which selects the v6 node layout */
+		memmove(p->v6.address, sa, IPaddrlen);
+		memmove(p->v6.endaddress, ea, IPaddrlen);
+		memmove(p->v6.gate, gate, IPaddrlen);
+		memmove(p->tag, tag, sizeof(p->tag));	/* NOTE(review): copies sizeof(p->tag) bytes whatever strlen(tag) is - confirm callers' tags are that long */
+
+		wlock(&routelock);
+		addnode(f, &f->v6root[h], p);
+		while((p = f->queue) != nil) {	/* re-insert subtrees that addnode displaced onto f->queue */
+			f->queue = p->mid;
+			walkadd(f, &f->v6root[h], p->left);
+			freeroute(p);	/* p is only the queue cell made by addqueue */
+		}
+		wunlock(&routelock);
+	}
+	v6routegeneration++;	/* Conv caches checked against this in v6lookup become stale */
+
+	ipifcaddroute(f, 0, a, mask, gate, type);
+}
+
+Route**
+looknode(Route **cur, Route *r)
+{
+	Route *node;
+
+	/* descend until an exact range match or a dead end */
+	while((node = *cur) != nil){
+		switch(rangecompare(r, node)){
+		case Requals:
+			return cur;
+		case Rcontains:
+			/* r is wider than node: the search stops without a match */
+			return nil;
+		case Rcontained:
+			cur = &node->mid;
+			break;
+		case Rpreceeds:
+			cur = &node->left;
+			break;
+		case Rfollows:
+			cur = &node->right;
+			break;
+		}
+	}
+
+	return nil;
+}
+
+void
+v4delroute(Fs *f, uchar *a, uchar *mask, int dolock)	/* drop one reference to route a/mask; dolock=0 when the caller already holds routelock */
+{
+	Route **r, *p;
+	Route rt;
+	int h, eh;
+	ulong m;
+
+	m = nhgetl(mask);
+	rt.v4.address = nhgetl(a) & m;	/* rt is only a search key: just the range and type are set */
+	rt.v4.endaddress = rt.v4.address | ~m;
+	rt.type = Rv4;
+
+	eh = V4H(rt.v4.endaddress);
+	for(h=V4H(rt.v4.address); h<=eh; h++) {	/* visit every root bucket v4addroute would have used */
+		if(dolock)
+			wlock(&routelock);
+		r = looknode(&f->v4root[h], &rt);
+		if(r) {
+			p = *r;
+			if(--(p->ref) == 0){	/* last reference: unlink the node */
+				*r = 0;
+				addqueue(&f->queue, p->left);	/* its three subtrees are queued ... */
+				addqueue(&f->queue, p->mid);
+				addqueue(&f->queue, p->right);
+				freeroute(p);
+				while((p = f->queue) != nil) {	/* ... and re-inserted from the bucket root */
+					f->queue = p->mid;
+					walkadd(f, &f->v4root[h], p->left);
+					freeroute(p);
+				}
+			}
+		}
+		if(dolock)
+			wunlock(&routelock);
+	}
+	v4routegeneration++;	/* Conv caches checked against this in v4lookup become stale */
+
+	ipifcremroute(f, Rv4, a, mask);
+}
+
+void
+v6delroute(Fs *f, uchar *a, uchar *mask, int dolock)	/* drop one reference to route a/mask; dolock=0 when the caller already holds routelock */
+{
+	Route **r, *p;
+	Route rt;
+	int h, eh;
+	ulong x, y;
+
+	for(h = 0; h < IPllen; h++){	/* rt is only a search key: just the range and type are set */
+		x = nhgetl(a+4*h);
+		y = nhgetl(mask+4*h);
+		rt.v6.address[h] = x & y;
+		rt.v6.endaddress[h] = x | ~y;
+	}
+	rt.type = 0;	/* no Rv4 bit, so rangecompare uses the v6 fields */
+
+	eh = V6H(rt.v6.endaddress);
+	for(h=V6H(rt.v6.address); h<=eh; h++) {	/* visit every root bucket v6addroute would have used */
+		if(dolock)
+			wlock(&routelock);
+		r = looknode(&f->v6root[h], &rt);
+		if(r) {
+			p = *r;
+			if(--(p->ref) == 0){	/* last reference: unlink the node */
+				*r = 0;
+				addqueue(&f->queue, p->left);	/* its three subtrees are queued ... */
+				addqueue(&f->queue, p->mid);
+				addqueue(&f->queue, p->right);
+				freeroute(p);
+				while((p = f->queue) != nil) {	/* ... and re-inserted from the bucket root */
+					f->queue = p->mid;
+					walkadd(f, &f->v6root[h], p->left);
+					freeroute(p);
+				}
+			}
+		}
+		if(dolock)
+			wunlock(&routelock);
+	}
+	v6routegeneration++;	/* Conv caches checked against this in v6lookup become stale */
+
+	ipifcremroute(f, 0, a, mask);
+}
+
+Route*
+v4lookup(Fs *f, uchar *a, Conv *c)	/* narrowest route containing v4 address a, or nil; c (may be nil) caches the answer */
+{
+	Route *p, *q;
+	ulong la;
+	uchar gate[IPaddrlen];
+	Ipifc *ifc;
+
+	if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v4routegeneration)
+		return c->r;	/* cached route is still current */
+
+	la = nhgetl(a);
+	q = nil;
+	for(p=f->v4root[V4H(la)]; p;)
+		if(la >= p->v4.address) {
+			if(la <= p->v4.endaddress) {
+				q = p;	/* p contains la: remember it and try narrower ranges under mid */
+				p = p->mid;
+			} else
+				p = p->right;
+		} else
+			p = p->left;
+
+	if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){	/* no interface bound yet, or its ifcid changed */
+		if(q->type & Rifc) {
+			hnputl(gate+IPv4off, q->v4.address);	/* interface route: search by the route's own address, v4-mapped */
+			memmove(gate, v4prefix, IPv4off);
+		} else
+			v4tov6(gate, q->v4.gate);
+		ifc = findipifc(f, gate, q->type);
+		if(ifc == nil)
+			return nil;
+		q->ifc = ifc;
+		q->ifcid = ifc->ifcid;
+	}
+
+	if(c != nil){	/* note that a nil q is cached too */
+		c->r = q;
+		c->rgen = v4routegeneration;
+	}
+
+	return q;
+}
+
+Route*
+v6lookup(Fs *f, uchar *a, Conv *c)	/* narrowest route containing address a, or nil; c (may be nil) caches the answer */
+{
+	Route *p, *q;
+	ulong la[IPllen];
+	int h;
+	ulong x, y;
+	uchar gate[IPaddrlen];
+	Ipifc *ifc;
+
+	if(memcmp(a, v4prefix, IPv4off) == 0){	/* v4-mapped address: try the v4 trees first */
+		q = v4lookup(f, a+IPv4off, c);
+		if(q != nil)
+			return q;
+	}
+
+	if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v6routegeneration)
+		return c->r;	/* cached route is still current */
+
+	for(h = 0; h < IPllen; h++)
+		la[h] = nhgetl(a+4*h);
+
+	q = 0;
+	for(p=f->v6root[V6H(la)]; p;){
+		for(h = 0; h < IPllen; h++){	/* la below p's start address? then go left */
+			x = la[h];
+			y = p->v6.address[h];
+			if(x == y)
+				continue;
+			if(x < y){
+				p = p->left;
+				goto next;
+			}
+			break;
+		}
+		for(h = 0; h < IPllen; h++){	/* la above p's end address? then go right */
+			x = la[h];
+			y = p->v6.endaddress[h];
+			if(x == y)
+				continue;
+			if(x > y){
+				p = p->right;
+				goto next;
+			}
+			break;
+		}
+		q = p;	/* p contains la: remember it and try narrower ranges under mid */
+		p = p->mid;
+next:		;
+	}
+
+	if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){	/* no interface bound yet, or its ifcid changed */
+		if(q->type & Rifc) {
+			for(h = 0; h < IPllen; h++)	/* interface route: search by the route's own address */
+				hnputl(gate+4*h, q->v6.address[h]);
+			ifc = findipifc(f, gate, q->type);
+		} else
+			ifc = findipifc(f, q->v6.gate, q->type);
+		if(ifc == nil)
+			return nil;
+		q->ifc = ifc;
+		q->ifcid = ifc->ifcid;
+	}
+	if(c != nil){	/* note that a nil q is cached too */
+		c->r = q;
+		c->rgen = v6routegeneration;
+	}
+	
+	return q;
+}
+
+void
+routetype(int type, char *p)
+{
+	char *w;
+	memset(p, ' ', 4);	/* blank-padded, 4 columns plus the terminator */
+	p[4] = 0;
+
+	w = p;
+	*w++ = (type & Rv4) ? '4' : '6';
+	if(type & Rifc)
+		*w++ = 'i';
+	if(type & Runi)
+		*w++ = 'u';
+	else if(type & Rbcast)
+		*w++ = 'b';
+	else if(type & Rmulti)
+		*w++ = 'm';
+	if(type & Rptpt)
+		*w = 'p';
+}
+
+static char *rformat = "%-15I %-4M %-15I %4.4s %4.4s %3s\n";
+
+void
+convroute(Route *r, uchar *addr, uchar *mask, uchar *gate, char *t, int *nifc)
+{
+	int w;
+
+	if((r->type & Rv4) == 0){
+		/* v6: write each word back with hnputl */
+		for(w = 0; w < IPllen; w++){
+			hnputl(addr + 4*w, r->v6.address[w]);
+			hnputl(mask + 4*w, ~(r->v6.endaddress[w] ^ r->v6.address[w]));
+		}
+		memmove(gate, r->v6.gate, IPaddrlen);
+	} else {
+		/* v4: addr and gate get the v4prefix, the mask prefix is all ones */
+		memmove(addr, v4prefix, IPv4off);
+		hnputl(addr+IPv4off, r->v4.address);
+		memset(mask, 0xff, IPv4off);
+		hnputl(mask+IPv4off, ~(r->v4.endaddress ^ r->v4.address));
+		memmove(gate, v4prefix, IPv4off);
+		memmove(gate+IPv4off, r->v4.gate, IPv4addrlen);
+	}
+
+	routetype(r->type, t);
+
+	/* r->ifc->conv->x when an interface is bound, else -1 */
+	*nifc = r->ifc ? r->ifc->conv->x : -1;
+}
+
+/*
+ *  format one route at rw->p; this code is not in rr to reduce stack size
+ */
+static void
+sprintroute(Route *r, Routewalk *rw)
+{
+	int nifc, n;
+	char t[5], *iname, ifbuf[5];
+	uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen];
+	char *p;
+
+	convroute(r, addr, mask, gate, t, &nifc);
+	iname = "-";	/* shown when no interface is bound (nifc == -1) */
+	if(nifc != -1) {
+		iname = ifbuf;
+		snprint(ifbuf, sizeof ifbuf, "%d", nifc);
+	}
+	p = seprint(rw->p, rw->e, rformat, addr, mask, gate, t, r->tag, iname);
+	if(rw->o < 0){	/* negative o: -o bytes of output still have to be skipped */
+		n = p - rw->p;
+		if(n > -rw->o){	/* this line runs past the skip point: keep only its tail */
+			memmove(rw->p, rw->p-rw->o, n+rw->o);
+			rw->p = p + rw->o;
+		}
+		rw->o += n;	/* otherwise rw->p stays put and the next line overwrites this one */
+	} else
+		rw->p = p;
+}
+
+/*
+ *  recurse descending tree, applying the function in Routewalk;
+ *  returns 0 once the output buffer is full (rw->e <= rw->p), else 1
+ */
+static int
+rr(Route *r, Routewalk *rw)
+{
+	int h;
+
+	if(rw->e <= rw->p)
+		return 0;
+	if(r == nil)
+		return 1;
+	if(rr(r->left, rw) == 0)	/* order of visit: left, node, mid, right */
+		return 0;
+
+	if(r->type & Rv4)
+		h = V4H(r->v4.address);
+	else
+		h = V6H(r->v6.address);
+
+	if(h == rw->h)	/* a range is stored in several buckets; report it only from its start address's bucket */
+		rw->walk(r, rw);
+
+	if(rr(r->mid, rw) == 0)
+		return 0;
+
+	return rr(r->right, rw);
+}
+
+void
+ipwalkroutes(Fs *f, Routewalk *rw)	/* run rw->walk over the v4 trees, then the v6 trees, under the read lock */
+{
+	rlock(&routelock);
+	if(rw->e > rw->p) {	/* skipped when the output buffer is already full */
+		for(rw->h = 0; rw->h < nelem(f->v4root); rw->h++)	/* rw->h tells rr which bucket is being walked */
+			if(rr(f->v4root[rw->h], rw) == 0)
+				break;
+	}
+	if(rw->e > rw->p) {
+		for(rw->h = 0; rw->h < nelem(f->v6root); rw->h++)
+			if(rr(f->v6root[rw->h], rw) == 0)
+				break;
+	}
+	runlock(&routelock);
+}
+
+long
+routeread(Fs *f, char *p, ulong offset, int n)
+{
+	Routewalk walk;
+
+	/* a negative o tells sprintroute how many bytes are left to skip */
+	walk.walk = sprintroute;
+	walk.o = -offset;
+	walk.p = p;
+	walk.e = p+n;
+	ipwalkroutes(f, &walk);
+
+	return walk.p - p;
+}
+
+/*
+ *  this code is not in routeflush to reduce stack size
+ */
+void
+delroute(Fs *f, Route *r, int dolock)
+{
+	uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen];
+	char t[5];
+	int nifc;
+
+	/* recover addr and mask as byte arrays; gate, t and nifc go unused */
+	convroute(r, addr, mask, gate, t, &nifc);
+	if((r->type & Rv4) == 0){
+		v6delroute(f, addr, mask, dolock);
+		return;
+	}
+	v4delroute(f, addr+IPv4off, mask+IPv4off, dolock);
+}
+
+/*
+ *  recurse until one route is deleted
+ *    returns 0 if nothing is deleted, 1 otherwise
+ */
+int
+routeflush(Fs *f, Route *r, char *tag)
+{
+	if(r == nil)
+		return 0;
+
+	/* children first, in mid, left, right order; stop at the first hit */
+	if(routeflush(f, r->mid, tag)
+	|| routeflush(f, r->left, tag)
+	|| routeflush(f, r->right, tag))
+		return 1;
+
+	/* interface routes are never flushed; a nil tag matches any route */
+	if(r->type & Rifc)
+		return 0;
+	if(tag != nil && strncmp(tag, r->tag, sizeof(r->tag)) != 0)
+		return 0;
+	delroute(f, r, 0);
+	return 1;
+}
+
+long
+routewrite(Fs *f, Chan *c, char *p, int n)	/* run one route command (flush, remove, add, tag) from p[0..n); c may be nil for add */
+{
+	int h, changed;
+	char *tag;
+	Cmdbuf *cb;
+	uchar addr[IPaddrlen];
+	uchar mask[IPaddrlen];
+	uchar gate[IPaddrlen];
+	IPaux *a, *na;
+
+	cb = parsecmd(p, n);
+	if(waserror()){	/* any error() below lands here so cb is not leaked */
+		free(cb);
+		nexterror();
+	}
+
+	if(strcmp(cb->f[0], "flush") == 0){
+		tag = cb->f[1];	/* optional; routeflush treats a nil tag as "match every route" */
+		for(h = 0; h < nelem(f->v4root); h++)
+			for(changed = 1; changed;){	/* routeflush deletes at most one route per call: repeat until none is left */
+				wlock(&routelock);
+				changed = routeflush(f, f->v4root[h], tag);
+				wunlock(&routelock);
+			}
+		for(h = 0; h < nelem(f->v6root); h++)
+			for(changed = 1; changed;){
+				wlock(&routelock);
+				changed = routeflush(f, f->v6root[h], tag);
+				wunlock(&routelock);
+			}
+	} else if(strcmp(cb->f[0], "remove") == 0){
+		if(cb->nf < 3)
+			error(Ebadarg);
+		if (parseip(addr, cb->f[1]) == -1)
+			error(Ebadip);
+		parseipmask(mask, cb->f[2]);
+		if(memcmp(addr, v4prefix, IPv4off) == 0)	/* v4-mapped address: use the v4 trees */
+			v4delroute(f, addr+IPv4off, mask+IPv4off, 1);
+		else
+			v6delroute(f, addr, mask, 1);
+	} else if(strcmp(cb->f[0], "add") == 0){
+		if(cb->nf < 4)
+			error(Ebadarg);
+		if(parseip(addr, cb->f[1]) == -1 ||
+		    parseip(gate, cb->f[3]) == -1)
+			error(Ebadip);
+		parseipmask(mask, cb->f[2]);
+		tag = "none";	/* default tag when there is no channel */
+		if(c != nil){
+			a = c->aux;
+			tag = a->tag;
+		}
+		if(memcmp(addr, v4prefix, IPv4off) == 0)
+			v4addroute(f, tag, addr+IPv4off, mask+IPv4off, gate+IPv4off, 0);
+		else
+			v6addroute(f, tag, addr, mask, gate, 0);
+	} else if(strcmp(cb->f[0], "tag") == 0) {
+		if(c == nil || cb->nf < 2)
+			error(Ebadarg);
+		/* the tag lives in the channel's IPaux, so a nil c (allowed for add) is rejected above */
+		a = c->aux;
+		na = newipaux(a->owner, cb->f[1]);
+		c->aux = na;
+		free(a);
+	}
+
+	poperror();
+	free(cb);
+	return n;
+}
diff --git a/src/9vx/a/ip/ipv6.c b/src/9vx/a/ip/ipv6.c
@@ -0,0 +1,718 @@
+#include	"u.h"
+#include	"lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"error.h"
+
+#include	"ip.h"
+#include	"ipv6.h"
+
+enum
+{
+	IP6FHDR		= 8, 		/* sizeof(Fraghdr6) */
+};
+
+#define IPV6CLASS(hdr)	(((hdr)->vcf[0]&0x0F)<<2 | ((hdr)->vcf[1]&0xF0)>>2)
+#define BLKIPVER(xp)	(((Ip6hdr*)((xp)->rp))->vcf[0] & 0xF0)
+/*
+ * This sleazy macro is stolen shamelessly from ip.c, see comment there.
+ */
+#define BKFG(xp)	((Ipfrag*)((xp)->base))
+
+typedef struct	Fragment4	Fragment4;
+typedef struct	Fragment6	Fragment6;
+typedef struct	Ipfrag	Ipfrag;
+
+Block*		ip6reassemble(IP*, int, Block*, Ip6hdr*);
+Fragment6*	ipfragallo6(IP*);
+void		ipfragfree6(IP*, Fragment6*);
+Block*		procopts(Block *bp);
+static Block*	procxtns(IP *ip, Block *bp, int doreasm);
+int		unfraglen(Block *bp, uchar *nexthdr, int setfh);
+
+/* MIB II counters */
+enum
+{
+	Forwarding,
+	DefaultTTL,
+	InReceives,
+	InHdrErrors,
+	InAddrErrors,
+	ForwDatagrams,
+	InUnknownProtos,
+	InDiscards,
+	InDelivers,
+	OutRequests,
+	OutDiscards,
+	OutNoRoutes,
+	ReasmTimeout,
+	ReasmReqds,
+	ReasmOKs,
+	ReasmFails,
+	FragOKs,
+	FragFails,
+	FragCreates,
+
+	Nstats,
+};
+
+static char *statnames[] =
+{
+[Forwarding]	"Forwarding",
+[DefaultTTL]	"DefaultTTL",
+[InReceives]	"InReceives",
+[InHdrErrors]	"InHdrErrors",
+[InAddrErrors]	"InAddrErrors",
+[ForwDatagrams]	"ForwDatagrams",
+[InUnknownProtos]	"InUnknownProtos",
+[InDiscards]	"InDiscards",
+[InDelivers]	"InDelivers",
+[OutRequests]	"OutRequests",
+[OutDiscards]	"OutDiscards",
+[OutNoRoutes]	"OutNoRoutes",
+[ReasmTimeout]	"ReasmTimeout",
+[ReasmReqds]	"ReasmReqds",
+[ReasmOKs]	"ReasmOKs",
+[ReasmFails]	"ReasmFails",
+[FragOKs]	"FragOKs",
+[FragFails]	"FragFails",
+[FragCreates]	"FragCreates",
+};
+
+struct Fragment4
+{
+	Block*	blist;
+	Fragment4*	next;
+	ulong 	src;
+	ulong 	dst;
+	ushort	id;
+	ulong 	age;
+};
+
+struct Fragment6	/* one v6 reassembly queue, matched on (src, dst, id) in ip6reassemble */
+{
+	Block*	blist;	/* fragments received so far, ordered by offset */
+	Fragment6*	next;	/* link on IP.flisthead6 or IP.fragfree6 */
+	uchar 	src[IPaddrlen];
+	uchar 	dst[IPaddrlen];
+	uint	id;	/* id field of the fragment header */
+	ulong 	age;	/* set to NOW + 30000 in ipfragallo6; queue is freed once NOW passes it */
+};
+
+struct Ipfrag	/* per-fragment bookkeeping kept at the block's base (see BKFG) */
+{
+	ushort	foff;	/* offset of this fragment's data in the reassembled payload */
+	ushort	flen;	/* bytes of fragment data still counted for this block */
+};
+
+/* an instance of IP */
+struct IP
+{
+	ulong		stats[Nstats];	/* counters indexed by the MIB II enum above */
+
+	QLock		fraglock4;	/* the four v4 fields below are not referenced in this file */
+	Fragment4*	flisthead4;
+	Fragment4*	fragfree4;
+	Ref		id4;
+
+	QLock		fraglock6;	/* held by ip6reassemble while it touches the two lists below */
+	Fragment6*	flisthead6;	/* active reassembly queues, newest first */
+	Fragment6*	fragfree6;	/* unused Fragment6 structures */
+	Ref		id6;	/* incref'd in ipoput6 to produce fragment ids */
+
+	int		iprouting;	/* true if we route like a gateway */
+};
+
+int
+ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)	/* send v6 packet bp, fragmenting if needed; bp is consumed; returns -1 only when no route is found */
+{
+	int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff;
+	int morefrags, blklen, rv = 0, tentative;
+	uchar *gate, nexthdr;
+	Block *xp, *nb;
+	Fraghdr6 fraghdr;
+	IP *ip;
+	Ip6hdr *eh;
+	Ipifc *ifc;
+	Route *r, *sr;
+
+	ip = f->ip;
+
+	/* Fill out the ip header */
+	eh = (Ip6hdr*)(bp->rp);
+
+	ip->stats[OutRequests]++;
+
+	/* Number of uchars in data and ip header to write */
+	len = blocklen(bp);
+
+	tentative = iptentative(f, eh->src);
+	if(tentative){
+		netlog(f, Logip, "reject tx of packet with tentative src address %I\n",
+			eh->src);
+		goto free;
+	}
+
+	if(gating){	/* forwarded packet: trust the header's payload length, trimming any excess */
+		chunk = nhgets(eh->ploadlen);
+		if(chunk > len){
+			ip->stats[OutDiscards]++;
+			netlog(f, Logip, "short gated packet\n");
+			goto free;
+		}
+		if(chunk + IP6HDR < len)
+			len = chunk + IP6HDR;
+	}
+
+	if(len >= IP_MAX){
+		ip->stats[OutDiscards]++;
+		netlog(f, Logip, "exceeded ip max size %I\n", eh->dst);
+		goto free;
+	}
+
+	r = v6lookup(f, eh->dst, c);
+	if(r == nil){
+//		print("no route for %I, src %I free\n", eh->dst, eh->src);
+		ip->stats[OutNoRoutes]++;
+		netlog(f, Logip, "no interface %I\n", eh->dst);
+		rv = -1;
+		goto free;
+	}
+
+	ifc = r->ifc;
+	if(r->type & (Rifc|Runi))	/* directly reachable: the destination is the next hop */
+		gate = eh->dst;
+	else if(r->type & (Rbcast|Rmulti)) {
+		gate = eh->dst;
+		sr = v6lookup(f, eh->src, nil);	/* send out the interface that owns the source address, if unicast */
+		if(sr && (sr->type & Runi))
+			ifc = sr->ifc;
+	}
+	else
+		gate = r->v6.gate;
+
+	if(!gating)
+		eh->vcf[0] = IP_VER6;
+	eh->ttl = ttl;
+	if(!gating) {
+		eh->vcf[0] |= tos >> 4;
+		eh->vcf[1]  = tos << 4;
+	}
+
+	if(!CANRLOCK(ifc))
+		goto free;
+
+	if(waserror()){	/* error() raised below: release the interface lock, then re-raise */
+		RUNLOCK(ifc);
+		nexterror();
+	}
+
+	if(ifc->m == nil)
+		goto raise;
+
+	/* If we dont need to fragment just send it */
+	medialen = ifc->maxtu - ifc->m->hsize;
+	if(len <= medialen) {
+		hnputs(eh->ploadlen, len - IP6HDR);
+		ifc->m->bwrite(ifc, bp, V6, gate);	/* bp is handed to the medium here, so this path skips the freeblist at free: */
+		RUNLOCK(ifc);
+		poperror();
+		return 0;
+	}
+
+	if(gating && ifc->reassemble <= 0) {
+		/*
+		 * v6 intermediate nodes are not supposed to fragment pkts;
+		 * we fragment if ifc->reassemble is turned on; an exception
+		 * needed for nat.
+		 */
+		ip->stats[OutDiscards]++;
+		icmppkttoobig6(f, ifc, bp);
+		netlog(f, Logip, "%I: gated pkts not fragmented\n", eh->dst);
+		goto raise;
+	}
+
+	/* start v6 fragmentation */
+	uflen = unfraglen(bp, &nexthdr, 1);	/* setfh=1: the last unfragmentable header now names FH as its successor */
+	if(uflen > medialen) {
+		ip->stats[FragFails]++;
+		ip->stats[OutDiscards]++;
+		netlog(f, Logip, "%I: unfragmentable part too big\n", eh->dst);
+		goto raise;
+	}
+
+	flen = len - uflen;	/* bytes of fragmentable data */
+	seglen = (medialen - (uflen + IP6FHDR)) & ~7;	/* data per fragment, rounded down to a multiple of 8 */
+	if(seglen < 8) {
+		ip->stats[FragFails]++;
+		ip->stats[OutDiscards]++;
+		netlog(f, Logip, "%I: seglen < 8\n", eh->dst);
+		goto raise;
+	}
+
+	lid = incref(&ip->id6);
+	fraghdr.nexthdr = nexthdr;
+	fraghdr.res = 0;
+	hnputl(fraghdr.id, lid);
+
+	xp = bp;
+	offset = uflen;
+	while (xp && offset && offset >= BLEN(xp)) {	/* advance xp/offset to the first fragmentable byte */
+		offset -= BLEN(xp);
+		xp = xp->next;
+	}
+	xp->rp += offset;	/* NOTE(review): xp is not checked for nil after the loop above */
+
+	fragoff = 0;
+	morefrags = 1;
+
+	for(; fragoff < flen; fragoff += seglen) {
+		nb = allocb(uflen + IP6FHDR + seglen);
+
+		if(fragoff + seglen >= flen) {	/* last fragment: shorter, and the more-fragments bit stays clear */
+			seglen = flen - fragoff;
+			morefrags = 0;
+		}
+
+		hnputs(eh->ploadlen, seglen+IP6FHDR);
+		memmove(nb->wp, eh, uflen);	/* every fragment starts with a copy of the unfragmentable part */
+		nb->wp += uflen;
+
+		hnputs(fraghdr.offsetRM, fragoff); /* last 3 bits must be 0 */
+		fraghdr.offsetRM[1] |= morefrags;
+		memmove(nb->wp, &fraghdr, IP6FHDR);
+		nb->wp += IP6FHDR;
+
+		/* Copy data */
+		chunk = seglen;
+		while (chunk) {
+			if(!xp) {
+				ip->stats[OutDiscards]++;
+				ip->stats[FragFails]++;
+				freeblist(nb);
+				netlog(f, Logip, "!xp: chunk in v6%d\n", chunk);
+				goto raise;
+			}
+			blklen = chunk;
+			if(BLEN(xp) < chunk)
+				blklen = BLEN(xp);
+			memmove(nb->wp, xp->rp, blklen);
+
+			nb->wp += blklen;
+			xp->rp += blklen;
+			chunk -= blklen;
+			if(xp->rp == xp->wp)	/* source block drained: move to the next one */
+				xp = xp->next;
+		}
+
+		ifc->m->bwrite(ifc, nb, V6, gate);
+		ip->stats[FragCreates]++;
+	}
+	ip->stats[FragOKs]++;
+
+raise:	/* reached with the interface read lock held and the waserror frame pushed */
+	RUNLOCK(ifc);
+	poperror();
+free:
+	freeblist(bp);
+	return rv;
+}
+
+void
+ipiput6(Fs *f, Ipifc *ifc, Block *bp)	/* receive v6 packet bp from ifc: forward it, deliver it to its protocol, or drop it */
+{
+	int hl, hop, tos, notforme, tentative;
+	uchar proto;
+	uchar v6dst[IPaddrlen];
+	IP *ip;
+	Ip6hdr *h;
+	Proto *p;
+	Route *r, *sr;
+
+	ip = f->ip;
+	ip->stats[InReceives]++;
+
+	/*
+	 *  Ensure we have all the header info in the first
+	 *  block.  Make life easier for other protocols by
+	 *  collecting up to the first 64 bytes in the first block.
+	 */
+	if(BLEN(bp) < 64) {
+		hl = blocklen(bp);
+		if(hl < IP6HDR)
+			hl = IP6HDR;
+		if(hl > 64)
+			hl = 64;
+		bp = pullupblock(bp, hl);
+		if(bp == nil)
+			return;
+	}
+
+	h = (Ip6hdr *)bp->rp;
+
+	memmove(&v6dst[0], &h->dst[0], IPaddrlen);
+	notforme = ipforme(f, v6dst) == 0;
+	tentative = iptentative(f, v6dst);
+
+	if(tentative && h->proto != ICMPv6) {	/* only ICMPv6 is accepted for a tentative address */
+		print("tentative addr, drop\n");
+		freeblist(bp);
+		return;
+	}
+
+	/* Check header version */
+	if(BLKIPVER(bp) != IP_VER6) {
+		ip->stats[InHdrErrors]++;
+		netlog(f, Logip, "ip: bad version %ux\n", (h->vcf[0]&0xF0)>>4);	/* version is the top nibble, so shift by 4 */
+		freeblist(bp);
+		return;
+	}
+
+	/* route */
+	if(notforme) {
+		if(!ip->iprouting){
+			freeblist(bp);	/* free the whole chain, as every other drop path here does */
+			return;
+		}
+
+		/* don't forward to link-local destinations */
+		if(islinklocal(h->dst) ||
+		   (isv6mcast(h->dst) && (h->dst[1]&0xF) <= Link_local_scop)){
+			ip->stats[OutDiscards]++;
+			freeblist(bp);
+			return;
+		}
+			
+		/* don't forward to source's network */
+		sr = v6lookup(f, h->src, nil);
+		r  = v6lookup(f, h->dst, nil);
+
+		if(r == nil || sr == r){
+			ip->stats[OutDiscards]++;
+			freeblist(bp);
+			return;
+		}
+
+		/* don't forward if packet has timed out */
+		hop = h->ttl;
+		if(hop < 1) {
+			ip->stats[InHdrErrors]++;
+			icmpttlexceeded6(f, ifc, bp);
+			freeblist(bp);
+			return;
+		}
+
+		/* process headers & reassemble if the interface expects it */
+		bp = procxtns(ip, bp, r->ifc->reassemble);
+		if(bp == nil)
+			return;
+
+		ip->stats[ForwDatagrams]++;
+		h = (Ip6hdr *)bp->rp;	/* procxtns may return a different block */
+		tos = IPV6CLASS(h);
+		hop = h->ttl;
+		ipoput6(f, bp, 1, hop-1, tos, nil);	/* gating=1; hop count decremented */
+		return;
+	}
+
+	/* reassemble & process headers if needed */
+	bp = procxtns(ip, bp, 1);
+	if(bp == nil)
+		return;
+
+	h = (Ip6hdr *) (bp->rp);
+	proto = h->proto;
+	p = Fsrcvpcol(f, proto);
+	if(p && p->rcv) {
+		ip->stats[InDelivers]++;
+		(*p->rcv)(p, ifc, bp);
+		return;
+	}
+
+	ip->stats[InDiscards]++;	/* no protocol handler registered for this packet */
+	ip->stats[InUnknownProtos]++;
+	freeblist(bp);
+}
+
+/*
+ * ipfragfree6 - copied from ipfragfree4 - assume hold fraglock6
+ */
+void
+ipfragfree6(IP *ip, Fragment6 *frag)
+{
+	Fragment6 **link;
+
+	/* drop any queued blocks, then clear blist, id and src */
+	if(frag->blist != nil)
+		freeblist(frag->blist);
+	frag->blist = nil;
+	frag->id = 0;
+	memset(frag->src, 0, IPaddrlen);
+
+	/* unlink from the active list, if it is on it */
+	for(link = &ip->flisthead6; *link != nil; link = &(*link)->next){
+		if(*link == frag){
+			*link = frag->next;
+			break;
+		}
+	}
+
+	/* push onto the free list */
+	frag->next = ip->fragfree6;
+	ip->fragfree6 = frag;
+}
+
+/*
+ * ipfragallo6 - copied from ipfragalloc4; callers in this file hold fraglock6
+ */
+Fragment6*
+ipfragallo6(IP *ip)
+{
+	Fragment6 *f;
+
+	while(ip->fragfree6 == nil) {
+		/* free last entry on fraglist */
+		for(f = ip->flisthead6; f->next; f = f->next)	/* NOTE(review): assumes flisthead6 is non-nil whenever the free list is empty */
+			;
+		ipfragfree6(ip, f);
+	}
+	f = ip->fragfree6;	/* pop from the free list ... */
+	ip->fragfree6 = f->next;
+	f->next = ip->flisthead6;	/* ... and push on the head of the active list */
+	ip->flisthead6 = f;
+	f->age = NOW + 30000;	/* ip6reassemble frees queues whose age is below NOW */
+
+	return f;
+}
+
+static Block*
+procxtns(IP *ip, Block *bp, int doreasm)	/* reassemble (when doreasm != 0) and run option processing; nil means bp was queued or dropped */
+{
+	int offset;
+	uchar proto;
+	Ip6hdr *h;
+
+	h = (Ip6hdr *)bp->rp;
+	offset = unfraglen(bp, &proto, 0);	/* length of the unfragmentable part and the header type that follows it */
+
+	if(proto == FH && doreasm != 0) {
+		bp = ip6reassemble(ip, offset, bp, h);
+		if(bp == nil)
+			return nil;
+		offset = unfraglen(bp, &proto, 0);	/* recompute on the block ip6reassemble returned */
+	}
+
+	if(proto == DOH || offset > IP6HDR)	/* destination options, or extension headers before them */
+		bp = procopts(bp);
+	return bp;
+}
+
+/*
+ * returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
+ * hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
+ * of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
+ * field of the last header in the "Unfragmentable part" is set to FH.
+ */
+int
+unfraglen(Block *bp, uchar *nexthdr, int setfh)
+{
+	uchar *p, *q;
+	int ufl, hs;
+
+	p = bp->rp;
+	q = p+6;   /* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
+	*nexthdr = *q;
+	ufl = IP6HDR;
+	p += ufl;	/* p: start of the header after the fixed v6 header */
+
+	while (*nexthdr == HBH || *nexthdr == RH) {	/* only bp's first block is examined */
+		*nexthdr = *p;	/* first byte of an extension header: type of the next header */
+		hs = ((int)*(p+1) + 1) * 8;	/* second byte: header length in 8-byte units, not counting the first */
+		ufl += hs;
+		q = p;	/* q tracks the next-header byte of the last header walked */
+		p += hs;
+	}
+
+	if(*nexthdr == FH)	/* fragment header follows: copy its next-header byte into *q, modifying the packet */
+		*q = *p;
+	if(setfh)
+		*q = FH;
+	return ufl;
+}
+
+Block*
+procopts(Block *bp)	/* placeholder: returns bp unchanged */
+{
+	return bp;
+}
+
+Block*
+ip6reassemble(IP* ip, int uflen, Block* bp, Ip6hdr* ih)	/* queue fragment bp; returns the whole packet once complete, else nil */
+{
+	int fend, offset, ovlap, len, fragsize, pktposn;
+	uint id;
+	uchar src[IPaddrlen], dst[IPaddrlen];
+	Block *bl, **l, *last, *prev;
+	Fraghdr6 *fraghdr;
+	Fragment6 *f, *fnext;
+
+	fraghdr = (Fraghdr6 *)(bp->rp + uflen);	/* fragment header follows the uflen unfragmentable bytes */
+	memmove(src, ih->src, IPaddrlen);
+	memmove(dst, ih->dst, IPaddrlen);
+	id = nhgetl(fraghdr->id);
+	offset = nhgets(fraghdr->offsetRM) & ~7;	/* low 3 bits are not part of the offset */
+
+	/* block lists are too hard, pullupblock into a single block */
+	if(bp->next){
+		bp = pullupblock(bp, blocklen(bp));
+		/* the data may now live in a different block: re-derive the header pointers */
+		ih = (Ip6hdr *)bp->rp;
+		fraghdr = (Fraghdr6 *)(bp->rp + uflen);
+	}
+
+	qlock(&ip->fraglock6);
+
+	/*
+	 *  find a reassembly queue for this fragment
+	 */
+	for(f = ip->flisthead6; f; f = fnext){
+		fnext = f->next;	/* saved first: ipfragfree6 below relinks f */
+		if(ipcmp(f->src, src)==0 && ipcmp(f->dst, dst)==0 && f->id == id)
+			break;
+		if(f->age < NOW){	/* expire stale queues met along the way */
+			ip->stats[ReasmTimeout]++;
+			ipfragfree6(ip, f);
+		}
+	}
+
+	/*
+	 *  if this isn't a fragmented packet, accept it
+	 *  and get rid of any fragments that might go
+	 *  with it.
+	 */
+	if(nhgets(fraghdr->offsetRM) == 0) {	/* 1st frag is also last */
+		if(f) {
+			ipfragfree6(ip, f);
+			ip->stats[ReasmFails]++;
+		}
+		qunlock(&ip->fraglock6);
+		return bp;
+	}
+
+	if(bp->base+sizeof(Ipfrag) >= bp->rp){	/* make room at the block's base for the Ipfrag record (BKFG) */
+		bp = padblock(bp, sizeof(Ipfrag));
+		bp->rp += sizeof(Ipfrag);
+		ih = (Ip6hdr *)bp->rp;	/* padblock may return a different block, so ih must follow it */
+	}
+	BKFG(bp)->foff = offset;
+	BKFG(bp)->flen = nhgets(ih->ploadlen) + IP6HDR - uflen - IP6FHDR;	/* payload bytes after the fragment header */
+
+	/* First fragment allocates a reassembly queue */
+	if(f == nil) {
+		f = ipfragallo6(ip);
+		f->id = id;
+		memmove(f->src, src, IPaddrlen);
+		memmove(f->dst, dst, IPaddrlen);
+
+		f->blist = bp;
+
+		qunlock(&ip->fraglock6);
+		ip->stats[ReasmReqds]++;
+		return nil;
+	}
+
+	/*
+	 *  find the new fragment's position in the queue
+	 */
+	prev = nil;
+	l = &f->blist;
+	bl = f->blist;
+	while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+		prev = bl;
+		l = &bl->next;
+		bl = bl->next;
+	}
+
+	/* Check overlap of a previous fragment - trim away as necessary */
+	if(prev) {
+		ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+		if(ovlap > 0) {
+			if(ovlap >= BKFG(bp)->flen) {	/* new fragment lies entirely inside prev: drop it */
+				freeblist(bp);
+				qunlock(&ip->fraglock6);
+				return nil;
+			}
+			BKFG(prev)->flen -= ovlap;	/* otherwise shorten prev so the new data wins */
+		}
+	}
+
+	/* Link onto assembly queue */
+	bp->next = *l;
+	*l = bp;
+
+	/* Check to see if succeeding segments overlap */
+	if(bp->next) {
+		l = &bp->next;
+		fend = BKFG(bp)->foff + BKFG(bp)->flen;
+
+		/* Take completely covered segments out */
+		while(*l) {
+			ovlap = fend - BKFG(*l)->foff;
+			if(ovlap <= 0)
+				break;
+			if(ovlap < BKFG(*l)->flen) {	/* partly covered: trim its front and stop */
+				BKFG(*l)->flen -= ovlap;
+				BKFG(*l)->foff += ovlap;
+				/* move up ih hdrs */
+				memmove((*l)->rp + ovlap, (*l)->rp, uflen);
+				(*l)->rp += ovlap;
+				break;
+			}
+			last = (*l)->next;
+			(*l)->next = nil;
+			freeblist(*l);
+			*l = last;
+		}
+	}
+
+	/*
+	 *  look for a complete packet.  if we get to a fragment
+	 *  with the trailing bit of fraghdr->offsetRM[1] clear, we're done.
+	 */
+	pktposn = 0;
+	for(bl = f->blist; bl && BKFG(bl)->foff == pktposn; bl = bl->next) {	/* stops at the first gap */
+		fraghdr = (Fraghdr6 *)(bl->rp + uflen);
+		if((fraghdr->offsetRM[1] & 1) == 0) {	/* more-fragments bit clear: this is the last piece */
+			bl = f->blist;
+
+			/* get rid of frag header in first fragment */
+			memmove(bl->rp + IP6FHDR, bl->rp, uflen);
+			bl->rp += IP6FHDR;
+			len = nhgets(((Ip6hdr*)bl->rp)->ploadlen) - IP6FHDR;
+			bl->wp = bl->rp + len + IP6HDR;
+			/*
+			 * Pullup all the fragment headers and
+			 * return a complete packet
+			 */
+			for(bl = bl->next; bl; bl = bl->next) {	/* later fragments contribute only their data */
+				fragsize = BKFG(bl)->flen;
+				len += fragsize;
+				bl->rp += uflen + IP6FHDR;
+				bl->wp = bl->rp + fragsize;
+			}
+
+			bl = f->blist;
+			f->blist = nil;	/* detach first so ipfragfree6 does not free the packet */
+			ipfragfree6(ip, f);
+			ih = (Ip6hdr*)bl->rp;
+			hnputs(ih->ploadlen, len);
+			qunlock(&ip->fraglock6);
+			ip->stats[ReasmOKs]++;
+			return bl;
+		}
+		pktposn += BKFG(bl)->flen;
+	}
+	qunlock(&ip->fraglock6);
+	return nil;
+}
diff --git a/src/9vx/a/ip/ipv6.h b/src/9vx/a/ip/ipv6.h
@@ -0,0 +1,185 @@
+/*
+ * Internet Protocol Version 6
+ *
+ * rfc2460 defines the protocol, rfc2461 neighbour discovery, and
+ * rfc2462 address autoconfiguration.  rfc4443 defines ICMP; was rfc2463.
+ * rfc4291 defines the address architecture (including prefices), was rfc3513.
+ * rfc4007 defines the scoped address architecture.
+ *
+ * global unicast is anything but unspecified (::), loopback (::1),
+ * multicast (ff00::/8), and link-local unicast (fe80::/10).
+ *
+ * site-local (fec0::/10) is now deprecated, originally by rfc3879.
+ *
+ * Unique Local IPv6 Unicast Addresses are defined by rfc4193.
+ * prefix is fc00::/7, scope is global, routing is limited to roughly a site.
+ */
+#define isv6mcast(addr)	  ((addr)[0] == 0xff)	/* first byte 0xff, i.e. ff00::/8 */
+#define islinklocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0x80)	/* fe80::/10 */
+/* optexsts: payload length field exceeds 24; issmcast: first 13 bytes equal v6solicitednode */
+#define optexsts(np)	(nhgets((np)->ploadlen) > 24)
+#define issmcast(addr)	(memcmp((addr), v6solicitednode, 13) == 0)
+/* MIN evaluates the chosen argument twice: keep side effects out of a and b */
+#ifndef MIN
+#define MIN(a, b) ((a) <= (b)? (a): (b))
+#endif
+
+#undef ESP
+
+enum {				/* Header Types: ip protocol / ipv6 next-header numbers */
+	HBH		= 0,	/* hop-by-hop options (rfc2460) */
+	ICMP		= 1,
+	IGMP		= 2,
+	GGP		= 3,
+	IPINIP		= 4,
+	ST		= 5,
+	TCP		= 6,
+	UDP		= 17,
+	ISO_TP4		= 29,
+	RH		= 43,	/* routing header */
+	FH		= 44,	/* fragment header */
+	IDRP		= 45,
+	RSVP		= 46,
+	ESP		= 50,	/* rfc4303; was 52, which IANA assigns to I-NLSP */
+	AH		= 51,
+	ICMPv6		= 58,
+	NNH		= 59,	/* no next header */
+	DOH		= 60,	/* destination options */
+	ISO_IP		= 80,
+	IGRP		= 88,
+	OSPF		= 89,
+
+	Maxhdrtype	= 256,
+};
+
+enum {
+	/* multicast flags and scopes */
+
+//	Well_known_flg	= 0,
+//	Transient_flg	= 1,
+
+//	Interface_local_scop = 1,
+	Link_local_scop	= 2,
+//	Site_local_scop	= 5,
+//	Org_local_scop	= 8,
+	Global_scop	= 14,
+
+	/* various prefix lengths */
+	SOLN_PREF_LEN	= 13,
+
+	/* icmpv6 unreachability codes */
+	Icmp6_no_route		= 0,
+	Icmp6_ad_prohib		= 1,
+	Icmp6_out_src_scope	= 2,
+	Icmp6_adr_unreach	= 3,
+	Icmp6_port_unreach	= 4,
+	Icmp6_gress_src_fail	= 5,
+	Icmp6_rej_route		= 6,
+	Icmp6_unknown		= 7,  /* our own invention for internal use */
+
+	/* various flags & constants */
+	v6MINTU		= 1280,
+	HOP_LIMIT	= 255,
+	IP6HDR		= 40,		/* sizeof(Ip6hdr): 8 + 2*16; was 20, which is the ipv4 size */
+
+	/* option types */
+
+	/* neighbour discovery */
+	SRC_LLADDR	= 1,
+	TARGET_LLADDR	= 2,
+	PREFIX_INFO	= 3,
+	REDIR_HEADER	= 4,
+	MTU_OPTION	= 5,
+	/* new since rfc2461; see iana.org/assignments/icmpv6-parameters */
+	V6nd_home	= 8,
+	V6nd_srcaddrs	= 9,		/* rfc3122 */
+	V6nd_ip		= 17,
+	/* /lib/rfc/drafts/draft-jeong-dnsop-ipv6-dns-discovery-12.txt */
+	V6nd_rdns	= 25,
+	/* plan 9 extensions */
+	V6nd_9fs	= 250,
+	V6nd_9auth	= 251,
+
+	SRC_UNSPEC	= 0,
+	SRC_UNI		= 1,
+	TARG_UNI	= 2,
+	TARG_MULTI	= 3,
+
+	Tunitent	= 1,
+	Tuniproxy	= 2,
+	Tunirany	= 3,
+
+	/* Node constants */
+	MAX_MULTICAST_SOLICIT	= 3,
+	RETRANS_TIMER		= 1000,
+};
+
+typedef struct Ip6hdr	Ip6hdr;
+typedef struct Opthdr	Opthdr;
+typedef struct Routinghdr Routinghdr;
+typedef struct Fraghdr6	Fraghdr6;
+
+struct	Ip6hdr {
+	uchar	vcf[4];		/* version:4, traffic class:8, flow label:20 */
+	uchar	ploadlen[2];	/* payload length: packet length - 40; read with nhgets, written with hnputs */
+	uchar	proto;		/* next header type */
+	uchar	ttl;		/* hop limit */
+	uchar	src[IPaddrlen];	/* source address */
+	uchar	dst[IPaddrlen];	/* destination address */
+};
+
+struct	Opthdr {
+	uchar	nexthdr;	/* type of the header that follows (presumably a Header Types value) */
+	uchar	len;		/* extension length; units are not visible here -- TODO confirm against rfc2460 */
+};
+
+/*
+ * Beware routing header type 0 (loose source routing); see
+ * http://www.secdev.org/conf/IPv6_RH_security-csw07.pdf.
+ * Type 1 is unused.  Type 2 is for MIPv6 (mobile IPv6) filtering
+ * against type 0 header.
+ */
+struct	Routinghdr {
+	uchar	nexthdr;	/* type of the header that follows */
+	uchar	len;		/* extension length; units are not visible in this file */
+	uchar	rtetype;	/* routing type; see the warning about types 0-2 above */
+	uchar	segrem;		/* presumably "segments remaining" -- TODO confirm */
+};
+
+struct	Fraghdr6 {
+	uchar	nexthdr;	/* type of the header that follows */
+	uchar	res;		/* reserved */
+	uchar	offsetRM[2];	/* Offset, Res, M flag; reassembly tests offsetRM[1] & 1 for "more" */
+	uchar	id[4];		/* fragment identification */
+};
+
+extern uchar v6allnodesN[IPaddrlen];
+extern uchar v6allnodesL[IPaddrlen];
+extern uchar v6allroutersN[IPaddrlen];
+extern uchar v6allroutersL[IPaddrlen];
+extern uchar v6allnodesNmask[IPaddrlen];
+extern uchar v6allnodesLmask[IPaddrlen];
+extern uchar v6solicitednode[IPaddrlen];
+extern uchar v6solicitednodemask[IPaddrlen];
+extern uchar v6Unspecified[IPaddrlen];
+extern uchar v6loopback[IPaddrlen];
+extern uchar v6loopbackmask[IPaddrlen];
+extern uchar v6linklocal[IPaddrlen];
+extern uchar v6linklocalmask[IPaddrlen];
+extern uchar v6multicast[IPaddrlen];
+extern uchar v6multicastmask[IPaddrlen];
+
+extern int v6llpreflen;
+extern int v6mcpreflen;
+extern int v6snpreflen;
+extern int v6aNpreflen;
+extern int v6aLpreflen;
+
+extern int ReTransTimer;
+
+void ipv62smcast(uchar *, uchar *);
+void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
+void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
+void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
+void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
+void icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free);
diff --git a/src/9vx/a/ip/loopbackmedium.c b/src/9vx/a/ip/loopbackmedium.c
@@ -0,0 +1,120 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+enum
+{
+	Maxtu=	16*1024,	/* advertised maxtu and the limit passed to qbread in loopbackread */
+};
+
+typedef struct LB LB;
+struct LB
+{
+	Proc	*readp;	/* reader kproc; set by loopbackread, cleared when it exits */
+	Queue	*q;	/* filled by loopbackbwrite, drained by loopbackread */
+	Fs	*f;	/* ifc->conv->p->f, handed to ipiput4 */
+};
+
+static void loopbackread(void *a);
+
+static void
+loopbackbind(Ipifc *ifc, int _, char** __)
+{
+	LB *state;
+
+	/* per-interface state; loopbackread fills in readp once it runs */
+	state = smalloc(sizeof *state);
+	state->f = ifc->conv->p->f;
+	state->q = qopen(1024*1024, Qmsg, nil, nil);
+	ifc->mbps = 1000;
+	ifc->arg = state;
+	/* the reader finds its LB through ifc->arg, so it is started last */
+	kproc("loopbackread", loopbackread, ifc);
+}
+
+static void
+loopbackunbind(Ipifc *ifc)
+{
+	LB *state;
+
+	state = ifc->arg;
+	/* note the reader kproc, then poll every 300ms until it has cleared readp */
+	if(state->readp != 0)
+		postnote(state->readp, 1, "unbind", 0);
+	while(state->readp)
+		tsleep(&up->sleep, return0, 0, 300);
+
+	/* reader is gone: release the queue and the state */
+	qfree(state->q);
+	free(state);
+}
+
+static void
+loopbackbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
+{
+	LB *state = ifc->arg;
+
+	/* hand the block to the reader's queue; a negative qpass result counts as an output error */
+	if(qpass(state->q, bp) < 0)
+		ifc->outerr++;
+	ifc->out++;	/* counted whether or not the queue took it */
+}
+
+static void
+loopbackread(void *a)	/* kproc body; a is the Ipifc handed to kproc by loopbackbind */
+{
+	Ipifc *ifc;
+	Block *bp;
+	LB *lb;
+	/* loop forever moving blocks from lb->q into ipiput4 under the ifc read lock */
+	ifc = a;
+	lb = ifc->arg;
+	lb->readp = up;	/* hide identity under a rock for unbind */
+	if(waserror()){
+		lb->readp = 0;	/* lets loopbackunbind's wait loop finish */
+		pexit("hangup", 1);
+	}
+	for(;;){
+		bp = qbread(lb->q, Maxtu);
+		if(bp == nil)
+			continue;
+		ifc->in++;
+		if(!CANRLOCK(ifc)){	/* lock not available: drop the block rather than wait */
+			freeb(bp);
+			continue;
+		}
+		if(waserror()){	/* release the read lock if the input path raises, then rethrow */
+			RUNLOCK(ifc);
+			nexterror();
+		}
+		if(ifc->lifc == nil)	/* presumably: no local address configured, nothing to deliver to */
+			freeb(bp);
+		else
+			ipiput4(lb->f, ifc, bp);
+		RUNLOCK(ifc);
+		poperror();
+	}
+}
+
+Medium loopbackmedium =
+{
+.name=		"loopback",
+.maxtu=		Maxtu,		/* same bound loopbackread passes to qbread */
+.mintu=		0,
+.hsize=		0,
+.maclen=	0,
+.bind=		loopbackbind,
+.unbind=	loopbackunbind,
+.bwrite=	loopbackbwrite,
+};
+
+void
+loopbackmediumlink(void)	/* registers loopbackmedium through addipmedium */
+{
+	addipmedium(&loopbackmedium);
+}
diff --git a/src/9vx/a/ip/netdevmedium.c b/src/9vx/a/ip/netdevmedium.c
@@ -0,0 +1,153 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+static void	netdevbind(Ipifc *ifc, int argc, char **argv);
+static void	netdevunbind(Ipifc *ifc);
+static void	netdevbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
+static void	netdevread(void *a);
+
+typedef struct	Netdevrock Netdevrock;
+struct Netdevrock
+{
+	Fs	*f;		/* file system we belong to */
+	Proc	*readp;		/* reading process; nil once netdevread has exited */
+	Chan	*mchan;		/* Data channel, opened by netdevbind from argv[2] */
+};
+
+Medium netdevmedium =
+{
+.name=		"netdev",
+.hsize=		0,
+.mintu=	0,	/* netdevbwrite pads blocks up to ifc->mintu; presumably seeded from here */
+.maxtu=	64000,	/* netdevread reads at most ifc->maxtu bytes; presumably seeded from here */
+.maclen=	0,
+.bind=		netdevbind,
+.unbind=	netdevunbind,
+.bwrite=	netdevbwrite,
+.unbindonclose=	0,
+};
+
+/*
+ *  called to bind an IP ifc to a generic network device
+ *  called with ifc qlock'd; argv[2] names the device file to open
+ */
+static void
+netdevbind(Ipifc *ifc, int argc, char **argv)
+{
+	Chan *mchan;
+	Netdevrock *er;
+
+	/* argv[2] is dereferenced below, so at least three words are required */
+	if(argc < 3)
+		error(Ebadarg);
+
+	mchan = namec(argv[2], Aopen, ORDWR, 0);
+	er = smalloc(sizeof(*er));
+	er->mchan = mchan;
+	er->f = ifc->conv->p->f;
+
+	ifc->arg = er;
+
+	kproc("netdevread", netdevread, ifc);
+}
+
+/*
+ *  called with ifc wlock'd
+ */
+static void
+netdevunbind(Ipifc *ifc)
+{
+	Netdevrock *rock;
+
+	rock = ifc->arg;
+	/* note the reader, then poll every 300ms until it has cleared readp */
+	if(rock->readp)
+		postnote(rock->readp, 1, "unbind", 0);
+	while(rock->readp)
+		tsleep(&up->sleep, return0, 0, 300);
+
+	/* no reader left: close the data channel and drop the rock */
+	if(rock->mchan)
+		cclose(rock->mchan);
+	free(rock);
+}
+
+/*
+ *  called by ipoput with a single block to write
+ */
+static void
+netdevbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
+{
+	Netdevrock *rock = ifc->arg;
+	Chan *c = rock->mchan;
+	/* the device gets one contiguous block of at least mintu bytes */
+	if(bp->next != nil)
+		bp = concatblock(bp);
+	if(BLEN(bp) < ifc->mintu)
+		bp = adjustblock(bp, ifc->mintu);
+	devtab[c->type]->bwrite(c, bp, 0);
+	ifc->out++;
+}
+
+/*
+ *  process to read from the device
+ */
+static void
+netdevread(void *a)	/* kproc body; a is the Ipifc handed over by netdevbind */
+{
+	Ipifc *ifc;
+	Block *bp;
+	Netdevrock *er;
+	char *argv[1];
+
+	ifc = a;
+	er = ifc->arg;
+	er->readp = up;	/* hide identity under a rock for unbind */
+	if(waserror()){
+		er->readp = nil;	/* lets netdevunbind stop waiting */
+		pexit("hangup", 1);
+	}
+	for(;;){
+		bp = devtab[er->mchan->type]->bread(er->mchan, ifc->maxtu, 0);
+		if(bp == nil){
+			/*
+			 * get here if mchan is a pipe and other side hangs up:
+			 * drop the outer error label, then ask the protocol's ctl
+			 * to "unbind" this interface before exiting.
+			 * NOTE(review): the original author (ZZZ) doubted this self-unbind.
+			 */
+			poperror();
+			er->readp = nil;
+			argv[0] = "unbind";
+			if(!waserror())
+				ifc->conv->p->ctl(ifc->conv, argv, 1);
+			pexit("hangup", 1);
+		}
+		if(!CANRLOCK(ifc)){	/* lock not available: drop the block rather than wait */
+			freeb(bp);
+			continue;
+		}
+		if(waserror()){	/* release the read lock if the input path raises, then rethrow */
+			RUNLOCK(ifc);
+			nexterror();
+		}
+		ifc->in++;
+		if(ifc->lifc == nil)	/* presumably: no local address configured */
+			freeb(bp);
+		else
+			ipiput4(er->f, ifc, bp);
+		RUNLOCK(ifc);
+		poperror();
+	}
+}
+
+void
+netdevmediumlink(void)	/* registers netdevmedium through addipmedium */
+{
+	addipmedium(&netdevmedium);
+}
diff --git a/src/9vx/a/ip/netlog.c b/src/9vx/a/ip/netlog.c
@@ -0,0 +1,261 @@
+#include	"u.h"
+#include	"lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"error.h"
+#include	"ip/ip.h"
+
+enum {
+	Nlog		= 16*1024,	/* size in bytes of the ring allocated by netlogopen */
+};
+
+/*
+ *  action log
+ */
+struct Netlog {
+	Lock	lk;		/* presumably what LOCK()/UNLOCK() take around buf, rptr and len */
+	int	opens;		/* open count; netlogclose frees buf when it reaches 0 */
+	char*	buf;		/* ring storage, Nlog bytes */
+	char	*end;		/* buf + Nlog */
+	char	*rptr;		/* next byte netlogread will return */
+	int	len;		/* bytes currently held in the ring */
+
+	int	logmask;			/* mask of things to debug */
+	uchar	iponly[IPaddrlen];		/* ip address to print debugging for */
+	int	iponlyset;			/* nonzero when iponly holds an address */
+
+	QLock	qlock;		/* presumably what QLOCK() takes for the whole of netlogread */
+	Rendez	rendez;		/* netlogread sleeps here; netlog wakes it */
+};
+
+typedef struct Netlogflag {
+	char*	name;	/* word matched by netlogctl */
+	int	mask;	/* Log* bits or'ed into, or cleared from, logmask */
+} Netlogflag;
+
+static Netlogflag flags[] =
+{
+	{ "ppp",	Logppp, },
+	{ "ip",		Logip, },
+	{ "fs",		Logfs, },
+	{ "tcp",	Logtcp, },
+	{ "icmp",	Logicmp, },
+	{ "udp",	Logudp, },
+	{ "compress",	Logcompress, },
+	{ "gre",	Loggre, },
+	{ "tcpwin",	Logtcp|Logtcpwin, },
+	{ "tcprxmt",	Logtcp|Logtcprxmt, },
+	{ "udpmsg",	Logudp|Logudpmsg, },
+	{ "ipmsg",	Logip|Logipmsg, },
+	{ "esp",	Logesp, },
+	{ nil,		0, },	/* terminator: netlogctl's search stops at name == nil */
+};
+
+char Ebadnetctl[] = "too few arguments for netlog control message";
+
+enum
+{
+	CMset,	/* or the named flags into logmask */
+	CMclear,	/* remove the named flags from logmask */
+	CMonly,	/* set or clear the iponly address */
+};
+
+static
+Cmdtab routecmd[] = {	/* third field 0: presumably "any argument count" -- TODO confirm against lookupcmd */
+	CMset,		"set",		0,
+	CMclear,	"clear",	0,
+	CMonly,		"only",		0,
+};
+
+void
+netloginit(Fs *f)	/* allocates the Fs's Netlog; the ring buffer itself comes from netlogopen */
+{
+	f->alog = smalloc(sizeof(Netlog));
+}
+void
+netlogopen(Fs *f)	/* count an open; the first one allocates and resets the ring */
+{
+	LOCK(f->alog);
+	if(waserror()){
+		UNLOCK(f->alog);
+		nexterror();
+	}
+	if(f->alog->opens == 0){
+		if(f->alog->buf == nil && (f->alog->buf = malloc(Nlog)) == nil)
+			error("netlog: no memory for log buffer");	/* handler above unlocks */
+		f->alog->rptr = f->alog->buf;
+		f->alog->end = f->alog->buf + Nlog;
+		f->alog->len = 0;	/* bytes counted before the last close were freed with buf */
+	}
+	f->alog->opens++;
+	UNLOCK(f->alog);
+	poperror();
+}
+
+void
+netlogclose(Fs *f)
+{
+	Netlog *log = f->alog;	/* the last close releases the ring buffer */
+	LOCK(log);
+	if(waserror()){
+		UNLOCK(log);
+		nexterror();
+	}
+	if(--log->opens == 0){
+		free(log->buf);
+		log->buf = nil;
+	}
+	UNLOCK(log);
+	poperror();
+}
+
+static int
+netlogready(void *a)	/* sleep condition for netlogread: nonzero once bytes are buffered */
+{
+	Netlog *log = ((Fs*)a)->alog;
+
+	return log->len;
+}
+
+long
+netlogread(Fs *f, void *a, ulong _, long n)
+{
+	int i, d;
+	char *p, *rptr;
+	/* wait until the ring holds data, copy out at most n bytes to a, return the count */
+	QLOCK(f->alog);
+	if(waserror()){
+		QUNLOCK(f->alog);
+		nexterror();
+	}
+
+	for(;;){
+		LOCK(f->alog);
+		if(f->alog->len){
+			if(n > f->alog->len)
+				n = f->alog->len;
+			d = 0;	/* number of bytes that lie past the wrap point */
+			rptr = f->alog->rptr;
+			f->alog->rptr += n;
+			if(f->alog->rptr >= f->alog->end){
+				d = f->alog->rptr - f->alog->end;
+				f->alog->rptr = f->alog->buf + d;
+			}
+			f->alog->len -= n;
+			UNLOCK(f->alog);
+			/* NOTE(review): the copy runs after UNLOCK, so netlog() may write the ring meanwhile */
+			i = n-d;
+			p = a;
+			memmove(p, rptr, i);	/* from the old rptr up to the end of the ring */
+			memmove(p+i, f->alog->buf, d);	/* wrapped remainder from the start of the ring */
+			break;
+		}
+		else
+			UNLOCK(f->alog);
+		/* nothing buffered: sleep until netlog() wakes the rendez */
+		sleep(&f->alog->rendez, netlogready, f);
+	}
+
+	QUNLOCK(f->alog);
+	poperror();
+
+	return n;
+}
+
+void
+netlogctl(Fs *f, char* s, int n)
+{
+	int i, set;
+	Netlogflag *fp;
+	Cmdbuf *cb;
+	Cmdtab *ct;
+	/* parse "set|clear flag..." or "only addr"; errors are raised with cb freed */
+	cb = parsecmd(s, n);
+	if(waserror()){
+		free(cb);
+		nexterror();
+	}
+
+	if(cb->nf < 2)
+		error(Ebadnetctl);
+
+	ct = lookupcmd(cb, routecmd, nelem(routecmd));
+	set = 1;
+
+	switch(ct->index){
+	case CMset:
+		set = 1;
+		break;
+
+	case CMclear:
+		set = 0;
+		break;
+
+	case CMonly:
+		parseip(f->alog->iponly, cb->f[1]);
+		if(ipcmp(f->alog->iponly, IPnoaddr) == 0)
+			f->alog->iponlyset = 0;
+		else
+			f->alog->iponlyset = 1;
+		free(cb);
+		poperror();	/* was missing: this return left the waserror label on the stack */
+		return;
+
+	default:
+		cmderror(cb, "unknown ip control message");
+	}
+
+	for(i = 1; i < cb->nf; i++){
+		for(fp = flags; fp->name; fp++)
+			if(strcmp(fp->name, cb->f[i]) == 0)
+				break;
+		if(fp->name == nil)	/* unknown flag names are ignored */
+			continue;
+		if(set)
+			f->alog->logmask |= fp->mask;
+		else
+			f->alog->logmask &= ~fp->mask;
+	}
+
+	free(cb);
+	poperror();
+}
+
+void
+netlog(Fs *f, int mask, char *fmt, ...)
+{
+	char buf[128], *t, *fp;
+	int i, n;
+	va_list arg;
+
+	/* cheap unlocked filter: skip formatting unless the class is enabled and the log is open */
+	if(!(f->alog->logmask & mask) || f->alog->opens == 0)
+		return;
+	va_start(arg, fmt);
+	n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf;
+	va_end(arg);
+
+	LOCK(f->alog);
+	if(f->alog->opens == 0 || f->alog->buf == nil){	/* closed meanwhile: netlogclose freed the ring */
+		UNLOCK(f->alog);
+		return;
+	}
+	i = f->alog->len + n - Nlog;
+	if(i > 0){	/* not enough room: discard the i oldest bytes */
+		f->alog->len -= i;
+		f->alog->rptr += i;
+		if(f->alog->rptr >= f->alog->end)
+			f->alog->rptr = f->alog->buf + (f->alog->rptr - f->alog->end);
+	}
+	t = f->alog->rptr + f->alog->len;	/* write position; may lie past end until wrapped below */
+	fp = buf;
+	f->alog->len += n;
+	while(n-- > 0){
+		if(t >= f->alog->end)
+			t = f->alog->buf + (t - f->alog->end);
+		*t++ = *fp++;
+	}
+	UNLOCK(f->alog);
+	wakeup(&f->alog->rendez);
+}
diff --git a/src/9vx/a/ip/nullmedium.c b/src/9vx/a/ip/nullmedium.c
@@ -0,0 +1,39 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+static void
+nullbind(Ipifc* _, int __, char** ___)	/* binding the null medium always raises an error */
+{
+	error("cannot bind null device");
+}
+
+static void
+nullunbind(Ipifc* _)	/* nothing to undo: nullbind never succeeds */
+{
+}
+
+static void
+nullbwrite(Ipifc* _, Block* __, int ___, uchar* ____)	/* writing always raises; the block is not touched here */
+{
+	error("nullbwrite");
+}
+
+Medium nullmedium =
+{
+.name=		"null",
+.bind=		nullbind,	/* always errors */
+.unbind=	nullunbind,	/* no-op */
+.bwrite=	nullbwrite,	/* always errors */
+};
+
+void
+nullmediumlink(void)	/* registers nullmedium through addipmedium */
+{
+	addipmedium(&nullmedium);
+}
diff --git a/src/9vx/a/ip/pktmedium.c b/src/9vx/a/ip/pktmedium.c
@@ -0,0 +1,78 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+
+static void	pktbind(Ipifc*, int, char**);
+static void	pktunbind(Ipifc*);
+static void	pktbwrite(Ipifc*, Block*, int, uchar*);
+static void	pktin(Fs*, Ipifc*, Block*);
+
+Medium pktmedium =
+{
+.name=		"pkt",
+.hsize=		14,
+.mintu=		40,
+.maxtu=		4*1024,
+.maclen=	6,
+.bind=		pktbind,	/* empty */
+.unbind=	pktunbind,	/* empty */
+.bwrite=	pktbwrite,	/* output goes to the conversation's rq */
+.pktin=		pktin,		/* input written to 'data' goes to ipiput4 */
+};
+
+/*
+ *  bind hook for the pkt medium; the body is empty, so argc and argv are ignored
+ *  called with ifc wlock'd
+ */
+static void
+pktbind(Ipifc* _, int argc, char **argv)
+{
+}
+
+/*
+ *  called with ifc wlock'd; nothing to release because pktbind sets nothing up
+ */
+static void
+pktunbind(Ipifc* _)
+{
+}
+
+/*
+ *  called by ipoput with a single packet to write
+ */
+static void
+pktbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
+{
+	Conv *c = ifc->conv;
+	/* one contiguous block goes onto the conversation's rq; snoopers get their own copy */
+	bp = concatblock(bp);
+	if(c->snoopers.ref > 0)
+		qpass(c->sq, copyblock(bp, BLEN(bp)));
+	qpass(c->rq, bp);
+}
+
+/*
+ *  called with ifc rlocked when someone write's to 'data'
+ */
+static void
+pktin(Fs *f, Ipifc *ifc, Block *bp)
+{
+	Conv *c = ifc->conv;
+	if(ifc->lifc == nil){	/* nothing on the lifc list: discard the block */
+		freeb(bp);
+		return;
+	}
+	if(c->snoopers.ref > 0)
+		qpass(c->sq, copyblock(bp, BLEN(bp)));
+	ipiput4(f, ifc, bp);
+}
+
+void
+pktmediumlink(void)	/* registers pktmedium through addipmedium */
+{
+	addipmedium(&pktmedium);
+}
diff --git a/src/9vx/a/ip/ptclbsum.c b/src/9vx/a/ip/ptclbsum.c
@@ -0,0 +1,72 @@
+#include	"u.h"
+#include	"lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"error.h"
+#include	"ip.h"
+
+static	short	endian	= 1;	/* probe value: only its in-memory byte order matters */
+static	uchar*	aendian	= (uchar*)&endian;	/* address of endian's first byte */
+#define	LITTLE	*aendian	/* 1 when the low-order byte is stored first */
+
+ushort
+ptclbsum(uchar *addr, int len)
+{
+	ulong losum, hisum, mdsum, x;
+	ulong t1, t2;
+	/* 16-bit sum of len bytes at addr with carries folded back in; the result is not complemented */
+	losum = 0;	/* terms already in result byte order */
+	hisum = 0;	/* terms whose two bytes are swapped into the result at the end */
+	mdsum = 0;	/* native-order 16-bit loads; assigned to losum or hisum below */
+	/* x marks an odd start address: the first byte is summed alone and the two sums trade roles */
+	x = 0;
+	if((ulong)addr & 1) {
+		if(len) {
+			hisum += addr[0];
+			len--;
+			addr++;
+		}
+		x = 1;
+	}
+	while(len >= 16) {	/* unrolled: eight 16-bit loads per pass, loads interleaved with adds */
+		t1 = *(ushort*)(addr+0);
+		t2 = *(ushort*)(addr+2);	mdsum += t1;
+		t1 = *(ushort*)(addr+4);	mdsum += t2;
+		t2 = *(ushort*)(addr+6);	mdsum += t1;
+		t1 = *(ushort*)(addr+8);	mdsum += t2;
+		t2 = *(ushort*)(addr+10);	mdsum += t1;
+		t1 = *(ushort*)(addr+12);	mdsum += t2;
+		t2 = *(ushort*)(addr+14);	mdsum += t1;
+		mdsum += t2;
+		len -= 16;
+		addr += 16;
+	}
+	while(len >= 2) {	/* remaining whole 16-bit words */
+		mdsum += *(ushort*)addr;
+		len -= 2;
+		addr += 2;
+	}
+	if(x) {
+		if(len)	/* one trailing byte left over */
+			losum += addr[0];
+		if(LITTLE)
+			losum += mdsum;
+		else
+			hisum += mdsum;
+	} else {
+		if(len)	/* one trailing byte left over */
+			hisum += addr[0];
+		if(LITTLE)
+			hisum += mdsum;
+		else
+			losum += mdsum;
+	}
+	/* move hisum into losum with its low two bytes exchanged, then fold bits above 15 back in */
+	losum += hisum >> 8;
+	losum += (hisum & 0xff) << 8;
+	while((hisum = losum>>16))
+		losum = hisum + (losum & 0xffff);
+
+	return losum & 0xffff;
+}
diff --git a/src/9vx/a/ip/rudp.c b/src/9vx/a/ip/rudp.c
@@ -0,0 +1,1055 @@
+/*
+ *  Reliable User Datagram Protocol, currently only for IPv4.
+ *  This protocol is compatible with UDP's packet format.
+ *  It could be done over UDP if need be.
+ */
+#include	"u.h"
+#include	"lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"error.h"
+
+#include	"ip.h"
+
+#define DEBUG	0
+#define DPRINT if(DEBUG)print	/* expands to a bare if: do not follow DPRINT(...) with else */
+/* sequence arithmetic; 0 is skipped by NEXTSEQ, and arguments are evaluated more than once */
+#define SEQDIFF(a,b) ( (a)>=(b)?\
+			(a)-(b):\
+			0xffffffffUL-((b)-(a)) )	/* how far a is ahead of b, allowing for wrap */
+#define INSEQ(a,start,end) ( (start)<=(end)?\
+				((a)>(start)&&(a)<=(end)):\
+				((a)>(start)||(a)<=(end)) )	/* start < a <= end, allowing for wrap */
+#define UNACKED(r) SEQDIFF(r->sndseq, r->ackrcvd)	/* sndseq minus ackrcvd */
+#define NEXTSEQ(a) ( (a)+1 == 0 ? 1 : (a)+1 )	/* successor that never yields 0 */
+
+enum
+{
+	UDP_PHDRSIZE	= 12,	/* pseudo header: Unused..udpdst in Udphdr */
+//	UDP_HDRSIZE	= 20,	/* pseudo header + udp header */
+	UDP_RHDRSIZE	= 36,	/* pseudo header + udp header + rudp header */
+	UDP_IPHDR	= 8,	/* leading ip header bytes before the pseudo header: vihl..frag */
+	IP_UDPPROTO	= 254,	/* value stored in udpproto and registered as rudp->ipproto */
+	UDP_USEAD7	= 52,	/* size of new ipv6 headers struct: 3 addresses + 2 ports */
+
+	Rudprxms	= 200,	/* relackproc retransmits once timeout exceeds Rudprxms*xmits */
+	Rudptickms	= 50,	/* period of the relackproc loop, and the timeout increment */
+	Rudpmaxxmit	= 10,	/* not used in the part of the file visible here */
+	Maxunacked	= 100,	/* rudpkick sleeps while UNACKED(r) exceeds this */
+};
+
+#define Hangupgen	0xffffffff	/* used only in hangup messages */
+
+typedef struct Udphdr Udphdr;
+struct Udphdr
+{
+	/* ip header */
+	uchar	vihl;		/* Version and header length */
+	uchar	tos;		/* Type of service */
+	uchar	length[2];	/* packet length */
+	uchar	id[2];		/* Identification */
+	uchar	frag[2];	/* Fragment information */
+
+	/* pseudo header starts here */
+	uchar	Unused;		/* rudpiput saves this as the old ttl and zeroes it before checksumming */
+	uchar	udpproto;	/* Protocol */
+	uchar	udpplen[2];	/* Header plus data length */
+	uchar	udpsrc[4];	/* Ip source */
+	uchar	udpdst[4];	/* Ip destination */
+
+	/* udp header */
+	uchar	udpsport[2];	/* Source port */
+	uchar	udpdport[2];	/* Destination port */
+	uchar	udplen[2];	/* data length */
+	uchar	udpcksum[2];	/* Checksum; rudpiput skips verification when it is 0 */
+};
+
+typedef struct Rudphdr Rudphdr;
+struct Rudphdr
+{
+	/* ip header */
+	uchar	vihl;		/* Version and header length */
+	uchar	tos;		/* Type of service */
+	uchar	length[2];	/* packet length */
+	uchar	id[2];		/* Identification */
+	uchar	frag[2];	/* Fragment information */
+
+	/* pseudo header starts here */
+	uchar	Unused;		/* same slot as Udphdr.Unused */
+	uchar	udpproto;	/* Protocol */
+	uchar	udpplen[2];	/* Header plus data length */
+	uchar	udpsrc[4];	/* Ip source */
+	uchar	udpdst[4];	/* Ip destination */
+
+	/* udp header */
+	uchar	udpsport[2];	/* Source port */
+	uchar	udpdport[2];	/* Destination port */
+	uchar	udplen[2];	/* data length (includes rudp header) */
+	uchar	udpcksum[2];	/* Checksum */
+
+	/* rudp header; rudpkick fills these from the peer's Reliable state */
+	uchar	relseq[4];	/* id of this packet (or 0) */
+	uchar	relsgen[4];	/* generation/time stamp */
+	uchar	relack[4];	/* packet being acked (or 0) */
+	uchar	relagen[4];	/* generation/time stamp */
+};
+
+
+/*
+ *  one state structure per destination
+ */
+typedef struct Reliable Reliable;
+struct Reliable
+{
+	Ref;			/* presumably dropped through relput() -- TODO confirm */
+
+	Reliable *next;		/* next peer on the owning Rudpcb's r list */
+
+	uchar	addr[IPaddrlen];	/* always V6 when put here */
+	ushort	port;		/* with addr, the key relstate matches on */
+
+	Block	*unacked;	/* unacked msg list */
+	Block	*unackedtail;	/*  and its tail */
+
+	int	timeout;	/* time since first unacked msg sent */
+	int	xmits;		/* number of times first unacked msg sent */
+
+	ulong	sndseq;		/* next packet to be sent */
+	ulong	sndgen;		/*  and its generation */
+
+	ulong	rcvseq;		/* last packet received */
+	ulong	rcvgen;		/*  and its generation */
+
+	ulong	acksent;	/* last ack sent */
+	ulong	ackrcvd;	/* last msg for which ack was rcvd */
+
+	/* flow control */
+	QLock	lock;		/* held by rudpkick around its flow-control sleep */
+	Rendez	vous;		/* rudpkick sleeps here until flow() is true */
+	int	blocked;	/* set to 1 by rudpkick for the duration of that sleep */
+};
+
+
+
+/* MIB II counters */
+typedef struct Rudpstats Rudpstats;
+struct Rudpstats
+{
+	ulong	rudpInDatagrams;	/* bumped on every call to rudpiput */
+	ulong	rudpNoPorts;		/* input with no matching conversation */
+	ulong	rudpInErrors;		/* input that failed the checksum */
+	ulong	rudpOutDatagrams;	/* bumped by rudpkick per packet queued for output */
+};
+
+typedef struct Rudppriv Rudppriv;
+struct Rudppriv
+{
+	Ipht	ht;			/* conversations, looked up by rudpiput with iphtlook */
+
+	/* MIB counters */
+	Rudpstats	ustats;
+
+	/* non-MIB stats */
+	ulong	csumerr;		/* checksum errors */
+	ulong	lenerr;			/* short packet */
+	ulong	rxmits;			/* # of retransmissions */
+	ulong	orders;			/* # of out of order pkts */
+
+	/* keeping track of the ack kproc */
+	int	ackprocstarted;		/* set once rudpstartackproc has created the kproc */
+	QLock	apl;			/* serializes that creation */
+};
+
+
+static ulong generation = 0;
+static Rendez rend;
+
+/*
+ *  protocol specific part of Conv
+ */
+typedef struct Rudpcb Rudpcb;
+struct Rudpcb
+{
+	QLock;			/* qlock(ucb): held while the r list is walked or changed */
+	uchar	headers;	/* 0, or 7 once the "headers" ctl has been written */
+	uchar	randdrop;	/* percentage of output packets doipoput discards */
+	Reliable *r;		/* per-peer state, linked through next */
+};
+
+/*
+ * local functions 
+ */
+void	relsendack(Conv*, Reliable*, int);
+int	reliput(Conv*, Block*, uchar*, ushort);
+Reliable *relstate(Rudpcb*, uchar*, ushort, char*);
+void	relput(Reliable*);
+void	relforget(Conv *, uchar*, int, int);
+void	relackproc(void *);
+void	relackq(Reliable *, Block*);
+void	relhangup(Conv *, Reliable*);
+void	relrexmit(Conv *, Reliable*);
+void	relput(Reliable*);
+void	rudpkick(void *x);
+
+static void
+rudpstartackproc(Proto *rudp)	/* create the relackproc kproc the first time it is needed */
+{
+	Rudppriv *rpriv;
+	char kpname[KNAMELEN];
+
+	rpriv = rudp->priv;
+	if(rpriv->ackprocstarted == 0){
+		qlock(&rpriv->apl);
+		if(rpriv->ackprocstarted == 0){	/* recheck now that apl is held */
+			snprint(kpname, sizeof kpname, "#I%drudpack", rudp->f->dev);	/* bounded, unlike sprint */
+			kproc(kpname, relackproc, rudp);
+			rpriv->ackprocstarted = 1;
+		}
+		qunlock(&rpriv->apl);
+	}
+}
+
+static char*
+rudpconnect(Conv *c, char **argv, int argc)
+{
+	char *e;
+	Rudppriv *upriv;
+
+	upriv = c->p->priv;
+	rudpstartackproc(c->p);
+	e = Fsstdconnect(c, argv, argc);
+	Fsconnected(c, e);	/* reports the outcome, success or failure */
+	if(e == nil)	/* as in rudpannounce: a failed conversation stays out of the hash table */
+		iphtadd(&upriv->ht, c);
+	return e;
+}
+
+
+static int
+rudpstate(Conv *c, char *state, int n)
+{
+	Rudpcb *ucb = (Rudpcb*)c->ptcl;
+	Reliable *peer;
+	int used;
+
+	/* "Open" or "Closed", then " addr/unacked" for each peer, then a newline */
+	used = snprint(state, n, "%s", c->inuse?"Open":"Closed");
+	qlock(ucb);
+	for(peer = ucb->r; peer != nil; peer = peer->next)
+		used += snprint(state+used, n-used, " %I/%ld", peer->addr, UNACKED(peer));
+	used += snprint(state+used, n-used, "\n");
+	qunlock(ucb);
+	return used;
+}
+
+static char*
+rudpannounce(Conv *c, char** argv, int argc)
+{
+	Rudppriv *priv = c->p->priv;
+	char *err;
+
+	/* start the shared ack kproc if it is not running yet */
+	rudpstartackproc(c->p);
+	err = Fsstdannounce(c, argv, argc);
+	if(err == nil){
+		Fsconnected(c, nil);
+		iphtadd(&priv->ht, c);
+	}
+	/* nil on success, otherwise the announce error */
+	return err;
+}
+
+static void
+rudpcreate(Conv *c)	/* open the conversation's read and write queues, 64K each */
+{
+	c->rq = qopen(64*1024, Qmsg, 0, 0);
+	c->wq = qopen(64*1024, Qkick, rudpkick, c);	/* rudpkick(c) is the queue's kick routine */
+}
+
+static void
+rudpclose(Conv *c)
+{
+	Rudpcb *ucb;
+	Reliable *r, *nr;
+	Rudppriv *upriv;
+	/* take c out of the hash table, flush acks, close its queues, then drop every peer */
+	upriv = c->p->priv;
+	iphtrem(&upriv->ht, c);
+
+	/* force out any delayed acks */
+	ucb = (Rudpcb*)c->ptcl;
+	qlock(ucb);
+	for(r = ucb->r; r; r = r->next){
+		if(r->acksent != r->rcvseq)
+			relsendack(c, r, 0);
+	}
+	qunlock(ucb);
+	/* close the queues and clear the conversation's addresses and ports */
+	qclose(c->rq);
+	qclose(c->wq);
+	qclose(c->eq);
+	ipmove(c->laddr, IPnoaddr);
+	ipmove(c->raddr, IPnoaddr);
+	c->lport = 0;
+	c->rport = 0;
+	/* NOTE(review): headers and randdrop are reset before ucb is locked again */
+	ucb->headers = 0;
+	ucb->randdrop = 0;
+	qlock(ucb);
+	for(r = ucb->r; r; r = nr){
+		if(r->acksent != r->rcvseq)
+			relsendack(c, r, 0);
+		nr = r->next;	/* fetched before r is handed to relhangup/relput */
+		relhangup(c, r);
+		relput(r);
+	}
+	ucb->r = 0;
+
+	qunlock(ucb);
+}
+
+/*
+ *  randomly don't send packets
+ */
+static void
+doipoput(Conv *c, Fs *f, Block *bp, int x, int ttl, int tos)
+{
+	Rudpcb *ucb = (Rudpcb*)c->ptcl;
+	/* randdrop is a percentage; 0 means never drop */
+	if(ucb->randdrop == 0 || nrand(100) >= ucb->randdrop){
+		ipoput4(f, bp, x, ttl, tos, nil);
+		return;
+	}
+	freeblist(bp);
+}
+
+int
+flow(void *v)	/* sleep condition used by rudpkick */
+{
+	Reliable *peer = v;
+
+	return !(UNACKED(peer) > Maxunacked);
+}
+
+void
+rudpkick(void *x)
+{
+	Conv *c = x;
+	Udphdr *uh;
+	ushort rport;
+	uchar laddr[IPaddrlen], raddr[IPaddrlen];
+	Block *bp;
+	Rudpcb *ucb;
+	Rudphdr *rh;
+	Reliable *r;
+	int dlen, ptcllen;
+	Rudppriv *upriv;
+	Fs *f;
+	/* take one block off c->wq, wrap it in ip+udp+rudp headers, queue a copy for retransmit, send it */
+	upriv = c->p->priv;
+	f = c->p->f;
+
+	netlog(c->p->f, Logrudp, "rudp: kick\n");
+	bp = qget(c->wq);
+	if(bp == nil)
+		return;
+
+	ucb = (Rudpcb*)c->ptcl;
+	switch(ucb->headers) {
+	case 7:
+		/* get user specified addresses */
+		bp = pullupblock(bp, UDP_USEAD7);
+		if(bp == nil)
+			return;
+		ipmove(raddr, bp->rp);
+		bp->rp += IPaddrlen;
+		ipmove(laddr, bp->rp);
+		bp->rp += IPaddrlen;
+		/* pick interface closest to dest */
+		if(ipforme(f, laddr) != Runi)
+			findlocalip(f, laddr, raddr);
+		bp->rp += IPaddrlen;		/* Ignore ifc address */
+		rport = nhgets(bp->rp);
+		bp->rp += 2+2;			/* Ignore local port */
+		break;
+	default:
+		ipmove(raddr, c->raddr);
+		ipmove(laddr, c->laddr);
+		rport = c->rport;
+		break;
+	}
+
+	dlen = blocklen(bp);
+
+	/* Make space to fit rudp & ip header */
+	bp = padblock(bp, UDP_IPHDR+UDP_RHDRSIZE);
+	if(bp == nil)
+		return;
+
+	uh = (Udphdr *)(bp->rp);
+	uh->vihl = IP_VER4;
+
+	rh = (Rudphdr*)uh;	/* same bytes; Rudphdr adds the four rel* fields after the udp header */
+
+	ptcllen = dlen + (UDP_RHDRSIZE-UDP_PHDRSIZE);
+	uh->Unused = 0;
+	uh->udpproto = IP_UDPPROTO;
+	uh->frag[0] = 0;
+	uh->frag[1] = 0;
+	hnputs(uh->udpplen, ptcllen);
+	switch(ucb->headers){
+	case 7:
+		v6tov4(uh->udpdst, raddr);
+		hnputs(uh->udpdport, rport);
+		v6tov4(uh->udpsrc, laddr);
+		break;
+	default:
+		v6tov4(uh->udpdst, c->raddr);
+		hnputs(uh->udpdport, c->rport);
+		if(ipcmp(c->laddr, IPnoaddr) == 0)
+			findlocalip(f, c->laddr, c->raddr);
+		v6tov4(uh->udpsrc, c->laddr);
+		break;
+	}
+	hnputs(uh->udpsport, c->lport);
+	hnputs(uh->udplen, ptcllen);
+	uh->udpcksum[0] = 0;
+	uh->udpcksum[1] = 0;
+	/* stamp sequence and ack fields from the peer's state while ucb is locked */
+	qlock(ucb);
+	r = relstate(ucb, raddr, rport, "kick");
+	r->sndseq = NEXTSEQ(r->sndseq);
+	hnputl(rh->relseq, r->sndseq);
+	hnputl(rh->relsgen, r->sndgen);
+
+	hnputl(rh->relack, r->rcvseq);  /* ACK last rcvd packet */
+	hnputl(rh->relagen, r->rcvgen);
+
+	if(r->rcvseq != r->acksent)
+		r->acksent = r->rcvseq;	/* the ack rides on this packet */
+
+	hnputs(uh->udpcksum, ptclcsum(bp, UDP_IPHDR, dlen+UDP_RHDRSIZE));
+
+	relackq(r, bp);	/* keeps its own copy of bp for retransmission */
+	qunlock(ucb);
+
+	upriv->ustats.rudpOutDatagrams++;
+
+	DPRINT("sent: %lud/%lud, %lud/%lud\n", 
+		r->sndseq, r->sndgen, r->rcvseq, r->rcvgen);
+
+	doipoput(c, f, bp, 0, c->ttl, c->tos);
+	/* NOTE(review): the handler runs relput(r) before qunlock(&r->lock); confirm relput cannot free r */
+	if(waserror()) {
+		relput(r);
+		qunlock(&r->lock);
+		nexterror();
+	}
+
+	/* flow control of sorts */
+	qlock(&r->lock);
+	if(UNACKED(r) > Maxunacked){
+		r->blocked = 1;
+		sleep(&r->vous, flow, r);
+		r->blocked = 0;
+	}
+
+	qunlock(&r->lock);
+	relput(r);
+	poperror();
+}
+
+void
+rudpiput(Proto *rudp, Ipifc *ifc, Block *bp)
+{
+	int len, olen, ottl;
+	Udphdr *uh;
+	Conv *c;
+	Rudpcb *ucb;
+	uchar raddr[IPaddrlen], laddr[IPaddrlen];
+	ushort rport, lport;
+	Rudppriv *upriv;
+	Fs *f;
+	uchar *p;
+	/* input path: verify the checksum, find the conversation, run reliput, queue the data on c->rq */
+	upriv = rudp->priv;
+	f = rudp->f;
+
+	upriv->ustats.rudpInDatagrams++;
+
+	uh = (Udphdr*)(bp->rp);
+
+	/* Put back pseudo header for checksum
+	 * (remember old values for icmpnoconv())
+	 */
+	ottl = uh->Unused;
+	uh->Unused = 0;
+	len = nhgets(uh->udplen);
+	olen = nhgets(uh->udpplen);
+	hnputs(uh->udpplen, len);
+
+	v4tov6(raddr, uh->udpsrc);
+	v4tov6(laddr, uh->udpdst);
+	lport = nhgets(uh->udpdport);
+	rport = nhgets(uh->udpsport);
+
+	if(nhgets(uh->udpcksum)) {
+		if(ptclcsum(bp, UDP_IPHDR, len+UDP_PHDRSIZE)) {
+			upriv->ustats.rudpInErrors++;
+			upriv->csumerr++;
+			netlog(f, Logrudp, "rudp: checksum error %I\n", raddr);
+			DPRINT("rudp: checksum error %I\n", raddr);
+			freeblist(bp);
+			return;
+		}
+	}
+
+	qlock(rudp);
+
+	c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
+	if(c == nil){
+		/* no conversation found */
+		upriv->ustats.rudpNoPorts++;
+		qunlock(rudp);
+		netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
+			laddr, lport);
+		uh->Unused = ottl;
+		hnputs(uh->udpplen, olen);
+		icmpnoconv(f, bp);
+		freeblist(bp);
+		return;
+	}
+	ucb = (Rudpcb*)c->ptcl;
+	qlock(ucb);	/* taken before the protocol lock is dropped; every return below must release it */
+	qunlock(rudp);
+
+	if(reliput(c, bp, raddr, rport) < 0){
+		qunlock(ucb);
+		freeb(bp);
+		return;
+	}
+
+	/*
+	 * Trim the packet down to data size
+	 */
+	len -= (UDP_RHDRSIZE-UDP_PHDRSIZE);
+	bp = trimblock(bp, UDP_IPHDR+UDP_RHDRSIZE, len);
+	if(bp == nil) {
+		netlog(f, Logrudp, "rudp: len err %I.%d -> %I.%d\n", 
+			raddr, rport, laddr, lport);
+		DPRINT("rudp: len err %I.%d -> %I.%d\n", 
+			raddr, rport, laddr, lport);
+		upriv->lenerr++;
+		qunlock(ucb);	/* was missing: this path returned with ucb still locked */
+		return;
+	}
+
+	netlog(f, Logrudpmsg, "rudp: %I.%d -> %I.%d l %d\n", 
+		raddr, rport, laddr, lport, len);
+
+	switch(ucb->headers){
+	case 7:
+		/* pass the src address */
+		bp = padblock(bp, UDP_USEAD7);
+		p = bp->rp;
+		ipmove(p, raddr); p += IPaddrlen;
+		ipmove(p, laddr); p += IPaddrlen;
+		ipmove(p, ifc->lifc->local); p += IPaddrlen;
+		hnputs(p, rport); p += 2;
+		hnputs(p, lport);
+		break;
+	default:
+		/* connection oriented rudp */
+		if(ipcmp(c->raddr, IPnoaddr) == 0){
+			/* save the src address in the conversation */
+		 	ipmove(c->raddr, raddr);
+			c->rport = rport;
+
+			/* reply with the same ip address (if not broadcast) */
+			if(ipforme(f, laddr) == Runi)
+				ipmove(c->laddr, laddr);
+			else
+				v4tov6(c->laddr, ifc->lifc->local);
+		}
+		break;
+	}
+	if(bp->next)
+		bp = concatblock(bp);
+
+	if(qfull(c->rq)) {
+		netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n", raddr, rport,
+			laddr, lport);
+		freeblist(bp);
+	}
+	else
+		qpass(c->rq, bp);
+	
+	qunlock(ucb);
+}
+
+static char *rudpunknown = "unknown rudp ctl request";
+
+char*
+rudpctl(Conv *c, char **f, int n)
+{
+	Rudpcb *ucb = (Rudpcb*)c->ptcl;
+	uchar ip[IPaddrlen];
+	int val;
+
+	if(n < 1)
+		return rudpunknown;
+	/* "headers": mark the conversation as using the 52-byte address prefix (headers == 7) */
+	if(strcmp(f[0], "headers") == 0){
+		ucb->headers = 7;
+		return nil;
+	}
+	/* "hangup addr port": hand that peer to relforget with ucb locked */
+	if(strcmp(f[0], "hangup") == 0){
+		if(n < 3)
+			return "bad syntax";
+		if(parseip(ip, f[1]) == -1)
+			return Ebadip;
+		val = atoi(f[2]);
+		qlock(ucb);
+		relforget(c, ip, val, 1);
+		qunlock(ucb);
+		return nil;
+	}
+	/* "randdrop [percent]": share of output packets to discard, 10 when omitted */
+	if(strcmp(f[0], "randdrop") != 0)
+		return rudpunknown;
+	val = n > 1 ? atoi(f[1]) : 10;
+	if(val < 0 || val > 100)
+		return "illegal rudp drop rate";
+	ucb->randdrop = val;
+	return nil;
+}
+
+void
+rudpadvise(Proto *rudp, Block *bp, char *msg)
+{
+	Udphdr *uh = (Udphdr*)(bp->rp);
+	uchar src[IPaddrlen], dst[IPaddrlen];
+	ushort sport, dport;
+	Conv **cp, *c;
+
+	/* addresses and ports as they appear in the packet carried by bp */
+	v4tov6(dst, uh->udpdst);
+	v4tov6(src, uh->udpsrc);
+	sport = nhgets(uh->udpsport);
+	dport = nhgets(uh->udpdport);
+
+	/*
+	 * the packet's source must be a conversation's local end and its
+	 * destination the remote end; hang up the first one that matches
+	 */
+	for(cp = rudp->conv; (c = *cp) != nil; cp++){
+		if(c->rport != dport || c->lport != sport)
+			continue;
+		if(ipcmp(c->raddr, dst) != 0 || ipcmp(c->laddr, src) != 0)
+			continue;
+		qhangup(c->rq, msg);
+		qhangup(c->wq, msg);
+		break;
+	}
+	freeblist(bp);
+}
+
+int
+rudpstats(Proto *rudp, char *buf, int len)
+{
+	Rudppriv *priv = rudp->priv;
+	Rudpstats *mib = &priv->ustats;
+
+	/* one line: in, no-port, in-errors, out, then retransmits and out-of-order counts */
+	return snprint(buf, len, "%lud %lud %lud %lud %lud %lud\n",
+		mib->rudpInDatagrams,
+		mib->rudpNoPorts,
+		mib->rudpInErrors,
+		mib->rudpOutDatagrams,
+		priv->rxmits, priv->orders);
+}
+
+void
+rudpinit(Fs *fs)
+{
+	Proto *p;
+
+	/* describe the protocol, then hand it to the Fs */
+	p = smalloc(sizeof(Proto));
+	p->name = "rudp";
+	p->ipproto = IP_UDPPROTO;
+	p->nc = 16;
+	p->ptclsize = sizeof(Rudpcb);
+	p->priv = smalloc(sizeof(Rudppriv));
+	/* entry points */
+	p->create = rudpcreate;
+	p->connect = rudpconnect;
+	p->announce = rudpannounce;
+	p->close = rudpclose;
+	p->ctl = rudpctl;
+	p->state = rudpstate;
+	p->stats = rudpstats;
+	p->rcv = rudpiput;
+	p->advise = rudpadvise;
+	Fsproto(fs, p);
+}
+
+/*********************************************/
+/* Here start the reliable helper functions  */
+/*********************************************/
+/*
+ *  Enqueue a copy of an unacked block (bp itself is not kept) at the tail of r's unacked list for possible retransmissions
+ */
+void
+relackq(Reliable *r, Block *bp)
+{
+	Block *np;
+
+	np = copyblock(bp, blocklen(bp));
+	if(r->unacked)
+		r->unackedtail->list = np;	/* list not empty: append */
+	else {
+		/* restart timer */
+		r->timeout = 0;
+		r->xmits = 1;
+		r->unacked = np;
+	}
+	r->unackedtail = np;
+	np->list = nil;
+}
+
+/*
+ *  retransmit unacked blocks and send outstanding acks once every Rudptickms; a is the Proto, and the loop never returns
+ */
+void
+relackproc(void *a)
+{
+	Rudpcb *ucb;
+	Proto *rudp;
+	Reliable *r;
+	Conv **s, *c;
+
+	rudp = (Proto *)a;
+
+loop:
+	tsleep(&up->sleep, return0, 0, Rudptickms);
+
+	for(s = rudp->conv; *s; s++) {
+		c = *s;
+		ucb = (Rudpcb*)c->ptcl;
+		qlock(ucb);	/* relrexmit is documented as called with ucb locked */
+
+		for(r = ucb->r; r; r = r->next) {
+			if(r->unacked != nil){
+				r->timeout += Rudptickms;
+				if(r->timeout > Rudprxms*r->xmits)	/* the wait grows with the number of transmissions */
+					relrexmit(c, r);
+			}
+			if(r->acksent != r->rcvseq)	/* the last ack sent does not cover the last sequence received */
+				relsendack(c, r, 0);
+		}
+		qunlock(ucb);
+	}
+	goto loop;
+}
+
+/*
+ *  get the state record for a conversation with addr/port, creating it if needed; a reference is added for the caller
+ */
+Reliable*
+relstate(Rudpcb *ucb, uchar *addr, ushort port, char *from)
+{
+	Reliable *r, **l;
+
+	l = &ucb->r;
+	for(r = *l; r; r = *l){
+		if(memcmp(addr, r->addr, IPaddrlen) == 0 && 
+		    port == r->port)
+			break;
+		l = &r->next;
+	}
+
+	/* no state for this addr/port, create some */
+	if(r == nil){
+		while(generation == 0)	/* a zero generation is never handed out */
+			generation = rand();
+
+		DPRINT("from %s new state %lud for %I!%ud\n", 
+		        from, generation, addr, port);
+
+		r = smalloc(sizeof(Reliable));
+		memmove(r->addr, addr, IPaddrlen);
+		r->port = port;
+		r->unacked = 0;
+		if(generation == Hangupgen)	/* Hangupgen marks hangup messages (see relsendack); skip it */
+			generation++;
+		r->sndgen = generation++;
+		r->sndseq = 0;
+		r->ackrcvd = 0;
+		r->rcvgen = 0;
+		r->rcvseq = 0;
+		r->acksent = 0;
+		r->xmits = 0;
+		r->timeout = 0;
+		r->ref = 0;
+		incref(r);	/* one reference for being in the list */
+
+		*l = r;	/* l is the final next link after the failed search, so this appends */
+	}
+
+	incref(r);	/* the caller's reference */
+	return r;
+}
+
+void
+relput(Reliable *r)	/* drop one reference to r; free it when the count reaches zero */
+{
+	if(decref(r) == 0)
+		free(r);
+}
+
+/*
+ *  forget a Reliable state: unlink the record for ip/port from c's list, hang it up and drop the list's reference
+ */
+void
+relforget(Conv *c, uchar *ip, int port, int originator)
+{
+	Rudpcb *ucb;
+	Reliable *r, **l;
+
+	ucb = (Rudpcb*)c->ptcl;
+
+	l = &ucb->r;
+	for(r = *l; r; r = *l){
+		if(ipcmp(ip, r->addr) == 0 && port == r->port){
+			*l = r->next;
+			if(originator)	/* we started the hangup: send the peer a Hangupgen ack */
+				relsendack(c, r, 1);
+			relhangup(c, r);
+			relput(r);	/* remove from the list */
+			break;
+		}
+		l = &r->next;
+	}
+}
+
+/* 
+ *  process a rcvd reliable packet. return -1 if not to be passed to user process,
+ *  0 otherwise.
+ *
+ *  called with ucb locked.
+ */
+int
+reliput(Conv *c, Block *bp, uchar *addr, ushort port)
+{
+	Block *nbp;
+	Rudpcb *ucb;
+	Rudppriv *upriv;
+	Udphdr *uh;
+	Reliable *r;
+	Rudphdr *rh;
+	ulong seq, ack, sgen, agen, ackreal;
+	int rv = -1;
+
+	/* get fields */
+	uh = (Udphdr*)(bp->rp);
+	rh = (Rudphdr*)uh;
+	seq = nhgetl(rh->relseq);
+	sgen = nhgetl(rh->relsgen);
+	ack = nhgetl(rh->relack);
+	agen = nhgetl(rh->relagen);
+
+	upriv = c->p->priv;
+	ucb = (Rudpcb*)c->ptcl;
+	r = relstate(ucb, addr, port, "input");	/* adds a reference; dropped at out: */
+
+	DPRINT("rcvd %lud/%lud, %lud/%lud, r->sndgen = %lud\n", 
+		seq, sgen, ack, agen, r->sndgen);
+
+	/* if acking an incorrect generation, ignore */
+	if(ack && agen != r->sndgen)
+		goto out;
+
+	/* Look for a hangup */
+	if(sgen == Hangupgen) {
+		if(agen == r->sndgen)
+			relforget(c, addr, port, 0);	/* 0: the peer originated it, so no hangup is sent back */
+		goto out;
+	}
+
+	/* make sure we're not talking to a new remote side */
+	if(r->rcvgen != sgen){
+		if(seq != 0 && seq != 1)	/* a changed generation is only accepted on sequence 0 or 1 */
+			goto out;
+
+		/* new connection */
+		if(r->rcvgen != 0){
+			DPRINT("new con r->rcvgen = %lud, sgen = %lud\n", r->rcvgen, sgen);
+			relhangup(c, r);
+		}
+		r->rcvgen = sgen;
+	}
+
+	/* dequeue acked packets */
+	if(ack && agen == r->sndgen){
+		ackreal = 0;
+		while(r->unacked != nil && INSEQ(ack, r->ackrcvd, r->sndseq)){
+			nbp = r->unacked;
+			r->unacked = nbp->list;
+			DPRINT("%lud/%lud acked, r->sndgen = %lud\n", 
+			       ack, agen, r->sndgen);
+			freeb(nbp);
+			r->ackrcvd = NEXTSEQ(r->ackrcvd);
+			ackreal = 1;	/* at least one queued block was released by this ack */
+		}
+
+		/* flow control */
+		if(UNACKED(r) < Maxunacked/8 && r->blocked)
+			wakeup(&r->vous);
+
+		/*
+		 *  retransmit next packet if the acked packet
+		 *  was transmitted more than once
+		 */
+		if(ackreal && r->unacked != nil){
+			r->timeout = 0;
+			if(r->xmits > 1){
+				r->xmits = 1;
+				relrexmit(c, r);
+			}
+		}
+		
+	}
+
+	/* no message or input queue full */
+	if(seq == 0 || qfull(c->rq))
+		goto out;
+
+	/* refuse out of order delivery */
+	if(seq != NEXTSEQ(r->rcvseq)){
+		relsendack(c, r, 0);	/* tell him we got it already */
+		upriv->orders++;
+		DPRINT("out of sequence %lud not %lud\n", seq, NEXTSEQ(r->rcvseq));
+		goto out;
+	}
+	r->rcvseq = seq;
+
+	rv = 0;	/* in-sequence data: let the caller pass it to the user process */
+out:
+	relput(r);
+	return rv;
+}
+
+void
+relsendack(Conv *c, Reliable *r, int hangup)	/* send r's peer a header-only packet (relseq 0) acking r->rcvseq; hangup != 0 sends Hangupgen as the generation */
+{
+	Udphdr *uh;
+	Block *bp;
+	Rudphdr *rh;
+	int ptcllen;
+	Fs *f;
+
+	bp = allocb(UDP_IPHDR + UDP_RHDRSIZE);
+	if(bp == nil)	/* no block: the ack is silently not sent */
+		return;
+	bp->wp += UDP_IPHDR + UDP_RHDRSIZE;
+	f = c->p->f;
+	uh = (Udphdr *)(bp->rp);
+	uh->vihl = IP_VER4;
+	rh = (Rudphdr*)uh;	/* same bytes viewed with the reliable-header fields */
+
+	ptcllen = (UDP_RHDRSIZE-UDP_PHDRSIZE);
+	uh->Unused = 0;
+	uh->udpproto = IP_UDPPROTO;
+	uh->frag[0] = 0;
+	uh->frag[1] = 0;
+	hnputs(uh->udpplen, ptcllen);
+
+	v6tov4(uh->udpdst, r->addr);
+	hnputs(uh->udpdport, r->port);
+	hnputs(uh->udpsport, c->lport);
+	if(ipcmp(c->laddr, IPnoaddr) == 0)	/* no local address set yet; presumably findlocalip picks one for raddr - confirm */
+		findlocalip(f, c->laddr, c->raddr);
+	v6tov4(uh->udpsrc, c->laddr);
+	hnputs(uh->udplen, ptcllen);
+
+	if(hangup)
+		hnputl(rh->relsgen, Hangupgen);
+	else
+		hnputl(rh->relsgen, r->sndgen);
+	hnputl(rh->relseq, 0);	/* sequence 0 means no message (see reliput) */
+	hnputl(rh->relagen, r->rcvgen);
+	hnputl(rh->relack, r->rcvseq);
+
+	if(r->acksent < r->rcvseq)
+		r->acksent = r->rcvseq;
+
+	uh->udpcksum[0] = 0;	/* checksum field is zeroed before the checksum is computed */
+	uh->udpcksum[1] = 0;
+	hnputs(uh->udpcksum, ptclcsum(bp, UDP_IPHDR, UDP_RHDRSIZE));
+
+	DPRINT("sendack: %lud/%lud, %lud/%lud\n", 0L, r->sndgen, r->rcvseq, r->rcvgen);
+	doipoput(c, f, bp, 0, c->ttl, c->tos);
+}
+
+
+/*
+ *  report a hangup of r on c's eq queue, drop r's unacked blocks and reset r with a new send generation.
+ *  called with ucb locked (and c locked if user initiated close)
+ */
+void
+relhangup(Conv *c, Reliable *r)
+{
+	int n;
+	Block *bp;
+	char hup[ERRMAX];
+
+	n = snprint(hup, sizeof(hup), "hangup %I!%d", r->addr, r->port);
+	qproduce(c->eq, hup, n);
+
+	/*
+	 *  dump any unacked outgoing messages
+	 */
+	for(bp = r->unacked; bp != nil; bp = r->unacked){
+		r->unacked = bp->list;
+		bp->list = nil;
+		freeb(bp);
+	}
+
+	r->rcvgen = 0;	/* from here on, put the record back in its initial state */
+	r->rcvseq = 0;
+	r->acksent = 0;
+	if(generation == Hangupgen)	/* never hand out the reserved hangup generation */
+		generation++;
+	r->sndgen = generation++;
+	r->sndseq = 0;
+	r->ackrcvd = 0;
+	r->xmits = 0;
+	r->timeout = 0;
+	wakeup(&r->vous);
+}
+
+/*
+ *  resend a copy of the first unacked block, or hang up after too many transmissions.
+ *  called with ucb locked
+ */
+void
+relrexmit(Conv *c, Reliable *r)
+{
+	Rudppriv *upriv;
+	Block *np;
+	Fs *f;
+
+	upriv = c->p->priv;
+	f = c->p->f;
+	r->timeout = 0;
+	if(r->xmits++ > Rudpmaxxmit){	/* transmitted too often: give up on this peer */
+		relhangup(c, r);
+		return;
+	}
+
+	upriv->rxmits++;
+	np = copyblock(r->unacked, blocklen(r->unacked));	/* the queued block stays on the list; a copy is sent */
+	DPRINT("rxmit r->ackrvcd+1 = %lud\n", r->ackrcvd+1);
+	doipoput(c, f, np, 0, c->ttl, c->tos);
+}
diff --git a/src/9vx/a/ip/tcp.c b/src/9vx/a/ip/tcp.c
@@ -0,0 +1,3209 @@
+#include	"u.h"
+#include	"lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"error.h"
+
+#include	"ip.h"
+
+enum
+{
+	QMAX		= 64*1024-1,	/* default receive window and read queue limit (see inittcpctl) */
+	IP_TCPPROTO	= 6,
+
+	TCP4_IPLEN	= 8,
+	TCP4_PHDRSIZE	= 12,
+	TCP4_HDRSIZE	= 20,
+	TCP4_TCBPHDRSZ	= 40,
+	TCP4_PKT	= TCP4_IPLEN+TCP4_PHDRSIZE,
+
+	TCP6_IPLEN	= 0,
+	TCP6_PHDRSIZE	= 40,
+	TCP6_HDRSIZE	= 20,
+	TCP6_TCBPHDRSZ	= 60,
+	TCP6_PKT	= TCP6_IPLEN+TCP6_PHDRSIZE,
+
+	TcptimerOFF	= 0,
+	TcptimerON	= 1,
+	TcptimerDONE	= 2,
+	MAX_TIME 	= (1<<20),	/* Forever */
+	TCP_ACK		= 50,		/* Timed ack sequence in ms */
+	MAXBACKMS	= 9*60*1000,	/* longest backoff time (ms) before hangup */
+
+	URG		= 0x20,		/* Data marked urgent */
+	ACK		= 0x10,		/* Acknowledge is valid */
+	PSH		= 0x08,		/* Whole data pipe is pushed */
+	RST		= 0x04,		/* Reset connection */
+	SYN		= 0x02,		/* Pkt. is synchronise */
+	FIN		= 0x01,		/* Start close down */
+
+	EOLOPT		= 0,
+	NOOPOPT		= 1,
+	MSSOPT		= 2,
+	MSS_LENGTH	= 4,		/* Length in bytes of the maximum segment size option */
+	WSOPT		= 3,
+	WS_LENGTH	= 3,		/* Length in bytes of the window scale option */
+	MSL2		= 10,
+	MSPTICK		= 50,		/* Milliseconds per timer tick */
+	DEF_MSS		= 1460,		/* Default maximum segment size */
+	DEF_MSS6	= 1280,		/* Default maximum segment size (min) for v6 */
+	DEF_RTT		= 500,		/* Default round trip */
+	DEF_KAT		= 120000,	/* Default time (ms) between keep alives */
+	TCP_LISTEN	= 0,		/* Listen connection */
+	TCP_CONNECT	= 1,		/* Outgoing connection */
+	SYNACK_RXTIMER	= 250,		/* ms between SYNACK retransmits */
+
+	TCPREXMTTHRESH	= 3,		/* dupack threshold for rxt */
+
+	FORCE		= 1,		/* FORCE, CLONE and ACTIVE are or'd into Tcpctl.flags in this file */
+	CLONE		= 2,
+	RETRAN		= 4,
+	ACTIVE		= 8,
+	SYNACK		= 16,
+
+	LOGAGAIN	= 3,
+	LOGDGAIN	= 2,
+
+	Closed		= 0,		/* Connection states */
+	Listen,
+	Syn_sent,
+	Syn_received,
+	Established,
+	Finwait1,
+	Finwait2,
+	Close_wait,
+	Closing,
+	Last_ack,
+	Time_wait,
+
+	Maxlimbo	= 1000,		/* maximum procs waiting for response to SYN ACK */
+	NLHT		= 256,		/* hash table size, must be a power of 2 */
+	LHTMASK		= NLHT-1,
+
+	HaveWS		= 1<<8,		/* or'd into a window scale value to mark it as present */
+};
+
+/* Names of the connection states, indexed by Tcpctl.state; must correspond to the enumeration above */
+char *tcpstates[] =
+{
+	"Closed", 	"Listen", 	"Syn_sent", "Syn_received",
+	"Established", 	"Finwait1",	"Finwait2", "Close_wait",
+	"Closing", 	"Last_ack", 	"Time_wait"
+};
+
+typedef struct Tcptimer Tcptimer;
+struct Tcptimer
+{
+	Tcptimer	*next;		/* next/prev: doubly linked list headed by Tcppriv.timers */
+	Tcptimer	*prev;
+	Tcptimer	*readynext;	/* chains timers that expired in one tcpackproc pass */
+	int	state;			/* TcptimerOFF, TcptimerON or TcptimerDONE */
+	int	start;			/* initial count; tcpgo ignores the timer when 0 */
+	int	count;			/* remaining count, decremented by tcpackproc once per pass */
+	void	(*func)(void*);		/* called by tcpackproc with arg after count reaches 0 */
+	void	*arg;
+};
+
+/*
+ *  v4 and v6 pseudo headers used for
+ *  checksumming tcp
+ */
+typedef struct Tcp4hdr Tcp4hdr;
+struct Tcp4hdr
+{
+	uchar	vihl;		/* Version and header length */
+	uchar	tos;		/* Type of service */
+	uchar	length[2];	/* packet length */
+	uchar	id[2];		/* Identification */
+	uchar	frag[2];	/* Fragment information */
+	uchar	Unused;
+	uchar	proto;
+	uchar	tcplen[2];	/* set to header + data length by htontcp4 */
+	uchar	tcpsrc[4];
+	uchar	tcpdst[4];
+	uchar	tcpsport[2];
+	uchar	tcpdport[2];
+	uchar	tcpseq[4];
+	uchar	tcpack[4];
+	uchar	tcpflag[2];	/* header length in the top bits of byte 0, flag bits in byte 1 */
+	uchar	tcpwin[2];
+	uchar	tcpcksum[2];
+	uchar	tcpurg[2];
+	/* Options segment */
+	uchar	tcpopt[1];
+};
+
+typedef struct Tcp6hdr Tcp6hdr;
+struct Tcp6hdr
+{
+	uchar	vcf[4];		/* htontcp6 stores the length here while checksumming, then IP_VER6 */
+	uchar	ploadlen[2];	/* payload length: tcp header plus data */
+	uchar	proto;
+	uchar	ttl;		/* holds the protocol number while htontcp6 computes the checksum */
+	uchar	tcpsrc[IPaddrlen];
+	uchar	tcpdst[IPaddrlen];
+	uchar	tcpsport[2];
+	uchar	tcpdport[2];
+	uchar	tcpseq[4];
+	uchar	tcpack[4];
+	uchar	tcpflag[2];	/* header length in the top bits of byte 0, flag bits in byte 1 */
+	uchar	tcpwin[2];
+	uchar	tcpcksum[2];
+	uchar	tcpurg[2];
+	/* Options segment */
+	uchar	tcpopt[1];
+};
+
+/*
+ *  this represents the control info
+ *  for a single packet.  It is derived from
+ *  a packet in ntohtcp{4,6}() and stuck into
+ *  a packet in htontcp{4,6}().
+ */
+typedef struct Tcp Tcp;
+struct	Tcp
+{
+	ushort	source;	/* source port */
+	ushort	dest;	/* destination port */
+	ulong	seq;
+	ulong	ack;
+	uchar	flags;	/* URG, ACK, PSH, RST, SYN, FIN bits */
+	ushort	ws;	/* window scale option (if not zero) */
+	ulong	wnd;
+	ushort	urg;
+	ushort	mss;	/* max segment size option (if not zero) */
+	ushort	len;	/* size of data */
+};
+
+/*
+ *  this header is malloc'd to thread together fragments
+ *  waiting to be coalesced
+ */
+typedef struct Reseq Reseq;
+struct Reseq
+{
+	Reseq	*next;
+	Tcp	seg;
+	Block	*bp;		/* released with freeblist when the queue is flushed (see localclose) */
+	ushort	length;
+};
+
+/*
+ *  the QLOCK in the Conv locks this structure
+ */
+typedef struct Tcpctl Tcpctl;
+struct Tcpctl
+{
+	uchar	state;			/* Connection state */
+	uchar	type;			/* Listening or active connection */
+	uchar	code;			/* Icmp code */
+	struct {
+		ulong	una;		/* Unacked data pointer */
+		ulong	nxt;		/* Next sequence expected */
+		ulong	ptr;		/* Data pointer */
+		ulong	wnd;		/* Tcp send window */
+		ulong	urg;		/* Urgent data pointer */
+		ulong	wl2;
+		int	scale;		/* how much to right shift window in xmitted packets */
+		/* to implement tahoe and reno TCP */
+		ulong	dupacks;	/* number of duplicate acks rcvd */
+		int	recovery;	/* loss recovery flag */
+		ulong	rxt;		/* right window marker for recovery */
+	} snd;
+	struct {
+		ulong	nxt;		/* Receive pointer to next uchar slot */
+		ulong	wnd;		/* Receive window incoming */
+		ulong	urg;		/* Urgent pointer */
+		int	blocked;
+		int	una;		/* unacked data segs */
+		int	scale;		/* how much to left shift window in rcved packets */
+	} rcv;
+	ulong	iss;			/* Initial sequence number */
+	int	sawwsopt;		/* true if we saw a wsopt on the incoming SYN */
+	ulong	cwind;			/* Congestion window */
+	int	scale;			/* desired snd.scale */
+	ushort	ssthresh;		/* Slow start threshold */
+	int	resent;			/* Bytes just resent */
+	int	irs;			/* Initial received sequence */
+	ushort	mss;			/* Maximum segment size */
+	int	rerecv;			/* Overlap of data re-received */
+	ulong	window;			/* Receive window */
+	uchar	backoff;		/* Exponential backoff counter */
+	int	backedoff;		/* ms we've backed off for rexmits */
+	uchar	flags;			/* State flags (FORCE, CLONE, ACTIVE, ...) */
+	Reseq	*reseq;			/* Resequencing queue */
+	Tcptimer	timer;			/* Activity timer */
+	Tcptimer	acktimer;		/* Acknowledge timer */
+	Tcptimer	rtt_timer;		/* Round trip timer */
+	Tcptimer	katimer;		/* keep alive timer */
+	ulong	rttseq;			/* Round trip sequence */
+	int	srtt;			/* Shortened round trip */
+	int	mdev;			/* Mean deviation of round trip */
+	int	kacounter;		/* count down for keep alive */
+	uint	sndsyntime;		/* time syn sent */
+	ulong	time;			/* time Finwait2 or Syn_received was sent */
+	int	nochecksum;		/* non-zero means don't send checksums */
+	int	flgcnt;			/* number of flags in the sequence (FIN,SYN) */
+
+	union {
+		Tcp4hdr	tcp4hdr;
+		Tcp6hdr	tcp6hdr;
+	} protohdr;		/* prototype header */
+};
+
+/*
+ *  New calls are put in limbo rather than having a conversation structure
+ *  allocated.  Thus, a SYN attack results in lots of limbo'd calls but not
+ *  any real Conv structures mucking things up.  Calls in limbo rexmit their
+ *  SYN ACK every SYNACK_RXTIMER ms up to 4 times, i.e., they disappear after 1 second.
+ *
+ *  In particular they aren't on a listener's queue so that they don't figure
+ *  in the input queue limit.
+ *
+ *  If 1/2 of a T3 was attacking SYN packets, we'd have a permanent queue
+ *  of 70000 limbo'd calls.  Not great for a linear list but doable.  Therefore
+ *  there is no hashing of this list.
+ */
+typedef struct Limbo Limbo;
+struct Limbo
+{
+	Limbo	*next;
+
+	uchar	laddr[IPaddrlen];
+	uchar	raddr[IPaddrlen];
+	ushort	lport;
+	ushort	rport;
+	ulong	irs;		/* initial received sequence */
+	ulong	iss;		/* initial sent sequence */
+	ushort	mss;		/* mss from the other end */
+	ushort	rcvscale;	/* how much to scale rcvd windows */
+	ushort	sndscale;	/* how much to scale sent windows */
+	ulong	lastsend;	/* last time we sent a synack */
+	uchar	version;	/* v4 or v6 */
+	uchar	rexmits;	/* number of retransmissions */
+};
+
+int	tcp_irtt = DEF_RTT;	/* Initial guess at round trip time (ms; divided by MSPTICK for the timer) */
+ushort	tcp_mss = DEF_MSS;	/* Maximum segment size to be sent */
+
+enum {	/* indices into Tcppriv.stats[] and statnames[] */
+	/* MIB stats */
+	MaxConn,
+	ActiveOpens,
+	PassiveOpens,
+	EstabResets,
+	CurrEstab,
+	InSegs,
+	OutSegs,
+	RetransSegs,
+	RetransTimeouts,
+	InErrs,
+	OutRsts,
+
+	/* non-MIB stats */
+	CsumErrs,
+	HlenErrs,
+	LenErrs,
+	OutOfOrder,
+
+	Nstats	/* number of counters; sizes Tcppriv.stats[] */
+};
+
+static char *statnames[] =	/* printable name of each counter, indexed by the stats enumeration */
+{
+[MaxConn]	"MaxConn",
+[ActiveOpens]	"ActiveOpens",
+[PassiveOpens]	"PassiveOpens",
+[EstabResets]	"EstabResets",
+[CurrEstab]	"CurrEstab",
+[InSegs]	"InSegs",
+[OutSegs]	"OutSegs",
+[RetransSegs]	"RetransSegs",
+[RetransTimeouts]	"RetransTimeouts",
+[InErrs]	"InErrs",
+[OutRsts]	"OutRsts",
+[CsumErrs]	"CsumErrs",
+[HlenErrs]	"HlenErrs",
+[LenErrs]	"LenErrs",
+[OutOfOrder]	"OutOfOrder",
+};
+
+typedef struct Tcppriv Tcppriv;
+struct Tcppriv
+{
+	/* List of active timers */
+	QLock 	tl;	/* held by tcpgo, tcphalt and tcpackproc around timer list changes */
+	Tcptimer *timers;
+
+	/* hash table for matching conversations */
+	Ipht	ht;
+
+	/* calls in limbo waiting for an ACK to our SYN ACK */
+	int	nlimbo;
+	Limbo	*lht[NLHT];
+
+	/* for keeping track of tcpackproc */
+	QLock	apl;
+	int	ackprocstarted;	/* set by tcpstart once the tcpackproc kproc has been created */
+
+	ulong	stats[Nstats];
+};
+
+/*
+ *  Setting tcpporthogdefense to non-zero enables Dong Lin's
+ *  solution to hijacked systems staking out ports as a form
+ *  of DoS attack.
+ *
+ *  To avoid stateless Conv hogs, we pick a sequence number at random.  If
+ *  that number gets acked by the other end, we shut down the connection.
+ *  Look for tcpporthogdefense in the code.
+ */
+int tcpporthogdefense = 0;
+
+int	addreseq(Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
+void	getreseq(Tcpctl*, Tcp*, Block**, ushort*);
+void	localclose(Conv*, char*);
+void	procsyn(Conv*, Tcp*);
+void	tcpiput(Proto*, Ipifc*, Block*);
+void	tcpoutput(Conv*);
+int	tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
+void	tcpstart(Conv*, int);
+void	tcptimeout(void*);
+void	tcpsndsyn(Conv*, Tcpctl*);
+void	tcprcvwin(Conv*);
+void	tcpacktimer(void*);
+void	tcpkeepalive(void*);
+void	tcpsetkacounter(Tcpctl*);
+void	tcprxmit(Conv*);
+void	tcpsettimer(Tcpctl*);
+void	tcpsynackrtt(Conv*);
+void	tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
+
+static void limborexmit(Proto*);
+static void limbo(Conv*, uchar*, uchar*, Tcp*, int);
+
+void
+tcpsetstate(Conv *s, uchar newstate)	/* move s to newstate, keeping the CurrEstab counter and the queues in step */
+{
+	Tcpctl *tcb;
+	uchar oldstate;
+	Tcppriv *tpriv;
+
+	tpriv = s->p->priv;
+
+	tcb = (Tcpctl*)s->ptcl;
+
+	oldstate = tcb->state;
+	if(oldstate == newstate)	/* no transition: nothing to do */
+		return;
+
+	if(oldstate == Established)
+		tpriv->stats[CurrEstab]--;
+	if(newstate == Established)
+		tpriv->stats[CurrEstab]++;
+
+	/**
+	print( "%d/%d %s->%s CurrEstab=%d\n", s->lport, s->rport,
+		tcpstates[oldstate], tcpstates[newstate], tpriv->tstats.tcpCurrEstab );
+	**/
+
+	switch(newstate) {
+	case Closed:
+		qclose(s->rq);
+		qclose(s->wq);
+		qclose(s->eq);
+		break;
+
+	case Close_wait:		/* Remote closes */
+		qhangup(s->rq, nil);
+		break;
+	}
+
+	tcb->state = newstate;
+
+	if(oldstate == Syn_sent && newstate != Closed)	/* leaving Syn_sent for anything but Closed: report the connect with a nil message */
+		Fsconnected(s, nil);
+}
+
+static char*
+tcpconnect(Conv *c, char **argv, int argc)	/* start an outgoing call on c; returns nil or an error string */
+{
+	char *e;
+	Tcpctl *tcb;
+
+	tcb = (Tcpctl*)(c->ptcl);
+	if(tcb->state != Closed)	/* only a Closed conversation may connect */
+		return Econinuse;
+
+	e = Fsstdconnect(c, argv, argc);
+	if(e != nil)
+		return e;
+	tcpstart(c, TCP_CONNECT);
+
+	return nil;
+}
+
+static int
+tcpstate(Conv *c, char *state, int n)	/* format a one-line status of c into state (at most n bytes); returns snprint's result */
+{
+	Tcpctl *s;
+
+	s = (Tcpctl*)(c->ptcl);
+
+	return snprint(state, n,
+		"%s qin %d qout %d srtt %d mdev %d cwin %lud swin %lud>>%d rwin %lud>>%d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
+		tcpstates[s->state],
+		c->rq ? qlen(c->rq) : 0,	/* a missing queue is reported as length 0 */
+		c->wq ? qlen(c->wq) : 0,
+		s->srtt, s->mdev,
+		s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd, s->snd.scale,	/* swin is printed with rcv.scale, rwin with snd.scale */
+		s->timer.start, s->timer.count, s->rerecv,
+		s->katimer.start, s->katimer.count);
+}
+
+static int
+tcpinuse(Conv *c)	/* non-zero unless the conversation's tcp state is Closed */
+{
+	Tcpctl *s;
+
+	s = (Tcpctl*)(c->ptcl);
+	return s->state != Closed;
+}
+
+static char*
+tcpannounce(Conv *c, char **argv, int argc)	/* put c into the Listen state; returns nil or an error string */
+{
+	char *e;
+	Tcpctl *tcb;
+
+	tcb = (Tcpctl*)(c->ptcl);
+	if(tcb->state != Closed)	/* only a Closed conversation may announce */
+		return Econinuse;
+
+	e = Fsstdannounce(c, argv, argc);
+	if(e != nil)
+		return e;
+	tcpstart(c, TCP_LISTEN);
+	Fsconnected(c, nil);
+
+	return nil;
+}
+
+/*
+ *  hang up c's queues and start the close appropriate to its state.
+ *  tcpclose is always called with the q locked
+ */
+static void
+tcpclose(Conv *c)
+{
+	Tcpctl *tcb;
+
+	tcb = (Tcpctl*)c->ptcl;
+
+	qhangup(c->rq, nil);
+	qhangup(c->wq, nil);
+	qhangup(c->eq, nil);
+	qflush(c->rq);
+
+	switch(tcb->state) {
+	case Listen:
+		/*
+		 *  reset any incoming calls to this listener
+		 */
+		Fsconnected(c, "Hangup");
+
+		localclose(c, nil);
+		break;
+	case Closed:
+	case Syn_sent:
+		localclose(c, nil);
+		break;
+	case Syn_received:
+	case Established:
+		tcb->flgcnt++;	/* presumably accounts for the FIN about to be sent - confirm in tcpoutput */
+		tcb->snd.nxt++;
+		tcpsetstate(c, Finwait1);
+		tcpoutput(c);
+		break;
+	case Close_wait:
+		tcb->flgcnt++;	/* same bookkeeping as above, ending in Last_ack instead */
+		tcb->snd.nxt++;
+		tcpsetstate(c, Last_ack);
+		tcpoutput(c);
+		break;
+	}
+}
+
+void
+tcpkick(void *x)	/* passed to qopen for the write queue in tcpcreate; x is the Conv */
+{
+	Conv *s = x;
+	Tcpctl *tcb;
+
+	tcb = (Tcpctl*)s->ptcl;
+
+	if(waserror()){	/* error raised below: release the lock and re-raise */
+		QUNLOCK(s);
+		nexterror();
+	}
+	QLOCK(s);
+
+	switch(tcb->state) {
+	case Syn_sent:
+	case Syn_received:
+	case Established:
+	case Close_wait:
+		/*
+		 * Push data
+		 */
+		tcprcvwin(s);
+		tcpoutput(s);
+		break;
+	default:	/* any other state: close locally */
+		localclose(s, "Hangup");
+		break;
+	}
+
+	QUNLOCK(s);
+	poperror();
+}
+
+void
+tcprcvwin(Conv *s)				/* Call with tcb locked; recomputes rcv.wnd from the read queue length */
+{
+	int w;
+	Tcpctl *tcb;
+
+	tcb = (Tcpctl*)s->ptcl;
+	w = tcb->window - qlen(s->rq);	/* window space not taken by queued data */
+	if(w < 0)
+		w = 0;
+	tcb->rcv.wnd = w;
+	if(w == 0)
+		tcb->rcv.blocked = 1;	/* remember that the window went to zero */
+}
+
+void
+tcpacktimer(void *v)	/* ack timer function (also passed to qopen for rq); v is the Conv */
+{
+	Tcpctl *tcb;
+	Conv *s;
+
+	s = v;
+	tcb = (Tcpctl*)s->ptcl;
+
+	if(waserror()){	/* error raised below: release the lock and re-raise */
+		QUNLOCK(s);
+		nexterror();
+	}
+	QLOCK(s);
+	if(tcb->state != Closed){
+		tcb->flags |= FORCE;	/* set FORCE before calling tcpoutput */
+		tcprcvwin(s);
+		tcpoutput(s);
+	}
+	QUNLOCK(s);
+	poperror();
+}
+
+static void
+tcpcreate(Conv *c)	/* open the read and write queues of a new conversation */
+{
+	c->rq = qopen(QMAX, Qcoalesce, tcpacktimer, c);	/* tcpacktimer and c are the queue's callback and its argument */
+	c->wq = qopen((3*QMAX)/2, Qkick, tcpkick, c);	/* write queue is half as large again as QMAX */
+}
+
+static void
+timerstate(Tcppriv *priv, Tcptimer *t, int newstate)	/* set t->state, linking t into or out of priv->timers; callers in this file hold priv->tl */
+{
+	if(newstate != TcptimerON){
+		if(t->state == TcptimerON){
+			/* unchain */
+			if(priv->timers == t){
+				priv->timers = t->next;
+				if(t->prev != nil)	/* the list head must not have a predecessor */
+					panic("timerstate1");
+			}
+			if(t->next)
+				t->next->prev = t->prev;
+			if(t->prev)
+				t->prev->next = t->next;
+			t->next = t->prev = nil;
+		}
+	} else {
+		if(t->state != TcptimerON){
+			/* chain */
+			if(t->prev != nil || t->next != nil)	/* a timer that is not ON must not be linked */
+				panic("timerstate2");
+			t->prev = nil;
+			t->next = priv->timers;	/* insert at the head of the list */
+			if(t->next)
+				t->next->prev = t;
+			priv->timers = t;
+		}
+	}
+	t->state = newstate;
+}
+
+void
+tcpackproc(void *a)	/* timer process started by tcpstart; a is the tcp Proto; never returns */
+{
+	Tcptimer *t, *tp, *timeo;
+	Proto *tcp;
+	Tcppriv *priv;
+	int loop;
+
+	tcp = a;
+	priv = tcp->priv;
+
+	for(;;) {
+		tsleep(&up->sleep, return0, 0, MSPTICK);	/* one pass per timer tick */
+
+		qlock(&priv->tl);
+		timeo = nil;
+		loop = 0;
+		for(t = priv->timers; t != nil; t = tp) {
+			if(loop++ > 10000)	/* guard against a corrupted (looping) timer list */
+				panic("tcpackproc1");
+			tp = t->next;	/* saved first: timerstate below unlinks t */
+ 			if(t->state == TcptimerON) {
+				t->count--;
+				if(t->count == 0) {
+					timerstate(priv, t, TcptimerDONE);
+					t->readynext = timeo;
+					timeo = t;
+				}
+			}
+		}
+		qunlock(&priv->tl);
+
+		loop = 0;
+		for(t = timeo; t != nil; t = t->readynext) {	/* run the expired timers with priv->tl released */
+			if(loop++ > 10000)
+				panic("tcpackproc2");
+			if(t->state == TcptimerDONE && t->func != nil && !waserror()){
+				(*t->func)(t->arg);
+				poperror();
+			}
+		}
+
+		limborexmit(tcp);
+	}
+}
+
+void
+tcpgo(Tcppriv *priv, Tcptimer *t)	/* (re)start timer t from t->start; no-op for a nil timer or a zero start */
+{
+	if(t == nil || t->start == 0)
+		return;
+
+	qlock(&priv->tl);
+	t->count = t->start;
+	timerstate(priv, t, TcptimerON);
+	qunlock(&priv->tl);
+}
+
+void
+tcphalt(Tcppriv *priv, Tcptimer *t)	/* stop timer t (state TcptimerOFF); no-op for a nil timer */
+{
+	if(t == nil)
+		return;
+
+	qlock(&priv->tl);
+	timerstate(priv, t, TcptimerOFF);
+	qunlock(&priv->tl);
+}
+
+int
+backoff(int n)	/* returns 1 shifted left by n, i.e. 2 to the power n */
+{
+	return 1 << n;
+}
+
+void
+localclose(Conv *s, char *reason)	/* called with tcb locked; tears the connection down locally and ends in Closed */
+{
+	Tcpctl *tcb;
+	Reseq *rp,*rp1;
+	Tcppriv *tpriv;
+
+	tpriv = s->p->priv;
+	tcb = (Tcpctl*)s->ptcl;
+
+	iphtrem(&tpriv->ht, s);	/* take s out of the conversation hash table */
+
+	tcphalt(tpriv, &tcb->timer);
+	tcphalt(tpriv, &tcb->rtt_timer);
+	tcphalt(tpriv, &tcb->acktimer);
+	tcphalt(tpriv, &tcb->katimer);
+
+	/* Flush reassembly queue; nothing more can arrive */
+	for(rp = tcb->reseq; rp != nil; rp = rp1) {
+		rp1 = rp->next;	/* saved before rp is freed */
+		freeblist(rp->bp);
+		free(rp);
+	}
+	tcb->reseq = nil;
+
+	if(tcb->state == Syn_sent)	/* a connect was pending: report reason to it */
+		Fsconnected(s, reason);
+	if(s->state == Announced)
+		wakeup(&s->listenr);
+
+	qhangup(s->rq, reason);
+	qhangup(s->wq, reason);
+
+	tcpsetstate(s, Closed);
+}
+
+/* mtu (- TCP + IP hdr len) of 1st hop; also sets *scale to a window scale chosen from the interface's mbps, tagged with HaveWS */
+int
+tcpmtu(Proto *tcp, uchar *addr, int version, int *scale)
+{
+	Ipifc *ifc;
+	int mtu;
+
+	ifc = findipifc(tcp->f, addr, 0);
+	switch(version){
+	default:	/* unknown versions are treated as V4 */
+	case V4:
+		mtu = DEF_MSS;
+		if(ifc != nil)
+			mtu = ifc->maxtu - ifc->m->hsize - (TCP4_PKT + TCP4_HDRSIZE);
+		break;
+	case V6:
+		mtu = DEF_MSS6;
+		if(ifc != nil)
+			mtu = ifc->maxtu - ifc->m->hsize - (TCP6_PKT + TCP6_HDRSIZE);
+		break;
+	}
+	if(ifc != nil){
+		if(ifc->mbps > 1000)
+			*scale = HaveWS | 4;
+		else if(ifc->mbps > 100)
+			*scale = HaveWS | 3;
+		else if(ifc->mbps > 10)
+			*scale = HaveWS | 1;
+		else
+			*scale = HaveWS | 0;
+	} else
+		*scale = HaveWS | 0;	/* no interface found: defaults, with a scale of 0 */
+
+	return mtu;
+}
+
+void
+inittcpctl(Conv *s, int mode)	/* zero s's Tcpctl and set timers, prototype header, mss and window to their defaults */
+{
+	Tcpctl *tcb;
+	Tcp4hdr* h4;
+	Tcp6hdr* h6;
+	int mss;
+
+	tcb = (Tcpctl*)s->ptcl;
+
+	memset(tcb, 0, sizeof(Tcpctl));
+
+	tcb->ssthresh = 65535;
+	tcb->srtt = tcp_irtt<<LOGAGAIN;	/* srtt is kept scaled up by LOGAGAIN bits */
+	tcb->mdev = 0;
+
+	/* setup timers; start values are in MSPTICK ticks */
+	tcb->timer.start = tcp_irtt / MSPTICK;
+	tcb->timer.func = tcptimeout;
+	tcb->timer.arg = s;
+	tcb->rtt_timer.start = MAX_TIME;
+	tcb->acktimer.start = TCP_ACK / MSPTICK;
+	tcb->acktimer.func = tcpacktimer;
+	tcb->acktimer.arg = s;
+	tcb->katimer.start = DEF_KAT / MSPTICK;
+	tcb->katimer.func = tcpkeepalive;
+	tcb->katimer.arg = s;
+
+	mss = DEF_MSS;
+
+	/* create a prototype(pseudo) header; skipped for listeners */
+	if(mode != TCP_LISTEN){
+		if(ipcmp(s->laddr, IPnoaddr) == 0)	/* no local address yet; presumably findlocalip picks one for raddr - confirm */
+			findlocalip(s->p->f, s->laddr, s->raddr);
+
+		switch(s->ipversion){
+		case V4:
+			h4 = &tcb->protohdr.tcp4hdr;
+			memset(h4, 0, sizeof(*h4));
+			h4->proto = IP_TCPPROTO;
+			hnputs(h4->tcpsport, s->lport);
+			hnputs(h4->tcpdport, s->rport);
+			v6tov4(h4->tcpsrc, s->laddr);
+			v6tov4(h4->tcpdst, s->raddr);
+			break;
+		case V6:
+			h6 = &tcb->protohdr.tcp6hdr;
+			memset(h6, 0, sizeof(*h6));
+			h6->proto = IP_TCPPROTO;
+			hnputs(h6->tcpsport, s->lport);
+			hnputs(h6->tcpdport, s->rport);
+			ipmove(h6->tcpsrc, s->laddr);
+			ipmove(h6->tcpdst, s->raddr);
+			mss = DEF_MSS6;
+			break;
+		default:
+			panic("inittcpctl: version %d", s->ipversion);
+		}
+	}
+
+	tcb->mss = tcb->cwind = mss;	/* the congestion window starts at one segment */
+
+	/* default is no window scaling */
+	tcb->window = QMAX;
+	tcb->rcv.wnd = QMAX;
+	tcb->rcv.scale = 0;
+	tcb->snd.scale = 0;
+	qsetlimit(s->rq, QMAX);
+}
+
+/*
+ *  initialise s and put it in Listen or Syn_sent according to mode; starts tcpackproc on first use.
+ *  called with s QLOCKed
+ */
+void
+tcpstart(Conv *s, int mode)
+{
+	Tcpctl *tcb;
+	Tcppriv *tpriv;
+	char kpname[KNAMELEN];
+
+	tpriv = s->p->priv;
+
+	if(tpriv->ackprocstarted == 0){
+		qlock(&tpriv->apl);
+		if(tpriv->ackprocstarted == 0){	/* checked again under the lock so only one kproc is started */
+			sprint(kpname, "#I%dtcpack", s->p->f->dev);
+			kproc(kpname, tcpackproc, s->p);
+			tpriv->ackprocstarted = 1;
+		}
+		qunlock(&tpriv->apl);
+	}
+
+	tcb = (Tcpctl*)s->ptcl;
+
+	inittcpctl(s, mode);
+
+	iphtadd(&tpriv->ht, s);	/* enter s in the conversation hash table */
+	switch(mode) {
+	case TCP_LISTEN:
+		tpriv->stats[PassiveOpens]++;
+		tcb->flags |= CLONE;
+		tcpsetstate(s, Listen);
+		break;
+
+	case TCP_CONNECT:
+		tpriv->stats[ActiveOpens]++;
+		tcb->flags |= ACTIVE;
+		tcpsndsyn(s, tcb);
+		tcpsetstate(s, Syn_sent);
+		tcpoutput(s);
+		break;
+	}
+}
+
+static char*
+tcpflag(ushort flag)	/* format a tcpflag word as text; the result is a static buffer overwritten by the next call */
+{
+	static char buf[128];
+
+	sprint(buf, "%d", flag>>10);	/* Head len */
+	if(flag & URG)
+		strcat(buf, " URG");
+	if(flag & ACK)
+		strcat(buf, " ACK");
+	if(flag & PSH)
+		strcat(buf, " PSH");
+	if(flag & RST)
+		strcat(buf, " RST");
+	if(flag & SYN)
+		strcat(buf, " SYN");
+	if(flag & FIN)
+		strcat(buf, " FIN");
+
+	return buf;
+}
+
+Block *
+htontcp6(Tcp *tcph, Block *data, Tcp6hdr *ph, Tcpctl *tcb)	/* prepend a v6 header built from tcph and prototype ph to data (or to a new block if data is nil); nil on failure */
+{
+	int dlen;
+	Tcp6hdr *h;
+	ushort csum;
+	ushort hdrlen, optpad = 0;
+	uchar *opt;
+
+	hdrlen = TCP6_HDRSIZE;
+	if(tcph->flags & SYN){	/* options are only sent on SYN segments */
+		if(tcph->mss)
+			hdrlen += MSS_LENGTH;
+		if(tcph->ws)
+			hdrlen += WS_LENGTH;
+		optpad = hdrlen & 3;	/* pad the header to a multiple of 4 bytes */
+		if(optpad)
+			optpad = 4 - optpad;
+		hdrlen += optpad;
+	}
+
+	if(data) {
+		dlen = blocklen(data);
+		data = padblock(data, hdrlen + TCP6_PKT);
+		if(data == nil)
+			return nil;
+	}
+	else {
+		dlen = 0;
+		data = allocb(hdrlen + TCP6_PKT + 64);	/* the 64 pad is to meet mintu's */
+		if(data == nil)
+			return nil;
+		data->wp += hdrlen + TCP6_PKT;
+	}
+
+	/* copy in pseudo ip header plus port numbers */
+	h = (Tcp6hdr *)(data->rp);
+	memmove(h, ph, TCP6_TCBPHDRSZ);
+
+	/* compose pseudo tcp header, do cksum calculation */
+	hnputl(h->vcf, hdrlen + dlen);
+	h->ploadlen[0] = h->ploadlen[1] = h->proto = 0;
+	h->ttl = ph->proto;
+
+	/* copy in variable bits */
+	hnputl(h->tcpseq, tcph->seq);
+	hnputl(h->tcpack, tcph->ack);
+	hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags);
+	hnputs(h->tcpwin, tcph->wnd>>(tcb != nil ? tcb->snd.scale : 0));	/* no scaling when there is no tcb */
+	hnputs(h->tcpurg, tcph->urg);
+
+	if(tcph->flags & SYN){
+		opt = h->tcpopt;
+		if(tcph->mss != 0){
+			*opt++ = MSSOPT;
+			*opt++ = MSS_LENGTH;
+			hnputs(opt, tcph->mss);
+			opt += 2;
+		}
+		if(tcph->ws != 0){
+			*opt++ = WSOPT;
+			*opt++ = WS_LENGTH;
+			*opt++ = tcph->ws;	/* stored in one byte, so the HaveWS bit (1<<8) is dropped */
+		}
+		while(optpad-- > 0)
+			*opt++ = NOOPOPT;
+	}
+
+	if(tcb != nil && tcb->nochecksum){
+		h->tcpcksum[0] = h->tcpcksum[1] = 0;
+	} else {
+		csum = ptclcsum(data, TCP6_IPLEN, hdrlen+dlen+TCP6_PHDRSIZE);
+		hnputs(h->tcpcksum, csum);
+	}
+
+	/* move from pseudo header back to normal ip header */
+	memset(h->vcf, 0, 4);
+	h->vcf[0] = IP_VER6;
+	hnputs(h->ploadlen, hdrlen+dlen);
+	h->proto = ph->proto;
+
+	return data;
+}
+
+Block *
+htontcp4(Tcp *tcph, Block *data, Tcp4hdr *ph, Tcpctl *tcb)	/* prepend a v4 header built from tcph and prototype ph to data (or to a new block if data is nil); nil on failure */
+{
+	int dlen;
+	Tcp4hdr *h;
+	ushort csum;
+	ushort hdrlen, optpad = 0;
+	uchar *opt;
+
+	hdrlen = TCP4_HDRSIZE;
+	if(tcph->flags & SYN){	/* options are only sent on SYN segments */
+		if(tcph->mss)
+			hdrlen += MSS_LENGTH;
+		if(tcph->ws)
+			hdrlen += WS_LENGTH;
+		optpad = hdrlen & 3;	/* pad the header to a multiple of 4 bytes */
+		if(optpad)
+			optpad = 4 - optpad;
+		hdrlen += optpad;
+	}
+
+	if(data) {
+		dlen = blocklen(data);
+		data = padblock(data, hdrlen + TCP4_PKT);
+		if(data == nil)
+			return nil;
+	}
+	else {
+		dlen = 0;
+		data = allocb(hdrlen + TCP4_PKT + 64);	/* the 64 pad is to meet mintu's */
+		if(data == nil)
+			return nil;
+		data->wp += hdrlen + TCP4_PKT;
+	}
+
+	/* copy in pseudo ip header plus port numbers */
+	h = (Tcp4hdr *)(data->rp);
+	memmove(h, ph, TCP4_TCBPHDRSZ);
+
+	/* copy in variable bits */
+	hnputs(h->tcplen, hdrlen + dlen);
+	hnputl(h->tcpseq, tcph->seq);
+	hnputl(h->tcpack, tcph->ack);
+	hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags);
+	hnputs(h->tcpwin, tcph->wnd>>(tcb != nil ? tcb->snd.scale : 0));	/* no scaling when there is no tcb */
+	hnputs(h->tcpurg, tcph->urg);
+
+	if(tcph->flags & SYN){
+		opt = h->tcpopt;
+		if(tcph->mss != 0){
+			*opt++ = MSSOPT;
+			*opt++ = MSS_LENGTH;
+			hnputs(opt, tcph->mss);
+			opt += 2;
+		}
+		if(tcph->ws != 0){
+			*opt++ = WSOPT;
+			*opt++ = WS_LENGTH;
+			*opt++ = tcph->ws;	/* stored in one byte, so the HaveWS bit (1<<8) is dropped */
+		}
+		while(optpad-- > 0)
+			*opt++ = NOOPOPT;
+	}
+
+	if(tcb != nil && tcb->nochecksum){
+		h->tcpcksum[0] = h->tcpcksum[1] = 0;
+	} else {
+		csum = ptclcsum(data, TCP4_IPLEN, hdrlen+dlen+TCP4_PHDRSIZE);
+		hnputs(h->tcpcksum, csum);
+	}
+
+	return data;
+}
+
+int
+ntohtcp6(Tcp *tcph, Block **bpp)	/* parse the v6 tcp header at the front of *bpp into tcph; returns the header length, or -1 on error */
+{
+	Tcp6hdr *h;
+	uchar *optr;
+	ushort hdrlen;
+	ushort optlen;
+	int n;
+
+	*bpp = pullupblock(*bpp, TCP6_PKT+TCP6_HDRSIZE);
+	if(*bpp == nil)
+		return -1;
+
+	h = (Tcp6hdr *)((*bpp)->rp);
+	tcph->source = nhgets(h->tcpsport);
+	tcph->dest = nhgets(h->tcpdport);
+	tcph->seq = nhgetl(h->tcpseq);
+	tcph->ack = nhgetl(h->tcpack);
+	hdrlen = (h->tcpflag[0]>>2) & ~3;	/* header length in bytes, from the top bits of the flag word */
+	if(hdrlen < TCP6_HDRSIZE) {
+		freeblist(*bpp);	/* the block is freed here on a bad header length */
+		return -1;
+	}
+
+	tcph->flags = h->tcpflag[1];
+	tcph->wnd = nhgets(h->tcpwin);
+	tcph->urg = nhgets(h->tcpurg);
+	tcph->mss = 0;
+	tcph->ws = 0;
+	tcph->len = nhgets(h->ploadlen) - hdrlen;
+
+	*bpp = pullupblock(*bpp, hdrlen+TCP6_PKT);
+	if(*bpp == nil)
+		return -1;
+	h = (Tcp6hdr *)((*bpp)->rp);	/* pullupblock may hand back a different block: don't use the old h */
+	optr = h->tcpopt;
+	n = hdrlen - TCP6_HDRSIZE;
+	while(n > 0 && *optr != EOLOPT) {
+		if(*optr == NOOPOPT) {
+			n--;
+			optr++;
+			continue;
+		}
+		optlen = n > 1 ? optr[1] : 0;	/* with one byte left there is no length byte to read; 0 ends the loop below */
+		if(optlen < 2 || optlen > n)
+			break;
+		switch(*optr) {
+		case MSSOPT:
+			if(optlen == MSS_LENGTH)
+				tcph->mss = nhgets(optr+2);
+			break;
+		case WSOPT:
+			if(optlen == WS_LENGTH && *(optr+2) <= 14)	/* shift counts above 14 are ignored */
+				tcph->ws = HaveWS | *(optr+2);
+			break;
+		}
+		n -= optlen;
+		optr += optlen;
+	}
+	return hdrlen;
+}
+
+int
+ntohtcp4(Tcp *tcph, Block **bpp)	/* parse the v4 tcp header at the front of *bpp into tcph; returns the header length, or -1 on error */
+{
+	Tcp4hdr *h;
+	uchar *optr;
+	ushort hdrlen;
+	ushort optlen;
+	int n;
+
+	*bpp = pullupblock(*bpp, TCP4_PKT+TCP4_HDRSIZE);
+	if(*bpp == nil)
+		return -1;
+
+	h = (Tcp4hdr *)((*bpp)->rp);
+	tcph->source = nhgets(h->tcpsport);
+	tcph->dest = nhgets(h->tcpdport);
+	tcph->seq = nhgetl(h->tcpseq);
+	tcph->ack = nhgetl(h->tcpack);
+
+	hdrlen = (h->tcpflag[0]>>2) & ~3;	/* header length in bytes, from the top bits of the flag word */
+	if(hdrlen < TCP4_HDRSIZE) {
+		freeblist(*bpp);	/* the block is freed here on a bad header length */
+		return -1;
+	}
+
+	tcph->flags = h->tcpflag[1];
+	tcph->wnd = nhgets(h->tcpwin);
+	tcph->urg = nhgets(h->tcpurg);
+	tcph->mss = 0;
+	tcph->ws = 0;
+	tcph->len = nhgets(h->length) - (hdrlen + TCP4_PKT);
+
+	*bpp = pullupblock(*bpp, hdrlen+TCP4_PKT);
+	if(*bpp == nil)
+		return -1;
+	h = (Tcp4hdr *)((*bpp)->rp);	/* pullupblock may hand back a different block: don't use the old h */
+	optr = h->tcpopt;
+	n = hdrlen - TCP4_HDRSIZE;
+	while(n > 0 && *optr != EOLOPT) {
+		if(*optr == NOOPOPT) {
+			n--;
+			optr++;
+			continue;
+		}
+		optlen = n > 1 ? optr[1] : 0;	/* with one byte left there is no length byte to read; 0 ends the loop below */
+		if(optlen < 2 || optlen > n)
+			break;
+		switch(*optr) {
+		case MSSOPT:
+			if(optlen == MSS_LENGTH)
+				tcph->mss = nhgets(optr+2);
+			break;
+		case WSOPT:
+			if(optlen == WS_LENGTH && *(optr+2) <= 14)	/* shift counts above 14 are ignored */
+				tcph->ws = HaveWS | *(optr+2);
+			break;
+		}
+		n -= optlen;
+		optr += optlen;
+	}
+	return hdrlen;
+}
+
+/*
+ *  For outgoing calls: choose a random initial send sequence
+ *  number, start all of the send-side sequence state from it,
+ *  and put a SYN on the send queue.
+ */
+void
+tcpsndsyn(Conv *s, Tcpctl *tcb)
+{
+	tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16);
+
+	/* nothing has been sent or acked yet */
+	tcb->snd.una = tcb->iss;
+	tcb->snd.wl2 = tcb->iss;
+	tcb->rttseq = tcb->iss;
+	tcb->snd.nxt = tcb->rttseq;
+	tcb->snd.ptr = tcb->rttseq;
+
+	/* one more flag to send; FORCE requests output */
+	tcb->flgcnt += 1;
+	tcb->flags = tcb->flags | FORCE;
+	tcb->sndsyntime = NOW;
+
+	/* set desired mss and scale */
+	tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale);
+}
+
+/*
+ *  Reply with a reset to the segment seg, which arrived from
+ *  source addressed to dest and carried length bytes of data.
+ *  reason is only logged.  seg is rewritten in place to describe
+ *  the outgoing reset.  Nothing is sent in answer to a segment
+ *  that itself carries RST.
+ */
+void
+sndrst(Proto *tcp, uchar *source, uchar *dest, ushort length, Tcp *seg, uchar version, char *reason)
+{
+	Block *pkt;
+	uchar outflags;
+	Tcppriv *priv;
+	Tcp4hdr ph4;
+	Tcp6hdr ph6;
+
+	netlog(tcp->f, Logtcp, "sndrst: %s\n", reason);
+
+	priv = tcp->priv;
+
+	if((seg->flags & RST) != 0)
+		return;
+
+	/* make pseudo header; addresses and ports swap roles for the reply */
+	if(version == V4){
+		memset(&ph4, 0, sizeof(ph4));
+		ph4.vihl = IP_VER4;
+		v6tov4(ph4.tcpsrc, dest);
+		v6tov4(ph4.tcpdst, source);
+		ph4.proto = IP_TCPPROTO;
+		hnputs(ph4.tcplen, TCP4_HDRSIZE);
+		hnputs(ph4.tcpsport, seg->dest);
+		hnputs(ph4.tcpdport, seg->source);
+	}else if(version == V6){
+		memset(&ph6, 0, sizeof(ph6));
+		ph6.vcf[0] = IP_VER6;
+		ipmove(ph6.tcpsrc, dest);
+		ipmove(ph6.tcpdst, source);
+		ph6.proto = IP_TCPPROTO;
+		hnputs(ph6.ploadlen, TCP6_HDRSIZE);
+		hnputs(ph6.tcpsport, seg->dest);
+		hnputs(ph6.tcpdport, seg->source);
+	}else
+		panic("sndrst: version %d", version);
+
+	priv->stats[OutRsts]++;
+
+	/* convince the other end that this reset is in band */
+	if((seg->flags & ACK) == 0){
+		/* no ack to echo: acknowledge everything the segment occupied */
+		outflags = RST|ACK;
+		seg->ack = seg->seq;
+		seg->seq = 0;
+		if(seg->flags & SYN)
+			seg->ack++;
+		seg->ack += length;
+		if(seg->flags & FIN)
+			seg->ack++;
+	}else{
+		outflags = RST;
+		seg->seq = seg->ack;
+		seg->ack = 0;
+	}
+	seg->flags = outflags;
+	seg->wnd = 0;
+	seg->urg = 0;
+	seg->mss = 0;
+	seg->ws = 0;
+
+	if(version == V4){
+		pkt = htontcp4(seg, nil, &ph4, nil);
+		if(pkt == nil)
+			return;
+		ipoput4(tcp->f, pkt, 0, MAXTTL, DFLTTOS, nil);
+	}else if(version == V6){
+		pkt = htontcp6(seg, nil, &ph6, nil);
+		if(pkt == nil)
+			return;
+		ipoput6(tcp->f, pkt, 0, MAXTTL, DFLTTOS, nil);
+	}else
+		panic("sndrst2: version %d", version);
+}
+
+/*
+ *  send a reset to the remote side and close the conversation
+ *  called with s QLOCKed
+ *
+ *  The reset is only attempted when a remote address is set, and
+ *  is best effort: an error raised while sending it, or failure
+ *  to build the header block, does not prevent the local close.
+ *
+ *  Returns nil on success, or commonerror() if an error is raised
+ *  outside the send attempt.
+ */
+char*
+tcphangup(Conv *s)
+{
+	Tcp seg;
+	Tcpctl *tcb;
+	Block *hbp;
+
+	tcb = (Tcpctl*)s->ptcl;
+	if(waserror())
+		return commonerror();
+	if(ipcmp(s->raddr, IPnoaddr) != 0) {
+		if(!waserror()){
+			seg.flags = RST | ACK;
+			seg.ack = tcb->rcv.nxt;
+			tcb->rcv.una = 0;
+			seg.seq = tcb->snd.ptr;
+			seg.wnd = 0;
+			seg.urg = 0;
+			seg.mss = 0;
+			seg.ws = 0;
+			switch(s->ipversion) {
+			case V4:
+				tcb->protohdr.tcp4hdr.vihl = IP_VER4;
+				hbp = htontcp4(&seg, nil, &tcb->protohdr.tcp4hdr, tcb);
+				/* as in sndrst: no header block, nothing to send */
+				if(hbp != nil)
+					ipoput4(s->p->f, hbp, 0, s->ttl, s->tos, s);
+				break;
+			case V6:
+				tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
+				hbp = htontcp6(&seg, nil, &tcb->protohdr.tcp6hdr, tcb);
+				if(hbp != nil)
+					ipoput6(s->p->f, hbp, 0, s->ttl, s->tos, s);
+				break;
+			default:
+				panic("tcphangup: version %d", s->ipversion);
+			}
+			poperror();
+		}
+	}
+	localclose(s, nil);
+	poperror();
+	return nil;
+}
+
+/*
+ *  (re)send a SYN ACK for the call lp that is waiting in limbo.
+ *
+ *  The segment is built from the addresses, ports and sequence
+ *  numbers recorded in lp.  A window scale is offered only if the
+ *  other side sent one; the value offered is remembered in
+ *  lp->sndscale.  lp->lastsend is updated on success.
+ *
+ *  Returns 0 on success, -1 if no header block could be built.
+ */
+int
+sndsynack(Proto *tcp, Limbo *lp)
+{
+	Block *hbp;
+	Tcp4hdr ph4;
+	Tcp6hdr ph6;
+	Tcp seg;
+	int scale;
+
+	/* make pseudo header */
+	switch(lp->version) {
+	case V4:
+		memset(&ph4, 0, sizeof(ph4));
+		ph4.vihl = IP_VER4;
+		v6tov4(ph4.tcpsrc, lp->laddr);
+		v6tov4(ph4.tcpdst, lp->raddr);
+		ph4.proto = IP_TCPPROTO;
+		hnputs(ph4.tcplen, TCP4_HDRSIZE);
+		hnputs(ph4.tcpsport, lp->lport);
+		hnputs(ph4.tcpdport, lp->rport);
+		break;
+	case V6:
+		memset(&ph6, 0, sizeof(ph6));
+		ph6.vcf[0] = IP_VER6;
+		ipmove(ph6.tcpsrc, lp->laddr);
+		ipmove(ph6.tcpdst, lp->raddr);
+		ph6.proto = IP_TCPPROTO;
+		hnputs(ph6.ploadlen, TCP6_HDRSIZE);
+		hnputs(ph6.tcpsport, lp->lport);
+		hnputs(ph6.tcpdport, lp->rport);
+		break;
+	default:
+		panic("sndsynack: version %d", lp->version);
+	}
+
+	seg.seq = lp->iss;
+	seg.ack = lp->irs+1;
+	seg.flags = SYN|ACK;
+	seg.urg = 0;
+	seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale);
+	seg.wnd = QMAX;
+
+	/* if the other side set scale, we should too */
+	if(lp->rcvscale){
+		seg.ws = scale;
+		lp->sndscale = scale;
+	} else {
+		seg.ws = 0;
+		lp->sndscale = 0;
+	}
+
+	switch(lp->version) {
+	case V4:
+		hbp = htontcp4(&seg, nil, &ph4, nil);
+		if(hbp == nil)
+			return -1;
+		ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
+		break;
+	case V6:
+		hbp = htontcp6(&seg, nil, &ph6, nil);
+		if(hbp == nil)
+			return -1;
+		ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
+		break;
+	default:
+		panic("sndsynack2: version %d", lp->version);
+	}
+	lp->lastsend = NOW;
+	return 0;
+}
+
+/*
+ *  Index into the limbo hash table for address a and port p:
+ *  the last two bytes of the address plus the port, masked
+ *  with LHTMASK.  Both arguments are parenthesised so that any
+ *  expression may be passed.
+ */
+#define hashipa(a, p) ( ( (a)[IPaddrlen-2] + (a)[IPaddrlen-1] + (p) )&LHTMASK )
+
+/*
+ *  put a call into limbo and respond with a SYN ACK
+ *
+ *  If the same call (addresses, ports and version) is already in
+ *  limbo only its initial receive sequence number is refreshed.
+ *  Otherwise a new entry is appended to the hash chain; once
+ *  Maxlimbo entries exist the head of the chain is recycled
+ *  instead of allocating.  If the SYN ACK cannot be sent the
+ *  entry is removed again.
+ *
+ *  called with proto locked
+ */
+static void
+limbo(Conv *s, uchar *source, uchar *dest, Tcp *seg, int version)
+{
+	Limbo *lp, **l;
+	Tcppriv *tpriv;
+	int h;
+
+	tpriv = s->p->priv;
+	h = hashipa(source, seg->source);
+
+	for(l = &tpriv->lht[h]; *l != nil; l = &lp->next){
+		lp = *l;
+		if(lp->lport != seg->dest || lp->rport != seg->source || lp->version != version)
+			continue;
+		if(ipcmp(lp->raddr, source) != 0)
+			continue;
+		if(ipcmp(lp->laddr, dest) != 0)
+			continue;
+
+		/* each new SYN restarts the retransmits */
+		lp->irs = seg->seq;
+		break;
+	}
+	lp = *l;
+	if(lp == nil){
+		/* no match: l now addresses the nil link at the end of the chain */
+		if(tpriv->nlimbo >= Maxlimbo && tpriv->lht[h]){
+			lp = tpriv->lht[h];
+			tpriv->lht[h] = lp->next;
+			/*
+			 *  if the head was the only entry, l addresses its own
+			 *  next field; linking through that would leave the
+			 *  entry pointing at itself and out of the table.
+			 */
+			if(l == &lp->next)
+				l = &tpriv->lht[h];
+			lp->next = nil;
+			/* the recycled entry describes a new call */
+			lp->rexmits = 0;
+		} else {
+			lp = malloc(sizeof(*lp));
+			if(lp == nil)
+				return;
+			tpriv->nlimbo++;
+		}
+		*l = lp;
+		lp->version = version;
+		ipmove(lp->laddr, dest);
+		ipmove(lp->raddr, source);
+		lp->lport = seg->dest;
+		lp->rport = seg->source;
+		lp->mss = seg->mss;
+		lp->rcvscale = seg->ws;
+		lp->irs = seg->seq;
+		lp->iss = (nrand(1<<16)<<16)|nrand(1<<16);
+	}
+
+	if(sndsynack(s->p, lp) < 0){
+		*l = lp->next;
+		tpriv->nlimbo--;
+		free(lp);
+	}
+}
+
+/*
+ *  resend SYN ACK's once every SYNACK_RXTIMER ms.
+ *
+ *  Walks the limbo hash table.  An entry whose retransmit
+ *  interval has elapsed is either dropped (too many retransmits,
+ *  or the SYN ACK could not be sent) or has its SYN ACK resent.
+ *  Does nothing if the proto lock cannot be taken immediately.
+ */
+static void
+limborexmit(Proto *tcp)
+{
+	Tcppriv *tpriv;
+	Limbo **l, *lp;
+	int h;
+	int seen;
+	ulong now;
+
+	tpriv = tcp->priv;
+
+	if(!CANQLOCK(tcp))
+		return;
+	seen = 0;
+	now = NOW;
+	for(h = 0; h < NLHT && seen < tpriv->nlimbo; h++){
+		/*
+		 *  l addresses the link to the current entry.  It must be
+		 *  advanced whenever the entry is kept, otherwise the same
+		 *  entry is visited again and the rest is never reached.
+		 */
+		for(l = &tpriv->lht[h]; *l != nil && seen < tpriv->nlimbo; ){
+			lp = *l;
+			seen++;
+
+			/* not due yet; the interval grows with each retransmit */
+			if(now - lp->lastsend < (lp->rexmits+1)*SYNACK_RXTIMER){
+				l = &lp->next;
+				continue;
+			}
+
+			/* give up on the call after more than 5 retransmits */
+			if(++(lp->rexmits) > 5){
+				tpriv->nlimbo--;
+				*l = lp->next;
+				free(lp);
+				continue;
+			}
+
+			/* if we're being attacked, don't bother resending SYN ACK's */
+			if(tpriv->nlimbo > 100){
+				l = &lp->next;
+				continue;
+			}
+
+			if(sndsynack(tcp, lp) < 0){
+				tpriv->nlimbo--;
+				*l = lp->next;
+				free(lp);
+				continue;
+			}
+
+			l = &lp->next;
+		}
+	}
+	QUNLOCK(tcp);
+}
+
+/*
+ *  A reset arrived for a listener: if the call it names is in
+ *  limbo and the reset's sequence number directly follows that
+ *  call's SYN, discard the call.
+ *
+ *  called with proto locked
+ */
+static void
+limborst(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version)
+{
+	Tcppriv *priv;
+	Limbo **link, *ent;
+
+	priv = s->p->priv;
+
+	/* walk the hash chain looking for the call */
+	link = &priv->lht[hashipa(src, segp->source)];
+	while((ent = *link) != nil){
+		if(ent->lport == segp->dest && ent->rport == segp->source
+		&& ent->version == version
+		&& ipcmp(ent->laddr, dst) == 0
+		&& ipcmp(ent->raddr, src) == 0){
+			/* RST can only follow the SYN */
+			if(segp->seq == ent->irs+1){
+				priv->nlimbo--;
+				*link = ent->next;
+				free(ent);
+			}
+			return;
+		}
+		link = &ent->next;
+	}
+}
+
+/*
+ *  come here when we finally get an ACK to our SYN-ACK.
+ *  lookup call in limbo.  if found, create a new conversation
+ *
+ *  The new conversation starts as a copy of the listener s's
+ *  control block, takes its sequence numbers, mss and window
+ *  scales from the limbo entry, is set to Established and is
+ *  added to the conversation hash table.  The limbo entry is
+ *  consumed whether or not the conversation could be created.
+ *
+ *  Returns the new conversation, or nil if the segment is not a
+ *  plain ACK, no matching call is in limbo, the sequence numbers
+ *  do not match, or Fsnewcall fails.
+ *
+ *  called with proto locked
+ */
+static Conv*
+tcpincoming(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version)
+{
+	Conv *new;
+	Tcpctl *tcb;
+	Tcppriv *tpriv;
+	Tcp4hdr *h4;
+	Tcp6hdr *h6;
+	Limbo *lp, **l;
+	int h;
+
+	/* unless it's just an ack, it can't be someone coming out of limbo */
+	if((segp->flags & SYN) || (segp->flags & ACK) == 0)
+		return nil;
+
+	tpriv = s->p->priv;
+
+	/* find a call in limbo */
+	h = hashipa(src, segp->source);
+	for(l = &tpriv->lht[h]; (lp = *l) != nil; l = &lp->next){
+		netlog(s->p->f, Logtcp, "tcpincoming s %I,%ux/%I,%ux d %I,%ux/%I,%ux v %d/%d\n",
+			src, segp->source, lp->raddr, lp->rport,
+			dst, segp->dest, lp->laddr, lp->lport,
+			version, lp->version
+		);
+
+		if(lp->lport != segp->dest || lp->rport != segp->source || lp->version != version)
+			continue;
+		if(ipcmp(lp->laddr, dst) != 0)
+			continue;
+		if(ipcmp(lp->raddr, src) != 0)
+			continue;
+
+		/* we're assuming no data with the initial SYN */
+		if(segp->seq != lp->irs+1 || segp->ack != lp->iss+1){
+			netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux\n",
+				segp->seq, lp->irs+1, segp->ack, lp->iss+1);
+			lp = nil;
+		} else {
+			/* unlink the entry; from here on it is ours to free */
+			tpriv->nlimbo--;
+			*l = lp->next;
+		}
+		break;
+	}
+	if(lp == nil)
+		return nil;
+
+	new = Fsnewcall(s, src, segp->source, dst, segp->dest, version);
+	if(new == nil){
+		/* the entry is already off the chain; don't leak it */
+		free(lp);
+		return nil;
+	}
+
+	/* clone the listener's control block, then rebind the timers */
+	memmove(new->ptcl, s->ptcl, sizeof(Tcpctl));
+	tcb = (Tcpctl*)new->ptcl;
+	tcb->flags &= ~CLONE;
+	tcb->timer.arg = new;
+	tcb->timer.state = TcptimerOFF;
+	tcb->acktimer.arg = new;
+	tcb->acktimer.state = TcptimerOFF;
+	tcb->katimer.arg = new;
+	tcb->katimer.state = TcptimerOFF;
+	tcb->rtt_timer.arg = new;
+	tcb->rtt_timer.state = TcptimerOFF;
+
+	tcb->irs = lp->irs;
+	tcb->rcv.nxt = tcb->irs+1;
+	tcb->rcv.urg = tcb->rcv.nxt;
+
+	tcb->iss = lp->iss;
+	tcb->rttseq = tcb->iss;
+	tcb->snd.wl2 = tcb->iss;
+	tcb->snd.una = tcb->iss+1;
+	tcb->snd.ptr = tcb->iss+1;
+	tcb->snd.nxt = tcb->iss+1;
+	tcb->flgcnt = 0;
+	tcb->flags |= SYNACK;
+
+	/* our sending max segment size cannot be bigger than what he asked for */
+	if(lp->mss != 0 && lp->mss < tcb->mss)
+		tcb->mss = lp->mss;
+
+	/* window scaling */
+	tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale);
+
+	/* the congestion window always starts out as a single segment */
+	tcb->snd.wnd = segp->wnd;
+	tcb->cwind = tcb->mss;
+
+	/* set initial round trip time */
+	tcb->sndsyntime = lp->lastsend+lp->rexmits*SYNACK_RXTIMER;
+	tcpsynackrtt(new);
+
+	free(lp);
+
+	/* set up proto header */
+	switch(version){
+	case V4:
+		h4 = &tcb->protohdr.tcp4hdr;
+		memset(h4, 0, sizeof(*h4));
+		h4->proto = IP_TCPPROTO;
+		hnputs(h4->tcpsport, new->lport);
+		hnputs(h4->tcpdport, new->rport);
+		v6tov4(h4->tcpsrc, dst);
+		v6tov4(h4->tcpdst, src);
+		break;
+	case V6:
+		h6 = &tcb->protohdr.tcp6hdr;
+		memset(h6, 0, sizeof(*h6));
+		h6->proto = IP_TCPPROTO;
+		hnputs(h6->tcpsport, new->lport);
+		hnputs(h6->tcpdport, new->rport);
+		ipmove(h6->tcpsrc, dst);
+		ipmove(h6->tcpdst, src);
+		break;
+	default:
+		panic("tcpincoming: version %d", new->ipversion);
+	}
+
+	tcpsetstate(new, Established);
+
+	iphtadd(&tpriv->ht, new);
+
+	return new;
+}
+
+/*
+ *  Return 1 if x lies in the inclusive range low..high, else 0.
+ *  When low is greater than high the range is taken to wrap
+ *  around the top of the number space.
+ */
+int
+seq_within(ulong x, ulong low, ulong high)
+{
+	if(low > high)
+		return x >= low || x <= high;
+	return low <= x && x <= high;
+}
+
+/*
+ *  Sequence comparison: true if x is before y, judged by the
+ *  sign of their difference converted to int.
+ */
+int
+seq_lt(ulong x, ulong y)
+{
+	int diff;
+
+	diff = (int)(x-y);
+	return diff < 0;
+}
+
+/*
+ *  Sequence comparison: true if x is before or equal to y,
+ *  judged by the sign of their difference converted to int.
+ */
+int
+seq_le(ulong x, ulong y)
+{
+	int diff;
+
+	diff = (int)(x-y);
+	return diff <= 0;
+}
+
+/*
+ *  Sequence comparison: true if x is after y, judged by the
+ *  sign of their difference converted to int.
+ */
+int
+seq_gt(ulong x, ulong y)
+{
+	int diff;
+
+	diff = (int)(x-y);
+	return diff > 0;
+}
+
+/*
+ *  Sequence comparison: true if x is after or equal to y,
+ *  judged by the sign of their difference converted to int.
+ */
+int
+seq_ge(ulong x, ulong y)
+{
+	int diff;
+
+	diff = (int)(x-y);
+	return diff >= 0;
+}
+
+/*
+ *  Seed the round trip estimate from the time elapsed since
+ *  tcb->sndsyntime, i.e. between the first SYN and its ack,
+ *  and stop the round trip timer.
+ */
+void
+tcpsynackrtt(Conv *s)
+{
+	Tcpctl *tcb;
+	Tcppriv *priv;
+	int elapsed;
+
+	priv = s->p->priv;
+	tcb = (Tcpctl*)s->ptcl;
+
+	/* srtt and mdev are kept scaled by their gain shifts */
+	elapsed = NOW - tcb->sndsyntime;
+	tcb->srtt = elapsed<<LOGAGAIN;
+	tcb->mdev = elapsed<<LOGDGAIN;
+
+	/* halt round trip timer */
+	tcphalt(priv, &tcb->rtt_timer);
+}
+
+/*
+ *  Process the acknowledgement carried by seg for conversation s.
+ *
+ *  In order: ignore (but force output for) acks beyond snd.nxt,
+ *  count duplicate acks and start a fast retransmit at
+ *  TCPREXMTTHRESH, record the advertised window, and return
+ *  early if the ack does not advance snd.una.  For an advancing
+ *  ack: grow the congestion window, take a round trip sample,
+ *  discard the acked bytes from the write queue, move snd.una
+ *  (and snd.urg, snd.ptr) forward, and restart or stop the
+ *  retransmit timer.
+ */
+void
+update(Conv *s, Tcp *seg)
+{
+	int rtt, delta;
+	Tcpctl *tcb;
+	ulong acked;
+	ulong expand;
+	Tcppriv *tpriv;
+
+	tpriv = s->p->priv;
+	tcb = (Tcpctl*)s->ptcl;
+
+	/* ack is beyond anything sent (snd.nxt): force output and ignore it */
+	if(seq_gt(seg->ack, tcb->snd.nxt)) {
+		tcb->flags |= FORCE;
+		return;
+	}
+
+	/* added by Dong Lin for fast retransmission */
+	if(seg->ack == tcb->snd.una
+	&& tcb->snd.una != tcb->snd.nxt
+	&& seg->len == 0
+	&& seg->wnd == tcb->snd.wnd) {
+
+		/* this is a pure ack w/o window update */
+		netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %d advwin %d\n",
+			tcb->snd.dupacks, seg->ack, tcb->snd.wnd, seg->wnd);
+
+		if(++tcb->snd.dupacks == TCPREXMTTHRESH) {
+			/*
+			 *  tahoe tcp rxt the packet, half sshthresh,
+ 			 *  and set cwnd to one packet
+			 */
+			tcb->snd.recovery = 1;
+			/* recovery lasts until this sequence number is acked */
+			tcb->snd.rxt = tcb->snd.nxt;
+			netlog(s->p->f, Logtcprxmt, "fast rxt %lud, nxt %lud\n", tcb->snd.una, tcb->snd.nxt);
+			tcprxmit(s);
+		} else {
+			/* do reno tcp here. */
+		}
+	}
+
+	/*
+	 *  update window: taken from a newer ack, or from the same
+	 *  ack when it advertises a larger window
+	 */
+	if(seq_gt(seg->ack, tcb->snd.wl2)
+	|| (tcb->snd.wl2 == seg->ack && seg->wnd > tcb->snd.wnd)){
+		tcb->snd.wnd = seg->wnd;
+		tcb->snd.wl2 = seg->ack;
+	}
+
+	/* nothing new acked: the rest of the routine does not apply */
+	if(!seq_gt(seg->ack, tcb->snd.una)){
+		/*
+		 *  don't let us hangup if sending into a closed window and
+		 *  we're still getting acks
+		 */
+		if((tcb->flags&RETRAN) && tcb->snd.wnd == 0){
+			tcb->backedoff = MAXBACKMS/4;
+		}
+		return;
+	}
+
+	/*
+	 *  any positive ack turns off fast rxt,
+	 *  (should we do new-reno on partial acks?)
+	 */
+	if(!tcb->snd.recovery || seq_ge(seg->ack, tcb->snd.rxt)) {
+		tcb->snd.dupacks = 0;
+		tcb->snd.recovery = 0;
+	} else
+		netlog(s->p->f, Logtcp, "rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind);
+
+	/* Compute the new send window size */
+	acked = seg->ack - tcb->snd.una;
+
+	/*
+	 *  avoid slow start and timers for SYN acks; one unit of
+	 *  acked and one of flgcnt are taken off (presumably the
+	 *  SYN's own sequence number)
+	 */
+	if((tcb->flags & SYNACK) == 0) {
+		tcb->flags |= SYNACK;
+		acked--;
+		tcb->flgcnt--;
+		goto done;
+	}
+
+	/* slow start as long as we're not recovering from lost packets */
+	if(tcb->cwind < tcb->snd.wnd && !tcb->snd.recovery) {
+		if(tcb->cwind < tcb->ssthresh) {
+			/* below ssthresh: grow by the bytes acked, at most one mss */
+			expand = tcb->mss;
+			if(acked < expand)
+				expand = acked;
+		}
+		else
+			expand = ((int)tcb->mss * tcb->mss) / tcb->cwind;
+
+		/* never let cwind wrap or exceed the advertised window */
+		if(tcb->cwind + expand < tcb->cwind)
+			expand = tcb->snd.wnd - tcb->cwind;
+		if(tcb->cwind + expand > tcb->snd.wnd)
+			expand = tcb->snd.wnd - tcb->cwind;
+		tcb->cwind += expand;
+	}
+
+	/* Adjust the timers according to the round trip time */
+	if(tcb->rtt_timer.state == TcptimerON && seq_ge(seg->ack, tcb->rttseq)) {
+		tcphalt(tpriv, &tcb->rtt_timer);
+		/* no sample is taken while RETRAN is set */
+		if((tcb->flags&RETRAN) == 0) {
+			tcb->backoff = 0;
+			tcb->backedoff = 0;
+			rtt = tcb->rtt_timer.start - tcb->rtt_timer.count;
+			if(rtt == 0)
+				rtt = 1;	/* otherwise all close systems will rexmit in 0 time */
+			rtt *= MSPTICK;
+			if(tcb->srtt == 0) {
+				/* first sample seeds both estimators */
+				tcb->srtt = rtt << LOGAGAIN;
+				tcb->mdev = rtt << LOGDGAIN;
+			} else {
+				/* srtt and mdev are kept scaled; fold in the error */
+				delta = rtt - (tcb->srtt>>LOGAGAIN);
+				tcb->srtt += delta;
+				if(tcb->srtt <= 0)
+					tcb->srtt = 1;
+
+				delta = abs(delta) - (tcb->mdev>>LOGDGAIN);
+				tcb->mdev += delta;
+				if(tcb->mdev <= 0)
+					tcb->mdev = 1;
+			}
+			tcpsettimer(tcb);
+		}
+	}
+
+done:
+	/*
+	 *  drop the acked bytes from the write queue; if fewer bytes
+	 *  than acked were queued, the difference is charged to a
+	 *  pending flag (presumably a FIN)
+	 */
+	if(qdiscard(s->wq, acked) < acked)
+		tcb->flgcnt--;
+
+	tcb->snd.una = seg->ack;
+	if(seq_gt(seg->ack, tcb->snd.urg))
+		tcb->snd.urg = seg->ack;
+
+	/* keep the retransmit timer running only while data is outstanding */
+	if(tcb->snd.una != tcb->snd.nxt)
+		tcpgo(tpriv, &tcb->timer);
+	else
+		tcphalt(tpriv, &tcb->timer);
+
+	if(seq_lt(tcb->snd.ptr, tcb->snd.una))
+		tcb->snd.ptr = tcb->snd.una;
+
+	tcb->flags &= ~RETRAN;
+	tcb->backoff = 0;
+	tcb->backedoff = 0;
+}
+
+/*
+ *  Input routine: handle one received TCP segment in bp.
+ *
+ *  The IP version is taken from the first header byte.  The
+ *  checksum is verified, the header parsed into seg, and the
+ *  block trimmed to the length the datagram claims.  With the
+ *  protocol locked the conversation is looked up; segments for
+ *  a listener go through the limbo routines.  The rest runs with
+ *  the conversation locked: per-state handling, ack processing
+ *  via update(), queuing of data, FIN handling, and draining of
+ *  the resequence queue.
+ *
+ *  Exits: "output" calls tcpoutput; "raise" frees bp and calls
+ *  tcpkick; "reset" (protocol still locked) answers with a reset.
+ *  The interface argument is not used.
+ */
+void
+tcpiput(Proto *tcp, Ipifc* _, Block *bp)
+{
+	Tcp seg;
+	Tcp4hdr *h4;
+	Tcp6hdr *h6;
+	int hdrlen;
+	Tcpctl *tcb;
+	ushort length, csum;
+	uchar source[IPaddrlen], dest[IPaddrlen];
+	Conv *s;
+	Fs *f;
+	Tcppriv *tpriv;
+	uchar version;
+
+	f = tcp->f;
+	tpriv = tcp->priv;
+
+	tpriv->stats[InSegs]++;
+
+	/* the same bytes viewed as either header; vihl decides which */
+	h4 = (Tcp4hdr*)(bp->rp);
+	h6 = (Tcp6hdr*)(bp->rp);
+
+	if((h4->vihl&0xF0)==IP_VER4) {
+		version = V4;
+		length = nhgets(h4->length);
+		v4tov6(dest, h4->tcpdst);
+		v4tov6(source, h4->tcpsrc);
+
+		/* fill in the pseudo header fields before checksumming */
+		h4->Unused = 0;
+		hnputs(h4->tcplen, length-TCP4_PKT);
+		/* skipped if Btcpck is set on the block or the checksum field is zero */
+		if(!(bp->flag & Btcpck) && (h4->tcpcksum[0] || h4->tcpcksum[1]) &&
+			ptclcsum(bp, TCP4_IPLEN, length-TCP4_IPLEN)) {
+			tpriv->stats[CsumErrs]++;
+			tpriv->stats[InErrs]++;
+			netlog(f, Logtcp, "bad tcp proto cksum\n");
+			freeblist(bp);
+			return;
+		}
+
+		hdrlen = ntohtcp4(&seg, &bp);
+		if(hdrlen < 0){
+			tpriv->stats[HlenErrs]++;
+			tpriv->stats[InErrs]++;
+			netlog(f, Logtcp, "bad tcp hdr len\n");
+			return;
+		}
+
+		/* trim the packet to the size claimed by the datagram */
+		length -= hdrlen+TCP4_PKT;
+		bp = trimblock(bp, hdrlen+TCP4_PKT, length);
+		if(bp == nil){
+			tpriv->stats[LenErrs]++;
+			tpriv->stats[InErrs]++;
+			netlog(f, Logtcp, "tcp len < 0 after trim\n");
+			return;
+		}
+	}
+	else {
+		/* saved so they can be put back after the checksum */
+		int ttl = h6->ttl;
+		int proto = h6->proto;
+
+		version = V6;
+		length = nhgets(h6->ploadlen);
+		ipmove(dest, h6->tcpdst);
+		ipmove(source, h6->tcpsrc);
+
+		/* rearrange the header fields for the checksum */
+		h6->ploadlen[0] = h6->ploadlen[1] = h6->proto = 0;
+		h6->ttl = proto;
+		hnputl(h6->vcf, length);
+		if((h6->tcpcksum[0] || h6->tcpcksum[1]) &&
+		    (csum = ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) != 0) {
+			tpriv->stats[CsumErrs]++;
+			tpriv->stats[InErrs]++;
+			netlog(f, Logtcp,
+			    "bad tcpv6 proto cksum: got %#ux, computed %#ux\n",
+				h6->tcpcksum[0]<<8 | h6->tcpcksum[1], csum);
+			freeblist(bp);
+			return;
+		}
+		/* restore ttl, proto and payload length */
+		h6->ttl = ttl;
+		h6->proto = proto;
+		hnputs(h6->ploadlen, length);
+
+		hdrlen = ntohtcp6(&seg, &bp);
+		if(hdrlen < 0){
+			tpriv->stats[HlenErrs]++;
+			tpriv->stats[InErrs]++;
+			netlog(f, Logtcp, "bad tcpv6 hdr len\n");
+			return;
+		}
+
+		/* trim the packet to the size claimed by the datagram */
+		length -= hdrlen;
+		bp = trimblock(bp, hdrlen+TCP6_PKT, length);
+		if(bp == nil){
+			tpriv->stats[LenErrs]++;
+			tpriv->stats[InErrs]++;
+			netlog(f, Logtcp, "tcpv6 len < 0 after trim\n");
+			return;
+		}
+	}
+
+	/* from here on length is the number of data bytes in the segment */
+
+	/* lock protocol while searching for a conversation */
+	QLOCK(tcp);
+
+	/* Look for a matching conversation */
+	s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest);
+	if(s == nil){
+		netlog(f, Logtcp, "iphtlook failed\n");
+reset:
+		/* also reached by goto, with the protocol still locked */
+		QUNLOCK(tcp);
+		sndrst(tcp, source, dest, length, &seg, version, "no conversation");
+		freeblist(bp);
+		return;
+	}
+
+	/* if it's a listener, look for the right flags and get a new conv */
+	tcb = (Tcpctl*)s->ptcl;
+	if(tcb->state == Listen){
+		if(seg.flags & RST){
+			limborst(s, &seg, source, dest, version);
+			QUNLOCK(tcp);
+			freeblist(bp);
+			return;
+		}
+
+		/* if this is a new SYN, put the call into limbo */
+		if((seg.flags & SYN) && (seg.flags & ACK) == 0){
+			limbo(s, source, dest, &seg, version);
+			QUNLOCK(tcp);
+			freeblist(bp);
+			return;
+		}
+
+		/*
+		 *  if there's a matching call in limbo, tcpincoming will
+		 *  return a new conversation for it (tcpincoming sets it
+		 *  to Established)
+		 */
+		s = tcpincoming(s, &seg, source, dest, version);
+		if(s == nil)
+			goto reset;
+	}
+
+	/* The rest of the input state machine is run with the control block
+	 * locked and implements the state machine directly out of the RFC.
+	 * Out-of-band data is ignored - it was always a bad idea.
+	 */
+	tcb = (Tcpctl*)s->ptcl;
+	if(waserror()){
+		QUNLOCK(s);
+		nexterror();
+	}
+	/* take the conversation lock before letting go of the protocol lock */
+	QLOCK(s);
+	QUNLOCK(tcp);
+
+	/* fix up window */
+	seg.wnd <<= tcb->rcv.scale;
+
+	/* every input packet in puts off the keep alive time out */
+	tcpsetkacounter(tcb);
+
+	switch(tcb->state) {
+	case Closed:
+		sndrst(tcp, source, dest, length, &seg, version, "sending to Closed");
+		goto raise;
+	case Syn_sent:
+		if(seg.flags & ACK) {
+			if(!seq_within(seg.ack, tcb->iss+1, tcb->snd.nxt)) {
+				sndrst(tcp, source, dest, length, &seg, version,
+					 "bad seq in Syn_sent");
+				goto raise;
+			}
+		}
+		if(seg.flags & RST) {
+			if(seg.flags & ACK)
+				localclose(s, Econrefused);
+			goto raise;
+		}
+
+		if(seg.flags & SYN) {
+			procsyn(s, &seg);
+			if(seg.flags & ACK){
+				/* SYN and ACK together: the connection is up */
+				update(s, &seg);
+				tcpsynackrtt(s);
+				tcpsetstate(s, Established);
+				tcpsetscale(s, tcb, seg.ws, tcb->scale);
+			}
+			else {
+				/* SYN without ACK */
+				tcb->time = NOW;
+				tcpsetstate(s, Syn_received);	/* DLP - shouldn't this be a reset? */
+			}
+
+			/* data or FIN came with the SYN: keep processing below */
+			if(length != 0 || (seg.flags & FIN))
+				break;
+
+			freeblist(bp);
+			goto output;
+		}
+		else
+			freeblist(bp);
+
+		QUNLOCK(s);
+		poperror();
+		return;
+	case Syn_received:
+		/* doesn't matter if it's the correct ack, we're just trying to set timing */
+		if(seg.flags & ACK)
+			tcpsynackrtt(s);
+		break;
+	}
+
+	/*
+	 *  One DOS attack is to open connections to us and then forget about them,
+	 *  thereby tying up a conv at no long term cost to the attacker.
+	 *  This is an attempt to defeat these stateless DOS attacks.  See
+	 *  corresponding code in tcpsendka().
+	 */
+	if(tcb->state != Syn_received && (seg.flags & RST) == 0){
+		if(tcpporthogdefense
+		&& seq_within(seg.ack, tcb->snd.una-(1<<31), tcb->snd.una-(1<<29))){
+			print("stateless hog %I.%d->%I.%d f %ux %lux - %lux - %lux\n",
+				source, seg.source, dest, seg.dest, seg.flags,
+				tcb->snd.una-(1<<31), seg.ack, tcb->snd.una-(1<<29));
+			localclose(s, "stateless hog");
+		}
+	}
+
+	/* Cut the data to fit the receive window */
+	if(tcptrim(tcb, &seg, &bp, &length) == -1) {
+		/* tcptrim rejected the segment; its ack is still processed */
+		netlog(f, Logtcp, "tcp len < 0, %lud %d\n", seg.seq, length);
+		update(s, &seg);
+		if(qlen(s->wq)+tcb->flgcnt == 0 && tcb->state == Closing) {
+			tcphalt(tpriv, &tcb->rtt_timer);
+			tcphalt(tpriv, &tcb->acktimer);
+			tcphalt(tpriv, &tcb->katimer);
+			tcpsetstate(s, Time_wait);
+			tcb->timer.start = MSL2*(1000 / MSPTICK);
+			tcpgo(tpriv, &tcb->timer);
+		}
+		if(!(seg.flags & RST)) {
+			tcb->flags |= FORCE;
+			goto output;
+		}
+		QUNLOCK(s);
+		poperror();
+		return;
+	}
+
+	/* Cannot accept so answer with a rst */
+	if(length && tcb->state == Closed) {
+		sndrst(tcp, source, dest, length, &seg, version, "sending to Closed");
+		goto raise;
+	}
+
+	/* The segment is beyond the current receive pointer so
+	 * queue the data in the resequence queue
+	 */
+	if(seg.seq != tcb->rcv.nxt)
+	if(length != 0 || (seg.flags & (SYN|FIN))) {
+		update(s, &seg);
+		if(addreseq(tcb, tpriv, &seg, bp, length) < 0)
+			print("reseq %I.%d -> %I.%d\n", s->raddr, s->rport, s->laddr, s->lport);
+		tcb->flags |= FORCE;
+		goto output;
+	}
+
+	/*
+	 *  keep looping till we've processed this packet plus any
+	 *  adjacent packets in the resequence queue
+	 */
+	for(;;) {
+		if(seg.flags & RST) {
+			if(tcb->state == Established) {
+				tpriv->stats[EstabResets]++;
+				if(tcb->rcv.nxt != seg.seq)
+					print("out of order RST rcvd: %I.%d -> %I.%d, rcv.nxt %lux seq %lux\n", s->raddr, s->rport, s->laddr, s->lport, tcb->rcv.nxt, seg.seq);
+			}
+			localclose(s, Econrefused);
+			goto raise;
+		}
+
+		/* a segment without ACK is dropped at this point */
+		if((seg.flags&ACK) == 0)
+			goto raise;
+
+		switch(tcb->state) {
+		case Syn_received:
+			if(!seq_within(seg.ack, tcb->snd.una+1, tcb->snd.nxt)){
+				sndrst(tcp, source, dest, length, &seg, version,
+					"bad seq in Syn_received");
+				goto raise;
+			}
+			update(s, &seg);
+			tcpsetstate(s, Established);
+			/* fall through */
+		case Established:
+		case Close_wait:
+			update(s, &seg);
+			break;
+		case Finwait1:
+			update(s, &seg);
+			/* nothing left unacked: move on to Finwait2 */
+			if(qlen(s->wq)+tcb->flgcnt == 0){
+				tcphalt(tpriv, &tcb->rtt_timer);
+				tcphalt(tpriv, &tcb->acktimer);
+				tcpsetkacounter(tcb);
+				tcb->time = NOW;
+				tcpsetstate(s, Finwait2);
+				tcb->katimer.start = MSL2 * (1000 / MSPTICK);
+				tcpgo(tpriv, &tcb->katimer);
+			}
+			break;
+		case Finwait2:
+			update(s, &seg);
+			break;
+		case Closing:
+			update(s, &seg);
+			/* nothing left unacked: move on to Time_wait */
+			if(qlen(s->wq)+tcb->flgcnt == 0) {
+				tcphalt(tpriv, &tcb->rtt_timer);
+				tcphalt(tpriv, &tcb->acktimer);
+				tcphalt(tpriv, &tcb->katimer);
+				tcpsetstate(s, Time_wait);
+				tcb->timer.start = MSL2*(1000 / MSPTICK);
+				tcpgo(tpriv, &tcb->timer);
+			}
+			break;
+		case Last_ack:
+			update(s, &seg);
+			if(qlen(s->wq)+tcb->flgcnt == 0) {
+				localclose(s, nil);
+				goto raise;
+			}
+			/* fall through */
+		case Time_wait:
+			tcb->flags |= FORCE;
+			if(tcb->timer.state != TcptimerON)
+				tcpgo(tpriv, &tcb->timer);
+		}
+
+		/* urgent data is skipped over rather than delivered */
+		if((seg.flags&URG) && seg.urg) {
+			if(seq_gt(seg.urg + seg.seq, tcb->rcv.urg)) {
+				tcb->rcv.urg = seg.urg + seg.seq;
+				pullblock(&bp, seg.urg);
+			}
+		}
+		else
+		if(seq_gt(tcb->rcv.nxt, tcb->rcv.urg))
+			tcb->rcv.urg = tcb->rcv.nxt;
+
+		if(length == 0) {
+			if(bp != nil)
+				freeblist(bp);
+		}
+		else {
+			switch(tcb->state){
+			default:
+				/* Ignore segment text */
+				if(bp != nil)
+					freeblist(bp);
+				break;
+
+			case Syn_received:
+			case Established:
+			case Finwait1:
+				/* If we still have some data place on
+				 * receive queue
+				 */
+				if(bp) {
+					bp = packblock(bp);
+					if(bp == nil)
+						panic("tcp packblock");
+					qpassnolim(s->rq, bp);
+					bp = nil;
+
+					/*
+					 *  Force an ack every 2 data messages.  This is
+					 *  a hack for rob to make his home system run
+					 *  faster.
+					 *
+					 *  this also keeps the standard TCP congestion
+					 *  control working since it needs an ack every
+					 *  2 max segs worth.  This is not quite that,
+					 *  but under a real stream is equivalent since
+					 *  every packet has a max seg in it.
+					 */
+					if(++(tcb->rcv.una) >= 2)
+						tcb->flags |= FORCE;
+				}
+				tcb->rcv.nxt += length;
+
+				/*
+				 *  update our rcv window
+				 */
+				tcprcvwin(s);
+
+				/*
+				 *  turn on the acktimer if there's something
+				 *  to ack
+				 */
+				if(tcb->acktimer.state != TcptimerON)
+					tcpgo(tpriv, &tcb->acktimer);
+
+				break;
+			case Finwait2:
+				/* no process to read the data, send a reset */
+				if(bp != nil)
+					freeblist(bp);
+				sndrst(tcp, source, dest, length, &seg, version,
+					"send to Finwait2");
+				QUNLOCK(s);
+				poperror();
+				return;
+			}
+		}
+
+		if(seg.flags & FIN) {
+			tcb->flags |= FORCE;
+
+			/* where the FIN is accepted, rcv.nxt is advanced past it */
+			switch(tcb->state) {
+			case Syn_received:
+			case Established:
+				tcb->rcv.nxt++;
+				tcpsetstate(s, Close_wait);
+				break;
+			case Finwait1:
+				tcb->rcv.nxt++;
+				if(qlen(s->wq)+tcb->flgcnt == 0) {
+					tcphalt(tpriv, &tcb->rtt_timer);
+					tcphalt(tpriv, &tcb->acktimer);
+					tcphalt(tpriv, &tcb->katimer);
+					tcpsetstate(s, Time_wait);
+					tcb->timer.start = MSL2*(1000/MSPTICK);
+					tcpgo(tpriv, &tcb->timer);
+				}
+				else
+					tcpsetstate(s, Closing);
+				break;
+			case Finwait2:
+				tcb->rcv.nxt++;
+				tcphalt(tpriv, &tcb->rtt_timer);
+				tcphalt(tpriv, &tcb->acktimer);
+				tcphalt(tpriv, &tcb->katimer);
+				tcpsetstate(s, Time_wait);
+				tcb->timer.start = MSL2 * (1000/MSPTICK);
+				tcpgo(tpriv, &tcb->timer);
+				break;
+			case Close_wait:
+			case Closing:
+			case Last_ack:
+				break;
+			case Time_wait:
+				tcpgo(tpriv, &tcb->timer);
+				break;
+			}
+		}
+
+		/*
+		 *  get next adjacent segment from the resequence queue.
+		 *  dump/trim any overlapping segments
+		 */
+		for(;;) {
+			if(tcb->reseq == nil)
+				goto output;
+
+			/* the first queued segment starts beyond rcv.nxt */
+			if(seq_ge(tcb->rcv.nxt, tcb->reseq->seg.seq) == 0)
+				goto output;
+
+			getreseq(tcb, &seg, &bp, &length);
+
+			if(tcptrim(tcb, &seg, &bp, &length) == 0)
+				break;
+		}
+	}
+output:
+	tcpoutput(s);
+	QUNLOCK(s);
+	poperror();
+	return;
+raise:
+	QUNLOCK(s);
+	poperror();
+	freeblist(bp);
+	tcpkick(s);
+}
+
+/*
+ *  Send whatever the conversation's state allows: queued data
+ *  within the usable window, SYN/FIN flags, or a bare ack when
+ *  FORCE is set.  At most 100 segments are sent per call.
+ *
+ *  always enters and exits with the s locked.  We drop
+ *  the lock to ipoput the packet so some care has to be
+ *  taken by callers.
+ */
+void
+tcpoutput(Conv *s)
+{
+	Tcp seg;
+	int msgs;
+	Tcpctl *tcb;
+	Block *hbp, *bp;
+	int sndcnt, n;
+	ulong ssize, dsize, usable, sent;
+	Fs *f;
+	Tcppriv *tpriv;
+	uchar version;
+
+	f = s->p->f;
+	tpriv = s->p->priv;
+	version = s->ipversion;
+
+	for(msgs = 0; msgs < 100; msgs++) {
+		tcb = (Tcpctl*)s->ptcl;
+
+		/* nothing is ever sent from these states */
+		switch(tcb->state) {
+		case Listen:
+		case Closed:
+		case Finwait2:
+			return;
+		}
+
+		/* force an ack when a window has opened up */
+		if(tcb->rcv.blocked && tcb->rcv.wnd > 0){
+			tcb->rcv.blocked = 0;
+			tcb->flags |= FORCE;
+		}
+
+		/*
+		 *  sndcnt: queued bytes plus pending flags;
+		 *  sent: how far snd.ptr is ahead of snd.una
+		 */
+		sndcnt = qlen(s->wq)+tcb->flgcnt;
+		sent = tcb->snd.ptr - tcb->snd.una;
+
+		/* Don't send anything else until our SYN has been acked */
+		if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
+			break;
+
+		/* Compute usable segment based on offered window and limit
+		 * window probes to one
+		 */
+		if(tcb->snd.wnd == 0){
+			if(sent != 0) {
+				if((tcb->flags&FORCE) == 0)
+					break;
+//				tcb->snd.ptr = tcb->snd.una;
+			}
+			usable = 1;
+		}
+		else {
+			/* the smaller of the congestion and offered windows, less what is in flight */
+			usable = tcb->cwind;
+			if(tcb->snd.wnd < usable)
+				usable = tcb->snd.wnd;
+			usable -= sent;
+		}
+		/* ssize: sequence space this segment will consume */
+		ssize = sndcnt-sent;
+		if(ssize && usable < 2)
+			netlog(s->p->f, Logtcp, "throttled snd.wnd %lud cwind %lud\n",
+				tcb->snd.wnd, tcb->cwind);
+		if(usable < ssize)
+			ssize = usable;
+		if(tcb->mss < ssize)
+			ssize = tcb->mss;
+		/* dsize: data bytes to copy; adjusted below for SYN and FIN */
+		dsize = ssize;
+		seg.urg = 0;
+
+		/* nothing to send and no forced ack: done */
+		if(ssize == 0)
+		if((tcb->flags&FORCE) == 0)
+			break;
+
+		tcb->flags &= ~FORCE;
+		tcprcvwin(s);
+
+		/* By default we will generate an ack */
+		tcphalt(tpriv, &tcb->acktimer);
+		tcb->rcv.una = 0;
+		seg.source = s->lport;
+		seg.dest = s->rport;
+		seg.flags = ACK;
+		seg.mss = 0;
+		seg.ws = 0;
+		switch(tcb->state){
+		case Syn_sent:
+			seg.flags = 0;
+			if(tcb->snd.ptr == tcb->iss){
+				/* the SYN takes one unit of sequence space but no data byte */
+				seg.flags |= SYN;
+				dsize--;
+				seg.mss = tcb->mss;
+				seg.ws = tcb->scale;
+			}
+			break;
+		case Syn_received:
+			/*
+			 *  don't send any data with a SYN/ACK packet
+			 *  because Linux rejects the packet in its
+			 *  attempt to solve the SYN attack problem
+			 */
+			if(tcb->snd.ptr == tcb->iss){
+				seg.flags |= SYN;
+				dsize = 0;
+				ssize = 1;
+				seg.mss = tcb->mss;
+				seg.ws = tcb->scale;
+			}
+			break;
+		}
+		seg.seq = tcb->snd.ptr;
+		seg.ack = tcb->rcv.nxt;
+		seg.wnd = tcb->rcv.wnd;
+
+		/* Pull out data to send */
+		bp = nil;
+		if(dsize != 0) {
+			bp = qcopy(s->wq, dsize, sent);
+			/*
+			 *  the queue held less than asked for: the shortfall
+			 *  is taken to be the FIN (presumably counted in flgcnt)
+			 */
+			if(BLEN(bp) != dsize) {
+				seg.flags |= FIN;
+				dsize--;
+			}
+		}
+
+		/* this segment empties the send queue */
+		if(sent+dsize == sndcnt)
+			seg.flags |= PSH;
+
+		/* keep track of balance of resent data */
+		if(seq_lt(tcb->snd.ptr, tcb->snd.nxt)) {
+			n = tcb->snd.nxt - tcb->snd.ptr;
+			if(ssize < n)
+				n = ssize;
+			tcb->resent += n;
+			netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr %lux nxt %lux\n",
+				s->raddr, s->rport, s->laddr, s->lport, tcb->snd.ptr, tcb->snd.nxt);
+			tpriv->stats[RetransSegs]++;
+		}
+
+		tcb->snd.ptr += ssize;
+
+		/* Pull up the send pointer so we can accept acks
+		 * for this window
+		 */
+		if(seq_gt(tcb->snd.ptr,tcb->snd.nxt))
+			tcb->snd.nxt = tcb->snd.ptr;
+
+		/* Build header, link data and compute cksum */
+		switch(version){
+		case V4:
+			tcb->protohdr.tcp4hdr.vihl = IP_VER4;
+			hbp = htontcp4(&seg, bp, &tcb->protohdr.tcp4hdr, tcb);
+			if(hbp == nil) {
+				freeblist(bp);
+				return;
+			}
+			break;
+		case V6:
+			tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
+			hbp = htontcp6(&seg, bp, &tcb->protohdr.tcp6hdr, tcb);
+			if(hbp == nil) {
+				freeblist(bp);
+				return;
+			}
+			break;
+		default:
+			hbp = nil;	/* to suppress a warning */
+			panic("tcpoutput: version %d", version);
+		}
+
+		/* Start the transmission timers if there is new data and we
+		 * expect acknowledges
+		 */
+		if(ssize != 0){
+			if(tcb->timer.state != TcptimerON)
+				tcpgo(tpriv, &tcb->timer);
+
+			/*  If round trip timer isn't running, start it.
+			 *  measure the longest packet only in case the
+			 *  transmission time dominates RTT
+			 */
+			if(tcb->rtt_timer.state != TcptimerON)
+			if(ssize == tcb->mss) {
+				tcpgo(tpriv, &tcb->rtt_timer);
+				tcb->rttseq = tcb->snd.ptr;
+			}
+		}
+
+		tpriv->stats[OutSegs]++;
+
+		/* put off the next keep alive */
+		tcpgo(tpriv, &tcb->katimer);
+
+		switch(version){
+		case V4:
+			if(ipoput4(f, hbp, 0, s->ttl, s->tos, s) < 0){
+				/* a negative return means no route */
+				localclose(s, "no route");
+			}
+			break;
+		case V6:
+			if(ipoput6(f, hbp, 0, s->ttl, s->tos, s) < 0){
+				/* a negative return means no route */
+				localclose(s, "no route");
+			}
+			break;
+		default:
+			panic("tcpoutput2: version %d", version);
+		}
+		/* every fourth segment (msgs 1, 5, 9, ...) drop the lock and yield */
+		if((uint)(msgs%4) == 1){
+			QUNLOCK(s);
+			sched();
+			QLOCK(s);
+		}
+	}
+}
+
+/*
+ *  the BSD convention (hack?) for keep alives: probe with a seq one below snd.una.
+ */
+void
+tcpsendka(Conv *s)
+{
+	Tcp seg;
+	Tcpctl *tcb;
+	Block *hbp,*dbp;
+
+	tcb = (Tcpctl*)s->ptcl;
+
+	dbp = nil;
+	seg.urg = 0;
+	seg.source = s->lport;
+	seg.dest = s->rport;
+	seg.flags = ACK|PSH;
+	seg.mss = 0;
+	seg.ws = 0;
+	if(tcpporthogdefense)	/* defense on: the probe's seq is pushed a further 2^30 plus a random amount back */
+		seg.seq = tcb->snd.una-(1<<30)-nrand(1<<20);
+	else
+		seg.seq = tcb->snd.una-1;
+	seg.ack = tcb->rcv.nxt;
+	tcb->rcv.una = 0;
+	seg.wnd = tcb->rcv.wnd;
+	if(tcb->state == Finwait2){
+		seg.flags |= FIN;	/* in Finwait2 the probe carries FIN and no data block */
+	} else {
+		dbp = allocb(1);
+		*dbp->wp++ = 0;	/* one payload byte, given a defined value instead of whatever allocb left there */
+	}
+
+	if(isv4(s->raddr)) {
+		/* Build header, link data and compute cksum */
+		tcb->protohdr.tcp4hdr.vihl = IP_VER4;
+		hbp = htontcp4(&seg, dbp, &tcb->protohdr.tcp4hdr, tcb);
+		if(hbp == nil) {
+			freeblist(dbp);	/* header build failed: the data block is still ours to free */
+			return;
+		}
+		ipoput4(s->p->f, hbp, 0, s->ttl, s->tos, s);
+	}
+	else {
+		/* Build header, link data and compute cksum */
+		tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
+		hbp = htontcp6(&seg, dbp, &tcb->protohdr.tcp6hdr, tcb);
+		if(hbp == nil) {
+			freeblist(dbp);	/* header build failed: the data block is still ours to free */
+			return;
+		}
+		ipoput6(s->p->f, hbp, 0, s->ttl, s->tos, s);
+	}
+}
+
+/* size the probe budget so the connection times out after about 12 minutes, with at least 3 probes */
+void
+tcpsetkacounter(Tcpctl *tcb)
+{
+	enum { Kalimitms = 12 * 60 * 1000, Minprobes = 3 };
+
+	tcb->kacounter = Kalimitms / (tcb->katimer.start*MSPTICK);
+	if(tcb->kacounter < Minprobes)
+		tcb->kacounter = Minprobes;
+}
+
+/*
+ *  keepalive timer handler; v is the Conv.  if we've timed out, close the
+ *  connection, otherwise send a keepalive and restart the timer
+ */
+void
+tcpkeepalive(void *v)
+{
+	Tcpctl *tcb;
+	Conv *s;
+
+	s = v;
+	tcb = (Tcpctl*)s->ptcl;
+	if(waserror()){	/* error label set up before the lock is taken; it releases the lock and re-raises */
+		QUNLOCK(s);
+		nexterror();
+	}
+	QLOCK(s);
+	if(tcb->state != Closed){	/* a Closed connection is left alone and the timer is not restarted */
+		if(--(tcb->kacounter) <= 0) {	/* probe budget (see tcpsetkacounter) used up */
+			localclose(s, Etimedout);
+		} else {
+			tcpsendka(s);
+			tcpgo(s->p->priv, &tcb->katimer);
+		}
+	}
+	QUNLOCK(s);
+	poperror();
+}
+
+/*
+ *  start keepalive timer; f[1], when given, is the probe period in ms
+ */
+char*
+tcpstartka(Conv *s, char **f, int n)
+{
+	Tcpctl *ctl = (Tcpctl*)s->ptcl;
+	int ms;
+
+	if(ctl->state != Established)
+		return "connection must be in Establised state";
+
+	/* a period shorter than one tick leaves the current setting alone */
+	ms = n > 1 ? atoi(f[1]) : 0;
+	if(n > 1 && ms >= MSPTICK)
+		ctl->katimer.start = ms/MSPTICK;
+
+	tcpsetkacounter(ctl);
+	tcpgo(s->p->priv, &ctl->katimer);
+
+	return nil;
+}
+
+/*
+ *  turn checksums on/off: f[1] is a number, 0 sets nochecksum, nonzero clears it
+ */
+char*
+tcpsetchecksum(Conv *s, char **f, int n)
+{
+	Tcpctl *tcb = (Tcpctl*)s->ptcl;
+
+	if(n < 2)	/* f[1] is not there to read */
+		return "checksum needs an argument";
+	tcb->nochecksum = !atoi(f[1]);
+	return nil;
+}
+
+/*
+ *  restart sending at the oldest unacknowledged byte with a one-segment window
+ */
+void
+tcprxmit(Conv *s)
+{
+	Tcpctl *ctl = (Tcpctl*)s->ptcl;
+
+	ctl->snd.ptr = ctl->snd.una;
+	ctl->flags |= RETRAN|FORCE;
+
+	/*
+	 *  Pull the window down to a single packet.  Strictly the slow
+	 *  start threshold should be halved (down to one mss), but
+	 *  leaving it at mss seems to work well enough.
+	 */
+	ctl->cwind = ctl->mss;
+	ctl->ssthresh = ctl->mss;
+
+	/* send from the rewound pointer */
+	tcpoutput(s);
+}
+
+void
+tcptimeout(void *arg)	/* retransmit timer handler; arg is the Conv */
+{
+	Conv *s;
+	Tcpctl *tcb;
+	int maxback;
+	Tcppriv *tpriv;
+
+	s = (Conv*)arg;
+	tpriv = s->p->priv;
+	tcb = (Tcpctl*)s->ptcl;
+
+	if(waserror()){	/* set up before the lock is taken; releases it and re-raises */
+		QUNLOCK(s);
+		nexterror();
+	}
+	QLOCK(s);
+	switch(tcb->state){
+	default:	/* every state except Time_wait and Closed: back off and retransmit */
+		tcb->backoff++;
+		if(tcb->state == Syn_sent)	/* an unanswered SYN gets half the usual patience */
+			maxback = MAXBACKMS/2;
+		else
+			maxback = MAXBACKMS;
+		tcb->backedoff += tcb->timer.start * MSPTICK;	/* running total of time spent waiting */
+		if(tcb->backedoff >= maxback) {
+			localclose(s, Etimedout);
+			break;
+		}
+		netlog(s->p->f, Logtcprxmt, "timeout rexmit 0x%lux %d/%d\n", tcb->snd.una, tcb->timer.start, NOW);
+		tcpsettimer(tcb);	/* new period computed from the increased backoff */
+		tcprxmit(s);
+		tpriv->stats[RetransTimeouts]++;
+		tcb->snd.dupacks = 0;
+		break;
+	case Time_wait:	/* the timer expiring in Time_wait simply ends the connection */
+		localclose(s, nil);
+		break;
+	case Closed:
+		break;
+	}
+	QUNLOCK(s);
+	poperror();
+}
+
+int
+inwindow(Tcpctl *tcb, int seq)	/* is seq within [rcv.nxt, rcv.nxt+rcv.wnd-1], as judged by seq_within? */
+{
+	return seq_within(seq, tcb->rcv.nxt, tcb->rcv.nxt+tcb->rcv.wnd-1);
+}
+
+/*
+ *  take the peer's initial sequence number, mss and window from a received SYN (or SYN ACK)
+ */
+void
+procsyn(Conv *s, Tcp *seg)
+{
+	Tcpctl *ctl = (Tcpctl*)s->ptcl;
+
+	ctl->flags |= FORCE;
+
+	/* the first sequence number expected is the one after the SYN's */
+	ctl->irs = seg->seq;
+	ctl->rcv.nxt = seg->seq + 1;
+	ctl->rcv.urg = ctl->rcv.nxt;
+
+	/* never send segments bigger than he asked for; a zero mss in seg is ignored */
+	if(seg->mss != 0 && ctl->mss > seg->mss)
+		ctl->mss = seg->mss;
+
+	/* the congestion window always starts out as a single segment */
+	ctl->cwind = ctl->mss;
+	ctl->snd.wnd = seg->wnd;
+}
+
+int
+addreseq(Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)	/* queue a segment on tcb->reseq; 0, or -1 if the queue was flushed */
+{
+	Reseq *rp, *rp1;
+	int i, rqlen, qmax;
+
+	rp = malloc(sizeof(Reseq));
+	if(rp == nil){
+		freeblist(bp);	/* bp always consumed by addreseq */
+		return 0;
+	}
+
+	rp->seg = *seg;
+	rp->bp = bp;
+	rp->length = length;
+
+	/* Place on reassembly list sorting by starting seq number */
+	rp1 = tcb->reseq;
+	if(rp1 == nil || seq_lt(seg->seq, rp1->seg.seq)) {	/* empty list or new head: no length check on this path */
+		rp->next = rp1;
+		tcb->reseq = rp;
+		if(rp->next != nil)	/* counted only when something later was already queued */
+			tpriv->stats[OutOfOrder]++;
+		return 0;
+	}
+
+	rqlen = 0;	/* sums the lengths of the entries walked before the insertion point */
+	for(i = 0;; i++) {
+		rqlen += rp1->length;
+		if(rp1->next == nil || seq_lt(seg->seq, rp1->next->seg.seq)) {
+			rp->next = rp1->next;
+			rp1->next = rp;
+			if(rp->next != nil)
+				tpriv->stats[OutOfOrder]++;
+			break;
+		}
+		rp1 = rp1->next;
+	}
+	qmax = QMAX<<tcb->rcv.scale;
+	if(rqlen > qmax){
+		print("resequence queue > window: %d > %d\n", rqlen, qmax);
+		i = 0;	/* i is reused to cap the dump at a dozen entries */
+	  	for(rp1 = tcb->reseq; rp1 != nil; rp1 = rp1->next){
+	  		print("%#lux %#lux %#ux\n", rp1->seg.seq,
+	  			rp1->seg.ack, rp1->seg.flags);
+			if(i++ > 10){
+				print("...\n");
+				break;
+			}
+		}
+
+		/*
+		 * delete entire reassembly queue; wait for retransmit.
+		 * - should we be smarter and only delete the tail?
+		 */
+		for(rp = tcb->reseq; rp != nil; rp = rp1){	/* this also frees the entry just inserted */
+			rp1 = rp->next;
+			freeblist(rp->bp);
+			free(rp);
+		}
+		tcb->reseq = nil;
+
+	  	return -1;
+	}
+	return 0;
+}
+
+/* pop the head of tcb->reseq into *seg, *bp and *length; leaves them untouched when the queue is empty */
+void
+getreseq(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
+{
+	Reseq *head;
+
+	if((head = tcb->reseq) == nil)
+		return;
+
+	*length = head->length;
+	*bp = head->bp;
+	*seg = head->seg;
+
+	/* unlink and free the list node only; the Block is now the caller's */
+	tcb->reseq = head->next;
+	free(head);
+}
+
+int
+tcptrim(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)	/* clip seg to the receive window; 0 = usable, -1 = rejected and *bp freed */
+{
+	ushort len;
+	uchar accept;
+	int dupcnt, excess;
+
+	accept = 0;
+	len = *length;	/* len counts SYN and FIN as one unit each on top of the data */
+	if(seg->flags & SYN)
+		len++;
+	if(seg->flags & FIN)
+		len++;
+
+	if(tcb->rcv.wnd == 0) {	/* closed window: only an empty segment exactly at rcv.nxt is let through */
+		if(len == 0 && seg->seq == tcb->rcv.nxt)
+			return 0;
+	}
+	else {
+		/* Some part of the segment should be in the window */
+		if(inwindow(tcb,seg->seq))
+			accept++;
+		else
+		if(len != 0) {	/* start is outside: accept if the end is inside, or the segment spans rcv.nxt */
+			if(inwindow(tcb, seg->seq+len-1) ||
+			seq_within(tcb->rcv.nxt, seg->seq,seg->seq+len-1))
+				accept++;
+		}
+	}
+	if(!accept) {
+		freeblist(*bp);
+		return -1;
+	}
+	dupcnt = tcb->rcv.nxt - seg->seq;	/* amount of the segment that lies before rcv.nxt */
+	if(dupcnt > 0){
+		tcb->rerecv += dupcnt;
+		if(seg->flags & SYN){	/* a SYN in the duplicated part is dropped and accounts for one unit */
+			seg->flags &= ~SYN;
+			seg->seq++;
+
+			if(seg->urg > 1)
+				seg->urg--;
+			else
+				seg->flags &= ~URG;
+			dupcnt--;
+		}
+		if(dupcnt > 0){	/* remaining duplicate is data: pull it off the front */
+			pullblock(bp, (ushort)dupcnt);
+			seg->seq += dupcnt;
+			*length -= dupcnt;
+
+			if(seg->urg > dupcnt)
+				seg->urg -= dupcnt;
+			else {
+				seg->flags &= ~URG;
+				seg->urg = 0;
+			}
+		}
+	}
+	excess = seg->seq + *length - (tcb->rcv.nxt + tcb->rcv.wnd);	/* data past the right edge of the window */
+	if(excess > 0) {
+		tcb->rerecv += excess;
+		*length -= excess;
+		*bp = trimblock(*bp, 0, *length);
+		if(*bp == nil)
+			panic("presotto is a boofhead");
+		seg->flags &= ~FIN;	/* the tail was cut off, so the FIN goes with it */
+	}
+	return 0;
+}
+
+void
+tcpadvise(Proto *tcp, Block *bp, char *msg)	/* act on advice msg about the packet in bp; bp is always freed */
+{
+	Tcp4hdr *h4;
+	Tcp6hdr *h6;
+	Tcpctl *tcb;
+	uchar source[IPaddrlen];
+	uchar dest[IPaddrlen];
+	ushort psource, pdest;
+	Conv *s, **p;
+
+	h4 = (Tcp4hdr*)(bp->rp);	/* both views of the same bytes; the version nibble picks one */
+	h6 = (Tcp6hdr*)(bp->rp);
+
+	if((h4->vihl&0xF0)==IP_VER4) {
+		v4tov6(dest, h4->tcpdst);
+		v4tov6(source, h4->tcpsrc);
+		psource = nhgets(h4->tcpsport);
+		pdest = nhgets(h4->tcpdport);
+	}
+	else {
+		ipmove(dest, h6->tcpdst);
+		ipmove(source, h6->tcpsrc);
+		psource = nhgets(h6->tcpsport);
+		pdest = nhgets(h6->tcpdport);
+	}
+
+	/* Look for a connection */
+	QLOCK(tcp);
+	for(p = tcp->conv; *p; p++) {	/* walks until a nil entry */
+		s = *p;
+		tcb = (Tcpctl*)s->ptcl;
+		if(s->rport == pdest)	/* the header's destination is matched against our remote end, its source against our local end */
+		if(s->lport == psource)
+		if(tcb->state != Closed)
+		if(ipcmp(s->raddr, dest) == 0)
+		if(ipcmp(s->laddr, source) == 0){
+			QLOCK(s);	/* take the conversation lock before letting go of the protocol lock */
+			QUNLOCK(tcp);
+			switch(tcb->state){
+			case Syn_sent:	/* only a connection still in Syn_sent is closed by advice */
+				localclose(s, msg);
+				break;
+			}
+			QUNLOCK(s);
+			freeblist(bp);
+			return;
+		}
+	}
+	QUNLOCK(tcp);
+	freeblist(bp);
+}
+
+static char*
+tcpporthogdefensectl(char *val)
+{
+	if(strcmp(val, "on") == 0)
+		tcpporthogdefense = 1;
+	else if(strcmp(val, "off") == 0)
+		tcpporthogdefense = 0;
+	else
+		return "unknown value for tcpporthogdefense";
+	return nil;
+}
+
+/* called with c QLOCKed */
+char*
+tcpctl(Conv* c, char** f, int n)
+{
+	if(n == 1 && strcmp(f[0], "hangup") == 0)
+		return tcphangup(c);
+	if(n >= 1 && strcmp(f[0], "keepalive") == 0)
+		return tcpstartka(c, f, n);
+	if(n >= 1 && strcmp(f[0], "checksum") == 0)
+		return tcpsetchecksum(c, f, n);
+	if(n >= 1 && strcmp(f[0], "tcpporthogdefense") == 0)
+		return tcpporthogdefensectl(f[1]);
+	return "unknown control request";
+}
+
+/* print every counter as "name: value" lines into buf (at most len bytes); returns the length used */
+int
+tcpstats(Proto *tcp, char *buf, int len)
+{
+	Tcppriv *tpriv = tcp->priv;
+	char *out = buf, *end = buf+len;
+	int k = 0;
+
+	while(k < Nstats){
+		out = seprint(out, end, "%s: %lud\n", statnames[k], tpriv->stats[k]);
+		k++;
+	}
+	return out - buf;
+}
+
+/*
+ *  garbage collect any stale conversations:
+ *	- SYN received but no SYN-ACK after 5 seconds (could be the SYN attack)
+ *	- Finwait2 after 5 minutes
+ *
+ *  this is called whenever we run out of channels.  Both checks are
+ *  of questionable validity so we try to use them only when we're
+ *  up against the wall.  Returns the number of conversations closed.
+ */
+int
+tcpgc(Proto *tcp)
+{
+	Conv *c;
+	Tcpctl *tcb;
+	int i, limit, nclosed;
+
+	nclosed = 0;
+	for(i = 0; i < tcp->nc; i++) {
+		c = tcp->conv[i];
+		if(c == nil)
+			break;
+
+		/* a conversation whose lock cannot be had right now is skipped */
+		if(!CANQLOCK(c))
+			continue;
+		tcb = (Tcpctl*)c->ptcl;
+
+		/* age limit for this state; 0 means the state is never collected */
+		if(tcb->state == Syn_received)
+			limit = 5000;
+		else if(tcb->state == Finwait2)
+			limit = 5*60*1000;
+		else
+			limit = 0;
+
+		if(limit != 0 && NOW - tcb->time > limit){
+			localclose(c, "timed out");
+			nclosed++;
+		}
+		QUNLOCK(c);
+	}
+
+	return nclosed;
+}
+
+/* recompute the retransmit timer's start value from the backoff count and the round trip estimates */
+void
+tcpsettimer(Tcpctl *tcb)
+{
+	int ticks;
+
+	ticks = backoff(tcb->backoff) *
+		(tcb->mdev + (tcb->srtt>>LOGAGAIN) + MSPTICK) / MSPTICK;
+
+	/* clamp to the range 1/2 second .. 64 seconds */
+	if(ticks > (64000/MSPTICK))
+		ticks = 64000/MSPTICK;
+	else if(ticks < 500/MSPTICK)
+		ticks = 500/MSPTICK;
+	tcb->timer.start = ticks;
+}
+
+void
+tcpinit(Fs *fs)	/* build the tcp Proto, fill in its entry points and hand it to Fsproto */
+{
+	Tcppriv *tpriv;
+	Proto *tcp;
+
+	tcp = smalloc(sizeof(*tcp));
+	tcp->priv = tpriv = smalloc(sizeof(*tpriv));
+	tcp->name = "tcp";
+	tcp->ipproto = IP_TCPPROTO;
+	tcp->ptclsize = sizeof(Tcpctl);
+	tcp->nc = scalednconv();
+	tpriv->stats[MaxConn] = tcp->nc;	/* the conversation limit is also published as a statistic */
+	tcp->create = tcpcreate;
+	tcp->connect = tcpconnect;
+	tcp->announce = tcpannounce;
+	tcp->close = tcpclose;
+	tcp->ctl = tcpctl;
+	tcp->state = tcpstate;
+	tcp->stats = tcpstats;
+	tcp->inuse = tcpinuse;
+	tcp->rcv = tcpiput;
+	tcp->advise = tcpadvise;
+	tcp->gc = tcpgc;
+
+	Fsproto(fs, tcp);
+}
+
+void
+tcpsetscale(Conv *s, Tcpctl *tcb, ushort rcvscale, ushort sndscale)	/* a zero rcvscale turns scaling off in both directions */
+{
+	if(rcvscale == 0){
+		tcb->rcv.scale = 0;
+		tcb->snd.scale = 0;
+		tcb->window = QMAX;
+	} else {
+		tcb->rcv.scale = rcvscale & 0xff;
+		tcb->snd.scale = sndscale & 0xff;
+		tcb->window = QMAX<<tcb->snd.scale;
+	}
+	/* in either case the read queue limit follows the window */
+	qsetlimit(s->rq, tcb->window);
+}
diff --git a/src/9vx/a/ip/tripmedium.c b/src/9vx/a/ip/tripmedium.c
@@ -0,0 +1,398 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+#include "trip.h"
+
+static void	tripread(void *a);
+static void	tripbind(Ipifc *ifc, int argc, char **argv);
+static void	tripunbind(Ipifc *ifc);
+static void	tripbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
+static void	tripaddmulti(Ipifc *ifc, uchar*, uchar*);
+static void	tripremmulti(Ipifc *ifc, uchar*, uchar*);
+static void	tripaddroute(Ipifc *ifc, int, uchar*, uchar*, uchar*, int);
+static void	tripremroute(Ipifc *ifc, int, uchar*, uchar*);
+static void	tripares(Fs*, int, uchar*, uchar*, int, int);
+
+Medium tripmedium =
+{
+.name=		"trip",
+.mintu=	20,
+.maxtu=	64*1024,
+.maclen=	LCIMACSIZE,
+.bind=		tripbind,
+.unbind=	tripunbind,
+.bwrite=	tripbwrite,
+.addmulti=	tripaddmulti,
+.remmulti=	tripremmulti,
+.addroute=	tripaddroute,
+.remroute=	tripremroute,
+.ares=		tripares,
+};
+
+typedef struct	Tripinfo Tripinfo;
+struct Tripinfo	/* per-interface state; tripbind allocates it and stores it in ifc->arg */
+{
+	Fs*	fs;		/* my instance of the IP stack */
+	Ipifc*	ifc;		/* IP interface */
+	Card*	dev;		/* what tripsetifc() returned at bind time */
+	Proc*	readp;		/* reading process */
+	Chan*	mchan;		/* Data channel */
+};
+
+/*
+ *  called to bind an IP ifc to a trip device; argv[2] names the file to open
+ *  called with ifc qlock'd
+ */
+static void
+tripbind(Ipifc *ifc, int argc, char **argv)
+{
+	int fd;
+	Chan *mchan;
+	Tripinfo *er;
+
+	if(argc < 3)	/* argv[2] is used below, so three arguments are the minimum */
+		error(Ebadarg);
+
+	fd = kopen(argv[2], ORDWR);
+	if(fd < 0)
+		error("trip open failed");
+
+	mchan = fdtochan(up->env->fgrp, fd, ORDWR, 0, 1);	/* keep the channel; the fd itself is closed again */
+	kclose(fd);
+
+	if(devtab[mchan->type]->dc != 'T') {	/* only a channel on the device with letter 'T' is accepted */
+		cclose(mchan);
+		error(Enoport);
+	}
+
+	er = smalloc(sizeof(*er));
+	er->mchan = mchan;
+	er->ifc = ifc;
+	er->dev = tripsetifc(mchan, ifc);
+	er->fs = ifc->conv->p->f;
+
+	ifc->arg = er;
+
+	kproc("tripread", tripread, ifc);	/* reader process; it finds er through ifc->arg */
+}
+
+/*
+ *  called with ifc qlock'd; closes the data channel and frees the Tripinfo
+ */
+static void
+tripunbind(Ipifc *ifc)
+{
+	Tripinfo *er = ifc->arg;
+/*
+	if(er->readp)
+		postnote(er->readp, 1, "unbind", 0);
+*/
+	tsleep(&up->sleep, return0, 0, 300);	/* NOTE(review): with the postnote disabled nothing here stops the tripread process — confirm it cannot touch er after the free below */
+
+	if(er->mchan != nil)
+		cclose(er->mchan);
+
+	free(er);
+}
+
+/*
+ *  called by ipoput with a single block to write
+ */
+static void
+tripbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip)
+{
+	Tripinfo *er = ifc->arg;
+
+	/*
+	 * Packet is rerouted at linecard
+	 * so the gateway is ignored
+	 */
+	USED(ip);
+	USED(version);
+
+	if(waserror()) {	/* an error raised by the device write is logged and swallowed here */
+		print("tripwrite failed\n");
+		return;
+	}
+
+	devtab[er->mchan->type]->bwrite(er->mchan, bp, 0);	/* bp goes to the device's bwrite entry for the data channel */
+	poperror();
+	ifc->out++;	/* counted only when the write did not raise an error */
+}
+
+/*
+ *  process to read from the trip interface; a is the Ipifc given to kproc by tripbind
+ */
+static void
+tripread(void *a)
+{
+	Ipifc *ifc;
+	Block *bp;
+	Tripinfo *er;
+
+	ifc = a;
+	er = ifc->arg;
+	er->readp = up;	/* hide identity under a rock for unbind */
+
+	for(;;) {	/* no exit condition: each block read is counted and handed to ipiput4 */
+		bp = devtab[er->mchan->type]->bread(er->mchan, ifc->maxtu, 0);
+		ifc->in++;
+		ipiput4(er->fs, ifc, bp);
+	}
+
+	pexit("hangup", 1);	/* not reached: the loop above has no break */
+}
+
+static void
+tripaddroute(Ipifc *ifc, int v, uchar *addr, uchar *mask, uchar *gate, int t)	/* send one route add to the card */
+{
+	int alen;
+	MTroute mtr;
+	Tripinfo *tinfo;
+
+	tinfo = ifc->arg;
+	if(!tinfo->dev->routing)
+		return;
+
+	/*
+	 * Multicast addresses are handled on the linecard by the
+	 * multicast port driver, so the route load is dumped.  They are
+	 * loaded by addmulti/remmulti (SBC) and joinmulti/leavemulti (inter LC).
+	 */
+	if(ipismulticast(addr))
+		return;
+
+	memset(&mtr, 0, sizeof(mtr));	/* the whole struct is sent: don't ship stack garbage after short v4 fields */
+	mtr.type = T_ROUTEADMIN;
+	if(v & Rv4) {
+		mtr.op = RTADD4;
+		alen = IPv4addrlen;
+	}
+	else {
+		mtr.op = RTADD6;
+		alen = IPaddrlen;
+	}
+	mtr.rtype = t;
+	memmove(mtr.addr, addr, alen);
+	memmove(mtr.mask, mask, alen);
+	memmove(mtr.gate, gate, alen);
+
+	i2osend(tinfo->dev, &mtr, sizeof(mtr));
+}
+
+static void
+tripremroute(Ipifc *ifc, int v, uchar *addr, uchar *mask)	/* send one route delete to the card */
+{
+	int alen;
+	MTroute mtr;
+	Tripinfo *tinfo;
+
+	tinfo = ifc->arg;
+	if(!tinfo->dev->routing)
+		return;
+	if(ipismulticast(addr))	/* multicast routes are never loaded (see tripaddroute), so none to remove */
+		return;
+
+	/* rtype and gate are not set for a delete; zero them rather than send stack garbage */
+	memset(&mtr, 0, sizeof(mtr));
+	mtr.type = T_ROUTEADMIN;
+	if(v & Rv4) {
+		mtr.op = RTDEL4;
+		alen = IPv4addrlen;
+	}
+	else {
+		mtr.op = RTDEL6;
+		alen = IPaddrlen;
+	}
+	memmove(mtr.addr, addr, alen);
+	memmove(mtr.mask, mask, alen);
+	i2osend(tinfo->dev, &mtr, sizeof(mtr));
+}
+
+static void	/* Routewalk callback: forward route r to the interface held in rw->state */
+tripxmitroute(Route *r, Routewalk *rw)
+{
+	uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen];
+	char rtype[5];
+	int nifc;
+
+	convroute(r, addr, mask, gate, rtype, &nifc);
+
+	/* v4 routes are handed on as the 4-byte tails starting at IPv4off */
+	if(r->type & Rv4)
+		tripaddroute(rw->state, Rv4, addr+IPv4off, mask+IPv4off, gate+IPv4off, r->type);
+	else
+		tripaddroute(rw->state, 0, addr, mask, gate, r->type);
+}
+
+static void
+sendifcinfo(Ipifc *dest)	/* describe every other interface's addresses to dest's card */
+{
+	Conv **cp, **e;
+	Iplifc *l;
+	Ipifc *ifc;
+	MTifctl mtc;
+	Tripinfo *tinfo, *oinfo;
+	Proto *p;
+
+	tinfo = dest->arg;
+
+	/* Install interfaces */
+	p = tinfo->fs->ipifc;
+	e = &p->conv[p->nc];
+	for(cp = p->conv; cp < e; cp++) {
+		if(*cp == nil)
+			continue;
+
+		ifc = (Ipifc*)(*cp)->ptcl;
+		if(dest == ifc)	/* the card is not told about its own interface */
+			continue;
+
+		/* the whole struct is sent: start clean so short v4 fields carry no stale bytes */
+		memset(&mtc, 0, sizeof(mtc));
+		mtc.type = T_CTLIFADMIN;
+		mtc.maxtu = ifc->maxtu;
+		mtc.mintu = ifc->mintu;
+
+		mtc.port = 0;	/* port stays 0 unless the other interface is a trip interface too */
+		if(ifc->m == &tripmedium) {
+			oinfo = ifc->arg;
+			mtc.port = oinfo->dev->bar[0].bar;
+		}
+
+		for(l = ifc->lifc; l != nil; l = l->next) {	/* one message per local address */
+			if(isv4(l->local)) {
+				mtc.op = IFADD4;
+				memmove(mtc.addr, l->local+IPv4off, IPv4addrlen);
+				memmove(mtc.mask, l->mask+IPv4off, IPv4addrlen);
+			}
+			else {
+				mtc.op = IFADD6;
+				memmove(mtc.addr, l->local, sizeof(mtc.addr));
+				memmove(mtc.mask, l->mask, sizeof(mtc.mask));
+			}
+
+			i2osend(tinfo->dev, &mtc, sizeof(mtc));
+		}
+	}
+}
+
+void
+tripsync(Ipifc *ifc)
+{
+	Routewalk walk;
+
+	if(ifc == nil) {
+		print("tripsync: interface not bound\n");
+		return;
+	}
+
+	/* Mirror the route table into the linecard, using tripxmitroute as the walk function */
+	walk.walk = tripxmitroute;
+	walk.state = ifc;
+	walk.n = (1<<22);
+	walk.o = 0;
+
+	ipwalkroutes(ifc->conv->p->f, &walk);
+
+	/*
+	 * Then tell the linecard about interfaces that
+	 * already exist elsewhere
+	 */
+	sendifcinfo(ifc);
+}
+
+/*
+ * Tell a line card the SBC is interested in listening to a multicast
+ * address; nothing is sent unless the card has routing set.
+ */
+static void
+tripaddmulti(Ipifc *ifc, uchar *addr, uchar *ifca)
+{
+	Tripinfo *info = ifc->arg;
+	MTmultiears msg;
+
+	if(!info->dev->routing)
+		return;
+
+	/* both addresses are copied at the size of the message's own fields */
+	memmove(msg.ifca, ifca, sizeof(msg.ifca));
+	memmove(msg.addr, addr, sizeof(msg.addr));
+
+	msg.op = ADDMULTI;
+	msg.type = T_MULTIEAR;
+
+	i2osend(info->dev, &msg, sizeof(msg));
+}
+
+/*
+ * Tell a line card the SBC is no longer interested in listening to a multicast address
+ */
+static void
+tripremmulti(Ipifc *ifc, uchar *addr, uchar *ifca)
+{
+	Tripinfo *info = ifc->arg;
+	MTmultiears msg;
+
+	if(!info->dev->routing)
+		return;
+
+	/* same message as tripaddmulti, with the REMMULTI operation */
+	memmove(msg.ifca, ifca, sizeof(msg.ifca));
+	memmove(msg.addr, addr, sizeof(msg.addr));
+	msg.op = REMMULTI;
+	msg.type = T_MULTIEAR;
+
+	i2osend(info->dev, &msg, sizeof(msg));
+}
+
+static void
+tripares(Fs *fs, int vers, uchar *ip, uchar *mac, int l, int)	/* pass a resolved ip -> mac pair (l = mac length) to the card */
+{
+	Route *r;
+	Ipifc *ifc;
+	MTaresenter ta;
+	Tripinfo *tinfo;
+	uchar v6ip[IPaddrlen];
+
+	if(vers == V4) {
+		r = v4lookup(fs, ip);
+		v4tov6(v6ip, ip);	/* from here on ip is the 16-byte form */
+		ip = v6ip;
+	}
+	else
+		r = v6lookup(fs, ip);
+
+	if(r == nil) {
+		print("tripares: no route for entry\n");
+		return;
+	}
+
+	ifc = r->ifc;
+
+	tinfo = ifc->arg;	/* NOTE(review): assumes the route's interface is a trip interface — confirm */
+	if(!tinfo->dev->routing)
+		return;
+
+	/*
+	 * ip was converted once above.  It must not be converted again:
+	 * v4tov6(v6ip, v6ip) would use the leading zero bytes as the address.
+	 */
+
+	ta.type = T_ARESENTER;
+	ta.maclen = l;
+	memmove(ta.addr, ip, IPaddrlen);
+	memmove(ta.amac, mac, l);	/* NOTE(review): l is not checked against the size of ta.amac — confirm callers bound it */
+
+	i2osend(tinfo->dev, &ta, sizeof(ta));
+}
+
+void
+tripmediumlink(void)	/* hand tripmedium to addipmedium() */
+{
+	addipmedium(&tripmedium);
+}
diff --git a/src/9vx/a/ip/udp.c b/src/9vx/a/ip/udp.c
@@ -0,0 +1,619 @@
+#include	"u.h"
+#include	"lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"error.h"
+
+#include	"ip.h"
+#include	"ipv6.h"
+
+
+#define DPRINT if(0)print
+
+enum
+{
+	UDP_UDPHDR_SZ	= 8,	/* sport, dport, length, checksum: 2 bytes each */
+
+	UDP4_PHDR_OFF = 8,	/* offset of Udp4hdr.Unused, where the v4 checksum starts */
+	UDP4_PHDR_SZ = 12,	/* Unused through udpdst in Udp4hdr */
+	UDP4_IPHDR_SZ = 20,	/* Udp4hdr up to, not including, the udp header */
+	UDP6_IPHDR_SZ = 40,	/* Udp6hdr up to, not including, the udp header */
+	UDP6_PHDR_SZ = 40,	/* for v6 the checksum covers the whole (rewritten) ip header */
+	UDP6_PHDR_OFF = 0,
+
+	IP_UDPPROTO	= 17,
+	UDP_USEAD7	= 52,	/* "headers" prefix: three IPaddrlen addresses plus two 2-byte ports */
+
+	Udprxms		= 200,
+	Udptickms	= 100,
+	Udpmaxxmit	= 10,
+};
+
+typedef struct Udp4hdr Udp4hdr;
+struct Udp4hdr	/* v4 ip header + udp header; Unused..udpdst double as the checksum pseudo header */
+{
+	/* ip header */
+	uchar	vihl;		/* Version and header length */
+	uchar	tos;		/* Type of service */
+	uchar	length[2];	/* packet length */
+	uchar	id[2];		/* Identification */
+	uchar	frag[2];	/* Fragment information */
+	uchar	Unused;		/* zeroed for the checksum; udpiput saves and restores it around that */
+	uchar	udpproto;	/* Protocol */
+	uchar	udpplen[2];	/* Header plus data length */
+	uchar	udpsrc[IPv4addrlen];	/* Ip source */
+	uchar	udpdst[IPv4addrlen];	/* Ip destination */
+
+	/* udp header */
+	uchar	udpsport[2];	/* Source port */
+	uchar	udpdport[2];	/* Destination port */
+	uchar	udplen[2];	/* data length */
+	uchar	udpcksum[2];	/* Checksum */
+};
+
+typedef struct Udp6hdr Udp6hdr;
+struct Udp6hdr {	/* v6 ip header + udp header; the first 8 bytes are rewritten while checksumming */
+	uchar viclfl[4];	/* holds the udp length while the checksum is computed */
+	uchar len[2];
+	uchar nextheader;
+	uchar hoplimit;	/* holds IP_UDPPROTO while the checksum is computed */
+	uchar udpsrc[IPaddrlen];
+	uchar udpdst[IPaddrlen];
+
+	/* udp header */
+	uchar	udpsport[2];	/* Source port */
+	uchar	udpdport[2];	/* Destination port */
+	uchar	udplen[2];	/* data length */
+	uchar	udpcksum[2];	/* Checksum */
+};
+
+/* MIB II counters */
+typedef struct Udpstats Udpstats;
+struct Udpstats
+{
+	ulong	udpInDatagrams;
+	ulong	udpNoPorts;
+	ulong	udpInErrors;
+	ulong	udpOutDatagrams;
+};
+
+typedef struct Udppriv Udppriv;
+struct Udppriv
+{
+	Ipht		ht;
+
+	/* MIB counters */
+	Udpstats	ustats;
+
+	/* non-MIB stats */
+	ulong		csumerr;		/* checksum errors */
+	ulong		lenerr;			/* short packet */
+};
+
+void (*etherprofiler)(char *name, int qlen);
+void udpkick(void *x, Block *bp);
+
+/*
+ *  protocol specific part of Conv
+ */
+typedef struct Udpcb Udpcb;
+struct Udpcb
+{
+	QLock	qlock;
+	uchar	headers;	/* 0, or 7 once the "headers" ctl has been written (cleared again on close) */
+};
+
+static char*
+udpconnect(Conv *c, char **argv, int argc)
+{
+	Udppriv *priv = c->p->priv;
+	char *err;
+
+	/* Fsconnected is told the outcome whether or not the connect worked */
+	err = Fsstdconnect(c, argv, argc);
+	Fsconnected(c, err);
+
+	/* only a successful conversation is entered in the lookup table */
+	if(err == nil)
+		iphtadd(&priv->ht, c);
+	return err;
+}
+
+
+static int	/* one status line: Open or Closed, then the read and write queue lengths */
+udpstate(Conv *c, char *state, int n)
+{
+	int qin, qout;
+
+	qin = c->rq ? qlen(c->rq) : 0;
+	qout = c->wq ? qlen(c->wq) : 0;
+	return snprint(state, n, "%s qin %d qout %d\n", c->inuse ? "Open" : "Closed", qin, qout);
+}
+
+static char*
+udpannounce(Conv *c, char** argv, int argc)
+{
+	Udppriv *priv = c->p->priv;
+	char *err;
+
+	err = Fsstdannounce(c, argv, argc);
+	if(err == nil){
+		/* announced: report success and enter the conversation in the lookup table */
+		Fsconnected(c, nil);
+		iphtadd(&priv->ht, c);
+	}
+
+	return err;
+}
+
+static void
+udpcreate(Conv *c)	/* set up the two queues of a new conversation */
+{
+	c->rq = qopen(128*1024, Qmsg, 0, 0);	/* read side: a message queue with a 128K limit */
+	c->wq = qbypass(udpkick, c);	/* write side: blocks go to udpkick with c as its argument */
+}
+
+static void
+udpclose(Conv *c)
+{
+	Udppriv *priv = c->p->priv;
+	Udpcb *cb = (Udpcb*)c->ptcl;
+
+	/* take the conversation out of the lookup table first */
+	iphtrem(&priv->ht, c);
+	c->state = 0;
+
+	qclose(c->rq);
+	qclose(c->wq);
+	qclose(c->eq);
+
+	/* forget both endpoints and go back to headerless mode */
+	ipmove(c->laddr, IPnoaddr);
+	ipmove(c->raddr, IPnoaddr);
+	c->lport = 0;
+	c->rport = 0;
+	cb->headers = 0;
+}
+
+void
+udpkick(void *x, Block *bp)	/* write-queue bypass: wrap bp in ip+udp headers and send it; x is the Conv */
+{
+	Conv *c = x;
+	Udp4hdr *uh4;
+	Udp6hdr *uh6;
+	ushort rport;
+	uchar laddr[IPaddrlen], raddr[IPaddrlen];
+	Udpcb *ucb;
+	int dlen, ptcllen;
+	Udppriv *upriv;
+	Fs *f;
+	int version;
+	Conv *rc;
+
+	upriv = c->p->priv;
+	f = c->p->f;
+
+	netlog(c->p->f, Logudp, "udp: kick\n");
+	if(bp == nil)
+		return;
+
+	ucb = (Udpcb*)c->ptcl;
+	switch(ucb->headers) {
+	case 7:	/* data is prefixed by: raddr, laddr, ifc addr (16 bytes each), rport, lport (2 each) */
+		/* get user specified addresses */
+		bp = pullupblock(bp, UDP_USEAD7);
+		if(bp == nil)
+			return;
+		ipmove(raddr, bp->rp);
+		bp->rp += IPaddrlen;
+		ipmove(laddr, bp->rp);
+		bp->rp += IPaddrlen;
+		/* pick interface closest to dest */
+		if(ipforme(f, laddr) != Runi)
+			findlocalip(f, laddr, raddr);
+		bp->rp += IPaddrlen;		/* Ignore ifc address */
+		rport = nhgets(bp->rp);
+		bp->rp += 2+2;			/* Ignore local port */
+		break;
+	default:	/* no prefix: addresses and ports come from the conversation */
+		rport = 0;
+		break;
+	}
+
+	if(ucb->headers) {	/* version is chosen from the user's laddr ... */
+		if(memcmp(laddr, v4prefix, IPv4off) == 0
+		|| ipcmp(laddr, IPnoaddr) == 0)
+			version = 4;
+		else
+			version = 6;
+	} else {	/* ... or from the conversation's two addresses */
+		if( (memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
+			memcmp(c->laddr, v4prefix, IPv4off) == 0)
+			|| ipcmp(c->raddr, IPnoaddr) == 0)
+			version = 4;
+		else
+			version = 6;
+	}
+
+	dlen = blocklen(bp);	/* payload length, measured before any header is prepended */
+
+	/* fill in pseudo header and compute checksum */
+	switch(version){
+	case V4:
+		bp = padblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ);
+		if(bp == nil)
+			return;
+
+		uh4 = (Udp4hdr *)(bp->rp);
+		ptcllen = dlen + UDP_UDPHDR_SZ;
+		uh4->Unused = 0;
+		uh4->udpproto = IP_UDPPROTO;
+		uh4->frag[0] = 0;
+		uh4->frag[1] = 0;
+		hnputs(uh4->udpplen, ptcllen);
+		if(ucb->headers) {
+			v6tov4(uh4->udpdst, raddr);
+			hnputs(uh4->udpdport, rport);
+			v6tov4(uh4->udpsrc, laddr);
+			rc = nil;	/* with user-supplied addresses no Conv is passed to ipoput4 */
+		} else {
+			v6tov4(uh4->udpdst, c->raddr);
+			hnputs(uh4->udpdport, c->rport);
+			if(ipcmp(c->laddr, IPnoaddr) == 0)	/* no local address yet: pick one for this destination */
+				findlocalip(f, c->laddr, c->raddr);
+			v6tov4(uh4->udpsrc, c->laddr);
+			rc = c;
+		}
+		hnputs(uh4->udpsport, c->lport);
+		hnputs(uh4->udplen, ptcllen);
+		uh4->udpcksum[0] = 0;	/* checksum field is zero while the sum is computed */
+		uh4->udpcksum[1] = 0;
+		hnputs(uh4->udpcksum,
+		       ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ));
+		uh4->vihl = IP_VER4;
+		ipoput4(f, bp, 0, c->ttl, c->tos, rc);
+		break;
+
+	case V6:
+		bp = padblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ);
+		if(bp == nil)
+			return;
+
+		/*
+		 * using the v6 ip header to create pseudo header
+		 * first then reset it to the normal ip header
+		 */
+		uh6 = (Udp6hdr *)(bp->rp);
+		memset(uh6, 0, 8);
+		ptcllen = dlen + UDP_UDPHDR_SZ;
+		hnputl(uh6->viclfl, ptcllen);
+		uh6->hoplimit = IP_UDPPROTO;
+		if(ucb->headers) {
+			ipmove(uh6->udpdst, raddr);
+			hnputs(uh6->udpdport, rport);
+			ipmove(uh6->udpsrc, laddr);
+			rc = nil;	/* with user-supplied addresses no Conv is passed to ipoput6 */
+		} else {
+			ipmove(uh6->udpdst, c->raddr);
+			hnputs(uh6->udpdport, c->rport);
+			if(ipcmp(c->laddr, IPnoaddr) == 0)	/* no local address yet: pick one for this destination */
+				findlocalip(f, c->laddr, c->raddr);
+			ipmove(uh6->udpsrc, c->laddr);
+			rc = c;
+		}
+		hnputs(uh6->udpsport, c->lport);
+		hnputs(uh6->udplen, ptcllen);
+		uh6->udpcksum[0] = 0;
+		uh6->udpcksum[1] = 0;
+		hnputs(uh6->udpcksum,
+		       ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ));
+		memset(uh6, 0, 8);	/* wipe the pseudo header values, then write the real first 8 bytes */
+		uh6->viclfl[0] = IP_VER6;
+		hnputs(uh6->len, ptcllen);
+		uh6->nextheader = IP_UDPPROTO;
+		ipoput6(f, bp, 0, c->ttl, c->tos, rc);
+		break;
+
+	default:
+		panic("udpkick: version %d", version);
+	}
+	upriv->ustats.udpOutDatagrams++;
+}
+
+void
+udpiput(Proto *udp, Ipifc *ifc, Block *bp)	/* receive one udp packet (ip header still attached) that arrived on ifc */
+{
+	int len;
+	Udp4hdr *uh4;
+	Udp6hdr *uh6;
+	Conv *c;
+	Udpcb *ucb;
+	uchar raddr[IPaddrlen], laddr[IPaddrlen];
+	ushort rport, lport;
+	Udppriv *upriv;
+	Fs *f;
+	int version;
+	int ottl, oviclfl, olen;
+	uchar *p;
+
+	upriv = udp->priv;
+	f = udp->f;
+	upriv->ustats.udpInDatagrams++;
+
+	uh4 = (Udp4hdr*)(bp->rp);
+	version = ((uh4->vihl&0xF0)==IP_VER6) ? 6 : 4;
+
+	/* Put back pseudo header for checksum
+	 * (remember old values for icmpnoconv()) */
+	switch(version) {
+	case V4:
+		ottl = uh4->Unused;
+		uh4->Unused = 0;
+		len = nhgets(uh4->udplen);
+		olen = nhgets(uh4->udpplen);
+		hnputs(uh4->udpplen, len);
+
+		v4tov6(raddr, uh4->udpsrc);
+		v4tov6(laddr, uh4->udpdst);
+		lport = nhgets(uh4->udpdport);
+		rport = nhgets(uh4->udpsport);
+
+		if(nhgets(uh4->udpcksum)) {	/* a zero checksum field is not verified for v4 */
+			if(ptclcsum(bp, UDP4_PHDR_OFF, len+UDP4_PHDR_SZ)) {
+				upriv->ustats.udpInErrors++;
+				netlog(f, Logudp, "udp: checksum error %I\n", raddr);
+				DPRINT("udp: checksum error %I\n", raddr);
+				freeblist(bp);
+				return;
+			}
+		}
+		uh4->Unused = ottl;
+		hnputs(uh4->udpplen, olen);
+		break;
+	case V6:
+		uh6 = (Udp6hdr*)(bp->rp);
+		len = nhgets(uh6->udplen);
+		oviclfl = nhgetl(uh6->viclfl);
+		olen = nhgets(uh6->len);
+		ottl = uh6->hoplimit;
+		ipmove(raddr, uh6->udpsrc);
+		ipmove(laddr, uh6->udpdst);
+		lport = nhgets(uh6->udpdport);
+		rport = nhgets(uh6->udpsport);
+		memset(uh6, 0, 8);
+		hnputl(uh6->viclfl, len);
+		uh6->hoplimit = IP_UDPPROTO;
+		if(ptclcsum(bp, UDP6_PHDR_OFF, len+UDP6_PHDR_SZ)) {	/* for v6 the checksum is always verified */
+			upriv->ustats.udpInErrors++;
+			netlog(f, Logudp, "udp: checksum error %I\n", raddr);
+			DPRINT("udp: checksum error %I\n", raddr);
+			freeblist(bp);
+			return;
+		}
+		hnputl(uh6->viclfl, oviclfl);
+		hnputs(uh6->len, olen);
+		uh6->nextheader = IP_UDPPROTO;
+		uh6->hoplimit = ottl;
+		break;
+	default:
+		panic("udpiput: version %d", version);
+		return;	/* to avoid a warning */
+	}
+
+	QLOCK(udp);
+
+	c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
+	if(c == nil){
+		/* no conversation found */
+		upriv->ustats.udpNoPorts++;
+		QUNLOCK(udp);
+		netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
+		       laddr, lport);
+
+		switch(version){
+		case V4:
+			icmpnoconv(f, bp);
+			break;
+		case V6:
+			icmphostunr(f, ifc, bp, Icmp6_port_unreach, 0);
+			break;
+		default:
+			panic("udpiput2: version %d", version);
+		}
+
+		freeblist(bp);
+		return;
+	}
+	ucb = (Udpcb*)c->ptcl;
+
+	if(c->state == Announced){
+		if(ucb->headers == 0){
+			/* create a new conversation */
+			if(ipforme(f, laddr) != Runi) {	/* not addressed to one of our unicast addresses: use the interface's */
+				switch(version){
+				case V4:
+					ipmove(laddr, ifc->lifc->local);	/* local is already a 16-byte address; v4tov6 would read its leading zero bytes */
+					break;
+				case V6:
+					ipmove(laddr, ifc->lifc->local);
+					break;
+				default:
+					panic("udpiput3: version %d", version);
+				}
+			}
+			c = Fsnewcall(c, raddr, rport, laddr, lport, version);
+			if(c == nil){
+				QUNLOCK(udp);
+				freeblist(bp);
+				return;
+			}
+			iphtadd(&upriv->ht, c);
+			ucb = (Udpcb*)c->ptcl;
+		}
+	}
+
+	QLOCK(c);	/* conversation lock is taken before the protocol lock is dropped */
+	QUNLOCK(udp);
+
+	/* Trim the packet down to data size; a udplen shorter than the udp header counts as a length error */
+	len -= UDP_UDPHDR_SZ;
+	if(len < 0){
+		freeblist(bp);
+		bp = nil;
+	} else switch(version){
+	case V4:
+		bp = trimblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ, len);
+		break;
+	case V6:
+		bp = trimblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ, len);
+		break;
+	default:
+		bp = nil;
+		panic("udpiput4: version %d", version);
+	}
+	if(bp == nil){
+		QUNLOCK(c);
+		netlog(f, Logudp, "udp: len err %I.%d -> %I.%d\n", raddr, rport,
+		       laddr, lport);
+		upriv->lenerr++;
+		return;
+	}
+
+	netlog(f, Logudpmsg, "udp: %I.%d -> %I.%d l %d\n", raddr, rport,
+	       laddr, lport, len);
+
+	switch(ucb->headers){
+	case 7:	/* prefix the data with raddr, laddr, ifc addr, rport, lport */
+		/* pass the src address */
+		bp = padblock(bp, UDP_USEAD7);
+		p = bp->rp;
+		ipmove(p, raddr); p += IPaddrlen;
+		ipmove(p, laddr); p += IPaddrlen;
+		ipmove(p, ifc->lifc->local); p += IPaddrlen;
+		hnputs(p, rport); p += 2;
+		hnputs(p, lport);
+		break;
+	}
+
+	if(bp->next)
+		bp = concatblock(bp);
+
+	if(qfull(c->rq)){	/* a full read queue drops the packet */
+		QUNLOCK(c);
+		netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n", raddr, rport,
+		       laddr, lport);
+		freeblist(bp);
+		return;
+	}
+
+	qpass(c->rq, bp);
+	QUNLOCK(c);
+}
+
+/* protocol specific ctl messages: only a bare "headers" is understood */
+char*
+udpctl(Conv *c, char **f, int n)
+{
+	Udpcb *cb;
+
+	if(n != 1 || strcmp(f[0], "headers") != 0)
+		return "unknown control request";
+
+	/* new headers format */
+	cb = (Udpcb*)c->ptcl;
+	cb->headers = 7;
+	return nil;
+}
+
+void
+udpadvise(Proto *udp, Block *bp, char *msg)	/* hang up the conversation that matches bp's header; bp is always freed */
+{
+	Udp4hdr *h4;
+	Udp6hdr *h6;
+	uchar source[IPaddrlen], dest[IPaddrlen];
+	ushort psource, pdest;
+	Conv *s, **p;
+	int version;
+
+	h4 = (Udp4hdr*)(bp->rp);
+	version = ((h4->vihl&0xF0)==IP_VER6) ? 6 : 4;
+
+	switch(version) {
+	case V4:
+		v4tov6(dest, h4->udpdst);
+		v4tov6(source, h4->udpsrc);
+		psource = nhgets(h4->udpsport);
+		pdest = nhgets(h4->udpdport);
+		break;
+	case V6:
+		h6 = (Udp6hdr*)(bp->rp);
+		ipmove(dest, h6->udpdst);
+		ipmove(source, h6->udpsrc);
+		psource = nhgets(h6->udpsport);
+		pdest = nhgets(h6->udpdport);
+		break;
+	default:
+		panic("udpadvise: version %d", version);
+		return;  /* to avoid a warning */
+	}
+
+	/* Look for a connection */
+	QLOCK(udp);
+	for(p = udp->conv; *p; p++) {	/* walks until a nil entry */
+		s = *p;
+		if(s->rport == pdest)	/* the header's destination is matched against our remote end, its source against our local end */
+		if(s->lport == psource)
+		if(ipcmp(s->raddr, dest) == 0)
+		if(ipcmp(s->laddr, source) == 0){
+			if(s->ignoreadvice)	/* conversation opted out: stop searching, nothing is hung up */
+				break;
+			QLOCK(s);	/* take the conversation lock before letting go of the protocol lock */
+			QUNLOCK(udp);
+			qhangup(s->rq, msg);
+			qhangup(s->wq, msg);
+			QUNLOCK(s);
+			freeblist(bp);
+			return;
+		}
+	}
+	QUNLOCK(udp);
+	freeblist(bp);
+}
+
+/* print the four MIB counters into buf (at most len bytes); returns what snprint returns */
+int
+udpstats(Proto *udp, char *buf, int len)
+{
+	Udpstats *st;
+
+	st = &((Udppriv*)udp->priv)->ustats;
+	return snprint(buf, len,
+		"InDatagrams: %lud\nNoPorts: %lud\nInErrors: %lud\nOutDatagrams: %lud\n",
+		st->udpInDatagrams, st->udpNoPorts,
+		st->udpInErrors, st->udpOutDatagrams);
+}
+
+void
+udpinit(Fs *fs)	/* build the udp Proto, fill in its entry points and hand it to Fsproto */
+{
+	Proto *udp;
+
+	udp = smalloc(sizeof(*udp));
+	udp->priv = smalloc(sizeof(Udppriv));
+	udp->name = "udp";
+	udp->ipproto = IP_UDPPROTO;
+	udp->nc = Nchans;
+	udp->ptclsize = sizeof(Udpcb);
+	udp->create = udpcreate;
+	udp->connect = udpconnect;
+	udp->announce = udpannounce;
+	udp->close = udpclose;
+	udp->ctl = udpctl;
+	udp->state = udpstate;
+	udp->stats = udpstats;
+	udp->rcv = udpiput;
+	udp->advise = udpadvise;
+
+	Fsproto(fs, udp);
+}
diff --git a/src/9vx/a/kfs.h b/src/9vx/a/kfs.h
@@ -0,0 +1,57 @@
+typedef struct Qid9p1 Qid9p1;
+typedef struct Dentry Dentry;
+typedef struct Kfsfile Kfsfile;
+typedef struct Kfs Kfs;
+
+/*
+ * DON'T TOUCH: this is the on-disk structure.
+ * NOTE(review): as a disk format the width of `long' matters -- confirm
+ * it is the same on every host this is built for.
+ */
+struct	Qid9p1
+{
+	long	path;
+	long	version;
+};
+
+#define	NAMELEN		28		/* size of names */
+#define	NDBLOCK		6		/* number of direct blocks in Dentry */
+
+/*
+ * DON'T TOUCH: this is the on-disk structure.
+ * A directory entry: a NAMELEN-byte name, owner ids, mode bits, qid,
+ * size, NDBLOCK direct block numbers, one indirect and one double
+ * indirect block number, and two time stamps.
+ * NOTE(review): field widths and padding are part of the disk format --
+ * confirm `long' and `short' have the expected sizes on the build host.
+ */
+struct	Dentry
+{
+	char	name[NAMELEN];
+	short	uid;
+	short	gid;
+	ushort	mode;
+/*
+		bit values for mode, kept here for reference only:
+		#define	DALLOC	0x8000
+		#define	DDIR	0x4000
+		#define	DAPND	0x2000
+		#define	DLOCK	0x1000
+		#define	DREAD	0x4
+		#define	DWRITE	0x2
+		#define	DEXEC	0x1
+*/
+	Qid9p1	qid;
+	long	size;
+	long	dblock[NDBLOCK];
+	long	iblock;
+	long	diblock;
+	long	atime;
+	long	mtime;
+};
+
+/*
+ * A Dentry together with an offset.
+ * NOTE(review): presumably `off' locates the entry on disk -- confirm
+ * against the code that fills it in.
+ */
+struct Kfsfile
+{
+	Dentry _;
+	long off;
+};
+
+/*
+ * Size parameters of one kfs file system.
+ * NOTE(review): presumably derived from the block size when the file
+ * system is opened (see kfsinit) -- confirm in the implementation.
+ */
+struct Kfs
+{
+	int	RBUFSIZE;
+	int	BUFSIZE;
+	int	DIRPERBUF;
+	int	INDPERBUF;
+	int	INDPERBUF2;
+};
+
+extern int kfsinit(Fs*);
+
diff --git a/src/9vx/a/netif.c b/src/9vx/a/netif.c
@@ -0,0 +1,761 @@
+#include	"u.h"
+#include	"lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"error.h"
+#include	"netif.h"
+
+static int netown(Netfile*, char*, int);
+static int openfile(Netif*, int);
+static char* matchtoken(char*, char*);
+static char* netmulti(Netif*, Netfile*, uchar*, int);
+static int parseaddr(uchar*, char*, int);
+
+int	netifdebug;
+#define	dprint(...)	if(netifdebug)print(__VA_ARGS__); else USED(netifdebug)
+
+/*
+ *  Initialise a network interface: record its name (truncated to
+ *  KNAMELEN-1 bytes), allocate a table of nfile conversation slots
+ *  and remember the queue limit.  Panics if the table cannot be
+ *  allocated.
+ */
+void
+netifinit(Netif *nif, char *name, int nfile, ulong limit)
+{
+	Netfile **tab;
+
+	tab = xalloc(nfile*sizeof(Netfile*));
+	if(tab == nil)
+		panic("netifinit: no memory");
+
+	nif->f = tab;
+	nif->nfile = nfile;
+	nif->limit = limit;
+
+	strncpy(nif->name, name, KNAMELEN-1);
+	nif->name[KNAMELEN-1] = 0;
+}
+
+#define DD(c,q,nam,n,owner,perm,dp) dprint("%lux.%llux %s\n", q.type, q.path, nam); devdir(c,q,nam,n,owner,perm,dp)
+
+/*
+ *  generate a 3 level directory
+ *
+ *  Directory generator handed to devwalk, devdirread and devstat.
+ *  vp is really the Netif.  Fills *dp with entry i of the directory
+ *  that c refers to and returns 1; returns 0 for an unused
+ *  conversation slot and -1 when i is past the end.
+ */
+static int
+netifgen(Chan *c, char *dummy, Dirtab *vp, int dummy1, int i, Dir *dp)
+{
+	Qid q;
+	Netif *nif = (Netif*)vp;
+	Netfile *f;
+	int t, perm;
+	char *o;
+
+	memset(&q, 0, sizeof q);
+	q.type = QTFILE;
+	q.vers = 0;
+
+	dprint("gen %d %llud %.2d	", c->dri, c->qid.path, i);
+	/* top level directory contains the name of the network */
+	if(c->qid.path == 0){
+		switch(i){
+		case DEVDOTDOT:
+			q.path = 0;
+			q.type = QTDIR;
+			DD(c, q, ".", 0, eve, 0555, dp);
+			break;
+		case 0:
+			q.path = N2ndqid;
+			q.type = QTDIR;
+			strcpy(up->genbuf, nif->name);
+			DD(c, q, up->genbuf, 0, eve, 0555, dp);
+			break;
+		default:
+			dprint("-> -1 (top)\n");
+			return -1;
+		}
+		return 1;
+	}
+
+	/* second level contains clone plus all the conversations */
+	t = NETTYPE(c->qid.path);
+	if(t == N2ndqid || t == Ncloneqid || t == Naddrqid || t == Nstatqid || t == Nifstatqid){
+		switch(i){
+		case DEVDOTDOT:
+			q.type = QTDIR;
+			q.path = 0;
+			DD(c, q, ".", 0, eve, DMDIR|0555, dp);
+			break;
+		case 0:
+			q.path = Ncloneqid;
+			DD(c, q, "clone", 0, eve, 0666, dp);
+			break;
+		case 1:
+			q.path = Naddrqid;
+			DD(c, q, "addr", 0, eve, 0666, dp);
+			break;
+		case 2:
+			q.path = Nstatqid;
+			DD(c, q, "stats", 0, eve, 0444, dp);
+			break;
+		case 3:
+			q.path = Nifstatqid;
+			DD(c, q, "ifstats", 0, eve, 0444, dp);
+			break;
+		default:
+			/* entries 4 and up are the numbered conversation directories */
+			i -= 4;
+			if(i >= nif->nfile){
+				dprint("-> -1 (2d): %d %d\n", i, nif->nfile);
+				return -1;
+			}
+			if(nif->f[i] == 0){
+				dprint("nif->f[%d] -> 0\n", i);
+				return 0;
+			}
+			q.type = QTDIR;
+			q.path = NETQID(i, N3rdqid);
+			sprint(up->genbuf, "%d", i);
+			DD(c, q, up->genbuf, 0, eve, DMDIR|0555, dp);
+			break;
+		}
+		return 1;
+	}
+
+	/* third level */
+	f = nif->f[NETID(c->qid.path)];
+	if(f == 0){
+		dprint("->f 0\n");
+		return -1;
+	}
+	/* data and ctl show the conversation's owner and mode once it has one */
+	if(*f->owner){
+		o = f->owner;
+		perm = f->mode;
+	} else {
+		o = eve;
+		perm = 0666;
+	}
+	switch(i){
+	case DEVDOTDOT:
+		q.type = QTDIR;
+		q.path = N2ndqid;
+		strcpy(up->genbuf, nif->name);
+		DD(c, q, up->genbuf, 0, eve, DMDIR|0555, dp);
+		break;
+	case 0:
+		q.path = NETQID(NETID(c->qid.path), Ndataqid);
+		DD(c, q, "data", 0, o, perm, dp);
+		break;
+	case 1:
+		q.path = NETQID(NETID(c->qid.path), Nctlqid);
+		DD(c, q, "ctl", 0, o, perm, dp);
+		break;
+	case 2:
+		q.path = NETQID(NETID(c->qid.path), Nstatqid);
+		DD(c, q, "stats", 0, eve, 0444, dp);
+		break;
+	case 3:
+		q.path = NETQID(NETID(c->qid.path), Ntypeqid);
+		DD(c, q, "type", 0, eve, 0444, dp);
+		break;
+	case 4:
+		q.path = NETQID(NETID(c->qid.path), Nifstatqid);
+		DD(c, q, "ifstats", 0, eve, 0444, dp);
+		break;
+	default:
+		dprint("-> -1 (third)\n");
+		return -1;
+	}
+	return 1;
+}
+
+/*
+ *  Debugging aid for netifwalk: when netifdebug is set, print the
+ *  starting qid path and the names being walked, separated by blanks.
+ *  nif and nc are not used.
+ */
+static void
+prwalk(Netif *nif, Chan *c, Chan *nc, char **name, int nname)
+{
+	char line[512], *w, *lim;
+	int k;
+
+	if(netifdebug == 0)
+		return;
+
+	w = line;
+	lim = line + sizeof line;
+	k = 0;
+	while(k < nname){
+		w = seprint(w, lim, "%s ", name[k]);
+		k++;
+	}
+	/* drop the blank that follows the last name */
+	if(w > line)
+		w--;
+	*w = 0;
+	print("netifwalk %lld [%s]\n", c->qid.path, line);
+}
+
+/*
+ *  Walk from c through name[0..nname-1] using the generic devwalk with
+ *  netifgen as the directory generator; the Netif is passed in the
+ *  Dirtab slot.  Traces the request first when netifdebug is set.
+ */
+Walkqid*
+netifwalk(Netif *nif, Chan *c, Chan *nc, char **name, int nname)
+{
+	prwalk(nif, c, nc, name, nname);
+	return devwalk(c, nc, name, nname, (Dirtab *)nif, 0, netifgen);
+}
+
+/*
+ *  Open the file c refers to with mode omode and return c.
+ *
+ *  Directories may only be opened OREAD (else Eperm).  Opening data or
+ *  ctl takes a reference on the conversation; opening clone allocates a
+ *  free conversation and turns c into that conversation's ctl file.
+ *  Any other file may only be opened OREAD (else Ebadarg).  For data
+ *  and ctl the caller must also pass the ownership check in netown
+ *  (else Eperm).
+ */
+Chan*
+netifopen(Netif *nif, Chan *c, int omode)
+{
+	int id;
+	Netfile *f;
+
+	/* qid.path is 64 bits wide: print it with a matching verb */
+	dprint("netifopen %p %lld\n", nif, c? (vlong)c->qid.path: -1LL);
+	id = 0;
+	if(c->qid.type & QTDIR){
+		if(omode != OREAD)
+			error(Eperm);
+	} else {
+		switch(NETTYPE(c->qid.path)){
+		case Ndataqid:
+		case Nctlqid:
+			id = NETID(c->qid.path);
+			openfile(nif, id);
+			break;
+		case Ncloneqid:
+			id = openfile(nif, -1);
+			c->qid.path = NETQID(id, Nctlqid);
+			break;
+		default:
+			if(omode != OREAD)
+				error(Ebadarg);
+		}
+		/* second look: a clone open has just become a ctl open */
+		switch(NETTYPE(c->qid.path)){
+		case Ndataqid:
+		case Nctlqid:
+			f = nif->f[id];
+			if(netown(f, up->user, omode&7) < 0)
+				error(Eperm);
+			break;
+		}
+	}
+	c->mode = openmode(omode);
+	c->flag |= COPEN;
+	c->offset = 0;
+	c->iounit = qiomaxatomic;
+	return c;
+}
+
+/*
+ *  Read up to n bytes at offset from the file c refers to into a.
+ *
+ *  Directories are generated with netifgen.  data reads from the
+ *  conversation's input queue, ctl yields the conversation number,
+ *  stats a text dump of the interface counters and address, addr the
+ *  address in hex, type the conversation's packet type, and ifstats
+ *  nothing.  Any other file raises Ebadarg.
+ */
+long
+netifread(Netif *nif, Chan *c, void *a, long n, ulong offset)
+{
+	int i, j;
+	Netfile *f;
+	char *p;
+
+	/* qid.path is 64 bits wide; NETTYPE() yields a ulong */
+	dprint("netifread %llud %lud\n", c->qid.path, NETTYPE(c->qid.path));
+	if(c->qid.type&QTDIR)
+		return devdirread(c, a, n, (Dirtab*)nif, 0, netifgen);
+
+	switch(NETTYPE(c->qid.path)){
+	case Ndataqid:
+		f = nif->f[NETID(c->qid.path)];
+		return qread(f->in, a, n);
+	case Nctlqid:
+		return readnum(offset, a, n, NETID(c->qid.path), NUMSIZE);
+	case Nstatqid:
+		dprint("netstatqid\n");
+		p = smalloc(READSTR);
+		j = snprint(p, READSTR, "in: %llud\n", nif->inpackets);
+		j += snprint(p+j, READSTR-j, "link: %d\n", nif->link);
+		j += snprint(p+j, READSTR-j, "out: %llud\n", nif->outpackets);
+		j += snprint(p+j, READSTR-j, "crc errs: %d\n", nif->crcs);
+		j += snprint(p+j, READSTR-j, "overflows: %d\n", nif->overflows);
+		j += snprint(p+j, READSTR-j, "soft overflows: %d\n", nif->soverflows);
+		j += snprint(p+j, READSTR-j, "framing errs: %d\n", nif->frames);
+		j += snprint(p+j, READSTR-j, "buffer errs: %d\n", nif->buffs);
+		j += snprint(p+j, READSTR-j, "output errs: %d\n", nif->oerrs);
+		j += snprint(p+j, READSTR-j, "prom: %d\n", nif->prom);
+		j += snprint(p+j, READSTR-j, "mbps: %d\n", nif->mbps);
+		j += snprint(p+j, READSTR-j, "addr: ");
+		for(i = 0; i < nif->alen; i++)
+			j += snprint(p+j, READSTR-j, "%2.2ux", nif->addr[i]);
+		snprint(p+j, READSTR-j, "\n");
+		n = readstr(offset, a, n, p);
+		free(p);
+		return n;
+	case Naddrqid:
+		/*
+		 * Allocate the same way as the stats case rather than with
+		 * an unchecked malloc, and start from an empty string so a
+		 * zero-length address still reads back as "".
+		 */
+		p = smalloc(READSTR);
+		p[0] = 0;
+		j = 0;
+		for(i = 0; i < nif->alen; i++)
+			j += snprint(p+j, READSTR-j, "%2.2ux", nif->addr[i]);
+		n = readstr(offset, a, n, p);
+		free(p);
+		return n;
+	case Ntypeqid:
+		f = nif->f[NETID(c->qid.path)];
+		return readnum(offset, a, n, f->type, NUMSIZE);
+	case Nifstatqid:
+		return 0;
+	}
+	error(Ebadarg);
+	return -1;	/* not reached */
+}
+
+/*
+ *  Block-oriented read.  Only a conversation's data file is read
+ *  straight from its input queue; directories and every other file go
+ *  through the generic devbread.
+ */
+Block*
+netifbread(Netif *nif, Chan *c, long n, ulong offset)
+{
+	if((c->qid.type & QTDIR) || NETTYPE(c->qid.path) != Ndataqid)
+		return devbread(c, n, offset);
+
+	return qbread(nif->f[NETID(c->qid.path)]->in, n);
+}
+
+/*
+ *  Report whether some conversation on this interface already has the
+ *  given packet type.  Types <= 0 are never considered in use.
+ *  Returns 1 if taken, 0 otherwise.
+ */
+static int
+typeinuse(Netif *nif, int type)
+{
+	int i;
+
+	if(type <= 0)
+		return 0;
+
+	for(i = 0; i < nif->nfile; i++)
+		if(nif->f[i] != 0 && nif->f[i]->type == type)
+			return 1;
+	return 0;
+}
+
+/*
+ *  the devxxx.c that calls us handles writing data, it knows best
+ *
+ *  Handle a write to a conversation's ctl file; any other file raises
+ *  Eperm.  At most sizeof(buf)-1 bytes of the request are examined.
+ *  Recognised commands: connect, promiscuous, scanbs, bridge,
+ *  headersonly, addmulti and remmulti.  Returns n (possibly reduced to
+ *  the number of bytes examined), or -1 when the command is not one of
+ *  these so the caller can try its own.
+ */
+long
+netifwrite(Netif *nif, Chan *c, void *a, long n)
+{
+	Netfile *f;
+	int type;
+	char *p, buf[64];
+	uchar binaddr[Nmaxaddr];
+
+	if(NETTYPE(c->qid.path) != Nctlqid)
+		error(Eperm);
+
+	if(n >= sizeof(buf))
+		n = sizeof(buf)-1;
+	memmove(buf, a, n);
+	buf[n] = 0;
+
+	/* the error label is set up before the lock is taken; every error() below is raised with the lock held */
+	if(waserror()){
+		QUNLOCK(nif);
+		nexterror();
+	}
+
+	QLOCK(nif);
+	f = nif->f[NETID(c->qid.path)];
+	if((p = matchtoken(buf, "connect")) != 0){
+		type = atoi(p);
+		if(typeinuse(nif, type))
+			error(Einuse);
+		f->type = type;
+		/* negative types are counted in nif->all */
+		if(f->type < 0)
+			nif->all++;
+	} else if(matchtoken(buf, "promiscuous")){
+		if(f->prom == 0){
+			/* only the first promiscuous conversation switches the interface */
+			if(nif->prom == 0 && nif->promiscuous != nil)
+				nif->promiscuous(nif->arg, 1);
+			f->prom = 1;
+			nif->prom++;
+		}
+	} else if((p = matchtoken(buf, "scanbs")) != 0){
+		/* scan for base stations */
+		if(f->scan == 0){
+			type = atoi(p);
+			/* the argument is clamped to a minimum of 5 */
+			if(type < 5)
+				type = 5;
+			if(nif->scanbs != nil)
+				nif->scanbs(nif->arg, type);
+			f->scan = type;
+			nif->scan++;
+		}
+	} else if(matchtoken(buf, "bridge")){
+		f->bridge = 1;
+	} else if(matchtoken(buf, "headersonly")){
+		f->headersonly = 1;
+	} else if((p = matchtoken(buf, "addmulti")) != 0){
+		if(parseaddr(binaddr, p, nif->alen) < 0)
+			error("bad address");
+		p = netmulti(nif, f, binaddr, 1);
+		if(p)
+			error(p);
+	} else if((p = matchtoken(buf, "remmulti")) != 0){
+		if(parseaddr(binaddr, p, nif->alen) < 0)
+			error("bad address");
+		p = netmulti(nif, f, binaddr, 0);
+		if(p)
+			error(p);
+	} else
+		n = -1;
+	QUNLOCK(nif);
+	poperror();
+	return n;
+}
+
+/*
+ *  Change the owner and/or mode of the conversation c refers to from
+ *  the n-byte stat buffer db.  Raises Enonexist if the conversation
+ *  does not exist, Eperm if the caller fails the netown write check,
+ *  and Eshortstat if db cannot be converted.  Returns the number of
+ *  bytes convM2D consumed.
+ */
+int
+netifwstat(Netif *nif, Chan *c, uchar *db, int n)
+{
+	Dir *dir;
+	Netfile *f;
+	int m;
+
+	f = nif->f[NETID(c->qid.path)];
+	if(f == 0)
+		error(Enonexist);
+
+	if(netown(f, up->user, OWRITE) < 0)
+		error(Eperm);
+
+	/* the strings convM2D unpacks are stored right after the Dir */
+	dir = smalloc(sizeof(Dir)+n);
+	m = convM2D(db, n, &dir[0], (char*)&dir[1]);
+	if(m == 0){
+		free(dir);
+		error(Eshortstat);
+	}
+	if(!emptystr(dir[0].uid)){
+		/*
+		 * owner is handed to devdir as a string by netifgen, so
+		 * it must stay NUL-terminated even for an over-long uid
+		 * (same truncation as netifinit applies to the name).
+		 */
+		strncpy(f->owner, dir[0].uid, KNAMELEN-1);
+		f->owner[KNAMELEN-1] = 0;
+	}
+	/* a mode of all ones means "leave unchanged" */
+	if(dir[0].mode != ~0UL)
+		f->mode = dir[0].mode;
+	free(dir);
+	return m;
+}
+
+/*
+ *  Stat the file c refers to into the n-byte buffer db using the
+ *  generic devstat with netifgen as the directory generator.
+ */
+int
+netifstat(Netif *nif, Chan *c, uchar *db, int n)
+{
+	dprint("netifstat %s nfile %d %lld type=%d\n", nif->name, nif->nfile, c->qid.path, c->type);
+	return devstat(c, db, n, (Dirtab *)nif, 0, netifgen);
+}
+
+/*
+ *  Close c.  Only open data and ctl files hold a reference on their
+ *  conversation; when the last reference goes the conversation's
+ *  promiscuous, scan and multicast requests are withdrawn from the
+ *  interface, its owner, type and flags are reset, and its input queue
+ *  is closed.
+ */
+void
+netifclose(Netif *nif, Chan *c)
+{
+	Netfile *f;
+	int t;
+	Netaddr *ap;
+
+	if((c->flag & COPEN) == 0)
+		return;
+
+	t = NETTYPE(c->qid.path);
+	if(t != Ndataqid && t != Nctlqid)
+		return;
+
+	f = nif->f[NETID(c->qid.path)];
+	QLOCK(f);
+	if(--(f->inuse) == 0){
+		if(f->prom){
+			QLOCK(nif);
+			if(--(nif->prom) == 0 && nif->promiscuous != nil)
+				nif->promiscuous(nif->arg, 0);
+			QUNLOCK(nif);
+			f->prom = 0;
+		}
+		if(f->scan){
+			QLOCK(nif);
+			if(--(nif->scan) == 0 && nif->scanbs != nil)
+				nif->scanbs(nif->arg, 0);
+			QUNLOCK(nif);
+			f->scan = 0;
+		}
+		if(f->nmaddr){
+			QLOCK(nif);
+			/*
+			 * Bit t of f->maddr belongs to the t'th address on
+			 * nif->maddr (that is how netmulti numbers them), so
+			 * t has to advance with the list.  Addresses beyond
+			 * the bitmap were never recorded for this file.
+			 */
+			t = 0;
+			for(ap = nif->maddr; ap && t < 8*sizeof(f->maddr); ap = ap->next){
+				if(f->maddr[t/8] & (1<<(t%8)))
+					netmulti(nif, f, ap->addr, 0);
+				t++;
+			}
+			QUNLOCK(nif);
+			f->nmaddr = 0;
+		}
+		if(f->type < 0){
+			QLOCK(nif);
+			--(nif->all);
+			QUNLOCK(nif);
+		}
+		f->owner[0] = 0;
+		/* debugging trace only: keep it behind netifdebug */
+		dprint("drop type %.4ux\n", f->type);
+		f->type = 0;
+		f->bridge = 0;
+		f->headersonly = 0;
+		qclose(f->in);
+	}
+	QUNLOCK(f);
+}
+
+Lock netlock;
+
+/*
+ *  Ownership check for a conversation, done under netlock.
+ *
+ *  An unowned conversation is claimed for user o with mode 0660 and
+ *  access is granted.  Otherwise the requested access (omode&3) is
+ *  checked against the owner bits if o is the owner, the group bits if
+ *  o is eve, and the other bits for everyone else.
+ *  Returns 0 if access is allowed, -1 if not.
+ */
+static int
+netown(Netfile *p, char *o, int omode)
+{
+	static int access[] = { 0400, 0200, 0600, 0100 };
+	int bits, want, rv;
+
+	rv = 0;
+	lock(&netlock);
+	if(*p->owner == 0){
+		/* nobody owns it yet: the caller becomes the owner */
+		strncpy(p->owner, o, KNAMELEN);
+		p->mode = 0660;
+	} else {
+		/* shift the relevant permission triple up into the owner position */
+		if(strncmp(o, p->owner, KNAMELEN) == 0)	/* User */
+			bits = p->mode;
+		else if(strncmp(o, eve, KNAMELEN) == 0)	/* Bootes is group */
+			bits = p->mode<<3;
+		else
+			bits = p->mode<<6;		/* Other */
+
+		want = access[omode&3];
+		if((want & bits) != want)
+			rv = -1;
+	}
+	unlock(&netlock);
+	return rv;
+}
+
+/*
+ *  Increment the reference count of a network device.
+ *  If id < 0, return an unused ether device.
+ *
+ *  With id >= 0 the existing conversation is reopened (Enodev if the
+ *  slot is empty) and id is returned.  With id < 0 the slot table is
+ *  searched under the interface lock for an empty slot, which is then
+ *  allocated, or an allocated slot that is not in use; the conversation
+ *  is handed to the current user and its index returned.  Raises Enodev
+ *  if every slot is busy.
+ */
+static int
+openfile(Netif *nif, int id)
+{
+	Netfile *f, **fp, **efp;
+
+	if(id >= 0){
+		f = nif->f[id];
+		if(f == 0)
+			error(Enodev);
+		QLOCK(f);
+		qreopen(f->in);
+		f->inuse++;
+		QUNLOCK(f);
+		return id;
+	}
+
+	QLOCK(nif);
+	/* every error raised below releases the interface lock on its way out */
+	if(waserror()){
+		QUNLOCK(nif);
+		nexterror();
+	}
+	efp = &nif->f[nif->nfile];
+	for(fp = nif->f; fp < efp; fp++){
+		f = *fp;
+		if(f == 0){
+			/* NOTE(review): the new Netfile is used without being cleared here -- presumably malloc zeroes; confirm */
+			f = malloc(sizeof(Netfile));
+			if(f == 0)
+				exhausted("memory");
+			f->in = qopen(nif->limit, Qmsg, 0, 0);
+			if(f->in == nil){
+				free(f);
+				exhausted("memory");
+			}
+			*fp = f;
+			QLOCK(f);
+		} else {
+			QLOCK(f);
+			if(f->inuse){
+				QUNLOCK(f);
+				continue;
+			}
+		}
+		/* f is locked and free: claim it */
+		f->inuse = 1;
+		qreopen(f->in);
+		netown(f, up->user, 0);
+		QUNLOCK(f);
+		QUNLOCK(nif);
+		poperror();
+		return fp - nif->f;
+	}
+	/* still inside the waserror scope, so this unlocks nif too */
+	error(Enodev);
+	return -1;	/* not reached */
+}
+
+/*
+ *  If p begins with token as a whole word, return a pointer to the
+ *  first character after the white space (blank, tab, newline) that
+ *  follows it, or to the terminating NUL if the string ends there.
+ *  Return 0 if p does not start with token or token is only a prefix
+ *  of a longer word.
+ */
+static char*
+matchtoken(char *p, char *token)
+{
+	int n;
+	char *rest;
+
+	n = strlen(token);
+	if(strncmp(p, token, n) != 0)
+		return 0;
+
+	rest = p + n;
+	switch(*rest){
+	case 0:
+		return rest;
+	case ' ':
+	case '\t':
+	case '\n':
+		break;
+	default:
+		return 0;
+	}
+	do
+		rest++;
+	while(*rest == ' ' || *rest == '\t' || *rest == '\n');
+	return rest;
+}
+
+/*
+ *  Store the 64-bit value v at p, most significant byte first.
+ */
+void
+hnputv(void *p, uvlong v)
+{
+	uchar *b;
+
+	b = p;
+	hnputl(&b[0], v>>32);
+	hnputl(&b[4], v);
+}
+
+/*
+ *  Store the 32-bit value v at p, most significant byte first.
+ */
+void
+hnputl(void *p, uint v)
+{
+	uchar *b;
+
+	b = p;
+	b[3] = v;
+	b[2] = v>>8;
+	b[1] = v>>16;
+	b[0] = v>>24;
+}
+
+/*
+ *  Store the 16-bit value v at p, most significant byte first.
+ */
+void
+hnputs(void *p, ushort v)
+{
+	uchar *b;
+
+	b = p;
+	b[1] = v;
+	b[0] = v>>8;
+}
+
+/*
+ *  Fetch the 64-bit value stored at p most significant byte first.
+ */
+uvlong
+nhgetv(void *p)
+{
+	uchar *a;
+
+	a = p;
+	/* shift as unsigned so a set top bit is not moved into a sign bit */
+	return ((uvlong)nhgetl(a) << 32) | nhgetl(a+4);
+}
+
+/*
+ *  Fetch the 32-bit value stored at p most significant byte first.
+ */
+uint
+nhgetl(void *p)
+{
+	uchar *a;
+
+	a = p;
+	/*
+	 * a[0] would be promoted to int; shifting a byte >= 0x80 left by
+	 * 24 overflows a signed int, so widen to uint first.
+	 */
+	return ((uint)a[0]<<24)|((uint)a[1]<<16)|((uint)a[2]<<8)|((uint)a[3]<<0);
+}
+
+/*
+ *  Fetch the 16-bit value stored at p most significant byte first.
+ */
+ushort
+nhgets(void *p)
+{
+	uchar *b;
+	ushort v;
+
+	b = p;
+	v = b[0];
+	v = (v<<8) | b[1];
+	return v;
+}
+
+/*
+ *  Hash the len bytes at a into a bucket index in [0, Nmhash).
+ */
+static ulong
+hash(uchar *a, int len)
+{
+	ulong h;
+	int i;
+
+	h = 0;
+	for(i = 0; i < len; i++)
+		h = (h << 1) + a[i];
+	return h%Nmhash;
+}
+
+/*
+ *  Return 1 if addr (alen bytes) is a multicast address with at least
+ *  one reference on this interface, 0 otherwise.  Only the first entry
+ *  in the hash chain that matches addr is consulted.
+ */
+int
+activemulti(Netif *nif, uchar *addr, int alen)
+{
+	Netaddr *hp;
+
+	hp = nif->mhash[hash(addr, alen)];
+	while(hp != nil){
+		if(memcmp(addr, hp->addr, alen) == 0)
+			return hp->ref != 0;
+		hp = hp->hnext;
+	}
+	return 0;
+}
+
+/*
+ *  Convert alen pairs of hex digits in from, each optionally followed
+ *  by a ':', into alen bytes at to.  Returns 0 on success and -1 if
+ *  the string ends early.  The digits themselves are not validated;
+ *  strtoul decides what each pair is worth.
+ */
+static int
+parseaddr(uchar *to, char *from, int alen)
+{
+	char pair[4];
+	int k;
+
+	pair[2] = 0;
+	for(k = 0; k < alen; k++){
+		if(from[0] == 0 || from[1] == 0)
+			return -1;
+		pair[0] = from[0];
+		pair[1] = from[1];
+		from += 2;
+		to[k] = strtoul(pair, 0, 16);
+		if(*from == ':')
+			from++;
+	}
+	return 0;
+}
+
+/*
+ *  keep track of multicast addresses
+ *
+ *  Add (add != 0) or remove a reference to multicast address addr on
+ *  behalf of conversation f.  Addresses live on the nif->maddr list and
+ *  in the nif->mhash table; entries are never unlinked here, only their
+ *  ref count changes.  An address's position on the list is also its
+ *  bit number in f->maddr, which records what f itself has added.
+ *  Returns an error string if the interface has no multicast routine,
+ *  0 otherwise.
+ */
+static char*
+netmulti(Netif *nif, Netfile *f, uchar *addr, int add)
+{
+	Netaddr **l, *ap;
+	int i;
+	ulong h;
+
+	if(nif->multicast == nil)
+		return "interface does not support multicast";
+
+	/* find addr on the list; i counts its position, l is where a new entry would be linked */
+	l = &nif->maddr;
+	i = 0;
+	for(ap = *l; ap; ap = *l){
+		if(memcmp(addr, ap->addr, nif->alen) == 0)
+			break;
+		i++;
+		l = &ap->next;
+	}
+
+	if(add){
+		if(ap == 0){
+			*l = ap = smalloc(sizeof(*ap));
+			memmove(ap->addr, addr, nif->alen);
+			ap->next = 0;
+			ap->ref = 1;
+			h = hash(addr, nif->alen);
+			ap->hnext = nif->mhash[h];
+			nif->mhash[h] = ap;
+		} else {
+			ap->ref++;
+		}
+		/* first reference: tell the interface to start receiving it */
+		if(ap->ref == 1){
+			nif->nmaddr++;
+			nif->multicast(nif->arg, addr, 1);
+		}
+		/* positions beyond the bitmap are referenced but not recorded for f */
+		if(i < 8*sizeof(f->maddr)){
+			if((f->maddr[i/8] & (1<<(i%8))) == 0)
+				f->nmaddr++;
+			f->maddr[i/8] |= 1<<(i%8);
+		}
+	} else {
+		if(ap == 0 || ap->ref == 0)
+			return 0;
+		ap->ref--;
+		/* last reference gone: tell the interface to stop receiving it */
+		if(ap->ref == 0){
+			nif->nmaddr--;
+			nif->multicast(nif->arg, addr, 0);
+		}
+		if(i < 8*sizeof(f->maddr)){
+			if((f->maddr[i/8] & (1<<(i%8))) != 0)
+				f->nmaddr--;
+			f->maddr[i/8] &= ~(1<<(i%8));
+		}
+	}
+	return 0;
+}
diff --git a/src/9vx/a/netif.h b/src/9vx/a/netif.h
@@ -31,7 +31,7 @@ enum
  */
 struct Netfile
 {
-	QLock lk;
+	QLock	qlock; 
 
 	int	inuse;
 	ulong	mode;
@@ -64,7 +64,7 @@ struct Netaddr
  */
 struct Netif
 {
-	QLock lk;
+	QLock	qlock;
 
 	/* multiplexing */
 	char	name[KNAMELEN];		/* for top level directory */
@@ -87,8 +87,8 @@ struct Netif
 
 	/* statistics */
 	int	misses;
-	int	inpackets;
-	int	outpackets;
+	uvlong	inpackets;
+	uvlong	outpackets;
 	int	crcs;		/* input crc errors */
 	int	oerrs;		/* output errors */
 	int	frames;		/* framing errors */
diff --git a/src/9vx/a/part.c b/src/9vx/a/part.c
@@ -0,0 +1,341 @@
+#include	"u.h"
+#include	"lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+
+#include	"sd.h"
+#include	"fs.h"
+
+enum {
+	Npart = 32
+};
+
+uchar *mbrbuf, *partbuf;
+int nbuf;
+#define trace 0
+
+/*
+ *  Read one sector into a through the unit's bio routine.  The sector
+ *  is the one at byte offset off within part (off/secsize sectors past
+ *  part->start).  If mbr is set the sector must also end in the
+ *  0x55 0xAA signature at offsets 0x1FE/0x1FF.
+ *  Returns 0 on success, -1 on a failed read or a missing signature.
+ *  The diagnostics are compiled out while trace is 0.
+ */
+int
+tsdbio(SDunit *unit, SDpart *part, void *a, vlong off, int mbr)
+{
+	uchar *b;
+
+	if(unit->dev->ifc->bio(unit, 0, 0, a, 1, (off/unit->secsize) + part->start) != unit->secsize){
+		if(trace)
+			print("%s: read %lud at %lld failed\n", unit->dev->name,
+				unit->secsize, (vlong)part->start*unit->secsize+off);
+		return -1;
+	}
+	b = a;
+	if(mbr && (b[0x1FE] != 0x55 || b[0x1FF] != 0xAA)){
+		if(trace)
+			print("%s: bad magic %.2ux %.2ux at %lld\n",
+				unit->dev->name, b[0x1FE], b[0x1FF],
+				(vlong)part->start*unit->secsize+off);
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ *  read partition table.  The partition table is just ascii strings.
+ *
+ *  Old-style table: a sector starting with MAGIC, followed by one
+ *  "name start end" line per partition.  It is looked for on the
+ *  second-to-last sector of the unit and then on the last one.
+ */
+#define MAGIC "plan9 partitions"
+static void
+oldp9part(SDunit *unit)
+{
+	SDpart *pp;
+	char *field[3], *line[Npart+1];
+	ulong n, start, end;
+	int i;
+
+	/*
+	 *  We have some partitions already.
+	 *  NOTE(review): this indexes unit->part at unit->npart, which is
+	 *  only a valid slot if npart counts used entries rather than the
+	 *  table size -- confirm against sd.h.
+	 */
+	pp = &unit->part[unit->npart];
+
+	/*
+	 * We prefer partition tables on the second to last sector,
+	 * but some old disks use the last sector instead.
+	 */
+	pp->start = unit->sectors - 2;
+	pp->end = unit->sectors - 1;
+
+	if(tsdbio(unit, pp, partbuf, 0, 0) < 0)
+		return;
+
+	if(strncmp((char*)partbuf, MAGIC, sizeof(MAGIC)-1) != 0) {
+		/* not found on 2nd last sector; look on last sector */
+		pp->start++;
+		pp->end++;
+		if(tsdbio(unit, pp, partbuf, 0, 0) < 0)
+			return;
+		if(strncmp((char*)partbuf, MAGIC, sizeof(MAGIC)-1) != 0)
+			return;
+		print("%s: using old plan9 partition table on last sector\n", unit->dev->name);
+	}else
+		print("%s: using old plan9 partition table on 2nd-to-last sector\n", unit->dev->name);
+
+	/* we found a partition table, so keep pp as the partition that holds it */
+	unit->npart++;
+	/* make sure the text is terminated before it is split into lines */
+	partbuf[unit->secsize-1] = '\0';
+
+	/*
+	 * parse partition table
+	 */
+	n = getfields((char*)partbuf, line, Npart+1, 0, "\n");
+	if(n && strncmp(line[0], MAGIC, sizeof(MAGIC)-1) == 0){
+		for(i = 1; i < n && unit->npart < SDnpart; i++){
+			if(getfields(line[i], field, 3, 0, " ") != 3)
+				break;
+			start = strtoull(field[1], 0, 0);
+			end = strtoull(field[2], 0, 0);
+			/* stop at the first entry that is empty, reversed or off the end of the unit */
+			if(start >= end || end > unit->sectors)
+				break;
+			sdaddpart(unit, field[0], start, end);
+		}
+	}	
+}
+
+/*
+ *  Read the Plan 9 partition table kept inside the partition called
+ *  name and add every partition it lists to unit.
+ *
+ *  The table is the second sector of that partition: text lines of the
+ *  form "part name start end", with start and end given relative to the
+ *  start of the enclosing partition.  Parsing stops at the first line
+ *  that is malformed or describes an impossible range.
+ */
+static void
+p9part(SDunit *unit, char *name)
+{
+	SDpart *p;
+	char *field[4], *line[Npart+1];
+	uvlong base, start, end;
+	int i, n;
+
+	p = sdfindpart(unit, name);
+	if(p == nil)
+		return;
+
+	if(tsdbio(unit, p, partbuf, unit->secsize, 0) < 0)
+		return;
+	partbuf[unit->secsize-1] = '\0';
+
+	/*
+	 * p points into the unit's partition table, which sdaddpart is
+	 * about to be handed for modification.  Take a copy of the base
+	 * now instead of reading through p after each sdaddpart call.
+	 * NOTE(review): sdaddpart is defined elsewhere; whether it can
+	 * move the table is not visible here.
+	 */
+	base = p->start;
+
+	if(strncmp((char*)partbuf, "part ", 5) != 0)
+		return;
+
+	n = getfields((char*)partbuf, line, Npart+1, 0, "\n");
+	if(n == 0)
+		return;
+	for(i = 0; i < n /* && unit->npart < SDnpart */; i++){
+		if(strncmp(line[i], "part ", 5) != 0)
+			break;
+		if(getfields(line[i], field, 4, 0, " ") != 4)
+			break;
+		start = strtoull(field[2], 0, 0);
+		end = strtoull(field[3], 0, 0);
+		if(start >= end || end > unit->sectors)
+			break;
+		sdaddpart(unit, field[1], base+start, base+end);
+	}
+}
+
+/*
+ *  Return non-zero if partition type t is one of the FAT types.
+ */
+int
+isdos(int t)
+{
+	if(t == FAT12 || t == FAT16 || t == FATHUGE)
+		return 1;
+	if(t == FAT32 || t == FAT32X)
+		return 1;
+	return 0;
+}
+
+/*
+ *  Return non-zero if partition type t is one of the extended
+ *  partition types.
+ */
+int
+isextend(int t)
+{
+	if(t == EXTEND)
+		return 1;
+	if(t == EXTHUGE)
+		return 1;
+	if(t == LEXTEND)
+		return 1;
+	return 0;
+}
+
+/* 
+ * Fetch the first dos and all plan9 partitions out of the MBR partition table.
+ * We return -1 if we did not find a plan9 partition.
+ *
+ * Plan 9 partitions are added as "plan9", "plan9.1", ... and each one's
+ * own partition table is read with p9part.  Tables are read into mbrbuf;
+ * dp aliases the four table entries at offset 0x1BE of that buffer, so
+ * every tsdbio call below changes what dp[i] refers to.
+ * NOTE(review): nothing here bounds the length of an extended partition
+ * chain, and name[] only has room for "plan9." plus three digits.
+ */
+static int
+mbrpart(SDunit *unit)
+{
+	Dospart *dp;
+	ulong taboffset, start, end;
+	ulong firstxpart, nxtxpart;
+	int havedos, i, nplan9;
+	char name[10];
+
+	taboffset = 0;
+	dp = (Dospart*)&mbrbuf[0x1BE];
+	if(1) {
+		/* get the MBR (allowing for DMDDO) */
+		if(tsdbio(unit, &unit->part[0], mbrbuf, (vlong)taboffset*unit->secsize, 1) < 0)
+			return -1;
+		for(i=0; i<4; i++)
+			if(dp[i].type == DMDDO) {
+				if(trace)
+					print("DMDDO partition found\n");
+				/* the real table then sits 63 sectors in */
+				taboffset = 63;
+				if(tsdbio(unit, &unit->part[0], mbrbuf, (vlong)taboffset*unit->secsize, 1) < 0)
+					return -1;
+				i = -1;	/* start over */
+			}
+	}
+
+	/*
+	 * Read the partitions, first from the MBR and then
+	 * from successive extended partition tables.
+	 */
+	nplan9 = 0;
+	havedos = 0;
+	firstxpart = 0;
+	for(;;) {
+		if(tsdbio(unit, &unit->part[0], mbrbuf, (vlong)taboffset*unit->secsize, 1) < 0)
+			return -1;
+		if(trace) {
+			if(firstxpart)
+				print("%s ext %lud ", unit->dev->name, taboffset);
+			else
+				print("%s mbr ", unit->dev->name);
+		}
+		nxtxpart = 0;
+		for(i=0; i<4; i++) {
+			if(trace)
+				print("dp %d...", dp[i].type);
+			/* entry offsets are relative to the table they appear in */
+			start = taboffset+GLONG(dp[i].start);
+			end = start+GLONG(dp[i].len);
+
+			if(dp[i].type == PLAN9) {
+				if(nplan9 == 0)
+					strcpy(name, "plan9");
+				else
+					sprint(name, "plan9.%d", nplan9);
+				sdaddpart(unit, name, start, end);
+				p9part(unit, name);
+				nplan9++;
+			}
+
+			/*
+			 * We used to take the active partition (and then the first
+			 * when none are active).  We have to take the first here,
+			 * so that the partition we call ``dos'' agrees with the
+			 * partition disk/fdisk calls ``dos''. 
+			 */
+			if(havedos==0 && isdos(dp[i].type)){
+				havedos = 1;
+				sdaddpart(unit, "dos", start, end);
+			}
+
+			/* nxtxpart is relative to firstxpart (or 0), not taboffset */
+			if(isextend(dp[i].type)){
+				nxtxpart = start-taboffset+firstxpart;
+				if(trace)
+					print("link %lud...", nxtxpart);
+			}
+		}
+		if(trace)
+			print("\n");
+
+		/* no extended entry in this table: the chain ends here */
+		if(!nxtxpart)
+			break;
+		if(!firstxpart)
+			firstxpart = nxtxpart;
+		taboffset = nxtxpart;
+	}	
+	return nplan9 ? 0 : -1;
+}
+
+/*
+ * To facilitate booting from CDs, we create a partition for
+ * the boot floppy image embedded in a bootable CD.
+ *
+ * Only units with 2048-byte sectors are considered.  Sector 17 must
+ * hold the El Torito boot record; it names the sector of the boot
+ * catalog, whose validation entry and bootable initial entry are
+ * checked before the floppy image is added as partition "cdboot".
+ * Returns 0 if the partition was added, -1 otherwise.
+ */
+static int
+part9660(SDunit *unit)
+{
+	uchar buf[2048];
+	ulong a, n;
+	uchar *p;
+
+	if(unit->secsize != 2048)
+		return -1;
+
+	if(unit->dev->ifc->bio(unit, 0, 0, buf, 2048/unit->secsize, (17*2048)/unit->secsize) < 0)
+		return -1;
+
+	/*
+	 * The literal is split after \x01 on purpose: a hex escape takes
+	 * every hex digit that follows, so "\x01EL" would be read as the
+	 * single character 0x1E followed by "L" and never match.
+	 */
+	if(buf[0] || strcmp((char*)buf+1, "CD001\x01" "EL TORITO SPECIFICATION") != 0)
+		return -1;
+
+	/* little-endian sector number of the boot catalog; widen before shifting into bit 31 */
+	p = buf+0x47;
+	a = p[0] | (p[1]<<8) | (p[2]<<16) | ((ulong)p[3]<<24);
+
+	if(unit->dev->ifc->bio(unit, 0, 0, buf, 2048/unit->secsize, (a*2048)/unit->secsize) < 0)
+		return -1;
+
+	/* validation entry header and key bytes, then a bootable (0x88) initial entry */
+	if(memcmp(buf, "\x01\x00\x00\x00", 4) != 0
+	|| memcmp(buf+30, "\x55\xAA", 2) != 0
+	|| buf[0x20] != 0x88)
+		return -1;
+
+	/* little-endian start sector of the boot image */
+	p = buf+0x28;
+	a = p[0] | (p[1]<<8) | (p[2]<<16) | ((ulong)p[3]<<24);
+
+	/* media type selects the size of the emulated floppy */
+	switch(buf[0x21]){
+	case 0x01:
+		n = 1200*1024;
+		break;
+	case 0x02:
+		n = 1440*1024;
+		break;
+	case 0x03:
+		n = 2880*1024;
+		break;
+	default:
+		return -1;
+	}
+	n /= 2048;
+
+	print("found partition %s!cdboot; %lud+%lud\n", unit->dev->name, a, n);
+	sdaddpart(unit, "cdboot", a, a+n);
+	return 0;
+}
+
+/* which kinds of partition table partition() should look for */
+enum {
+	NEW = 1<<0,
+	OLD = 1<<1
+};
+
+/*
+ *  Discover the partitions of unit and add them to its table.
+ *
+ *  A bootable CD image found by part9660 takes precedence.  Otherwise
+ *  the MBR (and any Plan 9 tables inside it) is tried, with the old
+ *  end-of-disk table as the alternative when OLD is selected.  The
+ *  shared sector buffers mbrbuf/partbuf are grown to the unit's sector
+ *  size first; if that allocation fails nothing is scanned.
+ */
+void
+partition(SDunit *unit)
+{
+	int type;
+	char *p;
+
+	if(unit->part == 0)
+		return;
+
+	if(part9660(unit) == 0)
+		return;
+
+	/* the selector is fixed at "new" here, so type always ends up NEW and oldp9part is never reached */
+	p = "new";
+
+	if(p != nil && strncmp(p, "new", 3) == 0)
+		type = NEW;
+	else if(p != nil && strncmp(p, "old", 3) == 0)
+		type = OLD;
+	else
+		type = NEW|OLD;
+
+	if(nbuf < unit->secsize) {
+		free(mbrbuf);
+		free(partbuf);
+		mbrbuf = malloc(unit->secsize);
+		partbuf = malloc(unit->secsize);
+		if(mbrbuf==nil || partbuf==nil) {
+			free(mbrbuf);
+			free(partbuf);
+			partbuf = mbrbuf = nil;
+			nbuf = 0;
+			return;
+		}
+		nbuf = unit->secsize;
+	}
+
+	if((type & NEW) && mbrpart(unit) >= 0){
+		/* nothing to do */;
+	}
+	else if(type & OLD)
+		oldp9part(unit);
+}
diff --git a/src/9vx/a/pgrp.c b/src/9vx/a/pgrp.c
@@ -180,7 +180,7 @@ dupfgrp(Fgrp *f)
 	lock(&f->ref.lk);
 	/* Make new fd list shorter if possible, preserving quantization */
 	new->nfd = f->maxfd+1;
-	i = new->nfd%DELTAFD;
+	i = (uint)new->nfd%DELTAFD;
 	if(i != 0)
 		new->nfd += DELTAFD - i;
 	new->fd = malloc(new->nfd*sizeof(Chan*));
diff --git a/src/9vx/a/portfns.h b/src/9vx/a/portfns.h
@@ -32,8 +32,8 @@ void		callwithureg(void(*)(Ureg*));
 char*		chanpath(Chan*);
 int		canlock(Lock*);
 int		canpage(Proc*);
-int		canqlock(QLock*);
-int		canrlock(RWlock*);
+int		__canqlock(QLock*);
+int		__canrlock(RWlock*);
 void		chandevinit(void);
 void		chandevreset(void);
 void		chandevshutdown(void);
@@ -166,7 +166,7 @@ void		ksetenv(char*, char*, int);
 void		kstrcpy(char*, char*, int);
 void		kstrdup(char**, char*);
 long		latin1(Rune*, int);
-int		lock(Lock*);
+int		__lock(Lock*);
 void		logopen(Log*);
 void		logclose(Log*);
 char*		logctl(Log*, int, char**, Logflag*);
@@ -277,7 +277,7 @@ void		qhangup(Queue*, char*);
 int		qisclosed(Queue*);
 int		qiwrite(Queue*, void*, int);
 int		qlen(Queue*);
-void		qlock(QLock*);
+void		__qlock(QLock*);
 Queue*		qopen(int, int, void (*)(void*), void*);
 int		qpass(Queue*, Block*);
 int		qpassnolim(Queue*, Block*);
@@ -287,7 +287,7 @@ long		qread(Queue*, void*, int);
 Block*		qremove(Queue*);
 void		qreopen(Queue*);
 void		qsetlimit(Queue*, int);
-void		qunlock(QLock*);
+void		__qunlock(QLock*);
 int		qwindow(Queue*);
 int		qwrite(Queue*, void*, int);
 void		qnoblock(Queue*, int);
@@ -305,9 +305,9 @@ void		renameuser(char*, char*);
 void		resched(char*);
 void		resrcwait(char*);
 int		return0(void*);
-void		rlock(RWlock*);
+void		__rlock(RWlock*);
 long		rtctime(void);
-void		runlock(RWlock*);
+void		__runlock(RWlock*);
 Proc*		runproc(void);
 void		savefpregs(FPsave*);
 void		sched(void);
@@ -361,7 +361,7 @@ int		uartstageoutput(Uart*);
 void		unbreak(Proc*);
 void		uncachepage(Page*);
 long		unionread(Chan*, void*, long);
-void		unlock(Lock*);
+void		__unlock(Lock*);
 uvlong		us2fastticks(uvlong);
 void		userinit(void);
 ulong		userpc(void);
@@ -372,8 +372,8 @@ void		validstat(uchar*, int);
 void*		vmemchr(void*, int, int);
 Proc*		wakeup(Rendez*);
 int		walk(Chan**, char**, int, int, int*);
-void		wlock(RWlock*);
-void		wunlock(RWlock*);
+void		__wlock(RWlock*);
+void		__wunlock(RWlock*);
 void*		xalloc(ulong);
 void*		xallocz(ulong, int);
 void		xfree(void*);
diff --git a/src/9vx/a/qlock.c b/src/9vx/a/qlock.c
@@ -5,6 +5,8 @@
 #include "dat.h"
 #include "fns.h"
 
+int tracelock = 0;
+
 struct {
 	ulong rlock;
 	ulong rlockq;
@@ -15,7 +17,7 @@ struct {
 } rwstats;
 
 void
-qlock(QLock *q)
+__qlock(QLock *q)
 {
 	Proc *p;
 
@@ -50,7 +52,7 @@ qlock(QLock *q)
 }
 
 int
-canqlock(QLock *q)
+__canqlock(QLock *q)
 {
 	if(!canlock(&q->use))
 		return 0;
@@ -64,7 +66,7 @@ canqlock(QLock *q)
 }
 
 void
-qunlock(QLock *q)
+__qunlock(QLock *q)
 {
 	Proc *p;
 
@@ -86,7 +88,7 @@ qunlock(QLock *q)
 }
 
 void
-rlock(RWlock *q)
+__rlock(RWlock *q)
 {
 	Proc *p;
 
@@ -115,7 +117,7 @@ rlock(RWlock *q)
 }
 
 void
-runlock(RWlock *q)
+__runlock(RWlock *q)
 {
 	Proc *p;
 
@@ -138,7 +140,7 @@ runlock(RWlock *q)
 }
 
 void
-wlock(RWlock *q)
+__wlock(RWlock *q)
 {
 	Proc *p;
 
@@ -170,7 +172,7 @@ wlock(RWlock *q)
 }
 
 void
-wunlock(RWlock *q)
+__wunlock(RWlock *q)
 {
 	Proc *p;
 
@@ -209,7 +211,7 @@ wunlock(RWlock *q)
 
 /* same as rlock but punts if there are any writers waiting */
 int
-canrlock(RWlock *q)
+__canrlock(RWlock *q)
 {
 	lock(&q->use);
 	rwstats.rlock++;
diff --git a/src/9vx/a/sd.h b/src/9vx/a/sd.h
@@ -129,9 +129,14 @@ extern void sdadddevs(SDev*);
 extern int sdsetsense(SDreq*, int, int, int, int);
 extern int sdmodesense(SDreq*, uchar*, void*, int);
 extern int sdfakescsi(SDreq*, void*, int);
+extern void sdaddpart(SDunit*, char*, uvlong, uvlong);
+extern SDpart* sdfindpart(SDunit*, char*);
 
 /* sdscsi.c */
 extern int scsiverify(SDunit*);
 extern int scsionline(SDunit*);
 extern long scsibio(SDunit*, int, int, void*, long, uvlong);
 extern SDev* scsiid(SDev*, SDifc*);
+
+/* part.c */
+extern void partition(SDunit*);
diff --git a/src/9vx/a/sdaoe.c b/src/9vx/a/sdaoe.c
@@ -0,0 +1,652 @@
+/*
+ * aoe sd driver, copyright © 2007 coraid
+ */
+
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "error.h"
+#include "sd.h"
+#include "netif.h"
+#include "aoe.h"
+
+extern	char	Echange[];
+extern	char	Enotup[];
+
+#define uprint(...)	snprint(up->genbuf, sizeof up->genbuf, __VA_ARGS__);
+
+enum {
+	Nctlr	= 32,
+	Maxpath	= 128,
+};
+
+enum {
+	/* sync with ahci.h */
+	Dllba 	= 1<<0,
+	Dsmart	= 1<<1,
+	Dpower	= 1<<2,
+	Dnop	= 1<<3,
+	Datapi	= 1<<4,
+	Datapi16= 1<<5,
+};
+
+static char *flagname[] = {
+	"llba",
+	"smart",
+	"power",
+	"nop",
+	"atapi",
+	"atapi16",
+};
+
+typedef struct Ctlr Ctlr;
+struct Ctlr{	/* per-target state for one aoe device known to this driver */
+	QLock	qlock;
+
+	Ctlr	*next;	/* link in the list that starts at head */
+	SDunit	*unit;
+
+	char	path[Maxpath];	/* directory under which "ident" and "data" are opened */
+	Chan	*c;	/* channel on path/data, (re)opened by aoeconnect */
+
+	ulong	vers;	/* incremented by identify when a change is detected */
+	uchar	mediachange;	/* set by aoeverify/identify, cleared by aoeonline */
+	uchar	flag;
+	uchar	smart;	/* index into smarttab when reported by aoerctl */
+	uchar	smartrs;	/* aoerctl reports 0xff as "enable error" and 0 as "disabled" */
+	uchar	feat;	/* D* feature bits (Dllba, Dsmart, ...) */
+
+	uvlong	sectors;	/* sector count decoded by identify */
+	char	serial[20+1];	/* serial, firmware and model are filled in by idmove */
+	char	firmware[8+1];
+	char	model[40+1];
+	char	ident[0x100];	/* raw data read from path/ident */
+};
+
+static	Lock	ctlrlock;
+static	Ctlr	*head;
+static	Ctlr	*tail;
+
+SDifc sdaoeifc;
+
+static void
+idmove(char *p, ushort *a, int n)
+{	/* unpack n bytes of 16-bit words from a into the string p, trimming blanks at both ends */
+	int i;
+	char *op, *e;
+
+	op = p;
+	for(i = 0; i < n/2; i++){
+		*p++ = a[i] >> 8;	/* high byte of each word comes first */
+		*p++ = a[i];
+	}
+	*p = 0;	/* p must therefore have room for n+1 bytes */
+	while(p > op && *--p == ' ')	/* strip trailing spaces */
+		*p = 0;
+	e = p;
+	p = op;
+	while(*p == ' ')	/* skip leading spaces */
+		p++;
+	memmove(op, p, n - (e - p));	/* slide the text down to the start of the buffer */
+}
+
+static ushort
+gbit16(void *a)
+{
+	uchar *b;	/* byte view of a */
+
+	b = a;
+	return b[0] | b[1] << 8;	/* little-endian: b[0] is the low byte */
+}
+
+static ulong
+gbit32(void *a)
+{	/* assemble a 32-bit value from four bytes stored least significant first */
+	ulong j;
+	uchar *i;
+
+	i = a;
+	j  = i[3] << 24;	/* most significant byte */
+	j |= i[2] << 16;
+	j |= i[1] << 8;
+	j |= i[0];	/* least significant byte */
+	return j;
+}
+
+static uvlong
+gbit64(void *a)
+{	/* assemble a 64-bit value from eight bytes stored least significant first */
+	uchar *i;
+
+	i = a;
+	return (uvlong)gbit32(i+4)<<32 | gbit32(i);	/* bytes 4..7 form the high half */
+}
+
+static int
+identify(Ctlr *c, ushort *id)
+{	/* decode the identify words in id into c and note a media change; always returns 0 */
+	int i;
+	uchar oserial[21];
+	uvlong osectors, s;
+
+	osectors = c->sectors;	/* keep the previous size and serial for the comparison below */
+	memmove(oserial, c->serial, sizeof c->serial);
+
+	c->feat &= ~(Dllba|Dpower|Dsmart|Dnop);
+	i = gbit16(id+83) | gbit16(id+86);
+	if(i & (1<<10)){
+		c->feat |= Dllba;
+		s = gbit64(id+100);	/* bit 10 set: sector count is the 64-bit value at word 100 */
+	}else
+		s = gbit32(id+60);	/* otherwise the 32-bit value at word 60 */
+
+	i = gbit16(id+83);
+	if((i>>14) == 1) {	/* the feature bits below are only examined when bits 15:14 of word 83 are 01 */
+		if(i & (1<<3))
+			c->feat |= Dpower;
+		i = gbit16(id+82);
+		if(i & 1)
+			c->feat |= Dsmart;
+		if(i & (1<<14))
+			c->feat |= Dnop;
+	}
+
+	idmove(c->serial, id+10, 20);
+	idmove(c->firmware, id+23, 8);
+	idmove(c->model, id+27, 40);
+
+	if((osectors == 0 || osectors != s) &&	/* size is new or changed ... */
+	    memcmp(oserial, c->serial, sizeof oserial) != 0){	/* ... and the serial differs too */
+		c->sectors = s;
+		c->mediachange = 1;
+		c->vers++;
+	}
+	return 0;
+}
+
+/* must call with d qlocked */
+static int
+aoeidentify(Ctlr *d, SDunit *u)
+{	/* read d->path/ident into d->ident, decode it and rebuild u->inquiry; returns 0, errors are re-raised */
+	Chan *c;
+
+	c = nil;
+	if(waserror()){
+		if(c)	/* close the channel only if namec got as far as opening it */
+			cclose(c);
+		iprint("aoeidentify: %s\n", up->errstr);
+		nexterror();
+	}
+
+	uprint("%s/ident", d->path);
+	c = namec(up->genbuf, Aopen, OREAD, 0);
+	devtab[c->type]->read(c, d->ident, sizeof d->ident, 0);	/* the byte count returned by read is not checked */
+
+	poperror();
+	cclose(c);
+
+	d->feat = 0;
+	d->smart = 0;
+	identify(d, (ushort*)d->ident);
+
+	memset(u->inquiry, 0, sizeof u->inquiry);
+	u->inquiry[2] = 2;
+	u->inquiry[3] = 2;
+	u->inquiry[4] = sizeof u->inquiry - 4;
+	memmove(u->inquiry+8, d->model, 40);	/* model text starts at byte 8 of the inquiry data */
+
+	return 0;
+}
+
+static Ctlr*
+ctlrlookup(char *path)
+{	/* return the controller registered under path, or nil when there is none */
+	Ctlr *p;
+
+	lock(&ctlrlock);
+	p = head;
+	while(p != nil && strcmp(p->path, path) != 0)
+		p = p->next;
+	unlock(&ctlrlock);
+	return p;
+}
+
+static Ctlr*
+newctlr(char *path)
+{	/* allocate a controller for path and append it to the list; raises Eexist for a duplicate, returns 0 if malloc fails */
+	Ctlr *c;
+
+	/* race? */
+	if(ctlrlookup(path))	/* the lookup and the insertion below take ctlrlock separately */
+		error(Eexist);
+
+	if((c = malloc(sizeof *c)) == nil)
+		return 0;
+	kstrcpy(c->path, path, sizeof c->path);	/* NOTE(review): other fields, including next, are not set here; presumably malloc returns zeroed memory - confirm */
+	lock(&ctlrlock);
+	if(head != nil)
+		tail->next = c;
+	else
+		head = c;
+	tail = c;
+	unlock(&ctlrlock);
+	return c;
+}
+
+static void
+delctlr(Ctlr *c)
+{	/* unlink the controller whose path matches c->path, close its channel and free it; raises Enonexist if absent */
+	Ctlr *x, *prev;
+
+	lock(&ctlrlock);
+
+	for(prev = 0, x = head; x; prev = x, x = x->next)	/* advance from x, not from c, so the whole list is walked */
+		if(strcmp(c->path, x->path) == 0)
+			break;
+	if(x == 0){
+		unlock(&ctlrlock);	/* drop the lock before raising the error */
+		error(Enonexist);
+	}
+
+	if(prev)
+		prev->next = x->next;
+	else
+		head = x->next;
+	if(x->next == nil)	/* removed the last element: tail moves back */
+		tail = prev;
+	unlock(&ctlrlock);
+
+	if(x->c)
+		cclose(x->c);
+	free(x);
+}
+
+static SDev*
+aoeprobe(char *path, SDev *s)
+{	/* ask the aoe device to discover the target named by path, wait for it, and bind a new Ctlr to s (allocated if nil) */
+	int n, i;
+	char *p;
+	Chan *c;
+	Ctlr *ctlr;
+
+	if((p = strrchr(path, '/')) == 0)
+		error(Ebadarg);
+	*p = 0;	/* temporarily cut path at its last '/' to name the parent's ctl file */
+	uprint("%s/ctl", path);
+	*p = '/';
+
+	c = namec(up->genbuf, Aopen, OWRITE, 0);
+	if(waserror()) {
+		cclose(c);
+		nexterror();
+	}
+	n = uprint("discover %s", p+1);	/* p+1 is the last path element */
+	devtab[c->type]->write(c, up->genbuf, n, 0);
+	poperror();
+	cclose(c);
+
+	for(i = 0;; i += 200){	/* poll every 200ms, giving up once i exceeds 8000 */
+		if(i > 8000 || waserror())
+			error(Etimedout);
+		tsleep(&up->sleep, return0, 0, 200);
+		poperror();
+
+		uprint("%s/ident", path);
+		if(waserror())	/* the ident file cannot be opened yet: try again */
+			continue;
+		c = namec(up->genbuf, Aopen, OREAD, 0);
+		poperror();
+		cclose(c);
+
+		ctlr = newctlr(path);	/* may be nil if newctlr could not allocate */
+		break;
+	}
+
+	if(s == nil && (s = malloc(sizeof *s)) == nil)
+		return nil;
+	s->ctlr = ctlr;
+	s->ifc = &sdaoeifc;
+	s->nunit = 1;
+	return s;
+}
+
+static char 	*probef[32];
+static int 	nprobe;
+
+static int
+pnpprobeid(char *s)
+{
+	/* specs shorter than two characters yield no id */
+	if(strlen(s) < 2)
+		return 0;
+
+	/* an "X!..." spec supplies its own id character; anything else gets 'e' */
+	if(s[1] != '!')
+		return 'e';
+	return s[0];
+}
+
+static SDev*
+aoepnp(void)
+{	/* build one placeholder SDev per configured probe spec; currently disabled, see below */
+	int i, id;
+	char *p;
+	SDev *h, *t, *s;
+
+//	if((p = getconf("aoedev")) == 0)
+	if(1)	/* the getconf lookup is commented out, so this always returns 0 and the code below is unreachable */
+		return 0;
+	nprobe = tokenize(p, probef, nelem(probef));	/* p would be uninitialized here if this were ever reached */
+	h = t = 0;
+	for(i = 0; i < nprobe; i++){
+		id = pnpprobeid(probef[i]);
+		if(id == 0)
+			continue;
+		s = malloc(sizeof *s);
+		if(s == nil)
+			break;
+		s->ctlr = 0;	/* left nil; aoeverify probes when it finds no controller */
+		s->idno = id;
+		s->ifc = &sdaoeifc;
+		s->nunit = 1;
+
+		if(h)
+			t->next = s;
+		else
+			h = s;
+		t = s;
+	}
+	return h;
+}
+
+static Ctlr*
+pnpprobe(SDev *sd)
+{	/* probe the next unused entry of probef for sd; returns its controller, or 0 when none is left or the probe keeps failing */
+	int j;
+	char *p;
+	static int i;	/* index of the next probef entry to consume, kept across calls */
+
+	if(i >= nprobe)	/* only entries 0..nprobe-1 were filled in by tokenize */
+		return 0;
+	p = probef[i++];
+	if(strlen(p) < 2)
+		return 0;
+	if(p[1] == '!')	/* skip an "X!" id prefix */
+		p += 2;
+
+	for(j = 0;; j += 200){	/* retry until j exceeds 8000 */
+		if(j > 8000){
+			print("#æ: pnpprobe: %s: %s\n", probef[i-1], up->errstr);
+			return 0;
+		}
+		if(waserror()){	/* aoeprobe raised an error: wait 200ms and retry */
+			tsleep(&up->sleep, return0, 0, 200);
+			continue;
+		}
+		sd = aoeprobe(p, sd);
+		poperror();
+		break;
+	}
+	print("#æ: pnpprobe establishes %sin %dms\n", probef[i-1], j);
+	return sd->ctlr;
+}
+
+
+static int
+aoeverify(SDunit *u)
+{	/* make sure the unit's device has a controller, probing for one if needed; 1 on success, 0 on failure */
+	SDev *dev = u->dev;
+	Ctlr *ctlr = dev->ctlr;
+	if(ctlr == nil){
+		dev->ctlr = ctlr = pnpprobe(dev);
+		if(ctlr == nil)
+			return 0;
+	}
+	ctlr->mediachange = 1;	/* force aoeonline to reconnect and re-identify */
+	return 1;
+}
+
+static int
+aoeconnect(SDunit *u, Ctlr *c)
+{	/* re-identify the target and reopen c->path/data as c->c; returns 0, or -1 if any step raised an error */
+	QLOCK(c);
+	if(waserror()){
+		QUNLOCK(c);
+		return -1;
+	}
+
+	aoeidentify(u->dev->ctlr, u);
+	if(c->c)
+		cclose(c->c);
+	c->c = 0;	/* cleared first so a failing namec does not leave a closed channel behind */
+	uprint("%s/data", c->path);
+	c->c = namec(up->genbuf, Aopen, ORDWR, 0);
+	QUNLOCK(c);
+	poperror();
+
+	return 0;
+}
+
+static int
+aoeonline(SDunit *u)
+{	/* bring the unit online: 0 on failure, 1 if nothing changed, 2 after a reconnect updated the geometry */
+	Ctlr *c;
+	int r;
+
+	c = u->dev->ctlr;
+	r = 0;
+
+	if((c->feat&Datapi) && c->mediachange){	/* Datapi targets defer to scsionline for the result */
+		if(aoeconnect(u, c) == 0 && (r = scsionline(u)) > 0)
+			c->mediachange = 0;
+		return r;
+	}
+
+	if(c->mediachange){
+		if(aoeconnect(u, c) == -1)
+			return 0;
+		r = 2;
+		c->mediachange = 0;
+		u->sectors = c->sectors;	/* publish the freshly identified size */
+		u->secsize = Aoesectsz;
+	} else
+		r = 1;
+
+	return r;
+}
+
+static int
+aoerio(SDreq *r)
+{	/* execute the request r against the target's data channel; returns an SD status and raises on I/O errors */
+	int i, count;
+	uvlong lba;
+	char *name;
+	uchar *cmd;
+	long (*rio)(Chan*, void*, long, vlong);
+	Ctlr *c;
+	SDunit *unit;
+
+	unit = r->unit;
+	c = unit->dev->ctlr;
+//	if(c->feat & Datapi)
+//		return aoeriopkt(r, d);
+
+	cmd = r->cmd;
+	name = unit->perm.name;
+
+	if(r->cmd[0] == 0x35 || r->cmd[0] == 0x91){	/* cache flush is not implemented: always report a check condition */
+//		QLOCK(c);
+//		i = flushcache();
+//		QUNLOCK(c);
+//		if(i == 0)
+//			return sdsetsense(r, SDok, 0, 0, 0);
+		return sdsetsense(r, SDcheck, 3, 0xc, 2);
+	}
+
+	if((i = sdfakescsi(r, c->ident, sizeof c->ident)) != SDnostatus){	/* sdfakescsi handled the command itself */
+		r->status = i;
+		return i;
+	}
+
+	switch(*cmd){
+	case 0x88:
+	case 0x28:
+		rio = devtab[c->c->type]->read;
+		break;
+	case 0x8a:
+	case 0x2a:
+		rio = devtab[c->c->type]->write;
+		break;
+	default:
+		print("%s: bad cmd %#.2ux\n", name, cmd[0]);
+		r->status = SDcheck;
+		return SDcheck;
+	}
+
+	if(r->data == nil)
+		return SDok;
+
+	if(r->clen == 16){	/* 16-byte command: lba in bytes 2..9, count in bytes 10..13 */
+		if(cmd[2] || cmd[3])	/* the top 16 bits of the address must be zero */
+			return sdsetsense(r, SDcheck, 3, 0xc, 2);
+		lba = (uvlong)cmd[4]<<40 | (uvlong)cmd[5]<<32;
+		lba |=   (uvlong)cmd[6]<<24 |  cmd[7]<<16 |  cmd[8]<<8 | cmd[9];	/* widen before shifting so a byte >= 0x80 cannot sign-extend into the high half */
+		count = cmd[10]<<24 | cmd[11]<<16 | cmd[12]<<8 | cmd[13];
+	}else{	/* shorter command: lba in bytes 2..5, count in bytes 7..8 */
+		lba  = (uvlong)cmd[2]<<24 | cmd[3]<<16 | cmd[4]<<8 | cmd[5];	/* same widening as above */
+		count = cmd[7]<<8 | cmd[8];
+	}
+
+	count *= Aoesectsz;	/* sectors to bytes */
+
+	if(r->dlen < count)	/* clamp to the caller's buffer, rounded down to a multiple of 512 */
+		count = r->dlen & ~0x1ff;
+
+	if(waserror()){
+		if(strcmp(up->errstr, Echange) == 0 ||
+		    strcmp(up->errstr, Enotup) == 0)
+			unit->sectors = 0;	/* target changed or went away: zero the unit's size */
+		nexterror();
+	}
+	r->rlen = rio(c->c, r->data, count, Aoesectsz * lba);
+	poperror();
+	r->status = SDok;
+	return SDok;
+}
+
+static char *smarttab[] = {
+	"unset",
+	"error",
+	"threshold exceeded",
+	"normal"
+};
+
+static char *
+pflag(char *s, char *e, uchar f)
+{	/* append the names of the bits set in f, then a newline, to the buffer s..e; returns the new end */
+	uchar i, m;
+
+	for(i = 0; i < nelem(flagname); i++){	/* only bits that have a name; higher bits would index past flagname */
+		m = 1 << i;
+		if(f & m)
+			s = seprint(s, e, "%s ", flagname[i]);
+	}
+	return seprint(s, e, "\n");
+}
+
+static int
+aoerctl(SDunit *u, char *p, int l)
+{	/* format the unit's status text into p (at most l bytes); returns the number of bytes written */
+	Ctlr *c;
+	char *e, *op;
+
+	if((c = u->dev->ctlr) == nil)	/* nothing to report without a controller */
+		return 0;
+	e = p+l;
+	op = p;
+
+	p = seprint(p, e, "model\t%s\n", c->model);
+	p = seprint(p, e, "serial\t%s\n", c->serial);
+	p = seprint(p, e, "firm	%s\n", c->firmware);
+	if(c->smartrs == 0xff)
+		p = seprint(p, e, "smart\tenable error\n");
+	else if(c->smartrs == 0)
+		p = seprint(p, e, "smart\tdisabled\n");
+	else
+		p = seprint(p, e, "smart\t%s\n", smarttab[c->smart]);	/* c->smart is used as an index without a range check */
+	p = seprint(p, e, "flag	");
+	p = pflag(p, e, c->feat);
+	p = seprint(p, e, "geometry %llud %d\n", c->sectors, Aoesectsz);
+	return p-op;
+}
+
+static int
+aoewctl(SDunit *d1, Cmdbuf *cmd)
+{	/* no unit control commands are implemented: every write is rejected with Ebadarg */
+	cmderror(cmd, Ebadarg);
+	return 0;
+}
+
+static SDev*
+aoeprobew(DevConf *c)
+{	/* probe entry of sdaoeifc: take the path from c->type (from its first '/') and probe it */
+	char *p;
+
+	p = strchr(c->type, '/');
+	if(p == nil || strlen(p) > Maxpath - 11)	/* reject paths too long for Ctlr.path */
+		error(Ebadarg);
+	if(p[1] == '#')
+		p++;			/* hack */
+	if(ctlrlookup(p))
+		error(Einuse);
+	return aoeprobe(p, 0);	/* nil SDev: aoeprobe allocates one */
+}
+
+static void
+aoeclear(SDev *s)
+{	/* clear entry of sdaoeifc: remove and free the device's controller */
+	delctlr((Ctlr *)s->ctlr);
+}
+
+static char*
+aoertopctl(SDev *s, char *p, char *e)
+{	/* append one "name aoe path" line for s to the buffer p..e; returns the new end */
+	Ctlr *c;
+
+	c = s->ctlr;
+	return seprint(p, e, "%s aoe %s\n", s->name, c->path);
+}
+
+static int
+aoewtopctl(SDev *d1, Cmdbuf *cmd)
+{	/* no top-level control commands are implemented: every field count ends in Ebadarg */
+	switch(cmd->nf){
+	default:
+		cmderror(cmd, Ebadarg);
+	}
+	return 0;
+}
+
+SDifc sdaoeifc = {
+	"aoe",
+
+	aoepnp,
+	nil,		/* legacy */
+	nil,		/* enable */
+	nil,		/* disable */
+
+	aoeverify,
+	aoeonline,
+	aoerio,
+	aoerctl,
+	aoewctl,
+
+	scsibio,
+	aoeprobew,	/* probe */
+	aoeclear,	/* clear */
+	aoertopctl,
+	aoewtopctl,
+};
diff --git a/src/9vx/bootcode.9 b/src/9vx/bootcode.9
Binary files differ.
diff --git a/src/9vx/devip.c b/src/9vx/devip.c
@@ -883,7 +883,7 @@ cswrite(Chan *c, void *a, long n, vlong offset)
 	return n;
 }
 
-Dev ipdevtab = 
+Dev pipdevtab = 
 {
 	'I',
 	"ip",
diff --git a/src/9vx/devtab.c b/src/9vx/devtab.c
@@ -5,6 +5,7 @@
 #include "fns.h"
 #include "error.h"
 
+extern Dev aoedevtab;
 extern Dev consdevtab;
 extern Dev rootdevtab;
 extern Dev pipedevtab;
@@ -24,14 +25,18 @@ extern Dev mntloopdevtab;
 extern Dev dupdevtab;
 extern Dev sddevtab;
 extern Dev capdevtab;
+extern Dev etherdevtab;
 
 Dev *devtab[] = {
 	&rootdevtab,	/* must be first */
+	&aoedevtab,
 	&audiodevtab,
+	&capdevtab,
 	&consdevtab,
 	&drawdevtab,
 	&dupdevtab,
 	&envdevtab,
+	&etherdevtab,
 	&fsdevtab,
 	&ipdevtab,
 	&mntdevtab,
@@ -40,11 +45,9 @@ Dev *devtab[] = {
 	&pipedevtab,
 	&procdevtab,
 	&ramdevtab,
+	&sddevtab,
 	&srvdevtab,
 	&ssldevtab,
 	&tlsdevtab,
-	&sddevtab,
-	&capdevtab,
 	0
 };
-
diff --git a/src/9vx/etherpcap.c b/src/9vx/etherpcap.c
@@ -0,0 +1,189 @@
+/*
+ * etherpcap - portable Virtual Ethernet driver for 9vx.
+ * 
+ * Copyright (c) 2008 Devon H. O'Dell
+ * copyright © 2008 erik quanstrom
+ * copyright © 2010 Jesus Galan Lopez
+ *
+ * Released under 2-clause BSD license.
+ */
+
+#include "u.h"
+
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "error.h"
+#include "netif.h"
+#include "etherif.h"
+#include "vether.h"
+
+#include <pcap.h>
+
+static	uvlong	txerrs;
+
+extern	int	eafrom(char *ma, uchar ea[6]);
+
+typedef struct Ctlr Ctlr;
+struct Ctlr {	/* per-card state for the pcap driver */
+	pcap_t	*pd;	/* capture handle returned by setup */
+};
+
+static void *
+veerror(char* err)
+{	/* print err with a "ve: " prefix and return nil, so callers can write "return veerror(...)" */
+	iprint("ve: %s\n", err);
+	return nil;
+}
+
+static pcap_t *
+setup(char *dev, uchar *ea)
+{	/* open dev (or the default device when nil) for capture, filtered to frames addressed to ea; nil on failure */
+	char	filter[30];
+	char	errbuf[PCAP_ERRBUF_SIZE];
+	pcap_t	*pd;
+	struct bpf_program prog;
+	bpf_u_int32 net;
+	bpf_u_int32 mask;
+
+	if(sprint(filter, "ether dst %2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux",
+	ea[0], ea[1], ea[2],ea[3], ea[4], ea[5]) == -1)
+		return veerror("cannot create pcap filter");
+
+	if (!dev && (dev = pcap_lookupdev(errbuf)) == nil)
+		return veerror("cannot find network device");
+
+//	if ((pd = pcap_open_live(netdev, 1514, 1, 1, errbuf)) == nil)
+	if ((pd = pcap_open_live(dev, 65000, 1, 1, errbuf)) == nil)
+		return nil;	/* errbuf is not reported on this path */
+
+	pcap_lookupnet(dev, &net, &mask, errbuf);	/* NOTE(review): results unchecked; net may be unset if this fails */
+	pcap_compile(pd, &prog, filter, 0, net);	/* NOTE(review): result unchecked; prog is used below regardless */
+
+	if (pcap_setfilter(pd, &prog) == -1)
+		return nil;	/* NOTE(review): pd and prog are not released on this path */
+
+	pcap_freecode(&prog);
+
+	return pd;
+}
+
+static Block *
+pcappkt(Ctlr *c)
+{	/* wait for the next captured packet and return a copy of it in a new Block */
+	struct pcap_pkthdr hdr;
+	uchar *p;
+	Block *b;
+
+	while ((p = pcap_next(c->pd, &hdr)) == nil);	/* empty body: retry until pcap_next yields a packet */
+
+	b = allocb(hdr.caplen);
+	memcpy(b->rp, p, hdr.caplen);
+	b->wp += hdr.caplen;
+	b->flag |= Btcpck|Budpck|Bpktck;
+
+/*
+	iprint("+++++++++++ packet %d (len %d):\n", ++fn, hdr.caplen);
+	int i=0; uchar* u;
+	static int fn=0;
+
+	for(u=b->rp; u<b->wp; u++){
+		if (i%16 == 0) iprint("%.4ux", i);
+		if (i%8 == 0) iprint("   ");
+		iprint("%2.2ux ", *u);
+		if (++i%16 == 0) iprint("\n");
+	}
+	iprint("\n-------------\n");
+*/
+
+	return b;
+
+}
+
+static void
+pcaprecvkproc(void *v)
+{	/* kproc body: hand every packet pcappkt returns to etheriq until it returns nil */
+	Ether *ether = v;
+	Block *pkt;
+	for(;;){
+		if((pkt = pcappkt(ether->ctlr)) == nil)
+			break;
+		etheriq(ether, pkt, 1);
+	}
+}
+
+static void
+pcaptransmit(Ether* e)
+{	/* drain e->oq, injecting each block through pcap and counting failures in txerrs */
+	const u_char *u;
+	Block *b;
+	Ctlr *c;
+
+	c = e->ctlr;
+	while ((b = qget(e->oq)) != nil) {
+		int wlen;
+
+		u = (const u_char*)b->rp;
+
+		wlen = pcap_inject(c->pd, u, BLEN(b));
+		// iprint("injected packet len %d\n", wlen);
+		if (wlen == -1)
+			txerrs++;	/* file-wide counter, shared by every pcap card */
+
+		freeb(b);	/* the block is released whether or not the inject succeeded */
+	}
+}
+
+static long
+pcapifstat(Ether *e, void *a, long n, ulong offset)
+{	/* ifstat read handler: report the transmit error count through readstr */
+	char buf[128];
+
+	snprint(buf, sizeof buf, "txerrors: %llud\n", txerrs);	/* txerrs is uvlong, so the verb must be %llud */
+	return readstr(offset, a, n, buf);
+}
+
+static void
+pcapattach(Ether* e)
+{	/* attach hook: start the receive kproc for this card */
+	kproc("pcaprecv", pcaprecvkproc, e);
+}
+
+static int
+pcappnp(Ether* e)
+{	/* configure e from the next non-tap entry of ve[]; returns 0 on success, -1 if none is left or setup fails */
+	Ctlr c;
+	static int cve = 0;	/* next ve[] entry to consider, kept across calls */
+
+	while(cve < nve && ve[cve].tap == 1)	/* tap entries belong to the tap driver */
+		cve++;
+	if(cve >= nve)
+		return -1;
+
+	memset(&c, 0, sizeof(c));
+	c.pd = setup(ve[cve].dev, ve[cve].ea);
+	if (c.pd == nil) {
+		iprint("ve: pcap failed to initialize\n");
+		cve++;	/* skip the failed entry on the next call */
+		return -1;
+	}
+	e->ctlr = malloc(sizeof(c));	/* NOTE(review): result is not checked before the memcpy */
+	memcpy(e->ctlr, &c, sizeof(c));
+	e->tbdf = BUSUNKNOWN;
+	memcpy(e->ea, ve[cve].ea, Eaddrlen);
+	e->attach = pcapattach;
+	e->transmit = pcaptransmit;
+	e->ifstat = pcapifstat;
+	e->ni.arg = e;
+	e->ni.link = 1;
+	cve++;
+	return 0;
+}
+
+void
+etherpcaplink(void)
+{	/* register pcappnp as the probe routine for ether cards of type "pcap" */
+	addethercard("pcap", pcappnp);
+}
diff --git a/src/9vx/ethertap.c b/src/9vx/ethertap.c
@@ -0,0 +1,185 @@
+/*
+ * ethertap: tap device ethernet driver
+ * copyright © 2008 erik quanstrom
+ * copyright © 2010 Tully Gray
+ * copyright © 2010 Jesus Galan Lopez
+ */
+
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "error.h"
+#include "netif.h"
+#include "etherif.h"
+#include "vether.h"
+
+#include <net/if.h>
+#include <sys/ioctl.h>
+
+#ifdef linux
+#include <netpacket/packet.h>
+#include <linux/if_tun.h>
+#elif defined(__FreeBSD__)
+#include <net/if_tun.h>
+#endif
+
+typedef struct Ctlr Ctlr;
+struct Ctlr {	/* per-card state for the tap driver */
+	int	fd;	/* descriptor returned by opentap */
+	int	txerrs;	/* count of failed writes in taptransmit */
+	uchar	ea[Eaddrlen];	/* this card's ethernet address, copied from ve[] */
+};
+
+static	uchar	anyea[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff,};
+
+#ifdef linux
+static int
+opentap(char *dev)
+{	/* open /dev/net/tun and attach it to tap interface dev ("tap0" when nil); returns the fd or -1 */
+	int fd;
+	char *tap0 = "tap0";
+	struct ifreq ifr;
+
+	if(dev == nil)
+		dev = tap0;
+	if((fd = open("/dev/net/tun", O_RDWR)) < 0)
+		return -1;
+	memset(&ifr, 0, sizeof ifr);
+	strncpy(ifr.ifr_name, dev, sizeof ifr.ifr_name - 1);	/* leave the last zeroed byte alone so the name stays NUL-terminated */
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+	if(ioctl(fd, TUNSETIFF, &ifr) < 0){
+		close(fd);	/* do not leak the descriptor when the attach fails */
+		return -1;
+	}
+	return fd;
+}
+#elif defined(__FreeBSD__)
+static int
+opentap(char *dev)
+{	/* FreeBSD variant: open /dev/tap and return the fd, or -1; dev is ignored */
+	int fd;
+	struct stat s;	/* unused */
+
+	if((fd = open("/dev/tap", O_RDWR)) < 0)
+		return -1;
+	return fd;
+}
+#endif
+
+static int
+setup(char *dev)
+{	/* thin wrapper: returns whatever opentap(dev) returns */
+	return opentap(dev);
+}
+
+Block*
+tappkt(Ctlr *c)
+{	/* read frames from the tap fd until one is addressed to us or to broadcast; return it in a new Block */
+	int n;
+	Block *b;
+
+	b = allocb(1514);
+	for(;;){
+		n = read(c->fd, b->rp, BALLOC(b));
+		if(n <= 0)	/* a failed or empty read is fatal */
+			panic("fd %d read %d", c->fd, n);
+		if(memcmp(b->rp + 0, anyea, 6) == 0	/* first six bytes are all 0xff ... */
+		|| memcmp(b->rp + 0, c->ea, 6) == 0)	/* ... or match this card's address */
+			break;
+	}
+	b->wp += n;
+	b->flag |= Btcpck|Budpck|Bpktck;
+	return b;
+}
+
+static void
+taprecvkproc(void *v)
+{	/* kproc body: pass every frame tappkt returns to etheriq */
+	Block *b;
+	Ether *e;
+
+	e = v;
+	while((b = tappkt(e->ctlr)))
+		etheriq(e, b, 1);
+	pexit("read fail", 1);	/* reached only if tappkt returns nil */
+}
+
+static void
+taptransmit(Ether* e)
+{	/* drain e->oq: loop matching frames back to our own input queue, then write each frame to the tap fd */
+	Block *b, *h;
+	Ctlr *c;
+
+	c = e->ctlr;
+	while ((b = qget(e->oq)) != nil) {
+		if(memcmp(b->rp + 6, anyea, 6) == 0 ||	/* NOTE(review): offset 6 is compared here but offset 0 below - confirm which field is intended */
+		memcmp(b->rp + 0, c->ea, 6) == 0){
+			h = allocb(BLEN(b));
+			memcpy(h->rp, b->rp, BLEN(b));	/* copy the frame itself; b->wp points just past it */
+			h->wp += BLEN(b);
+			h->flag |= Btcpck|Budpck|Bpktck;
+			etheriq(e, h, 1);
+		}
+		if(write(c->fd, b->rp, BLEN(b)) == -1)
+			c->txerrs++;
+		freeb(b);	/* the block is released whether or not the write succeeded */
+	}
+}
+
+static long
+tapifstat(Ether *e, void *a, long n, ulong offset)
+{	/* ifstat read handler: report this card's transmit error count through readstr */
+	char buf[128];
+	Ctlr *c;
+
+	c = e->ctlr;	/* the controller hangs off e; a is the caller's output buffer */
+	snprint(buf, sizeof buf, "txerrors: %d\n", c->txerrs);	/* txerrs is an int */
+	return readstr(offset, a, n, buf);
+}
+
+static void
+tapattach(Ether* e)
+{	/* attach hook: start the receive kproc for this card */
+	kproc("taprecv", taprecvkproc, e);
+}
+
+static int
+tappnp(Ether* e)
+{	/* configure e from the next tap entry of ve[]; returns 0 on success, -1 if none is left or the open fails */
+	Ctlr c;
+	static int cve = 0;	/* next ve[] entry to consider, kept across calls */
+
+	while(cve < nve && ve[cve].tap == 0)	/* non-tap entries belong to the pcap driver */
+		cve++;
+	if(cve == nve)
+		return -1;
+
+	memset(&c, 0, sizeof c);
+	c.fd = setup(ve[cve].dev);
+	memcpy(c.ea, ve[cve].ea, Eaddrlen);
+	if(c.fd== -1){
+		iprint("ve: tap failed to initialize\n");
+		cve++;	/* skip the failed entry on the next call */
+		return -1;
+	}
+	e->ctlr = malloc(sizeof c);	/* NOTE(review): result is not checked before the memcpy */
+	memcpy(e->ctlr, &c, sizeof c);
+	e->tbdf = BUSUNKNOWN;
+	memcpy(e->ea, ve[cve].ea, Eaddrlen);
+	e->attach = tapattach;
+	e->transmit = taptransmit;
+	e->ifstat = tapifstat;
+	e->ni.arg = e;
+	e->ni.link = 1;
+	cve++;
+	return 0;
+}
+
+void
+ethertaplink(void)
+{	/* register tappnp as the probe routine for ether cards of type "tap" */
+	addethercard("tap", tappnp);
+}
diff --git a/src/9vx/fossil.9 b/src/9vx/fossil.9
Binary files differ.
diff --git a/src/9vx/main.c b/src/9vx/main.c
@@ -25,13 +25,24 @@
 #include	"arg.h"
 #include	"tos.h"
 
+#include "fs.h"
+
+#include "netif.h"
+#include "etherif.h"
+#include "vether.h"
+
 #define Image IMAGE
 #include	"draw.h"
 #include	"memdraw.h"
 #include	"cursor.h"
 #include	"screen.h"
 
+#define	BOOTLINELEN	64
+#define	BOOTARGSLEN	(3584-0x200-BOOTLINELEN)
+#define	MAXCONF		100
+
 extern Dev ipdevtab;
+extern Dev pipdevtab;
 extern Dev drawdevtab;
 extern Dev fsdevtab;
 extern Dev audiodevtab;
@@ -42,8 +53,14 @@ char*	argv0;
 char*	conffile = "9vx";
 Conf	conf;
 
+static char*	inifile;
+static char	inibuf[BOOTARGSLEN];
+static char	*iniline[MAXCONF];
 static int	bootboot;	/* run /boot/boot instead of bootscript */
+static int	nofork;	/* do not fork at init */
 static int	initrc;	/* run rc instead of init */
+static int	nogui;	/* do not start the gui */
+static int	usetty;	/* use tty for input/output */
 static char*	username;
 static Mach mach0;
 
@@ -56,13 +73,19 @@ static int singlethread;
 static void	bootinit(void);
 static void	siginit(void);
 
+static int	readini(char *fn);
+static void	inifields(void (*fp)(char*, char*));
+static void	iniopt(char *name, char *value);
+static void	inienv(char *name, char *value);
+
 static char*	getuser(void);
 static char*	findroot(void);
 
 void
 usage(void)
 {
-	fprint(2, "usage: 9vx [-gt] [-r root] [-u user]\n");
+	// TODO(yy): add debug and other options by ron
+	fprint(2, "usage: 9vx [-p file.ini] [-bfgit] [-n [tap] [netdev]] [-m macaddr] [-r root] [-u user]\n");
 	exit(1);
 }
 
@@ -74,9 +97,8 @@ nop(void)
 int
 main(int argc, char **argv)
 {
-	int usetty;
-	int nogui;
-	int nofork;
+	int vetap;
+	char *vedev;
 	char buf[1024];
 	
 	/* Minimal set up to make print work. */
@@ -87,6 +109,7 @@ main(int argc, char **argv)
 	nogui = 0;
 	nofork = 0;
 	usetty = 0;
+	nve = 0;
 	localroot = nil;
 	ARGBEGIN{
 	/* debugging options */
@@ -102,9 +125,6 @@ main(int argc, char **argv)
 	case 'K':
 		tracekdev++;
 		break;
-	case 'F':
-		nofork = 1;
-		break;
 	case 'M':
 		tracemmu++;
 		break;
@@ -125,6 +145,9 @@ main(int argc, char **argv)
 	case 'b':
 		bootboot = 1;
 		break;
+	case 'f':
+		nofork = 1;
+		break;
 	case 'g':
 		nogui = 1;
 		usetty = 1;
@@ -132,6 +155,26 @@ main(int argc, char **argv)
 	case 'i':
 		initrc = 1;
 		break;
+	case 'p':
+		inifile = EARGF(usage());
+		break;
+	case 'm':
+		setmac(EARGF(usage()));
+		break;
+	case 'n':
+		vetap = 0;
+		vedev = ARGF();
+		if(vedev != nil && strcmp(vedev, "tap") == 0){
+			vetap = 1;
+			vedev = ARGF();
+		}
+		if(vedev != nil && vedev[0] == '-'){
+			vedev = nil;
+			argc++;
+			argv--;
+		}
+		addve(vedev, vetap);
+		break;
 	case 'r':
 		localroot = EARGF(usage());
 		break;
@@ -148,6 +191,13 @@ main(int argc, char **argv)
 	if(argc != 0)
 		usage();
 	
+	if(inifile){
+		if(readini(inifile) != 0)
+			panic("error reading config file %s", inifile);
+		conffile=inifile;
+		inifields(&iniopt);
+	}
+
 	if(!bootboot){
 		if(localroot == nil && (localroot = findroot()) == nil)
 			panic("cannot find plan 9 root; use -r");
@@ -188,14 +238,34 @@ main(int argc, char **argv)
 	/*
 	 * Debugging: tell user what options we guessed.
 	 */
-	print("9vx %s-r %s -u %s\n", usetty ? "-t " : "", localroot, username);
+	print("9vx ");
+	if(inifile)
+		print("-p %s ", inifile);
+	if(bootboot | nofork | nogui | initrc | usetty)
+		print("-%s%s%s%s%s ", bootboot ? "b" : "", nofork ? "f " : "",
+			nogui ? "g" : "", initrc ? "i " : "", usetty ? "t " : "");
+	for(int i=0; i<nve; i++){
+		print("-n %s", ve[i].tap ? "tap ": "");
+		if(ve[i].dev != nil)
+			print("%s ", ve[i].dev);
+		if(ve[i].mac != nil)
+			print("-m %s ", ve[i].mac);
+	}
+	print("-r %s -u %s\n", localroot, username);
+
+	if(nve == 0)
+		ipdevtab = pipdevtab;
 
 	printinit();
 	procinit0();
 	initseg();
+	if(nve > 0)
+		links();
+
 	chandevreset();
 	if(!singlethread){
-		makekprocdev(&ipdevtab);
+		if(nve == 0)
+			makekprocdev(&ipdevtab);
 		makekprocdev(&fsdevtab);
 		makekprocdev(&drawdevtab);
 		makekprocdev(&audiodevtab);
@@ -218,6 +288,144 @@ main(int argc, char **argv)
 }
 
 /*
+ *  read configuration file
+ */
+int
+readini(char *fn)
+{	/* load fn ("-" means standard input) into inibuf, tidy it and split it into iniline[]; 0 on success, -1 on error */
+	int blankline, incomment, inspace, n, fd;
+	char *cp, *p, *q;
+
+	if(strcmp(fn, "-") == 0)
+		fd = 0;	/* descriptor 0 is standard input; read() needs an fd, not the stdio stream */
+	else if((fd = open(fn, OREAD)) < 0)
+		return -1;
+
+	cp = inibuf;
+	*cp = 0;
+	n = read(fd, cp, BOOTARGSLEN-1);	/* a single read: anything beyond BOOTARGSLEN-1 bytes is ignored */
+	close(fd);
+	if(n <= 0)
+		return -1;
+
+	cp[n] = 0;
+
+	/*
+	 * Strip out '\r', change '\t' -> ' '.
+	 * Change runs of spaces into single spaces.
+	 * Strip out trailing spaces, blank lines.
+	 *
+	 * We do this before we make the copy so that if we 
+	 * need to change the copy, it is already fairly clean.
+	 * The main need is in the case when plan9.ini has been
+	 * padded with lots of trailing spaces, as is the case 
+	 * for those created during a distribution install.
+	 */
+	p = cp;	/* p is the write cursor, q the read cursor; the cleanup is done in place */
+	blankline = 1;
+	incomment = inspace = 0;
+	for(q = cp; *q; q++){
+		if(*q == '\r')
+			continue;
+		if(*q == '\t')
+			*q = ' ';
+		if(*q == ' '){
+			inspace = 1;	/* defer the space until a following non-space character is seen */
+			continue;
+		}
+		if(*q == '\n'){
+			if(!blankline){
+				if(!incomment)
+					*p++ = '\n';
+				blankline = 1;
+			}
+			incomment = inspace = 0;
+			continue;
+		}
+		if(inspace){
+			if(!blankline && !incomment)
+				*p++ = ' ';
+			inspace = 0;
+		}
+		if(blankline && *q == '#')	/* '#' as the first non-blank character starts a comment line */
+			incomment = 1;
+		blankline = 0;
+		if(!incomment)
+			*p++ = *q;	
+	}
+	if(p > cp && p[-1] != '\n')
+		*p++ = '\n';
+	*p++ = 0;
+
+	getfields(cp, iniline, MAXCONF, 0, "\n");	/* one iniline[] entry per cleaned line */
+
+	return 0;
+}
+
+void
+inifields(void (*fp)(char*, char*))
+{	/* call fp(name, value) for every "name=value" line in iniline[], restoring each line afterwards */
+	int i;
+	char *cp;
+
+	for(i = 0; i < MAXCONF; i++){
+		if(!iniline[i])	/* the first empty slot ends the list */
+			break;
+		cp = strchr(iniline[i], '=');
+		if(cp == 0)	/* lines without '=' are skipped */
+			continue;
+		*cp++ = 0;	/* split in place: name before the NUL, value after it */
+		if(cp - iniline[i] >= NAMELEN+1)
+			*(iniline[i]+NAMELEN-1) = 0;	/* overlong names are cut at NAMELEN-1; this cut is not undone below */
+		(fp)(iniline[i], cp);
+		*(cp-1) = '=';	/* put the '=' back so the line can be scanned again */
+	}
+}
+
+void
+iniopt(char *name, char *value)
+{	/* apply one configuration-file setting to the matching option; unknown names are ignored */
+	char *vedev;
+	int vetap = 0;	/* stays 0 unless the netdev value starts with "tap" */
+
+	if(*name == '*')	/* a leading '*' is dropped before matching */
+		name++;
+	if(strcmp(name, "bootboot") == 0)
+		bootboot = 1;
+	else if(strcmp(name, "initrc") == 0)
+		initrc = 1;
+	else if(strcmp(name, "nofork") == 0)
+		nofork = 1;
+	else if(strcmp(name, "localroot") == 0 && !localroot)	/* a value that is already set wins */
+		localroot = value;
+	else if(strcmp(name, "user") == 0 && !username)
+		username = value;
+	else if(strcmp(name, "usetty") == 0)
+		usetty = 1;
+	else if(strcmp(name, "macaddr") == 0)
+		setmac(value);
+	else if(strcmp(name, "netdev") == 0){
+		if(strncmp(value, "tap", 3) == 0) {
+			vetap = 1;
+			value += value[3] ? 4 : 3;	/* skip "tap" and the character after it, but never step past the NUL */
+		}
+		vedev = value;
+		addve(vedev, vetap);
+	}
+	else if(strcmp(name, "nogui") == 0){
+		nogui = 1;
+		usetty = 1;
+	}
+}
+
+void
+inienv(char *name, char *value)
+{	/* export a configuration-file setting as an environment variable, skipping names that start with '*' */
+	if(*name != '*')
+		ksetenv(name, value, 0);
+}
+
+/*
  * Search for Plan 9 /386/bin/rc to find root.
  */
 static char*
@@ -228,8 +436,7 @@ findroot(void)
 	char buf[1024];
 	char *dir[] = {
 		cwd,
-		"/Users/rsc/9vx",
-		"/home/rsc/plan9/4e"
+		"/usr/local/9vx"
 	};
 	
 	if(getcwd(cwd, sizeof cwd) == nil){
@@ -304,6 +511,10 @@ bootinit(void)
 	 */
 	extern uchar factotumcode[];
 	extern long factotumlen;
+	extern uchar fossilcode[];
+	extern long fossillen;
+	extern uchar venticode[];
+	extern long ventilen;
 
 	if(bootboot){
 		extern uchar bootcode[];
@@ -314,6 +525,8 @@ bootinit(void)
 	else
 		addbootfile("boot", (uchar*)bootscript, strlen(bootscript));
 	addbootfile("factotum", factotumcode, factotumlen);
+	addbootfile("fossil", fossilcode, fossillen);
+	addbootfile("venti", venticode, ventilen);
 }
 
 static uchar *sp;	/* user stack of init proc */
@@ -484,7 +697,8 @@ init0(void)
 		ksetenv("service", "terminal", 0);
 	ksetenv("user", username, 0);
 	ksetenv("sysname", "vx32", 0);
-	
+	inifields(&inienv);
+
 	/* if we're not running /boot/boot, mount / and create /srv/boot */
 	if(!bootboot){
 		kbind("#Zplan9/", "/", MAFTER);
@@ -556,8 +770,13 @@ sigsegv(int signo, siginfo_t *info, void *v)
 #elif defined(__FreeBSD__)
 	mcontext_t *mc;
 	mc = &uc->uc_mcontext;
+#ifdef __i386__
 	eip = mc->mc_eip;
 	esp = mc->mc_esp;
+#elif defined(__amd64__)
+	eip = mc->mc_rip;
+	esp = mc->mc_rsp;
+#endif
 	addr = (ulong)info->si_addr;
 	if(__FreeBSD__ < 7){
 		/*
diff --git a/src/9vx/mmu.c b/src/9vx/mmu.c
@@ -26,7 +26,7 @@ int tracemmu;
  * Plan 9 assumes this, and while it's not a ton of work to break that
  * assumption, it was easier not to.
  */
-#define MEMSIZE (256<<20)
+#define MEMSIZE (256<<20)	// same as ../a/devether.c:13 (TODO: var)
 
 static int pagefile;
 static char* pagebase;
@@ -35,6 +35,19 @@ static Uspace uspace[16];
 static Uspace *ulist[nelem(uspace)];
 int nuspace = 1;
 
+#ifdef __i386__
+#define BIT32 0
+#define HINT nil
+#elif defined(__amd64__)
+#ifdef linux
+#define BIT32 MAP_32BIT
+#define HINT nil
+#elif defined(__FreeBSD__)
+#define BIT32 MAP_FIXED
+#define HINT (caddr_t)0x40000000
+#endif
+#endif
+
 int
 isuaddr(void *v)
 {
@@ -56,15 +69,14 @@ mapzero(void)
 {
 	int fd, bit32;
 	void *v;
+	void *hint;
 	
-#ifdef i386
-	bit32 = 0;
-#else
-	bit32 = MAP_32BIT;
-#endif
+	bit32 = BIT32;
+	hint = HINT;
+
 	/* First try mmaping /dev/zero.  Some OS'es don't allow this. */
 	if((fd = open("/dev/zero", O_RDONLY)) >= 0){
-		v = mmap(nil, USTKTOP, PROT_NONE, bit32|MAP_PRIVATE, fd, 0);
+		v = mmap(hint, USTKTOP, PROT_NONE, bit32|MAP_PRIVATE, fd, 0);
 		if(v != MAP_FAILED) {
 			if((uint32_t)(uintptr)v != (uintptr)v) {
 				iprint("mmap returned 64-bit pointer %p\n", v);
@@ -75,7 +87,7 @@ mapzero(void)
 	}
 	
 	/* Next try an anonymous map. */
-	v = mmap(nil, USTKTOP, PROT_NONE, bit32|MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	v = mmap(hint, USTKTOP, PROT_NONE, bit32|MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
 	if(v != MAP_FAILED) {
 		if((uint32_t)(uintptr)v != (uintptr)v) {
 			iprint("mmap returned 64-bit pointer %p\n", v);
diff --git a/src/9vx/sched.c b/src/9vx/sched.c
@@ -174,7 +174,7 @@ struct Pwaiter
 };
 
 void
-plock(Psleep *p)
+__plock(Psleep *p)
 {
 	int r;
 
@@ -193,7 +193,7 @@ plock(Psleep *p)
 }
 
 void
-punlock(Psleep *p)
+__punlock(Psleep *p)
 {
 	int r;
 
@@ -202,7 +202,7 @@ punlock(Psleep *p)
 }
 
 void
-psleep(Psleep *p)
+__psleep(Psleep *p)
 {
 	int r;
 	Pwaiter w;
@@ -218,7 +218,7 @@ psleep(Psleep *p)
 }
 
 void
-pwakeup(Psleep *p)
+__pwakeup(Psleep *p)
 {
 	int r;
 	Pwaiter *w;
diff --git a/src/9vx/sdloop.c b/src/9vx/sdloop.c
@@ -22,6 +22,7 @@ struct Ctlr{
 	Chan	*c;
 	int		mode;
 	uvlong	qidpath;
+	char		fn[20];
 };
 
 static	Lock	ctlrlock;
@@ -30,9 +31,47 @@ static	Ctlr	*ctlrtail;
 
 SDifc sdloopifc;
 
+static void
+loopopen(Ctlr *c)
+{	/* open the backing file c->fn on first use; does nothing once c->c is set */
+	if(c->c == nil)
+		c->c = namec(c->fn, Aopen, c->mode, 0);
+}
+
 static SDev*
 looppnp(void)
 {
+	struct stat sbuf;
+	char c, c2;
+	char fn[20];	/* the longest name built below, "#Z/dev/cciss/cXdY", is 17 characters */
+
+	for(c = 'a'; c <= 'j'; ++c){	/* each loop registers every host device node of one family that exists */
+		sprint(fn, "#Z/dev/sd%c", c);
+		if(stat(fn+2, &sbuf) == 0)	/* fn+2 skips the "#Z" prefix to get the host path */
+			loopdev(fn, ORDWR);
+	}
+	for(c = '0'; c <= '9'; ++c){
+		sprint(fn, "#Z/dev/sd%c",c);	/* sprint, like every other formatting call in this function */
+		if(stat(fn+2, &sbuf) == 0)
+			loopdev(fn, ORDWR);
+	}
+	for(c = 'a'; c <= 'j'; ++c){
+		sprint(fn, "#Z/dev/hd%c", c);
+		if(stat(fn+2, &sbuf) == 0)
+			loopdev(fn, ORDWR);
+	}
+	for(c = '0'; c <= '9'; ++c){
+		sprint(fn, "#Z/dev/wd%c", c);
+		if(stat(fn+2, &sbuf) == 0)
+			loopdev(fn, ORDWR);
+	}
+	for(c = '0'; c <= '8'; ++c){
+		for(c2 = '0'; c2 <= '8'; ++c2){
+			sprint(fn, "#Z/dev/cciss/c%cd%c", c, c2);
+			if(stat(fn+2, &sbuf) == 0)
+				loopdev(fn, ORDWR);
+		}
+	}
 	return nil;
 }
 
@@ -69,6 +108,7 @@ looponline(SDunit *unit)
 
 	sdev = unit->dev;
 	ctlr = sdev->ctlr;
+	loopopen(ctlr);
 	c = ctlr->c;
 	n = devtab[c->type]->stat(c, buf, sizeof buf);
 	if(convM2D(buf, n, &dir, nil) == 0)
@@ -99,6 +139,7 @@ looprio(SDreq *r)
 	unit = r->unit;
 	sdev = unit->dev;
 	ctlr = sdev->ctlr;
+	loopopen(ctlr);
 	cmd = r->cmd;
 
 	if((status = sdfakescsi(r, nil, 0)) != SDnostatus){
@@ -141,6 +182,7 @@ looprctl(SDunit *unit, char *p, int l)
 	char *e, *op;
 	
 	ctlr = unit->dev->ctlr;
+	loopopen(ctlr);
 	e = p+l;
 	op = p;
 	
@@ -170,7 +212,8 @@ loopclear1(Ctlr *ctlr)
 		ctlrtail = ctlr->prev;
 	unlock(&ctlrlock);
 	
-	cclose(ctlr->c);
+	if(ctlr->c)
+		cclose(ctlr->c);
 	free(ctlr);
 }
 
@@ -187,6 +230,7 @@ looprtopctl(SDev *s, char *p, char *e)
 	char *r;
 
 	c = s->ctlr;
+	loopopen(c);
 	r = "ro";
 	if(c->mode == ORDWR)
 		r = "rw";
@@ -219,9 +263,9 @@ loopdev(char *name, int mode)
 	Ctlr *volatile ctlr;
 	SDev *volatile sdev;
 
-	c = namec(name, Aopen, mode, 0);
 	ctlr = nil;
 	sdev = nil;
+/*
 	if(waserror()){
 		cclose(c);
 		if(ctlr)
@@ -230,6 +274,7 @@ loopdev(char *name, int mode)
 			free(sdev);
 		nexterror();
 	}
+*/
 
 	ctlr = smalloc(sizeof *ctlr);
 	sdev = smalloc(sizeof *sdev);
@@ -238,9 +283,11 @@ loopdev(char *name, int mode)
 	sdev->nunit = 1;
 	sdev->idno = '0';
 	ctlr->sdev = sdev;
-	ctlr->c = c;
+	strcpy(ctlr->fn, name);
 	ctlr->mode = mode;
+/*
 	poperror();
+*/
 
 	lock(&ctlrlock);
 	ctlr->next = nil;
@@ -277,11 +324,5 @@ SDifc sdloopifc = {
 	loopwtopctl,
 };
 
-SDifc *sdifc[] = 
-{
-	&sdloopifc,
-	nil
-};
-
 
 
diff --git a/src/9vx/u.h b/src/9vx/u.h
@@ -17,3 +17,4 @@ typedef int socklen_t;
 #define nil ((void*)0)
 #define sleep _ksleep
 #define syscall _ksyscall
+#define atoi(x) strtol(x, 0, 0)
diff --git a/src/9vx/venti.9 b/src/9vx/venti.9
Binary files differ.
diff --git a/src/9vx/vether.c b/src/9vx/vether.c
@@ -0,0 +1,180 @@
+#include "u.h"
+#include "mem.h"
+#include "lib.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+#include "ip/ip.h"
+#include "netif.h"
+#include "etherif.h"
+#include "vether.h"
+#include "sd.h"
+
+extern int nettap;
+extern void ethertaplink(void);
+extern void etherpcaplink(void);
+extern void ethermediumlink(void);
+extern void loopbackmediumlink(void);
+extern void netdevmediumlink(void);
+
+extern void ilinit(Fs*);
+extern void tcpinit(Fs*);
+extern void udpinit(Fs*);
+extern void ipifcinit(Fs*);
+extern void icmpinit(Fs*);
+extern void icmp6init(Fs*);
+extern void greinit(Fs*);
+extern void ipmuxinit(Fs*);
+extern void espinit(Fs*);
+
+extern SDifc sdloopifc;
+extern SDifc sdaoeifc;
+
+/*
+ * Table of configured virtual ethernet cards and the count of
+ * entries in use.  vether.h declares both extern; they are defined here.
+ */
+Vether ve[MaxEther+1];
+int nve;
+
+/*
+ * Parse n hexadecimal bytes from s into ea.  Each byte but the last
+ * must be followed by one separator character, as in "00:11:22:33:44:55".
+ * Returns 0 on success.  Returns -1 if s is nil, a byte has no hex
+ * digits, or the string ends early; bytes that could not be parsed
+ * are zeroed.  Never reads past the terminating NUL.
+ */
+static int
+parseea(char *s, uchar *ea, int n)
+{
+	int i;
+	char *end;
+
+	for(i = 0; s != nil && i < n; i++){
+		ea[i] = (uchar)strtoul(s, &end, 16);
+		if(end == s)
+			break;		/* no digits: ea[i] is already 0 */
+		if(i == n-1)
+			return 0;
+		if(*end == '\0'){
+			i++;		/* keep this byte, zero what follows */
+			break;
+		}
+		s = end+1;		/* step over the separator */
+	}
+	for(; i < n; i++)
+		ea[i] = 0;
+	return -1;
+}
+
+/*
+ * Give the most recently added card (see addve) the MAC address
+ * written in macaddr.  Does nothing when no card has been added.
+ * The string is stored by reference, not copied; a non-nil mac
+ * field makes links() keep this address instead of choosing one.
+ * Bytes that cannot be parsed from a malformed string are left zero.
+ */
+void
+setmac(char *macaddr)
+{
+	Vether *v;
+
+	if(nve == 0)
+		return;
+	v = &ve[nve-1];
+	v->mac = macaddr;
+	parseea(macaddr, v->ea, Eaddrlen);
+}
+
+/*
+ * Check whether address ea is taken while picking an automatic
+ * address for card n.  It is compared against every card before n and
+ * every card whose mac field is set.  Returns -1 if taken, 0 if free.
+ */
+static int
+eainuse(int n, uchar ea[Eaddrlen])
+{
+	int i;
+
+	for(i = 0; i < nve; i++)
+		if((i<n || ve[i].mac != nil) && memcmp(ea, ve[i].ea, Eaddrlen) == 0)
+			return -1;
+	return 0;
+}
+
+/*
+ * Append a card for device dev to the table.
+ * tap == 1 selects the tap link routine in links(), anything else pcap.
+ * The card starts with no MAC address text (mac is nil).
+ * Panics once MaxEther cards are present.
+ */
+void
+addve(char *dev, int tap)
+{
+	if(nve >= MaxEther)
+		panic("too many virtual ether cards");
+	ve[nve].tap = tap;
+	ve[nve].dev = dev;
+	ve[nve].mac = nil;
+	nve++;
+}
+
+/*
+ * Call the medium link routines, then for each configured card
+ * call the tap or pcap link routine.  A card whose mac field is nil
+ * first receives the next free address, found by incrementing the
+ * last byte of 00:00:09:00:00:00 until eainuse() reports it free.
+ */
+void
+links(void)
+{
+	static uchar ea[Eaddrlen] = {0x00, 0x00, 0x09, 0x00, 0x00, 0x00};
+	int i;
+
+	ethermediumlink();
+	loopbackmediumlink();
+	netdevmediumlink();
+	for(i = 0; i < nve; i++){
+		if(ve[i].mac == nil){
+			while(eainuse(i, ea))
+				ea[5]++;
+			memcpy(ve[i].ea, ea, Eaddrlen);
+		}
+		if(ve[i].tap == 1)
+			ethertaplink();
+		else
+			etherpcaplink();
+	}
+}
+
+/* nil-terminated list of protocol init routines */
+void (*ipprotoinit[])(Fs*) = {
+	ilinit,
+	tcpinit,
+	udpinit,
+	ipifcinit,
+	icmpinit,
+	icmp6init,
+	greinit,
+	ipmuxinit,
+	espinit,
+	nil,
+};
+
+/*
+ * Convert the MAC address text ma into the six bytes of ea.
+ * Returns 0 on success, -1 if ma is nil or malformed.
+ */
+int
+eafrom(char *ma, uchar ea[6])
+{
+	return parseea(ma, ea, 6);
+}
+
+/* nil-terminated list of SDifc storage interfaces */
+SDifc *sdifc[] =
+{
+	&sdloopifc,
+	&sdaoeifc,
+	nil,
+};
diff --git a/src/9vx/vether.h b/src/9vx/vether.h
@@ -0,0 +1,17 @@
+/* One virtual ethernet card, filled in by addve() and setmac(). */
+typedef struct Vether Vether;
+struct Vether
+{
+	int	tap;	/* 1 for a tap device; otherwise pcap is used */
+	char	*dev;	/* device name passed to addve */
+	char	*mac;	/* MAC address text passed to setmac, or nil */
+	uchar	ea[Eaddrlen];	/* MAC address bytes */
+};
+
+/* Card table and number of entries in use; defined in vether.c. */
+extern Vether ve[MaxEther+1];
+extern int nve;
+
+void	setmac(char*);
+void	addve(char*, int);
+void	links(void);
diff --git a/src/libvx32/Makefrag b/src/libvx32/Makefrag
@@ -1,8 +1,12 @@
 ifeq ($(ARCH),x86_64)
 VX32_RUN = run64.o
 else
+ifeq ($(ARCH),amd64)
+VX32_RUN = run64.o
+else
 VX32_RUN = run32.o 
 endif
+endif
 
 ifeq ($(OS),darwin)
 VX32_RUN := $(VX32_RUN) darwin-asm.o
diff --git a/src/libvx32/freebsd.c b/src/libvx32/freebsd.c
@@ -20,18 +20,34 @@
 #warning "libvx32 and FreeBSD 5 and 6's libpthread are not compatible."
 #endif
 
+#ifdef __i386__
 static void setbase(struct segment_descriptor *desc, unsigned long base)
+#elif defined __amd64__
+static void setbase(struct user_segment_descriptor *desc, unsigned long base)
+#endif
 {
 	desc->sd_lobase = base & 0xffffff;
 	desc->sd_hibase = base >> 24;
 }
 
+#ifdef __i386__
 static void setlimit(struct segment_descriptor *desc, unsigned long limit)
+#elif defined __amd64__
+static void setlimit(struct user_segment_descriptor *desc, unsigned long limit)
+#endif
 {
 	desc->sd_lolimit = limit & 0xffff;
 	desc->sd_hilimit = limit >> 16;
 }
 
+/*
+#ifdef __amd64__
+union descriptor {
+	struct user_segment_descriptor sd;
+	struct gate_descriptor gd;
+};
+#endif
+*/
 
 int vxemu_map(vxemu *emu, vxmmap *mm)
 {
@@ -52,27 +68,44 @@ int vxemu_map(vxemu *emu, vxmmap *mm)
 		desc.sd.sd_def32 = 1;
 		desc.sd.sd_gran = 1;
 		if(emu->datasel == 0){
+#ifdef __i386__
 			if ((s = i386_set_ldt(LDT_AUTO_ALLOC, &desc, 1)) < 0)
+#elif defined __amd64__
+			if ((s = sysarch(I386_SET_GSBASE, &desc)) < 0)
+#endif
 				return -1;
 			emu->datasel = (s<<3) + 4 + 3;	// 4=LDT, 3=RPL
-		}else if(i386_set_ldt(emu->datasel >> 3, &desc, 1) < 0)
+#ifdef __i386__
+		}else if (i386_set_ldt(emu->datasel >> 3, &desc, 1) < 0)
+#elif defined __amd64__
+		}else if (sysarch(I386_SET_GSBASE, &desc) < 0)
+#endif
 			return -1;
 
 		// Set up the process's vxemu segment selector (for FS).
 		setbase(&desc.sd, (unsigned long)emu);
 		setlimit(&desc.sd, (VXCODEBUFSIZE - 1) >> VXPAGESHIFT);
 		if(emu->emusel == 0){
+#ifdef __i386__
 			if ((s = i386_set_ldt(LDT_AUTO_ALLOC, &desc, 1)) < 0)
+#elif defined __amd64__
+			if ((s = sysarch(I386_SET_GSBASE, &desc)) < 0)
+#endif
 				return -1;
 			emu->emusel = (s<<3) + 4 + 3;	// 4=LDT, 3=RPL
-		}else if(i386_set_ldt(emu->emusel >> 3, &desc, 1) < 0)
+#ifdef __i386__
+		}else if (i386_set_ldt(emu->emusel >> 3, &desc, 1) < 0)
+#elif defined __amd64__
+		}else if (sysarch(I386_SET_GSBASE, &desc) < 0)
+#endif
 			return -1;
 
 		emu->ldt_base = (uintptr_t)mm->base;
 		emu->ldt_size = mm->size;
 	}
 
-#ifdef __x86_64
+#ifdef __amd64__
+/*
 	// Set up 32-bit mode code and data segments (not vxproc-specific),
 	// giving access to the full low 32-bit of linear address space.
 	// The code segment is necessary to get into 32-bit compatibility mode;
@@ -80,11 +113,9 @@ int vxemu_map(vxemu *emu, vxmmap *mm)
 	// doesn't give 64-bit processes a "real" data segment by default
 	// but instead just loads zero into the data segment selectors!
 	emu->runptr.sel = FLATCODE;
-	desc.entry_number = emu->runptr.sel / 8;
-	desc.base_addr = 0;
-	desc.limit = 0xfffff;
-	desc.contents = MODIFY_LDT_CONTENTS_CODE;
-	if (modify_ldt(1, &desc, sizeof(desc)) < 0)
+	setbase(&desc.sd, 0);
+	setlimit(&desc.sd, 0xfffff);
+	if ((s = sysarch(I386_SET_GSBASE, &desc)) < 0)
 		return -1;
 
 	desc.entry_number = FLATDATA / 8;
@@ -97,6 +128,7 @@ int vxemu_map(vxemu *emu, vxmmap *mm)
 	extern void vxrun_return();
 	asm volatile("movw %%cs,%0" : "=r" (emu->retptr.sel));
 	emu->retptr.ofs = (uint32_t)(intptr_t)vxrun_return;
+*/
 #endif
 
 	return 0;
@@ -122,28 +154,35 @@ static void dumpmcontext(mcontext_t *ctx, uint32_t cr2)
 		"r12 %016lx  r13 %016lx\nr14 %016lx  r15 %016lx\n"
 		"rip %016lx  efl %016lx  cs %04x  ss %04x\n"
 		"err %016lx  trapno %016lx  cr2 %016lx\n",
-		ctx->rax, ctx->rbx, ctx->rcx, ctx->rdx,
-		ctx->rsi, ctx->rdi, ctx->rbp, ctx->rsp,
-		ctx->r8, ctx->r9, ctx->r10, ctx->r11,
-		ctx->r12, ctx->r13, ctx->r14, ctx->r15,
-		ctx->rip, ctx->eflags, ctx->cs, ctx->__pad0,
-		ctx->err, ctx->trapno, ctx->cr2);
+		ctx->mc_rax, ctx->mc_rbx, ctx->mc_rcx, ctx->mc_rdx,
+		ctx->mc_rsi, ctx->mc_rdi, ctx->mc_rbp, ctx->mc_rsp,
+		ctx->mc_r8, ctx->mc_r9, ctx->mc_r10, ctx->mc_r11,
+		ctx->mc_r12, ctx->mc_r13, ctx->mc_r14, ctx->mc_r15,
+		ctx->mc_rip, ctx->mc_rflags, ctx->mc_cs, ctx->mc_ss,
+		ctx->mc_err, ctx->mc_trapno, cr2);
 #endif
 }
 
 static void
 fprestore(int *state, int fmt)
 {
+#ifdef __i386__
 	if(fmt == _MC_FPFMT_387)
 		asm volatile("frstor 0(%%eax); fwait\n" : : "a" (state) : "memory");
-	else if(fmt == _MC_FPFMT_XMM){
+	else
+#endif
+	if(fmt == _MC_FPFMT_XMM){
 		/* Have to 16-align the 512-byte state */
 		char buf[512+16], *p;
 		p = buf;
 		if((long)p&15)
 			p += 16 - (long)p&15;
 		memmove(p, state, 512);
+#ifdef __i386__
 		asm volatile("fxrstor 0(%%eax); fwait\n" : : "a" (p) : "memory");
+#elif defined(__amd64__)
+		asm volatile("fxrstor 0(%%rax); fwait\n" : : "a" (p) : "memory");
+#endif
 	}else
 		abort();
 }
@@ -167,12 +206,22 @@ int vx32_sighandler(int signo, siginfo_t *si, void *v)
 
 	// First sanity check vxproc segment number.
 	// FreeBSD reset the register before entering the handler!
+#ifdef __i386__
 	asm("movw %"VSEGSTR",%0"
 		: "=r" (oldvs));
 	vs = mc->mc_vs & 0xFFFF;	/* mc_vs #defined in os.h */
+#elif defined(__amd64__)
+	if (sysarch(I386_GET_GSBASE, &vs) < 0)
+		return 0;
+#endif
 
+#ifdef __i386__
 	if(0) vxprint("vx32_sighandler signo=%d eip=%#x esp=%#x vs=%#x currentvs=%#x\n",
 		signo, mc->mc_eip, mc->mc_esp, vs, oldvs);
+#elif defined(__amd64__)
+	if(0) vxprint("vx32_sighandler signo=%d rip=%#x rsp=%#x vs=%#x currentvs=%#x\n",
+		signo, mc->mc_rip, mc->mc_rsp, vs, oldvs);
+#endif
 
 	if ((vs & 7) != 7)	// LDT, RPL=3
 		return 0;
@@ -192,12 +241,21 @@ int vx32_sighandler(int signo, siginfo_t *si, void *v)
 	// Okay, we're convinced.
 
 	// Find current vxproc and vxemu.
+#ifdef __i386__
 	asm("movw %"VSEGSTR",%1\n"
 		"movw %2,%"VSEGSTR"\n"
 		"movl %"VSEGSTR":%3,%0\n"
 		"movw %1,%"VSEGSTR"\n"
 		: "=r" (vxp), "=r" (oldvs)
 		: "r" (vs), "m" (((vxemu*)0)->proc));
+#elif defined(__amd64__)
+	asm("movw %"VSEGSTR",%1\n"
+		"movw %2,%"VSEGSTR"\n"
+		"movw %"VSEGSTR":%3,%0\n"
+		"movw %1,%"VSEGSTR"\n"
+		: "=r" (vxp), "=r" (oldvs)
+		: "r" (vs), "m" (((vxemu*)0)->proc));
+#endif
 	emu = vxp->emu;
 
 	// Get back our regular host segment register state,
@@ -212,7 +270,11 @@ int vx32_sighandler(int signo, siginfo_t *si, void *v)
 	switch(signo){
 	case SIGSEGV:
 		newtrap = VXTRAP_PAGEFAULT;
+#ifdef __i386__
 		addr = (uint32_t)si->si_addr;
+#elif defined(__amd64__)
+		addr = (uint64_t)si->si_addr;
+#endif
 		break;
 	case SIGBUS:
 		/*
@@ -242,7 +304,11 @@ int vx32_sighandler(int signo, siginfo_t *si, void *v)
 		// before entering the signal handler.
 		addr = 0;
 		newtrap = VXTRAP_SINGLESTEP;
+#ifdef __i386__
 		mc->mc_eflags &= ~EFLAGS_TF;	// Just in case.
+#elif defined(__amd64__)
+		mc->mc_rflags &= ~EFLAGS_TF;	// Just in case.
+#endif
 		break;
 
 	default:
@@ -264,51 +330,111 @@ int vx32_sighandler(int signo, siginfo_t *si, void *v)
 	}
 	emu->cpu_trap = newtrap;
 
+#ifdef __i386__
 	r = vxemu_sighandler(emu, mc->mc_eip);
+#elif defined(__amd64__)
+	r = vxemu_sighandler(emu, mc->mc_rip);
+#endif
 
 	if (r == VXSIG_SINGLESTEP){
 		// Vxemu_sighandler wants us to single step.
 		// Execution state is in intermediate state - don't touch.
+#ifdef __i386__
 		mc->mc_eflags |= EFLAGS_TF;		// x86 TF (single-step) bit
+#elif defined(__amd64__)
+		mc->mc_rflags |= EFLAGS_TF;
+#endif
 		vxrun_setup(emu);
 		return 1;
 	}
 
 	// Copy execution state into emu.
 	if ((r & VXSIG_SAVE_ALL) == VXSIG_SAVE_ALL) {
+#ifdef __i386__
 		emu->cpu.reg[EAX] = mc->mc_eax;
 		emu->cpu.reg[EBX] = mc->mc_ebx;
 		emu->cpu.reg[ECX] = mc->mc_ecx;
 		emu->cpu.reg[EDX] = mc->mc_edx;
-		emu->cpu.reg[ESI] =  mc->mc_esi;
+		emu->cpu.reg[ESI] = mc->mc_esi;
 		emu->cpu.reg[EDI] = mc->mc_edi;
 		emu->cpu.reg[ESP] = mc->mc_esp;	// or esp_at_signal ???
 		emu->cpu.reg[EBP] = mc->mc_ebp;
 		emu->cpu.eflags = mc->mc_eflags;
+#elif defined(__amd64__)
+		emu->cpu.reg[EAX] = mc->mc_rax;
+		emu->cpu.reg[EBX] = mc->mc_rbx;
+		emu->cpu.reg[ECX] = mc->mc_rcx;
+		emu->cpu.reg[EDX] = mc->mc_rdx;
+		emu->cpu.reg[ESI] = mc->mc_rsi;
+		emu->cpu.reg[EDI] = mc->mc_rdi;
+		emu->cpu.reg[ESP] = mc->mc_rsp;	// or esp_at_signal ???
+		emu->cpu.reg[EBP] = mc->mc_rbp;
+		emu->cpu.eflags = mc->mc_rflags;
+#endif
 	} else if (r & VXSIG_SAVE_ALL) {
 		if (r & VXSIG_SAVE_EAX)
+#ifdef __i386__
 			emu->cpu.reg[EAX] = mc->mc_eax;
+#elif defined(__amd64__)
+			emu->cpu.reg[EAX] = mc->mc_rax;
+#endif
 		if (r & VXSIG_SAVE_EBX)
+#ifdef __i386__
 			emu->cpu.reg[EBX] = mc->mc_ebx;
+#elif defined(__amd64__)
+			emu->cpu.reg[EBX] = mc->mc_rbx;
+#endif
 		if (r & VXSIG_SAVE_ECX)
+#ifdef __i386__
 			emu->cpu.reg[ECX] = mc->mc_ecx;
+#elif defined(__amd64__)
+			emu->cpu.reg[ECX] = mc->mc_rcx;
+#endif
 		if (r & VXSIG_SAVE_EDX)
+#ifdef __i386__
 			emu->cpu.reg[EDX] = mc->mc_edx;
+#elif defined(__amd64__)
+			emu->cpu.reg[EDX] = mc->mc_rdx;
+#endif
 		if (r & VXSIG_SAVE_ESI)
+#ifdef __i386__
 			emu->cpu.reg[ESI] =  mc->mc_esi;
+#elif defined(__amd64__)
+			emu->cpu.reg[ESI] =  mc->mc_rsi;
+#endif
 		if (r & VXSIG_SAVE_EDI)
+#ifdef __i386__
 			emu->cpu.reg[EDI] = mc->mc_edi;
+#elif defined(__amd64__)
+			emu->cpu.reg[EDI] = mc->mc_rdi;
+#endif
 		if (r & VXSIG_SAVE_ESP)
+#ifdef __i386__
 			emu->cpu.reg[ESP] = mc->mc_esp;	// or esp_at_signal ???
+#elif defined(__amd64__)
+			emu->cpu.reg[ESP] = mc->mc_rsp;	// or esp_at_signal ???
+#endif
 		if (r & VXSIG_SAVE_EBP)
+#ifdef __i386__
 			emu->cpu.reg[EBP] = mc->mc_ebp;
+#elif defined(__amd64__)
+			emu->cpu.reg[EBP] = mc->mc_rbp;
+#endif
 		if (r & VXSIG_SAVE_EFLAGS)
+#ifdef __i386__
 			emu->cpu.eflags = mc->mc_eflags;
+#elif defined(__amd64__)
+			emu->cpu.eflags = mc->mc_rflags;
+#endif
 	}
 	r &= ~VXSIG_SAVE_ALL;
 
 	if (r & VXSIG_SAVE_EBX_AS_EIP)
+#ifdef __i386__
 		emu->cpu.eip = mc->mc_ebx;
+#elif defined(__amd64__)
+		emu->cpu.eip = mc->mc_rbx;
+#endif
 	r &= ~VXSIG_SAVE_EBX_AS_EIP;
 
 	if (r & VXSIG_ADD_COUNT_TO_ESP) {
@@ -327,7 +453,11 @@ int vx32_sighandler(int signo, siginfo_t *si, void *v)
 			return 0;
 		emu->cpu.traperr = mc->mc_err;
 		emu->cpu.trapva = addr;
+#ifdef __i386__
 		memmove(&mc->mc_gs, &emu->trapenv->mc_gs, 19*4);
+#elif defined(__amd64__)
+		memmove(&mc->mc_onstack, &emu->trapenv->mc_onstack, sizeof(mcontext_t));
+#endif
 		return 1;
 	}
 
diff --git a/src/libvx32/run64.S b/src/libvx32/run64.S
@@ -79,7 +79,11 @@ vxrun:
 	movl	VXEMU_EDI(%r8),%edi
 
 	// Run translated code
+#ifndef __FreeBSD__
 	ljmpl	*VXEMU_RUNPTR(%r8)	// 'ljmpq' doesn't work - gas bug??
+#else
+	ljmpq	*VXEMU_RUNPTR(%r8)
+#endif
 
 
 // Return from running translated code to the normal host environment.