commit b0d887c52d091ae3c62316cfc54ff178f64ab850
parent a700164cb1d15543535ee17fbff2f91fe4b4f595
Author: John (EBo) David <ebo@users.sourceforge.net>
Date: Mon, 21 Jun 2010 03:17:15 -0500
attempting merge
--HG--
branch : yy-int-branch
Diffstat:
74 files changed, 28454 insertions(+), 87 deletions(-)
diff --git a/.hgignore b/.hgignore
@@ -25,6 +25,8 @@ src/vxa/bz2/*ebz2
src/vxlinux/vxlinux
src/9vx/9vx
src/9vx/bootcode.S
+src/9vx/fossil.S
+src/9vx/venti.S
src/9vx/data2s
src/9vx/a/errstr.h
src/9vx/kerndate.h
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
@@ -3,3 +3,9 @@ The following people have contributed source code to vx32.
Bryan Ford <baford@pdos.csail.mit.edu>
Michael Teichgräber <mt4swm@googlemail.com>
Russ Cox <rsc@swtch.com>
+Jesus Galan Lopez <yiyu.jgl@gmail.com>
+Tuly Gray
+Devon H. O'Dell
+Ron Minnich
+Erik Quanstrom
+Brian L. Stuart
diff --git a/doc/9vx.1 b/doc/9vx.1
@@ -0,0 +1,127 @@
+.TH 9VX 1
+.SH NAME
+9vx, 9vx-tap \- Plan9 port to the virtual execution environment vx32
+.SH SYNOPSIS
+.B 9vx
+[
+.I option ...
+]
+[
+.I -p 9vx.ini
+]
+[
+.I -r root
+]
+[
+.I -u user
+]
+.PP
+.B 9vx-tap
+[
+.I option ...
+]
+[
+.I -p 9vx.ini
+]
+[
+.I -r root
+]
+[
+.I -u user
+]
+.SH DESCRIPTION
+Plan 9 VX (or
+.I 9vx
+for short) is a port of the Plan 9 operating system to run on top of commodity operating systems, allowing the use of both Plan 9 and the host system simultaneously. To run user programs,
+.I 9vx
+creates an appropriate address space in a window within its own address space and invokes vx32 to simulate user mode execution. Some hardware devices are replaced by virtual versions, depending on the options given to
+.I 9vx.
+.I 9vx-tap
+is a shell script that sets up a tap device with tunctl(1), launches
+.I 9vx,
+and removes the tap device when finished.
+.PP
+Options can be passed to
+.I 9vx
+as command line arguments or in a configuration file with the
+.I -p
+option (see below). If no
+.I root
+argument is present, the current directory or
+.I /usr/local/9vx
+is used.
+Alternatively, a file system can be specified in the 9vx.ini file.
+If a
+.I user
+is not specified, the current user in the host operating system will be used.
+Other options are:
+.nr xx \w'\fL-m\f2name\ \ '
+.TP \n(xxu
+.BI -b
+Run /boot/boot instead of bootscript
+.TP
+.BI -f
+Do not fork at init
+.TP
+.BI -g
+Do not start the gui
+.TP
+.BI -i
+Run rc instead of init
+.TP
+.BI -t
+Use tty for input/output
+.TP
+.BI -n " [ tap ] [ device ]"
+Create virtual ethernet devices. The
+.I tap
+option indicates that
+.I device
+is a tap device. Else, the virtual device will use pcap(3) to intercept packets going to
+.I device,
+and will therefore need root privileges. If a host
+.I device
+is not specified, pcap will use the first one available, and tap will use the
+.I tap0
+device. More than one virtual ethernet device can be used. In absence of virtual devices, the network stack of the host system will be used.
+.TP
+.BI -m " macaddress"
+Use the hardware address
+.I macaddress
+for the last given virtual network device.
+.SS 9vx.ini configuration files
+Configuration parameters can also be given to
+.I 9vx
+in the configuration file specified with the
+.I -p
+command line option.
+The file name
+.L -
+means the standard input.
+The file
+.I 9vx.ini
+has to contain a list of
+.I parameter=value
+pairs in a similar fashion to plan9.ini(8). Available options are
+.I bootboot,
+.I nofork,
+.I nogui,
+.I initrc,
+.I usetty,
+.I net,
+.I macaddr,
+.I localroot
+and
+.I user.
+Other options will be passed to the boot process as environment variables.
+.SH BUGS
+The menu system of plan9.ini(8) is not supported in
+.I 9vx.ini
+files.
+.P
+.I 9vx
+is not as stable as native Plan 9 systems.
+.SH "SEE ALSO"
+.br
+Bryan Ford and Russ Cox,
+``Vx32: Lightweight User-level Sandboxing on the x86''
diff --git a/src/9vx/9vx-tap b/src/9vx/9vx-tap
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+USERID=`whoami`
+
+# Create the tap device with tunctl
+IFACE=`sudo tunctl -b -u $USERID`
+# or openvpn
+#IFACE=tap0
+#sudo openvpn --mktun --dev $IFACE --user $USERID
+
+# Bring the tap device up
+sudo /sbin/ifconfig $IFACE 0.0.0.0 up
+
+# Add it to the bridge
+sudo /usr/sbin/brctl addif br0 $IFACE
+
+# Launch 9vx (use -f to not fork)
+9vx -f -n tap $IFACE $*
+
+# Bring the tap device down and disconnect from br0
+sudo /sbin/ifconfig $IFACE down
+sudo /usr/sbin/brctl delif br0 $IFACE
+
+# Remove the tap device with tunctl
+sudo tunctl -d $IFACE &> /dev/null
+# or openvpn
+#sudo openvpn --rmtun --dev $1
diff --git a/src/9vx/LICENSE b/src/9vx/LICENSE
@@ -4,6 +4,8 @@ Plan 9 from Bell Labs distribution, which carries this license.
The local changes are Copyright (c) 2006-2008 Russ Cox and
are distributed as contributions under the terms of this license.
+Other contributors are listed in the AUTHORS file.
+
===================================================================
diff --git a/src/9vx/Makefrag b/src/9vx/Makefrag
@@ -29,13 +29,12 @@ PLAN9_OBJS = \
devaudio.o \
devaudio-$(PLAN9AUDIO).o \
devfs-posix.o \
- devip.o \
- devip-posix.o \
devmntloop.o \
devmouse.o \
devram.o \
devtab.o \
factotum.o \
+ fossil.o \
kprocdev.o \
label.o \
main.o \
@@ -47,6 +46,7 @@ PLAN9_OBJS = \
time.o \
trap.o \
tty.o \
+ venti.o \
vx32.o \
)
@@ -58,7 +58,6 @@ PLAN9_A_OBJS = \
$(addprefix 9vx/a/, \
allocb.o \
auth.o \
- bo.o \
chan.o \
classmask.o \
cleanname.o \
@@ -91,6 +90,7 @@ PLAN9_A_OBJS = \
page.o \
parse.o \
parseip.o \
+ part.o \
pgrp.o \
print.o \
proc.o \
@@ -111,6 +111,48 @@ PLAN9_A_OBJS = \
utf.o \
)
+PLAN9_IP_OBJS = \
+ $(addprefix 9vx/,\
+ devip.o \
+ devip-posix.o \
+ etherpcap.o \
+ ethertap.o \
+ vether.o \
+ ) \
+ $(addprefix 9vx/a/,\
+ devaoe.o \
+ devether.o \
+ netif.o \
+ sdaoe.o \
+ ) \
+ $(addprefix 9vx/a/ip/,\
+ arp.o \
+ chandial.o \
+ devip.o \
+ esp.o \
+ ethermedium.o \
+ gre.o \
+ icmp.o \
+ icmp6.o \
+ il.o \
+ inferno.o \
+ ip.o \
+ ipaux.o \
+ ipifc.o \
+ ipmux.o \
+ iproute.o \
+ ipv6.o \
+ loopbackmedium.o \
+ netdevmedium.o \
+ netlog.o \
+ nullmedium.o \
+ pktmedium.o \
+ ptclbsum.o \
+ tcp.o \
+ udp.o \
+ )
+PLAN9_IP_LIBS = -lpcap
+
PLAN9_nogui_OBJS = \
$(addprefix 9vx/,\
nogui.o \
@@ -142,6 +184,7 @@ PLAN9_GUI_LIBS = $(PLAN9_$(PLAN9GUI)_LIBS)
PLAN9_DEPS = \
$(PLAN9_OBJS) \
$(PLAN9_A_OBJS) \
+ $(PLAN9_IP_OBJS) \
$(PLAN9_GUI_OBJS) \
9vx/libsec/libsec.a \
9vx/libmemlayer/libmemlayer.a \
@@ -150,7 +193,7 @@ PLAN9_DEPS = \
libvx32/libvx32.a \
9vx/9vx: $(PLAN9_DEPS)
- $(HOST_CC) -o $@ $(PLAN9_DEPS) $(PLAN9_GUI_LIBS) -lpthread
+ $(HOST_CC) -o $@ $(PLAN9_DEPS) $(PLAN9_GUI_LIBS) $(PLAN9_IP_LIBS) -lpthread
9vx/a/%.o: 9vx/a/%.c
$(HOST_CC) $(HOST_CFLAGS) -I. -I9vx -I9vx/a -Wall -Wno-missing-braces -c -o $@ $<
@@ -176,6 +219,12 @@ PLAN9_DEPS = \
9vx/factotum.S: 9vx/data2s 9vx/factotum.9
./9vx/data2s factotum < 9vx/factotum.9 >$@_ && mv $@_ $@
+9vx/fossil.S: 9vx/data2s 9vx/fossil.9
+ ./9vx/data2s fossil < 9vx/fossil.9 >$@_ && mv $@_ $@
+
+9vx/venti.S: 9vx/data2s 9vx/venti.9
+ ./9vx/data2s venti < 9vx/venti.9 > $@_ && mv $@_ $@
+
9vx/a/errstr.h: 9vx/a/error.h
sed 's/extern //; s!;.*/\* ! = "!; s! \*\/!";!' 9vx/a/error.h >9vx/a/errstr.h
@@ -199,7 +248,10 @@ CLEAN_FILES += \
9vx/a/errstr.h \
9vx/9vx \
9vx/data2s \
- 9vx/bootcode.S
+ 9vx/bootcode.S \
+ 9vx/factotum.S \
+ 9vx/fossil.S \
+ 9vx/venti.S
include 9vx/libdraw/Makefrag
include 9vx/libmemlayer/Makefrag
diff --git a/src/9vx/a/aoe.h b/src/9vx/a/aoe.h
@@ -0,0 +1,84 @@
+enum {
+ ACata,
+ ACconfig,
+};
+
+enum {
+ AQCread,
+ AQCtest,
+ AQCprefix,
+ AQCset,
+ AQCfset,
+};
+
+enum {
+ AEcmd = 1,
+ AEarg,
+ AEdev,
+ AEcfg,
+ AEver,
+};
+
+enum {
+ Aoetype = 0x88a2,
+ Aoesectsz = 512,
+ Szaoeata = 24+12,
+ Szaoeqc = 24+8,
+ Aoever = 1,
+
+ AFerr = 1<<2,
+ AFrsp = 1<<3,
+
+ AAFwrite= 1,
+ AAFext = 1<<6,
+};
+
+/*
+ * Fields common to every AoE frame; Aoeata and Aoeqc repeat these
+ * members and append their own.  Multi-byte values are kept as byte
+ * arrays and filled in with hnputs/hnputl by devaoe.c.
+ * The members add up to 24 bytes if Eaddrlen is 6 (TODO confirm),
+ * which matches the 24 used in Szaoeata and Szaoeqc.
+ */
+typedef struct {
+ uchar dst[Eaddrlen];
+ uchar src[Eaddrlen];
+ uchar type[2]; /* devaoe.c stores Aoetype here */
+ uchar verflag; /* Aoever<<4; AFerr is tested in this byte */
+ uchar error;
+ uchar major[2];
+ uchar minor;
+ uchar cmd; /* ACata or ACconfig */
+ uchar tag[4];
+} Aoehdr;
+
+/*
+ * ATA command frame: the Aoehdr members followed by 12 bytes of
+ * ATA-specific fields (the "+12" in Szaoeata).
+ */
+typedef struct {
+ uchar dst[Eaddrlen];
+ uchar src[Eaddrlen];
+ uchar type[2];
+ uchar verflag;
+ uchar error;
+ uchar major[2];
+ uchar minor;
+ uchar cmd;
+ uchar tag[4];
+ uchar aflag; /* AAFwrite, AAFext */
+ uchar errfeat;
+ uchar scnt; /* count of Aoesectsz-byte sectors */
+ uchar cmdstat;
+ uchar lba[6]; /* least significant byte first, see putlba */
+ uchar res[2];
+} Aoeata;
+
+/*
+ * Query-config frame: the Aoehdr members followed by 8 bytes of
+ * config fields (the "+8" in Szaoeqc).
+ */
+typedef struct {
+ uchar dst[Eaddrlen];
+ uchar src[Eaddrlen];
+ uchar type[2];
+ uchar verflag;
+ uchar error;
+ uchar major[2];
+ uchar minor;
+ uchar cmd;
+ uchar tag[4];
+ uchar bufcnt[2];
+ uchar fwver[2];
+ uchar scnt;
+ uchar verccmd; /* devaoe.c stores an AQC* command here */
+ uchar cslen[2]; /* length of the config string */
+} Aoeqc;
+
+extern char Echange[];
+extern char Enotup[];
diff --git a/src/9vx/a/chan.c b/src/9vx/a/chan.c
@@ -28,7 +28,7 @@ struct Elemlist
{
char *aname; /* original name */
char *name; /* copy of name, so '/' can be overwritten */
- int nelems;
+ uint nelems;
char **elems;
int *off;
int mustbedir;
diff --git a/src/9vx/a/devaoe.c b/src/9vx/a/devaoe.c
@@ -0,0 +1,2575 @@
+/*
+ * © 2005-8 coraid
+ * aoe storage initiator
+ */
+
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "ureg.h"
+#include "error.h"
+#include "netif.h"
+#include "etherif.h"
+#include "ip/ip.h"
+#include "aoe.h"
+
+#define WAKEUP(x) wakeup(&((x)->rend))
+#define SLEEP(a,b,c) sleep(&(a->rend), b, c)
+
+//#pragma varargck argpos eventlog 1
+
+#define dprint(...) if(debug) eventlog(__VA_ARGS__); else USED(debug);
+#define uprint(...) snprint(up->genbuf, sizeof up->genbuf, __VA_ARGS__);
+
+enum {
+ Maxunits = 0xff,
+ Maxframes = 128,
+ Maxmtu = 100000,
+ Ndevlink = 6,
+ Nea = 6,
+ Nnetlink = 6,
+};
+
+/*
+ * qid path layout: bits 0-3 file type, bits 4-11 unit,
+ * bits 12-15 devlink index.  Q3 has to put the devlink index where
+ * L() reads it; a shift of 8 landed inside the unit field decoded
+ * by UNIT(), so devlink files resolved to the wrong unit and link.
+ */
+#define TYPE(q) ((ulong)(q).path & 0xf)
+#define UNIT(q) (((ulong)(q).path>>4) & 0xff)
+#define L(q) (((ulong)(q).path>>12) & 0xf)
+#define QID(u, t) ((u)<<4 | (t))
+#define Q3(l, u, t) ((l)<<12 | QID(u, t))
+#define UP(d) ((d)->flag & Dup)
+
+#define Ticks msec()
+#define Ms2tk(t) (((t)*HZ)/1000)
+#define Tk2ms(t) (((t)*1000)/HZ)
+
+enum {
+ Qzero,
+ Qtopdir = 1,
+ Qtopbase,
+ Qtopctl = Qtopbase,
+ Qtoplog,
+ Qtopend,
+
+ Qunitdir,
+ Qunitbase,
+ Qctl = Qunitbase,
+ Qdata,
+ Qconfig,
+ Qident,
+
+ Qdevlinkdir,
+ Qdevlinkbase,
+ Qdevlink = Qdevlinkbase,
+ Qdevlinkend,
+
+ Qtopfiles = Qtopend-Qtopbase,
+ Qdevlinkfiles = Qdevlinkend-Qdevlinkbase,
+
+ Eventlen = 256,
+ Nevents = 64,
+
+ Fread = 0,
+ Fwrite,
+ Tfree = -1,
+ Tmgmt,
+
+ /* round trip bounds, timeouts, in ticks */
+ Rtmax = Ms2tk(320),
+ Rtmin = Ms2tk(20),
+ Srbtimeout = 45*HZ,
+
+ Dbcnt = 1024,
+
+ Crd = 0x20,
+ Crdext = 0x24,
+ Cwr = 0x30,
+ Cwrext = 0x34,
+ Cid = 0xec,
+};
+
+enum {
+ Read,
+ Write,
+};
+
+/*
+ * unified set of flags
+ * a Netlink + Aoedev must both be jumbo capable
+ * to send jumbograms to that interface.
+ */
+enum {
+ /* sync with ahci.h */
+ Dllba = 1<<0,
+ Dsmart = 1<<1,
+ Dpower = 1<<2,
+ Dnop = 1<<3,
+ Datapi = 1<<4,
+ Datapi16= 1<<5,
+
+ /* aoe specific */
+ Dup = 1<<6,
+ Djumbo = 1<<7,
+};
+
+static char *flagname[] = {
+ "llba",
+ "smart",
+ "power",
+ "nop",
+ "atapi",
+ "atapi16",
+
+ "up",
+ "jumbo",
+};
+
+typedef struct {
+ uchar flag;
+ uchar lostjumbo;
+ int datamtu;
+
+ Chan *cc;
+ Chan *dc;
+ Chan *mtu; /* open early to prevent bind issues. */
+ char path[Maxpath];
+ uchar ea[Eaddrlen];
+} Netlink;
+
+typedef struct {
+ Netlink *nl;
+ int nea;
+ ulong eaidx;
+ uchar eatab[Nea][Eaddrlen];
+ int datamtu;
+ ulong npkt;
+ ulong resent;
+ uchar flag;
+
+ ulong rttavg;
+ ulong mintimer;
+} Devlink;
+
+typedef struct Srb Srb;
+struct Srb {
+ Rendez rend;
+ Srb *next;
+ ulong ticksent;
+ ulong len;
+ vlong sector;
+ short write;
+ short nout;
+ char *error;
+ void *dp;
+ void *data;
+};
+
+typedef struct {
+ int tag;
+ ulong bcnt;
+ ulong dlen;
+ vlong lba;
+ ulong ticksent;
+ int nhdr;
+ uchar hdr[ETHERMINTU];
+ void *dp;
+ Devlink *dl;
+ Netlink *nl;
+ int eaidx;
+ Srb *srb;
+} Frame;
+
+typedef struct Aoedev Aoedev;
+struct Aoedev {
+ QLock qlock;
+ Aoedev *next;
+
+ ulong vers;
+
+ int ndl;
+ ulong dlidx;
+ Devlink *dl;
+ Devlink dltab[Ndevlink];
+
+ ushort fwver;
+ uchar flag;
+ int nopen;
+ int major;
+ int minor;
+ int unit;
+ int lasttag;
+ int nframes;
+ Frame *frames;
+ vlong bsize;
+ vlong realbsize;
+
+ uint maxbcnt;
+ uint maxmtu;
+ ulong lostjumbo;
+ ushort nout;
+ ushort maxout;
+ ulong lastwadj;
+ Srb *head;
+ Srb *tail;
+ Srb *inprocess;
+
+ char serial[20+1];
+ char firmware[8+1];
+ char model[40+1];
+ int nconfig;
+ uchar config[1024];
+ uchar ident[512];
+};
+
+//#pragma varargck type "æ" Aoedev*
+
+static struct {
+ Lock lk;
+ QLock qlock;
+ Rendez rend;
+ char buf[Eventlen*Nevents];
+ char *rp;
+ char *wp;
+} events;
+
+static struct {
+ RWlock rwlock;
+ int nd;
+ Aoedev *d;
+} devs;
+
+static struct {
+ Lock lk;
+ int reader[Nnetlink]; /* reader is running. */
+ Rendez rendez[Nnetlink]; /* confirm exit. */
+ Netlink nl[Nnetlink];
+} netlinks;
+
+extern Dev aoedevtab;
+static Ref units;
+static Ref drivevers;
+static int debug;
+static int autodiscover = 1;
+static int rediscover;
+ char Enotup[] = "aoe device is down";
+ char Echange[] = "media or partition has changed";
+
+/*
+ * Allocate a request with sz bytes of data space directly behind it.
+ * dp and data both start at that space; ticksent is set to now.
+ * Raises an error instead of dereferencing nil when memory is exhausted.
+ */
+static Srb*
+srballoc(ulong sz)
+{
+	Srb *srb;
+
+	srb = malloc(sizeof *srb+sz);
+	if(srb == nil)
+		error("srballoc: out of memory");
+	srb->dp = srb->data = srb+1;
+	srb->ticksent = Ticks;
+	return srb;
+}
+
+/*
+ * Allocate a request that transfers straight to or from the caller's
+ * buffer db; no data space is allocated.  The second argument is unused.
+ * Raises an error instead of dereferencing nil when memory is exhausted.
+ */
+static Srb*
+srbkalloc(void *db, ulong dummy)
+{
+	Srb *srb;
+
+	srb = malloc(sizeof *srb);
+	if(srb == nil)
+		error("srbkalloc: out of memory");
+	srb->dp = srb->data = db;
+	srb->ticksent = Ticks;
+	return srb;
+}
+
+#define srbfree(srb) free(srb)
+
+/*
+ * Fail a request: record the error string, count one fewer
+ * outstanding frame and wake the sleeper on the srb's rendezvous.
+ */
+static void
+srberror(Srb *srb, char *s)
+{
+ srb->error = s;
+ srb->nout--;
+ WAKEUP(srb);
+}
+
+/*
+ * Release frame f and, if it carried a request, fail that request
+ * with s and decrement the device's outstanding count.
+ * Frames that are already free are left alone.
+ */
+static void
+frameerror(Aoedev *d, Frame *f, char *s)
+{
+ Srb *srb;
+
+ srb = f->srb;
+ if(f->tag == Tfree)
+ return;
+ f->srb = nil;
+ f->tag = Tfree; /* don't get fooled by way-slow responses */
+ if(!srb)
+ return;
+ srberror(srb, s);
+ d->nout--;
+}
+
+/*
+ * Format "major.minor" for d into up->genbuf and return that buffer;
+ * the text is only valid until the next use of up->genbuf.
+ */
+static char*
+unitname(Aoedev *d)
+{
+ uprint("%d.%d", d->major, d->minor);
+ return up->genbuf;
+}
+
+/*
+ * Copy the record at the read pointer of the event ring into a.
+ * Each Eventlen-byte slot holds one length byte followed by the text.
+ * Returns the number of bytes copied (at most n; any remainder of the
+ * record is dropped) or 0 when the slot at the read pointer is empty.
+ */
+static long
+eventlogread(void *a, long n)
+{
+	int len;
+	char *p, *buf;
+
+	buf = smalloc(Eventlen);
+	QLOCK(&events);
+	LOCK(&events);
+	p = events.rp;
+	/*
+	 * eventlog stores lengths up to Eventlen-2 in a char; read the
+	 * byte back unsigned so lengths above 127 do not turn negative
+	 * and end up as a negative memmove count.
+	 */
+	len = (uchar)*p;
+	if(len == 0){
+		n = 0;
+		UNLOCK(&events);
+	} else {
+		if(n > len)
+			n = len;
+		/* can't move directly into pageable space with events lock held */
+		memmove(buf, p+1, n);
+		*p = 0;
+		events.rp = p += Eventlen;
+		if(p >= events.buf + sizeof events.buf)
+			events.rp = events.buf;
+		UNLOCK(&events);
+
+		/* the concern here is page faults in memmove below */
+		if(waserror()){
+			free(buf);
+			QUNLOCK(&events);
+			nexterror();
+		}
+		memmove(a, buf, n);
+		poperror();
+	}
+	free(buf);
+	QUNLOCK(&events);
+	return n;
+}
+
+/*
+ * Format a message into the slot at the write pointer of the event
+ * ring, store its length in the slot's first byte and advance the
+ * write pointer (wrapping at the end of events.buf).  If the slot
+ * still held an unread record, the read pointer is moved along with
+ * the write pointer.  Wakes any sleeper on events and returns the
+ * formatted length.
+ */
+static int
+eventlog(char *fmt, ...)
+{
+	int dragrp, n;
+	char *p;
+	va_list arg;
+
+	LOCK(&events);
+	p = events.wp;
+	dragrp = *p++;
+	va_start(arg, fmt);
+	n = vsnprint(p, Eventlen-1, fmt, arg);
+	va_end(arg);	/* every va_start needs its va_end */
+	*--p = n;
+	p = events.wp += Eventlen;
+	if(p >= events.buf + sizeof events.buf)
+		p = events.wp = events.buf;
+	if(dragrp)
+		events.rp = p;
+	UNLOCK(&events);
+	WAKEUP(&events);
+	return n;
+}
+
+/*
+ * Number of unread records in the event ring.
+ * rp and wp are byte pointers into events.buf, so the distance between
+ * them is computed in bytes and divided by Eventlen at the end.
+ */
+static int
+eventcount(void)
+{
+	int n;
+
+	LOCK(&events);
+	if(*events.rp == 0)
+		n = 0;
+	else if(events.wp <= events.rp)
+		/*
+		 * wrapped, or completely full when wp == rp with a record
+		 * pending; measure against the buffer size in bytes, not
+		 * against the record count Nevents.
+		 */
+		n = sizeof events.buf - (events.rp - events.wp);
+	else
+		n = events.wp - events.rp;
+	UNLOCK(&events);
+	return n/Eventlen;
+}
+
+/*
+ * Ticks elapsed since tag was created, modulo 2^16: the low 16 bits
+ * of a tag hold the tick count at creation (see newtag).
+ */
+static int
+tsince(int tag)
+{
+	int delta;
+
+	delta = (Ticks & 0xffff) - (tag & 0xffff);
+	if(delta < 0)
+		delta += 1<<16;
+	return delta;
+}
+
+/*
+ * Build a fresh tag for d: the incremented per-device sequence number
+ * in the high half, the current tick count in the low 16 bits.
+ * The reserved values Tfree and Tmgmt are never handed out.
+ */
+static int
+newtag(Aoedev *d)
+{
+	int t;
+
+	for(;;){
+		t = ++d->lasttag << 16;
+		t |= Ticks & 0xffff;
+		if(t != Tfree && t != Tmgmt)
+			return t;
+	}
+}
+
+/*
+ * Mark d as down and fail every frame with Enotup.
+ * err only appears in the logged message; requests always see Enotup.
+ */
+static void
+downdev(Aoedev *d, char *err)
+{
+ Frame *f, *e;
+
+ d->flag &= ~Dup;
+ f = d->frames;
+ e = f + d->nframes;
+ /* the increment clause also resets frames frameerror returned early on */
+ for(; f < e; f->tag = Tfree, f->srb = nil, f++)
+ frameerror(d, f, Enotup);
+ d->inprocess = nil;
+ eventlog("%æ: removed; %s\n", d, err);
+}
+
+/*
+ * Build the Block to transmit for frame f: header, then dlen bytes of
+ * data, padded up to ETHERMINTU.  The padding is cleared so that no
+ * stale buffer contents go out on the wire (discover() likewise clears
+ * its block after allocb, which suggests allocb does not zero memory).
+ */
+static Block*
+allocfb(Frame *f)
+{
+	int len, used;
+	Block *b;
+
+	used = f->nhdr + f->dlen;
+	len = used;
+	if(len < ETHERMINTU)
+		len = ETHERMINTU;
+	b = allocb(len);
+	memmove(b->wp, f->hdr, f->nhdr);
+	if(f->dlen)
+		memmove(b->wp + f->nhdr, f->dp, f->dlen);
+	if(len > used)
+		memset(b->wp + used, 0, len - used);
+	b->wp += len;
+	return b;
+}
+
+/*
+ * Store the low 48 bits of lba into a->lba, least significant byte first.
+ */
+static void
+putlba(Aoeata *a, vlong lba)
+{
+	int i;
+
+	for(i = 0; i < 6; i++)
+		a->lba[i] = lba >> (8*i);
+}
+
+/*
+ * Round-robin over d's devlinks, starting at d->dlidx, and return the
+ * first one that is up; 0 if none is.  d->dlidx advances once per
+ * link examined.
+ */
+static Devlink*
+pickdevlink(Aoedev *d)
+{
+	ulong tries;
+	Devlink *l;
+
+	for(tries = 0; tries < d->ndl; tries++){
+		l = d->dl + d->dlidx++ % d->ndl;
+		if(l == nil)
+			continue;
+		if(l->flag & Dup)
+			return l;
+	}
+	return 0;
+}
+
+/*
+ * Choose the next target address index on l, round-robin.
+ * Returns -1 when there is no link or the link has no addresses.
+ */
+static int
+pickea(Devlink *l)
+{
+	if(l == 0 || l->nea == 0)
+		return -1;
+	return l->eaidx++ % l->nea;
+}
+
+/*
+ * Fill in the common header h of frame f for command cmd and give the
+ * frame a new tag.  A frame whose request is older than Srbtimeout is
+ * failed with Etimedout instead.  If no devlink that is up, or no
+ * target address, can be picked, the whole device is taken down.
+ * Returns the new tag, or -1 on either failure.
+ */
+static int
+hset(Aoedev *d, Frame *f, Aoehdr *h, int cmd)
+{
+ int i;
+ Devlink *l;
+
+ if(f->srb)
+ if((long)(Ticks-f->srb->ticksent) > Srbtimeout){
+ eventlog("%æ: srb timeout\n", d);
+ frameerror(d, f, Etimedout);
+ return -1;
+ }
+ l = pickdevlink(d);
+ i = pickea(l); /* pickea copes with l == 0 */
+ if(i == -1){
+ downdev(d, "resend fails; no netlink/ea");
+ return -1;
+ }
+ memmove(h->dst, l->eatab[i], Eaddrlen);
+ memmove(h->src, l->nl->ea, sizeof h->src);
+ hnputs(h->type, Aoetype);
+ h->verflag = Aoever << 4;
+ h->error = 0;
+ hnputs(h->major, d->major);
+ h->minor = d->minor;
+ h->cmd = cmd;
+
+ hnputl(h->tag, f->tag = newtag(d));
+ /* remember the route so the sweeper and the response path can find it */
+ f->dl = l;
+ f->nl = l->nl;
+ f->eaidx = i;
+ f->ticksent = Ticks;
+
+ return f->tag;
+}
+
+/*
+ * Retransmit frame f with a fresh header and tag, shrinking it to
+ * d->maxbcnt if the allowed size dropped since it was first sent.
+ * Returns 0 on success, -1 if hset fails or the write raises an error.
+ */
+static int
+resend(Aoedev *d, Frame *f)
+{
+ ulong n;
+ Aoeata *a;
+
+ a = (Aoeata*)f->hdr;
+ if(hset(d, f, (Aoehdr*)a, a->cmd) == -1)
+ return -1;
+ n = f->bcnt;
+ if(n > d->maxbcnt){
+ n = d->maxbcnt; /* mtu mismatch (jumbo fail?) */
+ if(f->dlen > n)
+ f->dlen = n;
+ }
+ a->scnt = n / Aoesectsz;
+ f->dl->resent++;
+ f->dl->npkt++;
+ if(waserror())
+ /* should remove the netlink */
+ return -1;
+ devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
+ poperror();
+ return 0;
+}
+
+/*
+ * Send a config query (ACconfig) for major.minor, addressed to the
+ * all-ones destination, on every netlink whose cc channel is set.
+ */
+static void
+discover(int major, int minor)
+{
+ Aoehdr *h;
+ Block *b;
+ Netlink *nl, *e;
+
+ nl = netlinks.nl;
+ e = nl + nelem(netlinks.nl);
+ for(; nl < e; nl++){
+ if(nl->cc == nil)
+ continue;
+ b = allocb(ETHERMINTU);
+ if(waserror()){
+ freeb(b);
+ nexterror();
+ }
+ b->wp = b->rp + ETHERMINTU;
+ memset(b->rp, 0, ETHERMINTU);
+ h = (Aoehdr*)b->rp;
+ memset(h->dst, 0xff, sizeof h->dst);
+ memmove(h->src, nl->ea, sizeof h->src);
+ hnputs(h->type, Aoetype);
+ h->verflag = Aoever << 4;
+ hnputs(h->major, major);
+ h->minor = minor;
+ h->cmd = ACconfig;
+ poperror();
+ /* NOTE(review): presumably bwrite takes ownership of b - confirm */
+ devtab[nl->dc->type]->bwrite(nl->dc, b, 0);
+ }
+}
+
+/*
+ * Kernel process body; never returns.  About every Nms milliseconds:
+ * check all frames on every device that is up and resend any frame
+ * that has been outstanding for 200% of the device round trip time
+ * average.  Each resend round doubles the link's rttavg (capped at
+ * Rtmax) and may shrink the device's send window; the window grows
+ * again after 10 seconds without adjustment.  Every Nbcms milliseconds
+ * a discovery query for all targets is sent if rediscover is set.
+ */
+static void
+aoesweepproc(void *dummy)
+{
+	ulong i, tx, timeout, nbc;
+	vlong starttick, elapsed;
+	enum { Nms = 100, Nbcms = 30*1000, };
+	uchar *ea;
+	Aoeata *a;
+	Aoedev *d;
+	Devlink *l;
+	Frame *f, *e;
+
+	nbc = Nbcms/Nms;
+loop:
+	if(nbc-- == 0){
+		if(rediscover && !waserror()){
+			discover(0xffff, 0xff);
+			poperror();
+		}
+		nbc = Nbcms/Nms;
+	}
+	starttick = Ticks;
+	RLOCK(&devs);
+	for(d = devs.d; d; d = d->next){
+		/* never block here: skip devices that are busy */
+		if(!CANQLOCK(d))
+			continue;
+		if(!UP(d)){
+			QUNLOCK(d);
+			continue;
+		}
+		tx = 0;
+		f = d->frames;
+		e = f + d->nframes;
+		for (; f < e; f++){
+			if(f->tag == Tfree)
+				continue;
+			l = f->dl;
+			timeout = l->rttavg << 1;
+			i = tsince(f->tag);
+			if(i < timeout)
+				continue;
+			if(d->nout == d->maxout){
+				if(d->maxout > 1)
+					d->maxout--;
+				d->lastwadj = Ticks;
+			}
+			a = (Aoeata*)f->hdr;
+			if(a->scnt > Dbcnt / Aoesectsz &&
+			   ++f->nl->lostjumbo > (d->nframes << 1)){
+				ea = f->dl->eatab[f->eaidx];
+				eventlog("%æ: jumbo failure on %s:%E; lba%lld\n",
+					d, f->nl->path, ea, f->lba);
+				d->maxbcnt = Dbcnt;
+				d->flag &= ~Djumbo;
+			}
+			resend(d, f);
+			if(tx++ == 0){
+				if((l->rttavg <<= 1) > Rtmax)
+					l->rttavg = Rtmax;
+				eventlog("%æ: rtt %ldms\n", d, Tk2ms(l->rttavg));
+			}
+		}
+		if(d->nout == d->maxout && d->maxout < d->nframes &&
+		   TK2MS(Ticks-d->lastwadj) > 10*1000){
+			d->maxout++;
+			d->lastwadj = Ticks;
+		}
+		QUNLOCK(d);
+	}
+	RUNLOCK(&devs);
+	/*
+	 * Sleep for what is left of this period.  Compare before
+	 * subtracting: the old "i = Nms - elapsed; if(i > 0)" was done
+	 * in an unsigned long, so a sweep longer than Nms wrapped
+	 * around to an enormous sleep.
+	 */
+	elapsed = TK2MS(Ticks - starttick);
+	if(elapsed < Nms){
+		i = Nms - elapsed;
+		tsleep(&up->sleep, return0, 0, i);
+	}
+	goto loop;
+}
+
+/*
+ * Print verb handler installed for 'æ' by aoeinit: takes an Aoedev*
+ * argument and formats it as "aoeMAJOR.MINOR".
+ */
+static int
+fmtaoe(Fmt *f)
+{
+ char buf[16];
+ Aoedev *d;
+
+ d = va_arg(f->args, Aoedev*);
+ snprint(buf, sizeof buf, "aoe%d.%d", d->major, d->minor);
+ return fmtstrcpy(f, buf);
+}
+
+static void netbind(char *path);
+
+/*
+ * Bind the interfaces named by the "aoeif" configuration variable.
+ * Currently disabled: the getconf lookup is commented out and the
+ * function returns immediately, so the loop below never runs
+ * (n and f are not initialised in this state).
+ */
+static void
+aoecfg(void)
+{
+ int n, i;
+ char *p, *f[32], buf[24];
+
+ if(1)
+// if((p = getconf("aoeif")) == nil || (n = tokenize(p, f, nelem(f))) < 1)
+ return;
+ /* goo! */
+ for(i = 0; i < n; i++){
+ p = f[i];
+ /* accept "etherN" or "#lN" and turn both into "#lN/etherN" */
+ if(strncmp(p, "ether", 5) == 0)
+ snprint(buf, sizeof buf, "#l%c/ether%c", p[5], p[5]);
+ else if(strncmp(p, "#l", 2) == 0)
+ snprint(buf, sizeof buf, "#l%c/ether%c", p[2], p[2]);
+ else
+ continue;
+ if(!waserror()){
+ netbind(buf);
+ poperror();
+ }
+ }
+}
+
+/*
+ * One-time driver setup: install the 'æ' print verb, reset the event
+ * ring, start the sweeper process and run aoecfg.
+ * NOTE(review): a caller that finds the lock taken returns at once,
+ * possibly before the first caller has finished initialising.
+ */
+static void
+aoeinit(void)
+{
+ static int init;
+ static QLock l;
+
+ if(!canqlock(&l))
+ return;
+ if(init == 0){
+ fmtinstall(L'æ', fmtaoe);
+ events.rp = events.wp = events.buf;
+ kproc("aoesweep", aoesweepproc, nil);
+ aoecfg();
+ init = 1;
+ }
+ qunlock(&l);
+}
+
+/*
+ * Attach to the device.  Only the empty spec is accepted; anything
+ * else raises Enonexist.  The returned channel refers to the root
+ * directory (Qzero).
+ */
+static Chan*
+aoeattach(char *spec)
+{
+ Chan *c;
+
+ if(*spec)
+ error(Enonexist);
+ aoeinit();
+ c = devattach(L'æ', spec);
+ mkqid(&c->qid, Qzero, 0, QTDIR);
+ return c;
+}
+
+/*
+ * Return the device at position unit (counting from 0) in the device
+ * list, or nil if the list is shorter than that.
+ */
+static Aoedev*
+unitseq(ulong unit)
+{
+	Aoedev *d;
+
+	RLOCK(&devs);
+	d = devs.d;
+	while(d != nil && unit-- != 0)
+		d = d->next;
+	RUNLOCK(&devs);
+	return d;
+}
+
+/*
+ * Find the device whose unit number is unit.
+ * Raises "unit lookup failure" if there is none.
+ */
+static Aoedev*
+unit2dev(ulong unit)
+{
+	Aoedev *d;
+
+	RLOCK(&devs);
+	d = devs.d;
+	while(d != nil && d->unit != unit)
+		d = d->next;
+	RUNLOCK(&devs);
+	if(d == nil)
+		error("unit lookup failure");
+	return d;
+}
+
+/*
+ * Fill dp with the directory entry of file `type' inside the unit
+ * directory that c refers to.  Returns 1 on success, -1 for a type
+ * that is not a unit file.  The data, config and ident files report
+ * a size only while the device is up.
+ */
+static int
+unitgen(Chan *c, ulong type, Dir *dp)
+{
+ int perm, t;
+ ulong vers;
+ vlong size;
+ char *p;
+ Aoedev *d;
+ Qid q;
+
+ d = unit2dev(UNIT(c->qid));
+ perm = 0644;
+ size = 0;
+ vers = d->vers;
+ t = QTFILE;
+
+ switch(type){
+ default:
+ return -1;
+ case Qctl:
+ p = "ctl";
+ break;
+ case Qdata:
+ p = "data";
+ perm = 0640;
+ if(UP(d))
+ size = d->bsize;
+ break;
+ case Qconfig:
+ p = "config";
+ if(UP(d))
+ size = d->nconfig;
+ break;
+ case Qident:
+ p = "ident";
+ if(UP(d))
+ size = sizeof d->ident;
+ break;
+ case Qdevlinkdir:
+ p = "devlink";
+ t = QTDIR;
+ perm = 0555;
+ break;
+ }
+ mkqid(&q, QID(UNIT(c->qid), type), vers, t);
+ devdir(c, q, p, size, eve, perm, dp);
+ return 1;
+}
+
+/*
+ * Fill d with the directory entry of a top-level file (ctl or log).
+ * Returns 1 on success, -1 for any other type.  The log's size is
+ * reported as the value of eventcount().
+ */
+static int
+topgen(Chan *c, ulong type, Dir *d)
+{
+ int perm;
+ vlong size;
+ char *p;
+ Qid q;
+
+ perm = 0444;
+ size = 0;
+ switch(type){
+ default:
+ return -1;
+ case Qtopctl:
+ p = "ctl";
+ perm = 0644;
+ break;
+ case Qtoplog:
+ p = "log";
+ size = eventcount();
+ break;
+ }
+ mkqid(&q, type, 0, QTFILE);
+ devdir(c, q, p, size, eve, perm, d);
+ return 1;
+}
+
+/*
+ * Directory generator handed to devwalk, devstat, devopen and
+ * devdirread.  Produces entry s (or the parent for DEVDOTDOT) of the
+ * directory c refers to; for a file it produces the file's own entry.
+ * The table arguments d0, d1 and d2 are unused.
+ * Returns 1 when dp was filled in, -1 when there is no such entry.
+ */
+static int
+aoegen(Chan *c, char *d0, Dirtab *d1, int d2, int s, Dir *dp)
+{
+	int i;
+	Aoedev *d;
+	Qid q;
+
+	if(c->qid.path == 0){
+		/*
+		 * use mkqid like the rest of the function: setting only
+		 * path and type left q.vers uninitialised in the entry
+		 * returned to the caller.
+		 */
+		switch(s){
+		case DEVDOTDOT:
+			mkqid(&q, 0, 0, QTDIR);
+			devdir(c, q, "#æ", 0, eve, 0555, dp);
+			break;
+		case 0:
+			mkqid(&q, Qtopdir, 0, QTDIR);
+			devdir(c, q, "aoe", 0, eve, 0555, dp);
+			break;
+		default:
+			return -1;
+		}
+		return 1;
+	}
+
+	switch(TYPE(c->qid)){
+	default:
+		return -1;
+	case Qtopdir:
+		if(s == DEVDOTDOT){
+			mkqid(&q, Qzero, 0, QTDIR);
+			devdir(c, q, "aoe", 0, eve, 0555, dp);
+			return 1;
+		}
+		/* the fixed top-level files come first, then one directory per unit */
+		if(s < Qtopfiles)
+			return topgen(c, Qtopbase + s, dp);
+		s -= Qtopfiles;
+		if((d = unitseq(s)) == 0)
+			return -1;
+		mkqid(&q, QID(d->unit, Qunitdir), 0, QTDIR);
+		devdir(c, q, unitname(d), 0, eve, 0555, dp);
+		return 1;
+	case Qtopctl:
+	case Qtoplog:
+		return topgen(c, TYPE(c->qid), dp);
+	case Qunitdir:
+		if(s == DEVDOTDOT){
+			mkqid(&q, QID(0, Qtopdir), 0, QTDIR);
+			uprint("%uld", UNIT(c->qid));
+			devdir(c, q, up->genbuf, 0, eve, 0555, dp);
+			return 1;
+		}
+		return unitgen(c, Qunitbase+s, dp);
+	case Qctl:
+	case Qdata:
+	case Qconfig:
+	case Qident:
+		return unitgen(c, TYPE(c->qid), dp);
+	case Qdevlinkdir:
+		i = UNIT(c->qid);
+		if(s == DEVDOTDOT){
+			mkqid(&q, QID(i, Qunitdir), 0, QTDIR);
+			devdir(c, q, "devlink", 0, eve, 0555, dp);
+			return 1;
+		}
+		if(i >= units.ref)
+			return -1;
+		d = unit2dev(i);
+		if(s >= d->ndl)
+			return -1;
+		uprint("%d", s);
+		mkqid(&q, Q3(s, i, Qdevlink), 0, QTFILE);
+		devdir(c, q, up->genbuf, 0, eve, 0755, dp);
+		return 1;
+	case Qdevlink:
+		uprint("%d", s);
+		mkqid(&q, Q3(s, UNIT(c->qid), Qdevlink), 0, QTFILE);
+		devdir(c, q, up->genbuf, 0, eve, 0755, dp);
+		return 1;
+	}
+}
+
+/* Walk entry point: delegates to devwalk with aoegen and no static table. */
+static Walkqid*
+aoewalk(Chan *c, Chan *nc, char **name, int nname)
+{
+ return devwalk(c, nc, name, nname, nil, 0, aoegen);
+}
+
+/* Stat entry point: delegates to devstat with aoegen and no static table. */
+static int
+aoestat(Chan *c, uchar *db, int n)
+{
+ return devstat(c, db, n, nil, 0, aoegen);
+}
+
+/*
+ * Open entry point.  Everything except a unit's data file goes
+ * straight to devopen.  Opening a data file requires the device to be
+ * up (Enotup otherwise) and bumps the device's open count, both under
+ * the device lock.
+ */
+static Chan*
+aoeopen(Chan *c, int omode)
+{
+ Aoedev *d;
+
+ if(TYPE(c->qid) != Qdata)
+ return devopen(c, omode, 0, 0, aoegen);
+
+ d = unit2dev(UNIT(c->qid));
+ QLOCK(d);
+ if(waserror()){
+ QUNLOCK(d);
+ nexterror();
+ }
+ if(!UP(d))
+ error(Enotup);
+ c = devopen(c, omode, 0, 0, aoegen);
+ d->nopen++;
+ poperror();
+ QUNLOCK(d);
+ return c;
+}
+
+/*
+ * Close entry point.  Only opened data files are counted; when the
+ * last one on a device is closed, a discovery query for that device
+ * is sent.  An error raised by discover is swallowed here.
+ */
+static void
+aoeclose(Chan *c)
+{
+ Aoedev *d;
+
+ if(TYPE(c->qid) != Qdata || (c->flag&COPEN) == 0)
+ return;
+
+ d = unit2dev(UNIT(c->qid));
+ QLOCK(d);
+ if(--d->nopen == 0 && !waserror()){
+ discover(d->major, d->minor);
+ poperror();
+ }
+ QUNLOCK(d);
+}
+
+/*
+ * Send the next piece of the request in d->inprocess using frame f.
+ * The piece is at most d->maxbcnt bytes; the request's data pointer,
+ * remaining length and sector are advanced past it, and d->inprocess
+ * is cleared once nothing remains.  Does nothing if hset fails.
+ */
+static void
+atarw(Aoedev *d, Frame *f)
+{
+ ulong bcnt;
+ char extbit, writebit;
+ Aoeata *ah;
+ Srb *srb;
+
+ extbit = 0x4;
+ writebit = 0x10;
+
+ srb = d->inprocess;
+ bcnt = d->maxbcnt;
+ if(bcnt > srb->len)
+ bcnt = srb->len;
+ f->nhdr = Szaoeata;
+ memset(f->hdr, 0, f->nhdr);
+ ah = (Aoeata*)f->hdr;
+ if(hset(d, f, (Aoehdr*)ah, ACata) == -1)
+ return;
+ f->dp = srb->dp;
+ f->bcnt = bcnt;
+ f->lba = srb->sector;
+ f->srb = srb;
+
+ ah->scnt = bcnt / Aoesectsz;
+ putlba(ah, f->lba);
+ if(d->flag & Dllba)
+ ah->aflag |= AAFext;
+ else {
+ extbit = 0;
+ ah->lba[3] &= 0x0f;
+ ah->lba[3] |= 0xe0; /* LBA bit+obsolete 0xa0 */
+ }
+ if(srb->write){
+ ah->aflag |= AAFwrite;
+ f->dlen = bcnt;
+ }else{
+ writebit = 0;
+ f->dlen = 0;
+ }
+ /* 0x20 | writebit | extbit yields Crd, Crdext, Cwr or Cwrext */
+ ah->cmdstat = 0x20 | writebit | extbit;
+
+ /* mark tracking fields and load out */
+ srb->nout++;
+ srb->dp = (uchar*)srb->dp + bcnt;
+ srb->len -= bcnt;
+ srb->sector += bcnt / Aoesectsz;
+ if(srb->len == 0)
+ d->inprocess = nil;
+ d->nout++;
+ f->dl->npkt++;
+ if(waserror()){
+ f->tag = Tfree;
+ d->inprocess = nil;
+ nexterror();
+ }
+ devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
+ poperror();
+}
+
+/*
+ * Translate the error code of a response header into a message.
+ * Returns 0 when AFerr is not set in the header; codes outside the
+ * table map to the generic "unknown" entry.
+ */
+static char*
+aoeerror(Aoehdr *h)
+{
+	int n;
+	static char *errs[] = {
+		"aoe protocol error: unknown",
+		"aoe protocol error: bad command code",
+		"aoe protocol error: bad argument param",
+		"aoe protocol error: device unavailable",
+		"aoe protocol error: config string present",
+		"aoe protocol error: unsupported version",
+	};
+
+	if((h->verflag & AFerr) == 0)
+		return 0;
+	n = h->error;
+	/* valid indices end at nelem(errs)-1; ">" let n == nelem(errs) read past the table */
+	if(n >= nelem(errs))
+		n = 0;
+	return errs[n];
+}
+
+/*
+ * Fold a round trip sample into l->rttavg.
+ * A negative rtt is taken as the magnitude -rtt, clamped to
+ * [Rtmin, Rtmax], and also pulls l->mintimer halfway towards it.
+ * A non-negative rtt is clamped to [l->mintimer, Rtmax].
+ * rttavg then moves a quarter of the way towards the sample.
+ */
+static void
+rtupdate(Devlink *l, int rtt)
+{
+ int n;
+
+ n = rtt;
+ if(rtt < 0){
+ n = -rtt;
+ if(n < Rtmin)
+ n = Rtmin;
+ else if(n > Rtmax)
+ n = Rtmax;
+ l->mintimer += (n - l->mintimer) >> 1;
+ } else if(n < l->mintimer)
+ n = l->mintimer;
+ else if(n > Rtmax)
+ n = Rtmax;
+
+ /* g == .25; cf. Congestion Avoidance and Control, Jacobson&Karels; 1988 */
+ n -= l->rttavg;
+ l->rttavg += n >> 2;
+}
+
+/*
+ * Sleep condition for a request: true once it has failed, or once no
+ * frames are outstanding and no data remains to be sent.
+ */
+static int
+srbready(void *v)
+{
+	Srb *s;
+
+	s = v;
+	if(s->error)
+		return 1;
+	return s->nout == 0 && s->len == 0;
+}
+
+/*
+ * Return the first frame of d whose tag equals tag, or nil.
+ */
+static Frame*
+getframe(Aoedev *d, int tag)
+{
+	int i;
+
+	for(i = 0; i < d->nframes; i++)
+		if(d->frames[i].tag == tag)
+			return &d->frames[i];
+	return nil;
+}
+
+/*
+ * Return an unused frame of d, or nil if the send window (maxout) is
+ * already full or no frame is free.
+ */
+static Frame*
+freeframe(Aoedev *d)
+{
+	if(d->nout >= d->maxout)
+		return nil;
+	return getframe(d, Tfree);
+}
+
+/*
+ * Send as much queued work as free frames allow.  When no request is
+ * in process the next one is taken from the head of the queue; the
+ * function returns once the queue is empty or no frame is available.
+ */
+static void
+work(Aoedev *d)
+{
+ Frame *f;
+
+ while(f = freeframe(d)) {
+ if(d->inprocess == nil){
+ if(d->head == nil)
+ return;
+ d->inprocess = d->head;
+ d->head = d->head->next;
+ if(d->head == nil)
+ d->tail = nil;
+ }
+ atarw(d, f);
+ }
+}
+
+/*
+ * Append srb to d's request queue, start sending, then sleep until
+ * srbready reports the request finished or failed.
+ */
+static void
+strategy(Aoedev *d, Srb *srb)
+{
+ QLOCK(d);
+ if(waserror()){
+ QUNLOCK(d);
+ nexterror();
+ }
+ srb->next = nil;
+ if(d->tail)
+ d->tail->next = srb;
+ d->tail = srb;
+ if(d->head == nil)
+ d->head = srb;
+ work(d);
+ poperror();
+ QUNLOCK(d);
+
+ /*
+ * an error raised while sleeping lands back in waserror and the
+ * sleep is simply retried - presumably so the srb is not abandoned
+ * while frames still refer to it (TODO confirm intent)
+ */
+ while(waserror())
+ ;
+ SLEEP(srb, srbready, srb);
+ poperror();
+}
+
+#define iskaddr(a) (!up || (uintptr)(a) > up->pmmu.uzero+USTKTOP)
+
+/*
+ * Read (write == 0) or write len bytes at byte offset off of device d,
+ * to or from db.  off and len must be multiples of Aoesectsz.  The
+ * transfer is clipped at the end of the device and returns 0 at or
+ * beyond it.  Data moves in pieces of at most Srbsz bytes through one
+ * reused request.  Raises Eio if the device goes down, or the
+ * request's error.  Returns the number of bytes transferred.
+ */
+static long
+rw(Aoedev *d, int write, uchar *db, long len, uvlong off)
+{
+	long n, nlen, copy;
+	enum { Srbsz = 1<<19, };
+	Srb *srb;
+
+	if((off|len) & (Aoesectsz-1))
+		error("offset and length must be sector multiple.\n");
+	if(off >= d->bsize)
+		return 0;
+	if(off + len > d->bsize)
+		len = d->bsize - off;
+	copy = 0;
+	if(iskaddr(db)){
+		/* the format used to ask for two pointers while passing one */
+		panic("iskaddr %p\n", db);
+		srb = srbkalloc(db, len);
+		copy = 1;
+	}else
+		srb = srballoc(Srbsz <= len? Srbsz: len);
+	if(waserror()){
+		srbfree(srb);
+		nexterror();
+	}
+	srb->write = write;
+	for(nlen = len; nlen; nlen -= n){
+		if(!UP(d))
+			error(Eio);
+		srb->sector = off / Aoesectsz;
+		srb->dp = srb->data;
+		n = nlen;
+		if(n > Srbsz)
+			n = Srbsz;
+		srb->len = n;
+		/* bounce through the srb's own buffer unless it aliases db */
+		if(write && !copy)
+			memmove(srb->data, db, n);
+		strategy(d, srb);
+		if(srb->error)
+			error(srb->error);
+		if(!write && !copy)
+			memmove(db, srb->data, n);
+		db += n;
+		off += n;
+	}
+	poperror();
+	srbfree(srb);
+	return len;
+}
+
+/*
+ * Copy up to n bytes starting at offset off of the size-byte object
+ * src into dst.  Returns the number of bytes copied, 0 when off is at
+ * or past the end.
+ */
+static long
+readmem(ulong off, void *dst, long n, void *src, long size)
+{
+ if(off >= size)
+ return 0;
+ if(off + n > size)
+ n = size - off;
+ memmove(dst, (uchar*)src + off, n);
+ return n;
+}
+
+/*
+ * Append the names of the bits set in f (see flagname), each followed
+ * by a space, and a final newline to the buffer s..e.
+ * Returns the new end of the text.
+ */
+static char*
+pflag(char *s, char *e, uchar f)
+{
+	int bit;
+	char **name;
+
+	bit = 1;
+	for(name = flagname; name < flagname + nelem(flagname); name++){
+		if(f & bit)
+			s = seprint(s, e, "%s ", *name);
+		bit <<= 1;
+	}
+	return seprint(s, e, "\n");
+}
+
+/*
+ * Produce the text of a unit's ctl file (state, counters, sizes,
+ * identification strings and flags) and return the part selected by
+ * off and len through readstr.
+ */
+static int
+pstat(Aoedev *d, char *db, int len, int off)
+{
+ int i;
+ char *state, *s, *p, *e;
+
+ s = p = malloc(1024);
+ e = p + 1024;
+
+ state = "down";
+ if(UP(d))
+ state = "up";
+
+ p = seprint(p, e,
+ "state: %s\n" "nopen: %d\n" "nout: %d\n"
+ "nmaxout: %d\n" "nframes: %d\n" "maxbcnt: %d [maxmtu %d]\n"
+ "fw: %.4ux\n"
+ "model: %s\n" "serial: %s\n" "firmware: %s\n",
+ state, d->nopen, d->nout,
+ d->maxout, d->nframes, d->maxbcnt, d->maxmtu,
+ d->fwver,
+ d->model, d->serial, d->firmware);
+ p = seprint(p, e, "flag: ");
+ p = pflag(p, e, d->flag);
+
+ /* never hand out more than was actually formatted */
+ if(p - s < len)
+ len = p - s;
+ i = readstr(off, db, len, s);
+ free(s);
+ return i;
+}
+
+/*
+ * Read from one of a unit's files.  Raises Echange when the device's
+ * version no longer matches the one recorded in the channel's qid,
+ * and Enotup for config/ident while the device is down.
+ */
+static long
+unitread(Chan *c, void *db, long len, vlong off)
+{
+ Aoedev *d;
+
+ d = unit2dev(UNIT(c->qid));
+ if(d->vers != c->qid.vers)
+ error(Echange);
+ switch(TYPE(c->qid)){
+ default:
+ error(Ebadarg);
+ case Qctl:
+ return pstat(d, db, len, off);
+ case Qdata:
+ return rw(d, Read, db, len, off);
+ case Qconfig:
+ if(!UP(d))
+ error(Enotup);
+ return readmem(off, db, len, d->config, d->nconfig);
+ case Qident:
+ if(!UP(d))
+ error(Enotup);
+ return readmem(off, db, len, d->ident, sizeof d->ident);
+ }
+}
+
+/*
+ * Produce the text of a devlink file: the link's target addresses,
+ * packet counters, flags and timers, followed by the path, address,
+ * flags and mtu data of the netlink it runs over.  Returns the part
+ * selected by off and len through readstr; 0 if the link index in the
+ * qid is beyond the device's link count.
+ */
+static int
+devlinkread(Chan *c, void *db, int len, int off)
+{
+	int i;
+	char *s, *p, *e;
+	Aoedev *d;
+	Devlink *l;
+
+	d = unit2dev(UNIT(c->qid));
+	i = L(c->qid);
+	if(i >= d->ndl)
+		return 0;
+	l = d->dl + i;
+
+	s = p = malloc(1024);
+	e = s + 1024;
+
+	p = seprint(p, e, "addr: ");
+	for(i = 0; i < l->nea; i++)
+		p = seprint(p, e, "%E ", l->eatab[i]);
+	p = seprint(p, e, "\n");
+	p = seprint(p, e, "npkt: %uld\n", l->npkt);
+	p = seprint(p, e, "resent: %uld\n", l->resent);
+	p = seprint(p, e, "flag: "); p = pflag(p, e, l->flag);
+	p = seprint(p, e, "rttavg: %uld\n", Tk2ms(l->rttavg));
+	p = seprint(p, e, "mintimer: %uld\n", Tk2ms(l->mintimer));
+
+	p = seprint(p, e, "nl path: %s\n", l->nl->path);
+	p = seprint(p, e, "nl ea: %E\n", l->nl->ea);
+	/* the "nl" lines describe the netlink; this one repeated the devlink's flag */
+	p = seprint(p, e, "nl flag: "); p = pflag(p, e, l->nl->flag);
+	p = seprint(p, e, "nl lostjumbo: %d\n", l->nl->lostjumbo);
+	p = seprint(p, e, "nl datamtu: %d\n", l->nl->datamtu);
+
+	/* never hand out more than was actually formatted */
+	if(p - s < len)
+		len = p - s;
+	i = readstr(off, db, len, s);
+	free(s);
+	return i;
+}
+
+/*
+ * Produce the text of the top-level ctl file: the debug, autodiscover
+ * and rediscover settings, then one group of lines per netlink whose
+ * cc channel is set.  Returns the part selected by off and len
+ * through readstr.  The channel argument is unused.
+ */
+static long
+topctlread(Chan *d0, void *db, int len, int off)
+{
+ int i;
+ char *s, *p, *e;
+ Netlink *n;
+
+ s = p = malloc(1024);
+ e = s + 1024;
+
+ p = seprint(p, e, "debug: %d\n", debug);
+ p = seprint(p, e, "autodiscover: %d\n", autodiscover);
+ p = seprint(p, e, "rediscover: %d\n", rediscover);
+
+ for(i = 0; i < Nnetlink; i++){
+ n = netlinks.nl+i;
+ if(n->cc == 0)
+ continue;
+ p = seprint(p, e, "if%d path: %s\n", i, n->path);
+ p = seprint(p, e, "if%d ea: %E\n", i, n->ea);
+ p = seprint(p, e, "if%d flag: ", i); p = pflag(p, e, n->flag);
+ p = seprint(p, e, "if%d lostjumbo: %d\n", i, n->lostjumbo);
+ p = seprint(p, e, "if%d datamtu: %d\n", i, n->datamtu);
+ }
+
+ /* never hand out more than was actually formatted */
+ if(p - s < len)
+ len = p - s;
+ i = readstr(off, db, len, s);
+ free(s);
+ return i;
+}
+
+/*
+ * Read entry point of the aoe device: dispatch on the file type
+ * encoded in the qid.  Types not listed raise Eperm.
+ */
+static long
+aoeread(Chan *c, void *db, long n, vlong off)
+{
+	switch(TYPE(c->qid)){
+	case Qzero:
+	case Qtopdir:
+	case Qunitdir:
+	case Qdevlinkdir:
+		/* directories */
+		return devdirread(c, db, n, 0, 0, aoegen);
+	case Qtopctl:
+		return topctlread(c, db, n, off);
+	case Qtoplog:
+		return eventlogread(db, n);
+	case Qdevlink:
+		return devlinkread(c, db, n, off);
+	case Qctl:
+	case Qdata:
+	case Qconfig:
+	case Qident:
+		/* per-unit files */
+		return unitread(c, db, n, off);
+	}
+	error(Eperm);
+	return 0;	/* not reached */
+}
+
+/*
+ * Send a "set config string" query (AQCfset) carrying len bytes from db
+ * to the unit, wait for the reply, and install the returned string as
+ * d->config.  Returns len on success and 0 if no header could be built
+ * for the frame; raises Enotup, Etoobig or the srb's error otherwise.
+ */
+static long
+configwrite(Aoedev *d, void *db, long len)
+{
+	char *s;
+	Aoeqc *ch;
+	Frame *f;
+	Srb *srb;
+
+	if(!UP(d))
+		error(Enotup);
+	if(len > sizeof d->config)
+		error(Etoobig);
+	srb = srballoc(len);
+	s = malloc(len);
+	memmove(s, db, len);
+	/* first error label: releases srb and the private copy */
+	if(waserror()){
+		srbfree(srb);
+		free(s);
+		nexterror();
+	}
+	/* poll every 100ms for a free frame; exits with d locked and a label pushed */
+	for (;;) {
+		QLOCK(d);
+		if(waserror()){
+			QUNLOCK(d);
+			nexterror();
+		}
+		f = freeframe(d);
+		if(f != nil)
+			break;
+		poperror();
+		QUNLOCK(d);
+		if(waserror())
+			nexterror();
+		tsleep(&up->sleep, return0, 0, 100);
+		poperror();
+	}
+	f->nhdr = Szaoeqc;
+	memset(f->hdr, 0, f->nhdr);
+	ch = (Aoeqc*)f->hdr;
+	if(hset(d, f, (Aoehdr*)ch, ACconfig) == -1){
+		/*
+		 * nothing was sent: unwind exactly what the success
+		 * path would, so the lock, both error labels and the
+		 * buffers are not left behind.
+		 */
+		poperror();
+		QUNLOCK(d);
+		poperror();
+		srbfree(srb);
+		free(s);
+		return 0;
+	}
+	f->srb = srb;
+	f->dp = s;
+	ch->verccmd = AQCfset;
+	hnputs(ch->cslen, len);
+	d->nout++;
+	srb->nout++;
+	f->dl->npkt++;
+	f->dlen = len;
+	/*
+	 * these refer to qlock & waserror in the above for loop.
+	 * there's still the first waserror outstanding.
+	 */
+	poperror();
+	QUNLOCK(d);
+
+	devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
+	SLEEP(srb, srbready, srb);
+	if(srb->error)
+		error(srb->error);
+
+	/* the response handler copied the reply into s; install it */
+	QLOCK(d);
+	if(waserror()){
+		QUNLOCK(d);
+		nexterror();
+	}
+	memmove(d->config, s, len);
+	d->nconfig = len;
+	poperror();
+	QUNLOCK(d);
+
+	poperror();			/* pop first waserror */
+
+	srbfree(srb);
+	memmove(db, s, len);
+	free(s);
+	return len;
+}
+
+/*
+ * Read the mtu from channel m and return it.
+ * Returns 1514 when m is nil, when the read raises an error, or when
+ * the read yields 12 bytes or fewer.  The number is parsed starting at
+ * byte offset 12 of the text read.
+ */
+static int
+getmtu(Chan *m)
+{
+ int n, mtu;
+ char buf[36];
+
+ mtu = 1514;
+ if(m == nil || waserror())
+ return mtu;
+ n = devtab[m->type]->read(m, buf, sizeof buf - 1, 0);
+ poperror();
+ if(n > 12){
+ /* terminate the text so strtoul stops inside buf */
+ buf[n] = 0;
+ mtu = strtoul(buf + 12, 0, 0);
+ }
+ return mtu;
+}
+
+/*
+ * Largest data payload, in bytes, usable on every link of d that is up
+ * (both the devlink and its netlink flagged Dup).  The smallest
+ * per-link limit is taken, 1514 is assumed when no link is up, the
+ * AoE ATA header size is subtracted and the result is rounded down to
+ * a whole number of sectors.
+ */
+static int
+devmaxdata(Aoedev *d)
+{
+	int k, lim, best;
+	Devlink *dl;
+	Netlink *nl;
+
+	best = 100000;	/* sentinel: no usable link seen yet */
+	for(k = 0; k < d->ndl; k++){
+		dl = &d->dl[k];
+		nl = dl->nl;
+		if((dl->flag & Dup) != 0 && (nl->flag & Dup) != 0){
+			lim = getmtu(nl->mtu);
+			if(lim > dl->datamtu)
+				lim = dl->datamtu;
+			if(lim < best)
+				best = lim;
+		}
+	}
+	if(best == 100000)
+		best = 1514;
+	best -= Szaoeata;
+	return best - best % Aoesectsz;
+}
+
+/*
+ * New value for a boolean setting: with an argument, 1 exactly when
+ * the argument is "on"; without one, init with its low bit flipped.
+ */
+static int
+toggle(char *s, int init)
+{
+	if(s != nil)
+		return strcmp(s, "on") == 0;
+	return init ^ 1;
+}
+
+static void ataident(Aoedev*);
+
+/*
+ * Handle a write to a unit's ctl file.  Commands:
+ *	failio		take the device down with an i/o failure
+ *	identify	reissue the ATA identify
+ *	jumbo [on|...]	set or toggle the Djumbo flag
+ *	maxbno [n]	set max transfer in sectors (default: link limit)
+ *	mtu [n]		set max transfer from a frame size in bytes
+ *	setsize [n]	override the reported size (default: real size)
+ * The device is qlocked for the duration.  Returns n.
+ */
+static long
+unitctlwrite(Aoedev *d, void *db, long n)
+{
+	uint maxbcnt, m;
+	uvlong bsize;
+	enum {
+		Failio,
+		Ident,
+		Jumbo,
+		Maxbno,
+		Mtu,
+		Setsize,
+	};
+	Cmdbuf *cb;
+	Cmdtab *ct;
+	static Cmdtab cmds[] = {
+		{Failio, 	"failio", 	1 },
+		{Ident, 	"identify", 	1 },
+		{Jumbo, 	"jumbo", 	0 },
+		{Maxbno,	"maxbno",	0 },
+		{Mtu,		"mtu",		0 },
+		{Setsize, 	"setsize", 	0 },
+	};
+
+	cb = parsecmd(db, n);
+	QLOCK(d);
+	if(waserror()){
+		QUNLOCK(d);
+		free(cb);
+		nexterror();
+	}
+	ct = lookupcmd(cb, cmds, nelem(cmds));
+	switch(ct->index){
+	case Failio:
+		downdev(d, "i/o failure");
+		break;
+	case Ident:
+		ataident(d);
+		break;
+	case Jumbo:
+		m = 0;
+		if(d->flag & Djumbo)
+			m = 1;
+		/* toggle returns the new setting; it must be kept */
+		m = toggle(cb->f[1], m);
+		if(m)
+			d->flag |= Djumbo;
+		else
+			d->flag &= ~Djumbo;
+		break;
+	case Maxbno:
+	case Mtu:
+		maxbcnt = devmaxdata(d);
+		if(cb->nf > 2)
+			error(Ecmdargs);
+		if(cb->nf == 2){
+			m = strtoul(cb->f[1], 0, 0);
+			if(ct->index == Maxbno)
+				m *= Aoesectsz;
+			else{
+				/* frame size: strip header, round down to sectors */
+				m -= Szaoeata;
+				m &= ~(Aoesectsz-1);
+			}
+			/* m is unsigned, so an underflow above lands here too */
+			if(m == 0 || m > maxbcnt)
+				cmderror(cb, "invalid mtu");
+			maxbcnt = m;
+			d->maxmtu = m;
+		} else
+			d->maxmtu = Maxmtu;
+		d->maxbcnt = maxbcnt;
+		break;
+	case Setsize:
+		bsize = d->realbsize;
+		if(cb->nf > 2)
+			error(Ecmdargs);
+		if(cb->nf == 2){
+			bsize = strtoull(cb->f[1], 0, 0);
+			if(bsize % Aoesectsz)
+				cmderror(cb, "disk size must be sector aligned");
+		}
+		d->bsize = bsize;
+		break;
+	default:
+		cmderror(cb, "unknown aoe control message");
+	}
+	poperror();
+	QUNLOCK(d);
+	free(cb);
+	return n;
+}
+
+/*
+ * Write to one of a unit's files.  ctl and data are passed on to
+ * unitctlwrite and rw; ident raises Eperm.  A config write is merged
+ * at offset off into a copy of the current config string and the
+ * first off+n bytes of the result are sent with configwrite.
+ */
+static long
+unitwrite(Chan *c, void *db, long n, vlong off)
+{
+ long rv;
+ char *buf;
+ Aoedev *d;
+
+ d = unit2dev(UNIT(c->qid));
+ switch(TYPE(c->qid)){
+ default:
+ error(Ebadarg);
+ case Qctl:
+ return unitctlwrite(d, db, n);
+ case Qident:
+ error(Eperm);
+ case Qdata:
+ return rw(d, Write, db, n, off);
+ case Qconfig:
+ if(off + n > sizeof d->config)
+ error(Etoobig);
+ buf = malloc(sizeof d->config);
+ if(waserror()){
+ free(buf);
+ nexterror();
+ }
+ /* start from the existing string, then overlay the new bytes */
+ memmove(buf, d->config, d->nconfig);
+ memmove(buf + off, db, n);
+ rv = configwrite(d, buf, n + off);
+ poperror();
+ free(buf);
+ return rv;
+ }
+}
+
+/*
+ * Claim the first netlink slot whose cc is unset and fill it in with
+ * the given channels, path and ethernet address, then mark it Dup.
+ * Raises "out of netlink structures" when every slot is taken.
+ * NOTE(review): strncpy does not terminate nl->path when path fills
+ * the whole array — confirm callers bound the path length.
+ */
+static Netlink*
+addnet(char *path, Chan *cc, Chan *dc, Chan *mtu, uchar *ea)
+{
+ Netlink *nl, *e;
+
+ LOCK(&netlinks);
+ if(waserror()){
+ UNLOCK(&netlinks);
+ nexterror();
+ }
+ nl = netlinks.nl;
+ e = nl + nelem(netlinks.nl);
+ /* skip slots already in use */
+ for(; nl < e && nl->cc; nl++)
+ continue;
+ if(nl == e)
+ error("out of netlink structures");
+ nl->cc = cc;
+ nl->dc = dc;
+ nl->mtu = mtu;
+ strncpy(nl->path, path, sizeof nl->path);
+ memmove(nl->ea, ea, sizeof nl->ea);
+ poperror();
+ nl->flag |= Dup;
+ UNLOCK(&netlinks);
+ return nl;
+}
+
+/*
+ * Hand out the next unit number under the units lock.
+ * Returns -1 once Maxunits are already in use.
+ */
+static int
+newunit(void)
+{
+	int unit;
+
+	unit = -1;
+	LOCK(&units);
+	if(units.ref != Maxunits){
+		unit = units.ref;
+		units.ref++;
+	}
+	UNLOCK(&units);
+	return unit;
+}
+
+/*
+ * Give back one unit: decrement the unit count under the units lock
+ * and return the new count.
+ */
+static int
+dropunit(void)
+{
+	int left;
+
+	LOCK(&units);
+	units.ref--;
+	left = units.ref;
+	UNLOCK(&units);
+	return left;
+}
+
+/*
+ * Allocate and initialise a device for shelf.slot major.minor with n
+ * frames in use.
+ * always allocate max frames.  maxout may change.
+ * Raises an error if memory or unit numbers are exhausted.
+ */
+static Aoedev*
+newdev(long major, long minor, int n)
+{
+	Aoedev *d;
+	Frame *f, *e;
+
+	d = malloc(sizeof *d);
+	f = malloc(sizeof *f*Maxframes);
+	if(!d || !f) {
+		free(d);
+		free(f);
+		error("aoe device allocation failure");
+	}
+	d->nframes = n;
+	d->frames = f;
+	for (e = f + Maxframes; f < e; f++)
+		f->tag = Tfree;
+	d->maxout = n;
+	d->major = major;
+	d->minor = minor;
+	d->maxbcnt = Dbcnt;
+	d->flag = Djumbo;
+	d->maxmtu = Maxmtu;
+	d->unit = newunit();	/* bzzt.  inaccurate if units removed */
+	if(d->unit == -1){
+		/* f was advanced by the loop above; release via d, and before d itself */
+		free(d->frames);
+		free(d);
+		error("too many units");
+	}
+	d->dl = d->dltab;
+	return d;
+}
+
+/*
+ * Look up the known device with the given major.minor.
+ * Returns nil, after logging the miss, when there is none.
+ */
+static Aoedev*
+mm2dev(int major, int minor)
+{
+	Aoedev *dev;
+
+	RLOCK(&devs);
+	dev = devs.d;
+	while(dev != nil && !(dev->major == major && dev->minor == minor))
+		dev = dev->next;
+	RUNLOCK(&devs);
+
+	if(dev == nil)
+		eventlog("mm2dev: %d.%d not found\n", major, minor);
+	return dev;
+}
+
+/*
+ * Find the device in our list. If not known, add it.
+ * Returns 0 without searching for the values 0xffff (major) and
+ * 0xff (minor).  New devices are created with newdev(major, minor, n)
+ * and pushed on the head of devs.d; newdev errors propagate after the
+ * write lock is released.
+ */
+static Aoedev*
+getdev(long major, long minor, int n)
+{
+ Aoedev *d;
+
+ if(major == 0xffff || minor == 0xff)
+ return 0;
+ WLOCK(&devs);
+ if(waserror()){
+ WUNLOCK(&devs);
+ nexterror();
+ }
+ for(d = devs.d; d; d = d->next)
+ if(d->major == major && d->minor == minor)
+ break;
+ if(d == nil) {
+ d = newdev(major, minor, n);
+ d->next = devs.d;
+ devs.d = d;
+ }
+ poperror();
+ WUNLOCK(&devs);
+ return d;
+}
+
+/* Fetch a 16-bit little-endian value from the two bytes at a. */
+static ushort
+gbit16(void *a)
+{
+	uchar *b;
+
+	b = a;
+	return b[0] | b[1] << 8;
+}
+
+/*
+ * Fetch a 32-bit little-endian value from the four bytes at a.
+ * Each byte is widened to ulong before shifting so that a top byte
+ * of 0x80 or more is neither shifted into the sign bit of an int nor
+ * sign-extended when ulong is wider than int.
+ */
+static ulong
+gbit32(void *a)
+{
+	ulong j;
+	uchar *i;
+
+	i = a;
+	j  = (ulong)i[3] << 24;
+	j |= (ulong)i[2] << 16;
+	j |= (ulong)i[1] << 8;
+	j |= (ulong)i[0];
+	return j;
+}
+
+/*
+ * Fetch a 64-bit little-endian value from the eight bytes at a,
+ * built from two gbit32 halves.
+ */
+static uvlong
+gbit64(void *a)
+{
+	uchar *b;
+	uvlong hi;
+
+	b = a;
+	hi = gbit32(b+4);
+	return hi << 32 | gbit32(a);
+}
+
+/*
+ * Build and transmit an ATA identify (Cid) request for d.
+ * Does nothing if no frame is free or hset fails.  The request is
+ * written to the data channel of the netlink chosen for the frame;
+ * this function does not wait for the reply.
+ */
+static void
+ataident(Aoedev *d)
+{
+ Aoeata *a;
+ Block *b;
+ Frame *f;
+
+ f = freeframe(d);
+ if(f == nil)
+ return;
+ f->nhdr = Szaoeata;
+ memset(f->hdr, 0, f->nhdr);
+ a = (Aoeata*)f->hdr;
+ if(hset(d, f, (Aoehdr*)a, ACata) == -1)
+ return;
+ f->srb = srbkalloc(0, 0);
+ a->cmdstat = Cid; /* ata 6, page 110 */
+ a->scnt = 1;
+ a->lba[3] = 0xa0;
+ d->nout++;
+ f->dl->npkt++;
+ /* one 512-byte sector expected back, nothing sent beyond the header */
+ f->bcnt = 512;
+ f->dlen = 0;
+ b = allocfb(f);
+ devtab[f->nl->dc->type]->bwrite(f->nl->dc, b, 0);
+}
+
+/*
+ * Make sure ethernet address ea is in l's address table.
+ * Returns the index of the existing or newly appended entry, or -1
+ * when the address is not present and the table already holds Nea
+ * entries.
+ */
+static int
+newdlea(Devlink *l, uchar *ea)
+{
+	int k;
+
+	/* look through the entries in use */
+	for(k = 0; k < Nea && k != l->nea; k++)
+		if(memcmp(l->eatab[k], ea, Eaddrlen) == 0)
+			return k;
+	if(k == Nea)
+		return -1;
+	/* k is the first unused slot */
+	memmove(l->eatab[k], ea, Eaddrlen);
+	return l->nea++;
+}
+
+/*
+ * Record that device d answered on netlink n with config response c.
+ * An existing devlink for n is refreshed; otherwise the next unused
+ * slot is initialised.  Returns the link, or 0 (after logging) when
+ * all Ndevlink slots belong to other netlinks.
+ */
+static Devlink*
+newdevlink(Aoedev *d, Netlink *n, Aoeqc *c)
+{
+	int k, fresh;
+	Devlink *dl;
+
+	for(k = 0; k < Ndevlink; k++){
+		dl = &d->dl[k];
+		fresh = k == d->ndl;
+		if(!fresh && dl->nl != n)
+			continue;
+
+		if(fresh)
+			d->ndl++;
+		newdlea(dl, c->src);
+		dl->datamtu = c->scnt*Aoesectsz;
+		if(fresh)
+			dl->nl = n;
+		dl->flag |= Dup;
+		if(fresh){
+			dl->mintimer = Rtmin;
+			dl->rttavg = Rtmax;
+		}
+		return dl;
+	}
+	eventlog("%æ: out of links: %s:%E to %E\n", d, n->path, n->ea, c->src);
+	return 0;
+}
+
+/*
+ * An error response arrived in b: fail the outstanding frame it
+ * answers with message s.  Responses tagged Tmgmt or Tfree, for
+ * unknown devices, or matching no frame are ignored.
+ */
+static void
+errrsp(Block *b, char *s)
+{
+	int tag;
+	Aoedev *dev;
+	Aoehdr *hdr;
+	Frame *fr;
+
+	hdr = (Aoehdr*)b->rp;
+	tag = nhgetl(hdr->tag);
+	if(tag == Tmgmt || tag == Tfree)
+		return;
+	dev = mm2dev(nhgets(hdr->major), hdr->minor);
+	if(dev == 0)
+		return;
+	fr = getframe(dev, tag);
+	if(fr)
+		frameerror(dev, fr, s);
+}
+
+/*
+ * Handle a query-config response received on netlink nl.
+ *
+ * A response whose tag is not Tmgmt answers a frame sent earlier
+ * (see configwrite): its config string is copied to the frame's data
+ * pointer, the waiting srb is woken and the frame is freed.
+ *
+ * A Tmgmt response is a discovery reply: the device is looked up or
+ * created, this interface is added as a devlink, the firmware version
+ * and config string are recorded, the transfer size is recomputed and,
+ * if nobody has the device open, an identify is issued.
+ */
+static void
+qcfgrsp(Block *b, Netlink *nl)
+{
+ int major, cmd, cslen, blen;
+ unsigned n;
+ Aoedev *d;
+ Aoeqc *ch;
+ Devlink *l;
+ Frame *f;
+
+ ch = (Aoeqc*)b->rp;
+ major = nhgets(ch->major);
+ n = nhgetl(ch->tag);
+ if(n != Tmgmt){
+ d = mm2dev(major, ch->minor);
+ if(d == nil)
+ return;
+ QLOCK(d);
+ f = getframe(d, n);
+ if(f == nil){
+ QUNLOCK(d);
+ eventlog("%æ: unknown response tag %ux\n", d, n);
+ return;
+ }
+ cslen = nhgets(ch->cslen);
+ blen = BLEN(b) - Szaoeqc;
+ if(cslen < blen)
+ eventlog("%æ: cfgrsp: tag %.8ux oversized %d %d\n",
+ d, n, cslen, blen);
+ if(cslen > blen){
+ eventlog("%æ: cfgrsp: tag %.8ux runt %d %d\n",
+ d, n, cslen, blen);
+ cslen = blen;
+ }
+ /*
+ * NOTE(review): cslen is limited to the bytes present in the
+ * block but not to the size of the buffer behind f->dp —
+ * confirm a reply longer than the request cannot overrun it.
+ */
+ memmove(f->dp, ch + 1, cslen);
+ f->srb->nout--;
+ WAKEUP(f->srb);
+ d->nout--;
+ f->srb = nil;
+ f->tag = Tfree;
+ QUNLOCK(d);
+ return;
+ }
+
+ cmd = ch->verccmd & 0xf;
+ if(cmd != 0){
+ eventlog("aoe%d.%d: cfgrsp: bad command %d\n", major, ch->minor, cmd);
+ return;
+ }
+ /* the advertised buffer count becomes the frame count, capped at Maxframes */
+ n = nhgets(ch->bufcnt);
+ if(n > Maxframes)
+ n = Maxframes;
+
+ if(waserror()){
+ eventlog("getdev: %d.%d ignored: %s\n", major, ch->minor, up->errstr);
+ return;
+ }
+ d = getdev(major, ch->minor, n);
+ poperror();
+ if(d == 0)
+ return;
+
+ QLOCK(d);
+ *up->errstr = 0;
+ if(waserror()){
+ QUNLOCK(d);
+ eventlog("%æ: %s\n", d, up->errstr);
+ nexterror();
+ }
+
+ l = newdevlink(d, nl, ch); /* add this interface. */
+
+ d->fwver = nhgets(ch->fwver);
+ n = nhgets(ch->cslen);
+ if(n > sizeof d->config)
+ n = sizeof d->config;
+ d->nconfig = n;
+ memmove(d->config, ch + 1, n);
+
+ /* manually set mtu may be reset lower if conditions warrant */
+ if(l){
+ n = devmaxdata(d);
+ if(!(d->flag & Djumbo))
+ n = Dbcnt;
+ if(n > d->maxmtu)
+ n = d->maxmtu;
+ if(n != d->maxbcnt){
+ eventlog("%æ: setting %d byte mtu on %s:%E\n",
+ d, n, nl->path, nl->ea);
+ d->maxbcnt = n;
+ }
+ }
+ if(d->nopen == 0)
+ ataident(d);
+ poperror();
+ QUNLOCK(d);
+}
+
+/*
+ * Convert an identify string of n bytes, stored as n/2 16-bit words
+ * with the first character in the high byte of each word, into a
+ * NUL-terminated C string at p with leading and trailing blanks
+ * removed.  p must have room for n+1 bytes.
+ */
+static void
+idmove(char *p, ushort *a, unsigned n)
+{
+	int i;
+	char *op, *e;
+
+	op = p;
+	for(i = 0; i < n / 2; i++){
+		*p++ = a[i] >> 8;
+		*p++ = a[i];
+	}
+	*p = 0;
+	/* strip trailing blanks; p ends on the last character kept */
+	while(p > op && *--p == ' ')
+		*p = 0;
+	e = p;
+	p = op;
+	while(*p == ' ')
+		p++;
+	/*
+	 * with leading blanks, slide the text (p through e) and its
+	 * terminating NUL down to the start of the buffer.
+	 */
+	if(p != op)
+		memmove(op, p, e - p + 2);
+}
+
+/*
+ * Interpret identify data id for device d: recompute the Dllba,
+ * Dpower, Dsmart and Dnop flags from words 82, 83 and 86, mark the
+ * device Dup, and save the raw data in d->ident.
+ * Returns the sector count: the 64-bit value at word 100 when bit 10
+ * of words 83|86 is set, else the 32-bit value at word 60.
+ */
+static vlong
+aoeidentify(Aoedev *d, ushort *id)
+{
+ int i;
+ vlong s;
+
+ d->flag &= ~(Dllba|Dpower|Dsmart|Dnop|Dup);
+
+ i = gbit16(id+83) | gbit16(id+86);
+ if(i & (1<<10)){
+ d->flag |= Dllba;
+ s = gbit64(id+100);
+ }else
+ s = gbit32(id+60);
+
+ i = gbit16(id+83);
+ /* the feature bits are only looked at when bits 15:14 of word 83 are 01 */
+ if((i>>14) == 1) {
+ if(i & (1<<3))
+ d->flag |= Dpower;
+ i = gbit16(id+82);
+ if(i & 1)
+ d->flag |= Dsmart;
+ if(i & (1<<14))
+ d->flag |= Dnop;
+ }
+// eventlog("%æ up\n", d);
+ d->flag |= Dup;
+ memmove(d->ident, id, sizeof d->ident);
+ return s;
+}
+
+/*
+ * Stamp d with the current value of the drivevers counter and advance
+ * the counter, under its lock.
+ */
+static void
+newvers(Aoedev *d)
+{
+	LOCK(&drivevers);
+	d->vers = drivevers.ref;
+	drivevers.ref++;
+	UNLOCK(&drivevers);
+}
+
+/*
+ * Process an identify response for d: update flags and size via
+ * aoeidentify, extract the serial, firmware and model strings, and
+ * give the device a new version when its size or serial number
+ * differs from what was recorded before.
+ * Returns 0, or -1 if aoeidentify reports -1.
+ * NOTE(review): oserial holds 21 bytes but sizeof d->serial bytes are
+ * copied into it — confirm d->serial is no larger than 21.
+ */
+static int
+identify(Aoedev *d, ushort *id)
+{
+ vlong osectors, s;
+ uchar oserial[21];
+
+ s = aoeidentify(d, id);
+ if(s == -1)
+ return -1;
+ /* remember the previous identity for the comparison below */
+ osectors = d->realbsize;
+ memmove(oserial, d->serial, sizeof d->serial);
+
+ idmove(d->serial, id+10, 20);
+ idmove(d->firmware, id+23, 8);
+ idmove(d->model, id+27, 40);
+
+ /* sectors to bytes */
+ s *= Aoesectsz;
+ if(osectors != s || memcmp(oserial, d->serial, sizeof oserial)){
+ d->bsize = s;
+ d->realbsize = s;
+// d->mediachange = 1;
+ newvers(d);
+ }
+ return 0;
+}
+
+/*
+ * Handle an ATA response in b.  The matching outstanding frame is
+ * found by tag.  Reads have their data copied out; multi-part
+ * transfers are advanced and resent; identify replies are decoded.
+ * When the frame is finished its srb is woken once it has no more
+ * frames outstanding and no data left, the frame is freed, and work()
+ * is called.  Runt replies leave the frame outstanding.
+ */
+static void
+atarsp(Block *b)
+{
+	unsigned n;
+	int major;	/* int, not short: nhgets yields 0..0xffff and mm2dev compares as int */
+	Aoeata *ahin, *ahout;
+	Aoedev *d;
+	Frame *f;
+	Srb *srb;
+
+	ahin = (Aoeata*)b->rp;
+	major = nhgets(ahin->major);
+	d = mm2dev(major, ahin->minor);
+	if(d == nil)
+		return;
+	QLOCK(d);
+	if(waserror()){
+		QUNLOCK(d);
+		nexterror();
+	}
+	n = nhgetl(ahin->tag);
+	f = getframe(d, n);
+	if(f == nil){
+		dprint("%æ: unexpected response; tag %ux\n", d, n);
+		goto bail;
+	}
+	rtupdate(f->dl, tsince(f->tag));
+	ahout = (Aoeata*)f->hdr;
+	srb = f->srb;
+
+	if(ahin->cmdstat & 0xa9){
+		eventlog("%æ: ata error cmd %.2ux stat %.2ux\n",
+			d, ahout->cmdstat, ahin->cmdstat);
+		if(srb)
+			srb->error = Eio;
+	} else {
+		/* size of the piece this frame asked for */
+		n = ahout->scnt * Aoesectsz;
+		switch(ahout->cmdstat){
+		case Crd:
+		case Crdext:
+			if(BLEN(b) - Szaoeata < n){
+				eventlog("%æ: runt read blen %ld expect %d\n",
+					d, BLEN(b), n);
+				goto bail;
+			}
+			memmove(f->dp, b->rp + Szaoeata, n);
+			/* fall through */
+		case Cwr:
+		case Cwrext:
+			if(n > Dbcnt)
+				f->nl->lostjumbo = 0;
+			/* more of the transfer left: reuse the frame for the next piece */
+			if(f->bcnt -= n){
+				f->lba += n / Aoesectsz;
+				f->dp = (uchar*)f->dp + n;
+				resend(d, f);
+				goto bail;
+			}
+			break;
+		case Cid:
+			if(BLEN(b) - Szaoeata < 512){
+				eventlog("%æ: runt identify blen %ld expect %d\n",
+					d, BLEN(b), n);
+				goto bail;
+			}
+			identify(d, (ushort*)(b->rp + Szaoeata));
+			break;
+		default:
+			eventlog("%æ: unknown ata command %.2ux \n",
+				d, ahout->cmdstat);
+		}
+	}
+
+	if(srb && --srb->nout == 0 && srb->len == 0)
+		WAKEUP(srb);
+	f->srb = nil;
+	f->tag = Tfree;
+	d->nout--;
+
+	work(d);
+bail:
+	poperror();
+	QUNLOCK(d);
+}
+
+/*
+ * Kernel process body: read AoE frames from netlink v's data channel
+ * and dispatch responses until an error is raised (for instance the
+ * netlink losing Dup).  netlinks.reader[idx] is 1 while the process
+ * runs; on exit it is cleared and netlinks.rendez[idx] is woken.
+ */
+static void
+netrdaoeproc(void *v)
+{
+ int idx;
+ char name[Maxpath+1], *s;
+ Aoehdr *h;
+ Block *b;
+ Netlink *nl;
+
+ nl = (Netlink*)v;
+ idx = nl - netlinks.nl;
+ netlinks.reader[idx] = 1;
+ /* keep a private copy of the path for the exit message */
+ kstrcpy(name, nl->path, Maxpath);
+
+ if(waserror()){
+ eventlog("netrdaoe@%s: exiting: %s\n", name, up->errstr);
+ netlinks.reader[idx] = 0;
+ wakeup(netlinks.rendez + idx);
+ pexit(up->errstr, 1);
+ }
+ if(autodiscover)
+ discover(0xffff, 0xff);
+ for (;;) {
+ if(!(nl->flag & Dup))
+ error("netlink is down");
+ if(nl->dc == nil)
+ panic("netrdaoe: nl->dc == nil");
+ b = devtab[nl->dc->type]->bread(nl->dc, 1<<16, 0);
+ if(b == nil)
+ error("network read");
+ h = (Aoehdr*)b->rp;
+ /*
+ * only frames with AFrsp set are examined; the else-if chain
+ * below hangs off the inner aoeerror test.
+ */
+ if(h->verflag & AFrsp)
+ if(s = aoeerror(h)){
+ eventlog("%s: %s\n", nl->path, s);
+ errrsp(b, s);
+ }else if(h->cmd == ACata)
+ atarsp(b);
+ else if(h->cmd == ACconfig)
+ qcfgrsp(b, nl);
+ else if((h->cmd & 0xf0) == 0){
+ eventlog("%s: unknown cmd %d\n",
+ nl->path, h->cmd);
+ errrsp(b, "unknown command");
+ }
+ freeb(b);
+ }
+}
+
+/*
+ * Read the file path/addr and parse its contents into the ethernet
+ * address ea.  Raises "parseether failure" if the text does not parse;
+ * errors from opening or reading the file propagate.
+ */
+static void
+getaddr(char *path, uchar *ea)
+{
+ int n;
+ char buf[2*Eaddrlen+1];
+ Chan *c;
+
+ /* the name is built in up->genbuf */
+ uprint("%s/addr", path);
+ c = namec(up->genbuf, Aopen, OREAD, 0);
+ if(waserror()) {
+ cclose(c);
+ nexterror();
+ }
+ if(c == nil)
+ panic("æ: getaddr: c == nil");
+ n = devtab[c->type]->read(c, buf, sizeof buf-1, 0);
+ poperror();
+ cclose(c);
+ buf[n] = 0;
+ if(parseether(ea, buf) < 0)
+ error("parseether failure");
+}
+
+/*
+ * Bind the ethernet interface at path for AoE use: dial
+ * path!Aoetype, open path/mtu if it can be opened (mtu stays nil
+ * otherwise), read the interface address, register a netlink and
+ * start its reader process.  On error the channels opened here are
+ * closed before the error is passed on.
+ */
+static void
+netbind(char *path)
+{
+ char addr[Maxpath];
+ uchar ea[2*Eaddrlen+1];
+ Chan *dc, *cc, *mtu;
+ Netlink *nl;
+
+ snprint(addr, sizeof addr, "%s!0x%x", path, Aoetype);
+ dc = chandial(addr, nil, nil, &cc);
+ snprint(addr, sizeof addr, "%s/mtu", path);
+ /* a missing mtu file is not fatal */
+ if(waserror())
+ mtu = nil;
+ else {
+ mtu = namec(addr, Aopen, OREAD, 0);
+ poperror();
+ }
+
+ if(waserror()){
+ cclose(dc);
+ cclose(cc);
+ if(mtu)
+ cclose(mtu);
+ nexterror();
+ }
+ if(dc == nil || cc == nil)
+ error(Enonexist);
+ getaddr(path, ea);
+ nl = addnet(path, cc, dc, mtu, ea);
+ snprint(addr, sizeof addr, "netrdaoe@%s", path);
+ kproc(addr, netrdaoeproc, nl);
+ poperror();
+}
+
+/*
+ * Sleep condition used by netunbind: true when the int at v is
+ * non-zero.
+ * NOTE(review): netrdaoeproc sets its reader flag to 1 while running
+ * and 0 on exit, so this is true while the reader is still alive —
+ * confirm the sense is what netunbind intends.
+ */
+static int
+unbound(void *v)
+{
+ return *(int*)v != 0;
+}
+
+/*
+ * Undo netbind for the interface at path: mark the netlink and every
+ * devlink using it down, wait on the reader's rendezvous, resend
+ * frames that were queued on the interface, remove its devlinks,
+ * close its channels, and finally discard devices left with no links.
+ * Raises "device not bound" if path is not a bound interface.
+ */
+static void
+netunbind(char *path)
+{
+	int i, idx;
+	Aoedev *d, **pp;
+	Chan *dc, *cc;
+	Devlink *l;
+	Frame *f;
+	Netlink *n, *e;
+
+	n = netlinks.nl;
+	e = n + nelem(netlinks.nl);
+
+	LOCK(&netlinks);
+	for(; n < e; n++)
+		if(n->dc && strcmp(n->path, path) == 0)
+			break;
+	UNLOCK(&netlinks);
+	if(n == e)
+		error("device not bound");
+
+	/*
+	 * hunt down devices using this interface; disable
+	 * this also terminates the reader.
+	 */
+	idx = n - netlinks.nl;
+	WLOCK(&devs);
+	for(d = devs.d; d; d = d->next){
+		QLOCK(d);
+		for(i = 0; i < d->ndl; i++){
+			l = d->dl + i;
+			if(l->nl == n)
+				l->flag &= ~Dup;
+		}
+		QUNLOCK(d);
+	}
+	n->flag &= ~Dup;
+	WUNLOCK(&devs);
+
+	/* confirm reader is down. */
+	while(waserror())
+		;
+	sleep(netlinks.rendez + idx, unbound, netlinks.reader + idx);
+	poperror();
+
+	/* reschedule packets. */
+	WLOCK(&devs);
+	for(d = devs.d; d; d = d->next){
+		QLOCK(d);
+		for(i = 0; i < d->nframes; i++){
+			f = d->frames + i;
+			if(f->tag != Tfree && f->nl == n)
+				resend(d, f);
+		}
+		QUNLOCK(d);
+	}
+	WUNLOCK(&devs);
+
+	/* squeeze devlink pool.  (we assert nobody is using them now) */
+	WLOCK(&devs);
+	for(d = devs.d; d; d = d->next){
+		QLOCK(d);
+		for(i = 0; i < d->ndl; i++){
+			l = d->dl + i;
+			if(l->nl == n){
+				memmove(l, l + 1, sizeof *l * (--d->ndl - i));
+				/* slot i now holds the next link; look at it too */
+				i--;
+			}
+		}
+		QUNLOCK(d);
+	}
+	WUNLOCK(&devs);
+
+	/* close device link. */
+	LOCK(&netlinks);
+	dc = n->dc;
+	cc = n->cc;
+	if(n->mtu)
+		cclose(n->mtu);
+	memset(n, 0, sizeof *n);
+	UNLOCK(&netlinks);
+
+	cclose(dc);
+	cclose(cc);
+
+	/*
+	 * squeeze orphan devices.  pp always addresses the pointer that
+	 * leads to d (devs.d or the previous survivor's next), so
+	 * unlinking is correct wherever the orphan sits in the list.
+	 */
+	WLOCK(&devs);
+	for(pp = &devs.d; (d = *pp) != nil; ){
+		if(d->ndl > 0){
+			pp = &d->next;
+			continue;
+		}
+		QLOCK(d);
+		downdev(d, "orphan");
+		QUNLOCK(d);
+		*pp = d->next;
+		free(d->frames);
+		free(d);
+		dropunit();
+	}
+	WUNLOCK(&devs);
+}
+
+/*
+ * Parse "shelf" or "shelf.slot" from f into *shelf and *slot.
+ * A nil f, or a missing ".slot" part, yields the broadcast values
+ * 0xffff and 0xff respectively.  Raises "bad shelf" or "bad slot" for
+ * a number that is missing or out of range.
+ */
+static void
+strtoss(char *f, ushort *shelf, ushort *slot)
+{
+	ulong sh, sl;
+	char *s;
+
+	*shelf = 0xffff;
+	*slot = 0xff;
+	if(!f)
+		return;
+	sh = strtol(f, &s, 0);
+	if(s == f || sh > 0xffff)
+		error("bad shelf");
+	*shelf = sh;
+	f = s;
+	if(*f++ == '.'){
+		/* range-check in a wide type, before narrowing to ushort */
+		sl = strtol(f, &s, 0);
+		if(s == f || sl > 0xff)
+			error("bad slot");
+		*slot = sl;
+	}else
+		*slot = 0xff;
+}
+
+/* Run discovery for the shelf[.slot] named by the string f. */
+static void
+discoverstr(char *f)
+{
+	ushort major, minor;
+
+	strtoss(f, &major, &minor);
+	discover(major, minor);
+}
+
+/*
+ * Take device d out of service: clear Dup, give it a new version,
+ * drop its links, fail all of its frames with Enotup, unlink it from
+ * devs.d and free it.
+ * NOTE(review): if d is not on the list the search leaves p nil and
+ * devs.d is replaced by d->next — confirm callers always pass a
+ * listed device.
+ */
+static void
+removedev(Aoedev *d)
+{
+ int i;
+ Aoedev *p;
+
+ WLOCK(&devs);
+ /* find d's predecessor; p stays 0 when d is the head */
+ p = 0;
+ if(d != devs.d)
+ for(p = devs.d; p; p = p->next)
+ if(p->next == d)
+ break;
+ QLOCK(d);
+ d->flag &= ~Dup;
+ newvers(d);
+ d->ndl = 0;
+ QUNLOCK(d);
+ for(i = 0; i < d->nframes; i++)
+ frameerror(d, d->frames+i, Enotup);
+
+ if(p)
+ p->next = d->next;
+ else
+ devs.d = d->next;
+ free(d->frames);
+ free(d);
+ dropunit();
+ WUNLOCK(&devs);
+}
+
+
+/*
+ * Remove entry point of the aoe device.  Only a unit directory can be
+ * removed, which discards the whole device; everything else is Eperm.
+ */
+static void
+aoeremove(Chan *c)
+{
+	if(TYPE(c->qid) != Qunitdir)
+		error(Eperm);
+	removedev(unit2dev(UNIT(c->qid)));
+}
+
+/*
+ * Remove the device named by the shelf.slot string f.
+ * Raises "device not bound" when no device matches.
+ * The devs lock is dropped before removedev, which takes it again, so
+ * the device is unprotected in between (the BOTCH noted below).
+ */
+static void
+removestr(char *f)
+{
+ ushort shelf, slot;
+ Aoedev *d;
+
+ strtoss(f, &shelf, &slot);
+ WLOCK(&devs);
+ for(d = devs.d; d; d = d->next)
+ if(shelf == d->major && slot == d->minor){
+ WUNLOCK(&devs); /* BOTCH */
+ removedev(d);
+ return;
+ }
+ WUNLOCK(&devs);
+ error("device not bound");
+}
+
+/*
+ * Handle a write to the top-level ctl file.  The command is looked up
+ * in cmds and its first argument, cb->f[1], is handed to the routine
+ * that implements it.  Returns n.
+ * Closewait is in the enum but has no cmds entry, so it cannot be
+ * selected.
+ */
+static long
+topctlwrite(void *db, long n)
+{
+ enum {
+ Autodiscover,
+ Bind,
+ Debug,
+ Discover,
+ Closewait,
+ Rediscover,
+ Remove,
+ Unbind,
+ };
+ char *f;
+ Cmdbuf *cb;
+ Cmdtab *ct;
+ static Cmdtab cmds[] = {
+ { Autodiscover, "autodiscover", 0 },
+ { Bind, "bind", 2 },
+ { Debug, "debug", 0 },
+ { Discover, "discover", 0 },
+ { Rediscover, "rediscover", 0 },
+ { Remove, "remove", 2 },
+ { Unbind, "unbind", 2 },
+ };
+
+ cb = parsecmd(db, n);
+ if(waserror()){
+ free(cb);
+ nexterror();
+ }
+ ct = lookupcmd(cb, cmds, nelem(cmds));
+ f = cb->f[1];
+ switch(ct->index){
+ case Autodiscover:
+ autodiscover = toggle(f, autodiscover);
+ break;
+ case Bind:
+ netbind(f);
+ break;
+ case Debug:
+ debug = toggle(f, debug);
+ break;
+ case Discover:
+ discoverstr(f);
+ break;
+ case Rediscover:
+ rediscover = toggle(f, rediscover);
+ break;
+ case Remove:
+ removestr(f); /* deprecated */
+ break;
+ case Unbind:
+ netunbind(f);
+ break;
+ default:
+ cmderror(cb, "unknown aoe control message");
+ }
+ poperror();
+ free(cb);
+ return n;
+}
+
+/*
+ * Write entry point of the aoe device: the top ctl file and the
+ * per-unit files are writable, every other file type raises Eperm.
+ */
+static long
+aoewrite(Chan *c, void *db, long n, vlong off)
+{
+	switch(TYPE(c->qid)){
+	case Qtopctl:
+		return topctlwrite(db, n);
+	case Qctl:
+	case Qdata:
+	case Qconfig:
+	case Qident:
+		return unitwrite(c, db, n, off);
+	}
+	error(Eperm);
+	return 0;	/* not reached */
+}
+
+/*
+ * Device switch entry for the aoe driver, device character 'æ'.
+ * Entries named aoe* are implemented in this file; the dev* entries
+ * are the generic routines.
+ */
+Dev aoedevtab = {
+ L'æ',
+ "aoe",
+
+ devreset,
+ devinit,
+ devshutdown,
+ aoeattach,
+ aoewalk,
+ aoestat,
+ aoeopen,
+ devcreate,
+ aoeclose,
+ aoeread,
+ devbread,
+ aoewrite,
+ devbwrite,
+ aoeremove,
+ devwstat,
+ devpower,
+ devconfig,
+};
diff --git a/src/9vx/a/devcons.c b/src/9vx/a/devcons.c
@@ -784,6 +784,7 @@ consread(Chan *c, void *buf, long n, vlong off)
while(!qcanread(lineq)){
if(qread(kbdq, &ch, 1) == 0)
continue;
+ //XXX TODO: startup blocks here
send = 0;
if(ch == 0){
/* flush output on rawoff -> rawon */
diff --git a/src/9vx/a/devether.c b/src/9vx/a/devether.c
@@ -0,0 +1,542 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "ureg.h"
+#include "error.h"
+#include "netif.h"
+
+#include "etherif.h"
+
+#define MEMSIZE (256<<20) // same as ../mmu.c:29 (TODO: var)
+
+static Ether *etherxx[MaxEther];
+
+/*
+ * Attach to ethernet controller number spec (0 when spec is empty).
+ * Raises Ebadarg for a malformed or out-of-range number and Enodev
+ * when no controller is configured in that slot.  The controller's
+ * attach routine, if any, is called before the channel is returned.
+ */
+Chan*
+etherattach(char* spec)
+{
+ ulong ctlrno;
+ char *p;
+ Chan *chan;
+
+ ctlrno = 0;
+ if(spec && *spec){
+ ctlrno = strtoul(spec, &p, 0);
+ /* reject non-numbers, trailing text and numbers past the table */
+ if((ctlrno == 0 && p == spec) || *p || (ctlrno >= MaxEther))
+ error(Ebadarg);
+ }
+ if(etherxx[ctlrno] == 0)
+ error(Enodev);
+
+ chan = devattach('l', spec);
+ if(waserror()){
+ chanfree(chan);
+ nexterror();
+ }
+ chan->dev = ctlrno;
+ if(etherxx[ctlrno]->attach)
+ etherxx[ctlrno]->attach(etherxx[ctlrno]);
+ poperror();
+ return chan;
+}
+
+/* Walk within the controller's name space; handled by the netif code. */
+static Walkqid*
+etherwalk(Chan* chan, Chan* nchan, char** name, int nname)
+{
+	return netifwalk(&etherxx[chan->dev]->ni, chan, nchan, name, nname);
+}
+
+/* Stat a file of the controller; handled by the netif code. */
+static int
+etherstat(Chan* chan, uchar* dp, int n)
+{
+	return netifstat(&etherxx[chan->dev]->ni, chan, dp, n);
+}
+
+/* Open a file of the controller; handled by the netif code. */
+static Chan*
+etheropen(Chan* chan, int omode)
+{
+	return netifopen(&etherxx[chan->dev]->ni, chan, omode);
+}
+
+/* Create entry of the device table; intentionally does nothing. */
+static void
+ethercreate(Chan* ch, char* c, int i, ulong ul)
+{
+}
+
+/* Close a file of the controller; handled by the netif code. */
+static void
+etherclose(Chan* chan)
+{
+	netifclose(&etherxx[chan->dev]->ni, chan);
+}
+
+/*
+ * Read from a file of the controller.  The ifstats file is produced
+ * by the controller's ifstat routine when it has one; everything else
+ * is served by netifread.
+ */
+static long
+etherread(Chan* chan, void* buf, long n, vlong off)
+{
+	Ether *ether;
+	ulong offset = off;
+
+	ether = etherxx[chan->dev];
+	if((chan->qid.type & QTDIR) == 0 && ether->ifstat){
+		/*
+		 * With some controllers it is necessary to reach
+		 * into the chip to extract statistics.
+		 */
+		if(NETTYPE(chan->qid.path) == Nifstatqid)
+			return ether->ifstat(ether, buf, n, offset);
+		else if(NETTYPE(chan->qid.path) == Nstatqid)
+			/* zero-length call: no text wanted, netifread answers below */
+			ether->ifstat(ether, buf, 0, offset);
+	}
+
+	return netifread(&ether->ni, chan, buf, n, offset);
+}
+
+/* Block read from a file of the controller; handled by the netif code. */
+static Block*
+etherbread(Chan* chan, long n, ulong offset)
+{
+	return netifbread(&etherxx[chan->dev]->ni, chan, n, offset);
+}
+
+/* Change the stat of a file of the controller; handled by the netif code. */
+static int
+etherwstat(Chan* chan, uchar* dp, int n)
+{
+	return netifwstat(&etherxx[chan->dev]->ni, chan, dp, n);
+}
+
+/*
+ * Queue a 64-byte trace record for packet pkt on f's input queue:
+ * up to the first 58 bytes of the packet, the packet length in bytes
+ * 58-59 and a millisecond timestamp in bytes 60-63, both big-endian.
+ * Nothing is queued if the queue has no room or no block is available.
+ */
+static void
+etherrtrace(Netfile* f, Etherpkt* pkt, int len)
+{
+ int i, n;
+ Block *bp;
+
+ if(qwindow(f->in) <= 0)
+ return;
+ if(len > 58)
+ n = 58;
+ else
+ n = len;
+ bp = iallocb(64);
+ if(bp == nil)
+ return;
+ memmove(bp->wp, pkt->d, n);
+ i = TK2MS(MACHP(0)->tscticks);
+ bp->wp[58] = len>>8;
+ bp->wp[59] = len;
+ bp->wp[60] = i>>24;
+ bp->wp[61] = i>>16;
+ bp->wp[62] = i>>8;
+ bp->wp[63] = i;
+ bp->wp += 64;
+ qpass(f->in, bp);
+}
+
+/*
+ * Deliver packet bp to every open connection on ether that wants it.
+ * fromwire is non-zero when the caller has no further use for bp; the
+ * block is then passed to one of the connections or freed and 0 is
+ * returned.  Otherwise connections receive copies and bp is returned
+ * to the caller.
+ */
+Block*
+etheriq(Ether* ether, Block* bp, int fromwire)
+{
+	Etherpkt *pkt;
+	ushort type;
+	int len, multi, tome, fromme;
+	Netfile **ep, *f, **fp, *fx;
+	Block *xbp;
+
+	ether->ni.inpackets++;
+
+	pkt = (Etherpkt*)bp->rp;
+	len = BLEN(bp);
+	type = (pkt->type[0]<<8)|pkt->type[1];
+	fx = 0;
+	ep = &ether->ni.f[Ntypes];
+
+	multi = pkt->d[0] & 1;
+	/* check for valid multicast addresses */
+	if(multi && memcmp(pkt->d, ether->ni.bcast, sizeof(pkt->d)) != 0 && ether->ni.prom == 0){
+		if(!activemulti(&ether->ni, pkt->d, sizeof(pkt->d))){
+			if(fromwire){
+				freeb(bp);
+				bp = 0;
+			}
+			return bp;
+		}
+	}
+
+	/* is it for me? */
+	tome = memcmp(pkt->d, ether->ea, sizeof(pkt->d)) == 0;
+	fromme = memcmp(pkt->s, ether->ea, sizeof(pkt->s)) == 0;
+	// if(tome||fromme)
+	// iprint("XXX PACK: %2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux -> %2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux%s[%d]%s\n",
+	// pkt->s[0], pkt->s[1], pkt->s[2],pkt->s[3], pkt->s[4], pkt->s[5],
+	// pkt->d[0], pkt->d[1], pkt->d[2],pkt->d[3], pkt->d[4], pkt->d[5],
+	// (tome ? " <<--" : ""), len, (fromme ? " -->>" : ""));
+	/*
+	 * Multiplex the packet to all the connections which want it.
+	 * If the packet is not to be used subsequently (fromwire != 0),
+	 * attempt to simply pass it into one of the connections, thereby
+	 * saving a copy of the data (usual case hopefully).
+	 */
+	for(fp = ether->ni.f; fp < ep; fp++){
+		if((f = *fp) != nil)
+		if(f->type == type || f->type < 0)
+		if(tome || multi || f->prom){
+			/* Don't want to hear bridged packets */
+			if(f->bridge && !fromwire && !fromme)
+				continue;
+			if(!f->headersonly){
+				/* the first taker gets the original block, after the loop */
+				if(fromwire && fx == 0)
+					fx = f;
+				else if((xbp = iallocb(len)) != nil){
+					memmove(xbp->wp, pkt, len);
+					xbp->wp += len;
+					if(qpass(f->in, xbp) < 0)
+						ether->ni.soverflows++;
+				}
+				else
+					ether->ni.soverflows++;
+			}
+			else
+				etherrtrace(f, pkt, len);
+		}
+	}
+
+	if(fx){
+		if(qpass(fx->in, bp) < 0)
+			ether->ni.soverflows++;
+		return 0;
+	}
+	if(fromwire){
+		freeb(bp);
+		return 0;
+	}
+
+	return bp;
+}
+
+/*
+ * Send packet bp on ether and return its length.  Packets addressed
+ * to this interface are only looped back through etheriq and then
+ * freed; all others are put on the output queue and the controller's
+ * transmit routine, if any, is called.
+ */
+static int
+etheroq(Ether* ether, Block* bp)
+{
+ int len, loopback, s;
+ Etherpkt *pkt;
+
+ ether->ni.outpackets++;
+
+ /*
+ * Check if the packet has to be placed back onto the input queue,
+ * i.e. if it's a loopback or broadcast packet or the interface is
+ * in promiscuous mode.
+ * If it's a loopback packet indicate to etheriq that the data isn't
+ * needed and return, etheriq will pass-on or free the block.
+ * To enable bridging to work, only packets that were originated
+ * by this interface are fed back.
+ */
+ pkt = (Etherpkt*)bp->rp;
+ len = BLEN(bp);
+ loopback = memcmp(pkt->d, ether->ea, sizeof(pkt->d)) == 0;
+ if(loopback || memcmp(pkt->d, ether->ni.bcast, sizeof(pkt->d)) == 0 || ether->ni.prom){
+ /* etheriq is called with fromwire == 0, so bp is still ours afterwards */
+ s = splhi();
+ etheriq(ether, bp, 0);
+ splx(s);
+ }
+
+ if(!loopback){
+ qbwrite(ether->oq, bp);
+ if(ether->transmit != nil)
+ ether->transmit(ether);
+ } else
+ freeb(bp);
+
+ return len;
+}
+
+/*
+ * Write to a file of the controller.
+ * Non-data files: the request is offered to netifwrite first, then
+ * checked for "nonblocking [n]", then handed to the controller's ctl
+ * routine; Ebadctl is raised if nobody accepts it.
+ * Data file: the n bytes are sent as one packet with the source
+ * address overwritten by the interface's own; Etoobig or Etoosmall is
+ * raised for sizes outside the interface's mtu limits.
+ */
+static long
+etherwrite(Chan* chan, void* buf, long n, vlong v)
+{
+	Ether *ether;
+	Block *bp;
+	int nn, onoff;
+	Cmdbuf *cb;
+
+	ether = etherxx[chan->dev];
+	if(NETTYPE(chan->qid.path) != Ndataqid) {
+		nn = netifwrite(&ether->ni, chan, buf, n);
+		if(nn >= 0)
+			return nn;
+		cb = parsecmd(buf, n);
+		if(cb->f[0] && strcmp(cb->f[0], "nonblocking") == 0){
+			/* no argument means on */
+			if(cb->nf <= 1)
+				onoff = 1;
+			else
+				onoff = atoi(cb->f[1]);
+			qnoblock(ether->oq, onoff);
+			free(cb);
+			return n;
+		}
+		free(cb);
+		if(ether->ctl!=nil)
+			return ether->ctl(ether,buf,n);
+
+		error(Ebadctl);
+	}
+
+	if(n > ether->maxmtu)
+		error(Etoobig);
+	if(n < ether->minmtu)
+		error(Etoosmall);
+
+	bp = allocb(n);
+	if(waserror()){
+		freeb(bp);
+		nexterror();
+	}
+	memmove(bp->rp, buf, n);
+	/* force our own address into the source field */
+	memmove(bp->rp+Eaddrlen, ether->ea, Eaddrlen);
+	poperror();
+	bp->wp += n;
+
+	return etheroq(ether, bp);
+}
+
+/*
+ * Block write to a file of the controller.  Writes to non-data files
+ * are forwarded to etherwrite and the block is freed.  Data blocks are
+ * sent as they are after the size check; the block is freed before
+ * Etoobig or Etoosmall is raised.
+ */
+static long
+etherbwrite(Chan* chan, Block* bp, ulong u)
+{
+ Ether *ether;
+ long n;
+
+ n = BLEN(bp);
+ if(NETTYPE(chan->qid.path) != Ndataqid){
+ if(waserror()) {
+ freeb(bp);
+ nexterror();
+ }
+ n = etherwrite(chan, bp->rp, n, 0);
+ poperror();
+ freeb(bp);
+ return n;
+ }
+ ether = etherxx[chan->dev];
+
+ if(n > ether->maxmtu){
+ freeb(bp);
+ error(Etoobig);
+ }
+ if(n < ether->minmtu){
+ freeb(bp);
+ error(Etoosmall);
+ }
+
+ return etheroq(ether, bp);
+}
+
+/*
+ * Registered card types, filled in by addethercard.  The extra slot
+ * beyond MaxEther stays zeroed, so a nil type ends the list.
+ */
+static struct {
+ char* type;
+ int (*reset)(Ether*);
+} cards[MaxEther+1];
+
+/*
+ * Register card type t with reset routine r in the next free slot of
+ * cards.  Panics once MaxEther types have been registered.
+ */
+void
+addethercard(char* t, int (*r)(Ether*))
+{
+	static int ncard;
+	int slot;
+
+	if(ncard == MaxEther)
+		panic("too many ether cards");
+	slot = ncard++;
+	cards[slot].type = t;
+	cards[slot].reset = r;
+}
+
+/*
+ * Convert the text form of an ethernet address in from (Eaddrlen
+ * pairs of hex digits, each optionally followed by ':') into bytes at
+ * to.  Returns 0, or -1 if the text ends before all pairs were read.
+ */
+int
+parseether(uchar *to, char *from)
+{
+	char pair[3];
+	char *s;
+	int k;
+
+	s = from;
+	k = 0;
+	while(k < Eaddrlen){
+		/* both digits of the pair must be present */
+		if(s[0] == 0)
+			return -1;
+		if(s[1] == 0)
+			return -1;
+		pair[0] = s[0];
+		pair[1] = s[1];
+		pair[2] = 0;
+		s += 2;
+		to[k] = strtoul(pair, 0, 16);
+		if(*s == ':')
+			s++;
+		k++;
+	}
+	return 0;
+}
+
+/*
+ * Create and initialise controller number ctlrno using card type
+ * cardno.  A negative cardno selects the first registered card type
+ * after scanning the controller options for an "ea=" address.
+ * Returns nil when there is no such card type or its reset routine
+ * fails; panics if no output queue can be opened.
+ */
+static Ether*
+etherprobe(int cardno, int ctlrno)
+{
+	int i, lg;
+	ulong mb, bsz;
+	Ether *ether;
+	char buf[128], name[32];
+
+	ether = malloc(sizeof(Ether));
+	memset(ether, 0, sizeof(Ether));
+	ether->ctlrno = ctlrno;
+	ether->tbdf = BUSUNKNOWN;
+	ether->ni.mbps = 100;
+	ether->minmtu = ETHERMINTU;
+	ether->maxmtu = ETHERMAXTU;
+
+	if(cardno < 0){
+		/* the break makes this run for cards[0] only, if it exists */
+		for(cardno = 0; cards[cardno].type; cardno++){
+			for(i = 0; i < ether->isac.nopt; i++){
+				if(strncmp(ether->isac.opt[i], "ea=", 3))
+					continue;
+				if(parseether(ether->ea, &ether->isac.opt[i][3]))
+					memset(ether->ea, 0, Eaddrlen);
+			}
+			break;
+		}
+	}
+
+	if(cardno >= MaxEther || cards[cardno].type == nil){
+		free(ether);
+		return nil;
+	}
+	if(cards[cardno].reset(ether) < 0){
+		free(ether);
+		return nil;
+	}
+
+	/*
+	 * IRQ2 doesn't really exist, it's used to gang the interrupt
+	 * controllers together. A device set to IRQ2 will appear on
+	 * the second interrupt controller as IRQ9.
+	 */
+	if(ether->isac.irq == 2)
+		ether->isac.irq = 9;
+	snprint(name, sizeof(name), "ether%d", ctlrno);
+
+	/* announce the controller */
+	i = sprint(buf, "#l%d: %s: %dMbps port 0x%luX irq %d",
+		ctlrno, cards[cardno].type, ether->ni.mbps, ether->isac.port, ether->isac.irq);
+	if(ether->isac.mem)
+		i += sprint(buf+i, " addr 0x%luX", ether->isac.mem);
+	if(ether->isac.size)
+		i += sprint(buf+i, " size 0x%luX", ether->isac.size);
+	i += sprint(buf+i, ": %2.2ux%2.2ux%2.2ux%2.2ux%2.2ux%2.2ux",
+		ether->ea[0], ether->ea[1], ether->ea[2],
+		ether->ea[3], ether->ea[4], ether->ea[5]);
+	sprint(buf+i, "\n");
+	print(buf);
+
+	/* compute log10(ether->ni.mbps) into lg */
+	for(lg = 0, mb = ether->ni.mbps; mb >= 10; lg++)
+		mb /= 10;
+	if (lg > 0)
+		lg--;
+	if (lg > 14)			/* 2^(14+17) = 2^31 */
+		lg = 14;
+	/* allocate larger output queues for higher-speed interfaces */
+	bsz = 1UL << (lg + 17);		/* 2^17 = 128K, bsz = 2^n * 128K */
+	while (bsz > MEMSIZE && bsz >= 128*1024)
+		bsz /= 2;
+
+	netifinit(&ether->ni, name, Ntypes, bsz);
+	/* halve the queue size until one can be opened */
+	while (ether->oq == nil && bsz >= 128*1024) {
+		bsz /= 2;
+		ether->oq = qopen(bsz, Qmsg, 0, 0);
+		ether->ni.limit = bsz;
+	}
+	if(ether->oq == nil)
+		panic("etherreset %s", name);
+	ether->ni.alen = Eaddrlen;
+	memmove(ether->ni.addr, ether->ea, Eaddrlen);
+	memset(ether->ni.bcast, 0xFF, Eaddrlen);
+
+	// iprint("XXX EADDR: %2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux\n",
+	// ether->ea[0], ether->ea[1], ether->ea[2],ether->ea[3], ether->ea[4], ether->ea[5]);
+
+	return ether;
+}
+
+/*
+ * Populate etherxx.  First every controller slot is probed with the
+ * default card (-1); then the slots still empty are offered to each
+ * registered card type in turn, moving to the next type whenever a
+ * probe fails.
+ */
+static void
+etherreset(void)
+{
+	Ether *e;
+	int card, ctlr;
+
+	for(ctlr = 0; ctlr < MaxEther; ctlr++){
+		e = etherprobe(-1, ctlr);
+		if(e != nil)
+			etherxx[ctlr] = e;
+	}
+
+	card = 0;
+	ctlr = 0;
+	for(;;){
+		if(cards[card].type == nil || ctlr >= MaxEther)
+			break;
+		if(etherxx[ctlr] != nil){
+			/* slot already filled by the first pass */
+			ctlr++;
+			continue;
+		}
+		e = etherprobe(card, ctlr);
+		if(e == nil){
+			card++;
+			continue;
+		}
+		etherxx[ctlr] = e;
+		ctlr++;
+	}
+}
+
+/*
+ * Call the shutdown routine of every configured controller; a
+ * controller without one is reported and skipped.
+ */
+static void
+ethershutdown(void)
+{
+	Ether *e;
+	int k;
+
+	for(k = 0; k < MaxEther; k++){
+		e = etherxx[k];
+		if(e != nil){
+			if(e->shutdown != nil)
+				(*e->shutdown)(e);
+			else
+				print("#l%d: no shutdown fuction\n", k);
+		}
+	}
+}
+
+
+#define POLY 0xedb88320
+
+/*
+ * Bit-at-a-time 32-bit crc over len bytes at p, least significant
+ * bit first, starting from all ones; the result is not inverted.
+ * Slow, but needs no table.
+ */
+ulong
+ethercrc(uchar *p, int len)
+{
+	int nbyte, nbit;
+	ulong crc, byte;
+
+	crc = 0xffffffff;
+	for(nbyte = 0; nbyte < len; nbyte++){
+		byte = p[nbyte];
+		for(nbit = 0; nbit < 8; nbit++){
+			if((crc ^ byte) & 1)
+				crc = (crc>>1) ^ POLY;
+			else
+				crc = (crc>>1) ^ 0;
+			byte >>= 1;
+		}
+	}
+	return crc;
+}
+
+/*
+ * Device switch entry for the ethernet driver, device character 'l'.
+ * Entries named ether* are implemented in this file; devinit and
+ * devremove are the generic routines.
+ */
+Dev etherdevtab = {
+ 'l',
+ "ether",
+
+ etherreset,
+ devinit,
+ ethershutdown,
+ etherattach,
+ etherwalk,
+ etherstat,
+ etheropen,
+ ethercreate,
+ etherclose,
+ etherread,
+ etherbread,
+ etherwrite,
+ etherbwrite,
+ devremove,
+ etherwstat,
+};
diff --git a/src/9vx/a/devsd.c b/src/9vx/a/devsd.c
@@ -72,7 +72,7 @@ enum {
((p)<<PartSHIFT)|((t)<<TypeSHIFT))
-static void
+void
sdaddpart(SDunit* unit, char* name, uvlong start, uvlong end)
{
SDpart *pp;
@@ -135,6 +135,19 @@ sdaddpart(SDunit* unit, char* name, uvlong start, uvlong end)
pp->valid = 1;
}
+/* Return unit's valid partition called name, or nil if there is none. */
+SDpart*
+sdfindpart(SDunit *unit, char *name)
+{
+	int i;
+
+	/* only slots sdaddpart marked valid are live; never compare or return the others */
+	for(i = 0; i < unit->npart; i++)
+		if(unit->part[i].valid && strcmp(unit->part[i].perm.name, name) == 0)
+			return &unit->part[i];
+	return nil;
+}
+
static void
sddelpart(SDunit* unit, char* name)
{
@@ -198,6 +211,7 @@ sdinitpart(SDunit* unit)
if(unit->sectors){
sdincvers(unit);
sdaddpart(unit, "data", 0, unit->sectors);
+ partition(unit);
#if 0
/*
* Use partitions passed from boot program,
diff --git a/src/9vx/a/dosfs.h b/src/9vx/a/dosfs.h
@@ -0,0 +1,62 @@
+typedef struct Dosboot Dosboot;
+typedef struct Dos Dos;
+typedef struct Dosdir Dosdir;
+typedef struct Dosfile Dosfile;
+typedef struct Dospart Dospart;
+
+struct Dospart /* 16 bytes, all uchar, so no padding; presumably overlays an on-disk entry -- confirm */
+{
+ uchar flag; /* active flag */
+ uchar shead; /* starting head */
+ uchar scs[2]; /* starting cylinder/sector */
+ uchar type; /* partition type */
+ uchar ehead; /* ending head */
+ uchar ecs[2]; /* ending cylinder/sector */
+ uchar start[4]; /* starting sector; raw bytes, presumably decoded with GLONG -- confirm */
+ uchar len[4]; /* length in sectors; raw bytes like start */
+};
+
+#define FAT12 0x01 /* partition type codes, presumably compared with Dospart.type -- confirm */
+#define FAT16 0x04
+#define EXTEND 0x05 /* extended partition */
+#define FATHUGE 0x06
+#define FAT32 0x0b
+#define FAT32X 0x0c
+#define EXTHUGE 0x0f /* extended partition, large-disk variant */
+#define DMDDO 0x54
+#define PLAN9 0x39
+#define LEXTEND 0x85 /* presumably the Linux extended type -- confirm */
+
+struct Dosfile{ /* per-file state; embedded in File's union (fs.h) */
+ Dos *dos; /* owning dos file system */
+ char name[8]; /* fixed 8-char name: no room for a terminating NUL when full */
+ char ext[3]; /* fixed 3-char extension, likewise unterminated when full */
+ uchar attr;
+ long length; /* presumably the file size in bytes -- confirm */
+ long pstart; /* physical start cluster address */
+ long pcurrent; /* physical current cluster address */
+ long lcurrent; /* logical current cluster address */
+ long offset;
+};
+
+struct Dos{ /* per-file-system state; embedded in Fs's union (fs.h) */
+ long start; /* start of file system */
+ int sectsize; /* in bytes */
+ int clustsize; /* in sectors */
+ int clustbytes; /* in bytes */
+ int nresrv; /* sectors */
+ int nfats; /* usually 2 */
+ int rootsize; /* number of entries */
+ int volsize; /* in sectors */
+ int mediadesc;
+ int fatsize; /* in sectors */
+ int fatclusters;
+ int fatbits; /* 12 or 16 */
+ long fataddr; /* sector number */
+ long rootaddr; /* presumably a sector number like fataddr -- confirm */
+ long rootclust;
+ long dataaddr; /* presumably a sector number like fataddr -- confirm */
+ long freeptr;
+};
+
+extern int dosinit(Fs*);
diff --git a/src/9vx/a/etherif.h b/src/9vx/a/etherif.h
@@ -0,0 +1,39 @@
+enum {
+ MaxEther = 48, /* number of etherxx[] slots walked by etherreset and ethershutdown */
+ Ntypes = 8, /* passed to netifinit when a controller's Netif is set up */
+};
+
+typedef struct Ether Ether;
+struct Ether {
+ ISAConf isac;
+
+ int ctlrno;
+ int tbdf; /* type+busno+devno+funcno */
+ int minmtu;
+ int maxmtu;
+ uchar ea[Eaddrlen]; /* devether.c copies this into ni.addr */
+
+ void (*attach)(Ether*); /* filled in by reset routine */
+ void (*detach)(Ether*);
+ void (*transmit)(Ether*);
+ void (*interrupt)(Ureg*, void*);
+ long (*ifstat)(Ether*, void*, long, ulong);
+ long (*ctl)(Ether*, void*, long); /* custom ctl messages */
+ void (*power)(Ether*, int); /* power on/off */
+ void (*shutdown)(Ether*); /* shutdown hardware before reboot; may be nil (ethershutdown checks) */
+ void *ctlr;
+
+ Queue* oq; /* opened in devether.c with qopen(bsz, Qmsg, 0, 0) */
+
+ Netif ni; /* set up by netifinit; devether.c then fills alen, addr and bcast */
+};
+
+extern Block* etheriq(Ether*, Block*, int);
+extern void addethercard(char*, int(*)(Ether*));
+extern ulong ethercrc(uchar*, int);
+extern int parseether(uchar*, char*);
+
+#define NEXT(x, l) (((uint)(x)+1)%(l)) /* index after x in a ring of l entries */
+#define PREV(x, l) (((x) == 0) ? (l)-1: (x)-1) /* index before x in a ring of l entries; evaluates x twice */
+#define HOWMANY(x, y) (((x)+((y)-1))/(y)) /* x/y rounded up; evaluates y twice */
+#define ROUNDUP(x, y) (HOWMANY((x), (y))*(y)) /* x rounded up to a multiple of y; evaluates y three times */
diff --git a/src/9vx/a/fns.ed b/src/9vx/a/fns.ed
@@ -16,4 +16,54 @@ int tailkmesg(char*, int);
void trap(Ureg*);
void uartecho(char*, int);
void uartinit(int);
+
+#define GSHORT(p)	(((p)[1]<<8)|(p)[0])	/* 16-bit little-endian value at p; p should point to uchar */
+#define GLONG(p)	(((uint)GSHORT((p)+2)<<16)|GSHORT(p))	/* 32-bit little-endian; uint cast avoids shifting into int's sign bit */
+
+void __plock(Psleep*);
+void __punlock(Psleep*);
+void __pwakeup(Psleep*);
+void __psleep(Psleep*);
+
+extern int tracelock; /* nonzero: the lockgen-based macros iprint the call site before locking */
+
+#define lockfngen(type) __ ## type /* qlock -> __qlock: the function that does the work */
+
+#define lockgen(type, arg) \
+ do { \
+ if (tracelock) { \
+ iprint("%s %p %s %d\n", (#type), (arg), __FILE__, __LINE__); \
+ lockfngen(type)((arg)); \
+ } else { \
+ lockfngen(type)((arg)); \
+ } \
+ } while (0)
+
+#define qlock(x) lockgen(qlock, (x)) /* lockgen users expand to statements, not expressions */
+#define qunlock(x) lockgen(qunlock, (x))
+#define rlock(x) lockgen(rlock, (x))
+#define runlock(x) lockgen(runlock, (x))
+#define wlock(x) lockgen(wlock, (x))
+#define wunlock(x) lockgen(wunlock, (x))
+#define plock(x) lockgen(plock, (x))
+#define punlock(x) lockgen(punlock, (x))
+#define pwakeup(x) lockgen(pwakeup, (x))
+#define psleep(x) lockgen(psleep, (x))
+// #define lock(x) lockgen(lock, (x))
+// #define unlock(x) lockgen(unlock, (x))
+#define lock(x) __lock(x) /* lock/unlock bypass lockgen, so they are never traced */
+#define unlock(x) __unlock(x)
+#define canqlock __canqlock /* plain renames: the can* results are tested by callers */
+#define canrlock __canrlock
+
+#define LOCK(x) lock(&((x)->lk)) /* upper-case forms act on the lk, qlock or rwlock member of *x */
+#define UNLOCK(x) unlock(&((x)->lk))
+#define CANQLOCK(x) canqlock(&((x)->qlock))
+#define QLOCK(x) qlock(&((x)->qlock))
+#define QUNLOCK(x) qunlock(&((x)->qlock))
+#define CANRLOCK(x) canrlock(&((x)->rwlock))
+#define RLOCK(x) rlock(&((x)->rwlock))
+#define RUNLOCK(x) runlock(&((x)->rwlock))
+#define WLOCK(x) wlock(&((x)->rwlock))
+#define WUNLOCK(x) wunlock(&((x)->rwlock))
.
diff --git a/src/9vx/a/fns.h b/src/9vx/a/fns.h
@@ -167,8 +167,53 @@ void *uvalidaddr(ulong addr, ulong len, int write);
int isuaddr(void*);
void setsigsegv(int invx32);
-void plock(Psleep*);
-void punlock(Psleep*);
-void pwakeup(Psleep*);
-void psleep(Psleep*);
+#define GSHORT(p)	(((p)[1]<<8)|(p)[0])	/* 16-bit little-endian value at p; p should point to uchar */
+#define GLONG(p)	(((uint)GSHORT((p)+2)<<16)|GSHORT(p))	/* 32-bit little-endian; uint cast avoids shifting into int's sign bit */
+
+void __plock(Psleep*);
+void __punlock(Psleep*);
+void __pwakeup(Psleep*);
+void __psleep(Psleep*);
+
+extern int tracelock; /* nonzero: the lockgen-based macros iprint the call site before locking */
+
+#define lockfngen(type) __ ## type /* qlock -> __qlock: the function that does the work */
+
+#define lockgen(type, arg) \
+ do { \
+ if (tracelock) { \
+ iprint("%s %p %s %d\n", (#type), (arg), __FILE__, __LINE__); \
+ lockfngen(type)((arg)); \
+ } else { \
+ lockfngen(type)((arg)); \
+ } \
+ } while (0)
+
+#define qlock(x) lockgen(qlock, (x)) /* lockgen users expand to statements, not expressions */
+#define qunlock(x) lockgen(qunlock, (x))
+#define rlock(x) lockgen(rlock, (x))
+#define runlock(x) lockgen(runlock, (x))
+#define wlock(x) lockgen(wlock, (x))
+#define wunlock(x) lockgen(wunlock, (x))
+#define plock(x) lockgen(plock, (x))
+#define punlock(x) lockgen(punlock, (x))
+#define pwakeup(x) lockgen(pwakeup, (x))
+#define psleep(x) lockgen(psleep, (x))
+// #define lock(x) lockgen(lock, (x))
+// #define unlock(x) lockgen(unlock, (x))
+#define lock(x) __lock(x) /* lock/unlock bypass lockgen, so they are never traced */
+#define unlock(x) __unlock(x)
+#define canqlock __canqlock /* plain renames: the can* results are tested by callers */
+#define canrlock __canrlock
+
+#define LOCK(x) lock(&((x)->lk)) /* upper-case forms act on the lk, qlock or rwlock member of *x */
+#define UNLOCK(x) unlock(&((x)->lk))
+#define CANQLOCK(x) canqlock(&((x)->qlock))
+#define QLOCK(x) qlock(&((x)->qlock))
+#define QUNLOCK(x) qunlock(&((x)->qlock))
+#define CANRLOCK(x) canrlock(&((x)->rwlock))
+#define RLOCK(x) rlock(&((x)->rwlock))
+#define RUNLOCK(x) runlock(&((x)->rwlock))
+#define WLOCK(x) wlock(&((x)->rwlock))
+#define WUNLOCK(x) wunlock(&((x)->rwlock))
diff --git a/src/9vx/a/fs.h b/src/9vx/a/fs.h
@@ -0,0 +1,38 @@
+typedef struct File File;
+typedef struct Fs Fs;
+
+#include "dosfs.h"
+#include "kfs.h"
+
+struct File{ /* a file on either file system type; also the argument of Fs.read and Fs.walk */
+ union{ /* anonymous: members are reached directly as f->dos, f->kfs, f->walked */
+ Dosfile dos;
+ Kfsfile kfs;
+ int walked;
+ };
+ Fs *fs; /* presumably the file system this file lives on -- confirm */
+ char *path;
+};
+
+struct Fs{ /* one file system instance plus the routines used to reach it */
+ union { /* anonymous: per-type state, reached directly as fs->dos or fs->kfs */
+ Dos dos;
+ Kfs kfs;
+ };
+ int dev; /* device id */
+ long (*diskread)(Fs*, void*, long); /* disk read routine */
+ vlong (*diskseek)(Fs*, vlong); /* disk seek routine */
+ long (*read)(File*, void*, long);
+ int (*walk)(File*, char*);
+ File root;
+};
+
+/*
+extern int chatty;
+extern int dotini(Fs*);
+extern int fswalk(Fs*, char*, File*);
+extern int fsread(File*, void*, long);
+extern int fsboot(Fs*, char*, Boot*);
+*/
+
+#define BADPTR(x)	((ulong)(x) < 0x80000000)	/* true when x, taken as an address, is below 0x80000000; x is parenthesized so the cast covers the whole argument */
diff --git a/src/9vx/a/ip.ed b/src/9vx/a/ip.ed
@@ -0,0 +1,2297 @@
+diff -e ip.orig/arp.c ip/arp.c
+643c
+ QUNLOCK(arp);
+.
+613,614c
+ RUNLOCK(ifc);
+ QLOCK(arp);
+.
+609c
+ QUNLOCK(arp); /* for icmpns */
+.
+589c
+ if((a->rxtsrem <= 0) || !(CANRLOCK(ifc)) || (a->ifcid != ifc->ifcid)){
+.
+574c
+ QLOCK(arp);
+.
+557c
+ QUNLOCK(arp);
+.
+554c
+ QLOCK(arp);
+.
+511c
+ QUNLOCK(arp);
+.
+481c
+ QLOCK(arp);
+.
+444c
+ QUNLOCK(arp);
+.
+426c
+ QLOCK(arp);
+.
+398c
+ QUNLOCK(arp);
+.
+380c
+ RUNLOCK(ifc);
+.
+375c
+ RLOCK(ifc);
+.
+372c
+ RUNLOCK(ifc);
+.
+366c
+ QUNLOCK(arp);
+.
+337c
+ QLOCK(arp);
+.
+292c
+ QUNLOCK(arp);
+.
+260c
+ QUNLOCK(arp);
+.
+258c
+arprelease(Arp *arp, Arpent* ae)
+.
+250c
+ QUNLOCK(arp);
+.
+219c
+ QLOCK(arp);
+.
+50c
+int ReTransTimer = RETRANS_TIMER;
+.
+48c
+#define haship(s) ((ulong)((s)[IPaddrlen-1])%NHASH)
+.
+36c
+ QLock qlock;
+.
+14d
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/chandial.c ip/chandial.c
+6,7c
+#include "error.h"
+#include "ip/ip.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/devip.c ip/devip.c
+1430c
+ QUNLOCK(c);
+.
+1418c
+ QUNLOCK(c);
+.
+1404,1411c
+ QUNLOCK(c);
+.
+1399c
+ QLOCK(c);
+.
+1349c
+ QUNLOCK(c);
+.
+1326,1328d
+1322,1323d
+1318c
+ QUNLOCK(c);
+.
+1310c
+ if(CANQLOCK(c)){
+.
+1294c
+ QLOCK(c);
+.
+1185c
+ QUNLOCK(c);
+.
+1130c
+ QUNLOCK(c);
+.
+1128c
+ QLOCK(c);
+.
+1033c
+ QLOCK(c);
+.
+1029c
+ QLOCK(c);
+.
+1027c
+ QUNLOCK(c);
+.
+980c
+ QLOCK(c);
+.
+976c
+ QLOCK(c);
+.
+974c
+ QUNLOCK(c);
+.
+831c
+ QUNLOCK(p);
+.
+820,826c
+ QUNLOCK(p);
+.
+793c
+ QLOCK(p);
+.
+765c
+ QUNLOCK(p);
+.
+760c
+ QUNLOCK(p);
+.
+748c
+ QLOCK(p);
+.
+582c
+ QUNLOCK(cv);
+.
+561c
+ QUNLOCK(cv);
+.
+558c
+ QLOCK(cv);
+.
+516c
+ipremove(Chan* _)
+.
+510c
+ipcreate(Chan* _, char* __, int ___, ulong ____)
+.
+494c
+ QUNLOCK(cv);
+.
+487c
+ QLOCK(cv);
+.
+470c
+ QUNLOCK(cv);
+.
+468c
+ QLOCK(cv);
+.
+447,448c
+ QUNLOCK(cv);
+ QUNLOCK(p);
+.
+431,432c
+ QUNLOCK(cv);
+ QUNLOCK(p);
+.
+429c
+ QLOCK(cv);
+.
+427c
+ QLOCK(p);
+.
+415c
+ QUNLOCK(p);
+.
+411c
+ QUNLOCK(p);
+.
+409c
+ QLOCK(p);
+.
+174c
+ipgen(Chan *c, char* __ch, Dirtab* __dt, int __i, int s, Dir *dp)
+.
+50c
+#define QID(p, c, y) ( ((uint)(p)<<(Shiftproto)) | ((uint)(c)<<Shiftconv) | (y) )
+.
+6,7c
+#include "error.h"
+#include "ip/ip.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/esp.c ip/esp.c
+1106a
+
+
+#ifdef notdef
+enum {
+ RC4forward= 10*1024*1024, /* maximum skip forward */
+ RC4back = 100*1024, /* maximum look back */
+};
+
+typedef struct Esprc4 Esprc4;
+struct Esprc4
+{
+ ulong cseq; /* current byte sequence number */
+ RC4state current;
+
+ int ovalid; /* old is valid */
+ ulong lgseq; /* last good sequence */
+ ulong oseq; /* old byte sequence number */
+ RC4state old;
+};
+
+static void rc4espinit(Espcb *ecb, char *name, uchar *k, int n);
+
+static int
+rc4cipher(Espcb *ecb, uchar *p, int n)
+{
+ Esprc4 *esprc4;
+ RC4state tmpstate;
+ ulong seq;
+ long d, dd;
+
+ if(n < 4)
+ return 0;
+
+ esprc4 = ecb->espstate;
+ if(ecb->incoming) {
+ seq = nhgetl(p);
+ p += 4;
+ n -= 4;
+ d = seq-esprc4->cseq;
+ if(d == 0) {
+ rc4(&esprc4->current, p, n);
+ esprc4->cseq += n;
+ if(esprc4->ovalid) {
+ dd = esprc4->cseq - esprc4->lgseq;
+ if(dd > RC4back)
+ esprc4->ovalid = 0;
+ }
+ } else if(d > 0) {
+print("esp rc4cipher: missing packet: %uld %ld\n", seq, d); /* this link is hosed */
+ if(d > RC4forward) {
+ strcpy(up->errstr, "rc4cipher: skipped too much");
+ return 0;
+ }
+ esprc4->lgseq = seq;
+ if(!esprc4->ovalid) {
+ esprc4->ovalid = 1;
+ esprc4->oseq = esprc4->cseq;
+ memmove(&esprc4->old, &esprc4->current,
+ sizeof(RC4state));
+ }
+ rc4skip(&esprc4->current, d);
+ rc4(&esprc4->current, p, n);
+ esprc4->cseq = seq+n;
+ } else {
+print("esp rc4cipher: reordered packet: %uld %ld\n", seq, d);
+ dd = seq - esprc4->oseq;
+ if(!esprc4->ovalid || -d > RC4back || dd < 0) {
+ strcpy(up->errstr, "rc4cipher: too far back");
+ return 0;
+ }
+ memmove(&tmpstate, &esprc4->old, sizeof(RC4state));
+ rc4skip(&tmpstate, dd);
+ rc4(&tmpstate, p, n);
+ return 1;
+ }
+
+ /* move old state up */
+ if(esprc4->ovalid) {
+ dd = esprc4->cseq - RC4back - esprc4->oseq;
+ if(dd > 0) {
+ rc4skip(&esprc4->old, dd);
+ esprc4->oseq += dd;
+ }
+ }
+ } else {
+ hnputl(p, esprc4->cseq);
+ p += 4;
+ n -= 4;
+ rc4(&esprc4->current, p, n);
+ esprc4->cseq += n;
+ }
+ return 1;
+}
+
+static void
+rc4espinit(Espcb *ecb, char *name, uchar *k, int n)
+{
+ Esprc4 *esprc4;
+
+ /* bits to bytes */
+ n = (n+7)>>3;
+ esprc4 = smalloc(sizeof(Esprc4));
+ memset(esprc4, 0, sizeof(Esprc4));
+ setupRC4state(&esprc4->current, k, n);
+ ecb->espalg = name;
+ ecb->espblklen = 4;
+ ecb->espivlen = 4;
+ ecb->cipher = rc4cipher;
+ ecb->espstate = esprc4;
+}
+#endif
+.
+1056,1081d
+1048,1050c
+ ecb->espblklen = 8;
+ ecb->espivlen = 8;
+.
+1045c
+ for(i=0; i<8; i++)
+.
+1040,1042c
+ /* bits to bytes */
+ n = (n+7)>>3;
+ if(n > 8)
+ n = 8;
+.
+1037c
+ uchar key[8], ivec[8];
+.
+1035c
+desespinit(Espcb *ecb, char *name, uchar *k, int n)
+.
+1019,1033d
+1013,1014c
+ memmove(p, ds->ivec, 8);
+ for(p += 8; p < ep; p += 8){
+ pp = p;
+ ip = ds->ivec;
+ for(eip = ip+8; ip < eip; )
+ *pp++ ^= *ip++;
+ block_cipher(ds->expanded, p, 0);
+ memmove(ds->ivec, p, 8);
+ }
+.
+1010,1011c
+ memmove(ds->ivec, p, 8);
+ p += 8;
+ while(p < ep){
+ memmove(tmp, p, 8);
+ block_cipher(ds->expanded, p, 1);
+ tp = tmp;
+ ip = ds->ivec;
+ for(eip = ip+8; ip < eip; ){
+ *p++ ^= *ip;
+ *ip++ = *tp++;
+ }
+ }
+.
+1008a
+ ep = p + n;
+.
+1006a
+ uchar tmp[8];
+ uchar *pp, *tp, *ip, *eip, *ep;
+.
+999,1003d
+993c
+ ecb->ahlen = 12;
+.
+990c
+ klen >>= 3; /* convert to bytes */
+
+.
+986c
+md5ahinit(Espcb *ecb, char *name, uchar *key, int klen)
+.
+979c
+ seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+.
+968c
+ digest = md5(opad, 64, nil, nil);
+.
+966c
+ digest = md5(ipad, 64, nil, nil);
+.
+959,962c
+ for(i=0; i<64; i++){
+ ipad[i] = 0x36;
+ opad[i] = 0x5c;
+ }
+ ipad[64] = opad[64] = 0;
+ for(i=0; i<klen; i++){
+.
+957a
+ uchar innerhash[MD5dlen];
+.
+956d
+954a
+ uchar ipad[65], opad[65];
+.
+796,952c
+void
+.
+790c
+ ecb->ahlen = 12;
+.
+786c
+	klen >>= 3;	/* convert bits to bytes, as md5ahinit does; >>= 8 turned a 128-bit key length into 0 */
+.
+782c
+shaahinit(Espcb *ecb, char *name, uchar *key, int klen)
+.
+775c
+ seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+.
+772a
+ int r;
+.
+771d
+764c
+ digest = sha1(opad, 64, nil, nil);
+.
+762c
+ digest = sha1(ipad, 64, nil, nil);
+.
+755,758c
+ for(i=0; i<64; i++){
+ ipad[i] = 0x36;
+ opad[i] = 0x5c;
+ }
+ ipad[64] = opad[64] = 0;
+ for(i=0; i<klen; i++){
+.
+753a
+ uchar innerhash[SHA1dlen];
+.
+752d
+750a
+ uchar ipad[65], opad[65];
+.
+743,748c
+void
+.
+735c
+nullahinit(Espcb *ecb, char *name, uchar* _, int __)
+.
+729c
+nullauth(Espcb* _, uchar* __, int ___, uchar* ____)
+.
+720c
+nullespinit(Espcb *ecb, char *name, uchar* _, int __)
+.
+714c
+nullcipher(Espcb* _, uchar* __, int ___)
+.
+708,712d
+647c
+ QUNLOCK(c);
+.
+642c
+ QLOCK(c);
+.
+632c
+ QUNLOCK(c);
+.
+627c
+ QLOCK(c);
+.
+606c
+ QUNLOCK(esp);
+.
+600,601c
+ spi = nhgets(h->espspi);
+ QLOCK(esp);
+ c = convlookup(esp, spi);
+.
+597,598c
+ h = (Esp4hdr*)(bp->rp);
+.
+595c
+ ulong spi;
+.
+593a
+ Esp4hdr *h;
+.
+590d
+568c
+ QUNLOCK(c);
+.
+565c
+ qpass(c->rq, bp);
+.
+560,561c
+ netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", raddr,
+ laddr, spi);
+.
+557,558d
+547c
+ bp->rp += hdrlen + ecb->espivlen;
+.
+539,541c
+ QUNLOCK(c);
+ netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%d\n",
+ raddr, laddr, spi);
+.
+535c
+ et = (Esptail*)(bp->rp + hdrlen + payload);
+.
+523,529c
+ if(!ecb->cipher(ecb, bp->rp + hdrlen, payload)) {
+ QUNLOCK(c);
+print("esp: cipher failed %I -> %I!%ld: %s\n", raddr, laddr, spi, up->errstr);
+ netlog(f, Logesp, "esp: cipher failed %I -> %I!%d: %s\n", raddr,
+ laddr, spi, up->errstr);
+.
+517,519c
+ QUNLOCK(c);
+ netlog(f, Logesp, "esp: bad length %I -> %I!%d payload=%d BLEN=%d\n",
+ raddr, laddr, spi, payload, BLEN(bp));
+.
+515c
+ payload = BLEN(bp) - hdrlen - ecb->ahlen;
+.
+507,510c
+ QUNLOCK(c);
+print("esp: bad auth %I -> %I!%ld\n", raddr, laddr, spi);
+ netlog(f, Logesp, "esp: bad auth %I -> %I!%d\n", raddr,
+ laddr, spi);
+.
+502,505c
+ espspi = version == V4? ((Esp4hdr*)bp->rp)->espspi:
+ ((Esp6hdr*)bp->rp)->espspi;
+.
+493,496c
+ if(BLEN(bp) < hdrlen + ecb->espivlen + Esptaillen + ecb->ahlen) {
+ QUNLOCK(c);
+ netlog(f, Logesp, "esp: short block %I -> %I!%d\n", raddr,
+ laddr, spi);
+.
+485,486c
+ QLOCK(c);
+ QUNLOCK(esp);
+.
+477,479c
+ QUNLOCK(esp);
+ netlog(f, Logesp, "esp: no conv %I -> %I!%d\n", raddr,
+ laddr, spi);
+.
+475c
+ c = convlookup(esp, spi);
+.
+473c
+ if (version == V4) {
+ eh4 = (Esp4hdr*)bp->rp;
+ spi = nhgetl(eh4->espspi);
+ v4tov6(raddr, eh4->espsrc);
+ v4tov6(laddr, eh4->espdst);
+ } else {
+ eh6 = (Esp6hdr*)bp->rp;
+ spi = nhgetl(eh6->espspi);
+ ipmove(raddr, eh6->src);
+ ipmove(laddr, eh6->dst);
+ }
+
+ QLOCK(esp);
+.
+471d
+464,466c
+ bp = pullupblock(bp, hdrlen + Esptaillen);
+.
+462a
+ if (bp == nil || BLEN(bp) == 0) {
+ /* get enough to identify the IP version */
+ bp = pullupblock(bp, IP4HDR);
+ if(bp == nil) {
+ netlog(f, Logesp, "esp: short packet\n");
+ return;
+ }
+ }
+ eh4 = (Esp4hdr*)bp->rp;
+ version = ((eh4->vihl & 0xf0) == IP_VER4? V4: V6);
+ hdrlen = version == V4? Esp4hdrlen: Esp6hdrlen;
+.
+459,460c
+ uchar *auth, *espspi;
+ ulong spi;
+ int payload, nexthdr, version, hdrlen;
+.
+457c
+ uchar raddr[IPaddrlen], laddr[IPaddrlen];
+.
+453,454c
+ Esp4hdr *eh4;
+ Esp6hdr *eh6;
+ Esptail *et;
+ Userhdr *uh;
+.
+451c
+espiput(Proto *esp, Ipifc* _, Block *bp)
+.
+446,449d
+440c
+ if (version == V4)
+.
+438c
+ QUNLOCK(c);
+.
+434,435c
+ ecb->auth(ecb, bp->rp + iphdrlen, (hdrlen - iphdrlen) +
+.
+429,431d
+425a
+ hnputl(eh6->espspi, ecb->spi);
+ hnputl(eh6->espseq, ++ecb->seq);
+.
+424d
+420,422d
+414a
+ hnputl(eh4->espspi, ecb->spi);
+ hnputl(eh4->espseq, ++ecb->seq);
+.
+411,413c
+ /* fill in head */
+ if (version == V4) {
+.
+407,409c
+ ecb->cipher(ecb, bp->rp + hdrlen, payload + pad + Esptaillen);
+ auth = bp->rp + hdrlen + payload + pad + Esptaillen;
+.
+401c
+ eh4 = (Esp4hdr *)bp->rp;
+ eh6 = (Esp6hdr *)bp->rp;
+ et = (Esptail*)(bp->rp + hdrlen + payload + pad);
+.
+383,384c
+ bp = padblock(bp, hdrlen + ecb->espivlen);
+.
+370c
+ QUNLOCK(c);
+.
+363c
+ QLOCK(c);
+.
+358c
+ version = ipvers(c);
+ iphdrlen = version == V4? IP4HDR: IP6HDR;
+ hdrlen = version == V4? Esp4hdrlen: Esp6hdrlen;
+
+.
+356c
+ Espcb *ecb;
+ Block *bp;
+ int nexthdr, payload, pad, align, version, hdrlen, iphdrlen;
+ uchar *auth;
+.
+353d
+347,349d
+299,344d
+284,297d
+274c
+ipvers(Conv *c)
+.
+221c
+ QUNLOCK(c->p);
+.
+215c
+ QLOCK(c->p);
+.
+207,210c
+ parseip(c->raddr, argv[1]);
+.
+192c
+ char *p, *pp;
+ char *e = nil;
+.
+182,186c
+ "null", 0, nullahinit,
+ "hmac_sha1_96", 128, shaahinit, /* rfc2404 */
+// "aes_xcbc_mac_96", 128, aesahinit, /* rfc3566 */
+ "hmac_md5_96", 128, md5ahinit, /* rfc2403 */
+ nil, 0, nil,
+.
+170,177c
+ "null", 0, nullespinit,
+// "des3_cbc", 192, des3espinit, /* rfc2451 */
+// "aes_128_cbc", 128, aescbcespinit, /* rfc3602 */
+// "aes_ctr", 128, aesctrespinit, /* rfc3686 */
+ "des_56_cbc", 64, desespinit, /* rfc2405, deprecated */
+// "rc4_128", 128, rc4espinit, /* gone in rfc4305 */
+ nil, 0, nil,
+.
+163,166c
+static void nullahinit(Espcb*, char*, uchar *key, int keylen);
+static void shaahinit(Espcb*, char*, uchar *key, int keylen);
+static void md5ahinit(Espcb*, char*, uchar *key, int keylen);
+.
+157,161c
+static void nullespinit(Espcb*, char*, uchar *key, int keylen);
+static void desespinit(Espcb *ecb, char *name, uchar *k, int n);
+.
+150c
+ void (*init)(Espcb*, char* name, uchar *key, int keylen);
+.
+143d
+137d
+131d
+127c
+ int header; /* user user level header */
+.
+96,107d
+86,87c
+ /* Ip6hdr; */
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */
+ uchar ploadlen[2]; /* payload length: packet length - 40 */
+ uchar proto; /* next header type */
+ uchar ttl; /* hop limit */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+
+ /* Esphdr; */
+ uchar espspi[4]; /* Security parameter index */
+ uchar espseq[4]; /* Sequence number */
+.
+80c
+ /* Esphdr; */
+ uchar espspi[4]; /* Security parameter index */
+ uchar espseq[4]; /* Sequence number */
+.
+58,64c
+ * tunnel-mode layout: IP | ESP | TCP/UDP | user data.
+ * transport-mode layout is: ESP | IP | TCP/UDP | user data.
+.
+54d
+42,47d
+32,35c
+enum
+{
+.
+30a
+typedef struct Esppriv Esppriv;
+typedef struct Espcb Espcb;
+typedef struct Algorithm Algorithm;
+.
+26,28d
+20,23c
+typedef struct Esphdr Esphdr;
+.
+14c
+#include "error.h"
+.
+10c
+#include "lib.h"
+.
+6,7c
+ * TODO: update to match rfc4303.
+.
+3,4d
+diff -e ip.orig/ethermedium.c ip/ethermedium.c
+536c
+ if((sflag = ipv6anylocal(ifc, ipsrc)) != 0)
+.
+429c
+etherremmulti(Ipifc *ifc, uchar *a, uchar *_)
+.
+407c
+etheraddmulti(Ipifc *ifc, uchar *a, uchar *_)
+.
+401c
+ RUNLOCK(ifc);
+.
+392c
+ RUNLOCK(ifc);
+.
+387c
+ if(!CANRLOCK(ifc)){
+.
+362c
+ RUNLOCK(ifc);
+.
+353c
+ RUNLOCK(ifc);
+.
+348c
+ if(!CANRLOCK(ifc)){
+.
+269c
+ * called by ipoput with a single block to write with ifc RLOCK'd
+.
+123a
+
+.
+8c
+#include "netif.h"
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/gre.c ip/gre.c
+968c
+ gre->ptclsize = 0;
+.
+919,948d
+894,916c
+ return "unknown control request";
+.
+885,892d
+881,883c
+ else if(strcmp(f[0], "cooked") == 0){
+ gpriv->raw = 0;
+ return nil;
+.
+696,879c
+ gpriv = c->p->priv;
+ if(n == 1){
+ if(strcmp(f[0], "raw") == 0){
+ gpriv->raw = 1;
+ return nil;
+.
+694c
+ GREpriv *gpriv;
+.
+691,692c
+char*
+grectl(Conv *c, char **f, int n)
+.
+681,688c
+ return snprint(buf, len, "gre: len %lud\n", gpriv->lenerr);
+.
+675,679d
+659,660c
+ if(qlen(c->rq) > 64*1024)
+ freeblist(bp);
+.
+651d
+648d
+645c
+ freeblist(bp);
+.
+643c
+ len = nhgets(ghp->len) - GRE_IPONLY;
+.
+639a
+ QUNLOCK(gre);
+
+.
+633,636c
+ if(*p == nil) {
+ QUNLOCK(gre);
+ freeblist(bp);
+.
+590,629c
+ if(c->rport == eproto &&
+ (gpriv->raw || ipcmp(c->raddr, raddr) == 0))
+.
+587d
+553,585c
+ /* Look for a conversation structure for this port and address */
+ c = nil;
+ for(p = gre->conv; *p; p++) {
+.
+547,551c
+ v4tov6(raddr, ghp->src);
+ eproto = nhgets(ghp->eproto);
+ QLOCK(gre);
+.
+536,545c
+ gpriv = gre->priv;
+ ghp = (GREhdr*)(bp->rp);
+.
+534d
+531,532c
+ ushort eproto;
+ uchar raddr[IPaddrlen];
+.
+336,529c
+ int len;
+ GREhdr *ghp;
+.
+334c
+greiput(Proto *gre, Ipifc* __, Block *bp)
+.
+328,329d
+325,326c
+ ghp->proto = IP_GREPROTO;
+ ghp->frag[0] = 0;
+ ghp->frag[1] = 0;
+.
+322c
+ hnputs(ghp->eproto, c->rport);
+.
+318,320c
+ findlocalip(c->p->f, c->laddr, raddr); /* pick interface closest to dest */
+ memmove(ghp->src, c->laddr + IPv4off, IPv4addrlen);
+.
+314,315c
+ memmove(ghp->dst, c->raddr + IPv4off, IPv4addrlen);
+ v4tov6(laddr, ghp->src);
+.
+311,312c
+ if(!((GREpriv*)c->p->priv)->raw){
+ v4tov6(raddr, ghp->dst);
+.
+308,309c
+ ghp = (GREhdr *)(bp->rp);
+ ghp->vihl = IP_VER4;
+.
+295,297d
+287,289c
+ Conv *c = x;
+ GREhdr *ghp;
+.
+283a
+int drop;
+
+.
+281c
+ c->lport = 0;
+ c->rport = 0;
+.
+247,278c
+ qclose(c->rq);
+ qclose(c->wq);
+ qclose(c->eq);
+.
+241c
+ return "pktifc does not support announce";
+.
+239c
+greannounce(Conv* _, char** __, int ___)
+.
+218,235c
+ USED(c);
+ return snprint(state, n, "%s\n", "Datagram");
+.
+211c
+ c->rq = qopen(64*1024, Qmsg, 0, c);
+.
+199c
+ QUNLOCK(p);
+.
+184c
+ QLOCK(p);
+.
+138,171c
+static char*
+.
+136d
+71,134d
+68c
+ ulong csumerr; /* checksum errors */
+ ulong lenerr; /* short packet */
+.
+66c
+struct GREpriv
+{
+ int raw; /* Raw GRE mode */
+
+.
+63c
+} GREhdr;
+.
+54c
+ uchar Unused;
+.
+46,47c
+typedef struct GREhdr
+{
+.
+21,43d
+13c
+enum
+{
+.
+9c
+#include "error.h"
+.
+5c
+#include "lib.h"
+.
+diff -e ip.orig/icmp.c ip/icmp.c
+350c
+ if(iplen > n || ((uint)iplen % 1)){
+.
+339,341c
+ netlog(icmp->f, Logicmp, "icmpiput %d %d\n", p->type, p->code);
+.
+324c
+icmpiput(Proto *icmp, Ipifc* __, Block *bp)
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/icmp6.c ip/icmp6.c
+781c
+ bp->rp -= sizeof(IPICMP);
+.
+770c
+ bp->rp += sizeof(IPICMP);
+.
+762c
+ bp->rp -= sizeof(IPICMP);
+.
+750c
+ bp->rp += sizeof(IPICMP);
+.
+711c
+ RUNLOCK(ifc);
+.
+707c
+ RUNLOCK(ifc);
+.
+700c
+ RUNLOCK(ifc);
+.
+698c
+ RLOCK(ifc);
+.
+666c
+ sz = sizeof(IPICMP) + 8;
+.
+661c
+ if(pktsz - sizeof(Ip6hdr) < 8) {
+.
+649c
+ sz = sizeof(IPICMP) + 8;
+.
+641c
+ if(pktsz - sizeof(Ip6hdr) < 16) {
+.
+575c
+ if(iplen > n - IP6HDR || ((uint)iplen % 1) != 0) {
+.
+568c
+ if(n < sizeof(IPICMP)) {
+.
+546c
+ memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+.
+537c
+ netlog(f, Logicmp, "icmppkttoobig6 fail -> s%I d%I\n",
+.
+534c
+ netlog(f, Logicmp, "send icmppkttoobig6 -> s%I d%I\n",
+.
+518c
+ int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+.
+506c
+ memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+.
+498c
+ netlog(f, Logicmp, "icmpttlexceeded6 fail -> s%I d%I\n",
+.
+495c
+ netlog(f, Logicmp, "send icmpttlexceeded6 -> s%I d%I\n",
+.
+479c
+ int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+.
+471c
+ RUNLOCK(ifc);
+.
+457c
+ memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+.
+445c
+ netlog(f, Logicmp, "icmphostunr fail -> s%I d%I\n",
+.
+442c
+ netlog(f, Logicmp, "send icmphostunr -> s%I d%I\n",
+.
+440c
+ RLOCK(ifc);
+.
+425c
+ int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+.
+397c
+ nbp = newIPICMP(sizeof(Ndpkt));
+.
+375c
+ nbp->wp -= sizeof(Ndpkt) - sizeof(NdiscC);
+.
+354c
+ nbp = newIPICMP(sizeof(Ndpkt));
+.
+260c
+ if(blocklen(bp) < sizeof(IPICMP)){
+.
+257c
+ bp = padblock(bp, sizeof(Ip6hdr));
+.
+122c
+ QLock qlock;
+.
+109,110d
+106d
+101a
+
+.
+99,100c
+ /* ICMPpkt; */
+ uchar type;
+ uchar code;
+ uchar cksum[2];
+ uchar icmpid[2];
+ uchar seq[2];
+
+.
+97c
+struct Ndpkt
+{
+ /* NdiscC; */
+ /* IPICMP; */
+ /* Ip6hdr; */
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */
+ uchar ploadlen[2]; /* payload length: packet length - 40 */
+ uchar proto; /* next header type */
+ uchar ttl; /* hop limit */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+.
+94d
+91,92c
+ /* ICMPpkt; */
+ uchar type;
+ uchar code;
+ uchar cksum[2];
+ uchar icmpid[2];
+ uchar seq[2];
+
+.
+89c
+struct NdiscC
+{
+ /* IPICMP; */
+ /* Ip6hdr; */
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */
+ uchar ploadlen[2]; /* payload length: packet length - 40 */
+ uchar proto; /* next header type */
+ uchar ttl; /* hop limit */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+.
+85,86c
+ /* Ip6hdr; */
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */
+ uchar ploadlen[2]; /* payload length: packet length - 40 */
+ uchar proto; /* next header type */
+ uchar ttl; /* hop limit */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+
+ /* ICMPpkt; */
+ uchar type;
+ uchar code;
+ uchar cksum[2];
+ uchar icmpid[2];
+ uchar seq[2];
+.
+75,82c
+struct ICMPpkt {
+ uchar type;
+ uchar code;
+ uchar cksum[2];
+ uchar icmpid[2];
+ uchar seq[2];
+};
+.
+70c
+typedef struct ICMPpkt ICMPpkt;
+.
+9c
+#include "error.h"
+.
+5c
+#include "lib.h"
+.
+diff -e ip.orig/igmp.c ip/igmp.c
+217c
+ mp = Mediacopymulti(m);
+.
+177c
+igmpiput(Media *m, Ipifc *, Block *bp)
+.
+123c
+ byte ip[IPaddrlen];
+.
+97,99c
+ bp->wp += sizeof(IGMPpkt);
+ memset(bp->rp, 0, sizeof(IGMPpkt));
+ hnputl(p->src, Mediagetaddr(m));
+.
+87c
+igmpsendreport(Media *m, byte *addr)
+.
+68c
+ Lock lk;
+
+.
+60c
+ Media *m;
+.
+51,52d
+43,48c
+ byte vertype; /* version and type */
+ byte unused;
+ byte igmpcksum[2]; /* checksum of igmp portion */
+ byte group[IPaddrlen]; /* multicast group */
+.
+31,40c
+ byte vihl; /* Version and header length */
+ byte tos; /* Type of service */
+ byte len[2]; /* packet length (including headers) */
+ byte id[2]; /* Identification */
+ byte frag[2]; /* Fragment information */
+ byte Unused;
+ byte proto; /* Protocol */
+ byte cksum[2]; /* checksum of ip portion */
+ byte src[IPaddrlen]; /* Ip source */
+ byte dst[IPaddrlen]; /* Ip destination */
+.
+27a
+typedef char byte;
+
+.
+10c
+#include "error.h"
+.
+6c
+#include "lib.h"
+.
+1,4d
+diff -e ip.orig/inferno.c ip/inferno.c
+28a
+
+Medium tripmedium =
+{
+ "trip",
+};
+.
+25c
+bootpread(char* _, ulong __, int ___)
+.
+23a
+char*
+bootp(Ipifc* _)
+{
+ return "unimplmented";
+}
+
+.
+17a
+Chan*
+commonfdtochan(int fd, int mode, int a, int b)
+{
+ return fdtochan(fd, mode, a, b);
+}
+
+.
+6c
+#include "error.h"
+#include "ip.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/ip.c ip/ip.c
+522,524c
+ if(bp->base+sizeof(Ipfrag) >= bp->rp){
+ bp = padblock(bp, sizeof(Ipfrag));
+ bp->rp += sizeof(Ipfrag);
+.
+466,467c
+ for(i = 0; i < Nstats; i++)
+ p = seprint(p, e, "%s: %lud\n", statnames[i], ip->stats[i]);
+.
+383c
+ freeb(bp);
+.
+381a
+ Conv conv;
+
+.
+322d
+320d
+301c
+ RUNLOCK(ifc);
+.
+213c
+ RUNLOCK(ifc);
+.
+211d
+196,199c
+ medialen = ifc->maxtu - ifc->m->hsize;
+.
+189c
+ RUNLOCK(ifc);
+.
+186c
+ if(!CANRLOCK(ifc))
+.
+11a
+/* MIB II counters */
+enum
+{
+ Forwarding,
+ DefaultTTL,
+ InReceives,
+ InHdrErrors,
+ InAddrErrors,
+ ForwDatagrams,
+ InUnknownProtos,
+ InDiscards,
+ InDelivers,
+ OutRequests,
+ OutDiscards,
+ OutNoRoutes,
+ ReasmTimeout,
+ ReasmReqds,
+ ReasmOKs,
+ ReasmFails,
+ FragOKs,
+ FragFails,
+ FragCreates,
+
+ Nstats,
+};
+
+struct Fragment4
+{
+ Block* blist;
+ Fragment4* next;
+ ulong src;
+ ulong dst;
+ ushort id;
+ ulong age;
+};
+
+struct Fragment6
+{
+ Block* blist;
+ Fragment6* next;
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+ uint id;
+ ulong age;
+};
+
+struct Ipfrag
+{
+ ushort foff;
+ ushort flen;
+};
+
+/* an instance of IP */
+struct IP
+{
+ ulong stats[Nstats];
+
+ QLock fraglock4;
+ Fragment4* flisthead4;
+ Fragment4* fragfree4;
+ Ref id4;
+
+ QLock fraglock6;
+ Fragment6* flisthead6;
+ Fragment6* fragfree6;
+ Ref id6;
+
+ int iprouting; /* true if we route like a gateway */
+};
+
+.
+9a
+typedef struct Fragment4 Fragment4;
+typedef struct Fragment6 Fragment6;
+typedef struct Ipfrag Ipfrag;
+
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/ip.h ip/ip.h
+732a
+Chan* commonfdtochan(int, int, int, int);
+.
+727a
+extern char* bootp(Ipifc*);
+.
+676a
+extern Medium tripmedium;
+.
+669c
+#define NOW msec()
+.
+578c
+/* RouteTree; */
+ Route* right;
+ Route* left;
+ Route* mid;
+ uchar depth;
+ uchar type;
+ uchar ifcid; /* must match ifc->id */
+ Ipifc *ifc;
+ char tag[4];
+ int ref;
+.
+516,517d
+491a
+ Logilmsg= 1<<8,
+.
+488a
+ Logil= 1<<4,
+.
+423c
+ RWlock rwlock;
+
+ Conv *conv; /* link to its conversation structure */
+.
+386c
+ QLock qlock;
+
+.
+374c
+ Lock lk;
+
+.
+312c
+ RWlock rwlock;
+.
+173c
+ QLock qlock;
+.
+153a
+typedef struct Ip4hdr Ip4hdr;
+.
+79,152d
+41c
+ Maxincall= 5,
+.
+30,35d
+8,9d
+2,3d
+diff -e ip.orig/ipaux.c ip/ipaux.c
+366c
+ UNLOCK(ht);
+.
+363c
+ UNLOCK(ht);
+.
+352c
+ UNLOCK(ht);
+.
+340c
+ UNLOCK(ht);
+.
+328c
+ UNLOCK(ht);
+.
+316c
+ UNLOCK(ht);
+.
+309c
+ LOCK(ht);
+.
+290c
+ UNLOCK(ht);
+.
+282c
+ LOCK(ht);
+.
+272c
+ UNLOCK(ht);
+.
+269c
+ LOCK(ht);
+.
+241c
+ return (ulong)(sa[IPaddrlen-1]<<24 ^ sp<< 16 ^ da[IPaddrlen-1]<<8 ^ dp) % Nhash;
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/ipifc.c ip/ipifc.c
+1575c
+ RUNLOCK(nifc);
+.
+1565c
+ RUNLOCK(nifc);
+.
+1562c
+ RLOCK(nifc);
+.
+1555c
+ RUNLOCK(nifc);
+.
+1541c
+ RUNLOCK(nifc);
+.
+1538c
+ RLOCK(nifc);
+.
+1518d
+1511d
+1498c
+ WUNLOCK(ifc);
+.
+1494c
+ WLOCK(ifc);
+.
+1491c
+ WUNLOCK(ifc);
+.
+1455c
+ WUNLOCK(ifc);
+.
+1451c
+ WLOCK(ifc);
+.
+1448c
+ WUNLOCK(ifc);
+.
+1301c
+ QUNLOCK(f->ipifc);
+.
+1265,1266c
+ if((atypel > atype && atype < atyper) ||
+ (atypel < atype && atype > atyper)){
+.
+1232,1234c
+ QLOCK(f->ipifc);
+.
+1154c
+ (isv6mcast(addr) && (addr[1] & 0xF) <= Link_local_scop))
+.
+1054c
+ QUNLOCK(f->self);
+.
+1040c
+ QLOCK(f->self);
+.
+1021c
+ QUNLOCK(f->self);
+.
+951c
+ QLOCK(f->self);
+.
+888c
+ QUNLOCK(f->self);
+.
+839c
+ QLOCK(f->self);
+.
+689c
+ WUNLOCK(ifc);
+.
+683c
+ WLOCK(ifc);
+.
+680c
+ WUNLOCK(ifc);
+.
+619c
+ WUNLOCK(ifc);
+.
+604c
+ WLOCK(ifc);
+.
+539c
+ * always called with ifc WLOCK'd
+.
+531c
+ WUNLOCK(ifc);
+.
+417c
+ WLOCK(ifc);
+.
+319c
+ c->sq = qopen(2*QMAX, 0, 0, 0);
+.
+306c
+ RUNLOCK(ifc);
+.
+299c
+ RUNLOCK(ifc);
+.
+294c
+ if(!CANRLOCK(ifc)){
+.
+266c
+ RUNLOCK(ifc);
+.
+259c
+ RLOCK(ifc);
+.
+244c
+ RUNLOCK(ifc);
+.
+238c
+ RLOCK(ifc);
+.
+212c
+ WUNLOCK(ifc);
+.
+181c
+ WLOCK(ifc);
+.
+178c
+ WUNLOCK(ifc);
+.
+162c
+ WUNLOCK(ifc);
+.
+124c
+ WUNLOCK(ifc);
+.
+120c
+ WUNLOCK(ifc);
+.
+118c
+ WLOCK(ifc);
+.
+58c
+#define hashipa(a) ( (ulong)(((a)[IPaddrlen-2]<<8) | (a)[IPaddrlen-1])%NHASH )
+.
+39c
+ QLock qlock;
+.
+18c
+ QMAX = 64*1024-1,
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/ipmux.c ip/ipmux.c
+811c
+ RUNLOCK(f);
+.
+809c
+ RLOCK(f);
+.
+742c
+ RUNLOCK(f);
+.
+680c
+ RLOCK(f);
+.
+631,633c
+ WLOCK(f);
+ i = (Ipmux *)c->p->priv;
+ ipmuxremove(&i, r->chain);
+ WUNLOCK(f);
+.
+617a
+ Ipmux *i;
+.
+610c
+ipmuxannounce(Conv* _, char** __, int ___)
+.
+583c
+ WUNLOCK(f);
+.
+581c
+ WLOCK(f);
+.
+9c
+#include "error.h"
+.
+5c
+#include "lib.h"
+.
+diff -e ip.orig/iproute.c ip/iproute.c
+469c
+ while((p = f->queue) != nil) {
+.
+425c
+ while((p = f->queue) != nil) {
+.
+359c
+ while((p = f->queue) != nil) {
+.
+313c
+ while((p = f->queue) != nil) {
+.
+213,214c
+ dl = 0; if((l = p->left) != nil) dl = l->depth;
+ dr = 0; if((r = p->right) != nil) dr = r->depth;
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/ipv6.c ip/ipv6.c
+506,508c
+ if(bp->base+sizeof(Ipfrag) >= bp->rp){
+ bp = padblock(bp, sizeof(Ipfrag));
+ bp->rp += sizeof(Ipfrag);
+.
+218c
+ RUNLOCK(ifc);
+.
+122c
+ RUNLOCK(ifc);
+.
+110c
+ RUNLOCK(ifc);
+.
+106c
+ if(!CANRLOCK(ifc))
+.
+29a
+/* MIB II counters */
+enum
+{
+ Forwarding,
+ DefaultTTL,
+ InReceives,
+ InHdrErrors,
+ InAddrErrors,
+ ForwDatagrams,
+ InUnknownProtos,
+ InDiscards,
+ InDelivers,
+ OutRequests,
+ OutDiscards,
+ OutNoRoutes,
+ ReasmTimeout,
+ ReasmReqds,
+ ReasmOKs,
+ ReasmFails,
+ FragOKs,
+ FragFails,
+ FragCreates,
+
+ Nstats,
+};
+
+static char *statnames[] =
+{
+[Forwarding] "Forwarding",
+[DefaultTTL] "DefaultTTL",
+[InReceives] "InReceives",
+[InHdrErrors] "InHdrErrors",
+[InAddrErrors] "InAddrErrors",
+[ForwDatagrams] "ForwDatagrams",
+[InUnknownProtos] "InUnknownProtos",
+[InDiscards] "InDiscards",
+[InDelivers] "InDelivers",
+[OutRequests] "OutRequests",
+[OutDiscards] "OutDiscards",
+[OutNoRoutes] "OutNoRoutes",
+[ReasmTimeout] "ReasmTimeout",
+[ReasmReqds] "ReasmReqds",
+[ReasmOKs] "ReasmOKs",
+[ReasmFails] "ReasmFails",
+[FragOKs] "FragOKs",
+[FragFails] "FragFails",
+[FragCreates] "FragCreates",
+};
+
+struct Fragment4
+{
+ Block* blist;
+ Fragment4* next;
+ ulong src;
+ ulong dst;
+ ushort id;
+ ulong age;
+};
+
+struct Fragment6
+{
+ Block* blist;
+ Fragment6* next;
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+ uint id;
+ ulong age;
+};
+
+struct Ipfrag
+{
+ ushort foff;
+ ushort flen;
+};
+
+/* an instance of IP */
+struct IP
+{
+ ulong stats[Nstats];
+
+ QLock fraglock4;
+ Fragment4* flisthead4;
+ Fragment4* fragfree4;
+ Ref id4;
+
+ QLock fraglock6;
+ Fragment6* flisthead6;
+ Fragment6* fragfree6;
+ Ref id6;
+
+ int iprouting; /* true if we route like a gateway */
+};
+
+.
+22a
+typedef struct Fragment4 Fragment4;
+typedef struct Fragment6 Fragment6;
+typedef struct Ipfrag Ipfrag;
+
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/ipv6.h ip/ipv6.h
+145c
+struct Routinghdr {
+.
+134c
+struct Opthdr {
+.
+130,131c
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */
+ uchar ploadlen[2]; /* payload length: packet length - 40 */
+ uchar proto; /* next header type */
+ uchar ttl; /* hop limit */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+.
+120,128d
+81c
+ IP6HDR = 20, /* sizeof(Ip6hdr) */
+.
+26a
+#undef ESP
+
+.
+diff -e ip.orig/loopbackmedium.c ip/loopbackmedium.c
+99c
+ RUNLOCK(ifc);
+.
+92c
+ RUNLOCK(ifc);
+.
+87c
+ if(!CANRLOCK(ifc)){
+.
+58c
+loopbackbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
+.
+26c
+loopbackbind(Ipifc *ifc, int _, char** __)
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/netdevmedium.c ip/netdevmedium.c
+144c
+ RUNLOCK(ifc);
+.
+136c
+ RUNLOCK(ifc);
+.
+131c
+ if(!CANRLOCK(ifc)){
+.
+85c
+netdevbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/netlog.c ip/netlog.c
+260c
+ wakeup(&f->alog->rendez);
+.
+258c
+ UNLOCK(f->alog);
+.
+242c
+ LOCK(f->alog);
+.
+228c
+ char buf[128], *t, *fp;
+.
+185c
+ set = 1;
+.
+160c
+ QUNLOCK(f->alog);
+.
+157c
+ sleep(&f->alog->rendez, netlogready, f);
+.
+155c
+ UNLOCK(f->alog);
+.
+146c
+ UNLOCK(f->alog);
+.
+134c
+ LOCK(f->alog);
+.
+129c
+ QUNLOCK(f->alog);
+.
+127c
+ QLOCK(f->alog);
+.
+122c
+netlogread(Fs *f, void *a, ulong _, long n)
+.
+109c
+ UNLOCK(f->alog);
+.
+101c
+ UNLOCK(f->alog);
+.
+99c
+ LOCK(f->alog);
+.
+92c
+ UNLOCK(f->alog);
+.
+82c
+ UNLOCK(f->alog);
+.
+80c
+ LOCK(f->alog);
+.
+28,29c
+ QLock qlock;
+ Rendez rendez;
+.
+17c
+ Lock lk;
+.
+6,7c
+#include "error.h"
+#include "ip/ip.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/nullmedium.c ip/nullmedium.c
+22c
+nullbwrite(Ipifc* _, Block* __, int ___, uchar* ____)
+.
+17c
+nullunbind(Ipifc* _)
+.
+11c
+nullbind(Ipifc* _, int __, char** ___)
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/pktmedium.c ip/pktmedium.c
+51c
+pktbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
+.
+43c
+pktunbind(Ipifc* _)
+.
+36d
+34c
+pktbind(Ipifc* _, int argc, char **argv)
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/ptclbsum.c ip/ptclbsum.c
+68c
+ while((hisum = losum>>16))
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/rudp.c ip/rudp.c
+693c
+ rudp->nc = 16;
+.
+11c
+#include "error.h"
+.
+7c
+#include "lib.h"
+.
+diff -e ip.orig/tcp.c ip/tcp.c
+3171c
+ QUNLOCK(c);
+.
+3154c
+ if(!CANQLOCK(c))
+.
+3127c
+ p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
+.
+3101c
+/* called with c QLOCKed */
+.
+3085c
+ QUNLOCK(tcp);
+.
+3080c
+ QUNLOCK(s);
+.
+3073,3074c
+ QLOCK(s);
+ QUNLOCK(tcp);
+.
+3064c
+ QLOCK(tcp);
+.
+2871,2873d
+2869c
+ if(seg->mss != 0 && seg->mss < tcb->mss)
+.
+2859d
+2842c
+ QUNLOCK(s);
+.
+2830c
+ netlog(s->p->f, Logtcprxmt, "timeout rexmit 0x%lux %d/%d\n", tcb->snd.una, tcb->timer.start, NOW);
+.
+2817c
+ QLOCK(s);
+.
+2814c
+ QUNLOCK(s);
+.
+2768c
+tcpsetchecksum(Conv *s, char **f, int _)
+.
+2737c
+ QUNLOCK(s);
+.
+2728c
+ QLOCK(s);
+.
+2725c
+ QUNLOCK(s);
+.
+2641c
+ QLOCK(s);
+.
+2638,2639c
+ if((uint)(msgs%4) == 1){
+ QUNLOCK(s);
+.
+2563c
+ netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr %lux nxt %lux\n",
+.
+2421c
+ QUNLOCK(s);
+.
+2417c
+ QUNLOCK(s);
+.
+2351c
+ QUNLOCK(s);
+.
+2189c
+ QUNLOCK(s);
+.
+2172,2174d
+2144c
+ QUNLOCK(s);
+.
+2095,2096c
+ QLOCK(s);
+ QUNLOCK(tcp);
+.
+2092c
+ QUNLOCK(s);
+.
+2072c
+ QUNLOCK(tcp);
+.
+2064c
+ QUNLOCK(tcp);
+.
+2053c
+ QUNLOCK(tcp);
+.
+2050,2051c
+ netlog(f, Logtcp, "iphtlook failed\n");
+.
+2045c
+ QLOCK(tcp);
+.
+1942c
+tcpiput(Proto *tcp, Ipifc* _, Block *bp)
+.
+1862c
+ netlog(s->p->f, Logtcp, "rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind);
+.
+1817c
+ netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %d advwin %d\n",
+.
+1685,1686d
+1683c
+ if(lp->mss != 0 && lp->mss < tcb->mss)
+.
+1626c
+ netlog(s->p->f, Logtcp, "tcpincoming s %I,%ux/%I,%ux d %I,%ux/%I,%ux v %d/%d\n",
+.
+1562c
+ QUNLOCK(tcp);
+.
+1529c
+ if(!CANQLOCK(tcp))
+.
+1421,1422d
+1334c
+ * called with s QLOCKed
+.
+1245,1246d
+1231,1232d
+1210,1211d
+1208c
+ if(optlen == MSS_LENGTH)
+.
+995d
+873c
+ * called with s QLOCKed
+.
+861,862d
+805d
+609c
+ QUNLOCK(s);
+.
+603c
+ QLOCK(s);
+.
+600c
+ QUNLOCK(s);
+.
+583,584d
+569c
+ QUNLOCK(s);
+.
+551c
+ QLOCK(s);
+.
+548c
+ QUNLOCK(s);
+.
+352c
+ ulong stats[Nstats];
+.
+317d
+293d
+231c
+ ulong window; /* Recevive window */
+.
+229c
+ ushort mss; /* Mean segment size */
+.
+193c
+ * the QLOCK in the Conv locks this structure
+.
+49,50c
+ DEF_MSS = 1460, /* Default mean segment */
+ DEF_MSS6 = 1280, /* Default mean segment (min) for v6 */
+.
+44c
+ MSS_LENGTH = 4, /* Mean segment size */
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/udp.c ip/udp.c
+590,591c
+ return snprint(buf, len, "InDatagrams: %lud\nNoPorts: %lud\nInErrors: %lud\nOutDatagrams: %lud\n",
+.
+580c
+ QUNLOCK(udp);
+.
+575c
+ QUNLOCK(s);
+.
+571,572c
+ QLOCK(s);
+ QUNLOCK(udp);
+.
+562c
+ QLOCK(udp);
+.
+510c
+ QUNLOCK(c);
+.
+502c
+ QUNLOCK(c);
+.
+475c
+ QUNLOCK(c);
+.
+456,457c
+ QLOCK(c);
+ QUNLOCK(udp);
+.
+447c
+ QUNLOCK(udp);
+.
+410c
+ QUNLOCK(udp);
+.
+404c
+ QLOCK(udp);
+.
+197c
+ netlog(c->p->f, Logudp, "udp: kick\n");
+.
+103c
+ QLock qlock;
+.
+78c
+ ulong udpOutDatagrams;
+.
+75c
+ ulong udpInDatagrams;
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
diff --git a/src/9vx/a/ip/arp.c b/src/9vx/a/ip/arp.c
@@ -0,0 +1,684 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+
+/*
+ * address resolution tables
+ */
+enum
+{
+ NHASH = (1<<6), /* number of hash chains in Arp.hash */
+ NCACHE = 256, /* number of Arpent slots in Arp.cache */
+
+ AOK = 1, /* Arpent.state once a mac address has been stored */
+ AWAIT = 2, /* Arpent.state while the address is still being resolved */
+};
+
+char *arpstate[] = /* state names printed by arpread, indexed by Arpent.state */
+{
+ "UNUSED",
+ "OK",
+ "WAIT",
+};
+
+/*
+ * one per Fs
+ */
+struct Arp
+{
+ QLock qlock; /* taken via QLOCK(arp)/QUNLOCK(arp) around table updates */
+ Fs *f; /* owning Fs, set by arpinit */
+ Arpent *hash[NHASH]; /* chains linked through Arpent.hash, indexed by haship(ip) */
+ Arpent cache[NCACHE]; /* fixed pool of entries; newarp6 recycles the one with the smallest utime */
+ Arpent *rxmt; /* head of the re-transmit chain, linked through Arpent.nextrxt */
+ Proc *rxmitp; /* neib sol re-transmit proc */
+ Rendez rxmtq; /* rxmitproc sleeps here; newarp6 wakes it */
+ Block *dropf, *dropl; /* first/last block of the list rxmitsols hands to icmphostunr */
+};
+
+char *Ebadarp = "bad arp";
+
+#define haship(s) ((ulong)((s)[IPaddrlen-1])%NHASH)
+
+int ReTransTimer = RETRANS_TIMER;
+
+static void rxmitproc(void *v);
+
+void
+arpinit(Fs *f) /* allocate the Arp for f and start its rxmitproc kernel process */
+{
+ f->arp = smalloc(sizeof(Arp));
+ f->arp->f = f; /* back pointer read by rxmitsols */
+ f->arp->rxmt = nil;
+ f->arp->dropf = f->arp->dropl = nil;
+ kproc("rxmitproc", rxmitproc, f->arp); /* the Arp is rxmitproc's argument */
+}
+
+/*
+ * create a new arp entry for an ip address by recycling the cache slot with the smallest utime; if addrxt is non-zero (and ip is not multicast) the entry is appended to the re-transmit chain. a->state is left for the caller to set; both callers in this file hold the arp qlock.
+ */
+static Arpent*
+newarp6(Arp *arp, uchar *ip, Ipifc *ifc, int addrxt)
+{
+ uint t;
+ Block *next, *xp;
+ Arpent *a, *e, *f, **l;
+ Medium *m = ifc->m;
+ int empty;
+
+ /* find oldest entry */
+ e = &arp->cache[NCACHE];
+ a = arp->cache;
+ t = a->utime;
+ for(f = a; f < e; f++){
+ if(f->utime < t){
+ t = f->utime;
+ a = f;
+ }
+ }
+
+ /* dump waiting packets */
+ xp = a->hold;
+ a->hold = nil;
+
+ if(isv4(a->ip)){
+ while(xp){
+ next = xp->list;
+ freeblist(xp);
+ xp = next;
+ }
+ }
+ else { /* queue icmp unreachable for rxmitproc later on, w/o arp lock */
+ if(xp){
+ if(arp->dropl == nil)
+ arp->dropf = xp;
+ else
+ arp->dropl->list = xp;
+
+ for(next = xp->list; next; next = next->list) /* walk to the last held block so dropl ends up at the tail */
+ xp = next;
+ arp->dropl = xp;
+ wakeup(&arp->rxmtq);
+ }
+ }
+
+ /* take out of current chain */
+ l = &arp->hash[haship(a->ip)];
+ for(f = *l; f; f = f->hash){
+ if(f == a){
+ *l = a->hash;
+ break;
+ }
+ l = &f->hash;
+ }
+
+ /* insert into new chain */
+ l = &arp->hash[haship(ip)];
+ a->hash = *l;
+ *l = a;
+
+ memmove(a->ip, ip, sizeof(a->ip));
+ a->utime = NOW;
+ a->ctime = 0;
+ a->type = m;
+
+ a->rtime = NOW + ReTransTimer; /* time of the next solicitation, compared against NOW in rxmitsols */
+ a->rxtsrem = MAX_MULTICAST_SOLICIT; /* solicitations left; rxmitsols decrements it and reclaims the entry at 0 */
+ a->ifc = ifc;
+ a->ifcid = ifc->ifcid; /* remembered so rxmitsols can detect that ifc->ifcid changed */
+
+ /* put to the end of re-transmit chain; addrxt is 0 when isv4(a->ip) */
+ if(!ipismulticast(a->ip) && addrxt){
+ l = &arp->rxmt;
+ empty = (*l==nil);
+
+ for(f = *l; f; f = f->nextrxt){ /* unlink a if it is already on the chain */
+ if(f == a){
+ *l = a->nextrxt;
+ break;
+ }
+ l = &f->nextrxt;
+ }
+ for(f = *l; f; f = f->nextrxt){ /* continue from the unlink point to the tail */
+ l = &f->nextrxt;
+ }
+ *l = a;
+ if(empty) /* chain was empty: rxmitproc may be asleep waiting for work */
+ wakeup(&arp->rxmtq);
+ }
+
+ a->nextrxt = nil;
+
+ return a;
+}
+
+/* called with arp qlocked; resets entry a to the unused state (state 0) and unlinks it from its hash chain and from the re-transmit chain */
+
+void
+cleanarpent(Arp *arp, Arpent *a)
+{
+ Arpent *f, **l;
+
+ a->utime = 0; /* smallest possible utime, so newarp6 picks this slot first */
+ a->ctime = 0;
+ a->type = 0;
+ a->state = 0;
+
+ /* take out of current chain */
+ l = &arp->hash[haship(a->ip)];
+ for(f = *l; f; f = f->hash){
+ if(f == a){
+ *l = a->hash;
+ break;
+ }
+ l = &f->hash;
+ }
+
+ /* take out of re-transmit chain */
+ l = &arp->rxmt;
+ for(f = *l; f; f = f->nextrxt){
+ if(f == a){
+ *l = a->nextrxt;
+ break;
+ }
+ l = &f->nextrxt;
+ }
+ a->nextrxt = nil;
+ a->hash = nil;
+ a->hold = nil; /* the held blocks are not freed here; rxmitsols detaches them before calling */
+ a->last = nil;
+ a->ifc = nil;
+}
+
+/*
+ * fill in the media address if we have it. Otherwise return an
+ * Arpent that represents the state of the address resolution FSM
+ * for ip. Add the packet to be sent onto the list of packets
+ * waiting for ip->mac to be resolved. A nil return means mac was filled in and arp is unlocked; a non-nil return leaves arp qlocked.
+ */
+Arpent*
+arpget(Arp *arp, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *mac)
+{
+ int hash;
+ Arpent *a;
+ Medium *type = ifc->m;
+ uchar v6ip[IPaddrlen];
+
+ if(version == V4){ /* the table is keyed by IPaddrlen-byte addresses, so convert v4 keys */
+ v4tov6(v6ip, ip);
+ ip = v6ip;
+ }
+
+ QLOCK(arp);
+ hash = haship(ip);
+ for(a = arp->hash[hash]; a; a = a->hash){ /* an entry matches on both address and medium */
+ if(memcmp(ip, a->ip, sizeof(a->ip)) == 0)
+ if(type == a->type)
+ break;
+ }
+
+ if(a == nil){
+ a = newarp6(arp, ip, ifc, (version != V4)); /* only non-V4 entries go on the re-transmit chain */
+ a->state = AWAIT;
+ }
+ a->utime = NOW;
+ if(a->state == AWAIT){
+ if(bp != nil){ /* append bp to the entry's hold list; a->last is its tail */
+ if(a->hold)
+ a->last->list = bp;
+ else
+ a->hold = bp;
+ a->last = bp;
+ bp->list = nil;
+ }
+ return a; /* return with arp qlocked */
+ }
+
+ memmove(mac, a->mac, a->type->maclen);
+
+ /* remove old entries */
+ if(NOW - a->ctime > 15*60*1000) /* mac was already copied out above; the entry is reset for reuse */
+ cleanarpent(arp, a);
+
+ QUNLOCK(arp);
+ return nil;
+}
+
+/*
+ * called with arp locked; releases the qlock that arpget leaves held when it returns an entry
+ */
+void
+arprelease(Arp *arp, Arpent* ae) /* ae is not used */
+{
+ QUNLOCK(arp);
+}
+
+/*
+ * Copy out the mac address from the Arpent. Return the
+ * block waiting to get sent to this mac address.
+ *
+ * called with arp locked; returns with arp unlocked. The copy performed is mac -> a->mac, and the entry becomes AOK.
+ */
+Block*
+arpresolve(Arp *arp, Arpent *a, Medium *type, uchar *mac)
+{
+ Block *bp;
+ Arpent *f, **l;
+
+ if(!isv4(a->ip)){ /* resolved: take the entry off the re-transmit chain */
+ l = &arp->rxmt;
+ for(f = *l; f; f = f->nextrxt){
+ if(f == a){
+ *l = a->nextrxt;
+ break;
+ }
+ l = &f->nextrxt;
+ }
+ }
+
+ memmove(a->mac, mac, type->maclen);
+ a->type = type;
+ a->state = AOK;
+ a->utime = NOW;
+ bp = a->hold; /* detach the held packets; the caller receives the list */
+ a->hold = nil;
+ QUNLOCK(arp);
+
+ return bp;
+}
+
+void
+arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, int refresh) /* record ip -> mac (n must be 6) and transmit packets held for ip; with refresh != 0 no new entry is created */
+{
+ Arp *arp;
+ Route *r;
+ Arpent *a, *f, **l;
+ Ipifc *ifc;
+ Medium *type;
+ Block *bp, *next;
+ uchar v6ip[IPaddrlen];
+
+ arp = fs->arp;
+
+ if(n != 6){
+// print("arp: len = %d\n", n);
+ return;
+ }
+
+ switch(version){
+ case V4:
+ r = v4lookup(fs, ip, nil);
+ v4tov6(v6ip, ip);
+ ip = v6ip;
+ break;
+ case V6:
+ r = v6lookup(fs, ip, nil);
+ break;
+ default:
+ panic("arpenter: version %d", version);
+ return; /* to suppress warnings */
+ }
+
+ if(r == nil){
+// print("arp: no route for entry\n");
+ return;
+ }
+
+ ifc = r->ifc; /* the interface and medium come from the route to ip */
+ type = ifc->m;
+
+ QLOCK(arp);
+ for(a = arp->hash[haship(ip)]; a; a = a->hash){
+ if(a->type != type || (a->state != AWAIT && a->state != AOK))
+ continue;
+
+ if(ipcmp(a->ip, ip) == 0){
+ a->state = AOK;
+ memmove(a->mac, mac, type->maclen);
+
+ if(version == V6){
+ /* take out of re-transmit chain */
+ l = &arp->rxmt;
+ for(f = *l; f; f = f->nextrxt){
+ if(f == a){
+ *l = a->nextrxt;
+ break;
+ }
+ l = &f->nextrxt;
+ }
+ }
+
+ a->ifc = ifc;
+ a->ifcid = ifc->ifcid;
+ bp = a->hold; /* detach the held packets so they can be sent after the arp lock is dropped */
+ a->hold = nil;
+ if(version == V4) /* ip points at v6ip here; step to the embedded v4 address for bwrite */
+ ip += IPv4off;
+ a->utime = NOW;
+ a->ctime = a->utime;
+ QUNLOCK(arp);
+
+ while(bp){
+ next = bp->list;
+ if(ifc != nil){
+ if(waserror()){ /* error raised while sending: release the read lock, then re-raise */
+ RUNLOCK(ifc);
+ nexterror();
+ }
+ RLOCK(ifc);
+ if(ifc->m != nil)
+ ifc->m->bwrite(ifc, bp, version, ip);
+ else
+ freeb(bp);
+ RUNLOCK(ifc);
+ poperror();
+ } else
+ freeb(bp);
+ bp = next;
+ }
+ return;
+ }
+ }
+
+ if(refresh == 0){ /* no matching entry was found: create one that is already resolved */
+ a = newarp6(arp, ip, ifc, 0);
+ a->state = AOK;
+ a->type = type;
+ a->ctime = NOW;
+ memmove(a->mac, mac, type->maclen);
+ }
+
+ QUNLOCK(arp);
+}
+
+int
+arpwrite(Fs *fs, char *s, int len) /* run one control request from s: "flush", "add [medium] ip mac" or "del ip"; returns len, raises an error on a bad request */
+{
+ int n;
+ Route *r;
+ Arp *arp;
+ Block *bp;
+ Arpent *a, *fl, **l;
+ Medium *m;
+ char *f[4], buf[256];
+ uchar ip[IPaddrlen], mac[MAClen];
+
+ arp = fs->arp;
+ if(len == 0)
+ error(Ebadarp);
+ if(len >= sizeof(buf)) /* longer requests are truncated to fit buf */
+ len = sizeof(buf)-1;
+ strncpy(buf, s, len);
+ buf[len] = 0;
+ if(len > 0 && buf[len-1] == '\n')
+ buf[len-1] = 0;
+ n = getfields(buf, f, 4, 1, " ");
+ if(n < 1) /* no fields at all (e.g. the request was just a newline): f[0] would be uninitialized below */
+ error(Ebadarp);
+ if(strcmp(f[0], "flush") == 0){
+ QLOCK(arp);
+ for(a = arp->cache; a < &arp->cache[NCACHE]; a++){
+ memset(a->ip, 0, sizeof(a->ip));
+ memset(a->mac, 0, sizeof(a->mac));
+ a->hash = nil;
+ a->state = 0;
+ a->utime = 0;
+ while(a->hold != nil){
+ bp = a->hold->list;
+ freeblist(a->hold);
+ a->hold = bp;
+ }
+ }
+ memset(arp->hash, 0, sizeof(arp->hash));
+ /* clear all pkts on these lists (rxmt, dropf/l) */
+ arp->rxmt = nil;
+ arp->dropf = nil;
+ arp->dropl = nil;
+ QUNLOCK(arp);
+ } else if(strcmp(f[0], "add") == 0){
+ switch(n){
+ default:
+ error(Ebadarg);
+ case 3: /* add ip mac: the medium is taken from the route to ip */
+ if (parseip(ip, f[1]) == -1)
+ error(Ebadip);
+ if(isv4(ip))
+ r = v4lookup(fs, ip+IPv4off, nil);
+ else
+ r = v6lookup(fs, ip, nil);
+ if(r == nil)
+ error("Destination unreachable");
+ m = r->ifc->m;
+ n = parsemac(mac, f[2], m->maclen);
+ break;
+ case 4: /* add medium ip mac: the medium is named explicitly */
+ m = ipfindmedium(f[1]);
+ if(m == nil)
+ error(Ebadarp);
+ if (parseip(ip, f[2]) == -1)
+ error(Ebadip);
+ n = parsemac(mac, f[3], m->maclen);
+ break;
+ }
+
+ if(m->ares == nil)
+ error(Ebadarp);
+
+ m->ares(fs, V6, ip, mac, n, 0); /* n is now the value returned by parsemac */
+ } else if(strcmp(f[0], "del") == 0){
+ if(n != 2)
+ error(Ebadarg);
+
+ if (parseip(ip, f[1]) == -1)
+ error(Ebadip);
+ QLOCK(arp);
+
+ l = &arp->hash[haship(ip)];
+ for(a = *l; a; a = a->hash){
+ if(memcmp(ip, a->ip, sizeof(a->ip)) == 0){
+ *l = a->hash;
+ break;
+ }
+ l = &a->hash;
+ }
+
+ if(a){
+ /* take out of re-transmit chain */
+ l = &arp->rxmt;
+ for(fl = *l; fl; fl = fl->nextrxt){
+ if(fl == a){
+ *l = a->nextrxt;
+ break;
+ }
+ l = &fl->nextrxt;
+ }
+
+ a->nextrxt = nil;
+ a->hash = nil;
+ a->hold = nil; /* NOTE(review): blocks still on the hold list are not freed here, unlike "flush" -- confirm */
+ a->last = nil;
+ a->ifc = nil;
+ memset(a->ip, 0, sizeof(a->ip));
+ memset(a->mac, 0, sizeof(a->mac));
+ }
+ QUNLOCK(arp);
+ } else
+ error(Ebadarp);
+
+ return len;
+}
+
+enum
+{
+ Alinelen= 90,
+};
+
+char *aformat = "%-6.6s %-8.8s %-40.40I %-32.32s\n";
+
+static void
+convmac(char *p, uchar *mac, int n) /* format the first n bytes of mac into p as two hex digits each */
+{
+ for(; n > 0; n--, mac++)
+ p += sprint(p, "%2.2ux", *mac);
+}
+
+int
+arpread(Arp *arp, char *p, ulong offset, int len) /* format in-use entries into p, one aformat line each; offset must be a multiple of Alinelen; returns the byte count written */
+{
+ Arpent *a;
+ int n;
+ char mac[2*MAClen+1];
+
+ if(offset % Alinelen)
+ return 0;
+
+ offset = offset/Alinelen; /* from here on offset and len count whole lines, not bytes */
+ len = len/Alinelen;
+
+ n = 0;
+ for(a = arp->cache; len > 0 && a < &arp->cache[NCACHE]; a++){
+ if(a->state == 0) /* unused slot; note this test is made before the lock is taken */
+ continue;
+ if(offset > 0){ /* skip the entries that lie before the requested offset */
+ offset--;
+ continue;
+ }
+ len--;
+ QLOCK(arp);
+ convmac(mac, a->mac, a->type->maclen);
+ n += sprint(p+n, aformat, a->type->name, arpstate[a->state], a->ip, mac);
+ QUNLOCK(arp);
+ }
+
+ return n;
+}
+
+extern int
+rxmitsols(Arp *arp) /* one pass of the re-transmit process: send a solicitation for the first live entry on arp->rxmt, then report the queued drops; returns time until the head entry is due, or 0 if the chain is empty */
+{
+ uint sflag;
+ Block *next, *xp;
+ Arpent *a, *b, **l;
+ Fs *f;
+ uchar ipsrc[IPaddrlen];
+ Ipifc *ifc = nil;
+ long nrxt;
+
+ QLOCK(arp);
+ f = arp->f;
+
+ a = arp->rxmt;
+ if(a==nil){
+ nrxt = 0;
+ goto dodrops; /* return nrxt; */
+ }
+ nrxt = a->rtime - NOW;
+ if(nrxt > 3*ReTransTimer/4) /* head entry is not due yet */
+ goto dodrops; /* return nrxt; */
+
+ for(; a; a = a->nextrxt){ /* cleanarpent clears a->nextrxt, so this loop ends after reclaiming one entry */
+ ifc = a->ifc;
+ assert(ifc != nil);
+ if(a->rxtsrem > 0 && CANRLOCK(ifc)){
+ if(a->ifcid == ifc->ifcid)
+ break; /* live entry: leave the loop with ifc read-locked */
+ RUNLOCK(ifc); /* ifcid changed: release the read lock just taken (it used to be leaked) */
+ }
+ xp = a->hold; /* out of retries, ifc busy, or ifcid changed: queue held packets and reclaim the entry */
+ a->hold = nil;
+
+ if(xp){
+ if(arp->dropl == nil)
+ arp->dropf = xp;
+ else
+ arp->dropl->list = xp;
+ }
+
+ cleanarpent(arp, a);
+ }
+ if(a == nil)
+ goto dodrops;
+
+ QUNLOCK(arp); /* for icmpns */
+ if((sflag = ipv6anylocal(ifc, ipsrc)) != SRC_UNSPEC)
+ icmpns(f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
+
+ RUNLOCK(ifc); /* pairs with the successful CANRLOCK in the loop above */
+ QLOCK(arp);
+
+ /* put to the end of re-transmit chain */
+ l = &arp->rxmt;
+ for(b = *l; b; b = b->nextrxt){
+ if(b == a){
+ *l = a->nextrxt;
+ break;
+ }
+ l = &b->nextrxt;
+ }
+ for(b = *l; b; b = b->nextrxt){
+ l = &b->nextrxt;
+ }
+ *l = a;
+ a->rxtsrem--;
+ a->nextrxt = nil;
+ a->rtime = NOW + ReTransTimer;
+
+ a = arp->rxmt;
+ if(a==nil)
+ nrxt = 0;
+ else
+ nrxt = a->rtime - NOW;
+
+dodrops:
+ xp = arp->dropf; /* take the whole drop list while locked, report it after unlocking */
+ arp->dropf = nil;
+ arp->dropl = nil;
+ QUNLOCK(arp);
+
+ for(; xp; xp = next){
+ next = xp->list;
+ icmphostunr(f, ifc, xp, Icmp6_adr_unreach, 1);
+ }
+
+ return nrxt;
+
+}
+
+static int
+rxready(void *v)
+{
+ Arp *arp;
+
+ arp = v; /* sleep condition used by rxmitproc: true when either list has work */
+ if(arp->rxmt != nil || arp->dropf != nil)
+ return 1;
+ return 0;
+}
+
+static void
+rxmitproc(void *v) /* kernel process started by arpinit: calls rxmitsols forever, sleeping between passes */
+{
+ Arp *arp = v;
+ long wakeupat;
+
+ arp->rxmitp = up;
+ //print("arp rxmitproc started\n");
+ if(waserror()){ /* any error raised in the loop below ends the process */
+ arp->rxmitp = 0;
+ pexit("hangup", 1);
+ }
+ for(;;){
+ wakeupat = rxmitsols(arp);
+ if(wakeupat == 0) /* re-transmit chain empty: sleep until rxready() holds */
+ sleep(&arp->rxmtq, rxready, v);
+ else if(wakeupat > ReTransTimer/4) /* otherwise sleep for the returned interval; smaller values loop again at once */
+ tsleep(&arp->rxmtq, return0, 0, wakeupat);
+ }
+}
+
diff --git a/src/9vx/a/ip/chandial.c b/src/9vx/a/ip/chandial.c
@@ -0,0 +1,124 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+#include "ip/ip.h"
+
+typedef struct DS DS;
+static Chan* call(char*, char*, DS*);
+static void _dial_string_parse(char*, DS*);
+
+enum
+{
+ Maxstring= 128,
+};
+
+struct DS
+{
+ char buf[Maxstring]; /* private copy of the dial string; netdir, proto and rem may point into it */
+ char *netdir; /* network directory prefix, or 0 when the dial string names none */
+ char *proto; /* protocol name */
+ char *rem; /* text after the '!' (the whole string when there is no '!') */
+ char *local; /* other args */
+ char *dir; /* if non-nil, call() writes the conversation directory path here */
+ Chan **ctlp; /* if non-nil, call() stores the control channel here instead of closing it */
+};
+
+/*
+ * the dialstring is of the form '[/net/]proto!dest'; returns the opened data channel (see call), with local, dir and ctlp passed through in the DS
+ */
+Chan*
+chandial(char *dest, char *local, char *dir, Chan **ctlp)
+{
+ DS ds;
+ char clone[Maxpath];
+
+ ds.local = local;
+ ds.dir = dir;
+ ds.ctlp = ctlp;
+
+ _dial_string_parse(dest, &ds);
+ if(ds.netdir == 0) /* no network directory in the dial string: default to /net */
+ ds.netdir = "/net";
+
+ /* no connection server, don't translate */
+ snprint(clone, sizeof(clone), "%s/%s/clone", ds.netdir, ds.proto);
+ return call(clone, ds.rem, &ds);
+}
+
+static Chan*
+call(char *clone, char *dest, DS *ds) /* open the clone file, write a connect request for dest, and return the opened data file; clone is truncated in place */
+{
+ int n;
+ Chan *dchan, *cchan;
+ char name[Maxpath], data[Maxpath], *p;
+
+ cchan = namec(clone, Aopen, ORDWR, 0);
+
+ /* get directory name */
+ if(waserror()){ /* on any error below, close the control channel and re-raise */
+ cclose(cchan);
+ nexterror();
+ }
+ n = devtab[cchan->type]->read(cchan, name, sizeof(name)-1, 0);
+ name[n] = 0;
+ for(p = name; *p == ' '; p++)
+ ;
+ sprint(name, "%lud", strtoul(p, 0, 0)); /* rewrite name as the number that was read, without leading blanks */
+ p = strrchr(clone, '/'); /* cut the last path element off clone, leaving the protocol directory */
+ *p = 0;
+ if(ds->dir)
+ snprint(ds->dir, Maxpath, "%s/%s", clone, name); /* ds->dir is written with a Maxpath bound */
+ snprint(data, sizeof(data), "%s/%s/data", clone, name);
+
+ /* connect */
+ if(ds->local)
+ snprint(name, sizeof(name), "connect %s %s", dest, ds->local);
+ else
+ snprint(name, sizeof(name), "connect %s", dest);
+ devtab[cchan->type]->write(cchan, name, strlen(name), 0);
+
+ /* open data connection */
+ dchan = namec(data, Aopen, ORDWR, 0);
+ if(ds->ctlp) /* hand the control channel to the caller, or close it if not wanted */
+ *ds->ctlp = cchan;
+ else
+ cclose(cchan);
+ poperror();
+ return dchan;
+
+}
+
+/*
+ * parse a dial string: copy str into ds->buf (truncated to Maxstring-1) and point ds->netdir, ds->proto and ds->rem into the copy. netdir is 0 when no directory prefix is given; proto is "net" when there is no '!'.
+ */
+static void
+_dial_string_parse(char *str, DS *ds)
+{
+ char *p, *p2;
+
+ strncpy(ds->buf, str, Maxstring);
+ ds->buf[Maxstring-1] = 0;
+
+ p = strchr(ds->buf, '!');
+ if(p == 0) {
+ ds->netdir = 0;
+ ds->proto = "net";
+ ds->rem = ds->buf;
+ } else {
+ for(p2 = p; p2 > ds->buf && *p2 != '/'; p2--) /* last '/' before the '!'; never step below the start of buf */
+ ;
+ if((*ds->buf != '/' && *ds->buf != '#') || *p2 != '/'){ /* no directory prefix, or a '#' name with no '/' in it */
+ ds->netdir = 0;
+ ds->proto = ds->buf;
+ } else {
+ *p2++ = 0; /* split "netdir/proto" at the last '/' */
+ ds->netdir = ds->buf;
+ ds->proto = p2;
+ }
+ *p = 0; /* terminate proto at the '!'; the remainder is the destination */
+ ds->rem = p + 1;
+ }
+}
diff --git a/src/9vx/a/ip/devip.c b/src/9vx/a/ip/devip.c
@@ -0,0 +1,1439 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+#include "ip/ip.h"
+
+enum
+{
+ Qtopdir= 1, /* top level directory */
+ Qtopbase, /* first file type generated by ip1gen */
+ Qarp= Qtopbase,
+ Qbootp,
+ Qndb,
+ Qiproute,
+ Qipselftab,
+ Qlog,
+
+ Qprotodir, /* directory for a protocol */
+ Qprotobase, /* first file type generated by ip2gen */
+ Qclone= Qprotobase,
+ Qstats,
+
+ Qconvdir, /* directory for a conversation */
+ Qconvbase, /* first file type generated by ip3gen */
+ Qctl= Qconvbase,
+ Qdata,
+ Qerr,
+ Qlisten,
+ Qlocal,
+ Qremote,
+ Qstatus,
+ Qsnoop,
+
+ Logtype= 5, /* qid path bits holding the file type (one of the Q* values above) */
+ Masktype= (1<<Logtype)-1,
+ Logconv= 12, /* qid path bits holding the conversation index */
+ Maskconv= (1<<Logconv)-1,
+ Shiftconv= Logtype,
+ Logproto= 8, /* qid path bits holding the protocol index */
+ Maskproto= (1<<Logproto)-1,
+ Shiftproto= Logtype + Logconv,
+
+ Nfs= 128, /* size of ipfs[]; valid device numbers are 0..Nfs-1 */
+};
+#define TYPE(x) ( ((ulong)(x).path) & Masktype ) /* file type field of a qid */
+#define CONV(x) ( (((ulong)(x).path) >> Shiftconv) & Maskconv ) /* conversation index field of a qid */
+#define PROTO(x) ( (((ulong)(x).path) >> Shiftproto) & Maskproto ) /* protocol index field of a qid */
+#define QID(p, c, y) ( ((uint)(p)<<(Shiftproto)) | ((uint)(c)<<Shiftconv) | (y) ) /* build a qid path from protocol p, conversation c and type y */
+
+static char network[] = "network";
+
+QLock fslock;
+Fs *ipfs[Nfs]; /* attached fs's */
+Queue *qlog;
+
+extern void nullmediumlink(void);
+extern void pktmediumlink(void);
+ long ndbwrite(Fs *f, char *a, ulong off, int n);
+
+static int
+ip3gen(Chan *c, int i, Dir *dp) /* fill dp for file type i inside the conversation directory named by c->qid; returns 1, or -1 if there is no such file */
+{
+ Qid q;
+ Conv *cv;
+ char *p;
+
+ cv = ipfs[c->dev]->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+ if(cv->owner == nil) /* a conversation with no owner yet is attributed to eve */
+ kstrdup(&cv->owner, eve);
+ mkqid(&q, QID(PROTO(c->qid), CONV(c->qid), i), 0, QTFILE);
+
+ switch(i) {
+ default:
+ return -1;
+ case Qctl:
+ devdir(c, q, "ctl", 0, cv->owner, cv->perm, dp);
+ return 1;
+ case Qdata:
+ devdir(c, q, "data", qlen(cv->rq), cv->owner, cv->perm, dp); /* length is what is queued on rq */
+ return 1;
+ case Qerr:
+ devdir(c, q, "err", qlen(cv->eq), cv->owner, cv->perm, dp);
+ return 1;
+ case Qlisten:
+ devdir(c, q, "listen", 0, cv->owner, cv->perm, dp);
+ return 1;
+ case Qlocal:
+ p = "local";
+ break;
+ case Qremote:
+ p = "remote";
+ break;
+ case Qsnoop: /* snoop exists only for the protocol named "ipifc" */
+ if(strcmp(cv->p->name, "ipifc") != 0)
+ return -1;
+ devdir(c, q, "snoop", qlen(cv->sq), cv->owner, 0400, dp);
+ return 1;
+ case Qstatus:
+ p = "status";
+ break;
+ }
+ devdir(c, q, p, 0, cv->owner, 0444, dp); /* local, remote and status share this read-only entry */
+ return 1;
+}
+
+static int
+ip2gen(Chan *c, int i, Dir *dp) /* fill dp for the per-protocol file of type i (clone or stats); returns 1, or -1 for any other type */
+{
+ Qid q;
+
+ switch(i) {
+ case Qclone:
+ mkqid(&q, QID(PROTO(c->qid), 0, Qclone), 0, QTFILE);
+ devdir(c, q, "clone", 0, network, 0666, dp);
+ return 1;
+ case Qstats:
+ mkqid(&q, QID(PROTO(c->qid), 0, Qstats), 0, QTFILE);
+ devdir(c, q, "stats", 0, network, 0444, dp);
+ return 1;
+ }
+ return -1;
+}
+
+static int
+ip1gen(Chan *c, int i, Dir *dp) /* fill dp for the top-level file of type i; returns 1, or -1 if there is no such file */
+{
+ Qid q;
+ char *p;
+ int prot;
+ int len = 0;
+ Fs *f;
+ extern ulong kerndate;
+
+ f = ipfs[c->dev];
+
+ prot = 0666; /* default permissions; some cases below override them */
+ mkqid(&q, QID(0, 0, i), 0, QTFILE);
+ switch(i) {
+ default:
+ return -1;
+ case Qarp:
+ p = "arp";
+ prot = 0664;
+ break;
+ case Qbootp:
+ p = "bootp";
+ break;
+ case Qndb: /* ndb alone reports a length and a qid version */
+ p = "ndb";
+ len = strlen(f->ndb);
+ q.vers = f->ndbvers;
+ break;
+ case Qiproute:
+ p = "iproute";
+ prot = 0664;
+ break;
+ case Qipselftab:
+ p = "ipselftab";
+ prot = 0444;
+ break;
+ case Qlog:
+ p = "log";
+ break;
+ }
+ devdir(c, q, p, len, network, prot, dp);
+ if(i == Qndb && f->ndbmtime > kerndate) /* override the mtime set by devdir only when ndbmtime is later than kerndate */
+ dp->mtime = f->ndbmtime;
+ return 1;
+}
+
+static int
+ipgen(Chan *c, char* __ch, Dirtab* __dt, int __i, int s, Dir *dp) /* generator passed to devwalk/devstat/devdirread: fill dp for entry s of c (or for c itself when c is a file); __ch, __dt and __i are unused */
+{
+ Qid q;
+ Conv *cv;
+ Fs *f;
+
+ f = ipfs[c->dev];
+
+ switch(TYPE(c->qid)) {
+ case Qtopdir:
+ if(s == DEVDOTDOT){
+ mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
+ sprint(up->genbuf, "#I%lud", c->dev);
+ devdir(c, q, up->genbuf, 0, network, 0555, dp);
+ return 1;
+ }
+ if(s < f->np) { /* the first f->np entries are the protocol directories */
+ if(f->p[s]->connect == nil)
+ return 0; /* protocol with no user interface */
+ mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
+ devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
+ return 1;
+ }
+ s -= f->np; /* the remaining entries are the top-level files */
+ return ip1gen(c, s+Qtopbase, dp);
+ case Qarp:
+ case Qbootp:
+ case Qndb:
+ case Qlog:
+ case Qiproute:
+ case Qipselftab:
+ return ip1gen(c, TYPE(c->qid), dp);
+ case Qprotodir:
+ if(s == DEVDOTDOT){
+ mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
+ sprint(up->genbuf, "#I%lud", c->dev);
+ devdir(c, q, up->genbuf, 0, network, 0555, dp);
+ return 1;
+ }
+ if(s < f->p[PROTO(c->qid)]->ac) { /* the first ac entries are conversation directories named by index */
+ cv = f->p[PROTO(c->qid)]->conv[s];
+ sprint(up->genbuf, "%d", s);
+ mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
+ devdir(c, q, up->genbuf, 0, cv->owner, 0555, dp);
+ return 1;
+ }
+ s -= f->p[PROTO(c->qid)]->ac; /* followed by the per-protocol files */
+ return ip2gen(c, s+Qprotobase, dp);
+ case Qclone:
+ case Qstats:
+ return ip2gen(c, TYPE(c->qid), dp);
+ case Qconvdir:
+ if(s == DEVDOTDOT){
+ s = PROTO(c->qid);
+ mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
+ devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
+ return 1;
+ }
+ return ip3gen(c, s+Qconvbase, dp);
+ case Qctl:
+ case Qdata:
+ case Qerr:
+ case Qlisten:
+ case Qlocal:
+ case Qremote:
+ case Qstatus:
+ case Qsnoop:
+ return ip3gen(c, TYPE(c->qid), dp);
+ }
+ return -1;
+}
+
+static void
+ipreset(void) /* link the null and pkt media and install eipfmt for the i, I, E, V and M format verbs */
+{
+ nullmediumlink();
+ pktmediumlink();
+
+ fmtinstall('i', eipfmt);
+ fmtinstall('I', eipfmt);
+ fmtinstall('E', eipfmt);
+ fmtinstall('V', eipfmt);
+ fmtinstall('M', eipfmt);
+}
+
+static Fs*
+ipgetfs(int dev) /* return the Fs for device number dev, creating and initialising it on first use; nil if dev is outside 0..Nfs-1 */
+{
+ extern void (*ipprotoinit[])(Fs*);
+ Fs *f;
+ int i;
+
+ if(dev < 0 || dev >= Nfs) /* dev is signed: a negative value must not index ipfs[] */
+ return nil;
+
+ qlock(&fslock); /* fslock serialises creation so each ipfs[] slot is initialised once */
+ if(ipfs[dev] == nil){
+ f = smalloc(sizeof(Fs));
+ ip_init(f);
+ arpinit(f);
+ netloginit(f);
+ for(i = 0; ipprotoinit[i]; i++) /* ipprotoinit is a nil-terminated table of protocol initialisers */
+ ipprotoinit[i](f);
+ f->dev = dev;
+ ipfs[dev] = f;
+ }
+ qunlock(&fslock);
+
+ return ipfs[dev];
+}
+
+IPaux*
+newipaux(char *owner, char *tag) /* allocate an IPaux with a copy of owner and with tag copied into the blank-padded a->tag */
+{
+ IPaux *a;
+ int n;
+
+ a = smalloc(sizeof(*a));
+ kstrdup(&a->owner, owner);
+ memset(a->tag, ' ', sizeof(a->tag)); /* a->tag is blank padded and not NUL terminated when full */
+ for(n = 0; n < sizeof(a->tag); n++) /* bounded length: tag may itself be an unterminated a->tag (see ipwalk) */
+ if(tag[n] == 0)
+ break;
+ memmove(a->tag, tag, n);
+ return a;
+}
+
+#define ATTACHER(c) (((IPaux*)((c)->aux))->owner)
+
+static Chan*
+ipattach(char* spec) /* attach to the IP device numbered by spec; returns a channel on its top directory */
+{
+ Chan *c;
+ int dev;
+
+ dev = atoi(spec);
+ if(dev < 0 || dev >= Nfs) /* atoi can yield a negative number, which would index ipfs[] out of bounds */
+ error("bad specification");
+
+ ipgetfs(dev); /* called for its side effect: creates ipfs[dev] on first attach */
+ c = devattach('I', spec);
+ mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
+ c->dev = dev;
+
+ c->aux = newipaux(commonuser(), "none"); /* per-channel owner and tag; freed in ipclose */
+
+ return c;
+}
+
+static Walkqid*
+ipwalk(Chan* c, Chan *nc, char **name, int nname) /* walk via devwalk using ipgen; a cloned channel gets its own copy of c's IPaux */
+{
+ IPaux *a = c->aux;
+ Walkqid* w;
+
+ w = devwalk(c, nc, name, nname, nil, 0, ipgen);
+ if(w != nil && w->clone != nil)
+ w->clone->aux = newipaux(a->owner, a->tag); /* separate copy, since ipclose frees c->aux for each channel */
+ return w;
+}
+
+
+static int
+ipstat(Chan* c, uchar* db, int n) /* stat through devstat, with ipgen generating the directory entry */
+{
+ return devstat(c, db, n, nil, 0, ipgen);
+}
+
+static int
+incoming(void* arg) /* sleep condition used by ipopen on listen files: true once the conversation has a queued incoming call */
+{
+ Conv *cv = arg;
+
+ if(cv->incall == nil)
+ return 0;
+ return 1;
+}
+
+static int m2p[] = { /* maps omode&3 to the rwx permission bits ipopen requires */
+ [OREAD] 4,
+ [OWRITE] 2,
+ [ORDWR] 6, [OEXEC] 1 /* ipopen indexes with omode&3, which can be 3; the table needs a fourth entry */
+};
+
+static Chan*
+ipopen(Chan* c, int omode) /* open the file named by c->qid; clone and listen rewrite c->qid to the ctl file of a conversation; raises Eperm and other errors */
+{
+ Conv *cv, *nc;
+ Proto *p;
+ int perm;
+ Fs *f;
+
+ perm = m2p[omode&3]; /* permission bits needed for this open mode */
+
+ f = ipfs[c->dev];
+
+ switch(TYPE(c->qid)) {
+ default:
+ break;
+ case Qndb:
+ if(omode & (OWRITE|OTRUNC) && !iseve())
+ error(Eperm);
+ if((omode & (OWRITE|OTRUNC)) == (OWRITE|OTRUNC)) /* write with truncate empties the ndb string */
+ f->ndb[0] = 0;
+ break;
+ case Qlog:
+ netlogopen(f);
+ break;
+ case Qiproute:
+ case Qarp:
+ if(omode != OREAD && !iseve()) /* only eve may open these for anything but reading */
+ error(Eperm);
+ break;
+ case Qtopdir:
+ case Qprotodir:
+ case Qconvdir:
+ case Qstatus:
+ case Qremote:
+ case Qlocal:
+ case Qstats:
+ case Qbootp:
+ case Qipselftab:
+ if(omode != OREAD)
+ error(Eperm);
+ break;
+ case Qsnoop:
+ if(omode != OREAD)
+ error(Eperm);
+ p = f->p[PROTO(c->qid)];
+ cv = p->conv[CONV(c->qid)];
+ if(strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
+ error(Eperm);
+ incref(&cv->snoopers); /* undone by the decref in ipclose */
+ break;
+ case Qclone:
+ p = f->p[PROTO(c->qid)];
+ QLOCK(p);
+ if(waserror()){
+ QUNLOCK(p);
+ nexterror();
+ }
+ cv = Fsprotoclone(p, ATTACHER(c));
+ QUNLOCK(p);
+ poperror();
+ if(cv == nil) {
+ error(Enodev);
+ break;
+ }
+ mkqid(&c->qid, QID(p->x, cv->x, Qctl), 0, QTFILE); /* the channel now refers to the new conversation's ctl file */
+ break;
+ case Qdata:
+ case Qctl:
+ case Qerr:
+ p = f->p[PROTO(c->qid)];
+ QLOCK(p);
+ cv = p->conv[CONV(c->qid)];
+ QLOCK(cv);
+ if(waserror()) {
+ QUNLOCK(cv);
+ QUNLOCK(p);
+ nexterror();
+ }
+ if((perm & (cv->perm>>6)) != perm) { /* owner bits do not grant perm: require that the attacher is the owner and that the low bits grant it */
+ if(strcmp(ATTACHER(c), cv->owner) != 0)
+ error(Eperm);
+ if((perm & cv->perm) != perm)
+ error(Eperm);
+
+ }
+ cv->inuse++;
+ if(cv->inuse == 1){ /* first user takes ownership of the conversation */
+ kstrdup(&cv->owner, ATTACHER(c));
+ cv->perm = 0660;
+ }
+ QUNLOCK(cv);
+ QUNLOCK(p);
+ poperror();
+ break;
+ case Qlisten:
+ cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+ if((perm & (cv->perm>>6)) != perm) {
+ if(strcmp(ATTACHER(c), cv->owner) != 0)
+ error(Eperm);
+ if((perm & cv->perm) != perm)
+ error(Eperm);
+
+ }
+
+ if(cv->state != Announced)
+ error("not announced");
+
+ if(waserror()){ /* on error, drop the reference taken just below */
+ closeconv(cv);
+ nexterror();
+ }
+ QLOCK(cv);
+ cv->inuse++; /* hold the listening conversation while waiting */
+ QUNLOCK(cv);
+
+ nc = nil;
+ while(nc == nil) {
+ /* give up if we got a hangup */
+ if(qisclosed(cv->rq))
+ error("listen hungup");
+
+ qlock(&cv->listenq);
+ if(waserror()) {
+ qunlock(&cv->listenq);
+ nexterror();
+ }
+
+ /* wait for a connect */
+ sleep(&cv->listenr, incoming, cv);
+
+ QLOCK(cv);
+ nc = cv->incall; /* may be nil again by now, in which case the while loop repeats */
+ if(nc != nil){
+ cv->incall = nc->next;
+ mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE); /* the channel becomes the incoming call's ctl file */
+ kstrdup(&cv->owner, ATTACHER(c));
+ }
+ QUNLOCK(cv);
+
+ qunlock(&cv->listenq);
+ poperror();
+ }
+ closeconv(cv); /* release the reference on the listening conversation */
+ poperror();
+ break;
+ }
+ c->mode = openmode(omode);
+ c->flag |= COPEN;
+ c->offset = 0;
+ return c;
+}
+
+static void
+ipcreate(Chan* _, char* __, int ___, ulong ____) /* create is never permitted here: always raises Eperm */
+{
+ error(Eperm);
+}
+
+static void
+ipremove(Chan* _) /* remove is never permitted here: always raises Eperm */
+{
+ error(Eperm);
+}
+
+static int
+ipwstat(Chan *c, uchar *dp, int n) /* change owner and/or permissions of a conversation through its ctl or data file; returns the convM2D result */
+{
+ Dir d;
+ Conv *cv;
+ Fs *f;
+ Proto *p;
+
+ f = ipfs[c->dev];
+ switch(TYPE(c->qid)) {
+ default: /* only ctl and data accept wstat */
+ error(Eperm);
+ break;
+ case Qctl:
+ case Qdata:
+ break;
+ }
+
+ n = convM2D(dp, n, &d, nil);
+ if(n > 0){
+ p = f->p[PROTO(c->qid)];
+ cv = p->conv[CONV(c->qid)];
+ if(!iseve() && strcmp(ATTACHER(c), cv->owner) != 0) /* only eve or the current owner may change it */
+ error(Eperm);
+ if(d.uid[0]) /* an empty uid leaves the owner unchanged */
+ kstrdup(&cv->owner, d.uid);
+ cv->perm = d.mode & 0777;
+ }
+ return n;
+}
+
+/*
+ * Drop one reference to conversation cv.
+ * If other references remain, nothing else happens.  When the count
+ * reaches zero the conversation is torn down: queued incoming calls are
+ * closed, ownership reverts to `network' with mode 0660, multicast
+ * memberships are removed, the protocol's close routine is run and the
+ * state becomes Idle.  cv stays locked for the whole teardown.
+ */
+void
+closeconv(Conv *cv)
+{
+ Conv *nc;
+ Ipmulti *mp;
+
+ QLOCK(cv);
+
+ /* still in use by someone else: just drop our reference */
+ if(--cv->inuse > 0) {
+ QUNLOCK(cv);
+ return;
+ }
+
+ /* close all incoming calls since no listen will ever happen */
+ for(nc = cv->incall; nc; nc = cv->incall){
+ cv->incall = nc->next;
+ closeconv(nc);
+ }
+ cv->incall = nil;
+
+ kstrdup(&cv->owner, network);
+ cv->perm = 0660;
+
+ /* loop re-reads cv->multi each time; presumably ipifcremmulti unlinks the entry -- TODO confirm */
+ while((mp = cv->multi) != nil)
+ ipifcremmulti(cv, mp->ma, mp->ia);
+
+ cv->r = nil;
+ cv->rgen = 0;
+ cv->p->close(cv);
+ cv->state = Idle;
+ QUNLOCK(cv);
+}
+
+/*
+ * Close a channel on the ip device.  For channels that were actually
+ * opened, release whatever the open acquired (netlog, a conversation
+ * reference, or a snooper reference); then free the per-channel aux data.
+ */
+static void
+ipclose(Chan* c)
+{
+	Fs *fs;
+	Conv *conv;
+
+	fs = ipfs[c->dev];
+	if(c->flag & COPEN){
+		switch(TYPE(c->qid)){
+		case Qlog:
+			netlogclose(fs);
+			break;
+		case Qdata:
+		case Qctl:
+		case Qerr:
+			conv = fs->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+			closeconv(conv);
+			break;
+		case Qsnoop:
+			conv = fs->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+			decref(&conv->snoopers);
+			break;
+		default:
+			break;
+		}
+	}
+	free(((IPaux*)c->aux)->owner);
+	free(c->aux);
+}
+
+/* size of the scratch buffer ipread allocates for local/remote/status/stats text */
+enum
+{
+ Statelen= 32*1024,
+};
+
+/*
+ * Read from a file of the ip device.
+ * Directories are generated by ipgen; the per-stack files (arp, bootp,
+ * ndb, iproute, ipselftab, log) are delegated to their own readers;
+ * the textual per-conversation files are formatted into a temporary
+ * buffer and served with readstr; data, err and snoop are read straight
+ * from the conversation's queues.
+ * Returns the byte count reported by the underlying reader; unknown
+ * file types raise Eperm.
+ */
+static long
+ipread(Chan *ch, void *a, long n, vlong off)
+{
+ Conv *c;
+ Proto *x;
+ char *buf, *p;
+ long rv;
+ Fs *f;
+ ulong offset = off;
+
+ f = ipfs[ch->dev];
+
+ p = a;
+ switch(TYPE(ch->qid)) {
+ default:
+ error(Eperm);
+ case Qtopdir:
+ case Qprotodir:
+ case Qconvdir:
+ return devdirread(ch, a, n, 0, 0, ipgen);
+ case Qarp:
+ return arpread(f->arp, a, offset, n);
+ case Qbootp:
+ return bootpread(a, offset, n);
+ case Qndb:
+ return readstr(offset, a, n, f->ndb);
+ case Qiproute:
+ return routeread(f, a, offset, n);
+ case Qipselftab:
+ return ipselftabread(f, a, offset, n);
+ case Qlog:
+ return netlogread(f, a, offset, n);
+ case Qctl:
+ /* reading ctl yields the conversation number as decimal text */
+ buf = smalloc(16);
+ sprint(buf, "%lud", CONV(ch->qid));
+ rv = readstr(offset, p, n, buf);
+ free(buf);
+ return rv;
+ case Qremote:
+ buf = smalloc(Statelen);
+ x = f->p[PROTO(ch->qid)];
+ c = x->conv[CONV(ch->qid)];
+ /* default "addr!port" format unless the protocol supplies its own */
+ if(x->remote == nil) {
+ sprint(buf, "%I!%d\n", c->raddr, c->rport);
+ } else {
+ (*x->remote)(c, buf, Statelen-2);
+ }
+ rv = readstr(offset, p, n, buf);
+ free(buf);
+ return rv;
+ case Qlocal:
+ buf = smalloc(Statelen);
+ x = f->p[PROTO(ch->qid)];
+ c = x->conv[CONV(ch->qid)];
+ /* default "addr!port" format unless the protocol supplies its own */
+ if(x->local == nil) {
+ sprint(buf, "%I!%d\n", c->laddr, c->lport);
+ } else {
+ (*x->local)(c, buf, Statelen-2);
+ }
+ rv = readstr(offset, p, n, buf);
+ free(buf);
+ return rv;
+ case Qstatus:
+ buf = smalloc(Statelen);
+ x = f->p[PROTO(ch->qid)];
+ c = x->conv[CONV(ch->qid)];
+ /* NOTE(review): x->state is called without a nil check -- assumes every protocol sets it */
+ (*x->state)(c, buf, Statelen-2);
+ rv = readstr(offset, p, n, buf);
+ free(buf);
+ return rv;
+ case Qdata:
+ c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
+ return qread(c->rq, a, n);
+ case Qerr:
+ c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
+ return qread(c->eq, a, n);
+ case Qsnoop:
+ c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
+ return qread(c->sq, a, n);
+ case Qstats:
+ x = f->p[PROTO(ch->qid)];
+ if(x->stats == nil)
+ error("stats not implemented");
+ buf = smalloc(Statelen);
+ (*x->stats)(x, buf, Statelen);
+ rv = readstr(offset, p, n, buf);
+ free(buf);
+ return rv;
+ }
+}
+
+/*
+ * Block read.  Only a conversation's data file is served directly from
+ * its read queue; every other file goes through the generic devbread.
+ */
+static Block*
+ipbread(Chan* ch, long n, ulong offset)
+{
+	Conv *conv;
+
+	if(TYPE(ch->qid) != Qdata)
+		return devbread(ch, n, offset);
+
+	conv = ipfs[ch->dev]->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
+	return qbread(conv->rq, n);
+}
+
+/*
+ * choose c's local address: the address of the interface
+ * closest to c's remote address, as picked by findlocalip
+ */
+static void
+setladdr(Conv* c)
+{
+	Fs *fs;
+
+	fs = c->p->f;
+	findlocalip(fs, c->laddr, c->raddr);
+}
+
+/*
+ * Give c the local port lport, provided no other Connected or Announced
+ * conversation of the same protocol already uses the identical
+ * raddr,rport,laddr,lport quad.  The protocol is locked during the scan.
+ * Returns nil on success, or an error string (c->lport is then untouched).
+ */
+char*
+setluniqueport(Conv* c, int lport)
+{
+	Proto *proto;
+	Conv *other;
+	char *err;
+	int i;
+
+	proto = c->p;
+	err = nil;
+
+	QLOCK(proto);
+	for(i = 0; i < proto->nc; i++){
+		other = proto->conv[i];
+		if(other == nil)
+			break;		/* stop at the first unallocated slot */
+		if(other == c)
+			continue;
+		if(other->state != Connected && other->state != Announced)
+			continue;
+		if(other->lport != lport || other->rport != c->rport)
+			continue;
+		if(ipcmp(other->raddr, c->raddr) != 0 || ipcmp(other->laddr, c->laddr) != 0)
+			continue;
+		err = "address in use";
+		break;
+	}
+	if(err == nil)
+		c->lport = lport;
+	QUNLOCK(proto);
+	return err;
+}
+
+/*
+ * report whether any allocated conversation of p already has
+ * local port lport (1) or not (0); the scan stops at the first
+ * empty slot
+ */
+static int
+lportinuse(Proto *p, ushort lport)
+{
+	Conv **cp, **end;
+
+	end = p->conv + p->nc;
+	for(cp = p->conv; cp < end && *cp != nil; cp++){
+		if((*cp)->lport == lport)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * pick an unused local port for c and set it.
+ * returns nil on success or "no ports available".
+ */
+char *
+setlport(Conv* c)
+{
+	Proto *proto;
+	int tries, port, found;
+
+	proto = c->p;
+	port = 0;
+	found = 0;
+
+	QLOCK(proto);
+	if(c->restricted){
+		/* Restricted ports cycle between 600 and 1024. */
+		for(tries = 0; tries < 1024-600 && !found; tries++){
+			if(proto->nextrport >= 1024 || proto->nextrport < 600)
+				proto->nextrport = 600;
+			port = proto->nextrport++;
+			found = !lportinuse(proto, port);
+		}
+	}else{
+		/*
+		 * Unrestricted ports are chosen randomly
+		 * between 2^15 and 2^16.  There are at most
+		 * 4*Nchan = 4096 ports in use at any given time,
+		 * so even in the worst case, a random probe has a
+		 * 1 - 4096/2^15 = 87% chance of success.
+		 * If 64 successive probes fail, there is a bug somewhere
+		 * (or a once in 10^58 event has happened, but that's
+		 * less likely than a venti collision).
+		 */
+		for(tries = 0; tries < 64 && !found; tries++){
+			port = (1<<15) + nrand(1<<15);
+			found = !lportinuse(proto, port);
+		}
+	}
+	if(found)
+		c->lport = port;
+	QUNLOCK(proto);
+
+	if(!found)
+		return "no ports available";
+	return nil;
+}
+
+/*
+ * set a local address and port from a string of the form
+ *	[address!]port[!r]
+ * str is modified in place (the first '!' is overwritten with NUL).
+ *
+ * With no address part the local address is IPnoaddr when announcing,
+ * otherwise the address chosen by setladdr.  An address of "*" also
+ * means IPnoaddr; any other address must parse and belong to this stack.
+ * A port of "*" while announcing (eve only) claims port 0; a port that
+ * is zero, negative or not a number asks setlport to pick one.
+ *
+ * Returns nil on success or an error string.  The port is parsed into an
+ * int so that negative values really reach the "pick one" branch and
+ * values above 65535 are rejected instead of silently wrapping into some
+ * unrelated 16-bit port.
+ */
+char*
+setladdrport(Conv* c, char* str, int announcing)
+{
+	char *p;
+	int port;
+	uchar addr[IPaddrlen];
+
+	/*
+	 *  ignore restricted part if it exists.  it's
+	 *  meaningless on local ports.
+	 */
+	p = strchr(str, '!');
+	if(p != nil){
+		*p++ = 0;
+		if(strcmp(p, "r") == 0)
+			p = nil;
+	}
+
+	c->lport = 0;
+	if(p == nil){
+		/* no address given: str holds just the port */
+		if(announcing)
+			ipmove(c->laddr, IPnoaddr);
+		else
+			setladdr(c);
+		p = str;
+	} else {
+		if(strcmp(str, "*") == 0)
+			ipmove(c->laddr, IPnoaddr);
+		else {
+			if(parseip(addr, str) == -1)
+				return Ebadip;
+			if(ipforme(c->p->f, addr))
+				ipmove(c->laddr, addr);
+			else
+				return "not a local IP address";
+		}
+	}
+
+	/* one process can get all connections */
+	if(announcing && strcmp(p, "*") == 0){
+		if(!iseve())
+			error(Eperm);
+		return setluniqueport(c, 0);
+	}
+
+	port = atoi(p);
+	if(port <= 0)
+		return setlport(c);
+	if(port > 0xFFFF)
+		return "bad port number";
+	return setluniqueport(c, port);
+}
+
+/*
+ * set c's remote address and port from a string of the form
+ *	address!port[!r]
+ * str is modified in place.  A trailing "!r" marks the conversation
+ * restricted.  Returns nil on success or an error string.
+ */
+static char*
+setraddrport(Conv* c, char* str)
+{
+	char *port, *rest;
+
+	port = strchr(str, '!');
+	if(port == nil)
+		return "malformed address";
+	*port++ = 0;
+	if(parseip(c->raddr, str) == -1)
+		return Ebadip;
+	c->rport = atoi(port);
+
+	rest = strchr(port, '!');
+	if(rest != nil && strstr(rest, "!r") != nil)
+		c->restricted = 1;
+	return nil;
+}
+
+/*
+ * called by protocol connect routine to set addresses.
+ * argv[1] is the remote address; the optional argv[2] is the local one.
+ * Also records in c->ipversion whether the conversation is V4 or V6.
+ * Returns nil on success or an error string.
+ */
+char*
+Fsstdconnect(Conv *c, char *argv[], int argc)
+{
+	char *err;
+	int bothv4;
+
+	if(argc != 2 && argc != 3)
+		return "bad args to connect";
+
+	err = setraddrport(c, argv[1]);
+	if(err != nil)
+		return err;
+
+	if(argc == 2){
+		/* no local address supplied: pick address and port ourselves */
+		setladdr(c);
+		err = setlport(c);
+	}else
+		err = setladdrport(c, argv[2], 0);
+	if(err != nil)
+		return err;
+
+	bothv4 = memcmp(c->raddr, v4prefix, IPv4off) == 0
+		&& memcmp(c->laddr, v4prefix, IPv4off) == 0;
+	if(bothv4 || ipcmp(c->raddr, IPnoaddr) == 0)
+		c->ipversion = V4;
+	else
+		c->ipversion = V6;
+
+	return nil;
+}
+/*
+ *  sleep condition used while a connection is being set up:
+ *  true once the conversation has reached the Connected state
+ */
+static int
+connected(void* a)
+{
+	Conv *conv;
+
+	conv = a;
+	return conv->state == Connected;
+}
+/*
+ * Handle the "connect" control message: start the protocol's connect
+ * routine and sleep until the conversation is Connected.
+ * ipwrite calls this with c locked; the lock is released while
+ * sleeping and re-acquired before returning or raising an error.
+ * Raises Econinuse if the conversation state is not 0, and any error
+ * string produced by the protocol or left in c->cerr.
+ */
+static void
+connectctlmsg(Proto *x, Conv *c, Cmdbuf *cb)
+{
+ char *p;
+
+ if(c->state != 0)
+ error(Econinuse);
+ c->state = Connecting;
+ c->cerr[0] = '\0';
+ if(x->connect == nil)
+ error("connect not supported");
+ p = x->connect(c, cb->f, cb->nf);
+ if(p != nil)
+ error(p);
+
+ /* drop the lock while waiting; retake it on the error path so the caller's QUNLOCK balances */
+ QUNLOCK(c);
+ if(waserror()){
+ QLOCK(c);
+ nexterror();
+ }
+ sleep(&c->cr, connected, c);
+ QLOCK(c);
+ poperror();
+
+ /* a non-empty cerr (filled in by Fsconnected) means the attempt failed */
+ if(c->cerr[0] != '\0')
+ error(c->cerr);
+}
+
+/*
+ * called by protocol announce routine to set addresses.
+ * clears the remote address and port, then sets the local
+ * address and port from argv[1].  returns nil or an error string.
+ */
+char*
+Fsstdannounce(Conv* c, char* argv[], int argc)
+{
+	memset(c->raddr, 0, sizeof(c->raddr));
+	c->rport = 0;
+	if(argc != 2)
+		return "bad args to announce";
+	return setladdrport(c, argv[1], 1);
+}
+
+/*
+ *  sleep condition used while an announcement is being set up:
+ *  true once the conversation has reached the Announced state
+ */
+static int
+announced(void* a)
+{
+	Conv *conv;
+
+	conv = a;
+	return conv->state == Announced;
+}
+/*
+ * Handle the "announce" control message: start the protocol's announce
+ * routine and sleep until the conversation is Announced.
+ * ipwrite calls this with c locked; the lock is released while
+ * sleeping and re-acquired before returning or raising an error.
+ * Raises Econinuse if the conversation state is not 0, and any error
+ * string produced by the protocol or left in c->cerr.
+ */
+static void
+announcectlmsg(Proto *x, Conv *c, Cmdbuf *cb)
+{
+ char *p;
+
+ if(c->state != 0)
+ error(Econinuse);
+ c->state = Announcing;
+ c->cerr[0] = '\0';
+ if(x->announce == nil)
+ error("announce not supported");
+ p = x->announce(c, cb->f, cb->nf);
+ if(p != nil)
+ error(p);
+
+ /* drop the lock while waiting; retake it on the error path so the caller's QUNLOCK balances */
+ QUNLOCK(c);
+ if(waserror()){
+ QLOCK(c);
+ nexterror();
+ }
+ sleep(&c->cr, announced, c);
+ QLOCK(c);
+ poperror();
+
+ /* a non-empty cerr (filled in by Fsconnected) means the attempt failed */
+ if(c->cerr[0] != '\0')
+ error(c->cerr);
+}
+
+/*
+ * called by protocol bind routine to set addresses.
+ * sets the local address and port from argv[1] (not announcing).
+ * returns nil or an error string.
+ */
+char*
+Fsstdbind(Conv* c, char* argv[], int argc)
+{
+	if(argc != 2)
+		return "bad args to bind";
+	return setladdrport(c, argv[1], 0);
+}
+
+/*
+ * Handle the "bind" control message, using the protocol's own bind
+ * routine when it has one and Fsstdbind otherwise.  Any error string
+ * returned is raised as an error.
+ */
+static void
+bindctlmsg(Proto *x, Conv *c, Cmdbuf *cb)
+{
+	char *err;
+
+	if(x->bind != nil)
+		err = x->bind(c, cb->f, cb->nf);
+	else
+		err = Fsstdbind(c, cb->f, cb->nf);
+	if(err != nil)
+		error(err);
+}
+
+/*
+ * Handle the "tos" control message: set c->tos from the first
+ * argument, or reset it to 0 when no argument is given.
+ */
+static void
+tosctlmsg(Conv *c, Cmdbuf *cb)
+{
+	c->tos = cb->nf < 2 ? 0 : atoi(cb->f[1]);
+}
+
+/*
+ * Handle the "ttl" control message: set c->ttl from the first
+ * argument, or reset it to MAXTTL when no argument is given.
+ */
+static void
+ttlctlmsg(Conv *c, Cmdbuf *cb)
+{
+	c->ttl = cb->nf < 2 ? MAXTTL : atoi(cb->f[1]);
+}
+
+/*
+ * Write to a file of the ip device.
+ * data writes go to the conversation's write queue; arp, iproute, log
+ * and ndb are delegated to their own writers; ctl writes are parsed
+ * into a command and dispatched below with the conversation locked.
+ * Commands not recognised here are passed to the protocol's ctl routine
+ * if it has one.  Returns n (or the delegate's result); unknown file
+ * types raise Eperm.
+ */
+static long
+ipwrite(Chan* ch, void *v, long n, vlong off)
+{
+ Conv *c;
+ Proto *x;
+ char *p;
+ Cmdbuf *cb;
+ uchar ia[IPaddrlen], ma[IPaddrlen];
+ Fs *f;
+ char *a;
+ ulong offset = off;
+
+ a = v;
+ f = ipfs[ch->dev];
+
+ switch(TYPE(ch->qid)){
+ default:
+ error(Eperm);
+ case Qdata:
+ x = f->p[PROTO(ch->qid)];
+ c = x->conv[CONV(ch->qid)];
+
+ /* a conversation without a write queue cannot be written */
+ if(c->wq == nil)
+ error(Eperm);
+
+ qwrite(c->wq, a, n);
+ break;
+ case Qarp:
+ return arpwrite(f, a, n);
+ case Qiproute:
+ return routewrite(f, ch, a, n);
+ case Qlog:
+ netlogctl(f, a, n);
+ return n;
+ case Qndb:
+ return ndbwrite(f, a, offset, n);
+ break;
+ case Qctl:
+ x = f->p[PROTO(ch->qid)];
+ c = x->conv[CONV(ch->qid)];
+ cb = parsecmd(a, n);
+
+ /* c stays locked for the whole command; the error path unlocks and frees cb */
+ QLOCK(c);
+ if(waserror()) {
+ QUNLOCK(c);
+ free(cb);
+ nexterror();
+ }
+ if(cb->nf < 1)
+ error("short control request");
+ if(strcmp(cb->f[0], "connect") == 0)
+ connectctlmsg(x, c, cb);
+ else if(strcmp(cb->f[0], "announce") == 0)
+ announcectlmsg(x, c, cb);
+ else if(strcmp(cb->f[0], "bind") == 0)
+ bindctlmsg(x, c, cb);
+ else if(strcmp(cb->f[0], "ttl") == 0)
+ ttlctlmsg(c, cb);
+ else if(strcmp(cb->f[0], "tos") == 0)
+ tosctlmsg(c, cb);
+ else if(strcmp(cb->f[0], "ignoreadvice") == 0)
+ c->ignoreadvice = 1;
+ else if(strcmp(cb->f[0], "addmulti") == 0){
+ /* "addmulti ifcaddr" joins c->raddr; "addmulti ifcaddr mcastaddr" joins the given group */
+ if(cb->nf < 2)
+ error("addmulti needs interface address");
+ if(cb->nf == 2){
+ if(!ipismulticast(c->raddr))
+ error("addmulti for a non multicast address");
+ if (parseip(ia, cb->f[1]) == -1)
+ error(Ebadip);
+ ipifcaddmulti(c, c->raddr, ia);
+ } else {
+ if (parseip(ia, cb->f[1]) == -1 ||
+ parseip(ma, cb->f[2]) == -1)
+ error(Ebadip);
+ if(!ipismulticast(ma))
+ error("addmulti for a non multicast address");
+ ipifcaddmulti(c, ma, ia);
+ }
+ } else if(strcmp(cb->f[0], "remmulti") == 0){
+ /* "remmulti ifcaddr" leaves the group named by c->raddr */
+ if(cb->nf < 2)
+ error("remmulti needs interface address");
+ if(!ipismulticast(c->raddr))
+ error("remmulti for a non multicast address");
+ if (parseip(ia, cb->f[1]) == -1)
+ error(Ebadip);
+ ipifcremmulti(c, c->raddr, ia);
+ } else if(strcmp(cb->f[0], "maxfragsize") == 0){
+ if(cb->nf < 2)
+ error("maxfragsize needs size");
+
+ c->maxfragsize = (int)strtol(cb->f[1], nil, 0);
+
+ } else if(x->ctl != nil) {
+ /* anything else is protocol-specific */
+ p = x->ctl(c, cb->f, cb->nf);
+ if(p != nil)
+ error(p);
+ } else
+ error("unknown control request");
+ QUNLOCK(c);
+ free(cb);
+ poperror();
+ }
+ return n;
+}
+
+/*
+ * Block write.  Only a conversation's data file is handled here: the
+ * block list is flattened into one block and queued on the write queue.
+ * Every other file goes through the generic devbwrite.
+ * Returns the number of bytes in the queued block.
+ */
+static long
+ipbwrite(Chan* ch, Block* bp, ulong offset)
+{
+	Conv *conv;
+	int len;
+
+	if(TYPE(ch->qid) != Qdata)
+		return devbwrite(ch, bp, offset);
+
+	conv = ipfs[ch->dev]->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
+	if(conv->wq == nil)
+		error(Eperm);
+
+	if(bp->next)
+		bp = concatblock(bp);
+	/* take the length first: bp is handed to qbwrite and not used again */
+	len = BLEN(bp);
+	qbwrite(conv->wq, bp);
+	return len;
+}
+
+/*
+ * Device table entry for the ip device: device character 'I', name "ip",
+ * followed by the device's entry points.  Entries named dev* are the
+ * generic implementations rather than ip-specific ones.
+ */
+Dev ipdevtab = {
+ 'I',
+ "ip",
+
+ ipreset,
+ devinit,
+ devshutdown,
+ ipattach,
+ ipwalk,
+ ipstat,
+ ipopen,
+ ipcreate,
+ ipclose,
+ ipread,
+ ipbread,
+ ipwrite,
+ ipbwrite,
+ ipremove,
+ ipwstat,
+};
+
+/*
+ * Register protocol p with stack f: record it in the protocol-number
+ * table (when p->ipproto > 0), give it a directory qid, allocate its
+ * conversation table and append it to f->p.
+ * Returns 0 on success, or -1 when the stack already holds Maxproto
+ * protocols or p->ipproto is already taken.
+ */
+int
+Fsproto(Fs *f, Proto *p)
+{
+	int slot;
+
+	slot = f->np;
+	if(slot >= Maxproto)
+		return -1;
+
+	p->f = f;
+
+	if(p->ipproto > 0){
+		if(f->t2p[p->ipproto] != nil)
+			return -1;
+		f->t2p[p->ipproto] = p;
+	}
+
+	p->qid.type = QTDIR;
+	p->qid.path = QID(slot, 0, Qprotodir);
+	/* one slot more than nc is allocated */
+	p->conv = malloc(sizeof(Conv*)*(p->nc+1));
+	if(p->conv == nil)
+		panic("Fsproto");
+
+	p->x = slot;
+	p->nextrport = 600;
+	f->p[slot] = p;
+	f->np = slot+1;
+
+	return 0;
+}
+
+/*
+ *  return true if a protocol is registered in f
+ *  for IP protocol number proto, i.e. it is built in
+ */
+int
+Fsbuiltinproto(Fs* f, uchar proto)
+{
+	if(f->t2p[proto] == nil)
+		return 0;
+	return 1;
+}
+
+/*
+ * called with protocol locked
+ *
+ * Find or create a free conversation of protocol p and hand it out,
+ * owned by `user', reset to Idle with mode 0660 and inuse == 1.
+ * Returns nil when every slot is busy and the protocol's gc routine
+ * (if any) could not free one.  Raises Enomem if allocation fails.
+ */
+Conv*
+Fsprotoclone(Proto *p, char *user)
+{
+ Conv *c, **pp, **ep;
+
+retry:
+ c = nil;
+ ep = &p->conv[p->nc];
+ for(pp = p->conv; pp < ep; pp++) {
+ c = *pp;
+ if(c == nil){
+ /* first empty slot: allocate a brand new conversation (left locked) */
+ c = malloc(sizeof(Conv));
+ if(c == nil)
+ error(Enomem);
+ QLOCK(c);
+ c->p = p;
+ c->x = pp - p->conv;
+ if(p->ptclsize != 0){
+ c->ptcl = malloc(p->ptclsize);
+ if(c->ptcl == nil) {
+ free(c);
+ error(Enomem);
+ }
+ }
+ *pp = c;
+ p->ac++;
+ c->eq = qopen(1024, Qmsg, 0, 0);
+ (*p->create)(c);
+ break;
+ }
+ /* existing slot: only consider it if the lock is free right now */
+ if(CANQLOCK(c)){
+ /*
+ * make sure both processes and protocol
+ * are done with this Conv
+ */
+ if(c->inuse == 0 && (p->inuse == nil || (*p->inuse)(c) == 0))
+ break;
+
+ QUNLOCK(c);
+ }
+ }
+ /* ran off the end: nothing free; let the protocol reclaim and rescan */
+ if(pp >= ep) {
+ if(p->gc != nil && (*p->gc)(p))
+ goto retry;
+ return nil;
+ }
+
+ /* c is locked here on both paths; reset it for its new owner */
+ c->inuse = 1;
+ kstrdup(&c->owner, user);
+ c->perm = 0660;
+ c->state = Idle;
+ ipmove(c->laddr, IPnoaddr);
+ ipmove(c->raddr, IPnoaddr);
+ c->r = nil;
+ c->rgen = 0;
+ c->lport = 0;
+ c->rport = 0;
+ c->restricted = 0;
+ c->maxfragsize = 0;
+ c->ttl = MAXTTL;
+ qreopen(c->rq);
+ qreopen(c->wq);
+ qreopen(c->eq);
+
+ QUNLOCK(c);
+ return c;
+}
+
+/*
+ * Called when a connect or announce attempt on c has completed.
+ * A non-empty msg is recorded in c->cerr as the failure reason (it is
+ * raised as an error by the ctl handler sleeping on c->cr).
+ * Announcing becomes Announced, Connecting becomes Connected, and any
+ * process sleeping on c->cr is woken.  Always returns 0.
+ */
+int
+Fsconnected(Conv* c, char* msg)
+{
+	if(msg != nil && *msg != '\0'){
+		strncpy(c->cerr, msg, ERRMAX-1);
+		/* strncpy does not terminate when msg fills the buffer */
+		c->cerr[ERRMAX-1] = '\0';
+	}
+
+	switch(c->state){
+
+	case Announcing:
+		c->state = Announced;
+		break;
+
+	case Connecting:
+		c->state = Connected;
+		break;
+	}
+
+	wakeup(&c->cr);
+	return 0;
+}
+
+/*
+ * Protocol that should receive packets with IP protocol number proto:
+ * f->ipmux when one is set, otherwise the registered protocol
+ * (nil if none).
+ */
+Proto*
+Fsrcvpcol(Fs* f, uchar proto)
+{
+	if(!f->ipmux)
+		return f->t2p[proto];
+	return f->ipmux;
+}
+
+/*
+ * Like Fsrcvpcol but never consults f->ipmux: returns the protocol
+ * registered for IP protocol number proto, or nil.
+ */
+Proto*
+Fsrcvpcolx(Fs *f, uchar proto)
+{
+	Proto *p;
+
+	p = f->t2p[proto];
+	return p;
+}
+
+/*
+ * called with protocol locked
+ *
+ * Queue a new incoming call on listening conversation c.  A fresh
+ * conversation owned by `network' is filled in with the given
+ * addresses, ports and IP version, marked Connected and appended to
+ * c's incall list; the listener sleeping on c->listenr is then woken.
+ * Returns the new conversation, or nil if Maxincall calls are already
+ * pending or no conversation could be obtained.
+ */
+Conv*
+Fsnewcall(Conv *c, uchar *raddr, ushort rport, uchar *laddr, ushort lport, uchar version)
+{
+	Conv *call, **tail;
+	int pending;
+
+	QLOCK(c);
+
+	/* walk to the end of the pending-call list, counting entries */
+	pending = 0;
+	tail = &c->incall;
+	while(*tail != nil){
+		pending++;
+		tail = &(*tail)->next;
+	}
+
+	/* find a free conversation, unless the backlog is already full */
+	call = nil;
+	if(pending < Maxincall)
+		call = Fsprotoclone(c->p, network);
+	if(call == nil){
+		QUNLOCK(c);
+		return nil;
+	}
+
+	ipmove(call->raddr, raddr);
+	call->rport = rport;
+	ipmove(call->laddr, laddr);
+	call->lport = lport;
+	call->next = nil;
+	*tail = call;
+	call->state = Connected;
+	call->ipversion = version;
+
+	QUNLOCK(c);
+
+	wakeup(&c->listenr);
+
+	return call;
+}
+
+/*
+ * Overwrite part of the stack's ndb text starting at offset off with
+ * the n bytes at a, terminating the text right after them.  The write
+ * may not start beyond the current end of the text nor reach the end
+ * of the buffer; either raises Eio.  Bumps the ndb version and mtime.
+ * Returns n.
+ */
+long
+ndbwrite(Fs *f, char *a, ulong off, int n)
+{
+	ulong end;
+
+	end = off+n;
+	if(off > strlen(f->ndb) || end >= sizeof(f->ndb))
+		error(Eio);
+
+	memmove(f->ndb+off, a, n);
+	f->ndb[end] = 0;
+	f->ndbvers++;
+	f->ndbmtime = seconds();
+	return n;
+}
+
+/*
+ * Number of conversations to configure: Nchans, or four times that
+ * on a cpu server with at least 128MB of memory.
+ */
+ulong
+scalednconv(void)
+{
+	int big;
+
+	big = cpuserver && conf.npage*BY2PG >= 128*MB;
+	if(big)
+		return Nchans*4;
+	return Nchans;
+}
diff --git a/src/9vx/a/ip/eipconvtest.c b/src/9vx/a/ip/eipconvtest.c
@@ -0,0 +1,152 @@
+#include <u.h>
+#include <libc.h>
+
+/* flag bit set in prefixvals[] entries whose index byte is a valid mask prefix */
+enum
+{
+ Isprefix= 16,
+};
+
+/*
+ * Indexed by a mask byte.  Bytes made of leading one bits followed by
+ * zeros map to their count of one bits (0-8) or'ed with Isprefix;
+ * every other byte value is left 0.
+ */
+uchar prefixvals[256] =
+{
+[0x00] 0 | Isprefix,
+[0x80] 1 | Isprefix,
+[0xC0] 2 | Isprefix,
+[0xE0] 3 | Isprefix,
+[0xF0] 4 | Isprefix,
+[0xF8] 5 | Isprefix,
+[0xFC] 6 | Isprefix,
+[0xFE] 7 | Isprefix,
+[0xFF] 8 | Isprefix,
+};
+
+/*
+ * 16-byte address whose first 12 bytes are the prefix that eipconv
+ * compares against to recognise a v4 address held in 16-byte form.
+ */
+uchar v4prefix[16] = {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0xff, 0xff,
+ 0, 0, 0, 0
+};
+
+/*
+ * Store the low 32 bits of v at p as four bytes,
+ * most significant byte first.
+ */
+void
+hnputl(void *p, ulong v)
+{
+	uchar *out;
+	int i;
+
+	out = p;
+	for(i = 0; i < 4; i++)
+		out[i] = v >> (24 - 8*i);
+}
+
+/*
+ * Format routine for network addresses, selected by f->chr:
+ *	E	6-byte Ethernet address as 12 hex digits
+ *	I	16-byte IP address; dotted quad when it starts with v4prefix,
+ *		otherwise colon-separated hex with the longest zero run elided
+ *	i	v6 address passed as 4 ulongs, printed like I
+ *	V	4-byte v4 address as dotted quad
+ *	M	16-byte mask; /nn when it is a pure prefix mask, else like I
+ * The text is built in buf and emitted with strconv.
+ * Returns sizeof(uchar*).
+ */
+int
+eipconv(va_list *arg, Fconv *f)
+{
+ char buf[8*5];
+ static char *efmt = "%.2lux%.2lux%.2lux%.2lux%.2lux%.2lux";
+ static char *ifmt = "%d.%d.%d.%d";
+ uchar *p, ip[16];
+ ulong *lp;
+ ushort s;
+ int i, j, n, eln, eli;
+
+ switch(f->chr) {
+ case 'E': /* Ethernet address */
+ p = va_arg(*arg, uchar*);
+ sprint(buf, efmt, p[0], p[1], p[2], p[3], p[4], p[5]);
+ break;
+ case 'I': /* Ip address */
+ p = va_arg(*arg, uchar*);
+common:
+ /* also reached by goto from the 'i' and 'M' cases with p already set */
+ if(memcmp(p, v4prefix, 12) == 0)
+ sprint(buf, ifmt, p[12], p[13], p[14], p[15]);
+ else {
+ /* find longest elision */
+ /* eli: byte offset where the longest run of zero 16-bit groups starts; eln: its length in bytes */
+ eln = eli = -1;
+ for(i = 0; i < 16; i += 2){
+ for(j = i; j < 16; j += 2)
+ if(p[j] != 0 || p[j+1] != 0)
+ break;
+ if(j > i && j - i > eln){
+ eli = i;
+ eln = j - i;
+ }
+ }
+
+ /* print with possible elision */
+ n = 0;
+ for(i = 0; i < 16; i += 2){
+ if(i == eli){
+ n += sprint(buf+n, "::");
+ i += eln;
+ if(i >= 16)
+ break;
+ } else if(i != 0)
+ n += sprint(buf+n, ":");
+ s = (p[i]<<8) + p[i+1];
+ n += sprint(buf+n, "%ux", s);
+ }
+ }
+ break;
+ case 'i': /* v6 address as 4 longs */
+ lp = va_arg(*arg, ulong*);
+ /* convert to 16 bytes, most significant byte first, then print as 'I' */
+ for(i = 0; i < 4; i++)
+ hnputl(ip+4*i, *lp++);
+ p = ip;
+ goto common;
+ case 'V': /* v4 ip address */
+ p = va_arg(*arg, uchar*);
+ sprint(buf, ifmt, p[0], p[1], p[2], p[3]);
+ break;
+ case 'M': /* ip mask */
+ p = va_arg(*arg, uchar*);
+
+ /* look for a prefix mask */
+ /* skip leading 0xff bytes; the next byte must be a prefix byte and the rest zero */
+ for(i = 0; i < 16; i++)
+ if(p[i] != 0xff)
+ break;
+ if(i < 16){
+ if((prefixvals[p[i]] & Isprefix) == 0)
+ goto common;
+ for(j = i+1; j < 16; j++)
+ if(p[j] != 0)
+ goto common;
+ n = 8*i + (prefixvals[p[i]] & ~Isprefix);
+ } else
+ n = 8*16;
+
+ /* got one, use /xx format */
+ sprint(buf, "/%d", n);
+ break;
+ default:
+ strcpy(buf, "(eipconv)");
+ }
+ strconv(buf, f);
+ return sizeof(uchar*);
+}
+
+/*
+ * Sample 16-byte values that main prints with both %I and %M:
+ * a v4-prefixed address, all ones, several prefix masks of
+ * increasing length, all zeros, and two values with scattered bytes.
+ */
+uchar testvec[11][16] =
+{
+ { 0,0,0,0, 0,0,0,0, 0,0,0xff,0xff, 1,3,4,5, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, },
+ { 0xff,0xff,0x80,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xc0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xe0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xf0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xff,0xf8,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, },
+ { 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0,0,0,0, 0,0x11,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0,0,0,0x11, 0,0,0,0, 0,0,0,0, 0,0,0,0x12, },
+};
+
+/*
+ * Install eipconv for %I and %M, then print every
+ * test vector in both formats.
+ */
+void
+main(void)
+{
+	uchar (*v)[16];
+
+	fmtinstall('I', eipconv);
+	fmtinstall('M', eipconv);
+	for(v = testvec; v < testvec+11; v++)
+		print("%I\n%M\n", *v, *v);
+	exits(0);
+}
diff --git a/src/9vx/a/ip/esp.c b/src/9vx/a/ip/esp.c
@@ -0,0 +1,951 @@
+/*
+ * Encapsulating Security Payload for IPsec for IPv4, rfc1827.
+ * currently only implements tunnel mode.
+ * TODO: update to match rfc4303.
+ */
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+#include "libsec.h"
+
+typedef struct Esphdr Esphdr;
+typedef struct Esp4hdr Esp4hdr;
+typedef struct Esp6hdr Esp6hdr;
+typedef struct Esptail Esptail;
+typedef struct Userhdr Userhdr;
+typedef struct Esppriv Esppriv;
+typedef struct Espcb Espcb;
+typedef struct Algorithm Algorithm;
+
+/* protocol number and fixed header/trailer sizes used throughout this file */
+enum
+{
+ IP_ESPPROTO = 50, /* IP v4 and v6 protocol number */
+ Esp4hdrlen = IP4HDR + 8, /* v4 IP header plus the 8-byte Esphdr (spi + seq) */
+ Esp6hdrlen = IP6HDR + 8, /* v6 IP header plus the 8-byte Esphdr (spi + seq) */
+
+ Esptaillen = 2, /* does not include pad or auth data */
+ Userhdrlen = 4, /* user-visible header size - if enabled */
+};
+
+/* the ESP header proper; the same two fields end Esp4hdr and Esp6hdr */
+struct Esphdr
+{
+ uchar espspi[4]; /* Security parameter index */
+ uchar espseq[4]; /* Sequence number */
+};
+
+/*
+ * tunnel-mode layout: IP | ESP | TCP/UDP | user data.
+ * transport-mode layout is: ESP | IP | TCP/UDP | user data.
+ *
+ * Esp4hdr overlays the start of a v4 packet: the IP header followed
+ * immediately by the ESP header.
+ */
+struct Esp4hdr
+{
+ /* ipv4 header */
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+ uchar Unused;
+ uchar espproto; /* Protocol */
+ uchar espplen[2]; /* Header plus data length */
+ uchar espsrc[4]; /* Ip source */
+ uchar espdst[4]; /* Ip destination */
+
+ /* Esphdr; */
+ uchar espspi[4]; /* Security parameter index */
+ uchar espseq[4]; /* Sequence number */
+};
+
+/*
+ * tunnel-mode layout
+ * Esp6hdr overlays the start of a v6 packet: the IP header followed
+ * immediately by the ESP header.
+ */
+struct Esp6hdr
+{
+ /* Ip6hdr; */
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */
+ uchar ploadlen[2]; /* payload length: packet length - 40 */
+ uchar proto; /* next header type */
+ uchar ttl; /* hop limit */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+
+ /* Esphdr; */
+ uchar espspi[4]; /* Security parameter index */
+ uchar espseq[4]; /* Sequence number */
+};
+
+/* trailer placed after the payload and padding (Esptaillen bytes) */
+struct Esptail
+{
+ uchar pad; /* number of pad bytes preceding this trailer */
+ uchar nexthdr; /* next-header value carried with the payload */
+};
+
+/* header as seen by the user (Userhdrlen bytes), used only when Espcb.header is set */
+struct Userhdr
+{
+ uchar nexthdr; /* next protocol */
+ uchar unused[3];
+};
+
+/* per-protocol counters reported by espstats */
+struct Esppriv
+{
+ ulong in;
+ ulong inerrors;
+};
+
+/*
+ * protocol specific part of Conv
+ */
+struct Espcb
+{
+ int incoming; /* set for conversations connected with spi "*" (receiving side) */
+ int header; /* nonzero: packets to/from the user carry a Userhdr */
+ ulong spi;
+ ulong seq; /* last seq sent */
+ ulong window; /* for replay attacks */
+ char *espalg;
+ void *espstate; /* other state for esp */
+ int espivlen; /* in bytes */
+ int espblklen;
+ int (*cipher)(Espcb*, uchar *buf, int len);
+ char *ahalg;
+ void *ahstate; /* other state for ah */
+ int ahlen; /* auth data length in bytes */
+ int ahblklen;
+ int (*auth)(Espcb*, uchar *buf, int len, uchar *hash);
+};
+
+/* one entry of the espalg/ahalg tables: name, key size and setup routine */
+struct Algorithm
+{
+ char *name;
+ int keylen; /* in bits */
+ void (*init)(Espcb*, char* name, uchar *key, int keylen);
+};
+
+static Conv* convlookup(Proto *esp, ulong spi);
+static char *setalg(Espcb *ecb, char **f, int n, Algorithm *alg);
+static void espkick(void *x);
+
+static void nullespinit(Espcb*, char*, uchar *key, int keylen);
+static void desespinit(Espcb *ecb, char *name, uchar *k, int n);
+
+static void nullahinit(Espcb*, char*, uchar *key, int keylen);
+static void shaahinit(Espcb*, char*, uchar *key, int keylen);
+static void md5ahinit(Espcb*, char*, uchar *key, int keylen);
+
+/*
+ * encryption algorithms selectable with the "esp" control message;
+ * the list ends with a nil name.  Commented-out entries are not built in.
+ */
+static Algorithm espalg[] =
+{
+ "null", 0, nullespinit,
+// "des3_cbc", 192, des3espinit, /* rfc2451 */
+// "aes_128_cbc", 128, aescbcespinit, /* rfc3602 */
+// "aes_ctr", 128, aesctrespinit, /* rfc3686 */
+ "des_56_cbc", 64, desespinit, /* rfc2405, deprecated */
+// "rc4_128", 128, rc4espinit, /* gone in rfc4305 */
+ nil, 0, nil,
+};
+
+/*
+ * authentication algorithms selectable with the "ah" control message;
+ * the list ends with a nil name.  Commented-out entries are not built in.
+ */
+static Algorithm ahalg[] =
+{
+ "null", 0, nullahinit,
+ "hmac_sha1_96", 128, shaahinit, /* rfc2404 */
+// "aes_xcbc_mac_96", 128, aesahinit, /* rfc3566 */
+ "hmac_md5_96", 128, md5ahinit, /* rfc2403 */
+ nil, 0, nil,
+};
+
+/*
+ * Protocol connect routine.  argv[1] has the form address!spi.
+ * An spi of "*" makes this the receiving end: a random unused spi is
+ * chosen, the conversation is marked incoming and its write queue is
+ * hung up.  A numeric spi makes it the sending end and hangs up the
+ * read queue.  Both cipher and authenticator start out as "null".
+ * Fsconnected is always called, with the error string on failure.
+ * Returns nil on success or the error string.
+ *
+ * The address must parse: a bad address is reported as Ebadip rather
+ * than being used with whatever parseip left in c->raddr.
+ */
+static char*
+espconnect(Conv *c, char **argv, int argc)
+{
+	char *p, *pp;
+	char *e = nil;
+	ulong spi;
+	Espcb *ecb = (Espcb*)c->ptcl;
+
+	switch(argc) {
+	default:
+		e = "bad args to connect";
+		break;
+	case 2:
+		p = strchr(argv[1], '!');
+		if(p == nil){
+			e = "malformed address";
+			break;
+		}
+		*p++ = 0;
+		if(parseip(c->raddr, argv[1]) == -1){
+			e = Ebadip;
+			break;
+		}
+		findlocalip(c->p->f, c->laddr, c->raddr);
+		ecb->incoming = 0;
+		ecb->seq = 0;
+		if(strcmp(p, "*") == 0) {
+			/* pick a random spi no other incoming conversation uses */
+			QLOCK(c->p);
+			for(;;) {
+				spi = nrand(1<<16) + 256;
+				if(convlookup(c->p, spi) == nil)
+					break;
+			}
+			QUNLOCK(c->p);
+			ecb->spi = spi;
+			ecb->incoming = 1;
+			qhangup(c->wq, nil);
+		} else {
+			spi = strtoul(p, &pp, 10);
+			if(pp == p) {
+				e = "malformed address";
+				break;
+			}
+			ecb->spi = spi;
+			qhangup(c->rq, nil);
+		}
+		nullespinit(ecb, "null", nil, 0);
+		nullahinit(ecb, "null", nil, 0);
+	}
+	Fsconnected(c, e);
+
+	return e;
+}
+
+
+/*
+ * Write the conversation's status line into state (at most n bytes):
+ * "Open" while the conversation is in use, "Closed" otherwise.
+ * Returns snprint's result.
+ */
+static int
+espstate(Conv *c, char *state, int n)
+{
+	char *s;
+
+	s = "Closed\n";
+	if(c->inuse)
+		s = "Open\n";
+	return snprint(state, n, "%s", s);
+}
+
+/*
+ * Protocol create routine: open the conversation's read queue and a
+ * write queue that is kicked through espkick with c as its argument.
+ */
+static void
+espcreate(Conv *c)
+{
+	enum { Queuesz = 64*1024 };	/* first argument to qopen for both queues */
+
+	c->rq = qopen(Queuesz, Qmsg, 0, 0);
+	c->wq = qopen(Queuesz, Qkick, espkick, c);
+}
+
+/*
+ * Protocol close routine: close the three queues, forget both
+ * addresses, free the cipher and authenticator state and zero the
+ * control block.
+ */
+static void
+espclose(Conv *c)
+{
+	Espcb *cb;
+
+	qclose(c->rq);
+	qclose(c->wq);
+	qclose(c->eq);
+
+	ipmove(c->laddr, IPnoaddr);
+	ipmove(c->raddr, IPnoaddr);
+
+	cb = (Espcb*)c->ptcl;
+	free(cb->espstate);
+	free(cb->ahstate);
+	memset(cb, 0, sizeof *cb);
+}
+
+/*
+ * IP version of conversation c: V4 when the remote address is
+ * IPnoaddr or when both addresses start with the v4 prefix,
+ * V6 otherwise.
+ */
+static int
+ipvers(Conv *c)
+{
+	if(ipcmp(c->raddr, IPnoaddr) == 0)
+		return V4;
+	if(memcmp(c->raddr, v4prefix, IPv4off) == 0
+	&& memcmp(c->laddr, v4prefix, IPv4off) == 0)
+		return V4;
+	return V6;
+}
+
+/*
+ * Kick routine of the conversation's write queue (installed by
+ * espcreate): take one block off c->wq, wrap it as an ESP packet and
+ * pass it to ipoput4 or ipoput6.
+ * The block gets room for the IP+ESP header and IV in front and for
+ * padding, the Esptail and the authentication data behind; the
+ * conversation's cipher and auth routines are then run over it.
+ * c is locked while the packet is built.
+ */
+static void
+espkick(void *x)
+{
+ Conv *c = x;
+ Esp4hdr *eh4;
+ Esp6hdr *eh6;
+ Esptail *et;
+ Userhdr *uh;
+ Espcb *ecb;
+ Block *bp;
+ int nexthdr, payload, pad, align, version, hdrlen, iphdrlen;
+ uchar *auth;
+
+ version = ipvers(c);
+ iphdrlen = version == V4? IP4HDR: IP6HDR;
+ hdrlen = version == V4? Esp4hdrlen: Esp6hdrlen;
+
+ bp = qget(c->wq);
+ if(bp == nil)
+ return;
+
+ QLOCK(c);
+ ecb = c->ptcl;
+
+ if(ecb->header) {
+ /* make sure the message has a User header */
+ bp = pullupblock(bp, Userhdrlen);
+ if(bp == nil) {
+ QUNLOCK(c);
+ return;
+ }
+ /* take nexthdr from the user's header, then strip that header */
+ uh = (Userhdr*)bp->rp;
+ nexthdr = uh->nexthdr;
+ bp->rp += Userhdrlen;
+ } else {
+ nexthdr = 0; /* what should this be? */
+ }
+
+ payload = BLEN(bp) + ecb->espivlen;
+
+ /* Make space to fit ip header */
+ bp = padblock(bp, hdrlen + ecb->espivlen);
+
+ align = 4;
+ if(ecb->espblklen > align)
+ align = ecb->espblklen;
+ if(align % ecb->ahblklen != 0)
+ panic("espkick: ahblklen is important after all");
+ /* smallest pad that makes payload + pad + Esptaillen a multiple of align */
+ pad = (align-1) - (payload + Esptaillen-1)%align;
+
+ /*
+ * Make space for tail
+ * this is done by calling padblock with a negative size
+ * Padblock does not change bp->wp!
+ */
+ bp = padblock(bp, -(pad+Esptaillen+ecb->ahlen));
+ bp->wp += pad+Esptaillen+ecb->ahlen;
+
+ /* both header views alias the start of the block; only the one matching version is used */
+ eh4 = (Esp4hdr *)bp->rp;
+ eh6 = (Esp6hdr *)bp->rp;
+ et = (Esptail*)(bp->rp + hdrlen + payload + pad);
+
+ /* fill in tail */
+ et->pad = pad;
+ et->nexthdr = nexthdr;
+
+ /* cipher covers IV, data, padding and tail; auth data sits right after */
+ ecb->cipher(ecb, bp->rp + hdrlen, payload + pad + Esptaillen);
+ auth = bp->rp + hdrlen + payload + pad + Esptaillen;
+
+ /* fill in head */
+ if (version == V4) {
+ eh4->vihl = IP_VER4;
+ hnputl(eh4->espspi, ecb->spi);
+ hnputl(eh4->espseq, ++ecb->seq);
+ v6tov4(eh4->espsrc, c->laddr);
+ v6tov4(eh4->espdst, c->raddr);
+ eh4->espproto = IP_ESPPROTO;
+ eh4->frag[0] = 0;
+ eh4->frag[1] = 0;
+ } else {
+ eh6->vcf[0] = IP_VER6;
+ hnputl(eh6->espspi, ecb->spi);
+ hnputl(eh6->espseq, ++ecb->seq);
+ ipmove(eh6->src, c->laddr);
+ ipmove(eh6->dst, c->raddr);
+ eh6->proto = IP_ESPPROTO;
+ }
+
+ /* authenticate from the ESP header (after the IP header) through the tail */
+ ecb->auth(ecb, bp->rp + iphdrlen, (hdrlen - iphdrlen) +
+ payload + pad + Esptaillen, auth);
+
+ QUNLOCK(c);
+ /* print("esp: pass down: %uld\n", BLEN(bp)); */
+ if (version == V4)
+ ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+ else
+ ipoput6(c->p->f, bp, 0, c->ttl, c->tos, c);
+}
+
+/*
+ * Input routine: handle one received ESP packet.
+ * The spi in the ESP header selects the incoming conversation; the
+ * packet is then authenticated, deciphered, stripped of header, IV,
+ * padding, tail and auth data, optionally given a Userhdr, and passed
+ * up the conversation's read queue.  Packets that are too short, match
+ * no conversation, fail authentication or deciphering, or arrive when
+ * the queue is full are logged and dropped.
+ */
+void
+espiput(Proto *esp, Ipifc* _, Block *bp)
+{
+ Esp4hdr *eh4;
+ Esp6hdr *eh6;
+ Esptail *et;
+ Userhdr *uh;
+ Conv *c;
+ Espcb *ecb;
+ uchar raddr[IPaddrlen], laddr[IPaddrlen];
+ Fs *f;
+ uchar *auth, *espspi;
+ ulong spi;
+ int payload, nexthdr, version, hdrlen;
+
+ f = esp->f;
+ if (bp == nil || BLEN(bp) == 0) {
+ /* get enough to identify the IP version */
+ bp = pullupblock(bp, IP4HDR);
+ if(bp == nil) {
+ netlog(f, Logesp, "esp: short packet\n");
+ return;
+ }
+ }
+ /* the version nibble is in the first byte for both v4 and v6 */
+ eh4 = (Esp4hdr*)bp->rp;
+ version = ((eh4->vihl & 0xf0) == IP_VER4? V4: V6);
+ hdrlen = version == V4? Esp4hdrlen: Esp6hdrlen;
+
+ bp = pullupblock(bp, hdrlen + Esptaillen);
+ if(bp == nil) {
+ netlog(f, Logesp, "esp: short packet\n");
+ return;
+ }
+
+ if (version == V4) {
+ eh4 = (Esp4hdr*)bp->rp;
+ spi = nhgetl(eh4->espspi);
+ v4tov6(raddr, eh4->espsrc);
+ v4tov6(laddr, eh4->espdst);
+ } else {
+ eh6 = (Esp6hdr*)bp->rp;
+ spi = nhgetl(eh6->espspi);
+ ipmove(raddr, eh6->src);
+ ipmove(laddr, eh6->dst);
+ }
+
+ QLOCK(esp);
+ /* Look for a conversation structure for this port */
+ c = convlookup(esp, spi);
+ if(c == nil) {
+ QUNLOCK(esp);
+ netlog(f, Logesp, "esp: no conv %I -> %I!%d\n", raddr,
+ laddr, spi);
+ icmpnoconv(f, bp);
+ freeblist(bp);
+ return;
+ }
+
+ /* lock the conversation before letting go of the protocol */
+ QLOCK(c);
+ QUNLOCK(esp);
+
+ ecb = c->ptcl;
+ /* too hard to do decryption/authentication on block lists */
+ if(bp->next)
+ bp = concatblock(bp);
+
+ if(BLEN(bp) < hdrlen + ecb->espivlen + Esptaillen + ecb->ahlen) {
+ QUNLOCK(c);
+ netlog(f, Logesp, "esp: short block %I -> %I!%d\n", raddr,
+ laddr, spi);
+ freeb(bp);
+ return;
+ }
+
+ /* auth data is the last ahlen bytes; it covers everything from the spi up to it */
+ auth = bp->wp - ecb->ahlen;
+ espspi = version == V4? ((Esp4hdr*)bp->rp)->espspi:
+ ((Esp6hdr*)bp->rp)->espspi;
+ if(!ecb->auth(ecb, espspi, auth - espspi, auth)) {
+ QUNLOCK(c);
+print("esp: bad auth %I -> %I!%ld\n", raddr, laddr, spi);
+ netlog(f, Logesp, "esp: bad auth %I -> %I!%d\n", raddr,
+ laddr, spi);
+ freeb(bp);
+ return;
+ }
+
+ /* what remains between header and auth data: IV, data, padding and tail */
+ payload = BLEN(bp) - hdrlen - ecb->ahlen;
+ if(payload <= 0 || payload % 4 != 0 || payload % ecb->espblklen != 0) {
+ QUNLOCK(c);
+ netlog(f, Logesp, "esp: bad length %I -> %I!%d payload=%d BLEN=%d\n",
+ raddr, laddr, spi, payload, BLEN(bp));
+ freeb(bp);
+ return;
+ }
+ if(!ecb->cipher(ecb, bp->rp + hdrlen, payload)) {
+ QUNLOCK(c);
+print("esp: cipher failed %I -> %I!%ld: %s\n", raddr, laddr, spi, up->errstr);
+ netlog(f, Logesp, "esp: cipher failed %I -> %I!%d: %s\n", raddr,
+ laddr, spi, up->errstr);
+ freeb(bp);
+ return;
+ }
+
+ /* the tail sits at the end of the deciphered region; use it to drop padding and IV */
+ payload -= Esptaillen;
+ et = (Esptail*)(bp->rp + hdrlen + payload);
+ payload -= et->pad + ecb->espivlen;
+ nexthdr = et->nexthdr;
+ if(payload <= 0) {
+ QUNLOCK(c);
+ netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%d\n",
+ raddr, laddr, spi);
+ freeb(bp);
+ return;
+ }
+
+ /* trim packet */
+ bp->rp += hdrlen + ecb->espivlen;
+ bp->wp = bp->rp + payload;
+ if(ecb->header) {
+ /* assume Userhdrlen < Esp4hdrlen < Esp6hdrlen */
+ /* the Userhdr is written over the tail end of the header just skipped */
+ bp->rp -= Userhdrlen;
+ uh = (Userhdr*)bp->rp;
+ memset(uh, 0, Userhdrlen);
+ uh->nexthdr = nexthdr;
+ }
+
+ if(qfull(c->rq)){
+ netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", raddr,
+ laddr, spi);
+ freeblist(bp);
+ }else {
+// print("esp: pass up: %uld\n", BLEN(bp));
+ qpass(c->rq, bp);
+ }
+
+ QUNLOCK(c);
+}
+
+/*
+ * Protocol-specific control messages:
+ *	esp alg key	select the encryption algorithm
+ *	ah alg key	select the authentication algorithm
+ *	header		turn the user-visible header on
+ *	noheader	turn the user-visible header off
+ * Returns nil on success or an error string.
+ */
+char*
+espctl(Conv *c, char **f, int n)
+{
+	Espcb *ecb;
+	char *cmd;
+
+	ecb = c->ptcl;
+	cmd = f[0];
+
+	if(strcmp(cmd, "esp") == 0)
+		return setalg(ecb, f, n, espalg);
+	if(strcmp(cmd, "ah") == 0)
+		return setalg(ecb, f, n, ahalg);
+	if(strcmp(cmd, "header") == 0){
+		ecb->header = 1;
+		return nil;
+	}
+	if(strcmp(cmd, "noheader") == 0){
+		ecb->header = 0;
+		return nil;
+	}
+	return "unknown control request";
+}
+
+/*
+ * Advice routine: bp holds the start of a packet we sent that some
+ * error report refers to.  If its spi matches an incoming
+ * conversation, both of that conversation's queues are hung up with
+ * msg.  bp is always freed.
+ *
+ * The spi is a 4-byte field (written with hnputl, read with nhgetl in
+ * espiput), so it is read here with nhgetl too; reading only two bytes
+ * would compare just the top half of the spi.
+ * NOTE(review): bp is interpreted as an Esp4hdr only -- v6 packets are
+ * not distinguished here; confirm whether advice can arrive for v6.
+ */
+void
+espadvise(Proto *esp, Block *bp, char *msg)
+{
+	Esp4hdr *h;
+	Conv *c;
+	ulong spi;
+
+	h = (Esp4hdr*)(bp->rp);
+
+	spi = nhgetl(h->espspi);
+	QLOCK(esp);
+	c = convlookup(esp, spi);
+	if(c != nil) {
+		qhangup(c->rq, msg);
+		qhangup(c->wq, msg);
+	}
+	QUNLOCK(esp);
+	freeblist(bp);
+}
+
+/*
+ * Format the protocol's two counters (in, inerrors) into buf,
+ * at most len bytes.  Returns snprint's result.
+ */
+int
+espstats(Proto *esp, char *buf, int len)
+{
+	Esppriv *priv;
+
+	priv = esp->priv;
+	return snprint(buf, len, "%lud %lud\n", priv->in, priv->inerrors);
+}
+
+/*
+ * Format the local end of c into buf (at most len bytes): the local
+ * address, followed by !spi when this is the incoming side.
+ * c is locked while its fields are read.  Returns snprint's result.
+ */
+static int
+esplocal(Conv *c, char *buf, int len)
+{
+	Espcb *cb;
+	int nw;
+
+	cb = c->ptcl;
+	QLOCK(c);
+	if(!cb->incoming)
+		nw = snprint(buf, len, "%I\n", c->laddr);
+	else
+		nw = snprint(buf, len, "%I!%uld\n", c->laddr, cb->spi);
+	QUNLOCK(c);
+	return nw;
+}
+
+/*
+ * Format the remote end of c into buf (at most len bytes): the remote
+ * address, followed by !spi when this is the outgoing side.
+ * c is locked while its fields are read.  Returns snprint's result.
+ */
+static int
+espremote(Conv *c, char *buf, int len)
+{
+	Espcb *cb;
+	int nw;
+
+	cb = c->ptcl;
+	QLOCK(c);
+	if(!cb->incoming)
+		nw = snprint(buf, len, "%I!%uld\n", c->raddr, cb->spi);
+	else
+		nw = snprint(buf, len, "%I\n", c->raddr);
+	QUNLOCK(c);
+	return nw;
+}
+
+/*
+ * Find the incoming conversation of protocol esp whose spi is `spi'.
+ * The conversation table is walked up to its first nil entry.
+ * Returns the conversation or nil.
+ */
+static Conv*
+convlookup(Proto *esp, ulong spi)
+{
+	Conv **pp, *conv;
+	Espcb *cb;
+
+	for(pp = esp->conv; (conv = *pp) != nil; pp++){
+		cb = conv->ptcl;
+		if(!cb->incoming)
+			continue;
+		if(cb->spi == spi)
+			return conv;
+	}
+	return nil;
+}
+
+/*
+ * Handle an "esp alg key" or "ah alg key" control message.
+ * f[1] names an entry of the table alg; f[2] is the key in hex and is
+ * converted in place.  The key is packed starting from the LAST hex
+ * digit: the final two digits form key[0], the two before them key[1],
+ * and so on; digits beyond the algorithm's key size are ignored and a
+ * short key leaves the remaining bytes zero.  The algorithm's init
+ * routine is then called with the packed key.
+ * Returns nil on success or an error string.
+ *
+ * Each key byte takes two hex digits, so the packing loop must run for
+ * up to 2*nbyte digits (i/2 < nbyte); stopping at i*2 < nbyte filled
+ * only a quarter of the key and left the rest zero.
+ */
+static char *
+setalg(Espcb *ecb, char **f, int n, Algorithm *alg)
+{
+	uchar *key;
+	int c, i, nbyte, nchar;
+
+	if(n < 2)
+		return "bad format";
+	for(; alg->name; alg++)
+		if(strcmp(f[1], alg->name) == 0)
+			break;
+	if(alg->name == nil)
+		return "unknown algorithm";
+
+	if(n != 3)
+		return "bad format";
+	nbyte = (alg->keylen + 7) >> 3;
+	nchar = strlen(f[2]);
+
+	/* convert hex digits from ascii to their values, in place */
+	for(i=0; i<nchar; i++) {
+		c = f[2][i];
+		if(c >= '0' && c <= '9')
+			f[2][i] -= '0';
+		else if(c >= 'a' && c <= 'f')
+			f[2][i] -= 'a'-10;
+		else if(c >= 'A' && c <= 'F')
+			f[2][i] -= 'A'-10;
+		else
+			return "bad character in key";
+	}
+
+	/* collapse digit pairs into bytes, last digit first */
+	key = smalloc(nbyte);
+	memset(key, 0, nbyte);	/* the loop below only ORs bits in */
+	for(i=0; i<nchar && i/2<nbyte; i++) {
+		c = f[2][nchar-i-1];
+		if(i&1)
+			c <<= 4;
+		key[i/2] |= c;
+	}
+
+	alg->init(ecb, alg->name, key, alg->keylen);
+	free(key);
+	return nil;
+}
+
+/*
+ * Cipher routine installed by nullespinit: leaves the data untouched
+ * and returns 1.
+ */
+static int
+nullcipher(Espcb* _, uchar* __, int ___)
+{
+ return 1;
+}
+
+/*
+ * Set up ecb for the null encryption algorithm: block length 1,
+ * no IV, and a cipher routine that does nothing.  The key is ignored.
+ */
+static void
+nullespinit(Espcb *ecb, char *name, uchar* _, int __)
+{
+	ecb->cipher = nullcipher;
+	ecb->espivlen = 0;
+	ecb->espblklen = 1;
+	ecb->espalg = name;
+}
+
+/*
+ * Authentication routine installed by nullahinit: checks nothing,
+ * writes nothing, and returns 1.
+ */
+static int
+nullauth(Espcb* _, uchar* __, int ___, uchar* ____)
+{
+ return 1;
+}
+
+/*
+ * Set up ecb for the null authentication algorithm: block length 1,
+ * zero-length authenticator, and an auth routine that always
+ * returns 1.  The key is ignored.
+ */
+static void
+nullahinit(Espcb *ecb, char *name, uchar* _, int __)
+{
+	ecb->auth = nullauth;
+	ecb->ahlen = 0;
+	ecb->ahblklen = 1;
+	ecb->ahalg = name;
+}
+
+/*
+ * HMAC-SHA1 of the tlen bytes at t under the klen-byte key, written to
+ * hash.  The key is XORed into 64-byte inner (0x36) and outer (0x5c)
+ * pads; hash = SHA1(opad || SHA1(ipad || t)).
+ * A key longer than the 64-byte pad is first replaced by its SHA1
+ * digest, as RFC 2104 specifies; previously such a key overran
+ * ipad and opad on the stack.
+ */
+void
+seanq_hmac_sha1(uchar hash[SHA1dlen], uchar *t, long tlen, uchar *key, long klen)
+{
+	uchar ipad[65], opad[65];
+	uchar keyhash[SHA1dlen];
+	int i;
+	DigestState *digest;
+	uchar innerhash[SHA1dlen];
+
+	if(klen > 64){
+		sha1(key, klen, keyhash, nil);
+		key = keyhash;
+		klen = SHA1dlen;
+	}
+
+	for(i=0; i<64; i++){
+		ipad[i] = 0x36;
+		opad[i] = 0x5c;
+	}
+	ipad[64] = opad[64] = 0;
+	for(i=0; i<klen; i++){
+		ipad[i] ^= key[i];
+		opad[i] ^= key[i];
+	}
+	/* inner hash: ipad then the text */
+	digest = sha1(ipad, 64, nil, nil);
+	sha1(t, tlen, innerhash, digest);
+	/* outer hash: opad then the inner digest */
+	digest = sha1(opad, 64, nil, nil);
+	sha1(innerhash, SHA1dlen, hash, digest);
+}
+
+/*
+ * Compute the HMAC-SHA1 of t[0..tlen) with the 16-byte key in
+ * ecb->ahstate.  Returns 1 if its first ecb->ahlen bytes equal what is
+ * already at auth, else 0; either way auth is then overwritten with
+ * the computed value.
+ */
+static int
+shaauth(Espcb *ecb, uchar *t, int tlen, uchar *auth)
+{
+	uchar digest[SHA1dlen];
+	int same;
+
+	memset(digest, 0, SHA1dlen);
+	seanq_hmac_sha1(digest, t, tlen, (uchar*)ecb->ahstate, 16);
+	same = 0;
+	if(memcmp(auth, digest, ecb->ahlen) == 0)
+		same = 1;
+	memmove(auth, digest, ecb->ahlen);
+	return same;
+}
+
+/*
+ * Set up ecb for HMAC-SHA1 authentication with a 12-byte authenticator.
+ * klen is the key length in bits and must be 128; a private copy of
+ * the 16 key bytes is kept in ecb->ahstate for shaauth.
+ */
+static void
+shaahinit(Espcb *ecb, char *name, uchar *key, int klen)
+{
+	if(klen != 128)
+		panic("shaahinit: bad keylen");
+	/*
+	 * bits to bytes.  This used to shift by 8, which gave 0 and so
+	 * copied no key at all while shaauth still read 16 bytes.
+	 */
+	klen >>= 3;
+
+	ecb->ahalg = name;
+	ecb->ahblklen = 1;
+	ecb->ahlen = 12;
+	ecb->auth = shaauth;
+	ecb->ahstate = smalloc(klen);
+	memmove(ecb->ahstate, key, klen);
+}
+
+/*
+ * HMAC-MD5 of the tlen bytes at t under the klen-byte key, written to
+ * hash.  The key is XORed into 64-byte inner (0x36) and outer (0x5c)
+ * pads; hash = MD5(opad || MD5(ipad || t)).
+ * A key longer than the 64-byte pad is first replaced by its MD5
+ * digest, as RFC 2104 specifies; previously such a key overran
+ * ipad and opad on the stack.
+ */
+void
+seanq_hmac_md5(uchar hash[MD5dlen], uchar *t, long tlen, uchar *key, long klen)
+{
+	uchar ipad[65], opad[65];
+	uchar keyhash[MD5dlen];
+	int i;
+	DigestState *digest;
+	uchar innerhash[MD5dlen];
+
+	if(klen > 64){
+		md5(key, klen, keyhash, nil);
+		key = keyhash;
+		klen = MD5dlen;
+	}
+
+	for(i=0; i<64; i++){
+		ipad[i] = 0x36;
+		opad[i] = 0x5c;
+	}
+	ipad[64] = opad[64] = 0;
+	for(i=0; i<klen; i++){
+		ipad[i] ^= key[i];
+		opad[i] ^= key[i];
+	}
+	/* inner hash: ipad then the text */
+	digest = md5(ipad, 64, nil, nil);
+	md5(t, tlen, innerhash, digest);
+	/* outer hash: opad then the inner digest */
+	digest = md5(opad, 64, nil, nil);
+	md5(innerhash, MD5dlen, hash, digest);
+}
+
+/*
+ * Compute the HMAC-MD5 of t[0..tlen) with the 16-byte key in
+ * ecb->ahstate.  Returns 1 if its first ecb->ahlen bytes equal what is
+ * already at auth, else 0; either way auth is then overwritten with
+ * the computed value.
+ */
+static int
+md5auth(Espcb *ecb, uchar *t, int tlen, uchar *auth)
+{
+	uchar digest[MD5dlen];
+	int same;
+
+	memset(digest, 0, MD5dlen);
+	seanq_hmac_md5(digest, t, tlen, (uchar*)ecb->ahstate, 16);
+	same = 0;
+	if(memcmp(auth, digest, ecb->ahlen) == 0)
+		same = 1;
+	memmove(auth, digest, ecb->ahlen);
+	return same;
+}
+
+/*
+ * Set up ecb for HMAC-MD5 authentication with a 12-byte authenticator.
+ * klen is the key length in bits and must be 128; a private copy of
+ * the key bytes is kept in ecb->ahstate for md5auth.
+ */
+static void
+md5ahinit(Espcb *ecb, char *name, uchar *key, int klen)
+{
+	int nbyte;
+
+	if(klen != 128)
+		panic("md5ahinit: bad keylen");
+	nbyte = klen >> 3;	/* bits to bytes */
+
+	ecb->ahalg = name;
+	ecb->ahblklen = 1;
+	ecb->ahlen = 12;
+	ecb->auth = md5auth;
+	ecb->ahstate = smalloc(nbyte);
+	memmove(ecb->ahstate, key, nbyte);
+}
+
+/*
+ * DES in cipher-block-chaining mode over the n bytes at p, in place.
+ * The first 8 bytes of p carry the IV; the rest is processed in 8-byte
+ * blocks using the chaining value kept in ds->ivec.
+ * Always returns 1.
+ * NOTE(review): the loops step 8 bytes at a time with no remainder
+ * check, so n is presumably a multiple of 8 -- confirm with callers.
+ */
+static int
+descipher(Espcb *ecb, uchar *p, int n)
+{
+ uchar tmp[8];
+ uchar *pp, *tp, *ip, *eip, *ep;
+ DESstate *ds = ecb->espstate;
+
+ ep = p + n;
+ if(ecb->incoming) {
+ /* incoming: take the IV from the packet */
+ memmove(ds->ivec, p, 8);
+ p += 8;
+ while(p < ep){
+ /* save the input block; it becomes the next chaining value */
+ memmove(tmp, p, 8);
+ block_cipher(ds->expanded, p, 1);
+ tp = tmp;
+ ip = ds->ivec;
+ for(eip = ip+8; ip < eip; ){
+ *p++ ^= *ip;
+ *ip++ = *tp++;
+ }
+ }
+ } else {
+ /* outgoing: put the current chaining value in the packet as its IV */
+ memmove(p, ds->ivec, 8);
+ for(p += 8; p < ep; p += 8){
+ pp = p;
+ ip = ds->ivec;
+ for(eip = ip+8; ip < eip; )
+ *pp++ ^= *ip++;
+ block_cipher(ds->expanded, p, 0);
+ /* the output block chains into the next one (and the next packet) */
+ memmove(ds->ivec, p, 8);
+ }
+ }
+ return 1;
+}
+
+/*
+ * Set up ecb for DES encryption: 8-byte blocks and an 8-byte IV.
+ * k holds n bits of key; at most 8 bytes are used, zero padded.
+ * The initial IV is filled from nrand.
+ */
+static void
+desespinit(Espcb *ecb, char *name, uchar *k, int n)
+{
+	uchar key[8], ivec[8];
+	int i, nbyte;
+
+	/* bits to bytes, capped at one DES key */
+	nbyte = (n+7)>>3;
+	if(nbyte > 8)
+		nbyte = 8;
+	memset(key, 0, sizeof(key));
+	memmove(key, k, nbyte);
+
+	for(i = 0; i < 8; i++)
+		ivec[i] = nrand(256);
+
+	ecb->espalg = name;
+	ecb->espivlen = 8;
+	ecb->espblklen = 8;
+	ecb->cipher = descipher;
+	ecb->espstate = smalloc(sizeof(DESstate));
+	setupDESstate(ecb->espstate, key, ivec);
+}
+
+/*
+ * Allocate the ESP protocol descriptor, fill in its hooks and
+ * parameters, and register it with fs through Fsproto.
+ */
+void
+espinit(Fs *fs)
+{
+	Proto *esp;
+
+	esp = smalloc(sizeof(Proto));
+	esp->priv = smalloc(sizeof(Esppriv));
+
+	/* identity and sizing */
+	esp->name = "esp";
+	esp->ipproto = IP_ESPPROTO;
+	esp->nc = Nchans;
+	esp->ptclsize = sizeof(Espcb);
+
+	/* conversation life cycle */
+	esp->create = espcreate;
+	esp->connect = espconnect;
+	esp->announce = nil;
+	esp->close = espclose;
+	esp->ctl = espctl;
+
+	/* packet input and advice */
+	esp->rcv = espiput;
+	esp->advise = espadvise;
+
+	/* status files */
+	esp->state = espstate;
+	esp->stats = espstats;
+	esp->local = esplocal;
+	esp->remote = espremote;
+
+	Fsproto(fs, esp);
+}
+
+
+/*
+ * RC4 support.  Everything down to the matching #endif is compiled
+ * only if notdef is defined.
+ */
+#ifdef notdef
+enum {
+ RC4forward= 10*1024*1024, /* maximum skip forward */
+ RC4back = 100*1024, /* maximum look back */
+};
+
+/*
+ * Per-conversation RC4 state: the current key stream position plus an
+ * older snapshot used by rc4cipher to handle reordered packets.
+ */
+typedef struct Esprc4 Esprc4;
+struct Esprc4
+{
+ ulong cseq; /* current byte sequence number */
+ RC4state current;
+
+ int ovalid; /* old is valid */
+ ulong lgseq; /* last good sequence */
+ ulong oseq; /* old byte sequence number */
+ RC4state old;
+};
+
+static void rc4espinit(Espcb *ecb, char *name, uchar *k, int n);
+
+/*
+ * RC4 over the n bytes at p, in place.  The first 4 bytes of p carry
+ * the key stream byte sequence number: written on output, read on
+ * input and used to skip forward or to fall back to the saved old
+ * state.  Returns 1 on success, 0 if the packet is shorter than 4
+ * bytes or its sequence number is too far ahead or behind.
+ */
+static int
+rc4cipher(Espcb *ecb, uchar *p, int n)
+{
+ Esprc4 *esprc4;
+ RC4state tmpstate;
+ ulong seq;
+ long d, dd;
+
+ if(n < 4)
+ return 0;
+
+ esprc4 = ecb->espstate;
+ if(ecb->incoming) {
+ seq = nhgetl(p);
+ p += 4;
+ n -= 4;
+ /* d: how far this packet is from where the current state stands */
+ d = seq-esprc4->cseq;
+ if(d == 0) {
+ /* in sequence */
+ rc4(&esprc4->current, p, n);
+ esprc4->cseq += n;
+ if(esprc4->ovalid) {
+ dd = esprc4->cseq - esprc4->lgseq;
+ if(dd > RC4back)
+ esprc4->ovalid = 0;
+ }
+ } else if(d > 0) {
+ /* ahead of us: snapshot the state if none is saved, then skip forward */
+print("esp rc4cipher: missing packet: %uld %ld\n", seq, d); /* this link is hosed */
+ if(d > RC4forward) {
+ strcpy(up->errstr, "rc4cipher: skipped too much");
+ return 0;
+ }
+ esprc4->lgseq = seq;
+ if(!esprc4->ovalid) {
+ esprc4->ovalid = 1;
+ esprc4->oseq = esprc4->cseq;
+ memmove(&esprc4->old, &esprc4->current,
+ sizeof(RC4state));
+ }
+ rc4skip(&esprc4->current, d);
+ rc4(&esprc4->current, p, n);
+ esprc4->cseq = seq+n;
+ } else {
+ /* behind us: work from a copy of the old state, leaving both states alone */
+print("esp rc4cipher: reordered packet: %uld %ld\n", seq, d);
+ dd = seq - esprc4->oseq;
+ if(!esprc4->ovalid || -d > RC4back || dd < 0) {
+ strcpy(up->errstr, "rc4cipher: too far back");
+ return 0;
+ }
+ memmove(&tmpstate, &esprc4->old, sizeof(RC4state));
+ rc4skip(&tmpstate, dd);
+ rc4(&tmpstate, p, n);
+ return 1;
+ }
+
+ /* move old state up */
+ if(esprc4->ovalid) {
+ dd = esprc4->cseq - RC4back - esprc4->oseq;
+ if(dd > 0) {
+ rc4skip(&esprc4->old, dd);
+ esprc4->oseq += dd;
+ }
+ }
+ } else {
+ /* outgoing: stamp the packet with the current sequence number */
+ hnputl(p, esprc4->cseq);
+ p += 4;
+ n -= 4;
+ rc4(&esprc4->current, p, n);
+ esprc4->cseq += n;
+ }
+ return 1;
+}
+
+/*
+ * Set up ecb for RC4 encryption: a zeroed Esprc4 whose current state
+ * is keyed from the n bits at k, with 4-byte block and IV lengths.
+ */
+static void
+rc4espinit(Espcb *ecb, char *name, uchar *k, int n)
+{
+ Esprc4 *esprc4;
+
+ /* bits to bytes */
+ n = (n+7)>>3;
+ esprc4 = smalloc(sizeof(Esprc4));
+ memset(esprc4, 0, sizeof(Esprc4));
+ setupRC4state(&esprc4->current, k, n);
+ ecb->espalg = name;
+ ecb->espblklen = 4;
+ ecb->espivlen = 4;
+ ecb->cipher = rc4cipher;
+ ecb->espstate = esprc4;
+}
+#endif
diff --git a/src/9vx/a/ip/ethermedium.c b/src/9vx/a/ip/ethermedium.c
@@ -0,0 +1,766 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "netif.h"
+#include "ip.h"
+#include "ipv6.h"
+
+/*
+ * Ethernet header as laid out at the front of an outgoing block
+ * (filled in by etherbwrite).
+ */
+typedef struct Etherhdr Etherhdr;
+struct Etherhdr
+{
+ uchar d[6]; /* destination mac address */
+ uchar s[6]; /* source mac address */
+ uchar t[2]; /* ether type */
+};
+
+/* all-ones IP address; recvarp refuses to enter it into the arp cache */
+static uchar ipbroadcast[IPaddrlen] = {
+ 0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,
+};
+
+/* all-ones mac address; recvarp refuses to enter it into the arp cache */
+static uchar etherbroadcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+static void etherread4(void *a);
+static void etherread6(void *a);
+static void etherbind(Ipifc *ifc, int argc, char **argv);
+static void etherunbind(Ipifc *ifc);
+static void etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
+static void etheraddmulti(Ipifc *ifc, uchar *a, uchar *ia);
+static void etherremmulti(Ipifc *ifc, uchar *a, uchar *ia);
+static Block* multicastarp(Fs *f, Arpent *a, Medium*, uchar *mac);
+static void sendarp(Ipifc *ifc, Arpent *a);
+static void sendgarp(Ipifc *ifc, uchar*);
+static int multicastea(uchar *ea, uchar *ip);
+static void recvarpproc(void*);
+static void resolveaddr6(Ipifc *ifc, Arpent *a);
+static void etherpref2addr(uchar *pref, uchar *ea);
+
+/*
+ * Medium descriptor named "ether": 14-byte header, transfer units
+ * from 60 to 1514 bytes, 6-byte mac addresses.
+ */
+Medium ethermedium =
+{
+.name= "ether",
+.hsize= 14,
+.mintu= 60,
+.maxtu= 1514,
+.maclen= 6,
+.bind= etherbind,
+.unbind= etherunbind,
+.bwrite= etherbwrite,
+.addmulti= etheraddmulti,
+.remmulti= etherremmulti,
+.ares= arpenter,
+.areg= sendgarp,
+.pref2addr= etherpref2addr,
+};
+
+/*
+ * Medium descriptor named "gbe": identical to ethermedium except that
+ * the maximum transfer unit is 9014 bytes.
+ */
+Medium gbemedium =
+{
+.name= "gbe",
+.hsize= 14,
+.mintu= 60,
+.maxtu= 9014,
+.maclen= 6,
+.bind= etherbind,
+.unbind= etherunbind,
+.bwrite= etherbwrite,
+.addmulti= etheraddmulti,
+.remmulti= etherremmulti,
+.ares= arpenter,
+.areg= sendgarp,
+.pref2addr= etherpref2addr,
+};
+
+/*
+ * Per-interface state hung off ifc->arg by etherbind and torn down by
+ * etherunbind.  Each kproc stores its own Proc pointer here on startup
+ * and zeroes it when it exits, which is what etherunbind waits for.
+ */
+typedef struct Etherrock Etherrock;
+struct Etherrock
+{
+ Fs *f; /* file system we belong to */
+ Proc *arpp; /* arp process */
+ Proc *read4p; /* reading process (v4)*/
+ Proc *read6p; /* reading process (v6)*/
+ Chan *mchan4; /* Data channel for v4 */
+ Chan *achan; /* Arp channel */
+ Chan *cchan4; /* Control channel for v4 */
+ Chan *mchan6; /* Data channel for v6 */
+ Chan *cchan6; /* Control channel for v6 */
+};
+
+/*
+ * ethernet arp operation codes (the op field of Etherarp)
+ */
+enum
+{
+ ARPREQUEST = 1,
+ ARPREPLY = 2,
+};
+
+/*
+ * An arp packet including its ethernet header, as built by sendarp,
+ * sendgarp and recvarp.
+ */
+typedef struct Etherarp Etherarp;
+struct Etherarp
+{
+ uchar d[6]; /* ethernet destination */
+ uchar s[6]; /* ethernet source */
+ uchar type[2]; /* ether type (ETARP) */
+ uchar hrd[2]; /* hardware type; always set to 1 here */
+ uchar pro[2]; /* protocol type (ETIP4) */
+ uchar hln; /* length of sha/tha */
+ uchar pln; /* length of spa/tpa */
+ uchar op[2]; /* ARPREQUEST or ARPREPLY */
+ uchar sha[6]; /* sender's mac address */
+ uchar spa[4]; /* sender's IPv4 address */
+ uchar tha[6]; /* target's mac address */
+ uchar tpa[4]; /* target's IPv4 address */
+};
+
+static char *nbmsg = "nonblocking";
+
+/*
+ * called to bind an IP ifc to an ethernet device
+ * called with ifc wlock'd
+ *
+ * argv[2] names the ethernet device.  Dials its IPv4, ARP and IPv6
+ * conversations, reads the mac address and speed from its stats file,
+ * records everything in a new Etherrock at ifc->arg, and starts the
+ * three reader processes.  Any failure closes whatever was opened and
+ * is passed on with nexterror.
+ */
+
+static void
+etherbind(Ipifc *ifc, int argc, char **argv)
+{
+	Chan *mchan4, *cchan4, *achan, *mchan6, *cchan6, *schan;
+	char addr[Maxpath];	//char addr[2*KNAMELEN];
+	char dir[Maxpath];	//char dir[2*KNAMELEN];
+	char *buf;
+	int n;
+	char *ptr;
+	Etherrock *er;
+
+	/* argv[2] is used below, so three arguments are required */
+	if(argc < 3)
+		error(Ebadarg);
+
+	mchan4 = cchan4 = achan = mchan6 = cchan6 = nil;
+	buf = nil;
+	if(waserror()){
+		if(mchan4 != nil)
+			cclose(mchan4);
+		if(cchan4 != nil)
+			cclose(cchan4);
+		if(achan != nil)
+			cclose(achan);
+		if(mchan6 != nil)
+			cclose(mchan6);
+		if(cchan6 != nil)
+			cclose(cchan6);
+		if(buf != nil)
+			free(buf);
+		nexterror();
+	}
+
+	/*
+	 * open ipv4 conversation
+	 *
+	 * the dial will fail if the type is already open on
+	 * this device.
+	 */
+	snprint(addr, sizeof(addr), "%s!0x800", argv[2]);	/* ETIP4 */
+	mchan4 = chandial(addr, nil, dir, &cchan4);
+
+	/*
+	 * make it non-blocking
+	 */
+	devtab[cchan4->type]->write(cchan4, nbmsg, strlen(nbmsg), 0);
+
+	/*
+	 * get mac address and speed
+	 */
+	snprint(addr, sizeof(addr), "%s/stats", argv[2]);
+	buf = smalloc(512);
+	schan = namec(addr, Aopen, OREAD, 0);
+	if(waserror()){
+		cclose(schan);
+		nexterror();
+	}
+	n = devtab[schan->type]->read(schan, buf, 511, 0);
+	cclose(schan);
+	poperror();
+	buf[n] = 0;
+
+	ptr = strstr(buf, "addr: ");
+	if(!ptr)
+		error(Eio);
+	ptr += 6;
+	parsemac(ifc->mac, ptr, 6);
+
+	/* speed defaults to 100 when the stats file does not report it */
+	ptr = strstr(buf, "mbps: ");
+	if(ptr){
+		ptr += 6;
+		ifc->mbps = atoi(ptr);
+	} else
+		ifc->mbps = 100;
+
+	/*
+	 * open arp conversation
+	 */
+	snprint(addr, sizeof(addr), "%s!0x806", argv[2]);	/* ETARP */
+	achan = chandial(addr, nil, nil, nil);
+
+	/*
+	 * open ipv6 conversation
+	 *
+	 * the dial will fail if the type is already open on
+	 * this device.
+	 */
+	snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]);	/* ETIP6 */
+	mchan6 = chandial(addr, nil, dir, &cchan6);
+
+	/*
+	 * make it non-blocking
+	 */
+	devtab[cchan6->type]->write(cchan6, nbmsg, strlen(nbmsg), 0);
+
+	er = smalloc(sizeof(*er));
+	er->mchan4 = mchan4;
+	er->cchan4 = cchan4;
+	er->achan = achan;
+	er->mchan6 = mchan6;
+	er->cchan6 = cchan6;
+	er->f = ifc->conv->p->f;
+	ifc->arg = er;
+
+	free(buf);
+	poperror();
+
+	kproc("etherread4", etherread4, ifc);
+	kproc("recvarpproc", recvarpproc, ifc);
+	kproc("etherread6", etherread6, ifc);
+}
+
+/*
+ * called with ifc wlock'd
+ *
+ * Undo etherbind: note each reader process that has registered itself,
+ * poll every 300ms until all three have cleared their slots, then
+ * close the channels and free the Etherrock.
+ */
+static void
+etherunbind(Ipifc *ifc)
+{
+	Etherrock *rock;
+
+	rock = ifc->arg;
+
+	if(rock->read4p)
+		postnote(rock->read4p, 1, "unbind", 0);
+	if(rock->read6p)
+		postnote(rock->read6p, 1, "unbind", 0);
+	if(rock->arpp)
+		postnote(rock->arpp, 1, "unbind", 0);
+
+	/* wait for readers to die */
+	for(;;){
+		if(rock->arpp == 0 && rock->read4p == 0 && rock->read6p == 0)
+			break;
+		tsleep(&up->sleep, return0, 0, 300);
+	}
+
+	if(rock->mchan4 != nil)
+		cclose(rock->mchan4);
+	if(rock->achan != nil)
+		cclose(rock->achan);
+	if(rock->cchan4 != nil)
+		cclose(rock->cchan4);
+	if(rock->mchan6 != nil)
+		cclose(rock->mchan6);
+	if(rock->cchan6 != nil)
+		cclose(rock->cchan6);
+
+	free(rock);
+}
+
+/*
+ * called by ipoput with a single block to write with ifc RLOCK'd
+ *
+ * Looks up the destination mac for ip.  If arpget hands back an arp
+ * entry, multicastarp is tried for broadcast and multicast addresses;
+ * when that yields no block, an address resolution request is sent and
+ * nothing is written now.  Otherwise the ethernet header is prepended
+ * and the block is written to the v4 or v6 data channel.
+ */
+static void
+etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip)
+{
+ Etherhdr *eh;
+ Arpent *a;
+ uchar mac[6];
+ Etherrock *er = ifc->arg;
+
+ /* get mac address of destination */
+ a = arpget(er->f->arp, bp, version, ifc, ip, mac);
+ if(a){
+ /* check for broadcast or multicast */
+ bp = multicastarp(er->f, a, ifc->m, mac);
+ if(bp==nil){
+ /* neither: ask the network for the address and return */
+ switch(version){
+ case V4:
+ sendarp(ifc, a);
+ break;
+ case V6:
+ resolveaddr6(ifc, a);
+ break;
+ default:
+ panic("etherbwrite: version %d", version);
+ }
+ return;
+ }
+ }
+
+ /* make it a single block with space for the ether header */
+ bp = padblock(bp, ifc->m->hsize);
+ if(bp->next)
+ bp = concatblock(bp);
+ /* pad short packets up to the interface minimum */
+ if(BLEN(bp) < ifc->mintu)
+ bp = adjustblock(bp, ifc->mintu);
+ eh = (Etherhdr*)bp->rp;
+
+ /* copy in mac addresses and ether type */
+ memmove(eh->s, ifc->mac, sizeof(eh->s));
+ memmove(eh->d, mac, sizeof(eh->d));
+
+ switch(version){
+ case V4:
+ eh->t[0] = 0x08;
+ eh->t[1] = 0x00;
+ devtab[er->mchan4->type]->bwrite(er->mchan4, bp, 0);
+ break;
+ case V6:
+ eh->t[0] = 0x86;
+ eh->t[1] = 0xDD;
+ devtab[er->mchan6->type]->bwrite(er->mchan6, bp, 0);
+ break;
+ default:
+ panic("etherbwrite2: version %d", version);
+ }
+ ifc->out++;
+}
+
+
+/*
+ * process to read from the ethernet
+ *
+ * Started by etherbind with the Ipifc as argument.  Loops forever
+ * reading blocks from the v4 data channel and handing them to ipiput4;
+ * on any error it clears its slot in the Etherrock and exits.
+ */
+static void
+etherread4(void *a)
+{
+ Ipifc *ifc;
+ Block *bp;
+ Etherrock *er;
+
+ ifc = a;
+ er = ifc->arg;
+ er->read4p = up; /* hide identity under a rock for unbind */
+ if(waserror()){
+ er->read4p = 0;
+ pexit("hangup", 1);
+ }
+ for(;;){
+ bp = devtab[er->mchan4->type]->bread(er->mchan4, ifc->maxtu, 0);
+ /* drop the packet if the interface read lock can't be had right now */
+ if(!CANRLOCK(ifc)){
+ freeb(bp);
+ continue;
+ }
+ /* make sure the lock is released if anything below raises an error */
+ if(waserror()){
+ RUNLOCK(ifc);
+ nexterror();
+ }
+ ifc->in++;
+ /* step over the medium header */
+ bp->rp += ifc->m->hsize;
+ if(ifc->lifc == nil)
+ freeb(bp);
+ else
+ ipiput4(er->f, ifc, bp);
+ RUNLOCK(ifc);
+ poperror();
+ }
+}
+
+
+/*
+ * process to read from the ethernet, IPv6
+ *
+ * Started by etherbind with the Ipifc as argument.  Loops forever
+ * reading blocks from the v6 data channel and handing them to ipiput6;
+ * on any error it clears its slot in the Etherrock and exits.
+ */
+static void
+etherread6(void *a)
+{
+ Ipifc *ifc;
+ Block *bp;
+ Etherrock *er;
+
+ ifc = a;
+ er = ifc->arg;
+ er->read6p = up; /* hide identity under a rock for unbind */
+ if(waserror()){
+ er->read6p = 0;
+ pexit("hangup", 1);
+ }
+ for(;;){
+ bp = devtab[er->mchan6->type]->bread(er->mchan6, ifc->maxtu, 0);
+ /* drop the packet if the interface read lock can't be had right now */
+ if(!CANRLOCK(ifc)){
+ freeb(bp);
+ continue;
+ }
+ /* make sure the lock is released if anything below raises an error */
+ if(waserror()){
+ RUNLOCK(ifc);
+ nexterror();
+ }
+ ifc->in++;
+ /* step over the medium header */
+ bp->rp += ifc->m->hsize;
+ if(ifc->lifc == nil)
+ freeb(bp);
+ else
+ ipiput6(er->f, ifc, bp);
+ RUNLOCK(ifc);
+ poperror();
+ }
+}
+
+/*
+ * Ask the ethernet device to accept the multicast mac address that
+ * corresponds to IP address a, by writing "addmulti <mac>" to the
+ * control channel of the matching IP version.  Panics if a is not a
+ * multicast address.
+ */
+static void
+etheraddmulti(Ipifc *ifc, uchar *a, uchar *_)
+{
+	uchar mac[6];
+	char buf[64];
+	Etherrock *er = ifc->arg;
+	Chan *cchan;
+	int version;
+
+	/*
+	 * Pick the channel before formatting: multicastea leaves mac
+	 * untouched when a is not multicast, so mac must not be read
+	 * until the version is known to be good.
+	 */
+	version = multicastea(mac, a);
+	switch(version){
+	case V4:
+		cchan = er->cchan4;
+		break;
+	case V6:
+		cchan = er->cchan6;
+		break;
+	default:
+		panic("etheraddmulti: version %d", version);
+		return;
+	}
+	snprint(buf, sizeof(buf), "addmulti %E", mac);
+	devtab[cchan->type]->write(cchan, buf, strlen(buf), 0);
+}
+
+/*
+ * Ask the ethernet device to stop accepting the multicast mac address
+ * that corresponds to IP address a, by writing "remmulti <mac>" to the
+ * control channel of the matching IP version.  Panics if a is not a
+ * multicast address.
+ */
+static void
+etherremmulti(Ipifc *ifc, uchar *a, uchar *_)
+{
+	uchar mac[6];
+	char buf[64];
+	Etherrock *er = ifc->arg;
+	Chan *cchan;
+	int version;
+
+	/*
+	 * Pick the channel before formatting: multicastea leaves mac
+	 * untouched when a is not multicast, so mac must not be read
+	 * until the version is known to be good.
+	 */
+	version = multicastea(mac, a);
+	switch(version){
+	case V4:
+		cchan = er->cchan4;
+		break;
+	case V6:
+		cchan = er->cchan6;
+		break;
+	default:
+		panic("etherremmulti: version %d", version);
+		return;
+	}
+	snprint(buf, sizeof(buf), "remmulti %E", mac);
+	devtab[cchan->type]->write(cchan, buf, strlen(buf), 0);
+}
+
+/*
+ * send an ethernet arp
+ * (only v4, v6 uses the neighbor discovery, rfc1970)
+ *
+ * Broadcasts an arp request for a->ip unless one was sent less than a
+ * second ago.  The arp entry is released via arprelease on every path.
+ */
+static void
+sendarp(Ipifc *ifc, Arpent *a)
+{
+ int n;
+ Block *bp;
+ Etherarp *e;
+ Etherrock *er = ifc->arg;
+
+ /* don't do anything if it's been less than a second since the last */
+ if(NOW - a->ctime < 1000){
+ arprelease(er->f->arp, a);
+ return;
+ }
+
+ /* remove all but the last message */
+ while((bp = a->hold) != nil){
+ if(bp == a->last)
+ break;
+ a->hold = bp->list;
+ freeblist(bp);
+ }
+
+ /* try to keep it around for a second more */
+ a->ctime = NOW;
+ arprelease(er->f->arp, a);
+
+ /* build the request, padded to the medium's minimum size */
+ n = sizeof(Etherarp);
+ if(n < a->type->mintu)
+ n = a->type->mintu;
+ bp = allocb(n);
+ memset(bp->rp, 0, n);
+ e = (Etherarp*)bp->rp;
+ memmove(e->tpa, a->ip+IPv4off, sizeof(e->tpa));
+ ipv4local(ifc, e->spa);
+ memmove(e->sha, ifc->mac, sizeof(e->sha));
+ memset(e->d, 0xff, sizeof(e->d)); /* ethernet broadcast */
+ memmove(e->s, ifc->mac, sizeof(e->s));
+
+ hnputs(e->type, ETARP);
+ hnputs(e->hrd, 1);
+ hnputs(e->pro, ETIP4);
+ e->hln = sizeof(e->sha);
+ e->pln = sizeof(e->spa);
+ hnputs(e->op, ARPREQUEST);
+ bp->wp += n;
+
+ devtab[er->achan->type]->bwrite(er->achan, bp, 0);
+}
+
+/*
+ * IPv6 counterpart of sendarp: send a neighbor solicitation (icmpns)
+ * for a->ip, at most once per ReTransTimer and only while the entry
+ * has retransmissions left (a->rxtsrem).  The arp entry is released
+ * via arprelease on every path.
+ */
+static void
+resolveaddr6(Ipifc *ifc, Arpent *a)
+{
+ int sflag;
+ Block *bp;
+ Etherrock *er = ifc->arg;
+ uchar ipsrc[IPaddrlen];
+
+ /* don't do anything if it's been less than ReTransTimer since the last */
+ if(NOW - a->ctime < ReTransTimer){
+ arprelease(er->f->arp, a);
+ return;
+ }
+
+ /* remove all but the last message */
+ while((bp = a->hold) != nil){
+ if(bp == a->last)
+ break;
+ a->hold = bp->list;
+ freeblist(bp);
+ }
+
+ /* try to keep it around for another ReTransTimer */
+ a->ctime = NOW;
+ a->rtime = NOW + ReTransTimer;
+ /* out of retransmissions: give up without sending */
+ if(a->rxtsrem <= 0) {
+ arprelease(er->f->arp, a);
+ return;
+ }
+
+ a->rxtsrem--;
+ arprelease(er->f->arp, a);
+
+ /* solicit only if ipv6anylocal found a source address for this ifc */
+ if((sflag = ipv6anylocal(ifc, ipsrc)) != 0)
+ icmpns(er->f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
+}
+
+/*
+ * send a gratuitous arp to refresh arp caches
+ *
+ * Broadcasts an arp request whose sender and target protocol addresses
+ * are both the IPv4 part of ip.  Does nothing for IPnoaddr.
+ */
+static void
+sendgarp(Ipifc *ifc, uchar *ip)
+{
+ int n;
+ Block *bp;
+ Etherarp *e;
+ Etherrock *er = ifc->arg;
+
+ /* don't arp for our initial non address */
+ if(ipcmp(ip, IPnoaddr) == 0)
+ return;
+
+ /* build the request, padded to the medium's minimum size */
+ n = sizeof(Etherarp);
+ if(n < ifc->m->mintu)
+ n = ifc->m->mintu;
+ bp = allocb(n);
+ memset(bp->rp, 0, n);
+ e = (Etherarp*)bp->rp;
+ memmove(e->tpa, ip+IPv4off, sizeof(e->tpa));
+ memmove(e->spa, ip+IPv4off, sizeof(e->spa));
+ memmove(e->sha, ifc->mac, sizeof(e->sha));
+ memset(e->d, 0xff, sizeof(e->d)); /* ethernet broadcast */
+ memmove(e->s, ifc->mac, sizeof(e->s));
+
+ hnputs(e->type, ETARP);
+ hnputs(e->hrd, 1);
+ hnputs(e->pro, ETIP4);
+ e->hln = sizeof(e->sha);
+ e->pln = sizeof(e->spa);
+ hnputs(e->op, ARPREQUEST);
+ bp->wp += n;
+
+ devtab[er->achan->type]->bwrite(er->achan, bp, 0);
+}
+
+/*
+ * Read one packet from the arp channel and act on it.
+ *
+ * A reply is entered into the arp cache unless it claims one of our
+ * own addresses from a different mac, or names a broadcast address.
+ * A request refreshes the cache entry for its sender and, if it asks
+ * for one of our addresses or one we proxy for, is answered with a
+ * reply.  Other operations are ignored.  The packet read is always
+ * freed.
+ */
+static void
+recvarp(Ipifc *ifc)
+{
+	int n;
+	Block *ebp, *rbp;
+	Etherarp *e, *r;
+	uchar ip[IPaddrlen];
+	static uchar eprinted[4];	/* last address complained about in a request */
+	Etherrock *er = ifc->arg;
+
+	ebp = devtab[er->achan->type]->bread(er->achan, ifc->maxtu, 0);
+	if(ebp == nil)
+		return;
+
+	e = (Etherarp*)ebp->rp;
+	switch(nhgets(e->op)) {
+	default:
+		break;
+
+	case ARPREPLY:
+		/* check for machine using my ip address */
+		v4tov6(ip, e->spa);
+		if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
+				print("arprep: 0x%E/0x%E also has ip addr %V\n",
+					e->s, e->sha, e->spa);
+				break;
+			}
+		}
+
+		/* make sure we're not entering broadcast addresses */
+		if(ipcmp(ip, ipbroadcast) == 0 ||
+			!memcmp(e->sha, etherbroadcast, sizeof(e->sha))){
+			/* e->spa is a 4-byte v4 address, so %V as in the other messages, not %I */
+			print("arprep: 0x%E/0x%E cannot register broadcast address %V\n",
+				e->s, e->sha, e->spa);
+			break;
+		}
+
+		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 0);
+		break;
+
+	case ARPREQUEST:
+		/* don't answer arps till we know who we are */
+		if(ifc->lifc == 0)
+			break;
+
+		/* check for machine using my ip or ether address */
+		v4tov6(ip, e->spa);
+		if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
+				if (memcmp(eprinted, e->spa, sizeof(e->spa))){
+					/* print only once */
+					print("arpreq: 0x%E also has ip addr %V\n", e->sha, e->spa);
+					memmove(eprinted, e->spa, sizeof(e->spa));
+				}
+			}
+		} else {
+			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) == 0){
+				print("arpreq: %V also has ether addr %E\n", e->spa, e->sha);
+				break;
+			}
+		}
+
+		/* refresh what we know about sender */
+		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 1);
+
+		/* answer only requests for our address or systems we're proxying for */
+		v4tov6(ip, e->tpa);
+		if(!iplocalonifc(ifc, ip))
+		if(!ipproxyifc(er->f, ifc, ip))
+			break;
+
+		/* build the reply, padded to the interface minimum */
+		n = sizeof(Etherarp);
+		if(n < ifc->mintu)
+			n = ifc->mintu;
+		rbp = allocb(n);
+		r = (Etherarp*)rbp->rp;
+		memset(r, 0, sizeof(Etherarp));
+		hnputs(r->type, ETARP);
+		hnputs(r->hrd, 1);
+		hnputs(r->pro, ETIP4);
+		r->hln = sizeof(r->sha);
+		r->pln = sizeof(r->spa);
+		hnputs(r->op, ARPREPLY);
+		memmove(r->tha, e->sha, sizeof(r->tha));
+		memmove(r->tpa, e->spa, sizeof(r->tpa));
+		memmove(r->sha, ifc->mac, sizeof(r->sha));
+		memmove(r->spa, e->tpa, sizeof(r->spa));
+		memmove(r->d, e->sha, sizeof(r->d));
+		memmove(r->s, ifc->mac, sizeof(r->s));
+		rbp->wp += n;
+
+		devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
+	}
+	freeb(ebp);
+}
+
+/*
+ * process to read arp packets
+ *
+ * Started by etherbind with the Ipifc as argument.  Calls recvarp
+ * forever; on any error it clears its slot in the Etherrock and exits.
+ */
+static void
+recvarpproc(void *v)
+{
+ Ipifc *ifc = v;
+ Etherrock *er = ifc->arg;
+
+ er->arpp = up; /* recorded so etherunbind can find and wait for us */
+ if(waserror()){
+ er->arpp = 0;
+ pexit("hangup", 1);
+ }
+ for(;;)
+ recvarp(ifc);
+}
+
+/*
+ * Map a multicast IP address to its ethernet multicast address.
+ * Returns whatever ipismulticast(ip) returns; ea is filled in only
+ * when that is V4 (01:00:5e + low 23 bits of the address) or V6
+ * (33:33 + last four bytes of the address).
+ */
+static int
+multicastea(uchar *ea, uchar *ip)
+{
+	int version;
+
+	version = ipismulticast(ip);
+	if(version == V4){
+		ea[0] = 0x01;
+		ea[1] = 0x00;
+		ea[2] = 0x5e;
+		ea[3] = ip[13] & 0x7f;
+		ea[4] = ip[14];
+		ea[5] = ip[15];
+	}else if(version == V6){
+		ea[0] = 0x33;
+		ea[1] = 0x33;
+		ea[2] = ip[12];
+		ea[3] = ip[13];
+		ea[4] = ip[14];
+		ea[5] = ip[15];
+	}
+	return version;
+}
+
+/*
+ * fill in an arp entry for broadcast or multicast
+ * addresses.  Return the first queued packet for the
+ * IP address, or nil when the address is neither and
+ * ordinary address resolution has to do the work.
+ */
+static Block*
+multicastarp(Fs *f, Arpent *a, Medium *medium, uchar *mac)
+{
+	int kind, version;
+
+	/* is it broadcast? */
+	kind = ipforme(f, a->ip);
+	if(kind == Runi)
+		return nil;
+	if(kind == Rbcast){
+		memset(mac, 0xff, 6);
+		return arpresolve(f->arp, a, medium, mac);
+	}
+
+	/* if multicast, fill in mac */
+	version = multicastea(mac, a->ip);
+	if(version == V4 || version == V6)
+		return arpresolve(f->arp, a, medium, mac);
+
+	/* let arp take care of it */
+	return nil;
+}
+
+/*
+ * Register the "ether" and "gbe" media with the IP stack.
+ */
+void
+ethermediumlink(void)
+{
+	addipmedium(&ethermedium);
+	addipmedium(&gbemedium);
+}
+
+
+/*
+ * Fill the low 8 bytes of the IPv6 address pref with the modified
+ * EUI-64 interface identifier derived from the 6-byte mac address ea:
+ * the mac is split around an inserted ff:fe and its universal/local
+ * bit (0x2 of the first byte) is inverted, as RFC 4291 appendix A
+ * requires.  The bit used to be ORed in, which differs only for
+ * locally administered mac addresses.
+ */
+static void
+etherpref2addr(uchar *pref, uchar *ea)
+{
+	pref[8] = ea[0] ^ 0x2;
+	pref[9] = ea[1];
+	pref[10] = ea[2];
+	pref[11] = 0xFF;
+	pref[12] = 0xFE;
+	pref[13] = ea[3];
+	pref[14] = ea[4];
+	pref[15] = ea[5];
+}
diff --git a/src/9vx/a/ip/gre.c b/src/9vx/a/ip/gre.c
@@ -0,0 +1,283 @@
+/*
+ * Generic Routing Encapsulation over IPv4, rfc1702
+ */
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+/*
+ * Sizes and constants for GRE.
+ * NOTE(review): GRErxms, GREtickms and GREmaxxmit are not referenced
+ * anywhere in this file -- confirm whether they can go.
+ */
+enum
+{
+ GRE_IPONLY = 12, /* size of ip header */
+ GRE_IPPLUSGRE = 12, /* minimum size of GRE header */
+ IP_GREPROTO = 47,
+
+ GRErxms = 200,
+ GREtickms = 100,
+ GREmaxxmit = 10,
+};
+
+/*
+ * A GRE packet header as seen at bp->rp by grekick and greiput:
+ * the IPv4 header followed by the first four bytes of GRE header.
+ */
+typedef struct GREhdr
+{
+ /* ip header */
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar len[2]; /* packet length (including headers) */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+ uchar Unused;
+ uchar proto; /* Protocol */
+ uchar cksum[2]; /* checksum */
+ uchar src[4]; /* Ip source */
+ uchar dst[4]; /* Ip destination */
+
+ /* gre header */
+ uchar flags[2];
+ uchar eproto[2]; /* encapsulation protocol */
+} GREhdr;
+
+/*
+ * Per-protocol private state, allocated by greinit.  raw is switched
+ * by the "raw" and "cooked" control requests (see grectl).
+ */
+typedef struct GREpriv GREpriv;
+struct GREpriv
+{
+ int raw; /* Raw GRE mode */
+
+ /* non-MIB stats */
+ ulong csumerr; /* checksum errors */
+ ulong lenerr; /* short packet */
+};
+
+static void grekick(void *x, Block *bp);
+
+/*
+ * Connect c using the standard argument parsing, then refuse the
+ * connection if another conversation of this protocol already has the
+ * same remote port and address; in that case c's addresses are reset
+ * to IPnoaddr.  Returns nil on success or an error string.
+ */
+static char*
+greconnect(Conv *c, char **argv, int argc)
+{
+	Proto *proto;
+	Conv *other;
+	char *err;
+	int i;
+
+	err = Fsstdconnect(c, argv, argc);
+	if(err != nil)
+		return err;
+
+	/* make sure no one's already connected to this other sys */
+	proto = c->p;
+	QLOCK(proto);
+	for(i = 0; i < proto->nc; i++){
+		other = proto->conv[i];
+		if(other == nil)
+			break;
+		if(other == c)
+			continue;
+		if(other->rport != c->rport || ipcmp(other->raddr, c->raddr) != 0)
+			continue;
+		err = "already connected to that addr/proto";
+		ipmove(c->laddr, IPnoaddr);
+		ipmove(c->raddr, IPnoaddr);
+		break;
+	}
+	QUNLOCK(proto);
+
+	if(err != nil)
+		return err;
+
+	Fsconnected(c, nil);
+	return nil;
+}
+
+/*
+ * Create the queues of a new conversation: a 64K message-mode read
+ * queue and a write queue that bypasses queueing and calls grekick
+ * directly.
+ */
+static void
+grecreate(Conv *c)
+{
+ c->rq = qopen(64*1024, Qmsg, 0, c);
+ c->wq = qbypass(grekick, c);
+}
+
+/*
+ * Protocol state hook: every conversation reports "Datagram".
+ */
+static int
+grestate(Conv *c, char *state, int n)
+{
+ USED(c);
+ return snprint(state, n, "%s\n", "Datagram");
+}
+
+/*
+ * Protocol announce hook: always fails with an error string.
+ */
+static char*
+greannounce(Conv* _, char** __, int ___)
+{
+ return "pktifc does not support announce";
+}
+
+/*
+ * Protocol close hook: close the conversation's queues and reset its
+ * addresses and ports.
+ */
+static void
+greclose(Conv *c)
+{
+ qclose(c->rq);
+ qclose(c->wq);
+ qclose(c->eq);
+ ipmove(c->laddr, IPnoaddr);
+ ipmove(c->raddr, IPnoaddr);
+ c->lport = 0;
+ c->rport = 0;
+}
+
+int drop;
+
+/*
+ * Write-queue bypass routine (installed by grecreate): called with
+ * each block written to conversation x.  Prepends room for the IP
+ * header, fills in the header fields and hands the packet to ipoput4.
+ * Unless the protocol is in raw mode, unset source and destination
+ * addresses are taken from the conversation and the GRE protocol
+ * field is set from c->rport.
+ */
+static void
+grekick(void *x, Block *bp)
+{
+ Conv *c = x;
+ GREhdr *ghp;
+ uchar laddr[IPaddrlen], raddr[IPaddrlen];
+
+ if(bp == nil)
+ return;
+
+ /* Make space to fit ip header (gre header already there) */
+ bp = padblock(bp, GRE_IPONLY);
+ if(bp == nil)
+ return;
+
+ /* make sure the message has a GRE header */
+ bp = pullupblock(bp, GRE_IPONLY+GRE_IPPLUSGRE);
+ if(bp == nil)
+ return;
+
+ ghp = (GREhdr *)(bp->rp);
+ ghp->vihl = IP_VER4;
+
+ if(!((GREpriv*)c->p->priv)->raw){
+ /* a destination equal to v4prefix counts as unset: use the conversation's */
+ v4tov6(raddr, ghp->dst);
+ if(ipcmp(raddr, v4prefix) == 0)
+ memmove(ghp->dst, c->raddr + IPv4off, IPv4addrlen);
+ /* likewise for the source, choosing a local address first if need be */
+ v4tov6(laddr, ghp->src);
+ if(ipcmp(laddr, v4prefix) == 0){
+ if(ipcmp(c->laddr, IPnoaddr) == 0)
+ findlocalip(c->p->f, c->laddr, raddr); /* pick interface closest to dest */
+ memmove(ghp->src, c->laddr + IPv4off, IPv4addrlen);
+ }
+ hnputs(ghp->eproto, c->rport);
+ }
+
+ ghp->proto = IP_GREPROTO;
+ ghp->frag[0] = 0;
+ ghp->frag[1] = 0;
+
+ ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
+}
+
+/*
+ * Protocol receive hook: deliver an incoming GRE packet to the in-use
+ * conversation whose rport equals the packet's encapsulated protocol
+ * and, unless in raw mode, whose remote address equals the packet's
+ * source.  Packets with no matching conversation, too short a length,
+ * or arriving when the read queue holds more than 64K are dropped.
+ */
+static void
+greiput(Proto *gre, Ipifc* __, Block *bp)
+{
+ int len;
+ GREhdr *ghp;
+ Conv *c, **p;
+ ushort eproto;
+ uchar raddr[IPaddrlen];
+ GREpriv *gpriv;
+
+ gpriv = gre->priv;
+ ghp = (GREhdr*)(bp->rp);
+
+ v4tov6(raddr, ghp->src);
+ eproto = nhgets(ghp->eproto);
+ QLOCK(gre);
+
+ /* Look for a conversation structure for this port and address */
+ c = nil;
+ for(p = gre->conv; *p; p++) {
+ c = *p;
+ if(c->inuse == 0)
+ continue;
+ if(c->rport == eproto &&
+ (gpriv->raw || ipcmp(c->raddr, raddr) == 0))
+ break;
+ }
+
+ /* the loop ran off the end of the list: nobody wants this packet */
+ if(*p == nil) {
+ QUNLOCK(gre);
+ freeblist(bp);
+ return;
+ }
+
+ QUNLOCK(gre);
+
+ /*
+ * Trim the packet down to data size
+ */
+ len = nhgets(ghp->len) - GRE_IPONLY;
+ /* NOTE(review): this short-packet drop does not count in lenerr -- confirm intended */
+ if(len < GRE_IPPLUSGRE){
+ freeblist(bp);
+ return;
+ }
+ bp = trimblock(bp, GRE_IPONLY, len);
+ if(bp == nil){
+ gpriv->lenerr++;
+ return;
+ }
+
+ /*
+ * Can't delimit packet so pull it all into one block.
+ */
+ if(qlen(c->rq) > 64*1024)
+ freeblist(bp);
+ else{
+ bp = concatblock(bp);
+ if(bp == 0)
+ panic("greiput");
+ qpass(c->rq, bp);
+ }
+}
+
+/*
+ * Protocol stats hook: print the length-error counter from gre->priv
+ * into buf.  Returns what snprint returns.
+ */
+int
+grestats(Proto *gre, char *buf, int len)
+{
+	GREpriv *priv = gre->priv;
+
+	return snprint(buf, len, "gre: len %lud\n", priv->lenerr);
+}
+
+/*
+ * Protocol ctl hook.  The single-word requests "raw" and "cooked" set
+ * and clear the protocol-wide raw flag; anything else is an error.
+ * Returns nil on success or an error string.
+ */
+char*
+grectl(Conv *c, char **f, int n)
+{
+	GREpriv *priv;
+
+	priv = c->p->priv;
+	if(n != 1)
+		return "unknown control request";
+	if(strcmp(f[0], "raw") == 0){
+		priv->raw = 1;
+		return nil;
+	}
+	if(strcmp(f[0], "cooked") == 0){
+		priv->raw = 0;
+		return nil;
+	}
+	return "unknown control request";
+}
+
+/*
+ * Allocate the GRE protocol descriptor, fill in its hooks and
+ * parameters, and register it with fs through Fsproto.
+ */
+void
+greinit(Fs *fs)
+{
+	Proto *gre;
+
+	gre = smalloc(sizeof(Proto));
+	gre->priv = smalloc(sizeof(GREpriv));
+
+	/* identity and sizing */
+	gre->name = "gre";
+	gre->ipproto = IP_GREPROTO;
+	gre->nc = 64;
+	gre->ptclsize = 0;
+
+	/* conversation life cycle */
+	gre->create = grecreate;
+	gre->connect = greconnect;
+	gre->announce = greannounce;
+	gre->close = greclose;
+	gre->ctl = grectl;
+
+	/* packet input and advice */
+	gre->rcv = greiput;
+	gre->advise = nil;
+
+	/* status files */
+	gre->state = grestate;
+	gre->stats = grestats;
+
+	Fsproto(fs, gre);
+}
diff --git a/src/9vx/a/ip/icmp.c b/src/9vx/a/ip/icmp.c
@@ -0,0 +1,490 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+typedef struct Icmp { /* packet overlay: 20-byte IPv4 header followed by the ICMP header */
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length; icmpiput reads it as iplen */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+ uchar ttl; /* Time to live */
+ uchar proto; /* Protocol */
+ uchar ipcksum[2]; /* Header checksum */
+ uchar src[4]; /* Ip source */
+ uchar dst[4]; /* Ip destination */
+ uchar type; /* ICMP message type, see the packet-type enum */
+ uchar code; /* sub-code within the type */
+ uchar cksum[2]; /* ICMP checksum, computed from offset ICMP_IPSIZE */
+ uchar icmpid[2]; /* compared with a conversation's lport on receive */
+ uchar seq[2]; /* sequence number; icmpcantfrag passes the mtu here */
+ uchar data[1]; /* first byte of the ICMP payload */
+} Icmp;
+
+enum { /* Packet Types, as carried in Icmp.type */
+ EchoReply = 0,
+ Unreachable = 3,
+ SrcQuench = 4,
+ Redirect = 5,
+ EchoRequest = 8,
+ TimeExceed = 11,
+ InParmProblem = 12,
+ Timestamp = 13,
+ TimestampReply = 14,
+ InfoRequest = 15,
+ InfoReply = 16,
+ AddrMaskRequest = 17,
+ AddrMaskReply = 18,
+
+ Maxtype = 18, /* highest type with a name and a counter slot */
+};
+
+enum
+{
+ MinAdvise = 24, /* minimum needed for us to advise another protocol (bytes left after the outer headers) */
+};
+
+char *icmpnames[Maxtype+1] =	/* printable name per ICMP type, indexed by type; gaps stay nil */
+{
+[EchoReply]		"EchoReply",
+[Unreachable]		"Unreachable",
+[SrcQuench]		"SrcQuench",
+[Redirect]		"Redirect",
+[EchoRequest]		"EchoRequest",
+[TimeExceed]		"TimeExceed",
+[InParmProblem]		"InParmProblem",
+[Timestamp]		"Timestamp",
+[TimestampReply]	"TimestampReply",
+[InfoRequest]		"InfoRequest",
+[InfoReply]		"InfoReply",
+[AddrMaskRequest]	"AddrMaskRequest",
+[AddrMaskReply]		"AddrMaskReply",	/* no trailing blank: icmpstats prints "%s: " */
+};
+
+enum {
+ IP_ICMPPROTO = 1, /* value stored in the IP header's proto field for ICMP */
+ ICMP_IPSIZE = 20, /* bytes of struct Icmp before the type field */
+ ICMP_HDRSIZE = 8, /* type, code, cksum, icmpid, seq */
+};
+
+enum /* indices into Icmppriv.stats */
+{
+ InMsgs, /* every packet handed to icmpiput */
+ InErrors, /* bumped with the header, length and checksum failures in icmpiput */
+ OutMsgs, /* packets sent through icmpkick */
+ CsumErrs, /* ICMP checksum did not verify */
+ LenErrs, /* bad IP length field, or quoted packet shorter than MinAdvise */
+ HlenErrs, /* block shorter than ICMP_IPSIZE+ICMP_HDRSIZE */
+
+ Nstats, /* number of counters */
+};
+
+static char *statnames[Nstats] = /* labels printed by icmpstats, same order as the enum */
+{
+[InMsgs] "InMsgs",
+[InErrors] "InErrors",
+[OutMsgs] "OutMsgs",
+[CsumErrs] "CsumErrs",
+[LenErrs] "LenErrs",
+[HlenErrs] "HlenErrs",
+};
+
+typedef struct Icmppriv Icmppriv;
+struct Icmppriv /* per-protocol state, allocated in icmpinit as Proto.priv */
+{
+ ulong stats[Nstats]; /* indexed by InMsgs .. HlenErrs */
+
+ /* message counts, indexed by ICMP type */
+ ulong in[Maxtype+1];
+ ulong out[Maxtype+1];
+};
+
+static void icmpkick(void *x, Block*); /* forward declaration: icmpcreate installs it on c->wq */
+
+static void
+icmpcreate(Conv *c)
+{ /* set up the two queues of a new ICMP conversation */
+ c->rq = qopen(64*1024, Qmsg, 0, c); /* receive queue, 64k limit, message mode */
+ c->wq = qbypass(icmpkick, c); /* writes are passed straight to icmpkick */
+}
+
+extern char*
+icmpconnect(Conv *c, char **argv, int argc)
+{	/* standard connect; on success report the conversation as connected */
+	char *err = Fsstdconnect(c, argv, argc);
+
+	if(err == nil){
+		/* err is nil on this path, so nil is what Fsconnected receives */
+		Fsconnected(c, err);
+		return nil;
+	}
+	return err;
+}
+
+extern int
+icmpstate(Conv *c, char *state, int n)
+{	/* one status line: fixed state name plus read and write queue lengths */
+	int qin = 0, qout = 0;
+
+	USED(c);
+	if(c->rq) qin = qlen(c->rq);	/* a missing queue counts as empty */
+	if(c->wq) qout = qlen(c->wq);
+	return snprint(state, n, "%s qin %d qout %d\n", "Datagram", qin, qout);
+}
+
+extern char*
+icmpannounce(Conv *c, char **argv, int argc)
+{	/* standard announce; on success report the conversation as connected */
+	char *err = Fsstdannounce(c, argv, argc);
+
+	if(err == nil){
+		/* nothing went wrong, so no error string is passed along */
+		Fsconnected(c, nil);
+		return nil;
+	}
+	return err;
+}
+
+extern void
+icmpclose(Conv *c)
+{ /* tear down a conversation: close both queues, forget addresses and id */
+ qclose(c->rq);
+ qclose(c->wq);
+ ipmove(c->laddr, IPnoaddr); /* reset local address */
+ ipmove(c->raddr, IPnoaddr); /* reset remote address */
+ c->lport = 0; /* lport holds the ICMP id matched on receive */
+}
+
+static void
+icmpkick(void *x, Block *bp)
+{ /* write-side bypass for c->wq: complete the headers of bp and send it */
+ Conv *c = x;
+ Icmp *p;
+ Icmppriv *ipriv;
+
+ if(bp == nil)
+ return;
+
+ if(blocklen(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){ /* too short to hold both headers: drop */
+ freeblist(bp);
+ return;
+ }
+ p = (Icmp *)(bp->rp);
+ p->vihl = IP_VER4;
+ ipriv = c->p->priv;
+ if(p->type <= Maxtype) /* count only types that have a slot */
+ ipriv->out[p->type]++;
+
+ v6tov4(p->dst, c->raddr); /* addresses come from the conversation, not the writer */
+ v6tov4(p->src, c->laddr);
+ p->proto = IP_ICMPPROTO;
+ hnputs(p->icmpid, c->lport); /* the id is the conversation's local port */
+ memset(p->cksum, 0, sizeof(p->cksum)); /* field must be zero while summing */
+ hnputs(p->cksum, ptclcsum(bp, ICMP_IPSIZE, blocklen(bp) - ICMP_IPSIZE));
+ ipriv->stats[OutMsgs]++;
+ ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
+}
+
+extern void
+icmpttlexceeded(Fs *f, uchar *ia, Block *bp)
+{ /* send a TimeExceed (code 0) to the source of bp, using ia as our address */
+ Block *nbp;
+ Icmp *p, *np;
+
+ p = (Icmp *)bp->rp; /* the offending packet; it is not freed here */
+
+ netlog(f, Logicmp, "sending icmpttlexceeded -> %V\n", p->src);
+ nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8); /* our headers plus quoted IP header and 8 bytes */
+ nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8;
+ np = (Icmp *)nbp->rp;
+ np->vihl = IP_VER4;
+ memmove(np->dst, p->src, sizeof(np->dst)); /* reply goes back to the sender */
+ v6tov4(np->src, ia);
+ memmove(np->data, bp->rp, ICMP_IPSIZE + 8); /* quote the start of the original packet */
+ np->type = TimeExceed;
+ np->code = 0;
+ np->proto = IP_ICMPPROTO;
+ hnputs(np->icmpid, 0);
+ hnputs(np->seq, 0);
+ memset(np->cksum, 0, sizeof(np->cksum)); /* field must be zero while summing */
+ hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE));
+ ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+
+}
+
+static void
+icmpunreachable(Fs *f, Block *bp, int code, int seq)
+{ /* send an Unreachable with the given code (and seq value) to the source of bp */
+ Block *nbp;
+ Icmp *p, *np;
+ int i;
+ uchar addr[IPaddrlen];
+
+ p = (Icmp *)bp->rp; /* the offending packet; it is not freed here */
+
+ /* only do this for unicast sources and destinations */
+ v4tov6(addr, p->dst);
+ i = ipforme(f, addr);
+ if((i&Runi) == 0) /* destination must be one of our unicast addresses */
+ return;
+ v4tov6(addr, p->src);
+ i = ipforme(f, addr);
+ if(i != 0 && (i&Runi) == 0) /* source is ours but not unicast: stay silent */
+ return;
+
+ netlog(f, Logicmp, "sending icmpnoconv -> %V\n", p->src);
+ nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8); /* our headers plus quoted IP header and 8 bytes */
+ nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8;
+ np = (Icmp *)nbp->rp;
+ np->vihl = IP_VER4;
+ memmove(np->dst, p->src, sizeof(np->dst)); /* swap the original addresses */
+ memmove(np->src, p->dst, sizeof(np->src));
+ memmove(np->data, bp->rp, ICMP_IPSIZE + 8); /* quote the start of the original packet */
+ np->type = Unreachable;
+ np->code = code;
+ np->proto = IP_ICMPPROTO;
+ hnputs(np->icmpid, 0);
+ hnputs(np->seq, seq); /* icmpcantfrag passes the mtu through seq */
+ memset(np->cksum, 0, sizeof(np->cksum)); /* field must be zero while summing */
+ hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE));
+ ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+extern void
+icmpnoconv(Fs *f, Block *bp)
+{ /* no conversation wants bp: answer with Unreachable code 3 */
+ icmpunreachable(f, bp, 3, 0); /* unreachcode[3] is "port unreachable" */
+}
+
+extern void
+icmpcantfrag(Fs *f, Block *bp, int mtu)
+{ /* bp could not be fragmented: answer with Unreachable code 4 */
+ icmpunreachable(f, bp, 4, mtu); /* unreachcode[4]; mtu is written into the seq field */
+}
+
+static void
+goticmpkt(Proto *icmp, Block *bp)
+{	/* hand bp to the first conversation matching its ICMP id and source; else free it */
+	Conv **cp, *cv;
+	Icmp *hdr = (Icmp *) bp->rp;
+	uchar from[IPaddrlen];
+	ushort id;
+
+	id = nhgets(hdr->icmpid);
+	v4tov6(from, hdr->src);
+
+	for(cp = icmp->conv; (cv = *cp) != nil; cp++) {
+		if(cv->lport != id || ipcmp(cv->raddr, from) != 0)
+			continue;
+		/* match: flatten the chain and queue it for the reader */
+		bp = concatblock(bp);
+		if(bp != nil)
+			qpass(cv->rq, bp);
+		return;
+	}
+
+	/* nobody is waiting for this id/address pair */
+	freeblist(bp);
+}
+
+static Block *
+mkechoreply(Block *bp)
+{	/* turn an echo request into its reply in place; returns the same block */
+	Icmp *h = (Icmp *)bp->rp;
+	uchar tmp[4];
+
+	/* swap source and destination through a temporary */
+	memmove(tmp, h->dst, sizeof(tmp));
+	memmove(h->dst, h->src, sizeof(h->dst));
+	memmove(h->src, tmp, sizeof(h->src));
+	h->vihl = IP_VER4;
+	h->type = EchoReply;
+	/* the checksum field must be zero while the sum is taken */
+	memset(h->cksum, 0, sizeof(h->cksum));
+	hnputs(h->cksum, ptclcsum(bp, ICMP_IPSIZE, blocklen(bp) - ICMP_IPSIZE));
+	return bp;
+}
+
+static char *unreachcode[] = /* advise text per Unreachable code; icmpiput uses entry 1 for codes above 5 */
+{
+[0] "net unreachable",
+[1] "host unreachable",
+[2] "protocol unreachable",
+[3] "port unreachable", /* the code sent by icmpnoconv */
+[4] "fragmentation needed and DF set", /* the code sent by icmpcantfrag */
+[5] "source route failed",
+};
+
+static void
+icmpiput(Proto *icmp, Ipifc* __, Block *bp)
+{	/* receive path: validate an ICMPv4 packet, then act on its type; bp is consumed */
+	int n, iplen;
+	Icmp *p;
+	Block *r;
+	Proto *pr;
+	char *msg;
+	char m2[128];
+	Icmppriv *ipriv;
+
+	ipriv = icmp->priv;
+
+	ipriv->stats[InMsgs]++;
+
+	p = (Icmp *)bp->rp;
+	netlog(icmp->f, Logicmp, "icmpiput %d %d\n", p->type, p->code);
+	n = blocklen(bp);
+	if(n < ICMP_IPSIZE+ICMP_HDRSIZE){
+		ipriv->stats[InErrors]++;
+		ipriv->stats[HlenErrs]++;
+		netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
+		goto raise;
+	}
+	iplen = nhgets(p->length);
+	if(iplen > n || iplen < ICMP_IPSIZE+ICMP_HDRSIZE){	/* too small would give a negative checksum length */
+		ipriv->stats[LenErrs]++;
+		ipriv->stats[InErrors]++;
+		netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
+		goto raise;
+	}
+	if(ptclcsum(bp, ICMP_IPSIZE, iplen - ICMP_IPSIZE)){
+		ipriv->stats[InErrors]++;
+		ipriv->stats[CsumErrs]++;
+		netlog(icmp->f, Logicmp, "icmp checksum error\n");
+		goto raise;
+	}
+	if(p->type <= Maxtype)	/* count only types that have a slot */
+		ipriv->in[p->type]++;
+
+	switch(p->type) {
+	case EchoRequest:
+		if (iplen < n)	/* drop anything past the IP length */
+			bp = trimblock(bp, 0, iplen);
+		r = mkechoreply(bp);
+		ipriv->out[EchoReply]++;
+		ipoput4(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
+		break;
+	case Unreachable:
+		if(p->code > 5)	/* unknown codes are reported as host unreachable */
+			msg = unreachcode[1];
+		else
+			msg = unreachcode[p->code];
+
+		bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;	/* step to the quoted packet */
+		if(blocklen(bp) < MinAdvise){
+			ipriv->stats[LenErrs]++;
+			goto raise;
+		}
+		p = (Icmp *)bp->rp;
+		pr = Fsrcvpcolx(icmp->f, p->proto);
+		if(pr != nil && pr->advise != nil) {
+			(*pr->advise)(pr, bp, msg);	/* bp now belongs to the advised protocol */
+			return;
+		}
+
+		bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;	/* nobody to advise: deliver the whole message */
+		goticmpkt(icmp, bp);
+		break;
+	case TimeExceed:
+		if(p->code == 0){
+			snprint(m2, sizeof m2, "ttl exceeded at %V", p->src);
+
+			bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;	/* step to the quoted packet */
+			if(blocklen(bp) < MinAdvise){
+				ipriv->stats[LenErrs]++;
+				goto raise;
+			}
+			p = (Icmp *)bp->rp;
+			pr = Fsrcvpcolx(icmp->f, p->proto);
+			if(pr != nil && pr->advise != nil) {
+				(*pr->advise)(pr, bp, m2);	/* bp now belongs to the advised protocol */
+				return;
+			}
+			bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
+		}
+
+		goticmpkt(icmp, bp);
+		break;
+	default:
+		goticmpkt(icmp, bp);
+		break;
+	}
+	return;
+
+raise:
+	freeblist(bp);
+}
+
+void
+icmpadvise(Proto *icmp, Block *bp, char *msg)
+{	/* hang up, with msg, the conversation matching the id and dst found at bp->rp */
+	Conv **cp, *cv;
+	Icmp *hdr = (Icmp *) bp->rp;
+	uchar to[IPaddrlen];
+	ushort id;
+
+	/* icmpiput calls advise with rp moved to the quoted packet */
+	id = nhgets(hdr->icmpid);
+	v4tov6(to, hdr->dst);
+
+	for(cp = icmp->conv; (cv = *cp) != nil; cp++) {
+		if(cv->lport != id || ipcmp(cv->raddr, to) != 0)
+			continue;
+		qhangup(cv->rq, msg);
+		qhangup(cv->wq, msg);
+		break;
+	}
+
+	/* the block is consumed whether or not anyone matched */
+	freeblist(bp);
+}
+
+int
+icmpstats(Proto *icmp, char *buf, int len)
+{	/* print all ICMP counters into buf; returns the number of bytes written */
+	Icmppriv *st = icmp->priv;
+	char *out = buf, *end = buf+len;
+	int t;
+
+	/* general counters first */
+	for(t = 0; t < Nstats; t++)
+		out = seprint(out, end, "%s: %lud\n", statnames[t], st->stats[t]);
+	/* then in/out counts per type; types without a name print as numbers */
+	for(t = 0; t <= Maxtype; t++){
+		if(icmpnames[t] == nil){
+			out = seprint(out, end, "%d: %lud %lud\n", t, st->in[t], st->out[t]);
+			continue;
+		}
+		out = seprint(out, end, "%s: %lud %lud\n", icmpnames[t], st->in[t], st->out[t]);
+	}
+	return out - buf;
+}
+
+void
+icmpinit(Fs *fs)
+{	/* allocate the ICMPv4 Proto, fill in its hooks and hand it to Fsproto */
+	Proto *pr = smalloc(sizeof(Proto));
+
+	/* identity and sizing */
+	pr->name = "icmp";
+	pr->ipproto = IP_ICMPPROTO;
+	pr->nc = 128;
+	pr->ptclsize = 0;
+	pr->priv = smalloc(sizeof(Icmppriv));
+	/* hooks; ctl and gc are not provided */
+	pr->create = icmpcreate;
+	pr->connect = icmpconnect;
+	pr->announce = icmpannounce;
+	pr->close = icmpclose;
+	pr->state = icmpstate;
+	pr->stats = icmpstats;
+	pr->rcv = icmpiput;
+	pr->advise = icmpadvise;
+	pr->ctl = nil;
+	pr->gc = nil;
+	Fsproto(fs, pr);
+}
diff --git a/src/9vx/a/ip/icmp6.c b/src/9vx/a/ip/icmp6.c
@@ -0,0 +1,946 @@
+/*
+ * Internet Control Message Protocol for IPv6
+ */
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+#include "ip.h"
+#include "ipv6.h"
+
+enum /* indices into Icmppriv6.stats */
+{
+ InMsgs6, /* not updated by the code in this file */
+ InErrors6, /* bumped once at valid's err label */
+ OutMsgs6, /* not updated by the code in this file */
+ CsumErrs6, /* checksum over the pseudo-header did not verify */
+ LenErrs6, /* bad payload length, or quoted packet too short in icmpiput6 */
+ HlenErrs6, /* packet shorter than the headers its type needs */
+ HoplimErrs6, /* neighbor discovery packet whose ttl is not HOP_LIMIT */
+ IcmpCodeErrs6, /* neighbor discovery packet with a non-zero code */
+ TargetErrs6, /* solicit/advert whose target is multicast */
+ OptlenErrs6, /* option with length 0, or a disallowed option */
+ AddrmxpErrs6, /* address/flag combination rejected by valid */
+ RouterAddrErrs6, /* router advert whose source is not link-local */
+
+ Nstats6, /* number of counters */
+};
+
+enum {
+ ICMP_USEAD6 = 40, /* bytes icmpkick6 pulls up when headers==6: 8 skipped + two addresses */
+};
+
+enum { /* flag bits; icmpna stores them in icmpid[0] of an advertisement */
+ Oflag = 1<<5,
+ Sflag = 1<<6,
+ Rflag = 1<<7, /* not referenced elsewhere in this file */
+};
+
+enum {
+ /* ICMPv6 types; the names without a V6 suffix repeat the values used in icmp.c */
+ EchoReply = 0,
+ UnreachableV6 = 1,
+ PacketTooBigV6 = 2,
+ TimeExceedV6 = 3,
+ SrcQuench = 4,
+ ParamProblemV6 = 4, /* same value as SrcQuench, so they share one slot */
+ Redirect = 5,
+ EchoRequest = 8,
+ TimeExceed = 11,
+ InParmProblem = 12,
+ Timestamp = 13,
+ TimestampReply = 14,
+ InfoRequest = 15,
+ InfoReply = 16,
+ AddrMaskRequest = 17,
+ AddrMaskReply = 18,
+ EchoRequestV6 = 128,
+ EchoReplyV6 = 129,
+ RouterSolicit = 133,
+ RouterAdvert = 134,
+ NbrSolicit = 135,
+ NbrAdvert = 136,
+ RedirectV6 = 137,
+
+ Maxtype6 = 137, /* highest type with a counter slot; icmpiput6 drops anything above */
+};
+
+typedef struct ICMPpkt ICMPpkt;
+typedef struct IPICMP IPICMP;
+typedef struct Ndpkt Ndpkt;
+typedef struct NdiscC NdiscC;
+
+struct ICMPpkt { /* the ICMP header on its own */
+ uchar type;
+ uchar code;
+ uchar cksum[2];
+ uchar icmpid[2];
+ uchar seq[2];
+};
+
+struct IPICMP { /* IPv6 header immediately followed by the ICMP header */
+ /* Ip6hdr; */
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */
+ uchar ploadlen[2]; /* payload length: packet length - 40 */
+ uchar proto; /* next header type */
+ uchar ttl; /* hop limit */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+
+ /* ICMPpkt; */
+ uchar type;
+ uchar code;
+ uchar cksum[2];
+ uchar icmpid[2]; /* compared with a conversation's lport by goticmpkt6 */
+ uchar seq[2];
+};
+
+struct NdiscC /* IPICMP plus the target address, i.e. an Ndpkt without its option */
+{
+ /* IPICMP; */
+ /* Ip6hdr; */
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */
+ uchar ploadlen[2]; /* payload length: packet length - 40 */
+ uchar proto; /* next header type */
+ uchar ttl; /* hop limit */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+
+ /* ICMPpkt; */
+ uchar type;
+ uchar code;
+ uchar cksum[2];
+ uchar icmpid[2];
+ uchar seq[2];
+
+ uchar target[IPaddrlen];
+};
+
+struct Ndpkt /* neighbor solicitation/advertisement with one link-layer address option */
+{
+ /* NdiscC; */
+ /* IPICMP; */
+ /* Ip6hdr; */
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */
+ uchar ploadlen[2]; /* payload length: packet length - 40 */
+ uchar proto; /* next header type */
+ uchar ttl; /* hop limit */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+
+ /* ICMPpkt; */
+ uchar type;
+ uchar code;
+ uchar cksum[2];
+ uchar icmpid[2]; /* icmpna stores the advertisement flags in icmpid[0] */
+ uchar seq[2];
+
+ uchar target[IPaddrlen]; /* the address being solicited or advertised */
+
+ uchar otype; /* option type: SRC_LLADDR in icmpns, TARGET_LLADDR in icmpna */
+ uchar olen; /* length in units of 8 octets(incl type, code),
+ * 1 for IEEE 802 addresses */
+ uchar lnaddr[6]; /* link-layer address */
+};
+
+typedef struct Icmppriv6 /* per-protocol state, allocated in icmp6init as Proto.priv */
+{
+ ulong stats[Nstats6]; /* indexed by InMsgs6 .. RouterAddrErrs6 */
+
+ /* message counts, indexed by ICMPv6 type */
+ ulong in[Maxtype6+1];
+ ulong out[Maxtype6+1];
+} Icmppriv6;
+
+typedef struct Icmpcb6 /* per-conversation state; icmp6init sets ptclsize to its size */
+{
+ QLock qlock; /* not taken by any code in this file */
+ uchar headers; /* set to 6 by the "headers" ctl; tested by icmpkick6 */
+} Icmpcb6;
+
+char *icmpnames6[Maxtype6+1] = /* printable name per type; icmpstats6 skips entries left nil */
+{
+[EchoReply] "EchoReply",
+[UnreachableV6] "UnreachableV6",
+[PacketTooBigV6] "PacketTooBigV6",
+[TimeExceedV6] "TimeExceedV6",
+[SrcQuench] "SrcQuench", /* index 4, shared with ParamProblemV6 */
+[Redirect] "Redirect",
+[EchoRequest] "EchoRequest",
+[TimeExceed] "TimeExceed",
+[InParmProblem] "InParmProblem",
+[Timestamp] "Timestamp",
+[TimestampReply] "TimestampReply",
+[InfoRequest] "InfoRequest",
+[InfoReply] "InfoReply",
+[AddrMaskRequest] "AddrMaskRequest",
+[AddrMaskReply] "AddrMaskReply",
+[EchoRequestV6] "EchoRequestV6",
+[EchoReplyV6] "EchoReplyV6",
+[RouterSolicit] "RouterSolicit",
+[RouterAdvert] "RouterAdvert",
+[NbrSolicit] "NbrSolicit",
+[NbrAdvert] "NbrAdvert",
+[RedirectV6] "RedirectV6",
+};
+
+static char *statnames6[Nstats6] = /* labels printed by icmpstats6, same order as the enum */
+{
+[InMsgs6] "InMsgs",
+[InErrors6] "InErrors",
+[OutMsgs6] "OutMsgs",
+[CsumErrs6] "CsumErrs",
+[LenErrs6] "LenErrs",
+[HlenErrs6] "HlenErrs",
+[HoplimErrs6] "HoplimErrs",
+[IcmpCodeErrs6] "IcmpCodeErrs",
+[TargetErrs6] "TargetErrs",
+[OptlenErrs6] "OptlenErrs",
+[AddrmxpErrs6] "AddrmxpErrs",
+[RouterAddrErrs6] "RouterAddrErrs",
+};
+
+static char *unreachcode[] = /* advise text per UnreachableV6 code, indexed by the Icmp6_* constants */
+{
+[Icmp6_no_route] "no route to destination",
+[Icmp6_ad_prohib] "comm with destination administratively prohibited",
+[Icmp6_out_src_scope] "beyond scope of source address",
+[Icmp6_adr_unreach] "address unreachable",
+[Icmp6_port_unreach] "port unreachable",
+[Icmp6_gress_src_fail] "source address failed ingress/egress policy",
+[Icmp6_rej_route] "reject route to destination",
+[Icmp6_unknown] "icmp unreachable: unknown code", /* used by icmpiput6 for codes outside the table */
+};
+
+static void icmpkick6(void *x, Block *bp); /* forward declaration: icmpcreate6 installs it on c->wq */
+
+static void
+icmpcreate6(Conv *c)
+{ /* set up the two queues of a new ICMPv6 conversation */
+ c->rq = qopen(64*1024, Qmsg, 0, c); /* receive queue, 64k limit, message mode */
+ c->wq = qbypass(icmpkick6, c); /* writes are passed straight to icmpkick6 */
+}
+
+static void
+set_cksum(Block *bp)
+{ /* fill in ploadlen and the ICMP checksum; callers set vcf and ttl afterwards */
+ IPICMP *p = (IPICMP *)(bp->rp);
+
+ hnputl(p->vcf, 0); /* borrow IP header as pseudoheader */
+ hnputs(p->ploadlen, blocklen(bp) - IP6HDR);
+ p->proto = 0; /* proto and ttl are swapped for the sum: valid does the same on receive */
+ p->ttl = ICMPv6; /* ttl gets set later */
+ hnputs(p->cksum, 0); /* field must be zero while summing */
+ hnputs(p->cksum, ptclcsum(bp, 0, blocklen(bp)));
+ p->proto = ICMPv6; /* put the real next-header value back */
+}
+
+static Block *
+newIPICMP(int packetlen)
+{	/* allocate a block, zero packetlen bytes at rp and advance wp by packetlen */
+	Block *b = allocb(packetlen);
+
+	/* clear first, then claim the bytes */
+	memset(b->rp, 0, packetlen);
+	b->wp += packetlen;
+	return b;
+}
+
+void
+icmpadvise6(Proto *icmp, Block *bp, char *msg)
+{	/* hang up, with msg, the conversation matching the id and dst found at bp->rp */
+	IPICMP *hdr = (IPICMP *)bp->rp;
+	ushort id = nhgets(hdr->icmpid);
+	Conv **cp, *cv;
+
+	for(cp = icmp->conv; (cv = *cp) != nil; cp++) {
+		/* a match needs both the ICMP id and the remote address */
+		if(cv->lport != id)
+			continue;
+		if(ipcmp(cv->raddr, hdr->dst) != 0)
+			continue;
+		qhangup(cv->rq, msg);
+		qhangup(cv->wq, msg);
+		break;
+	}
+	/* the block is consumed whether or not anyone matched */
+	freeblist(bp);
+}
+
+static void
+icmpkick6(void *x, Block *bp)
+{ /* write-side bypass for c->wq: complete the headers of bp and send it */
+ uchar laddr[IPaddrlen], raddr[IPaddrlen];
+ Conv *c = x;
+ IPICMP *p;
+ Icmppriv6 *ipriv = c->p->priv;
+ Icmpcb6 *icb = (Icmpcb6*)c->ptcl;
+
+ if(bp == nil)
+ return;
+
+ if(icb->headers==6) { /* mode selected by the "headers" ctl, see icmpctl6 */
+ /* get user specified addresses */
+ bp = pullupblock(bp, ICMP_USEAD6);
+ if(bp == nil)
+ return;
+ bp->rp += 8; /* skip the first 8 bytes of the user header */
+ ipmove(laddr, bp->rp);
+ bp->rp += IPaddrlen;
+ ipmove(raddr, bp->rp);
+ bp->rp += IPaddrlen;
+ bp = padblock(bp, sizeof(Ip6hdr)); /* make room in front for the IPv6 header */
+ }
+
+ if(blocklen(bp) < sizeof(IPICMP)){ /* too short to hold both headers: drop */
+ freeblist(bp);
+ return;
+ }
+ p = (IPICMP *)(bp->rp);
+ if(icb->headers == 6) {
+ ipmove(p->dst, raddr);
+ ipmove(p->src, laddr);
+ } else {
+ ipmove(p->dst, c->raddr); /* addresses and id come from the conversation */
+ ipmove(p->src, c->laddr);
+ hnputs(p->icmpid, c->lport);
+ }
+
+ set_cksum(bp); /* clears vcf, so the version is written afterwards */
+ p->vcf[0] = 0x06 << 4;
+ if(p->type <= Maxtype6) /* count only types that have a slot */
+ ipriv->out[p->type]++;
+ ipoput6(c->p->f, bp, 0, c->ttl, c->tos, nil);
+}
+
+char*
+icmpctl6(Conv *c, char **argv, int argc)
+{	/* ctl handler: the single word "headers" sets icb->headers to 6 */
+	Icmpcb6 *icb = (Icmpcb6*) c->ptcl;
+
+	if(argc != 1 || strcmp(argv[0], "headers") != 0)
+		return "unknown control request";
+
+	/* icmpkick6 tests headers==6 to take the addresses from the written data */
+	icb->headers = 6;
+	return nil;
+}
+
+static void
+goticmpkt6(Proto *icmp, Block *bp, int muxkey)
+{	/* queue bp on the first conversation whose lport and raddr match; else free it */
+	IPICMP *hdr = (IPICMP *)bp->rp;
+	Conv **cp, *cv;
+	uchar *peer;
+	ushort key;
+
+	/* muxkey 0: match the packet's ICMP id and its source address; */
+	/* otherwise match muxkey itself and the packet's destination address */
+	key = muxkey;
+	peer = hdr->dst;
+	if(muxkey == 0) {
+		key = nhgets(hdr->icmpid);
+		peer = hdr->src;
+	}
+
+	for(cp = icmp->conv; (cv = *cp) != nil; cp++){
+		if(cv->lport != key || ipcmp(cv->raddr, peer) != 0)
+			continue;
+		bp = concatblock(bp);
+		if(bp != nil)
+			qpass(cv->rq, bp);
+		return;
+	}
+
+	freeblist(bp);
+}
+
+static Block *
+mkechoreply6(Block *bp, Ipifc *ifc)
+{	/* rewrite an echo request into its reply in place; nil if no source is found */
+	IPICMP *h = (IPICMP *)(bp->rp);
+	uchar requester[IPaddrlen];
+	ipmove(requester, h->src);
+	/* multicast destination: ipv6anylocal must supply src (presumably a local address of ifc) */
+	if(isv6mcast(h->dst) && !ipv6anylocal(ifc, h->src))
+		return nil;
+	if(!isv6mcast(h->dst))
+		ipmove(h->src, h->dst);
+	ipmove(h->dst, requester);
+	h->type = EchoReplyV6;
+	set_cksum(bp);
+	return bp;
+}
+
+/*
+ * sends out an ICMPv6 neighbor solicitation for targ.
+ * suni == SRC_UNSPEC or SRC_UNI,
+ * tuni == TARG_MULTI => multicast for address resolution,
+ * and tuni == TARG_UNI => neighbor reachability.
+ */
+extern void
+icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac)
+{
+ Block *nbp;
+ Ndpkt *np;
+ Proto *icmp = f->t2p[ICMPv6];
+ Icmppriv6 *ipriv = icmp->priv;
+
+ nbp = newIPICMP(sizeof(Ndpkt)); /* zeroed, sized for the option as well */
+ np = (Ndpkt*) nbp->rp;
+
+ if(suni == SRC_UNSPEC)
+ memmove(np->src, v6Unspecified, IPaddrlen);
+ else
+ memmove(np->src, src, IPaddrlen);
+
+ if(tuni == TARG_UNI)
+ memmove(np->dst, targ, IPaddrlen);
+ else
+ ipv62smcast(np->dst, targ); /* destination derived from targ */
+
+ np->type = NbrSolicit;
+ np->code = 0;
+ memmove(np->target, targ, IPaddrlen);
+ if(suni != SRC_UNSPEC) { /* the link-layer option is only sent with a real source */
+ np->otype = SRC_LLADDR;
+ np->olen = 1; /* 1+1+6 = 8 = 1 8-octet */
+ memmove(np->lnaddr, mac, sizeof(np->lnaddr));
+ } else
+ nbp->wp -= sizeof(Ndpkt) - sizeof(NdiscC); /* no option: give back its bytes */
+
+ set_cksum(nbp); /* clears vcf and borrows ttl, so both are set afterwards */
+ np = (Ndpkt*)nbp->rp;
+ np->ttl = HOP_LIMIT;
+ np->vcf[0] = 0x06 << 4;
+ ipriv->out[NbrSolicit]++;
+ netlog(f, Logicmp, "sending neighbor solicitation %I\n", targ);
+ ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * sends out an ICMPv6 neighbor advertisement for targ. flags == RSO flags.
+ */
+extern void
+icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags)
+{
+ Block *nbp;
+ Ndpkt *np;
+ Proto *icmp = f->t2p[ICMPv6];
+ Icmppriv6 *ipriv = icmp->priv;
+
+ nbp = newIPICMP(sizeof(Ndpkt)); /* zeroed packet including the option */
+ np = (Ndpkt*)nbp->rp;
+
+ memmove(np->src, src, IPaddrlen);
+ memmove(np->dst, dst, IPaddrlen);
+
+ np->type = NbrAdvert;
+ np->code = 0;
+ np->icmpid[0] = flags; /* flags occupy the first byte after the checksum */
+ memmove(np->target, targ, IPaddrlen);
+
+ np->otype = TARGET_LLADDR;
+ np->olen = 1; /* one 8-octet unit, as in icmpns */
+ memmove(np->lnaddr, mac, sizeof(np->lnaddr));
+
+ set_cksum(nbp); /* clears vcf and borrows ttl, so both are set afterwards */
+ np = (Ndpkt*) nbp->rp;
+ np->ttl = HOP_LIMIT;
+ np->vcf[0] = 0x06 << 4;
+ ipriv->out[NbrAdvert]++;
+ netlog(f, Logicmp, "sending neighbor advertisement %I\n", src);
+ ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+extern void
+icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free)
+{	/* answer bp with an UnreachableV6 of the given code; `free' also frees bp */
+	int osz = BLEN(bp);
+	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);	/* quote as much of bp as fits */
+	Block *nbp;
+	IPICMP *np;
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
+	Icmppriv6 *ipriv = icmp->priv;
+
+	p = (Ip6hdr *)bp->rp;
+
+	if(isv6mcast(p->src)){
+		/* ifc is not locked yet, so leave without touching the lock. */
+		/* NOTE(review): bp is freed here even when free==0, as before - confirm with callers */
+		freeblist(bp);
+		return;
+	}
+
+	nbp = newIPICMP(sz);
+	np = (IPICMP *)nbp->rp;
+
+	RLOCK(ifc);
+	if(!ipv6anylocal(ifc, np->src)){
+		netlog(f, Logicmp, "icmphostunr fail -> s%I d%I\n",
+			p->src, p->dst);
+		freeblist(nbp);
+		goto unlock;
+	}
+	netlog(f, Logicmp, "send icmphostunr -> s%I d%I\n", p->src, p->dst);
+
+	memmove(np->dst, p->src, IPaddrlen);
+	np->type = UnreachableV6;
+	np->code = code;
+	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	set_cksum(nbp);	/* clears vcf and borrows ttl, so both are set afterwards */
+	np->ttl = HOP_LIMIT;
+	np->vcf[0] = 0x06 << 4;
+	ipriv->out[UnreachableV6]++;
+
+	/* with `free' the error is fed to our own input path, otherwise sent out */
+	if(free)
+		ipiput6(f, ifc, nbp);
+	else
+		ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+
+unlock:
+	/* every path that took the read lock leaves through here */
+	RUNLOCK(ifc);
+	if(free)
+		freeblist(bp);
+}
+
+extern void
+icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp)
+{	/* answer bp with a TimeExceedV6 (code 0); bp itself is not freed here */
+	int osz = BLEN(bp);
+	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);	/* quote as much of bp as fits */
+	Block *nbp;
+	IPICMP *np;
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
+	Icmppriv6 *ipriv = icmp->priv;
+
+	p = (Ip6hdr *)bp->rp;
+
+	if(isv6mcast(p->src))	/* never answer a multicast source */
+		return;
+
+	nbp = newIPICMP(sz);
+	np = (IPICMP *) nbp->rp;
+
+	if(!ipv6anylocal(ifc, np->src)){
+		netlog(f, Logicmp, "icmpttlexceeded6 fail -> s%I d%I\n",
+			p->src, p->dst);
+		freeblist(nbp);	/* was leaked: nothing else owns the new block */
+		return;
+	}
+	netlog(f, Logicmp, "send icmpttlexceeded6 -> s%I d%I\n",
+		p->src, p->dst);
+
+	memmove(np->dst, p->src, IPaddrlen);
+	np->type = TimeExceedV6;
+	np->code = 0;
+	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	set_cksum(nbp);	/* clears vcf and borrows ttl, so both are set afterwards */
+	np->ttl = HOP_LIMIT;
+	np->vcf[0] = 0x06 << 4;
+	ipriv->out[TimeExceedV6]++;
+	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+extern void
+icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp)
+{	/* answer bp with a PacketTooBigV6 carrying ifc's usable mtu; bp is not freed here */
+	int osz = BLEN(bp);
+	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);	/* quote as much of bp as fits */
+	Block *nbp;
+	IPICMP *np;
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
+	Icmppriv6 *ipriv = icmp->priv;
+
+	p = (Ip6hdr *)bp->rp;
+
+	if(isv6mcast(p->src))	/* never answer a multicast source */
+		return;
+
+	nbp = newIPICMP(sz);
+	np = (IPICMP *)nbp->rp;
+
+	if(!ipv6anylocal(ifc, np->src)){
+		netlog(f, Logicmp, "icmppkttoobig6 fail -> s%I d%I\n",
+			p->src, p->dst);
+		freeblist(nbp);	/* was leaked: nothing else owns the new block */
+		return;
+	}
+	netlog(f, Logicmp, "send icmppkttoobig6 -> s%I d%I\n",
+		p->src, p->dst);
+
+	memmove(np->dst, p->src, IPaddrlen);
+	np->type = PacketTooBigV6;
+	np->code = 0;
+	hnputl(np->icmpid, ifc->maxtu - ifc->m->hsize);	/* 4 bytes: spans icmpid and seq */
+	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	set_cksum(nbp);	/* clears vcf and borrows ttl, so both are set afterwards */
+	np->ttl = HOP_LIMIT;
+	np->vcf[0] = 0x06 << 4;
+	ipriv->out[PacketTooBigV6]++;
+	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * validate an inbound ICMPv6 packet (RFC 2461, pages 39-40, pages 57-58): 1 = ok, 0 = drop.
+ */
+static int
+valid(Proto *icmp, Ipifc *ifc, Block *bp, Icmppriv6 *ipriv)
+{
+	int sz, osz, unsp, n, ttl, iplen;
+	int pktsz = BLEN(bp);
+	uchar *packet = bp->rp;
+	IPICMP *p = (IPICMP *) packet;
+	Ndpkt *np;
+
+	USED(ifc);
+	n = blocklen(bp);
+	if(n < sizeof(IPICMP)) {
+		ipriv->stats[HlenErrs6]++;
+		netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
+		goto err;
+	}
+
+	iplen = nhgets(p->ploadlen);
+	if(iplen > n - IP6HDR) {	/* payload may not claim more than we hold */
+		ipriv->stats[LenErrs6]++;
+		netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
+		goto err;
+	}
+
+	/* Rather than construct explicit pseudoheader, overwrite IPv6 header */
+	if(p->proto != ICMPv6) {
+		/* This code assumes no extension headers!!! */
+		netlog(icmp->f, Logicmp, "icmp error: extension header\n");
+		goto err;
+	}
+	memset(packet, 0, 4);	/* vcf is cleared and not restored */
+	ttl = p->ttl;
+	p->ttl = p->proto;	/* same proto/ttl swap as set_cksum */
+	p->proto = 0;
+	if(ptclcsum(bp, 0, iplen + IP6HDR)) {
+		ipriv->stats[CsumErrs6]++;
+		netlog(icmp->f, Logicmp, "icmp checksum error\n");
+		goto err;
+	}
+	p->proto = p->ttl;	/* undo the swap */
+	p->ttl = ttl;
+
+	/* additional tests for some pkt types */
+	if (p->type == NbrSolicit || p->type == NbrAdvert ||
+	    p->type == RouterAdvert || p->type == RouterSolicit ||
+	    p->type == RedirectV6) {
+		if(p->ttl != HOP_LIMIT) {
+			ipriv->stats[HoplimErrs6]++;
+			goto err;
+		}
+		if(p->code != 0) {
+			ipriv->stats[IcmpCodeErrs6]++;
+			goto err;
+		}
+
+		switch (p->type) {
+		case NbrSolicit:
+		case NbrAdvert:
+			np = (Ndpkt*) p;
+			if(isv6mcast(np->target)) {
+				ipriv->stats[TargetErrs6]++;
+				goto err;
+			}
+			if(optexsts(np) && np->olen == 0) {
+				ipriv->stats[OptlenErrs6]++;
+				goto err;
+			}
+
+			if (p->type == NbrSolicit &&
+			    ipcmp(np->src, v6Unspecified) == 0)
+				if(!issmcast(np->dst) || optexsts(np)) {
+					ipriv->stats[AddrmxpErrs6]++;
+					goto err;
+				}
+
+			if(p->type == NbrAdvert)
+				if(isv6mcast(np->dst) &&
+				    (np->icmpid[0] & Sflag)){	/* flags live in the first byte, as icmpna writes them */
+					ipriv->stats[AddrmxpErrs6]++;
+					goto err;
+				}
+			break;
+
+		case RouterAdvert:
+			if(pktsz - sizeof(Ip6hdr) < 16) {
+				ipriv->stats[HlenErrs6]++;
+				goto err;
+			}
+			if(!islinklocal(p->src)) {
+				ipriv->stats[RouterAddrErrs6]++;
+				goto err;
+			}
+			sz = sizeof(IPICMP) + 8;
+			while (sz+1 < pktsz) {	/* walk the options; each length is in 8-byte units */
+				osz = packet[sz+1];
+				if(osz <= 0) {
+					ipriv->stats[OptlenErrs6]++;
+					goto err;
+				}
+				sz += 8*osz;
+			}
+			break;
+
+		case RouterSolicit:
+			if(pktsz - sizeof(Ip6hdr) < 8) {
+				ipriv->stats[HlenErrs6]++;
+				goto err;
+			}
+			unsp = (ipcmp(p->src, v6Unspecified) == 0);
+			sz = sizeof(IPICMP) + 8;
+			while (sz+1 < pktsz) {	/* an unspecified source may not carry SRC_LLADDR */
+				osz = packet[sz+1];
+				if(osz <= 0 ||
+				    (unsp && packet[sz] == SRC_LLADDR)) {
+					ipriv->stats[OptlenErrs6]++;
+					goto err;
+				}
+				sz += 8*osz;
+			}
+			break;
+
+		case RedirectV6:
+			/* to be filled in */
+			break;
+
+		default:
+			goto err;
+		}
+	}
+	return 1;
+err:
+	ipriv->stats[InErrors6]++;
+	return 0;
+}
+
+static int
+targettype(Fs *f, Ipifc *ifc, uchar *target)
+{	/* classify target under ifc's read lock: Tuniproxy, Tunitent, Tunirany, or 0 */
+	Iplifc *l;
+	int kind = 0;
+
+	RLOCK(ifc);
+	if(ipproxyifc(f, ifc, target))
+		kind = Tuniproxy;
+	else
+		for(l = ifc->lifc; l != nil; l = l->next){
+			if(ipcmp(l->local, target) != 0)
+				continue;
+			/* the first matching local address decides */
+			kind = Tunirany;
+			if(l->tentative)
+				kind = Tunitent;
+			break;
+		}
+	RUNLOCK(ifc);
+	return kind;
+}
+
+static void
+icmpiput6(Proto *icmp, Ipifc *ipifc, Block *bp)
+{	/* receive path: validate an ICMPv6 packet, then act on its type; bp is consumed */
+	int refresh = 1;
+	char *msg, m2[128];
+	uchar pktflags;
+	uchar *packet = bp->rp;
+	uchar lsrc[IPaddrlen];
+	Block *r;
+	IPICMP *p = (IPICMP *)packet;
+	Icmppriv6 *ipriv = icmp->priv;
+	Iplifc *lifc;
+	Ndpkt* np;
+	Proto *pr;
+
+	if(!valid(icmp, ipifc, bp, ipriv) || p->type > Maxtype6)
+		goto raise;
+
+	ipriv->in[p->type]++;
+
+	switch(p->type) {
+	case EchoRequestV6:
+		r = mkechoreply6(bp, ipifc);
+		if(r == nil)
+			goto raise;
+		ipriv->out[EchoReplyV6]++;	/* count the type actually sent */
+		ipoput6(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
+		break;
+
+	case UnreachableV6:
+		if(p->code >= nelem(unreachcode))
+			msg = unreachcode[Icmp6_unknown];
+		else
+			msg = unreachcode[p->code];
+
+		bp->rp += sizeof(IPICMP);	/* step to the quoted packet */
+		if(blocklen(bp) < 8){
+			ipriv->stats[LenErrs6]++;
+			goto raise;
+		}
+		p = (IPICMP *)bp->rp;
+		pr = Fsrcvpcolx(icmp->f, p->proto);
+		if(pr != nil && pr->advise != nil) {
+			(*pr->advise)(pr, bp, msg);	/* bp now belongs to the advised protocol */
+			return;
+		}
+
+		bp->rp -= sizeof(IPICMP);	/* nobody to advise: deliver the whole message */
+		goticmpkt6(icmp, bp, 0);
+		break;
+
+	case TimeExceedV6:
+		if(p->code == 0){
+			snprint(m2, sizeof m2, "ttl exceeded at %I", p->src);
+
+			bp->rp += sizeof(IPICMP);	/* step to the quoted packet */
+			if(blocklen(bp) < 8){
+				ipriv->stats[LenErrs6]++;
+				goto raise;
+			}
+			p = (IPICMP *)bp->rp;
+			pr = Fsrcvpcolx(icmp->f, p->proto);
+			if(pr && pr->advise) {
+				(*pr->advise)(pr, bp, m2);	/* bp now belongs to the advised protocol */
+				return;
+			}
+			bp->rp -= sizeof(IPICMP);
+		}
+
+		goticmpkt6(icmp, bp, 0);
+		break;
+
+	case RouterAdvert:
+	case RouterSolicit:
+		/* using lsrc as a temp, munge hdr for goticmp6 */
+		if (0) {
+			memmove(lsrc, p->src, IPaddrlen);
+			memmove(p->src, p->dst, IPaddrlen);
+			memmove(p->dst, lsrc, IPaddrlen);
+		}
+		goticmpkt6(icmp, bp, p->type);	/* the type itself is the mux key */
+		break;
+
+	case NbrSolicit:
+		np = (Ndpkt*) p;
+		pktflags = 0;
+		switch (targettype(icmp->f, ipifc, np->target)) {
+		case Tunirany:
+			pktflags |= Oflag;
+			/* fall through */
+
+		case Tuniproxy:
+			if(ipcmp(np->src, v6Unspecified) != 0) {
+				arpenter(icmp->f, V6, np->src, np->lnaddr,
+					8*np->olen-2, 0);
+				pktflags |= Sflag;
+			}
+			if(ipv6local(ipifc, lsrc))
+				icmpna(icmp->f, lsrc,
+				   (ipcmp(np->src, v6Unspecified) == 0?
+					v6allnodesL: np->src),
+				   np->target, ipifc->mac, pktflags);
+			/* icmpna builds its own block, so bp is finished with either way */
+			freeblist(bp);
+			break;
+
+		case Tunitent:
+			/* not clear what needs to be done. send up
+			 * an icmp mesg saying don't use this address? */
+		default:
+			freeblist(bp);
+		}
+		break;
+
+	case NbrAdvert:
+		np = (Ndpkt*) p;
+
+		/*
+		 * if the target address matches one of the local interface
+		 * addresses and the local interface address has tentative bit
+		 * set, insert into ARP table. this is so the duplicate address
+		 * detection part of ipconfig can discover duplication through
+		 * the arp table.
+		 */
+		lifc = iplocalonifc(ipifc, np->target);
+		if(lifc && lifc->tentative)
+			refresh = 0;
+		arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2,
+			refresh);
+		freeblist(bp);
+		break;
+
+	case PacketTooBigV6:
+	default:
+		goticmpkt6(icmp, bp, 0);
+		break;
+	}
+	return;
+raise:
+	freeblist(bp);
+}
+
+int
+icmpstats6(Proto *icmp6, char *buf, int len)
+{	/* print all ICMPv6 counters into buf; returns the number of bytes written */
+	Icmppriv6 *st = icmp6->priv;
+	char *out = buf, *end = buf+len;
+	int t;
+
+	/* general counters, one "name: value" line each */
+	for(t = 0; t < Nstats6; t++)
+		out = seprint(out, end, "%s: %lud\n", statnames6[t], st->stats[t]);
+
+	/*
+	 * per-type in/out counts.  types without a name are
+	 * skipped rather than printed by number.
+	 */
+	for(t = 0; t <= Maxtype6; t++){
+		if(icmpnames6[t] == nil)
+			continue;
+		out = seprint(out, end, "%s: %lud %lud\n", icmpnames6[t], st->in[t], st->out[t]);
+	}
+	return out - buf;
+}
+
+
+/* import from icmp.c */
+extern int icmpstate(Conv *c, char *state, int n);
+extern char* icmpannounce(Conv *c, char **argv, int argc);
+extern char* icmpconnect(Conv *c, char **argv, int argc);
+extern void icmpclose(Conv *c);
+
+void
+icmp6init(Fs *fs)
+{	/* allocate the ICMPv6 Proto, fill in its hooks and hand it to Fsproto */
+	Proto *pr = smalloc(sizeof(Proto));
+
+	pr->name = "icmpv6";
+	pr->ipproto = ICMPv6;
+	pr->nc = 16;
+	pr->ptclsize = sizeof(Icmpcb6);
+	pr->priv = smalloc(sizeof(Icmppriv6));
+	/* connect, announce, state and close are the icmp.c routines */
+	pr->connect = icmpconnect;
+	pr->announce = icmpannounce;
+	pr->state = icmpstate;
+	pr->close = icmpclose;
+	pr->create = icmpcreate6;
+	pr->rcv = icmpiput6;
+	pr->stats = icmpstats6;
+	pr->ctl = icmpctl6;
+	pr->advise = icmpadvise6;
+	pr->gc = nil;
+	Fsproto(fs, pr);
+}
diff --git a/src/9vx/a/ip/igmp.c b/src/9vx/a/ip/igmp.c
@@ -0,0 +1,294 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+enum
+{
+ IGMP_IPHDRSIZE = 20, /* size of ip header */
+ IGMP_HDRSIZE = 8, /* size of IGMP header */
+ IP_IGMPPROTO = 2, /* value stored in the ip proto field */
+
+ /* message types, carried in the low nibble of vertype */
+ IGMPquery = 1,
+ IGMPreport = 2,
+
+ MSPTICK = 100, /* tsleep interval of igmpproc, i.e. ms per tick */
+ MAXTIMEOUT = 10000/MSPTICK, /* at most 10 secs for a response */
+};
+
+typedef struct IGMPpkt IGMPpkt;
+typedef char byte;
+
+/*
+ * An IGMP message together with the ip header in front of it,
+ * as laid out in a Block.
+ */
+struct IGMPpkt
+{
+ /* ip header */
+ byte vihl; /* Version and header length */
+ byte tos; /* Type of service */
+ byte len[2]; /* packet length (including headers) */
+ byte id[2]; /* Identification */
+ byte frag[2]; /* Fragment information */
+ byte Unused;
+ byte proto; /* Protocol */
+ byte cksum[2]; /* checksum of ip portion */
+ byte src[IPaddrlen]; /* Ip source */
+ byte dst[IPaddrlen]; /* Ip destination */
+
+ /* igmp header */
+ byte vertype; /* version and type */
+ byte unused;
+ byte igmpcksum[2]; /* checksum of igmp portion */
+ byte group[IPaddrlen]; /* multicast group */
+};
+
+/*
+ * lists for group reports
+ */
+typedef struct IGMPrep IGMPrep;
+struct IGMPrep
+{
+ IGMPrep *next;
+ Media *m; /* medium the query arrived on; reports are sent on it */
+ int ticks; /* incremented by igmpproc on every scan */
+ Multicast *multi; /* groups still waiting to be reported */
+};
+
+typedef struct IGMP IGMP;
+struct IGMP
+{
+ Lock lk; /* taken around every use of reports */
+
+ Rendez r; /* igmpproc sleeps here until reports is non-nil */
+ IGMPrep *reports; /* one entry per medium with pending reports */
+};
+
+IGMP igmpalloc;
+
+ Proto igmp;
+extern Fs fs;
+
+/* message counters printed by igmpstats */
+static struct Stats
+{
+ ulong inqueries;
+ ulong outqueries;
+ ulong inreports;
+ ulong outreports;
+} stats;
+
+/*
+ * Build and send an IGMP membership report (version 1 in the high
+ * nibble of vertype) for the group addr on medium m.  The report is
+ * addressed to Ipallsys and sent with a TTL of 1.  Nothing is sent
+ * when no block can be allocated.
+ */
+void
+igmpsendreport(Media *m, byte *addr)
+{
+	IGMPpkt *p;
+	Block *bp;
+
+	bp = allocb(sizeof(IGMPpkt));
+	if(bp == nil)
+		return;
+	p = (IGMPpkt*)bp->wp;
+	bp->wp += sizeof(IGMPpkt);
+
+	/*
+	 * clear the packet before filling in any field; the ip version
+	 * used to be stored first and was then wiped by this memset.
+	 */
+	memset(bp->rp, 0, sizeof(IGMPpkt));
+	p->vihl = IP_VER4;
+	hnputl(p->src, Mediagetaddr(m));
+	hnputl(p->dst, Ipallsys);
+	p->vertype = (1<<4) | IGMPreport;
+	p->proto = IP_IGMPPROTO;
+	memmove(p->group, addr, IPaddrlen);
+
+	/* checksum covers the igmp header only, which follows the ip header */
+	hnputs(p->igmpcksum, ptclcsum(bp, IGMP_IPHDRSIZE, IGMP_HDRSIZE));
+	netlog(Logigmp, "igmpreport %I\n", p->group);
+	stats.outreports++;
+	ipoput4(bp, 0, 1, DFLTTOS, nil);	/* TTL of 1 */
+}
+
+/* sleep condition used by igmpproc: true while a report list is queued */
+static int
+isreport(void *a)
+{
+	int pending;
+
+	USED(a);
+	pending = igmpalloc.reports != 0;
+	return pending;
+}
+
+
+/*
+ * Kernel process that sends the membership reports queued by
+ * igmpiput.  It sleeps until igmpalloc.reports is non-nil, then scans
+ * the lists every MSPTICK ms; each scan bumps the per-medium tick
+ * count and sends at most one report whose timeout has been reached.
+ * A medium's entry is freed once its group list is empty.
+ *
+ * The lock is taken through the lk member: passing &igmpalloc itself
+ * hands lock() a pointer of the wrong type.
+ */
+void
+igmpproc(void *a)
+{
+	IGMPrep *rp, **lrp;
+	Multicast *mp, **lmp;
+	byte ip[IPaddrlen];
+
+	USED(a);
+
+	for(;;){
+		sleep(&igmpalloc.r, isreport, 0);
+		for(;;){
+			lock(&igmpalloc.lk);
+
+			/* leaves the inner loop with the lock held; released below */
+			if(igmpalloc.reports == nil)
+				break;
+
+			/* look for a single report */
+			lrp = &igmpalloc.reports;
+			mp = nil;
+			for(rp = *lrp; rp; rp = *lrp){
+				rp->ticks++;
+				lmp = &rp->multi;
+				for(mp = *lmp; mp; mp = *lmp){
+					if(rp->ticks >= mp->timeout){
+						/* unlink the group that is due */
+						*lmp = mp->next;
+						break;
+					}
+					lmp = &mp->next;
+				}
+				if(mp != nil)
+					break;
+
+				if(rp->multi != nil){
+					lrp = &rp->next;
+					continue;
+				} else {
+					/* nothing left to report on this medium */
+					*lrp = rp->next;
+					free(rp);
+				}
+			}
+			unlock(&igmpalloc.lk);
+
+			if(mp){
+				/* do a single report and try again */
+				hnputl(ip, mp->addr);
+				igmpsendreport(rp->m, ip);
+				free(mp);
+				continue;
+			}
+
+			tsleep(&up->sleep, return0, 0, MSPTICK);
+		}
+		unlock(&igmpalloc.lk);
+	}
+
+}
+
+/*
+ * Handle an IGMP packet received on medium m; bp is always freed.
+ *
+ * A query starts a report list for the medium (unless one is already
+ * active): the medium's groups are copied, each is given a random
+ * timeout below MAXTIMEOUT ticks, and igmpproc is woken.
+ * A report from another host removes that group from our pending
+ * list for the medium, since it no longer has to be reported.
+ *
+ * Packets that are too short, are not version 1 or fail the igmp
+ * checksum are logged and dropped.
+ */
+void
+igmpiput(Media *m, Ipifc *ifc, Block *bp)
+{
+	int n;
+	IGMPpkt *ghp;
+	Ipaddr group;
+	IGMPrep *rp, **lrp;
+	Multicast *mp, **lmp, *next;
+
+	USED(ifc);
+
+	/* check the length before looking at any header field */
+	n = blocklen(bp);
+	if(n < IGMP_IPHDRSIZE+IGMP_HDRSIZE){
+		netlog(Logigmp, "igmpiput: bad len\n");
+		goto error;
+	}
+
+	ghp = (IGMPpkt*)(bp->rp);
+	netlog(Logigmp, "igmpiput: %d %I\n", ghp->vertype, ghp->group);
+
+	if((ghp->vertype>>4) != 1){
+		netlog(Logigmp, "igmpiput: bad igmp type\n");
+		goto error;
+	}
+	if(ptclcsum(bp, IGMP_IPHDRSIZE, IGMP_HDRSIZE)){
+		netlog(Logigmp, "igmpiput: checksum error %I\n", ghp->src);
+		goto error;
+	}
+
+	group = nhgetl(ghp->group);
+
+	/* lock through the lk member; &igmpalloc has the wrong type */
+	lock(&igmpalloc.lk);
+	switch(ghp->vertype & 0xf){
+	case IGMPquery:
+		/*
+		 * start reporting groups that we're a member of.
+		 */
+		stats.inqueries++;
+		for(rp = igmpalloc.reports; rp; rp = rp->next)
+			if(rp->m == m)
+				break;
+		if(rp != nil)
+			break;	/* already reporting */
+
+		mp = Mediacopymulti(m);
+		if(mp == nil)
+			break;
+
+		rp = malloc(sizeof(*rp));
+		if(rp == nil){
+			/* don't leak the copied group list */
+			for(; mp; mp = next){
+				next = mp->next;
+				free(mp);
+			}
+			break;
+		}
+
+		rp->m = m;
+		rp->multi = mp;
+		rp->ticks = 0;
+		for(; mp; mp = mp->next)
+			mp->timeout = nrand(MAXTIMEOUT);
+		rp->next = igmpalloc.reports;
+		igmpalloc.reports = rp;
+
+		wakeup(&igmpalloc.r);
+
+		break;
+	case IGMPreport:
+		/*
+		 * find report list for this medium
+		 */
+		stats.inreports++;
+		lrp = &igmpalloc.reports;
+		for(rp = *lrp; rp; rp = *lrp){
+			if(rp->m == m)
+				break;
+			lrp = &rp->next;
+		}
+		if(rp == nil)
+			break;
+
+		/*
+		 * if someone else has reported a group,
+		 * we don't have to.
+		 */
+		lmp = &rp->multi;
+		for(mp = *lmp; mp; mp = *lmp){
+			if(mp->addr == group){
+				*lmp = mp->next;
+				free(mp);
+				break;
+			}
+			lmp = &mp->next;
+		}
+
+		break;
+	}
+	unlock(&igmpalloc.lk);
+
+error:
+	freeb(bp);
+}
+
+/*
+ * Format the query/report counters into buf (at most len bytes) and
+ * return snprint's result.  The counters are ulong, so they are
+ * printed with %lud like the other statistics in this directory.
+ */
+int
+igmpstats(char *buf, int len)
+{
+	return snprint(buf, len, "\trcvd %lud %lud\n\tsent %lud %lud\n",
+		stats.inqueries, stats.inreports,
+		stats.outqueries, stats.outreports);
+}
+
+/*
+ * Fill in the global igmp protocol descriptor, install
+ * igmpsendreport as igmpreportfn, start the igmpproc kernel process
+ * and register the protocol with fs.  IGMP has no conversations
+ * (nc is 0), so the connection-oriented hooks are left nil.
+ */
+void
+igmpinit(Fs *fs)
+{
+ igmp.name = "igmp";
+ igmp.connect = nil;
+ igmp.announce = nil;
+ igmp.ctl = nil;
+ igmp.state = nil;
+ igmp.close = nil;
+ igmp.rcv = igmpiput;
+ igmp.stats = igmpstats;
+ igmp.ipproto = IP_IGMPPROTO;
+ igmp.nc = 0;
+ igmp.ptclsize = 0;
+
+ /* NOTE(review): igmpreportfn is declared elsewhere; presumably the hook other code uses to send a report */
+ igmpreportfn = igmpsendreport;
+ kproc("igmpproc", igmpproc, 0);
+
+ Fsproto(fs, &igmp);
+}
diff --git a/src/9vx/a/ip/il.c b/src/9vx/a/ip/il.c
@@ -0,0 +1,1408 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+enum /* Connection state */
+{
+ Ilclosed,
+ Ilsyncer,
+ Ilsyncee,
+ Ilestablished,
+ Illistening,
+ Ilclosing,
+ Ilopening, /* only for file server */
+};
+
+/* printable names, indexed by connection state */
+char *ilstates[] =
+{
+ "Closed",
+ "Syncer",
+ "Syncee",
+ "Established",
+ "Listen",
+ "Closing",
+ "Opening", /* only for file server */
+};
+
+enum /* Packet types */
+{
+ Ilsync,
+ Ildata,
+ Ildataquery,
+ Ilack,
+ Ilquery,
+ Ilstate,
+ Ilclose,
+};
+
+/* printable names, indexed by packet type; valid indices are 0..Ilclose */
+char *iltype[] =
+{
+ "sync",
+ "data",
+ "dataquery",
+ "ack",
+ "query",
+ "state",
+ "close"
+};
+
+enum
+{
+ Seconds = 1000,
+ Iltickms = 50, /* time base */
+ AckDelay = 2*Iltickms, /* max time twixt message rcvd & ack sent */
+ MaxTimeout = 30*Seconds, /* max time between rexmit */
+ QueryTime = 10*Seconds, /* time between subsequent queries */
+ DeathTime = 30*QueryTime, /* silence after which ilackproc hangs up */
+
+ MaxRexmit = 16, /* max retransmissions before hangup */
+ Defaultwin = 20, /* initial value of Ilcb.window */
+
+ /* averages are kept scaled by these gains and shifted down on use */
+ LogAGain = 3,
+ AGain = 1<<LogAGain,
+ LogDGain = 2,
+ DGain = 1<<LogDGain,
+
+ DefByteRate = 100, /* assume a megabit link */
+ DefRtt = 50, /* cross country on a great day */
+
+ Maxrq = 64*1024, /* read queue limit; data is dropped beyond it */
+};
+
+enum
+{
+ Nqt= 8, /* query slots 1..Nqt in Ilcb.qt; slot 0 is kept for old peers */
+};
+
+/*
+ * Per-conversation IL state, stored in Conv.ptcl.
+ * ackq protects the unacked queue and the query table;
+ * outo protects the out of order queue.
+ */
+typedef struct Ilcb Ilcb;
+struct Ilcb /* Control block */
+{
+ int state; /* Connection state */
+ Conv *conv; /* conversation this block belongs to */
+ QLock ackq; /* Unacknowledged queue */
+ Block *unacked;
+ Block *unackedtail; /* only meaningful while unacked is non-nil */
+ ulong unackedbytes;
+ QLock outo; /* Out of order packet queue */
+ Block *outoforder;
+ ulong next; /* Id of next to send */
+ ulong recvd; /* Last packet received */
+ ulong acksent; /* Last packet acked */
+ ulong start; /* Local start id */
+ ulong rstart; /* Remote start id */
+ int window; /* Maximum receive window */
+ int rxquery; /* number of queries on this connection */
+ int rxtot; /* number of retransmits on this connection */
+ int rexmit; /* number of retransmits of *unacked */
+ ulong qt[Nqt+1]; /* state table for query messages */
+ int qtx; /* ... index into qt */
+
+ /* if set, fasttimeout causes a connection request to terminate after 4*Iltickms */
+ int fasttimeout;
+
+ /* timers */
+ ulong lastxmit; /* time of last xmit */
+ ulong lastrecv; /* time of last recv */
+ ulong timeout; /* retransmission time for *unacked */
+ ulong acktime; /* time to send next ack */
+ ulong querytime; /* time to send next query */
+
+ /* adaptive measurements */
+ int delay; /* Average of the fixed rtt delay */
+ int rate; /* Average uchar rate */
+ int mdev; /* Mean deviation of rtt */
+ int maxrtt; /* largest rtt seen */
+ ulong rttack; /* The ack we are waiting for */
+ int rttlen; /* Length of rttack packet */
+ uvlong rttstart; /* Time we issued rttack packet */
+};
+
+enum
+{
+ IL_IPSIZE = 20, /* size of the ip header in front of the il header */
+ IL_HDRSIZE = 18, /* size of the il header */
+ IL_LISTEN = 0, /* ilstart types */
+ IL_CONNECT = 1,
+ IP_ILPROTO = 40, /* value stored in the ip proto field */
+};
+
+/*
+ * An IL packet header together with the ip header in front of it,
+ * as laid out in a Block.  Multi-byte fields are read and written
+ * with nhget*/hnput*.
+ */
+typedef struct Ilhdr Ilhdr;
+struct Ilhdr
+{
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+ uchar ttl; /* Time to live */
+ uchar proto; /* Protocol */
+ uchar cksum[2]; /* Header checksum */
+ uchar src[4]; /* Ip source */
+ uchar dst[4]; /* Ip destination */
+ uchar ilsum[2]; /* Checksum including header */
+ uchar illen[2]; /* Packet length */
+ uchar iltype; /* Packet type */
+ uchar ilspec; /* Special */
+ uchar ilsrc[2]; /* Src port */
+ uchar ildst[2]; /* Dst port */
+ uchar ilid[4]; /* Sequence id */
+ uchar ilack[4]; /* Acked sequence */
+};
+
+/* indices into Ilpriv.stats; statnames gives the printed name of each */
+enum
+{
+ InMsgs,
+ OutMsgs,
+ CsumErrs, /* checksum errors */
+ HlenErrs, /* header length error */
+ LenErrs, /* short packet */
+ OutOfOrder, /* out of order */
+ Retrans, /* retransmissions */
+ DupMsg,
+ DupBytes,
+ DroppedMsgs,
+
+ Nstats,
+};
+
+static char *statnames[] =
+{
+[InMsgs] "InMsgs",
+[OutMsgs] "OutMsgs",
+[CsumErrs] "CsumErrs",
+[HlenErrs] "HlenErr",
+[LenErrs] "LenErrs",
+[OutOfOrder] "OutOfOrder",
+[Retrans] "Retrans",
+[DupMsg] "DupMsg",
+[DupBytes] "DupBytes",
+[DroppedMsgs] "DroppedMsgs",
+};
+
+/* per-protocol-instance state, stored in Proto.priv */
+typedef struct Ilpriv Ilpriv;
+struct Ilpriv
+{
+ Ipht ht; /* hash table of conversations, searched by iliput */
+
+ ulong stats[Nstats]; /* the counters reported by ilxstats */
+
+ /* NOTE(review): of these separate counters only rexmit is written in this file, and none is reported by ilxstats */
+ ulong csumerr; /* checksum errors */
+ ulong hlenerr; /* header length error */
+ ulong lenerr; /* short packet */
+ ulong order; /* out of order */
+ ulong rexmit; /* retransmissions */
+ ulong dup;
+ ulong dupb;
+
+ /* keeping track of the ack kproc */
+ int ackprocstarted;
+ QLock apl; /* serializes starting the ack kproc in ilstart */
+};
+
+/* state for query/dataquery messages */
+
+
+/* forward declarations */
+void ilrcvmsg(Conv*, Block*);
+void ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
+void ilackq(Ilcb*, Block*);
+void ilprocess(Conv*, Ilhdr*, Block*);
+void ilpullup(Conv*);
+void ilhangup(Conv*, char*);
+void ilfreeq(Ilcb*);
+void ilrexmit(Ilcb*);
+void ilbackoff(Ilcb*);
+void ilsettimeout(Ilcb*);
+char* ilstart(Conv*, int, int);
+void ilackproc(void*);
+void iloutoforder(Conv*, Ilhdr*, Block*);
+void iliput(Proto*, Ipifc*, Block*);
+void iladvise(Proto*, Block*, char*);
+int ilnextqt(Ilcb*);
+void ilcbinit(Ilcb*);
+int later(ulong, ulong, char*);
+void ilreject(Fs*, Ilhdr*);
+void illocalclose(Conv *c);
+ int ilcksum = 1; /* when zero, outgoing packets are sent with a zero il checksum */
+static int initseq = 25001; /* NOTE(review): not referenced anywhere else in this file */
+static ulong scalediv, scalemul; /* fastticks to ms conversion, set by inittimescale */
+static char *etime = "connection timed out";
+
+/*
+ * connect handler: run the standard connect processing and start
+ * the IL handshake as the calling side.  A "!fasttimeout" suffix in
+ * argv[1] is cut off the argument and selects the quick timeout mode
+ * of ilstart.  Returns nil or an error string.
+ */
+static char*
+ilconnect(Conv *c, char **argv, int argc)
+{
+	char *err, *opt;
+	int fast;
+
+	/* huge hack to quickly try an il connection */
+	fast = 0;
+	opt = nil;
+	if(argc > 1)
+		opt = strstr(argv[1], "!fasttimeout");
+	if(opt != nil){
+		*opt = 0;
+		fast = 1;
+	}
+
+	err = Fsstdconnect(c, argv, argc);
+	if(err == nil)
+		err = ilstart(c, IL_CONNECT, fast);
+	return err;
+}
+
+/*
+ * Format a one-line status of conversation c into state (at most n
+ * bytes): state name, queue lengths, the adaptive timing estimates
+ * and the retransmit/query counters.  delay, rate and mdev are kept
+ * scaled by their gains, so they are shifted down for display.
+ * Returns snprint's result.
+ */
+static int
+ilstate(Conv *c, char *state, int n)
+{
+ Ilcb *ic;
+
+ ic = (Ilcb*)(c->ptcl);
+ return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d\n",
+ ilstates[ic->state],
+ c->rq ? qlen(c->rq) : 0,
+ c->wq ? qlen(c->wq) : 0,
+ ic->delay>>LogAGain, ic->rate>>LogAGain, ic->mdev>>LogDGain,
+ ic->unackedbytes, ic->rxtot, ic->rxquery, ic->maxrtt);
+}
+
+/* a conversation counts as in use unless its control block is closed */
+static int
+ilinuse(Conv *c)
+{
+	Ilcb *ic = (Ilcb*)(c->ptcl);
+
+	if(ic->state == Ilclosed)
+		return 0;
+	return 1;
+}
+
+/*
+ * announce handler, called with c locked: run the standard announce
+ * processing, put the conversation into the listening state and
+ * report it as connected.  Returns nil or an error string.
+ */
+static char*
+ilannounce(Conv *c, char **argv, int argc)
+{
+	char *err;
+
+	err = Fsstdannounce(c, argv, argc);
+	if(err == nil)
+		err = ilstart(c, IL_LISTEN, 0);
+	if(err != nil)
+		return err;
+
+	Fsconnected(c, nil);
+	return nil;
+}
+
+/*
+ * Close the local end of c without sending anything: mark the
+ * control block closed, remove the conversation from the protocol's
+ * hash table and clear its local address and port.
+ */
+void
+illocalclose(Conv *c)
+{
+ Ilcb *ic;
+ Ilpriv *ipriv;
+
+ ipriv = c->p->priv;
+ ic = (Ilcb*)c->ptcl;
+ ic->state = Ilclosed;
+ iphtrem(&ipriv->ht, c);
+ ipmove(c->laddr, IPnoaddr);
+ c->lport = 0;
+}
+
+/*
+ * close handler: close the conversation's queues, then act on the
+ * connection state.  A connection that is syncing or established
+ * moves to Ilclosing and sends a close message; a listener is closed
+ * locally.  The unacked and out of order queues are freed in all
+ * cases.
+ */
+static void
+ilclose(Conv *c)
+{
+ Ilcb *ic;
+
+ ic = (Ilcb*)c->ptcl;
+
+ qclose(c->rq);
+ qclose(c->wq);
+ qclose(c->eq);
+
+ switch(ic->state) {
+ case Ilclosing:
+ case Ilclosed:
+ break;
+ case Ilsyncer:
+ case Ilsyncee:
+ case Ilestablished:
+ ic->state = Ilclosing;
+ ilsettimeout(ic);
+ ilsendctl(c, nil, Ilclose, ic->next, ic->recvd, 0);
+ break;
+ case Illistening:
+ illocalclose(c);
+ break;
+ }
+ ilfreeq(ic);
+}
+
+/*
+ * Write-queue handler installed by ilcreate: send the block bp as
+ * one IL data message on conversation x.
+ *
+ * The data is dropped (and the read queue hung up) if the connection
+ * is closed, listening or closing.  Otherwise ip and il headers are
+ * prepended, the message is given the next sequence id and
+ * piggy-backs an ack of the last packet received, a copy is put on
+ * the unacked queue, and the packet is handed to ipoput4.
+ */
+void
+ilkick(void *x, Block *bp)
+{
+ Conv *c = x;
+ Ilhdr *ih;
+ Ilcb *ic;
+ int dlen;
+ ulong id, ack;
+ Fs *f;
+ Ilpriv *priv;
+
+ f = c->p->f;
+ priv = c->p->priv;
+ ic = (Ilcb*)c->ptcl;
+
+ if(bp == nil)
+ return;
+
+ switch(ic->state) {
+ case Ilclosed:
+ case Illistening:
+ case Ilclosing:
+ freeblist(bp);
+ qhangup(c->rq, nil);
+ return;
+ }
+
+ dlen = blocklen(bp);
+
+ /* Make space to fit il & ip */
+ bp = padblock(bp, IL_IPSIZE+IL_HDRSIZE);
+ ih = (Ilhdr *)(bp->rp);
+ ih->vihl = IP_VER4;
+
+ /* Ip fields */
+ ih->frag[0] = 0;
+ ih->frag[1] = 0;
+ v6tov4(ih->dst, c->raddr);
+ v6tov4(ih->src, c->laddr);
+ ih->proto = IP_ILPROTO;
+
+ /* Il fields */
+ hnputs(ih->illen, dlen+IL_HDRSIZE);
+ hnputs(ih->ilsrc, c->lport);
+ hnputs(ih->ildst, c->rport);
+
+ /* id allocation and queueing of the copy happen under ackq */
+ qlock(&ic->ackq);
+ id = ic->next++;
+ hnputl(ih->ilid, id);
+ ack = ic->recvd;
+ hnputl(ih->ilack, ack);
+ ic->acksent = ack;
+ ic->acktime = NOW + AckDelay;
+ ih->iltype = Ildata;
+ ih->ilspec = 0;
+ ih->ilsum[0] = 0;
+ ih->ilsum[1] = 0;
+
+ /* Checksum of ilheader plus data (not ip & no pseudo header) */
+ if(ilcksum)
+ hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, dlen+IL_HDRSIZE));
+
+ ilackq(ic, bp);
+ qunlock(&ic->ackq);
+
+ /* Start the round trip timer for this packet if the timer is free */
+ if(ic->rttack == 0) {
+ ic->rttack = id;
+ ic->rttstart = fastticks(nil);
+ ic->rttlen = dlen + IL_IPSIZE + IL_HDRSIZE;
+ }
+
+ /* re-arm the retransmit timer only if it has already expired */
+ if(later(NOW, ic->timeout, nil))
+ ilsettimeout(ic);
+ ipoput4(f, bp, 0, c->ttl, c->tos, c);
+ priv->stats[OutMsgs]++;
+}
+
+/*
+ * create handler: give the conversation a read queue opened with a
+ * limit of Maxrq and a write queue made by qbypass with ilkick as
+ * its handler.
+ */
+static void
+ilcreate(Conv *c)
+{
+ c->rq = qopen(Maxrq, 0, 0, c);
+ c->wq = qbypass(ilkick, c);
+}
+
+/*
+ * stats handler: print every counter of the protocol instance as a
+ * "name: value" line into buf, using seprint bounded by buf+len.
+ * Returns the number of bytes produced.
+ */
+int
+ilxstats(Proto *il, char *buf, int len)
+{
+	Ilpriv *ipriv;
+	char *cur, *end;
+	int n;
+
+	ipriv = il->priv;
+	cur = buf;
+	end = buf + len;
+
+	n = 0;
+	while(n < Nstats){
+		cur = seprint(cur, end, "%s: %lud\n", statnames[n], ipriv->stats[n]);
+		n++;
+	}
+	return cur - buf;
+}
+
+/*
+ * Append a copy of the outgoing message bp to the tail of the
+ * unacked queue and add its length to unackedbytes.  ilkick calls
+ * this with ic->ackq held.
+ */
+void
+ilackq(Ilcb *ic, Block *bp)
+{
+	Block *copy;
+	int len;
+
+	len = blocklen(bp);
+
+	/* Enqueue a copy on the unacked queue in case this one gets lost */
+	copy = copyblock(bp, len);
+	copy->list = nil;
+	if(ic->unacked == nil)
+		ic->unacked = copy;
+	else
+		ic->unackedtail->list = copy;
+	ic->unackedtail = copy;
+	ic->unackedbytes += len;
+}
+
+/*
+ * Update the adaptive timing estimates from one round trip sample.
+ * Called by ilackto when the packet being timed (ic->rttack) is
+ * acked; bp is the packet that carried the ack.
+ *
+ * The sample is the fastticks elapsed since rttstart, scaled by
+ * scalemul/scalediv.  Exchanges under 256 bytes update the fixed
+ * delay estimate; larger ones update the byte rate.  The mean
+ * deviation and the largest rtt seen are updated as well.
+ * delay and rate are kept scaled by AGain, mdev by DGain.
+ */
+static
+void
+ilrttcalc(Ilcb *ic, Block *bp)
+{
+ int rtt, tt, pt, delay, rate;
+
+ rtt = fastticks(nil) - ic->rttstart;
+ rtt = (rtt*scalemul)/scalediv;
+ delay = ic->delay;
+ rate = ic->rate;
+
+ /* Guard against zero wrap */
+ if(rtt > 120000 || rtt < 0)
+ return;
+
+ /* this block had to be transmitted after the one acked so count its size */
+ ic->rttlen += blocklen(bp) + IL_IPSIZE + IL_HDRSIZE;
+
+ if(ic->rttlen < 256){
+ /* guess fixed delay as rtt of small packets */
+ delay += rtt - (delay>>LogAGain);
+ if(delay < AGain)
+ delay = AGain;
+ ic->delay = delay;
+ } else {
+ /* if packet took longer than avg rtt delay, recalc rate */
+ tt = rtt - (delay>>LogAGain);
+ if(tt > 0){
+ rate += ic->rttlen/tt - (rate>>LogAGain);
+ if(rate < AGain)
+ rate = AGain;
+ ic->rate = rate;
+ }
+ }
+
+ /* mdev */
+ /* pt is the rtt predicted from the current estimates; rate is clamped to at least AGain above, so the divisor is at least 1 */
+ pt = ic->rttlen/(rate>>LogAGain) + (delay>>LogAGain);
+ ic->mdev += abs(rtt-pt) - (ic->mdev>>LogDGain);
+
+ if(rtt > ic->maxrtt)
+ ic->maxrtt = rtt;
+}
+
+/*
+ * Process an acknowledgement of everything up to and including id
+ * ackto; bp is the packet that carried the ack.
+ *
+ * If the ack is for the packet being timed, a round trip sample is
+ * taken.  Every message on the unacked queue with an id not above
+ * ackto is then freed, and each one freed resets the retransmit
+ * count and restarts the retransmit timer.
+ */
+void
+ilackto(Ilcb *ic, ulong ackto, Block *bp)
+{
+ Ilhdr *h;
+ ulong id;
+
+ if(ic->rttack == ackto)
+ ilrttcalc(ic, bp);
+
+ /* Cancel if we've passed the packet we were interested in */
+ if(ic->rttack <= ackto)
+ ic->rttack = 0;
+
+ qlock(&ic->ackq);
+ while(ic->unacked) {
+ h = (Ilhdr *)ic->unacked->rp;
+ id = nhgetl(h->ilid);
+ if(ackto < id)
+ break;
+
+ /* bp is reused here for the queue entry being released */
+ bp = ic->unacked;
+ ic->unacked = bp->list;
+ bp->list = nil;
+ ic->unackedbytes -= blocklen(bp);
+ freeblist(bp);
+ ic->rexmit = 0;
+ ilsettimeout(ic);
+ }
+ qunlock(&ic->ackq);
+}
+
+/*
+ * rcv handler: accept one IL packet from the ip layer.
+ *
+ * The packet is dropped (and counted) if it is shorter than the ip
+ * and il headers, if its il length field is inconsistent, or if the
+ * il checksum fails.  The conversation is then looked up in the hash
+ * table; a packet for no conversation is dropped, answered with a
+ * close if it was a sync.  A sync arriving on a listener creates a
+ * new conversation in the Ilsyncee state.  Finally the packet is
+ * passed to ilprocess with the conversation locked.
+ *
+ * Changes from the previous version: an il length smaller than the
+ * il header is rejected (ilpullup subtracts IL_HDRSIZE from it), and
+ * the checksum log message now has one argument per format verb.
+ */
+void
+iliput(Proto *il, Ipifc *dummy, Block *bp)
+{
+	char *st;
+	Ilcb *ic;
+	Ilhdr *ih;
+	uchar raddr[IPaddrlen];
+	uchar laddr[IPaddrlen];
+	ushort sp, dp, csum;
+	int plen, illen;
+	Conv *new, *s;
+	Ilpriv *ipriv;
+
+	ipriv = il->priv;
+
+	ih = (Ilhdr *)bp->rp;
+	plen = blocklen(bp);
+	if(plen < IL_IPSIZE+IL_HDRSIZE){
+		netlog(il->f, Logil, "il: hlenerr\n");
+		ipriv->stats[HlenErrs]++;
+		goto raise;
+	}
+
+	/*
+	 * illen counts the il header plus the data, so it can neither
+	 * be smaller than the header nor exceed what actually arrived.
+	 */
+	illen = nhgets(ih->illen);
+	if(illen < IL_HDRSIZE || illen+IL_IPSIZE > plen){
+		netlog(il->f, Logil, "il: lenerr\n");
+		ipriv->stats[LenErrs]++;
+		goto raise;
+	}
+
+	/* sp is our (destination) port, dp the sender's port */
+	sp = nhgets(ih->ildst);
+	dp = nhgets(ih->ilsrc);
+	v4tov6(raddr, ih->src);
+	v4tov6(laddr, ih->dst);
+
+	if((csum = ptclcsum(bp, IL_IPSIZE, illen)) != 0) {
+		if(ih->iltype > Ilclose)
+			st = "?";
+		else
+			st = iltype[ih->iltype];
+		ipriv->stats[CsumErrs]++;
+		/* computed residue first, then the checksum field as received */
+		netlog(il->f, Logil, "il: cksum %ux %ux, pkt(%s id %lud ack %lud %I/%d->%d)\n",
+			csum, nhgets(ih->ilsum), st, nhgetl(ih->ilid),
+			nhgetl(ih->ilack), raddr, sp, dp);
+		goto raise;
+	}
+
+	QLOCK(il);
+	s = iphtlook(&ipriv->ht, raddr, dp, laddr, sp);
+	if(s == nil){
+		if(ih->iltype == Ilsync)
+			ilreject(il->f, ih);		/* no listener */
+		QUNLOCK(il);
+		goto raise;
+	}
+
+	ic = (Ilcb*)s->ptcl;
+	if(ic->state == Illistening){
+		if(ih->iltype != Ilsync){
+			QUNLOCK(il);
+			if(ih->iltype > Ilclose)
+				st = "?";
+			else
+				st = iltype[ih->iltype];
+			ilreject(il->f, ih);		/* no channel and not sync */
+			netlog(il->f, Logil, "il: no channel, pkt(%s id %lud ack %lud %I/%ud->%ud)\n",
+				st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
+			goto raise;
+		}
+
+		new = Fsnewcall(s, raddr, dp, laddr, sp, V4);
+		if(new == nil){
+			QUNLOCK(il);
+			netlog(il->f, Logil, "il: bad newcall %I/%ud->%ud\n", raddr, sp, dp);
+			ilsendctl(s, ih, Ilclose, 0, nhgetl(ih->ilid), 0);
+			goto raise;
+		}
+		s = new;
+
+		/* the rest of the packet is handled by the new conversation */
+		ic = (Ilcb*)s->ptcl;
+
+		ic->conv = s;
+		ic->state = Ilsyncee;
+		ilcbinit(ic);
+		ic->rstart = nhgetl(ih->ilid);
+		iphtadd(&ipriv->ht, s);
+	}
+
+	/* take the conversation lock before giving up the protocol lock */
+	QLOCK(s);
+	QUNLOCK(il);
+	if(waserror()){
+		QUNLOCK(s);
+		nexterror();
+	}
+	ilprocess(s, ih, bp);
+	QUNLOCK(s);
+	poperror();
+	return;
+raise:
+	freeblist(bp);
+}
+
+/*
+ * The IL state machine: handle packet bp (header h) received for
+ * conversation s according to the connection state and the packet
+ * type.  Called through ilprocess, which iliput invokes with the
+ * conversation locked.
+ *
+ * Every received packet refreshes lastrecv and the query timer and
+ * counts as InMsgs.  The block is freed here unless it is handed to
+ * iloutoforder, which queues or frees it.
+ */
+void
+_ilprocess(Conv *s, Ilhdr *h, Block *bp)
+{
+ Ilcb *ic;
+ ulong id, ack;
+ Ilpriv *priv;
+
+ id = nhgetl(h->ilid);
+ ack = nhgetl(h->ilack);
+
+ ic = (Ilcb*)s->ptcl;
+
+ ic->lastrecv = NOW;
+ ic->querytime = NOW + QueryTime;
+ priv = s->p->priv;
+ priv->stats[InMsgs]++;
+
+ switch(ic->state) {
+ default:
+ netlog(s->p->f, Logil, "il: unknown state %d\n", ic->state);
+ /* falls through: the packet is freed like in the closed state */
+ case Ilclosed:
+ freeblist(bp);
+ break;
+ case Ilsyncer:
+ /* we sent a sync and wait for the peer's sync in reply */
+ switch(h->iltype) {
+ default:
+ break;
+ case Ilsync:
+ if(ack != ic->start)
+ ilhangup(s, "connection rejected");
+ else {
+ ic->recvd = id;
+ ic->rstart = id;
+ ilsendctl(s, nil, Ilack, ic->next, ic->recvd, 0);
+ ic->state = Ilestablished;
+ ic->fasttimeout = 0;
+ ic->rexmit = 0;
+ Fsconnected(s, nil);
+ ilpullup(s);
+ }
+ break;
+ case Ilclose:
+ if(ack == ic->start)
+ ilhangup(s, "connection rejected");
+ break;
+ }
+ freeblist(bp);
+ break;
+ case Ilsyncee:
+ /* we received a sync and wait for the peer to ack ours */
+ switch(h->iltype) {
+ default:
+ break;
+ case Ilsync:
+ if(id != ic->rstart || ack != 0){
+ illocalclose(s);
+ } else {
+ ic->recvd = id;
+ ilsendctl(s, nil, Ilsync, ic->start, ic->recvd, 0);
+ }
+ break;
+ case Ilack:
+ if(ack == ic->start) {
+ ic->state = Ilestablished;
+ ic->fasttimeout = 0;
+ ic->rexmit = 0;
+ ilpullup(s);
+ }
+ break;
+ case Ildata:
+ /* data acking our sync also completes the handshake */
+ if(ack == ic->start) {
+ ic->state = Ilestablished;
+ ic->fasttimeout = 0;
+ ic->rexmit = 0;
+ goto established;
+ }
+ break;
+ case Ilclose:
+ if(ack == ic->start)
+ ilhangup(s, "remote close");
+ break;
+ }
+ freeblist(bp);
+ break;
+ case Ilestablished:
+ established:
+ switch(h->iltype) {
+ case Ilsync:
+ if(id != ic->rstart)
+ ilhangup(s, "remote close");
+ else
+ ilsendctl(s, nil, Ilack, ic->next, ic->rstart, 0);
+ freeblist(bp);
+ break;
+ case Ildata:
+ /*
+ * avoid consuming all the mount rpc buffers in the
+ * system. if the input queue is too long, drop this
+ * packet.
+ */
+ if (s->rq && qlen(s->rq) >= Maxrq) {
+ priv->stats[DroppedMsgs]++;
+ freeblist(bp);
+ break;
+ }
+
+ ilackto(ic, ack, bp);
+ iloutoforder(s, h, bp);
+ ilpullup(s);
+ break;
+ case Ildataquery:
+ /* data that also asks for our state in reply */
+ ilackto(ic, ack, bp);
+ iloutoforder(s, h, bp);
+ ilpullup(s);
+ ilsendctl(s, nil, Ilstate, ic->next, ic->recvd, h->ilspec);
+ break;
+ case Ilack:
+ ilackto(ic, ack, bp);
+ freeblist(bp);
+ break;
+ case Ilquery:
+ ilackto(ic, ack, bp);
+ ilsendctl(s, nil, Ilstate, ic->next, ic->recvd, h->ilspec);
+ freeblist(bp);
+ break;
+ case Ilstate:
+ if(ack >= ic->rttack)
+ ic->rttack = 0;
+ ilackto(ic, ack, bp);
+ /* ilspec indexes qt; out of range values use slot 0 */
+ if(h->ilspec > Nqt)
+ h->ilspec = 0;
+ /* the peer has not seen everything sent before that query */
+ if(ic->qt[h->ilspec] > ack){
+ ilrexmit(ic);
+ ilsettimeout(ic);
+ }
+ freeblist(bp);
+ break;
+ case Ilclose:
+ freeblist(bp);
+ if(ack < ic->start || ack > ic->next)
+ break;
+ ic->recvd = id;
+ ilsendctl(s, nil, Ilclose, ic->next, ic->recvd, 0);
+ ic->state = Ilclosing;
+ ilsettimeout(ic);
+ ilfreeq(ic);
+ break;
+ }
+ break;
+ case Illistening:
+ freeblist(bp);
+ break;
+ case Ilclosing:
+ switch(h->iltype) {
+ case Ilclose:
+ ic->recvd = id;
+ ilsendctl(s, nil, Ilclose, ic->next, ic->recvd, 0);
+ if(ack == ic->next)
+ ilhangup(s, nil);
+ break;
+ default:
+ break;
+ }
+ freeblist(bp);
+ break;
+ }
+}
+
+/*
+ * Retransmit the oldest unacknowledged message as a dataquery.
+ * A copy of the head of the unacked queue is taken under ackq, its
+ * type, ack field, query slot and checksum are rewritten, the
+ * retransmit timer is backed off and the copy is sent.  Nothing
+ * happens when the unacked queue is empty.
+ *
+ * The retransmission is counted in stats[Retrans], which is what
+ * ilxstats reports, as well as in the separate rexmit counter that
+ * was the only one updated before.
+ */
+void
+ilrexmit(Ilcb *ic)
+{
+	Ilhdr *h;
+	Block *nb;
+	Conv *c;
+	ulong id;
+	Ilpriv *priv;
+
+	nb = nil;
+	qlock(&ic->ackq);
+	if(ic->unacked)
+		nb = copyblock(ic->unacked, blocklen(ic->unacked));
+	qunlock(&ic->ackq);
+
+	if(nb == nil)
+		return;
+
+	h = (Ilhdr*)nb->rp;
+	h->vihl = IP_VER4;
+
+	h->iltype = Ildataquery;
+	hnputl(h->ilack, ic->recvd);
+	h->ilspec = ilnextqt(ic);
+
+	/* the checksum field must be zero while the sum is computed */
+	h->ilsum[0] = 0;
+	h->ilsum[1] = 0;
+	hnputs(h->ilsum, ptclcsum(nb, IL_IPSIZE, nhgets(h->illen)));
+
+	c = ic->conv;
+	id = nhgetl(h->ilid);
+	netlog(c->p->f, Logil, "il: rexmit %d %ud: %d %d: %i %d/%d\n", id, ic->recvd,
+		ic->rexmit, ic->timeout,
+		c->raddr, c->lport, c->rport);
+
+	ilbackoff(ic);
+
+	ipoput4(c->p->f, nb, 0, c->ttl, c->tos, c);
+
+	/* statistics */
+	ic->rxtot++;
+	priv = c->p->priv;
+	priv->stats[Retrans]++;
+	priv->rexmit++;
+}
+
+/*
+ * DEBUG wrapper around _ilprocess: logs the connection state and the
+ * packet before processing and the resulting state afterwards.
+ *
+ * The packet type comes straight off the wire, so it is range
+ * checked before it is used as an index into iltype, the same way
+ * iliput does it.
+ */
+void
+ilprocess(Conv *s, Ilhdr *h, Block *bp)
+{
+	Ilcb *ic;
+	char *st;
+
+	ic = (Ilcb*)s->ptcl;
+
+	USED(ic);
+	if(h->iltype > Ilclose)
+		st = "?";
+	else
+		st = iltype[h->iltype];
+	netlog(s->p->f, Logilmsg, "%11s rcv %d/%d snt %d/%d pkt(%s id %d ack %d %d->%d) ",
+		ilstates[ic->state], ic->rstart, ic->recvd, ic->start,
+		ic->next, st, nhgetl(h->ilid),
+		nhgetl(h->ilack), nhgets(h->ilsrc), nhgets(h->ildst));
+
+	_ilprocess(s, h, bp);
+
+	netlog(s->p->f, Logilmsg, "%11s rcv %d snt %d\n", ilstates[ic->state], ic->recvd, ic->next);
+}
+
+/*
+ * Tear down conversation s: close it locally and hang up both
+ * queues with msg (nil when there is no particular reason).  If the
+ * connection was still being set up from our side (Ilsyncer), the
+ * connect attempt is completed with msg via Fsconnected.
+ */
+void
+ilhangup(Conv *s, char *msg)
+{
+ Ilcb *ic;
+ int callout;
+
+ netlog(s->p->f, Logil, "il: hangup! %I %d/%d: %s\n", s->raddr,
+ s->lport, s->rport, msg?msg:"no reason");
+
+ ic = (Ilcb*)s->ptcl;
+ /* must be sampled before illocalclose resets the state */
+ callout = ic->state == Ilsyncer;
+ illocalclose(s);
+
+ qhangup(s->rq, msg);
+ qhangup(s->wq, msg);
+
+ if(callout)
+ Fsconnected(s, msg);
+}
+
+/*
+ * Deliver in-sequence messages from the out of order queue to the
+ * conversation's read queue.  Only done on established connections.
+ *
+ * Starting at the head of the (sorted) queue, messages at or below
+ * recvd are discarded as already received, and delivery stops at the
+ * first gap.  Each delivered message has its headers trimmed off and
+ * is flattened into a single block before being passed up.
+ */
+void
+ilpullup(Conv *s)
+{
+ Ilcb *ic;
+ Ilhdr *oh;
+ Block *bp;
+ ulong oid, dlen;
+ Ilpriv *ipriv;
+
+ ic = (Ilcb*)s->ptcl;
+ if(ic->state != Ilestablished)
+ return;
+
+ qlock(&ic->outo);
+ while(ic->outoforder) {
+ bp = ic->outoforder;
+ oh = (Ilhdr*)bp->rp;
+ oid = nhgetl(oh->ilid);
+ if(oid <= ic->recvd) {
+ ic->outoforder = bp->list;
+ freeblist(bp);
+ continue;
+ }
+ if(oid != ic->recvd+1){
+ /* a message is still missing in front of this one */
+ ipriv = s->p->priv;
+ ipriv->stats[OutOfOrder]++;
+ break;
+ }
+
+ ic->recvd = oid;
+ ic->outoforder = bp->list;
+
+ bp->list = nil;
+ /* NOTE(review): dlen is unsigned, so this relies on illen being at least IL_HDRSIZE */
+ dlen = nhgets(oh->illen)-IL_HDRSIZE;
+ bp = trimblock(bp, IL_IPSIZE+IL_HDRSIZE, dlen);
+ /*
+ * Upper levels don't know about multiple-block
+ * messages so copy all into one (yick).
+ */
+ bp = concatblock(bp);
+ if(bp == 0)
+ panic("ilpullup");
+ bp = packblock(bp);
+ if(bp == 0)
+ panic("ilpullup2");
+ qpass(s->rq, bp);
+ }
+ qunlock(&ic->outo);
+}
+
+/*
+ * Insert the received data message bp (header h) into the out of
+ * order queue, which is kept sorted by ascending sequence id.
+ * Messages outside the receive window (at or below recvd, or more
+ * than window ids ahead) and duplicates of a queued id are freed
+ * instead.  The block is always consumed.
+ */
+void
+iloutoforder(Conv *s, Ilhdr *h, Block *bp)
+{
+ Ilcb *ic;
+ uchar *lid;
+ Block *f, **l;
+ ulong id, newid;
+ Ilpriv *ipriv;
+
+ ipriv = s->p->priv;
+ ic = (Ilcb*)s->ptcl;
+ bp->list = nil;
+
+ id = nhgetl(h->ilid);
+ /* Window checks */
+ if(id <= ic->recvd || id > ic->recvd+ic->window) {
+ netlog(s->p->f, Logil, "il: message outside window %ud <%ud-%ud>: %i %d/%d\n",
+ id, ic->recvd, ic->recvd+ic->window, s->raddr, s->lport, s->rport);
+ freeblist(bp);
+ return;
+ }
+
+ /* Packet is acceptable so sort onto receive queue for pullup */
+ qlock(&ic->outo);
+ if(ic->outoforder == nil)
+ ic->outoforder = bp;
+ else {
+ /* l always points at the link that leads to f */
+ l = &ic->outoforder;
+ for(f = *l; f; f = f->list) {
+ lid = ((Ilhdr*)(f->rp))->ilid;
+ newid = nhgetl(lid);
+ if(id <= newid) {
+ if(id == newid) {
+ ipriv->stats[DupMsg]++;
+ ipriv->stats[DupBytes] += blocklen(bp);
+ qunlock(&ic->outo);
+ freeblist(bp);
+ return;
+ }
+ /* insert in front of the first larger id */
+ bp->list = f;
+ *l = bp;
+ qunlock(&ic->outo);
+ return;
+ }
+ l = &f->list;
+ }
+ /* larger than everything queued: append */
+ *l = bp;
+ }
+ qunlock(&ic->outo);
+}
+
+/*
+ * Send a header-only IL control message of the given type.
+ *
+ * With inih nil the message belongs to conversation ipc: addresses
+ * and ports come from ipc, id and ack are used as given, and the
+ * control block records that ack was just sent.  With inih set the
+ * message is a reply to that received header: addresses, ports and
+ * id/ack are the swapped values from inih, id and ack are ignored,
+ * and ipc is used only to reach the protocol's Fs and as the
+ * conversation passed to ipoput4.
+ *
+ * The sanity checks on ipc are made before anything dereferences
+ * it; previously they ran only after ipc had already been used.
+ */
+void
+ilsendctl(Conv *ipc, Ilhdr *inih, int type, ulong id, ulong ack, int ilspec)
+{
+	Ilhdr *ih;
+	Ilcb *ic;
+	Block *bp;
+	int ttl, tos;
+
+	if(ipc==nil)
+		panic("ipc is nil caller is %#p", getcallerpc(&ipc));
+	if(ipc->p==nil)
+		panic("ipc->p is nil");
+
+	bp = allocb(IL_IPSIZE+IL_HDRSIZE);
+	bp->wp += IL_IPSIZE+IL_HDRSIZE;
+
+	ih = (Ilhdr *)(bp->rp);
+	ih->vihl = IP_VER4;
+
+	/* Ip fields */
+	ih->proto = IP_ILPROTO;
+	hnputs(ih->illen, IL_HDRSIZE);
+	ih->frag[0] = 0;
+	ih->frag[1] = 0;
+	if(inih) {
+		/* reply: swap source and destination of the received header */
+		hnputl(ih->dst, nhgetl(inih->src));
+		hnputl(ih->src, nhgetl(inih->dst));
+		hnputs(ih->ilsrc, nhgets(inih->ildst));
+		hnputs(ih->ildst, nhgets(inih->ilsrc));
+		hnputl(ih->ilid, nhgetl(inih->ilack));
+		hnputl(ih->ilack, nhgetl(inih->ilid));
+		ttl = MAXTTL;
+		tos = DFLTTOS;
+	}
+	else {
+		v6tov4(ih->dst, ipc->raddr);
+		v6tov4(ih->src, ipc->laddr);
+		hnputs(ih->ilsrc, ipc->lport);
+		hnputs(ih->ildst, ipc->rport);
+		hnputl(ih->ilid, id);
+		hnputl(ih->ilack, ack);
+		ic = (Ilcb*)ipc->ptcl;
+		ic->acksent = ack;
+		ic->acktime = NOW;
+		ttl = ipc->ttl;
+		tos = ipc->tos;
+	}
+	ih->iltype = type;
+	ih->ilspec = ilspec;
+	ih->ilsum[0] = 0;
+	ih->ilsum[1] = 0;
+
+	if(ilcksum)
+		hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE));
+
+	netlog(ipc->p->f, Logilmsg, "ctl(%s id %d ack %d %d->%d)\n",
+		iltype[ih->iltype], nhgetl(ih->ilid), nhgetl(ih->ilack),
+		nhgets(ih->ilsrc), nhgets(ih->ildst));
+
+	ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
+}
+
+/*
+ * Answer the received header inih with a close message when there
+ * is no conversation to send it through.  Addresses, ports and the
+ * id/ack fields are the swapped values from inih; the packet goes
+ * out with MAXTTL, DFLTTOS and no conversation.
+ */
+void
+ilreject(Fs *f, Ilhdr *inih)
+{
+ Ilhdr *ih;
+ Block *bp;
+
+ bp = allocb(IL_IPSIZE+IL_HDRSIZE);
+ bp->wp += IL_IPSIZE+IL_HDRSIZE;
+
+ ih = (Ilhdr *)(bp->rp);
+ ih->vihl = IP_VER4;
+
+ /* Ip fields */
+ ih->proto = IP_ILPROTO;
+ hnputs(ih->illen, IL_HDRSIZE);
+ ih->frag[0] = 0;
+ ih->frag[1] = 0;
+ hnputl(ih->dst, nhgetl(inih->src));
+ hnputl(ih->src, nhgetl(inih->dst));
+ hnputs(ih->ilsrc, nhgets(inih->ildst));
+ hnputs(ih->ildst, nhgets(inih->ilsrc));
+ hnputl(ih->ilid, nhgetl(inih->ilack));
+ hnputl(ih->ilack, nhgetl(inih->ilid));
+ ih->iltype = Ilclose;
+ ih->ilspec = 0;
+ ih->ilsum[0] = 0;
+ ih->ilsum[1] = 0;
+
+ if(ilcksum)
+ hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE));
+
+ ipoput4(f, bp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * Arm the retransmit timer: the timeout is the fixed delay estimate
+ * plus the time to move the unacked bytes at the estimated rate plus
+ * twice the mean deviation plus AckDelay, limited to MaxTimeout.
+ */
+void
+ilsettimeout(Ilcb *ic)
+{
+	ulong fixed, xfer, slop, total;
+
+	fixed = ic->delay>>LogAGain;
+	xfer = ic->unackedbytes/(ic->rate>>LogAGain);
+	slop = ic->mdev>>(LogDGain-1);
+
+	total = fixed + xfer + slop + AckDelay;
+	if(total > MaxTimeout)
+		total = MaxTimeout;
+	ic->timeout = NOW + total;
+}
+
+/*
+ * Arm the retransmit timer after a retransmission.  The base value
+ * is computed as in ilsettimeout and then multiplied by 1.5 for each
+ * retransmission already made, limited to MaxTimeout.  In
+ * fasttimeout mode the timer is always one tick (Iltickms).
+ * The retransmit count is incremented.
+ */
+void
+ilbackoff(Ilcb *ic)
+{
+ ulong pt;
+ int i;
+
+ pt = (ic->delay>>LogAGain)
+ + ic->unackedbytes/(ic->rate>>LogAGain)
+ + (ic->mdev>>(LogDGain-1))
+ + AckDelay;
+ for(i = 0; i < ic->rexmit; i++)
+ pt = pt + (pt>>1);
+ if(pt > MaxTimeout)
+ pt = MaxTimeout;
+ ic->timeout = NOW + pt;
+
+ if(ic->fasttimeout)
+ ic->timeout = NOW+Iltickms;
+
+ ic->rexmit++;
+}
+
+// complain if two numbers not within an hour of each other
+#define Tfuture (1000*60*60)
+
+/*
+ * Report whether time t1 is later than t2.  The comparison is made
+ * on the signed difference.  A t1 more than Tfuture behind t2 is
+ * also reported as later.  When x is non-nil, differences of more
+ * than Tfuture in either direction are printed with x as a label.
+ */
+int
+later(ulong t1, ulong t2, char *x)
+{
+	int delta;
+
+	delta = t1 - t2;
+
+	/* not later: at or slightly before t2 */
+	if(delta <= 0 && delta >= -Tfuture)
+		return 0;
+
+	if(x != nil){
+		if(delta > Tfuture)
+			print("%s: way future %d\n", x, delta);
+		else if(delta < -Tfuture)
+			print("%s: way past %d\n", x, -delta);
+	}
+	return 1;
+}
+
+/*
+ * Timer kernel process, one per protocol instance (started by
+ * ilstart with the Proto as argument).  Every Iltickms it walks all
+ * conversations and, depending on the state:
+ *  - closing: resends the close when the timer expires;
+ *  - syncer/syncee: resends the sync when the timer expires;
+ *  - established: sends a delayed ack if one is owed, sends a query
+ *    when nothing has been received for QueryTime, hangs up after
+ *    DeathTime of silence, and sends a query with backoff when
+ *    unacked data has timed out.
+ * In every state the connection is hung up once rexmit exceeds
+ * MaxRexmit.
+ */
+void
+ilackproc(void *x)
+{
+ Ilcb *ic;
+ Conv **s, *p;
+ Proto *il;
+
+ il = x;
+
+loop:
+ tsleep(&up->sleep, return0, 0, Iltickms);
+ for(s = il->conv; s && *s; s++) {
+ p = *s;
+ ic = (Ilcb*)p->ptcl;
+
+ switch(ic->state) {
+ case Ilclosed:
+ case Illistening:
+ break;
+ case Ilclosing:
+ if(later(NOW, ic->timeout, "timeout0")) {
+ if(ic->rexmit > MaxRexmit){
+ ilhangup(p, nil);
+ break;
+ }
+ ilsendctl(p, nil, Ilclose, ic->next, ic->recvd, 0);
+ ilbackoff(ic);
+ }
+ break;
+
+ case Ilsyncee:
+ case Ilsyncer:
+ if(later(NOW, ic->timeout, "timeout1")) {
+ if(ic->rexmit > MaxRexmit){
+ ilhangup(p, etime);
+ break;
+ }
+ ilsendctl(p, nil, Ilsync, ic->start, ic->recvd, 0);
+ ilbackoff(ic);
+ }
+ break;
+
+ case Ilestablished:
+ /* delayed ack: something was received that has not been acked yet */
+ if(ic->recvd != ic->acksent)
+ if(later(NOW, ic->acktime, "acktime"))
+ ilsendctl(p, nil, Ilack, ic->next, ic->recvd, 0);
+
+ if(later(NOW, ic->querytime, "querytime")){
+ if(later(NOW, ic->lastrecv+DeathTime, "deathtime")){
+ netlog(il->f, Logil, "il: hangup: deathtime\n");
+ ilhangup(p, etime);
+ break;
+ }
+ ilsendctl(p, nil, Ilquery, ic->next, ic->recvd, ilnextqt(ic));
+ ic->querytime = NOW + QueryTime;
+ }
+
+ /* unacked data has timed out: ask the peer for its state */
+ if(ic->unacked != nil)
+ if(later(NOW, ic->timeout, "timeout2")) {
+ if(ic->rexmit > MaxRexmit){
+ netlog(il->f, Logil, "il: hangup: too many rexmits\n");
+ ilhangup(p, etime);
+ break;
+ }
+ ilsendctl(p, nil, Ilquery, ic->next, ic->recvd, ilnextqt(ic));
+ ic->rxquery++;
+ ilbackoff(ic);
+ }
+ break;
+ }
+ }
+ goto loop;
+}
+
+/*
+ * (Re)initialise a control block for a new connection: pick a random
+ * start id, empty the queues, clear the counters and reset the
+ * timing estimates to their defaults.  The connection state itself
+ * is left to the caller.
+ *
+ * ilstart calls this on control blocks that are in the closed state,
+ * so fields may hold values from an earlier connection.  The round
+ * trip probe (rttack) and the queue tail pointer are therefore
+ * cleared too; a stale non-zero rttack would keep ilkick from
+ * starting a new round trip measurement.
+ */
+void
+ilcbinit(Ilcb *ic)
+{
+	ic->start = nrand(0x1000000);
+	ic->next = ic->start+1;
+	ic->recvd = 0;
+	ic->window = Defaultwin;
+	ic->unackedbytes = 0;
+	ic->unacked = nil;
+	ic->unackedtail = nil;
+	ic->outoforder = nil;
+	ic->rexmit = 0;
+	ic->rxtot = 0;
+	ic->rxquery = 0;
+	ic->qtx = 1;
+	ic->fasttimeout = 0;
+
+	/* timers */
+	ic->delay = DefRtt<<LogAGain;
+	ic->mdev = DefRtt<<LogDGain;
+	ic->rate = DefByteRate<<LogAGain;
+	ic->rttack = 0;		/* round trip timer is free */
+	ic->querytime = NOW + QueryTime;
+	ic->lastrecv = NOW;	/* or we'll timeout right away */
+	ilsettimeout(ic);
+}
+
+/*
+ * Start conversation c as a listener (IL_LISTEN) or as the calling
+ * side of a connection (IL_CONNECT).  The first call on a protocol
+ * instance also starts its ilackproc kernel process.
+ *
+ * Nothing is done if the control block is not in the closed state.
+ * With fasttimeout set the first timeout is one tick and the
+ * retransmit count starts four short of MaxRexmit, so the attempt is
+ * given up quickly.  An unknown type is only logged.
+ * Always returns nil.
+ */
+char*
+ilstart(Conv *c, int type, int fasttimeout)
+{
+	Ilcb *ic;
+	Ilpriv *ipriv;
+	char kpname[KNAMELEN];
+
+	ipriv = c->p->priv;
+
+	/* the flag is checked again under apl so only one kproc is started */
+	if(ipriv->ackprocstarted == 0){
+		qlock(&ipriv->apl);
+		if(ipriv->ackprocstarted == 0){
+			/* bounded, so the name can never overrun kpname */
+			snprint(kpname, sizeof kpname, "#I%dilack", c->p->f->dev);
+			kproc(kpname, ilackproc, c->p);
+			ipriv->ackprocstarted = 1;
+		}
+		qunlock(&ipriv->apl);
+	}
+
+	ic = (Ilcb*)c->ptcl;
+	ic->conv = c;
+
+	if(ic->state != Ilclosed)
+		return nil;
+
+	ilcbinit(ic);
+
+	if(fasttimeout){
+		/* timeout if we can't connect quickly */
+		ic->fasttimeout = 1;
+		ic->timeout = NOW+Iltickms;
+		ic->rexmit = MaxRexmit - 4;
+	}
+
+	switch(type) {
+	default:
+		netlog(c->p->f, Logil, "il: start: type %d\n", type);
+		break;
+	case IL_LISTEN:
+		ic->state = Illistening;
+		iphtadd(&ipriv->ht, c);
+		break;
+	case IL_CONNECT:
+		ic->state = Ilsyncer;
+		iphtadd(&ipriv->ht, c);
+		ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0);
+		break;
+	}
+
+	return nil;
+}
+
+/*
+ * Free everything on the unacked and out of order queues, each
+ * under its own lock.  The unacked byte count and tail pointer are
+ * reset together with the queue, so that the timeout computations
+ * and the status line do not go on using the size of data that has
+ * been discarded.
+ */
+void
+ilfreeq(Ilcb *ic)
+{
+	Block *bp, *next;
+
+	qlock(&ic->ackq);
+	for(bp = ic->unacked; bp; bp = next) {
+		next = bp->list;
+		freeblist(bp);
+	}
+	ic->unacked = nil;
+	ic->unackedtail = nil;
+	ic->unackedbytes = 0;
+	qunlock(&ic->ackq);
+
+	qlock(&ic->outo);
+	for(bp = ic->outoforder; bp; bp = next) {
+		next = bp->list;
+		freeblist(bp);
+	}
+	ic->outoforder = nil;
+	qunlock(&ic->outo);
+}
+
+/*
+ * advise handler: bp holds the headers of a packet we sent that
+ * drew an error report, msg describes the error.  The conversation
+ * is found by local port and both addresses (see the comment below
+ * on the missing port); if it is still trying to connect (Ilsyncer)
+ * it is hung up with msg.  bp is always freed.
+ *
+ * The conversation table is walked with the same nil guard that
+ * ilackproc uses.
+ */
+void
+iladvise(Proto *il, Block *bp, char *msg)
+{
+	Ilhdr *h;
+	Ilcb *ic;
+	uchar source[IPaddrlen], dest[IPaddrlen];
+	ushort psource;
+	Conv *s, **p;
+
+	h = (Ilhdr*)(bp->rp);
+
+	v4tov6(dest, h->dst);
+	v4tov6(source, h->src);
+	psource = nhgets(h->ilsrc);
+
+
+	/* Look for a connection, unfortunately the destination port is missing */
+	QLOCK(il);
+	for(p = il->conv; p && *p; p++) {
+		s = *p;
+		if(s->lport == psource)
+		if(ipcmp(s->laddr, source) == 0)
+		if(ipcmp(s->raddr, dest) == 0){
+			QUNLOCK(il);
+			ic = (Ilcb*)s->ptcl;
+			switch(ic->state){
+			case Ilsyncer:
+				ilhangup(s, msg);
+				break;
+			}
+			freeblist(bp);
+			return;
+		}
+	}
+	QUNLOCK(il);
+	freeblist(bp);
+}
+
+/*
+ * Allocate the next query slot (slots cycle through 1..Nqt) and
+ * record in it the id of the highest message transmitted so far.
+ * Returns the slot number, which is sent in the ilspec field.
+ */
+int
+ilnextqt(Ilcb *ic)
+{
+	int slot;
+	ulong last;
+
+	qlock(&ic->ackq);
+	slot = ic->qtx + 1;
+	if(slot > Nqt)
+		slot = 1;
+	ic->qtx = slot;
+
+	last = ic->next-1;	/* highest xmitted packet */
+	ic->qt[slot] = last;
+	ic->qt[0] = last;	/* compatibility with old implementations */
+	qunlock(&ic->ackq);
+
+	return slot;
+}
+
+/*
+ * calculate scale constants that converts fast ticks to ms (more or less)
+ *
+ * ilrttcalc computes ms as ticks*scalemul/scalediv.  Only one of the
+ * two constants differs from 1, depending on whether the fastticks
+ * frequency is above 1000 Hz.
+ * NOTE(review): assumes fastticks reports a non-zero frequency; a
+ * zero hz would divide by zero in the else branch.
+ */
+static void
+inittimescale(void)
+{
+ uvlong hz;
+
+ fastticks(&hz);
+ if(hz > 1000){
+ scalediv = hz/1000;
+ scalemul = 1;
+ } else {
+ scalediv = 1;
+ scalemul = 1000/hz;
+ }
+}
+
+/*
+ * Set up the tick scaling, then allocate the IL protocol descriptor,
+ * fill in its operations and register it with the IP stack
+ * instance f.
+ */
+void
+ilinit(Fs *f)
+{
+ Proto *il;
+
+ inittimescale();
+
+ il = smalloc(sizeof(Proto));
+ il->priv = smalloc(sizeof(Ilpriv));
+ il->name = "il";
+ il->connect = ilconnect;
+ il->announce = ilannounce;
+ il->state = ilstate;
+ il->create = ilcreate;
+ il->close = ilclose;
+ il->rcv = iliput;
+ il->ctl = nil;
+ il->advise = iladvise;
+ il->stats = ilxstats;
+ il->inuse = ilinuse;
+ il->gc = nil;
+ il->ipproto = IP_ILPROTO;
+ il->nc = scalednconv();
+ il->ptclsize = sizeof(Ilcb); /* per-conversation Ilcb, reached through Conv.ptcl */
+ Fsproto(f, il);
+}
diff --git a/src/9vx/a/ip/inferno.c b/src/9vx/a/ip/inferno.c
@@ -0,0 +1,46 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+#include "ip.h"
+
+/*
+ * some hacks for commonality twixt inferno and plan9
+ */
+
+char*
+commonuser(void)
+{
+ return up->user; /* the user field of up, returned without copying */
+}
+
+Chan*
+commonfdtochan(int fd, int mode, int a, int b)
+{
+ return fdtochan(fd, mode, a, b); /* plain forwarder: all four arguments are passed through unchanged */
+}
+
+char*
+commonerror(void)
+{
+ return up->errstr; /* the errstr field of up, returned without copying */
+}
+
+char*
+bootp(Ipifc* _)
+{
+	return "unimplemented";	/* stub: the interface argument is ignored and this fixed message is always returned */
+}
+
+int
+bootpread(char* unused1, ulong unused2, int unused3)
+{
+	return 0;	/* stub: no argument is used (names avoid the reserved `__' prefix); always returns 0 */
+}
+
+Medium tripmedium =
+{
+ "trip", /* name, the first Medium field; no other field is given a value here */
+};
diff --git a/src/9vx/a/ip/ip.c b/src/9vx/a/ip/ip.c
@@ -0,0 +1,776 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+typedef struct Fragment4 Fragment4;
+typedef struct Fragment6 Fragment6;
+typedef struct Ipfrag Ipfrag;
+
+#define BLKIPVER(xp) (((Ip4hdr*)((xp)->rp))->vihl&0xF0)
+
+/* MIB II counters */
+enum
+{
+ Forwarding,
+ DefaultTTL,
+ InReceives,
+ InHdrErrors,
+ InAddrErrors,
+ ForwDatagrams,
+ InUnknownProtos,
+ InDiscards,
+ InDelivers,
+ OutRequests,
+ OutDiscards,
+ OutNoRoutes,
+ ReasmTimeout,
+ ReasmReqds,
+ ReasmOKs,
+ ReasmFails,
+ FragOKs,
+ FragFails,
+ FragCreates,
+
+ Nstats,
+};
+
+struct Fragment4
+{
+ Block* blist; /* fragments received so far, linked in offset order by ip4reassemble */
+ Fragment4* next; /* next queue on the active list or on the free list */
+ ulong src; /* source address of the datagram, read with nhgetl */
+ ulong dst; /* destination address, read with nhgetl */
+ ushort id; /* IP id shared by all fragments of the datagram */
+ ulong age; /* NOW value after which the queue is discarded (set to NOW + 30000) */
+};
+
+struct Fragment6
+{ /* NOTE(review): presumably the v6 counterpart of Fragment4; only initfrag touches it in this file - confirm against the v6 code */
+ Block* blist;
+ Fragment6* next; /* initfrag chains the free entries through this field */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+ uint id;
+ ulong age;
+};
+
+struct Ipfrag
+{
+ ushort foff; /* byte offset of this fragment's data within the datagram */
+ ushort flen; /* bytes of data in the fragment, IP header excluded */
+};
+
+/* an instance of IP */
+struct IP
+{
+ ulong stats[Nstats]; /* MIB II counters, indexed by the enum at the top of this file */
+
+ QLock fraglock4; /* held by ip4reassemble while the two v4 lists are walked or changed */
+ Fragment4* flisthead4; /* v4 reassembly queues in use */
+ Fragment4* fragfree4; /* v4 reassembly queues not in use */
+ Ref id4; /* incremented by ipoput4 to number locally generated packets */
+
+ QLock fraglock6; /* NOTE(review): presumably the v6 equivalents of the four fields above - confirm */
+ Fragment6* flisthead6;
+ Fragment6* fragfree6;
+ Ref id6;
+
+ int iprouting; /* true if we route like a gateway */
+};
+
+static char *statnames[] =
+{
+[Forwarding] "Forwarding",
+[DefaultTTL] "DefaultTTL",
+[InReceives] "InReceives",
+[InHdrErrors] "InHdrErrors",
+[InAddrErrors] "InAddrErrors",
+[ForwDatagrams] "ForwDatagrams",
+[InUnknownProtos] "InUnknownProtos",
+[InDiscards] "InDiscards",
+[InDelivers] "InDelivers",
+[OutRequests] "OutRequests",
+[OutDiscards] "OutDiscards",
+[OutNoRoutes] "OutNoRoutes",
+[ReasmTimeout] "ReasmTimeout",
+[ReasmReqds] "ReasmReqds",
+[ReasmOKs] "ReasmOKs",
+[ReasmFails] "ReasmFails",
+[FragOKs] "FragOKs",
+[FragFails] "FragFails",
+[FragCreates] "FragCreates",
+};
+
+#define BLKIP(xp) ((Ip4hdr*)((xp)->rp))
+/*
+ * This sleazy macro relies on the media header size being
+ * larger than sizeof(Ipfrag). ip4reassemble checks this is true
+ */
+#define BKFG(xp) ((Ipfrag*)((xp)->base))
+
+ushort ipcsum(uchar*);
+Block* ip4reassemble(IP*, int, Block*, Ip4hdr*);
+void ipfragfree4(IP*, Fragment4*);
+Fragment4* ipfragallo4(IP*);
+
+/* ip_init_6: allocate the v6 parameter block, load its defaults and attach it to f as f->v6p */
+void
+ip_init_6(Fs *f)
+{
+	v6params *params;
+	Routerparams *rp;
+
+	params = smalloc(sizeof(v6params));
+	params->cdrouter = -1;
+	params->hp.rxmithost = 1000;	/* v6 RETRANS_TIMER */
+
+	rp = &params->rp;
+	rp->mflag = 0;		/* default not managed */
+	rp->oflag = 0;
+	rp->maxraint = 600000;	/* millisecs */
+	rp->minraint = 200000;
+	rp->routerlt = 3 * rp->maxraint;
+	rp->linkmtu = 0;	/* no mtu sent */
+	rp->reachtime = 0;
+	rp->rxmitra = 0;
+	rp->ttl = MAXTTL;
+	f->v6p = params;
+}
+
+/* initfrag: allocate `size' v4 and `size' v6 reassembly queues and chain each array into its free list; panics if malloc fails */
+void
+initfrag(IP *ip, int size)
+{
+	int i;
+	Fragment4 *pool4;
+	Fragment6 *pool6;
+
+	pool4 = (Fragment4*)malloc(sizeof(Fragment4) * size);
+	ip->fragfree4 = pool4;
+	if(pool4 == nil)
+		panic("initfrag");
+	/* entry i points at entry i+1; the last entry ends the list */
+	for(i = 0; i < size-1; i++)
+		pool4[i].next = &pool4[i+1];
+	pool4[size-1].next = nil;
+
+	pool6 = (Fragment6*)malloc(sizeof(Fragment6) * size);
+	ip->fragfree6 = pool6;
+	if(pool6 == nil)
+		panic("initfrag");
+	/* same chaining for the v6 queues */
+	for(i = 0; i < size-1; i++)
+		pool6[i].next = &pool6[i+1];
+	pool6[size-1].next = nil;
+}
+
+/* ip_init: give f a fresh IP instance with its fragment queues, then load the v6 defaults */
+void
+ip_init(Fs *f)
+{
+	IP *inst;
+
+	inst = smalloc(sizeof(IP));
+	initfrag(inst, 100);	/* 100 reassembly queues for v4 and 100 for v6 */
+	f->ip = inst;
+	ip_init_6(f);
+}
+
+/* iprouting: record whether this stack forwards packets; stats[Forwarding] becomes 1 when on, 2 when off */
+void
+iprouting(Fs *f, int on)
+{
+	IP *ip = f->ip;
+
+	ip->iprouting = on;
+	ip->stats[Forwarding] = on ? 1 : 2;
+}
+
+int
+ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+{
+ Ipifc *ifc;
+ uchar *gate;
+ ulong fragoff;
+ Block *xp, *nb;
+ Ip4hdr *eh, *feh;
+ int lid, len, seglen, chunk, dlen, blklen, offset, medialen;
+ Route *r, *sr;
+ IP *ip;
+ int rv = 0;
+ /* ipoput4: route and transmit the v4 packet in bp, fragmenting it if it does not fit; gating is set for forwarded packets; returns -1 only when there is no route */
+ ip = f->ip;
+
+ /* Fill out the ip header */
+ eh = (Ip4hdr*)(bp->rp);
+
+ ip->stats[OutRequests]++;
+
+ /* Number of uchars in data and ip header to write */
+ len = blocklen(bp);
+
+ if(gating){ /* forwarded packet: the header's length field bounds what is sent */
+ chunk = nhgets(eh->length);
+ if(chunk > len){
+ ip->stats[OutDiscards]++;
+ netlog(f, Logip, "short gated packet\n");
+ goto free;
+ }
+ if(chunk < len)
+ len = chunk;
+ }
+ if(len >= IP_MAX){
+ ip->stats[OutDiscards]++;
+ netlog(f, Logip, "exceeded ip max size %V\n", eh->dst);
+ goto free;
+ }
+
+ r = v4lookup(f, eh->dst, c);
+ if(r == nil){
+ ip->stats[OutNoRoutes]++;
+ netlog(f, Logip, "no interface %V\n", eh->dst);
+ rv = -1; /* the only failure that is reported to the caller */
+ goto free;
+ }
+
+ ifc = r->ifc;
+ if(r->type & (Rifc|Runi))
+ gate = eh->dst; /* directly connected or self address: send straight to the destination */
+ else
+ if(r->type & (Rbcast|Rmulti)) {
+ gate = eh->dst;
+ sr = v4lookup(f, eh->src, nil); /* prefer the interface that owns the source address */
+ if(sr != nil && (sr->type & Runi))
+ ifc = sr->ifc;
+ }
+ else
+ gate = r->v4.gate; /* otherwise go through the route's gateway */
+
+ if(!gating)
+ eh->vihl = IP_VER4|IP_HLEN4;
+ eh->ttl = ttl;
+ if(!gating)
+ eh->tos = tos;
+
+ if(!CANRLOCK(ifc)) /* lock not obtained: the packet is dropped and rv stays 0 */
+ goto free;
+ if(waserror()){ /* release the interface lock if anything below raises an error */
+ RUNLOCK(ifc);
+ nexterror();
+ }
+ if(ifc->m == nil) /* no medium on the interface */
+ goto raise;
+
+ /* If we don't need to fragment just send it */
+ medialen = ifc->maxtu - ifc->m->hsize; /* room left for the IP packet after the medium header */
+ if(len <= medialen) {
+ if(!gating)
+ hnputs(eh->id, incref(&ip->id4));
+ hnputs(eh->length, len);
+ if(!gating){
+ eh->frag[0] = 0;
+ eh->frag[1] = 0;
+ }
+ eh->cksum[0] = 0; /* the checksum field must be zero while the sum is computed */
+ eh->cksum[1] = 0;
+ hnputs(eh->cksum, ipcsum(&eh->vihl));
+ ifc->m->bwrite(ifc, bp, V4, gate);
+ RUNLOCK(ifc);
+ poperror();
+ return 0; /* bp was passed to the medium's bwrite; it is not freed here */
+ }
+
+if((eh->frag[0] & (IP_DF>>8)) && !gating) print("%V: DF set\n", eh->dst); /* left-margin line: looks like a debugging aid */
+
+ if(eh->frag[0] & (IP_DF>>8)){ /* too big and Don't Fragment is set: tell the sender and drop */
+ ip->stats[FragFails]++;
+ ip->stats[OutDiscards]++;
+ icmpcantfrag(f, bp, medialen);
+ netlog(f, Logip, "%V: eh->frag[0] & (IP_DF>>8)\n", eh->dst);
+ goto raise;
+ }
+
+ seglen = (medialen - IP4HDR) & ~7; /* data bytes per fragment, rounded down to a multiple of 8 */
+ if(seglen < 8){
+ ip->stats[FragFails]++;
+ ip->stats[OutDiscards]++;
+ netlog(f, Logip, "%V seglen < 8\n", eh->dst);
+ goto raise;
+ }
+
+ dlen = len - IP4HDR;
+ xp = bp;
+ if(gating)
+ lid = nhgets(eh->id); /* forwarded fragments keep the original id */
+ else
+ lid = incref(&ip->id4);
+
+ offset = IP4HDR; /* step xp/rp past the IP header so only data is copied below */
+ while(xp != nil && offset && offset >= BLEN(xp)) {
+ offset -= BLEN(xp);
+ xp = xp->next;
+ }
+ xp->rp += offset; /* NOTE(review): xp is not checked for nil after the loop above */
+
+ if(gating)
+ fragoff = nhgets(eh->frag)<<3; /* continue from the offset the incoming packet already carried */
+ else
+ fragoff = 0;
+ dlen += fragoff; /* dlen is now the end offset of the data, in the same units as fragoff */
+ for(; fragoff < dlen; fragoff += seglen) {
+ nb = allocb(IP4HDR+seglen);
+ feh = (Ip4hdr*)(nb->rp);
+
+ memmove(nb->wp, eh, IP4HDR); /* every fragment starts with a copy of the original header */
+ nb->wp += IP4HDR;
+
+ if((fragoff + seglen) >= dlen) { /* last fragment: shorter, and IP_MF is not set */
+ seglen = dlen - fragoff;
+ hnputs(feh->frag, fragoff>>3);
+ }
+ else
+ hnputs(feh->frag, (fragoff>>3)|IP_MF);
+
+ hnputs(feh->length, seglen + IP4HDR);
+ hnputs(feh->id, lid);
+
+ /* Copy up the data area */
+ chunk = seglen;
+ while(chunk) {
+ if(!xp) { /* ran out of source blocks before the fragment was filled */
+ ip->stats[OutDiscards]++;
+ ip->stats[FragFails]++;
+ freeblist(nb);
+ netlog(f, Logip, "!xp: chunk %d\n", chunk);
+ goto raise;
+ }
+ blklen = chunk;
+ if(BLEN(xp) < chunk)
+ blklen = BLEN(xp);
+ memmove(nb->wp, xp->rp, blklen);
+ nb->wp += blklen;
+ xp->rp += blklen;
+ chunk -= blklen;
+ if(xp->rp == xp->wp)
+ xp = xp->next;
+ }
+
+ feh->cksum[0] = 0;
+ feh->cksum[1] = 0;
+ hnputs(feh->cksum, ipcsum(&feh->vihl));
+ ifc->m->bwrite(ifc, nb, V4, gate);
+ ip->stats[FragCreates]++;
+ }
+ ip->stats[FragOKs]++;
+raise: /* exit taken while the interface read lock and the error label are still held */
+ RUNLOCK(ifc);
+ poperror();
+free: /* exit for every path on which bp still belongs to this function */
+ freeblist(bp);
+ return rv;
+}
+
+/* ipiput4: input path for one received packet; anything that is not IPv4 is passed on to ipiput6. */
+/* bp is consumed: delivered to a protocol's rcv, forwarded with ipoput4, handed to reassembly, or freed. */
+void
+ipiput4(Fs *f, Ipifc *ifc, Block *bp)
+{
+	int hl, hop, tos, proto, olen, notforme;
+	Ip4hdr *h;
+	Proto *p;
+	ushort frag;
+	uchar *dp, v6dst[IPaddrlen];
+	IP *ip;
+	Route *r;
+
+	if(BLKIPVER(bp) != IP_VER4) {
+		ipiput6(f, ifc, bp);
+		return;
+	}
+
+	ip = f->ip;
+	ip->stats[InReceives]++;
+
+	/*
+	 *  Ensure we have all the header info in the first
+	 *  block.  Make life easier for other protocols by
+	 *  collecting up to the first 64 bytes in the first block.
+	 */
+	if(BLEN(bp) < 64) {
+		hl = blocklen(bp);
+		if(hl < IP4HDR)
+			hl = IP4HDR;
+		if(hl > 64)
+			hl = 64;
+		bp = pullupblock(bp, hl);
+		if(bp == nil)
+			return;
+	}
+
+	h = (Ip4hdr*)(bp->rp);
+
+	/* drop anything whose header doesn't checksum (skipped when the block is flagged Bipck) */
+	if((bp->flag & Bipck) == 0 && ipcsum(&h->vihl)) {
+		ip->stats[InHdrErrors]++;
+		netlog(f, Logip, "ip: checksum error %V\n", h->src);
+		freeblist(bp);
+		return;
+	}
+	v4tov6(v6dst, h->dst);
+	notforme = ipforme(f, v6dst) == 0;
+
+	/* Check header length and version */
+	if((h->vihl&0x0F) != IP_HLEN4) {
+		hl = (h->vihl&0xF)<<2;
+		if(hl < (IP_HLEN4<<2)) {
+			ip->stats[InHdrErrors]++;
+			netlog(f, Logip, "ip: %V bad hivl %ux\n", h->src, h->vihl);
+			freeblist(bp);
+			return;
+		}
+		/* If this is not routed strip off the options */
+		if(notforme == 0) {
+			olen = nhgets(h->length);
+			dp = bp->rp + (hl - (IP_HLEN4<<2));
+			memmove(dp, h, IP_HLEN4<<2);	/* slide the fixed header up over the options */
+			bp->rp = dp;
+			h = (Ip4hdr*)(bp->rp);
+			h->vihl = (IP_VER4|IP_HLEN4);
+			hnputs(h->length, olen-hl+(IP_HLEN4<<2));
+		}
+	}
+
+	/* route */
+	if(notforme) {
+		Conv conv;
+
+		if(!ip->iprouting){
+			freeblist(bp);	/* was freeb: bp may be a chain, and freeb would leak all but the first block */
+			return;
+		}
+
+		/* don't forward to source's network */
+		conv.r = nil;
+		r = v4lookup(f, h->dst, &conv);
+		if(r == nil || r->ifc == ifc){
+			ip->stats[OutDiscards]++;
+			freeblist(bp);
+			return;
+		}
+
+		/* don't forward if packet has timed out */
+		hop = h->ttl;
+		if(hop < 1) {
+			ip->stats[InHdrErrors]++;
+			icmpttlexceeded(f, ifc->lifc->local, bp);
+			freeblist(bp);
+			return;
+		}
+
+		/* reassemble if the interface expects it */
+		if(r->ifc == nil) panic("nil route rfc");
+		if(r->ifc->reassemble){
+			frag = nhgets(h->frag);
+			if(frag) {
+				h->tos = 0;	/* tos is borrowed as the "more fragments" flag read by ip4reassemble */
+				if(frag & IP_MF)
+					h->tos = 1;
+				bp = ip4reassemble(ip, frag, bp, h);
+				if(bp == nil)
+					return;
+				h = (Ip4hdr*)(bp->rp);
+			}
+		}
+
+		ip->stats[ForwDatagrams]++;
+		tos = h->tos;
+		hop = h->ttl;
+		ipoput4(f, bp, 1, hop - 1, tos, &conv);	/* gating=1: forward with the ttl decremented */
+		return;
+	}
+
+	frag = nhgets(h->frag);
+	if(frag) {
+		h->tos = 0;	/* tos is borrowed as the "more fragments" flag read by ip4reassemble */
+		if(frag & IP_MF)
+			h->tos = 1;
+		bp = ip4reassemble(ip, frag, bp, h);
+		if(bp == nil)
+			return;
+		h = (Ip4hdr*)(bp->rp);
+	}
+
+	/* don't let any frag info go up the stack */
+	h->frag[0] = 0;
+	h->frag[1] = 0;
+
+	proto = h->proto;
+	p = Fsrcvpcol(f, proto);
+	if(p != nil && p->rcv != nil) {
+		ip->stats[InDelivers]++;
+		(*p->rcv)(p, ifc, bp);
+		return;
+	}
+	ip->stats[InDiscards]++;
+	ip->stats[InUnknownProtos]++;
+	freeblist(bp);
+}
+
+/* ipstats: print every counter as "name: value\n" into buf (at most len bytes); returns how far seprint advanced from buf */
+int
+ipstats(Fs *f, char *buf, int len)
+{
+	ulong *counter;
+	char *cur, *end;
+	int idx;
+
+	counter = f->ip->stats;
+	counter[DefaultTTL] = MAXTTL;	/* rewritten on every call */
+	end = buf+len;
+	cur = buf;
+	for(idx = 0; idx < Nstats; idx++)
+		cur = seprint(cur, end, "%s: %lud\n", statnames[idx], counter[idx]);
+	return cur - buf;
+}
+
+Block*
+ip4reassemble(IP *ip, int offset, Block *bp, Ip4hdr *ih)
+{
+ int fend;
+ ushort id;
+ Fragment4 *f, *fnext;
+ ulong src, dst;
+ Block *bl, **l, *last, *prev;
+ int ovlap, len, fragsize, pktposn;
+ /* ip4reassemble: file fragment bp in its queue; returns bp if it is not a fragment, the whole datagram once complete, otherwise nil */
+ src = nhgetl(ih->src);
+ dst = nhgetl(ih->dst);
+ id = nhgets(ih->id);
+
+ /*
+ * block lists are too hard, pullupblock into a single block
+ */
+ if(bp->next){
+ bp = pullupblock(bp, blocklen(bp)); /* NOTE(review): the result is not checked for nil */
+ ih = (Ip4hdr*)(bp->rp);
+ }
+
+ qlock(&ip->fraglock4);
+
+ /*
+ * find a reassembly queue for this fragment
+ */
+ for(f = ip->flisthead4; f; f = fnext){
+ fnext = f->next; /* because ipfragfree4 changes the list */
+ if(f->src == src && f->dst == dst && f->id == id)
+ break;
+ if(f->age < NOW){ /* expired queue met during the scan: discard it */
+ ip->stats[ReasmTimeout]++;
+ ipfragfree4(ip, f);
+ }
+ }
+
+ /*
+ * if this isn't a fragmented packet, accept it
+ * and get rid of any fragments that might go
+ * with it.
+ */
+ if(!ih->tos && (offset & ~(IP_MF|IP_DF)) == 0) { /* ih->tos was set by ipiput4 to 1 when IP_MF is on */
+ if(f != nil) {
+ ipfragfree4(ip, f);
+ ip->stats[ReasmFails]++;
+ }
+ qunlock(&ip->fraglock4);
+ return bp;
+ }
+
+ if(bp->base+sizeof(Ipfrag) >= bp->rp){ /* make room in front of rp for the Ipfrag that BKFG keeps at bp->base */
+ bp = padblock(bp, sizeof(Ipfrag));
+ bp->rp += sizeof(Ipfrag);
+ }
+
+ BKFG(bp)->foff = offset<<3; /* byte offset; foff is a ushort, so the flag bits shifted above bit 15 are dropped */
+ BKFG(bp)->flen = nhgets(ih->length)-IP4HDR; /* data bytes carried by this fragment */
+
+ /* First fragment allocates a reassembly queue */
+ if(f == nil) {
+ f = ipfragallo4(ip);
+ f->id = id;
+ f->src = src;
+ f->dst = dst;
+
+ f->blist = bp;
+
+ qunlock(&ip->fraglock4);
+ ip->stats[ReasmReqds]++; /* counted once per new queue, after the lock is released */
+ return nil;
+ }
+
+ /*
+ * find the new fragment's position in the queue
+ */
+ prev = nil;
+ l = &f->blist;
+ bl = f->blist;
+ while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) { /* l ends up addressing the link where bp is inserted */
+ prev = bl;
+ l = &bl->next;
+ bl = bl->next;
+ }
+
+ /* Check overlap of a previous fragment - trim away as necessary */
+ if(prev) {
+ ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+ if(ovlap > 0) {
+ if(ovlap >= BKFG(bp)->flen) { /* the new fragment is wholly covered by prev: drop it */
+ freeblist(bp);
+ qunlock(&ip->fraglock4);
+ return nil;
+ }
+ BKFG(prev)->flen -= ovlap; /* otherwise prev gives up its overlapping tail */
+ }
+ }
+
+ /* Link onto assembly queue */
+ bp->next = *l;
+ *l = bp;
+
+ /* Check to see if succeeding segments overlap */
+ if(bp->next) {
+ l = &bp->next;
+ fend = BKFG(bp)->foff + BKFG(bp)->flen; /* first byte offset past the new fragment */
+ /* Take completely covered segments out */
+ while(*l) {
+ ovlap = fend - BKFG(*l)->foff;
+ if(ovlap <= 0)
+ break;
+ if(ovlap < BKFG(*l)->flen) { /* partial overlap: trim the front of the following fragment */
+ BKFG(*l)->flen -= ovlap;
+ BKFG(*l)->foff += ovlap;
+ /* move up ih hdrs */
+ memmove((*l)->rp + ovlap, (*l)->rp, IP4HDR);
+ (*l)->rp += ovlap;
+ break;
+ }
+ last = (*l)->next; /* fully covered: unlink and free it */
+ (*l)->next = nil;
+ freeblist(*l);
+ *l = last;
+ }
+ }
+
+ /*
+ * look for a complete packet. if we get to a fragment
+ * without IP_MF set, we're done.
+ */
+ pktposn = 0;
+ for(bl = f->blist; bl; bl = bl->next) {
+ if(BKFG(bl)->foff != pktposn) /* hole before this fragment: not complete yet */
+ break;
+ if((BLKIP(bl)->frag[0]&(IP_MF>>8)) == 0) {
+ bl = f->blist;
+ len = nhgets(BLKIP(bl)->length); /* NOTE(review): the first block is sized from its IP length field, not from its (possibly trimmed) flen */
+ bl->wp = bl->rp + len;
+
+ /* Pullup all the fragment headers and
+ * return a complete packet
+ */
+ for(bl = bl->next; bl; bl = bl->next) {
+ fragsize = BKFG(bl)->flen;
+ len += fragsize;
+ bl->rp += IP4HDR; /* later blocks contribute data only */
+ bl->wp = bl->rp + fragsize;
+ }
+
+ bl = f->blist;
+ f->blist = nil; /* detach the blocks so ipfragfree4 does not free them */
+ ipfragfree4(ip, f);
+ ih = BLKIP(bl);
+ hnputs(ih->length, len);
+ qunlock(&ip->fraglock4);
+ ip->stats[ReasmOKs]++;
+ return bl;
+ }
+ pktposn += BKFG(bl)->flen;
+ }
+ qunlock(&ip->fraglock4);
+ return nil;
+}
+
+/*
+ * ipfragfree4 - retire one reassembly queue: free the blocks it holds,
+ * unlink it from the active list (if it is there) and push it on the
+ * free list.  src, id and blist are cleared; dst and age are left alone.
+ * Assumes the caller holds fraglock4.
+ */
+void
+ipfragfree4(IP *ip, Fragment4 *frag)
+{
+	Fragment4 **link;
+
+	if(frag->blist != nil)
+		freeblist(frag->blist);
+	frag->blist = nil;
+	frag->src = 0;
+	frag->id = 0;
+
+	/* follow the links of the active list until one points at frag */
+	for(link = &ip->flisthead4; *link != nil; link = &(*link)->next)
+		if(*link == frag) {
+			*link = frag->next;
+			break;
+		}
+
+	/* the queue is now unused */
+	frag->next = ip->fragfree4;
+	ip->fragfree4 = frag;
+}
+
+/*
+ * ipfragallo4 - hand out a reassembly queue, linked at the head of the active
+ * list; recycles the active list's tail when none is free.  Assumes fraglock4 is held.
+ */
+Fragment4 *
+ipfragallo4(IP *ip)
+{
+	Fragment4 *q, *tail;
+
+	while(ip->fragfree4 == nil) {
+		tail = ip->flisthead4;
+		while(tail->next != nil)
+			tail = tail->next;
+		ipfragfree4(ip, tail);	/* free last entry on fraglist */
+	}
+	q = ip->fragfree4;
+	ip->fragfree4 = q->next;
+	q->next = ip->flisthead4;
+	ip->flisthead4 = q;
+	q->age = NOW + 30000;
+	return q;
+}
+
+/*
+ * ipcsum - one's-complement checksum over an IPv4 header.  The number of
+ * bytes summed is four times the low nibble of addr[0].
+ */
+ushort
+ipcsum(uchar *addr)
+{
+	uchar *end;
+	ulong acc;
+
+	acc = 0;
+	/* add the header up as big-endian 16-bit words */
+	for(end = addr + ((addr[0]&0xf)<<2); addr < end; addr += 2)
+		acc += addr[0]<<8 | addr[1];
+
+	/* fold the carries back into the low 16 bits, twice */
+	acc = (acc & 0xffff) + (acc >> 16);
+	acc = (acc & 0xffff) + (acc >> 16);
+	return (acc^0xffff);
+}
diff --git a/src/9vx/a/ip/ip.h b/src/9vx/a/ip/ip.h
@@ -0,0 +1,677 @@
+typedef struct Conv Conv;
+typedef struct Fs Fs;
+typedef union Hwaddr Hwaddr;
+typedef struct IP IP;
+typedef struct IPaux IPaux;
+typedef struct Ipself Ipself;
+typedef struct Ipselftab Ipselftab;
+typedef struct Iplink Iplink;
+typedef struct Iplifc Iplifc;
+typedef struct Ipmulti Ipmulti;
+typedef struct Ipifc Ipifc;
+typedef struct Iphash Iphash;
+typedef struct Ipht Ipht;
+typedef struct Netlog Netlog;
+typedef struct Medium Medium;
+typedef struct Proto Proto;
+typedef struct Arpent Arpent;
+typedef struct Arp Arp;
+typedef struct Route Route;
+
+typedef struct Routerparams Routerparams;
+typedef struct Hostparams Hostparams;
+typedef struct v6router v6router;
+typedef struct v6params v6params;
+
+enum
+{
+ Addrlen= 64,
+ Maxproto= 20,
+ Nhash= 64,
+ Maxincall= 5,
+ Nchans= 1024,
+ MAClen= 16, /* longest mac address */
+
+ MAXTTL= 255,
+ DFLTTOS= 0,
+
+ IPaddrlen= 16,
+ IPv4addrlen= 4,
+ IPv4off= 12,
+ IPllen= 4,
+
+ /* ip versions */
+ V4= 4,
+ V6= 6,
+ IP_VER4= 0x40,
+ IP_VER6= 0x60,
+ IP_HLEN4= 5, /* v4: Header length in words */
+ IP_DF= 0x4000, /* v4: Don't fragment */
+ IP_MF= 0x2000, /* v4: More fragments */
+ IP4HDR= 20, /* sizeof(Ip4hdr) */
+ IP_MAX= 64*1024, /* Max. Internet packet size, v4 & v6 */
+
+ /* 2^Lroot trees in the root table */
+ Lroot= 10,
+
+ Maxpath = 64,
+};
+
+enum
+{
+ Idle= 0,
+ Announcing= 1,
+ Announced= 2,
+ Connecting= 3,
+ Connected= 4,
+};
+
+/* on the wire packet header */
+typedef struct Ip4hdr Ip4hdr;
+struct Ip4hdr
+{
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* ip->identification */
+ uchar frag[2]; /* Fragment information */
+ uchar ttl; /* Time to live */
+ uchar proto; /* Protocol */
+ uchar cksum[2]; /* Header checksum */
+ uchar src[4]; /* IP source */
+ uchar dst[4]; /* IP destination */
+};
+
+/*
+ * one per conversation directory
+ */
+struct Conv
+{
+ QLock qlock;
+
+ int x; /* conversation index */
+ Proto* p;
+
+ int restricted; /* remote port is restricted */
+ uint ttl; /* max time to live */
+ uint tos; /* type of service */
+ int ignoreadvice; /* don't terminate connection on icmp errors */
+
+ uchar ipversion;
+ uchar laddr[IPaddrlen]; /* local IP address */
+ uchar raddr[IPaddrlen]; /* remote IP address */
+ ushort lport; /* local port number */
+ ushort rport; /* remote port number */
+
+ char *owner; /* protections */
+ int perm;
+ int inuse; /* opens of listen/data/ctl */
+ int length;
+ int state;
+
+ int maxfragsize; /* If set, used for fragmentation */
+
+ /* udp specific */
+ int headers; /* data src/dst headers in udp */
+ int reliable; /* true if reliable udp */
+
+ Conv* incall; /* calls waiting to be listened for */
+ Conv* next;
+
+ Queue* rq; /* queued data waiting to be read */
+ Queue* wq; /* queued data waiting to be written */
+ Queue* eq; /* returned error packets */
+ Queue* sq; /* snooping queue */
+ Ref snoopers; /* number of processes with snoop open */
+
+ QLock car;
+ Rendez cr;
+ char cerr[ERRMAX];
+
+ QLock listenq;
+ Rendez listenr;
+
+ Ipmulti *multi; /* multicast bindings for this interface */
+
+ void* ptcl; /* protocol specific stuff */
+
+ Route *r; /* last route used */
+ ulong rgen; /* routetable generation for *r */
+};
+
+struct Medium
+{
+ char *name;
+ int hsize; /* medium header size */
+ int mintu; /* default min mtu */
+ int maxtu; /* default max mtu */
+ int maclen; /* mac address length */
+ void (*bind)(Ipifc*, int, char**);
+ void (*unbind)(Ipifc*);
+ void (*bwrite)(Ipifc *ifc, Block *b, int version, uchar *ip);
+
+ /* for arming interfaces to receive multicast */
+ void (*addmulti)(Ipifc *ifc, uchar *a, uchar *ia);
+ void (*remmulti)(Ipifc *ifc, uchar *a, uchar *ia);
+
+ /* process packets written to 'data' */
+ void (*pktin)(Fs *f, Ipifc *ifc, Block *bp);
+
+ /* routes for router boards */
+ void (*addroute)(Ipifc *ifc, int, uchar*, uchar*, uchar*, int);
+ void (*remroute)(Ipifc *ifc, int, uchar*, uchar*);
+ void (*flushroutes)(Ipifc *ifc);
+
+ /* for routing multicast groups */
+ void (*joinmulti)(Ipifc *ifc, uchar *a, uchar *ia);
+ void (*leavemulti)(Ipifc *ifc, uchar *a, uchar *ia);
+
+ /* address resolution */
+ void (*ares)(Fs*, int, uchar*, uchar*, int, int); /* resolve */
+ void (*areg)(Ipifc*, uchar*); /* register */
+
+ /* v6 address generation */
+ void (*pref2addr)(uchar *pref, uchar *ea);
+
+ int unbindonclose; /* if non-zero, unbind on last close */
+};
+
+/* logical interface associated with a physical one */
+struct Iplifc
+{
+ uchar local[IPaddrlen];
+ uchar mask[IPaddrlen];
+ uchar remote[IPaddrlen];
+ uchar net[IPaddrlen];
+ uchar tentative; /* =1 => v6 dup disc on, =0 => confirmed unique */
+ uchar onlink; /* =1 => onlink, =0 offlink. */
+ uchar autoflag; /* v6 autonomous flag */
+ long validlt; /* v6 valid lifetime */
+ long preflt; /* v6 preferred lifetime */
+ long origint; /* time when addr was added */
+ Iplink *link; /* addresses linked to this lifc */
+ Iplifc *next;
+};
+
+/* binding twixt Ipself and Iplifc */
+struct Iplink
+{
+ Ipself *self;
+ Iplifc *lifc;
+ Iplink *selflink; /* next link for this local address */
+ Iplink *lifclink; /* next link for this ifc */
+ ulong expire;
+ Iplink *next; /* free list */
+ int ref;
+};
+
+/* rfc 2461, pp.40—43. */
+
+/* default values, one per stack */
+struct Routerparams {
+ int mflag; /* flag: managed address configuration */
+ int oflag; /* flag: other stateful configuration */
+ int maxraint; /* max. router adv interval (ms) */
+ int minraint; /* min. router adv interval (ms) */
+ int linkmtu; /* mtu options */
+ int reachtime; /* reachable time */
+ int rxmitra; /* retransmit interval */
+ int ttl; /* cur hop count limit */
+ int routerlt; /* router lifetime */
+};
+
+struct Hostparams {
+ int rxmithost; /* set to 1000 by ip_init_6, which labels it "v6 RETRANS_TIMER" */
+};
+
+struct Ipifc
+{
+ RWlock rwlock;
+
+ Conv *conv; /* link to its conversation structure */
+ char dev[64]; /* device we're attached to */
+ Medium *m; /* Media pointer */
+ int maxtu; /* Maximum transfer unit */
+ int mintu; /* Minimum transfer unit */
+ int mbps; /* megabits per second */
+ void *arg; /* medium specific */
+ int reassemble; /* reassemble IP packets before forwarding */
+
+ /* these are used so that we can unbind on the fly */
+ Lock idlock;
+ uchar ifcid; /* incremented each 'bind/unbind/add/remove' */
+ int ref; /* number of proc's using this ipifc */
+ Rendez wait; /* where unbinder waits for ref == 0 */
+ int unbinding;
+
+ uchar mac[MAClen]; /* MAC address */
+
+ Iplifc *lifc; /* logical interfaces on this physical one */
+
+ ulong in, out; /* message statistics */
+ ulong inerr, outerr; /* ... */
+
+ uchar sendra6; /* flag: send router advs on this ifc */
+ uchar recvra6; /* flag: recv router advs on this ifc */
+ Routerparams rp; /* router parameters as in RFC 2461, pp.40—43.
+ used only if node is router */
+};
+
+/*
+ * one per multicast-lifc pair used by a Conv
+ */
+struct Ipmulti
+{
+ uchar ma[IPaddrlen];
+ uchar ia[IPaddrlen];
+ Ipmulti *next;
+};
+
+/*
+ * hash table for 2 ip addresses + 2 ports
+ */
+enum
+{
+ Nipht= 521, /* convenient prime */
+
+ IPmatchexact= 0, /* match on 4 tuple */
+ IPmatchany, /* *!* */
+ IPmatchport, /* *!port */
+ IPmatchaddr, /* addr!* */
+ IPmatchpa, /* addr!port */
+};
+struct Iphash
+{
+ Iphash *next;
+ Conv *c;
+ int match;
+};
+struct Ipht
+{
+ Lock lk;
+
+ Iphash *tab[Nipht];
+};
+void iphtadd(Ipht*, Conv*);
+void iphtrem(Ipht*, Conv*);
+Conv* iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp);
+
+/*
+ * one per multiplexed protocol
+ */
+struct Proto
+{
+ QLock qlock;
+
+ char* name; /* protocol name */
+ int x; /* protocol index */
+ int ipproto; /* ip protocol type */
+
+ char* (*connect)(Conv*, char**, int);
+ char* (*announce)(Conv*, char**, int);
+ char* (*bind)(Conv*, char**, int);
+ int (*state)(Conv*, char*, int);
+ void (*create)(Conv*);
+ void (*close)(Conv*);
+ void (*rcv)(Proto*, Ipifc*, Block*);
+ char* (*ctl)(Conv*, char**, int);
+ void (*advise)(Proto*, Block*, char*);
+ int (*stats)(Proto*, char*, int);
+ int (*local)(Conv*, char*, int);
+ int (*remote)(Conv*, char*, int);
+ int (*inuse)(Conv*);
+ int (*gc)(Proto*); /* returns true if any conversations are freed */
+
+ Fs *f; /* file system this proto is part of */
+ Conv **conv; /* array of conversations */
+ int ptclsize; /* size of per protocol ctl block */
+ int nc; /* number of conversations */
+ int ac;
+ Qid qid; /* qid for protocol directory */
+ ushort nextrport;
+
+ void *priv;
+};
+
+
+/*
+ * one per IP protocol stack
+ */
+struct Fs
+{
+ RWlock rwlock;
+
+ Conv *conv; /* link to its conversation structure */
+ int dev;
+
+ int np;
+ Proto* p[Maxproto+1]; /* list of supported protocols */
+ Proto* t2p[256]; /* vector of all protocols */
+ Proto* ipifc; /* kludge for ipifcremroute & ipifcaddroute */
+ Proto* ipmux; /* kludge for finding an ip multiplexor */
+
+ IP *ip;
+ Ipselftab *self;
+ Arp *arp;
+ v6params *v6p;
+
+ Route *v4root[1<<Lroot]; /* v4 routing forest */
+ Route *v6root[1<<Lroot]; /* v6 routing forest */
+ Route *queue; /* used as temp when reinjecting routes */
+
+ Netlog *alog;
+
+ char ndb[1024]; /* an ndb entry for this interface */
+ int ndbvers;
+ long ndbmtime;
+};
+
+/* one per default router known to host */
+struct v6router {
+ uchar inuse;
+ Ipifc *ifc;
+ int ifcid;
+ uchar routeraddr[IPaddrlen];
+ long ltorigin;
+ Routerparams rp;
+};
+
+struct v6params
+{
+ Routerparams rp; /* v6 params, one copy per node now */
+ Hostparams hp;
+ v6router v6rlist[3]; /* max 3 default routers, currently */
+ int cdrouter; /* uses only v6rlist[cdrouter] if */
+ /* cdrouter >= 0. */
+};
+
+
+int Fsconnected(Conv*, char*);
+Conv* Fsnewcall(Conv*, uchar*, ushort, uchar*, ushort, uchar);
+int Fspcolstats(char*, int);
+int Fsproto(Fs*, Proto*);
+int Fsbuiltinproto(Fs*, uchar);
+Conv* Fsprotoclone(Proto*, char*);
+Proto* Fsrcvpcol(Fs*, uchar);
+Proto* Fsrcvpcolx(Fs*, uchar);
+char* Fsstdconnect(Conv*, char**, int);
+char* Fsstdannounce(Conv*, char**, int);
+char* Fsstdbind(Conv*, char**, int);
+ulong scalednconv(void);
+void closeconv(Conv*);
+/*
+ * logging
+ */
+enum
+{
+ Logip= 1<<1,
+ Logtcp= 1<<2,
+ Logfs= 1<<3,
+ Logil= 1<<4,
+ Logicmp= 1<<5,
+ Logudp= 1<<6,
+ Logcompress= 1<<7,
+ Logilmsg= 1<<8,
+ Loggre= 1<<9,
+ Logppp= 1<<10,
+ Logtcprxmt= 1<<11,
+ Logigmp= 1<<12,
+ Logudpmsg= 1<<13,
+ Logipmsg= 1<<14,
+ Logrudp= 1<<15,
+ Logrudpmsg= 1<<16,
+ Logesp= 1<<17,
+ Logtcpwin= 1<<18,
+};
+
+void netloginit(Fs*);
+void netlogopen(Fs*);
+void netlogclose(Fs*);
+void netlogctl(Fs*, char*, int);
+long netlogread(Fs*, void*, ulong, long);
+void netlog(Fs*, int, char*, ...);
+void ifcloginit(Fs*);
+long ifclogread(Fs*, Chan *,void*, ulong, long);
+void ifclog(Fs*, uchar *, int);
+void ifclogopen(Fs*, Chan*);
+void ifclogclose(Fs*, Chan*);
+
+/*
+ * iproute.c
+ */
+typedef struct RouteTree RouteTree;
+typedef struct Routewalk Routewalk;
+typedef struct V4route V4route;
+typedef struct V6route V6route;
+
+enum
+{
+
+ /* type bits */
+ Rv4= (1<<0), /* this is a version 4 route */
+ Rifc= (1<<1), /* this route is a directly connected interface */
+ Rptpt= (1<<2), /* this route is a pt to pt interface */
+ Runi= (1<<3), /* a unicast self address */
+ Rbcast= (1<<4), /* a broadcast self address */
+ Rmulti= (1<<5), /* a multicast self address */
+ Rproxy= (1<<6), /* this route should be proxied */
+};
+
+struct Routewalk
+{
+ int o;
+ int h;
+ char* p;
+ char* e;
+ void* state;
+ void (*walk)(Route*, Routewalk*);
+};
+
+struct RouteTree
+{
+ Route* right;
+ Route* left;
+ Route* mid;
+ uchar depth;
+ uchar type; /* Rv4, Rifc, ... type bits */
+ uchar ifcid; /* must match ifc->ifcid */
+ Ipifc *ifc;
+ char tag[4];
+ int ref;
+};
+
+struct V4route
+{
+ ulong address;
+ ulong endaddress;
+ uchar gate[IPv4addrlen];
+};
+
+struct V6route
+{
+ ulong address[IPllen];
+ ulong endaddress[IPllen];
+ uchar gate[IPaddrlen];
+};
+
+struct Route
+{
+/* RouteTree; the fields below repeat struct RouteTree member for member */
+ Route* right;
+ Route* left;
+ Route* mid;
+ uchar depth;
+ uchar type; /* Rv4, Rifc, ... type bits */
+ uchar ifcid; /* must match ifc->ifcid */
+ Ipifc *ifc;
+ char tag[4];
+ int ref;
+
+ union {
+ V6route v6;
+ V4route v4;
+ };
+};
+extern void v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
+extern void v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
+extern void v4delroute(Fs *f, uchar *a, uchar *mask, int dolock);
+extern void v6delroute(Fs *f, uchar *a, uchar *mask, int dolock);
+extern Route* v4lookup(Fs *f, uchar *a, Conv *c);
+extern Route* v6lookup(Fs *f, uchar *a, Conv *c);
+extern long routeread(Fs *f, char*, ulong, int);
+extern long routewrite(Fs *f, Chan*, char*, int);
+extern void routetype(int, char*);
+extern void ipwalkroutes(Fs*, Routewalk*);
+extern void convroute(Route*, uchar*, uchar*, uchar*, char*, int*);
+
+/*
+ * devip.c
+ */
+
+/*
+ * Hanging off every ip channel's ->aux is the following structure.
+ * It maintains the state used by devip and iproute.
+ */
+struct IPaux
+{
+ char *owner; /* the user that did the attach */
+ char tag[4];
+};
+
+extern IPaux* newipaux(char*, char*);
+
+/*
+ * arp.c
+ */
+struct Arpent
+{
+ uchar ip[IPaddrlen];
+ uchar mac[MAClen];
+ Medium *type; /* media type */
+ Arpent* hash;
+ Block* hold;
+ Block* last;
+ uint ctime; /* time entry was created or refreshed */
+ uint utime; /* time entry was last used */
+ uchar state;
+ Arpent *nextrxt; /* re-transmit chain */
+ uint rtime; /* time for next retransmission */
+ uchar rxtsrem;
+ Ipifc *ifc;
+ uchar ifcid; /* must match ifc->ifcid */
+};
+
+extern void arpinit(Fs*);
+extern int arpread(Arp*, char*, ulong, int);
+extern int arpwrite(Fs*, char*, int);
+extern Arpent* arpget(Arp*, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *h);
+extern void arprelease(Arp*, Arpent *a);
+extern Block* arpresolve(Arp*, Arpent *a, Medium *type, uchar *mac);
+extern void arpenter(Fs*, int version, uchar *ip, uchar *mac, int len, int norefresh);
+
+/*
+ * ipaux.c
+ */
+
+extern int myetheraddr(uchar*, char*);
+extern vlong parseip(uchar*, char*);
+extern vlong parseipmask(uchar*, char*);
+extern char* v4parseip(uchar*, char*);
+extern void maskip(uchar *from, uchar *mask, uchar *to);
+extern int parsemac(uchar *to, char *from, int len);
+extern uchar* defmask(uchar*);
+extern int isv4(uchar*);
+extern void v4tov6(uchar *v6, uchar *v4);
+extern int v6tov4(uchar *v4, uchar *v6);
+extern int eipfmt(Fmt*);
+
+#define ipmove(x, y) memmove(x, y, IPaddrlen) /* copy the IPaddrlen-byte address y into x */
+#define ipcmp(x, y) ( (x)[IPaddrlen-1] != (y)[IPaddrlen-1] || memcmp(x, y, IPaddrlen) ) /* 0 when equal, nonzero otherwise; tests the last byte before memcmp; arguments are evaluated more than once */
+
+extern uchar IPv4bcast[IPaddrlen];
+extern uchar IPv4bcastobs[IPaddrlen];
+extern uchar IPv4allsys[IPaddrlen];
+extern uchar IPv4allrouter[IPaddrlen];
+extern uchar IPnoaddr[IPaddrlen];
+extern uchar v4prefix[IPaddrlen];
+extern uchar IPallbits[IPaddrlen];
+
+#define NOW msec()
+
+/*
+ * media
+ */
+extern Medium ethermedium;
+extern Medium nullmedium;
+extern Medium pktmedium;
+extern Medium tripmedium;
+
+/*
+ * ipifc.c
+ */
+extern Medium* ipfindmedium(char *name);
+extern void addipmedium(Medium *med);
+extern int ipforme(Fs*, uchar *addr);
+extern int iptentative(Fs*, uchar *addr);
+extern int ipisbm(uchar *);
+extern int ipismulticast(uchar *);
+extern Ipifc* findipifc(Fs*, uchar *remote, int type);
+extern void findlocalip(Fs*, uchar *local, uchar *remote);
+extern int ipv4local(Ipifc *ifc, uchar *addr);
+extern int ipv6local(Ipifc *ifc, uchar *addr);
+extern int ipv6anylocal(Ipifc *ifc, uchar *addr);
+extern Iplifc* iplocalonifc(Ipifc *ifc, uchar *ip);
+extern int ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip);
+extern int ipismulticast(uchar *ip);
+extern int ipisbooting(void);
+extern int ipifccheckin(Ipifc *ifc, Medium *med);
+extern void ipifccheckout(Ipifc *ifc);
+extern int ipifcgrab(Ipifc *ifc);
+extern void ipifcaddroute(Fs*, int, uchar*, uchar*, uchar*, int);
+extern void ipifcremroute(Fs*, int, uchar*, uchar*);
+extern void ipifcremmulti(Conv *c, uchar *ma, uchar *ia);
+extern void ipifcaddmulti(Conv *c, uchar *ma, uchar *ia);
+extern char* ipifcrem(Ipifc *ifc, char **argv, int argc);
+extern char* ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp);
+extern long ipselftabread(Fs*, char *a, ulong offset, int n);
+extern char* ipifcadd6(Ipifc *ifc, char**argv, int argc);
+/*
+ * ip.c
+ */
+extern void iprouting(Fs*, int);
+extern void icmpnoconv(Fs*, Block*);
+extern void icmpcantfrag(Fs*, Block*, int);
+extern void icmpttlexceeded(Fs*, uchar*, Block*);
+extern ushort ipcsum(uchar*);
+extern void ipiput4(Fs*, Ipifc*, Block*);
+extern void ipiput6(Fs*, Ipifc*, Block*);
+extern int ipoput4(Fs*, Block*, int, int, int, Conv*);
+extern int ipoput6(Fs*, Block*, int, int, int, Conv*);
+extern int ipstats(Fs*, char*, int);
+extern ushort ptclbsum(uchar*, int);
+extern ushort ptclcsum(Block*, int, int);
+extern void ip_init(Fs*);
+extern void update_mtucache(uchar*, ulong);
+extern ulong restrict_mtu(uchar*, ulong);
+/*
+ * bootp.c
+ */
+extern char* bootp(Ipifc*);
+extern int bootpread(char*, ulong, int);
+
+/*
+ * resolving inferno/plan9 differences
+ */
+Chan* commonfdtochan(int, int, int, int);
+char* commonuser(void);
+char* commonerror(void);
+
+/*
+ * chandial.c
+ */
+extern Chan* chandial(char*, char*, char*, Chan**);
+
+/*
+ * global to all of the stack
+ */
+extern void (*igmpreportfn)(Ipifc*, uchar*);
diff --git a/src/9vx/a/ip/ipaux.c b/src/9vx/a/ip/ipaux.c
@@ -0,0 +1,368 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+#include "ip.h"
+#include "ipv6.h"
+
+char *v6hdrtypes[Maxhdrtype] = /* printable header names, indexed by header-type constant */
+{
+ [HBH] "HopbyHop",
+ [ICMP] "ICMP",
+ [IGMP] "IGMP",
+ [GGP] "GGP",
+ [IPINIP] "IP",
+ [ST] "ST",
+ [TCP] "TCP",
+ [UDP] "UDP",
+ [ISO_TP4] "ISO_TP4",
+ [RH] "Routinghdr",
+ [FH] "Fraghdr",
+ [IDRP] "IDRP",
+ [RSVP] "RSVP",
+ [AH] "Authhdr",
+ [ESP] "ESP",
+ [ICMPv6] "ICMPv6",
+ [NNH] "Nonexthdr",
+ [ISO_IP] "ISO_IP",
+ [IGRP] "IGRP",
+ [OSPF] "OSPF",
+}; /* entries without an initializer are nil */
+
+/*
+ * well known IPv6 addresses, with their masks and prefix lengths (counted in bytes)
+ */
+uchar v6Unspecified[IPaddrlen] = { /* :: */
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+uchar v6loopback[IPaddrlen] = { /* ::1 */
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x01
+};
+
+uchar v6linklocal[IPaddrlen] = { /* fe80:: */
+ 0xfe, 0x80, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+uchar v6linklocalmask[IPaddrlen] = { /* first 8 bytes set */
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+int v6llpreflen = 8; /* link-local prefix length in bytes */
+
+uchar v6multicast[IPaddrlen] = { /* ff00:: */
+ 0xff, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+uchar v6multicastmask[IPaddrlen] = { /* first byte set */
+ 0xff, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+int v6mcpreflen = 1; /* multicast prefix length in bytes */
+
+uchar v6allnodesN[IPaddrlen] = { /* ff01::1 */
+ 0xff, 0x01, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x01
+};
+uchar v6allroutersN[IPaddrlen] = { /* ff01::2 */
+ 0xff, 0x01, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x02
+};
+uchar v6allnodesNmask[IPaddrlen] = { /* first 2 bytes set */
+ 0xff, 0xff, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+int v6aNpreflen = 2; /* all nodes (N) prefix length in bytes */
+
+uchar v6allnodesL[IPaddrlen] = { /* ff02::1 */
+ 0xff, 0x02, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x01
+};
+uchar v6allroutersL[IPaddrlen] = { /* ff02::2 */
+ 0xff, 0x02, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x02
+};
+uchar v6allnodesLmask[IPaddrlen] = { /* first 2 bytes set */
+ 0xff, 0xff, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+int v6aLpreflen = 2; /* all nodes (L) prefix length in bytes */
+
+uchar v6solicitednode[IPaddrlen] = { /* ff02::1:ff00:0; ipv62smcast fills in the last 3 bytes */
+ 0xff, 0x02, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x01,
+ 0xff, 0, 0, 0
+};
+uchar v6solicitednodemask[IPaddrlen] = { /* first 13 bytes set */
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0x0, 0x0, 0x0
+};
+int v6snpreflen = 13; /* solicited-node prefix length in bytes */
+
+ushort
+ptclcsum(Block *bp, int offset, int len)
+{
+ uchar *addr;
+ ulong losum, hisum;
+ ushort csum;
+ int odd, blocklen, x;
+ /* checksum up to len bytes of the Block chain bp, starting offset bytes in; result is complemented and cut to 16 bits */
+ /* skip whole blocks that lie entirely before offset */
+ while(bp != nil && offset && offset >= BLEN(bp)) {
+ offset -= BLEN(bp);
+ bp = bp->next;
+ }
+ if(bp == nil)
+ return 0; /* offset lies beyond the end of the chain */
+
+ addr = bp->rp + offset;
+ blocklen = BLEN(bp) - offset;
+
+ if(bp->next == nil) { /* single block: one ptclbsum call, len clamped to what is there */
+ if(blocklen < len)
+ len = blocklen;
+ return ~ptclbsum(addr, len) & 0xffff;
+ }
+
+ losum = 0; /* sums of pieces that start at an even byte offset */
+ hisum = 0; /* sums of pieces that start at an odd byte offset */
+
+ odd = 0;
+ while(len) {
+ x = blocklen;
+ if(len < x)
+ x = len;
+
+ csum = ptclbsum(addr, x); /* presumably the 16-bit partial sum of x bytes - defined elsewhere */
+ if(odd)
+ hisum += csum;
+ else
+ losum += csum;
+ odd = (odd+x) & 1; /* parity of the bytes consumed so far */
+ len -= x;
+
+ bp = bp->next;
+ if(bp == nil)
+ break; /* chain shorter than len: sum what there was */
+ blocklen = BLEN(bp);
+ addr = bp->rp;
+ }
+ /* fold the odd-offset sums in with their two bytes swapped */
+ losum += hisum>>8;
+ losum += (hisum&0xff)<<8;
+ while((csum = losum>>16) != 0) /* fold carries back into the low 16 bits */
+ losum = csum + (losum & 0xffff);
+
+ return ~losum & 0xffff;
+}
+
+enum
+{
+ Isprefix= 16, /* NOTE(review): not referenced in the visible part of this file */
+};
+
+#define CLASS(p) ((*(uchar*)(p))>>6) /* top two bits of the first byte at p */
+
+void
+ipv62smcast(uchar *smcast, uchar *a)
+{
+	int i;	/* result: v6solicitednode with its last 3 bytes taken from a */
+	assert(IPaddrlen == 16);
+	memmove(smcast, v6solicitednode, IPaddrlen);
+	for(i = 13; i < IPaddrlen; i++)
+		smcast[i] = a[i];
+}
+
+
+/*
+ * parse a hex mac address: pairs of hex digits, each optionally
+ * followed by ':'.  to[0..len-1] is zeroed first; parsing stops
+ * after len bytes or at the first incomplete pair.
+ * returns the number of bytes stored.
+ */
+int
+parsemac(uchar *to, char *from, int len)
+{
+	char pair[3];
+	int n;
+
+	memset(to, 0, len);
+	pair[2] = '\0';
+	n = 0;
+	while(n < len && from[0] != '\0' && from[1] != '\0'){
+		pair[0] = from[0];
+		pair[1] = from[1];
+		to[n++] = strtoul(pair, 0, 16);
+		from += 2;
+		/* skip one optional separator */
+		if(*from == ':')
+			from++;
+	}
+
+	return n;
+}
+
+/* hash a tcp, udp, ... connection 4-tuple into a bucket index below Nhash.
+ * only the last byte of each address and the two ports are mixed in.
+ * operands are widened to ulong before shifting, because (int)uchar<<24
+ * and (int)ushort<<16 can shift into the sign bit of int (undefined). */
+ulong
+iphash(uchar *sa, ushort sp, uchar *da, ushort dp)
+{	return ((ulong)sa[IPaddrlen-1]<<24 ^ (ulong)sp<<16 ^ (ulong)da[IPaddrlen-1]<<8 ^ dp) % Nhash;
+}
+
+void
+iphtadd(Ipht *ht, Conv *c)
+{
+	Iphash *ent;
+	ulong bucket;
+
+	/*
+	 * Add conversation c to hash table ht.  The new entry
+	 * records which parts of the conversation's addresses are
+	 * set; iphtlook only considers entries of the class it is
+	 * currently searching for.
+	 */
+	bucket = iphash(c->raddr, c->rport, c->laddr, c->lport);
+	ent = smalloc(sizeof(*ent));
+	ent->c = c;
+	if(ipcmp(c->raddr, IPnoaddr) != 0)
+		ent->match = IPmatchexact;
+	else if(ipcmp(c->laddr, IPnoaddr) != 0)
+		ent->match = c->lport == 0 ? IPmatchaddr : IPmatchpa;
+	else if(c->lport == 0)
+		ent->match = IPmatchany;
+	else
+		ent->match = IPmatchport;
+
+	/* push onto the front of the bucket's chain */
+	LOCK(ht);
+	ent->next = ht->tab[bucket];
+	ht->tab[bucket] = ent;
+	UNLOCK(ht);
+}
+
+void
+iphtrem(Ipht *ht, Conv *c)
+{
+	Iphash **pp, *ent;
+	ulong bucket;
+
+	bucket = iphash(c->raddr, c->rport, c->laddr, c->lport);
+	LOCK(ht);	/* unlink and free the first entry for c in its bucket, if any */
+	pp = &ht->tab[bucket];
+	while((ent = *pp) != nil && ent->c != c)
+		pp = &ent->next;
+	if(ent != nil){
+		*pp = ent->next;
+		free(ent);
+	}
+	UNLOCK(ht);
+}
+
+/*
+ * Find the conversation that should receive a packet sent from
+ * sa!sp to da!dp.  Entry classes are tried from most to least
+ * specific and the first hit wins:
+ *	connected && raddr,rport,laddr,lport
+ *	announced && laddr,lport
+ *	announced && *,lport
+ *	announced && laddr,*
+ *	announced && *,*
+ * Wildcard classes are looked up with IPnoaddr and port 0
+ * substituted for the wildcarded fields.
+ * Returns nil when nothing matches.
+ */
+Conv*
+iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp)
+{
+	ulong hv;
+	Iphash *h;
+	Conv *c;
+
+	/* exact 4 pair match (connection) */
+	hv = iphash(sa, sp, da, dp);
+	LOCK(ht);
+	for(h = ht->tab[hv]; h != nil; h = h->next){
+		if(h->match == IPmatchexact){
+			c = h->c;
+			if(sp == c->rport && dp == c->lport
+			&& ipcmp(sa, c->raddr) == 0 && ipcmp(da, c->laddr) == 0)
+				goto found;
+		}
+	}
+
+	/* match local address and port */
+	hv = iphash(IPnoaddr, 0, da, dp);
+	for(h = ht->tab[hv]; h != nil; h = h->next){
+		if(h->match == IPmatchpa){
+			c = h->c;
+			if(dp == c->lport && ipcmp(da, c->laddr) == 0)
+				goto found;
+		}
+	}
+
+	/* match just port */
+	hv = iphash(IPnoaddr, 0, IPnoaddr, dp);
+	for(h = ht->tab[hv]; h != nil; h = h->next){
+		if(h->match == IPmatchport){
+			c = h->c;
+			if(dp == c->lport)
+				goto found;
+		}
+	}
+
+	/* match local address */
+	hv = iphash(IPnoaddr, 0, da, 0);
+	for(h = ht->tab[hv]; h != nil; h = h->next){
+		if(h->match == IPmatchaddr){
+			c = h->c;
+			if(ipcmp(da, c->laddr) == 0)
+				goto found;
+		}
+	}
+
+	/* look for something that matches anything */
+	hv = iphash(IPnoaddr, 0, IPnoaddr, 0);
+	for(h = ht->tab[hv]; h != nil; h = h->next){
+		if(h->match == IPmatchany){
+			c = h->c;
+			goto found;
+		}
+	}
+
+	c = nil;	/* nothing matched */
+found:
+	UNLOCK(ht);
+	return c;
+}
diff --git a/src/9vx/a/ip/ipifc.c b/src/9vx/a/ip/ipifc.c
@@ -0,0 +1,1654 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+
+#define DPRINT if(0)print /* debugging prints, disabled by the constant-false test */
+
+enum {
+ Maxmedia = 32, /* size of media[]; also the number of ipifc conversations (ipifcinit) */
+ Nself = Maxmedia*5, /* NOTE(review): not referenced in the visible part of this file */
+ NHASH = 1<<6, /* buckets in Ipselftab.hash */
+ NCACHE = 256, /* NOTE(review): not referenced in the visible part of this file */
+ QMAX = 64*1024-1, /* queue limit passed to qopen in ipifccreate */
+};
+
+Medium *media[Maxmedia] = { 0 }; /* registered media; nil-terminated (see addipmedium) */
+
+/*
+ * cache of local addresses (addresses we answer to)
+ */
+struct Ipself
+{
+ uchar a[IPaddrlen]; /* the address itself */
+ Ipself *hnext; /* next address in the hash table; NOTE(review): unused in the code visible here, which chains buckets through next */
+ Iplink *link; /* binding twixt Ipself and Ipifc */
+ ulong expire; /* set from NOW by ipselffree when the entry is retired */
+ uchar type; /* type of address, as passed to addselfcache (Runi, Rbcast, Rmulti) */
+ int ref;
+ Ipself *next; /* free list; also the hash-chain link used by addselfcache/remselfcache */
+};
+
+struct Ipselftab
+{
+ QLock qlock;
+ int inited;
+ int acceptall; /* true if an interface has the null address */
+ Ipself *hash[NHASH]; /* hash chains, indexed by hashipa() */
+};
+
+/*
+ * Multicast addresses are chained onto a Chan so that
+ * we can remove them when the Chan is closed.
+ */
+typedef struct Ipmcast Ipmcast;
+struct Ipmcast
+{
+ Ipmcast *next;
+ uchar ma[IPaddrlen]; /* multicast address */
+ uchar ia[IPaddrlen]; /* interface address */
+};
+
+/* quick hash for ip addresses: the last two bytes of the address, modulo NHASH */
+#define hashipa(a) ( (ulong)(((a)[IPaddrlen-2]<<8) | (a)[IPaddrlen-1])%NHASH )
+
+static char tifc[] = "ifc "; /* tag passed to v4addroute/v6addroute for routes added by this file */
+
+static void addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type);
+static void remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a);
+static char* ipifcjoinmulti(Ipifc *ifc, char **argv, int argc);
+static char* ipifcleavemulti(Ipifc *ifc, char **argv, int argc);
+static void ipifcregisterproxy(Fs*, Ipifc*, uchar*);
+static char* ipifcremlifc(Ipifc*, Iplifc*);
+
+/* link in a new medium: store med in the first free slot of media[].
+ * the last slot is never used, so the table stays nil-terminated for
+ * ipfindmedium; with no free slot the medium is silently dropped. */
+void
+addipmedium(Medium *med)
+{
+	Medium **slot;
+
+	for(slot = media; slot < &media[nelem(media)-1]; slot++)
+		if(*slot == nil){
+			*slot = med;
+			return;
+		}
+}
+
+/*
+ * find the medium with this name; nil if no such medium is registered
+ */
+Medium*
+ipfindmedium(char *name)
+{
+	int i;
+
+	for(i = 0; media[i] != nil; i++)
+		if(strcmp(media[i]->name, name) == 0)
+			return media[i];
+	return nil;
+}
+
+/*
+ * attach a device (or pkt driver) to the interface: argv[1] names the medium, optional argv[2] the device.
+ * returns nil or an error string. called with c locked
+ */
+static char*
+ipifcbind(Conv *c, char **argv, int argc)
+{
+ Ipifc *ifc;
+ Medium *m;
+
+ if(argc < 2)
+ return Ebadarg;
+
+ ifc = (Ipifc*)c->ptcl;
+
+ /* bind the device to the interface */
+ m = ipfindmedium(argv[1]);
+ if(m == nil)
+ return "unknown interface type";
+
+ WLOCK(ifc);
+ if(ifc->m != nil){
+ WUNLOCK(ifc);
+ return "interface already bound";
+ }
+ if(waserror()){ /* from here on any error() releases the write lock before propagating */
+ WUNLOCK(ifc);
+ nexterror();
+ }
+
+ /* do medium specific binding */
+ (*m->bind)(ifc, argc, argv);
+
+ /* set the bound device name; default is the medium name followed by c->x */
+ if(argc > 2)
+ strncpy(ifc->dev, argv[2], sizeof(ifc->dev));
+ else
+ snprint(ifc->dev, sizeof ifc->dev, "%s%d", m->name, c->x);
+ ifc->dev[sizeof(ifc->dev)-1] = 0; /* strncpy does not terminate a name that fills the buffer */
+
+ /* set up parameters */
+ ifc->m = m;
+ ifc->mintu = ifc->m->mintu;
+ ifc->maxtu = ifc->m->maxtu;
+ if(ifc->m->unbindonclose == 0)
+ ifc->conv->inuse++; /* undone in ipifcunbind */
+ ifc->rp.mflag = 0; /* default not managed */
+ ifc->rp.oflag = 0;
+ ifc->rp.maxraint = 600000; /* millisecs */
+ ifc->rp.minraint = 200000;
+ ifc->rp.linkmtu = 0; /* no mtu sent */
+ ifc->rp.reachtime = 0;
+ ifc->rp.rxmitra = 0;
+ ifc->rp.ttl = MAXTTL;
+ ifc->rp.routerlt = 3 * ifc->rp.maxraint;
+
+ /* any ancillary structures (like routes) no longer pertain */
+ ifc->ifcid++;
+
+ /* reopen all the queues closed by a previous unbind */
+ qreopen(c->rq);
+ qreopen(c->eq); /* NOTE(review): ipifcunbind closes wq, not eq - confirm which queue is meant */
+ qreopen(c->sq);
+
+ WUNLOCK(ifc);
+ poperror();
+
+ return nil;
+}
+
+/*
+ * detach a device from an interface, close the interface; always returns nil (failures are raised via error())
+ * called with ifc->conv closed
+ */
+static char*
+ipifcunbind(Ipifc *ifc)
+{
+ char *err;
+
+ if(waserror()){ /* installed before WLOCK; the only error() below is raised with the lock held */
+ WUNLOCK(ifc);
+ nexterror();
+ }
+ WLOCK(ifc);
+
+ /* dissociate routes */
+ if(ifc->m != nil && ifc->m->unbindonclose == 0)
+ ifc->conv->inuse--; /* undo the increment made by ipifcbind */
+ ifc->ifcid++;
+
+ /* disassociate logical interfaces (before zeroing ifc->arg) */
+ while(ifc->lifc){
+ err = ipifcremlifc(ifc, ifc->lifc);
+ /*
+ * note: err non-zero means lifc not found,
+ * which can't happen in this case.
+ */
+ if(err)
+ error(err);
+ }
+
+ /* disassociate device */
+ if(ifc->m && ifc->m->unbind)
+ (*ifc->m->unbind)(ifc);
+ memset(ifc->dev, 0, sizeof(ifc->dev));
+ ifc->arg = nil;
+ ifc->reassemble = 0;
+
+ /* close queues to stop queuing of packets */
+ qclose(ifc->conv->rq);
+ qclose(ifc->conv->wq); /* NOTE(review): ipifcbind reopens rq, eq and sq, but not wq */
+ qclose(ifc->conv->sq);
+
+ ifc->m = nil; /* marks the interface unbound (see ipifcinuse) */
+ WUNLOCK(ifc);
+ poperror();
+ return nil;
+}
+
+char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag"
+" %d maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt"
+" %d pktin %lud pktout %lud errin %lud errout %lud\n"; /* first line written by ipifcstate */
+
+char slineformat[] = " %-40I %-10M %-40I %-12lud %-12lud\n"; /* ipifcstate: local, mask, remote, validlt, preflt of one lifc */
+
+static int
+ipifcstate(Conv *c, char *state, int n)
+{
+	Ipifc *ifc = (Ipifc*)c->ptcl;
+	Iplifc *l;
+	int len;
+
+	/* header line: device, router parameters and packet counters */
+	len = snprint(state, n, sfixedformat,
+		ifc->dev, ifc->maxtu, ifc->sendra6, ifc->recvra6,
+		ifc->rp.mflag, ifc->rp.oflag, ifc->rp.maxraint,
+		ifc->rp.minraint, ifc->rp.linkmtu, ifc->rp.reachtime,
+		ifc->rp.rxmitra, ifc->rp.ttl, ifc->rp.routerlt,
+		ifc->in, ifc->out, ifc->inerr, ifc->outerr);
+
+	RLOCK(ifc);	/* then one line per lifc while room remains, or an empty line */
+	if(ifc->lifc == nil)
+		len += snprint(state+len, n - len, "\n");
+	for(l = ifc->lifc; l != nil && len < n; l = l->next)
+		len += snprint(state+len, n - len, slineformat, l->local,
+			l->mask, l->remote, l->validlt, l->preflt);
+	RUNLOCK(ifc);
+	return len;
+}
+
+static int
+ipifclocal(Conv *c, char *state, int n)
+{
+	Ipifc *ifc = (Ipifc*)c->ptcl;
+	Iplifc *l;
+	Iplink *lk;
+	int len;
+
+	/* one line per logical interface: its local address, "->", then every self address linked to it */
+	len = 0;
+	RLOCK(ifc);
+	for(l = ifc->lifc; l != nil; l = l->next){
+		len += snprint(state+len, n - len, "%-40.40I ->", l->local);
+		for(lk = l->link; lk != nil; lk = lk->lifclink)
+			len += snprint(state+len, n - len, " %-40.40I", lk->self->a);
+		len += snprint(state+len, n - len, "\n");
+	}
+	RUNLOCK(ifc);
+
+	return len;
+}
+
+static int
+ipifcinuse(Conv *c)
+{
+	Ipifc *ifc = (Ipifc*)c->ptcl;
+
+	/* an interface counts as in use exactly while a medium is bound to it */
+	return ifc->m != nil;
+}
+
+/*
+ * called when a process writes to an interface's 'data' (installed as the wq kick routine by ipifccreate)
+ */
+static void
+ipifckick(void *x)
+{
+ Conv *c = x;
+ Block *bp;
+ Ipifc *ifc;
+
+ bp = qget(c->wq);
+ if(bp == nil)
+ return;
+
+ ifc = (Ipifc*)c->ptcl;
+ if(!CANRLOCK(ifc)){ /* read lock not available right now: drop the packet */
+ freeb(bp);
+ return;
+ }
+ if(waserror()){ /* release the read lock if pktin raises an error */
+ RUNLOCK(ifc);
+ nexterror();
+ }
+ if(ifc->m == nil || ifc->m->pktin == nil)
+ freeb(bp); /* unbound, or the medium takes no input this way */
+ else
+ (*ifc->m->pktin)(c->p->f, ifc, bp);
+ RUNLOCK(ifc);
+ poperror();
+}
+
+/*
+ * called when a new ipifc structure is created: open the
+ * conversation's queues and mark the interface as unbound
+ */
+static void
+ipifccreate(Conv *c)
+{
+	Ipifc *ifc = (Ipifc*)c->ptcl;
+
+	c->rq = qopen(QMAX, 0, 0, 0);
+	c->sq = qopen(2*QMAX, 0, 0, 0);
+	c->wq = qopen(QMAX, Qkick, ipifckick, c);	/* ipifckick is the queue's kick routine */
+	ifc->m = nil;
+	ifc->conv = c;
+	ifc->unbinding = 0;
+	ifc->reassemble = 0;
+}
+
+/*
+ * called after last close of ipifc data or ctl: unbind the
+ * interface if its medium asks for unbind-on-close.
+ * NOTE(review): the old comment said "called with c locked, we
+ * must unlock", but nothing is unlocked here - confirm.
+ */
+static void
+ipifcclose(Conv *c)
+{
+	Ipifc *ifc = (Ipifc*)c->ptcl;
+	Medium *m = ifc->m;
+	if(m == nil || !m->unbindonclose)
+		return;
+	ipifcunbind(ifc);
+}
+
+/* change an interface's mtu to the number in argv[1].  Ebadarg if the
+ * interface is unbound, the argument is missing, or the value lies
+ * outside the medium's mintu..maxtu range; nil on success. */
+char*
+ipifcsetmtu(Ipifc *ifc, char **argv, int argc)
+{
+	int want;
+
+	if(ifc->m == nil || argc < 2)
+		return Ebadarg;
+	want = strtoul(argv[1], 0, 0);
+	if(want < ifc->m->mintu || ifc->m->maxtu < want)
+		return Ebadarg;
+	ifc->maxtu = want;
+	return nil;
+}
+
+/*
+ * add an address to an interface. argv[1..5] are: ip [mask [remote [mtu ["proxy"]]]]; returns nil or an error string.
+ */
+char*
+ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp)
+{
+ int i, type, mtu, sendnbrdisc = 0;
+ uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen];
+ uchar bcast[IPaddrlen], net[IPaddrlen];
+ Iplifc *lifc, **l;
+ Fs *f;
+
+ if(ifc->m == nil)
+ return "ipifc not yet bound to device";
+
+ f = ifc->conv->p->f;
+
+ type = Rifc;
+ memset(ip, 0, IPaddrlen);
+ memset(mask, 0, IPaddrlen);
+ memset(rem, 0, IPaddrlen);
+ switch(argc){ /* the longer forms fall through to the shorter ones */
+ case 6:
+ if(strcmp(argv[5], "proxy") == 0)
+ type |= Rproxy;
+ /* fall through */
+ case 5:
+ mtu = strtoul(argv[4], 0, 0);
+ if(mtu >= ifc->m->mintu && mtu <= ifc->m->maxtu) /* an out-of-range mtu is silently ignored */
+ ifc->maxtu = mtu;
+ /* fall through */
+ case 4:
+ if (parseip(ip, argv[1]) == -1 || parseip(rem, argv[3]) == -1)
+ return Ebadip;
+ parseipmask(mask, argv[2]);
+ maskip(rem, mask, net);
+ break;
+ case 3:
+ if (parseip(ip, argv[1]) == -1)
+ return Ebadip;
+ parseipmask(mask, argv[2]);
+ maskip(ip, mask, rem); /* no remote given: use ip masked by mask */
+ maskip(rem, mask, net);
+ break;
+ case 2:
+ if (parseip(ip, argv[1]) == -1)
+ return Ebadip;
+ memmove(mask, defmask(ip), IPaddrlen); /* no mask given: use the default mask for ip */
+ maskip(ip, mask, rem);
+ maskip(rem, mask, net);
+ break;
+ default:
+ return Ebadarg;
+ }
+ if(isv4(ip))
+ tentative = 0; /* v4 addresses are never tentative */
+ WLOCK(ifc);
+
+ /* ignore if this is already a local address for this ifc */
+ for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
+ if(ipcmp(lifc->local, ip) == 0) {
+ if(lifc->tentative != tentative)
+ lifc->tentative = tentative;
+ if(lifcp) { /* refresh the parameters from the caller's template */
+ lifc->onlink = lifcp->onlink;
+ lifc->autoflag = lifcp->autoflag;
+ lifc->validlt = lifcp->validlt;
+ lifc->preflt = lifcp->preflt;
+ lifc->origint = lifcp->origint;
+ }
+ goto out;
+ }
+ }
+
+ /* add the address to the list of logical ifc's for this ifc */
+ lifc = smalloc(sizeof(Iplifc));
+ ipmove(lifc->local, ip);
+ ipmove(lifc->mask, mask);
+ ipmove(lifc->remote, rem);
+ ipmove(lifc->net, net);
+ lifc->tentative = tentative;
+ if(lifcp) {
+ lifc->onlink = lifcp->onlink;
+ lifc->autoflag = lifcp->autoflag;
+ lifc->validlt = lifcp->validlt;
+ lifc->preflt = lifcp->preflt;
+ lifc->origint = lifcp->origint;
+ } else { /* default values */
+ lifc->onlink = lifc->autoflag = 1;
+ lifc->validlt = lifc->preflt = ~0L; /* v6addrcurr treats a preflt of ~0L as always current */
+ lifc->origint = NOW / 1000;
+ }
+ lifc->next = nil;
+
+ for(l = &ifc->lifc; *l; l = &(*l)->next) /* append at the tail of the lifc chain */
+ ;
+ *l = lifc;
+
+ /* check for point-to-point interface */
+ if(ipcmp(ip, v6loopback)) /* skip v6 loopback, it's a special address */
+ if(ipcmp(mask, IPallbits) == 0)
+ type |= Rptpt;
+
+ /* add local routes */
+ if(isv4(ip))
+ v4addroute(f, tifc, rem+IPv4off, mask+IPv4off, rem+IPv4off, type);
+ else
+ v6addroute(f, tifc, rem, mask, rem, type);
+
+ addselfcache(f, ifc, lifc, ip, Runi);
+
+ if((type & (Rproxy|Rptpt)) == (Rproxy|Rptpt)){ /* proxy on a point-to-point link: register and stop here */
+ ipifcregisterproxy(f, ifc, rem);
+ goto out;
+ }
+
+ if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0) {
+ /* add subnet directed broadcast address to the self cache */
+ for(i = 0; i < IPaddrlen; i++)
+ bcast[i] = (ip[i] & mask[i]) | ~mask[i];
+ addselfcache(f, ifc, lifc, bcast, Rbcast);
+
+ /* add subnet directed network address to the self cache */
+ for(i = 0; i < IPaddrlen; i++)
+ bcast[i] = (ip[i] & mask[i]) & mask[i];
+ addselfcache(f, ifc, lifc, bcast, Rbcast);
+
+ /* add network directed broadcast address to the self cache */
+ memmove(mask, defmask(ip), IPaddrlen); /* mask is overwritten with the default mask from here on */
+ for(i = 0; i < IPaddrlen; i++)
+ bcast[i] = (ip[i] & mask[i]) | ~mask[i];
+ addselfcache(f, ifc, lifc, bcast, Rbcast);
+
+ /* add network directed network address to the self cache */
+ memmove(mask, defmask(ip), IPaddrlen);
+ for(i = 0; i < IPaddrlen; i++)
+ bcast[i] = (ip[i] & mask[i]) & mask[i];
+ addselfcache(f, ifc, lifc, bcast, Rbcast);
+
+ addselfcache(f, ifc, lifc, IPv4bcast, Rbcast);
+ }
+ else {
+ if(ipcmp(ip, v6loopback) == 0) {
+ /* add node-local mcast address */
+ addselfcache(f, ifc, lifc, v6allnodesN, Rmulti);
+
+ /* add route for all node multicast */
+ v6addroute(f, tifc, v6allnodesN, v6allnodesNmask,
+ v6allnodesN, Rmulti);
+ }
+
+ /* add all nodes multicast address */
+ addselfcache(f, ifc, lifc, v6allnodesL, Rmulti);
+
+ /* add route for all nodes multicast */
+ v6addroute(f, tifc, v6allnodesL, v6allnodesLmask, v6allnodesL,
+ Rmulti);
+
+ /* add solicited-node multicast address */
+ ipv62smcast(bcast, ip);
+ addselfcache(f, ifc, lifc, bcast, Rmulti);
+
+ sendnbrdisc = 1; /* a new v6 address was added; a tentative one is solicited for below */
+ }
+
+ /* register the address on this network for address resolution */
+ if(isv4(ip) && ifc->m->areg != nil)
+ (*ifc->m->areg)(ifc, ip);
+
+out:
+ WUNLOCK(ifc);
+ if(tentative && sendnbrdisc) /* sent only after the write lock has been released */
+ icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+ return nil;
+}
+
+/*
+ * remove a logical interface from an ifc; returns nil, or an error string if lifc (which may be nil) is not on ifc.
+ * always called with ifc WLOCK'd
+ */
+static char*
+ipifcremlifc(Ipifc *ifc, Iplifc *lifc)
+{
+ Iplifc **l;
+ Fs *f;
+
+ f = ifc->conv->p->f;
+
+ /*
+ * find lifc on this interface's chain and unlink it.
+ * a nil lifc (as ipifcrem passes when nothing matched)
+ * runs off the end of the chain and is reported as not found.
+ */
+ for(l = &ifc->lifc; *l != nil && *l != lifc; l = &(*l)->next)
+ ;
+ if(*l == nil)
+ return "address not on this interface";
+ *l = lifc->next;
+
+ /* disassociate any addresses; each call drops one reference, so loop until no link is left */
+ while(lifc->link)
+ remselfcache(f, ifc, lifc, lifc->link->self->a);
+
+ /* remove the route for this logical interface */
+ if(isv4(lifc->local))
+ v4delroute(f, lifc->remote+IPv4off, lifc->mask+IPv4off, 1);
+ else {
+ v6delroute(f, lifc->remote, lifc->mask, 1);
+ if(ipcmp(lifc->local, v6loopback) == 0)
+ /* remove route for all node multicast */
+ v6delroute(f, v6allnodesN, v6allnodesNmask, 1);
+ else if(memcmp(lifc->local, v6linklocal, v6llpreflen) == 0)
+ /* remove route for all link multicast */
+ v6delroute(f, v6allnodesL, v6allnodesLmask, 1);
+ }
+
+ free(lifc);
+ return nil;
+}
+
+/*
+ * remove an address from an interface: argv[1] is the address,
+ * argv[2] the mask and optional argv[3] the remote address
+ * (default: address masked by mask).  all three must match.
+ * called with c->car locked
+ */
+char*
+ipifcrem(Ipifc *ifc, char **argv, int argc)
+{
+	uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen];
+	Iplifc *l;
+	char *err;
+
+	if(argc < 3)
+		return Ebadarg;
+	if(parseip(ip, argv[1]) == -1)
+		return Ebadip;
+	parseipmask(mask, argv[2]);
+	if(argc >= 4){
+		if(parseip(rem, argv[3]) == -1)
+			return Ebadip;
+	}else
+		maskip(ip, mask, rem);
+
+	WLOCK(ifc);
+
+	/*
+	 * look for the matching logical interface; if there is
+	 * none, l ends up nil and ipifcremlifc reports the
+	 * address as not being on this interface
+	 */
+	l = ifc->lifc;
+	while(l != nil && (memcmp(ip, l->local, IPaddrlen) != 0
+	|| memcmp(mask, l->mask, IPaddrlen) != 0
+	|| memcmp(rem, l->remote, IPaddrlen) != 0))
+		l = l->next;
+
+	err = ipifcremlifc(ifc, l);
+	WUNLOCK(ifc);
+	return err;
+}
+
+/*
+ * distribute routes to active interfaces like the TRIP linecards:
+ * every bound medium with an addroute hook is told of the route
+ */
+void
+ipifcaddroute(Fs *f, int vers, uchar *addr, uchar *mask, uchar *gate, int type)
+{
+	Conv **cp, **end;
+	Ipifc *ifc;
+	Medium *m;
+
+	end = &f->ipifc->conv[f->ipifc->nc];
+	for(cp = f->ipifc->conv; cp < end; cp++){
+		if(*cp == nil)
+			continue;
+		ifc = (Ipifc*)(*cp)->ptcl;
+		m = ifc->m;
+		if(m != nil && m->addroute != nil)
+			m->addroute(ifc, vers, addr, mask, gate, type);
+	}
+}
+
+void
+ipifcremroute(Fs *f, int vers, uchar *addr, uchar *mask)
+{
+	Conv **cp, **end;
+	Ipifc *ifc;
+	Medium *m;
+
+	end = &f->ipifc->conv[f->ipifc->nc];
+	for(cp = f->ipifc->conv; cp < end; cp++){
+		if(*cp == nil)
+			continue;
+		ifc = (Ipifc*)(*cp)->ptcl;
+		m = ifc->m;	/* only bound media with a remroute hook are told */
+		if(m != nil && m->remroute != nil)
+			m->remroute(ifc, vers, addr, mask);
+	}
+}
+
+/*
+ * associate an address with the interface. This wipes out any previous
+ * addresses. In effect: remove all the old logical interfaces, then
+ * add a new one from argv with ipifcadd. returns nil or an error string.
+ */
+static char*
+ipifcconnect(Conv* c, char **argv, int argc)
+{
+ char *err;
+ Ipifc *ifc;
+
+ ifc = (Ipifc*)c->ptcl;
+
+ if(ifc->m == nil)
+ return "ipifc not yet bound to device";
+
+ if(waserror()){ /* installed before WLOCK; the only error() below is raised with the lock held */
+ WUNLOCK(ifc);
+ nexterror();
+ }
+ WLOCK(ifc);
+ while(ifc->lifc){
+ err = ipifcremlifc(ifc, ifc->lifc);
+ if(err)
+ error(err);
+ }
+ WUNLOCK(ifc); /* released here because ipifcadd takes the write lock itself */
+ poperror();
+
+ err = ipifcadd(ifc, argv, argc, 0, nil);
+ if(err)
+ return err;
+
+ Fsconnected(c, nil);
+ return nil;
+}
+
+char*
+ipifcra6(Ipifc *ifc, char **argv, int argc)
+{
+	int i, val, vmax = ifc->rp.maxraint, vmin = ifc->rp.minraint;
+	char *key;
+
+	/* argv[0] is followed by keyword/value pairs.  settings are
+	 * applied as they are parsed: an unknown keyword returns
+	 * Ebadarg but leaves the earlier ones in place.
+	 */
+	if((argc - 1) % 2 != 0)
+		return Ebadarg;
+	for(i = 1; argc - i > 1; i += 2){
+		key = argv[i];
+		val = atoi(argv[i+1]);
+		if(strcmp(key, "recvra") == 0)
+			ifc->recvra6 = (val != 0);
+		else if(strcmp(key, "sendra") == 0)
+			ifc->sendra6 = (val != 0);
+		else if(strcmp(key, "mflag") == 0)
+			ifc->rp.mflag = (val != 0);
+		else if(strcmp(key, "oflag") == 0)
+			ifc->rp.oflag = (val != 0);
+		else if(strcmp(key, "maxraint") == 0)
+			ifc->rp.maxraint = val;
+		else if(strcmp(key, "minraint") == 0)
+			ifc->rp.minraint = val;
+		else if(strcmp(key, "linkmtu") == 0)
+			ifc->rp.linkmtu = val;
+		else if(strcmp(key, "reachtime") == 0)
+			ifc->rp.reachtime = val;
+		else if(strcmp(key, "rxmitra") == 0)
+			ifc->rp.rxmitra = val;
+		else if(strcmp(key, "ttl") == 0)
+			ifc->rp.ttl = val;
+		else if(strcmp(key, "routerlt") == 0)
+			ifc->rp.routerlt = val;
+		else
+			return Ebadarg;
+	}
+
+	/* consistency check: undo an inverted maxraint/minraint pair */
+	if(ifc->rp.maxraint < ifc->rp.minraint){
+		ifc->rp.maxraint = vmax;
+		ifc->rp.minraint = vmin;
+		return Ebadarg;
+	}
+	return nil;
+}
+
+/*
+ * non-standard control messages: dispatch on the verb in argv[0].
+ * returns nil, or an error string ("unsupported ctl" if unknown).
+ * called with c->car locked.
+ */
+static char*
+ipifcctl(Conv* c, char**argv, int argc)
+{
+	Ipifc *ifc = (Ipifc*)c->ptcl;
+	char *verb = argv[0];
+	int on;
+
+	if(strcmp(verb, "add") == 0)
+		return ipifcadd(ifc, argv, argc, 0, nil);
+	if(strcmp(verb, "try") == 0)
+		return ipifcadd(ifc, argv, argc, 1, nil);
+	if(strcmp(verb, "remove") == 0)
+		return ipifcrem(ifc, argv, argc);
+	if(strcmp(verb, "unbind") == 0)
+		return ipifcunbind(ifc);
+	if(strcmp(verb, "joinmulti") == 0)
+		return ipifcjoinmulti(ifc, argv, argc);
+	if(strcmp(verb, "leavemulti") == 0)
+		return ipifcleavemulti(ifc, argv, argc);
+	if(strcmp(verb, "mtu") == 0)
+		return ipifcsetmtu(ifc, argv, argc);
+	if(strcmp(verb, "add6") == 0)
+		return ipifcadd6(ifc, argv, argc);
+	if(strcmp(verb, "ra6") == 0)
+		return ipifcra6(ifc, argv, argc);
+	if(strcmp(verb, "reassemble") == 0){
+		ifc->reassemble = 1;
+		return nil;
+	}
+	if(strcmp(verb, "iprouting") == 0){
+		/* the argument defaults to 1 when omitted */
+		on = argc > 1 ? atoi(argv[1]) : 1;
+		iprouting(c->p->f, on);
+		return nil;
+	}
+	return "unsupported ctl";
+}
+
+int
+ipifcstats(Proto *ipifc, char *buf, int len)
+{
+ return ipstats(ipifc->f, buf, len); /* ipifc has no counters of its own; report those of its Fs */
+}
+
+void
+ipifcinit(Fs *f)
+{
+	Proto *p;
+
+	p = smalloc(sizeof(Proto));
+	p->name = "ipifc";
+	p->ipproto = -1;
+	p->nc = Maxmedia;
+	p->ptclsize = sizeof(Ipifc);
+	/* operations ipifc implements */
+	p->bind = ipifcbind;
+	p->connect = ipifcconnect;
+	p->create = ipifccreate;
+	p->close = ipifcclose;
+	p->ctl = ipifcctl;
+	p->state = ipifcstate;
+	p->local = ipifclocal;
+	p->stats = ipifcstats;
+	p->inuse = ipifcinuse;
+	/* operations it does not */
+	p->announce = nil;
+	p->rcv = nil;
+	p->advise = nil;
+	f->ipifc = p;				/* hack for ipifcremroute, findipifc, ... */
+	f->self = smalloc(sizeof(Ipselftab));	/* hack for ipforme */
+	Fsproto(f, p);
+}
+
+/*
+ * add to self routing cache: make address a (of the given type) one we answer to on behalf of lifc
+ * called with c->car locked
+ */
+static void
+addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type)
+{
+ Ipself *p;
+ Iplink *lp;
+ int h;
+
+ QLOCK(f->self);
+
+ /* see if the address already exists */
+ h = hashipa(a);
+ for(p = f->self->hash[h]; p; p = p->next)
+ if(memcmp(a, p->a, IPaddrlen) == 0)
+ break;
+
+ /* allocate a local address and add to hash chain */
+ if(p == nil){
+ p = smalloc(sizeof(*p));
+ ipmove(p->a, a);
+ p->type = type; /* an existing entry keeps the type it was created with */
+ p->next = f->self->hash[h];
+ f->self->hash[h] = p;
+
+ /* if the null address, accept all packets */
+ if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
+ f->self->acceptall = 1;
+ }
+
+ /* look for a link for this lifc */
+ for(lp = p->link; lp; lp = lp->selflink)
+ if(lp->lifc == lifc)
+ break;
+
+ /* allocate a lifc-to-local link and link to both */
+ if(lp == nil){
+ lp = smalloc(sizeof(*lp));
+ lp->ref = 1;
+ lp->lifc = lifc;
+ lp->self = p;
+ lp->selflink = p->link; /* head of the Ipself's chain of links */
+ p->link = lp;
+ lp->lifclink = lifc->link; /* head of the lifc's chain of links */
+ lifc->link = lp;
+
+ /* add to routing table */
+ if(isv4(a))
+ v4addroute(f, tifc, a+IPv4off, IPallbits+IPv4off,
+ a+IPv4off, type);
+ else
+ v6addroute(f, tifc, a, IPallbits, a, type);
+
+ if((type & Rmulti) && ifc->m->addmulti != nil)
+ (*ifc->m->addmulti)(ifc, a, lifc->local);
+ } else
+ lp->ref++; /* same address added again for this lifc: just count it */
+
+ QUNLOCK(f->self);
+}
+
+/*
+ * These structures are unlinked from their chains while other
+ * threads may be using them.  To avoid excessive locking they are
+ * parked on a list and freed by a later call, after a grace period.
+ * called with f->self locked
+ */
+static Iplink *freeiplink;
+static Ipself *freeipself;
+
+static void
+iplinkfree(Iplink *p)
+{
+	Iplink **l, *np;
+	ulong now = NOW;
+	/* free parked links whose grace period is over, then park p at the tail */
+	l = &freeiplink;
+	for(np = *l; np; np = *l){
+		if(np->expire <= now){	/* expired; entries still within their grace period must stay */
+			*l = np->next;
+			free(np);
+			continue;
+		}
+		l = &np->next;
+	}
+	p->expire = now + 5000;	/* give other threads 5 secs to get out */
+	p->next = nil;
+	*l = p;
+}
+
+static void
+ipselffree(Ipself *p)
+{
+	Ipself **l, *np;
+	ulong now = NOW;
+	/* free parked entries whose grace period is over, then park p at the tail */
+	l = &freeipself;
+	for(np = *l; np; np = *l){
+		if(np->expire <= now){	/* expired; entries still within their grace period must stay */
+			*l = np->next;
+			free(np);
+			continue;
+		}
+		l = &np->next;
+	}
+	p->expire = now + 5000;	/* give other threads 5 secs to get out */
+	p->next = nil;
+	*l = p;
+}
+
+/*
+ * Decrement reference for this address on this link.
+ * Unlink from selftab if this is the last ref. Does nothing if a is not cached or not linked to lifc.
+ * called with c->car locked
+ */
+static void
+remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a)
+{
+ Ipself *p, **l;
+ Iplink *link, **l_self, **l_lifc;
+
+ QLOCK(f->self);
+
+ /* find the unique selftab entry; l is left pointing at the pointer that refers to it */
+ l = &f->self->hash[hashipa(a)];
+ for(p = *l; p; p = *l){
+ if(ipcmp(p->a, a) == 0)
+ break;
+ l = &p->next;
+ }
+
+ if(p == nil)
+ goto out;
+
+ /*
+ * walk down links from an ifc looking for one
+ * that matches the selftab entry
+ */
+ l_lifc = &lifc->link;
+ for(link = *l_lifc; link; link = *l_lifc){
+ if(link->self == p)
+ break;
+ l_lifc = &link->lifclink;
+ }
+
+ if(link == nil)
+ goto out;
+
+ /*
+ * walk down the links from the selftab looking for
+ * the one we just found
+ */
+ l_self = &p->link;
+ for(link = *l_self; link; link = *l_self){
+ if(link == *l_lifc)
+ break;
+ l_self = &link->selflink;
+ }
+
+ if(link == nil)
+ panic("remselfcache"); /* the link is on the lifc's chain but not on the selftab entry's */
+
+ if(--(link->ref) != 0)
+ goto out; /* the address is still referenced through this link */
+
+ if((p->type & Rmulti) && ifc->m->remmulti != nil)
+ (*ifc->m->remmulti)(ifc, a, lifc->local);
+
+ /* ref == 0, remove from both chains and free the link */
+ *l_lifc = link->lifclink;
+ *l_self = link->selflink;
+ iplinkfree(link);
+
+ if(p->link != nil)
+ goto out; /* other logical interfaces still link to this address */
+
+ /* remove from routing table */
+ if(isv4(a))
+ v4delroute(f, a+IPv4off, IPallbits+IPv4off, 1);
+ else
+ v6delroute(f, a, IPallbits, 1);
+
+ /* no more links, remove from hash and free */
+ *l = p->next;
+ ipselffree(p);
+
+ /* if IPnoaddr, forget */
+ if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
+ f->self->acceptall = 0;
+
+out:
+ QUNLOCK(f->self);
+}
+
+static char *stformat = "%-44.44I %2.2d %4.4s\n"; /* ipselftabread line: address, number of links, route type */
+enum
+{
+ Nstformat= 41, /* NOTE(review): not referenced in the visible part of this file */
+};
+
+long
+ipselftabread(Fs *f, char *cp, ulong offset, int n)
+{
+ int i, m, nifc, off;
+ Ipself *p;
+ Iplink *link;
+ char state[8];
+ /* format the self cache, one line per address, into cp (at most n bytes); returns the byte count produced */
+ m = 0;
+ off = offset;
+ QLOCK(f->self);
+ for(i = 0; i < NHASH && m < n; i++){
+ for(p = f->self->hash[i]; p != nil && m < n; p = p->next){
+ nifc = 0;
+ for(link = p->link; link; link = link->selflink)
+ nifc++; /* count the links attached to this address */
+ routetype(p->type, state);
+ m += snprint(cp + m, n - m, stformat, p->a, nifc, state);
+ if(off > 0){ /* still before the requested offset: discard this line and charge its length */
+ off -= m;
+ m = 0;
+ }
+ }
+ }
+ QUNLOCK(f->self);
+ return m;
+}
+
+int
+iptentative(Fs *f, uchar *addr)
+{
+	Ipself *p;
+
+	/* tentative flag of the lifc first linked to addr in the self cache; */
+	/* 0 if addr is not cached.  the table is read without taking its lock */
+	for(p = f->self->hash[hashipa(addr)]; p != nil; p = p->next)
+		if(ipcmp(addr, p->a) == 0)
+			return p->link->lifc->tentative;
+	return 0;
+}
+
+/*
+ * is addr one of the addresses we answer to?  returns
+ *	0 - no match
+ *	otherwise the type recorded for the address in the
+ *	self cache (Runi, Rbcast or Rmulti as passed to
+ *	addselfcache), or Runi for any address while the
+ *	acceptall flag is set
+ */
+int
+ipforme(Fs *f, uchar *addr)
+{
+	Ipself *p;
+
+	for(p = f->self->hash[hashipa(addr)]; p != nil; p = p->next){
+		if(ipcmp(addr, p->a) == 0)
+			return p->type;
+	}
+
+	/* hack to say accept anything */
+	if(f->self->acceptall)
+		return Runi;
+	return 0;
+}
+
+/*
+ * find the ifc on same net as the remote system, preferring the
+ * logical interface with the greatest matching mask; broadcast and
+ * multicast fall back to the first configured ifc.  If none, return nil.
+ */
+Ipifc*
+findipifc(Fs *f, uchar *remote, int type)
+{
+	Ipifc *ifc, *best;
+	Iplifc *l;
+	Conv **tab;
+	int i, n;
+	uchar net[IPaddrlen], bestmask[IPaddrlen];
+
+	best = nil;
+	memset(bestmask, 0, IPaddrlen);
+	tab = f->ipifc->conv;
+	n = f->ipifc->nc;
+	/* find most specific match */
+	for(i = 0; i < n; i++){
+		if(tab[i] == nil)
+			continue;
+		ifc = (Ipifc*)tab[i]->ptcl;
+		for(l = ifc->lifc; l != nil; l = l->next){
+			maskip(remote, l->mask, net);
+			if(ipcmp(net, l->net) != 0)
+				continue;
+			if(best == nil || ipcmp(l->mask, bestmask) > 0){
+				best = ifc;
+				ipmove(bestmask, l->mask);
+			}
+		}
+	}
+	if(best != nil || (type & (Rbcast|Rmulti)) == 0)
+		return best;
+
+	/* for now for broadcast and multicast, just use first interface */
+	for(i = 0; i < n; i++){
+		if(tab[i] == nil)
+			continue;
+		ifc = (Ipifc*)tab[i]->ptcl;
+		if(ifc->lifc != nil)
+			return ifc;
+	}
+	return nil;
+}
+
+enum { /* IPv6 address classes; findprimaryipv6 and findlocalip compare them numerically, so order matters */
+ unknownv6, /* UGH */
+// multicastv6,
+ unspecifiedv6, /* starting value before any address has been chosen */
+ linklocalv6, /* returned by v6addrtype for link-local addresses */
+ globalv6, /* returned by v6addrtype for everything else */
+};
+
+int
+v6addrtype(uchar *addr)
+{
+	if(islinklocal(addr))
+		return linklocalv6;
+	if(isv6mcast(addr) && (addr[1] & 0xF) <= Link_local_scop)
+		return linklocalv6;	/* multicast whose scope is at most link-local */
+	return globalv6;
+}
+
+#define v6addrcurr(lifc) ((lifc)->preflt == ~0L || \
+ (lifc)->origint + (lifc)->preflt >= NOW/1000) /* true while lifc's preferred lifetime has not run out; ~0L means it never does */
+
+static void
+findprimaryipv6(Fs *f, uchar *local)
+{
+	int best, t;
+	Conv **cp, **end;
+	Ipifc *ifc;
+	Iplifc *l;
+
+	/*
+	 * find "best" (global > link local > unspecified)
+	 * local address; address must be current.
+	 * local is left as the unspecified address if there is none.
+	 */
+	ipmove(local, v6Unspecified);
+	best = unspecifiedv6;
+	end = &f->ipifc->conv[f->ipifc->nc];
+	for(cp = f->ipifc->conv; cp < end; cp++){
+		if(*cp == nil)
+			continue;
+		ifc = (Ipifc*)(*cp)->ptcl;
+		for(l = ifc->lifc; l != nil; l = l->next){
+			t = v6addrtype(l->local);
+			if(t <= best || !v6addrcurr(l))
+				continue;
+			ipmove(local, l->local);
+			best = t;
+			if(best == globalv6)
+				return;
+		}
+	}
+}
+
+/*
+ * copy the first local address of the first interface that has one
+ * into local.  local is left untouched if no interface has an address.
+ */
+static void
+findprimaryipv4(Fs *f, uchar *local)
+{
+	Conv **slot, **end;
+	Ipifc *ifc;
+	Iplifc *first;
+
+	end = &f->ipifc->conv[f->ipifc->nc];
+	for(slot = f->ipifc->conv; slot < end; slot++){
+		if(*slot == 0)
+			continue;
+		ifc = (Ipifc*)(*slot)->ptcl;
+		first = ifc->lifc;
+		if(first == nil)
+			continue;
+		ipmove(local, first->local);
+		return;
+	}
+}
+
+/*
+ * find the local address 'closest' to the remote system and copy it
+ * to local.  Nothing is returned; the result is only written to local.
+ *
+ * The route to remote is looked up first.  If one exists, an address
+ * on the route's interface is preferred: for V4 the first address
+ * whose network contains the gateway; for V6 the address whose scope
+ * (see v6addrtype) best matches that of remote, avoiding addresses
+ * for which v6addrcurr is false.  Otherwise fall back to
+ * findprimaryipv4/findprimaryipv6.
+ *
+ * f->ipifc is locked for the duration of the search.
+ *
+ * NOTE(review): local is preset to v6Unspecified only on the non-Rv4
+ * route branch, and findprimaryipv4 writes nothing when no interface
+ * has an address; callers presumably initialize local -- confirm.
+ */
+void
+findlocalip(Fs *f, uchar *local, uchar *remote)
+{
+ int version, atype = unspecifiedv6, atypel = unknownv6;
+ int atyper, deprecated;
+ uchar gate[IPaddrlen], gnet[IPaddrlen];
+ Ipifc *ifc;
+ Iplifc *lifc;
+ Route *r;
+
+ QLOCK(f->ipifc);
+ r = v6lookup(f, remote, nil);
+ /* remote is V4 if it starts with the v4-in-v6 prefix */
+ version = (memcmp(remote, v4prefix, IPv4off) == 0)? V4: V6;
+
+ if(r != nil){
+ ifc = r->ifc;
+ /* get the gateway in 16-byte form, whatever the route's family */
+ if(r->type & Rv4)
+ v4tov6(gate, r->v4.gate);
+ else {
+ ipmove(gate, r->v6.gate);
+ ipmove(local, v6Unspecified);
+ }
+
+ switch(version) {
+ case V4:
+ /* find ifc address closest to the gateway to use */
+ for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ maskip(gate, lifc->mask, gnet);
+ if(ipcmp(gnet, lifc->net) == 0){
+ ipmove(local, lifc->local);
+ goto out;
+ }
+ }
+ break;
+ case V6:
+ /* find ifc address with scope matching the destination */
+ atyper = v6addrtype(remote);
+ deprecated = 0;
+ for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ atypel = v6addrtype(lifc->local);
+ /* prefer appropriate scope */
+ if((atypel > atype && atype < atyper) ||
+ (atypel < atype && atype > atyper)){
+ ipmove(local, lifc->local);
+ deprecated = !v6addrcurr(lifc);
+ atype = atypel;
+ } else if(atypel == atype){
+ /* avoid deprecated addresses */
+ if(deprecated && v6addrcurr(lifc)){
+ ipmove(local, lifc->local);
+ atype = atypel;
+ deprecated = 0;
+ }
+ }
+ /* exact scope match with a current address: done */
+ if(atype == atyper && !deprecated)
+ goto out;
+ }
+ /* settle for an address whose scope is at least that of remote */
+ if(atype >= atyper)
+ goto out;
+ break;
+ default:
+ panic("findlocalip: version %d", version);
+ }
+ }
+
+ /* no route, or nothing suitable on its interface: use any primary address */
+ switch(version){
+ case V4:
+ findprimaryipv4(f, local);
+ break;
+ case V6:
+ findprimaryipv6(f, local);
+ break;
+ default:
+ panic("findlocalip2: version %d", version);
+ }
+
+out:
+ QUNLOCK(f->ipifc);
+}
+
+/*
+ * copy the first v4 address bound to ifc into addr (IPv4addrlen
+ * bytes).  Returns 1 on success, 0 if the interface has no v4 address.
+ */
+int
+ipv4local(Ipifc *ifc, uchar *addr)
+{
+	Iplifc *l;
+
+	l = ifc->lifc;
+	while(l != nil && !isv4(l->local))
+		l = l->next;
+	if(l == nil)
+		return 0;
+	memmove(addr, l->local+IPv4off, IPv4addrlen);
+	return 1;
+}
+
+/*
+ * copy the first non-tentative v6 address bound to ifc into addr.
+ * Returns 1 on success, 0 if there is none.
+ */
+int
+ipv6local(Ipifc *ifc, uchar *addr)
+{
+	Iplifc *l;
+
+	for(l = ifc->lifc; l != nil; l = l->next){
+		if(isv4(l->local) || l->tentative)
+			continue;
+		ipmove(addr, l->local);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * like ipv6local, but tentative addresses are accepted too.
+ * Returns SRC_UNI with the address copied to addr, or SRC_UNSPEC
+ * (addr untouched) when ifc has no v6 address.
+ */
+int
+ipv6anylocal(Ipifc *ifc, uchar *addr)
+{
+	Iplifc *l;
+
+	l = ifc->lifc;
+	while(l != nil && isv4(l->local))
+		l = l->next;
+	if(l == nil)
+		return SRC_UNSPEC;
+	ipmove(addr, l->local);
+	return SRC_UNI;
+}
+
+/*
+ * look for ip among the local addresses bound to ifc; returns the
+ * matching Iplifc, or nil if the address is not on this interface.
+ */
+Iplifc*
+iplocalonifc(Ipifc *ifc, uchar *ip)
+{
+	Iplifc *l;
+
+	l = ifc->lifc;
+	while(l != nil && ipcmp(ip, l->local) != 0)
+		l = l->next;
+	return l;
+}
+
+
+/*
+ * See if we're proxying for this address on this interface:
+ * the route to ip must have both Rifc and Rproxy set, and ip must
+ * fall in the remote network of one of ifc's addresses.
+ * Returns 1 if so, 0 otherwise.
+ */
+int
+ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip)
+{
+	Route *rt;
+	Iplifc *l;
+	uchar masked[IPaddrlen];
+
+	/* see if this is a direct connected pt to pt address */
+	rt = v6lookup(f, ip, nil);
+	if(rt == nil)
+		return 0;
+	if((rt->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
+		return 0;
+
+	/* see if this is on the right interface */
+	for(l = ifc->lifc; l != nil; l = l->next){
+		maskip(ip, l->mask, masked);
+		if(ipcmp(masked, l->remote) == 0)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * return the IP version (V4 or V6) if ip is a multicast address,
+ * 0 if it is not.
+ */
+int
+ipismulticast(uchar *ip)
+{
+	uchar first;
+
+	if(isv4(ip)){
+		/* v4 multicast: first octet in [0xe0, 0xf0) */
+		first = ip[IPv4off];
+		return (first >= 0xe0 && first < 0xf0) ? V4 : 0;
+	}
+	return ip[0] == 0xff ? V6 : 0;
+}
+/*
+ * like ipismulticast, but the v4 broadcast address IPv4bcast also
+ * counts: returns V4 or V6 for broadcast/multicast, else 0.
+ */
+int
+ipisbm(uchar *ip)
+{
+	if(!isv4(ip))
+		return ip[0] == 0xff ? V6 : 0;
+
+	if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
+		return V4;
+	if(ipcmp(ip, IPv4bcast) == 0)
+		return V4;
+	return 0;
+}
+
+
+/*
+ * add a multicast address to an interface, called with c->car locked
+ *
+ * The (ma, ia) pair is appended to c's multi list unless it is
+ * already there, then ma is entered as Rmulti in the self cache for
+ * every in-use interface that has ia as a local address.
+ */
+void
+ipifcaddmulti(Conv *c, uchar *ma, uchar *ia)
+{
+ Ipifc *ifc;
+ Iplifc *lifc;
+ Conv **p;
+ Ipmulti *multi, **l;
+ Fs *f;
+
+ f = c->p->f;
+
+ /* l ends up pointing at the list's terminating nil link */
+ for(l = &c->multi; *l; l = &(*l)->next)
+ if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
+ return; /* it's already there */
+
+ multi = *l = smalloc(sizeof(*multi));
+ ipmove(multi->ma, ma);
+ ipmove(multi->ia, ia);
+ multi->next = nil;
+
+ /*
+ * NOTE(review): this walk stops at the first nil slot of the conv
+ * array, unlike the loops elsewhere in this file that run to nc and
+ * skip empty slots -- confirm this is intended.
+ */
+ for(p = f->ipifc->conv; *p; p++){
+ if((*p)->inuse == 0)
+ continue;
+ ifc = (Ipifc*)(*p)->ptcl;
+ /* on error, release the interface lock before passing the error on */
+ if(waserror()){
+ WUNLOCK(ifc);
+ nexterror();
+ }
+ WLOCK(ifc);
+ for(lifc = ifc->lifc; lifc; lifc = lifc->next)
+ if(ipcmp(ia, lifc->local) == 0)
+ addselfcache(f, ifc, lifc, ma, Rmulti);
+ WUNLOCK(ifc);
+ poperror();
+ }
+}
+
+
+/*
+ * remove a multicast address from an interface, called with c->car locked
+ *
+ * The (ma, ia) entry is unlinked from c's multi list, ma is removed
+ * from the self cache of every in-use interface that has ia as a
+ * local address, and the entry is freed.  Nothing happens if the
+ * pair is not on the list.
+ */
+void
+ipifcremmulti(Conv *c, uchar *ma, uchar *ia)
+{
+ Ipmulti *multi, **l;
+ Iplifc *lifc;
+ Conv **p;
+ Ipifc *ifc;
+ Fs *f;
+
+ f = c->p->f;
+
+ /* find the link that points at the matching entry */
+ for(l = &c->multi; *l; l = &(*l)->next)
+ if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
+ break;
+
+ multi = *l;
+ if(multi == nil)
+ return; /* we don't have it open */
+
+ *l = multi->next;
+
+ /* the walk stops at the first nil slot of the conv array */
+ for(p = f->ipifc->conv; *p; p++){
+ if((*p)->inuse == 0)
+ continue;
+
+ ifc = (Ipifc*)(*p)->ptcl;
+ /* on error, release the interface lock before passing the error on */
+ if(waserror()){
+ WUNLOCK(ifc);
+ nexterror();
+ }
+ WLOCK(ifc);
+ for(lifc = ifc->lifc; lifc; lifc = lifc->next)
+ if(ipcmp(ia, lifc->local) == 0)
+ remselfcache(f, ifc, lifc, ma);
+ WUNLOCK(ifc);
+ poperror();
+ }
+
+ free(multi);
+}
+
+/*
+ * make lifc's join and leave multicast groups
+ *
+ * Placeholder: the arguments are ignored and nil is always returned.
+ */
+static char*
+ipifcjoinmulti(Ipifc *ifc, char **argv, int argc)
+{
+ return nil;
+}
+
+/*
+ * Placeholder for leaving a multicast group: the arguments are
+ * ignored and nil is always returned.
+ */
+static char*
+ipifcleavemulti(Ipifc *ifc, char **argv, int argc)
+{
+ return nil;
+}
+
+/*
+ * register ip on every other interface that will proxy for us,
+ * i.e. whose medium has the needed hook and which has a local
+ * address whose remote network contains ip.  For v6 the
+ * solicited-node multicast address is added to the self cache and
+ * an arp entry is made; for v4 the medium's areg hook is called.
+ * At most one address per interface is registered.
+ */
+static void
+ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip)
+{
+	int v6;
+	Conv **slot, **end;
+	Ipifc *other;
+	Iplifc *l;
+	Medium *med;
+	uchar net[IPaddrlen];
+
+	v6 = !isv4(ip);
+	end = &f->ipifc->conv[f->ipifc->nc];
+
+	for(slot = f->ipifc->conv; slot < end; slot++){
+		if(*slot == nil)
+			continue;
+		other = (Ipifc*)(*slot)->ptcl;
+		if(other == ifc)
+			continue;
+
+		RLOCK(other);
+		med = other->m;
+		/* v6 requires an addmulti hook (only tested for), v4 an areg hook */
+		if(med == nil || (v6 ? med->addmulti == nil : med->areg == nil)){
+			RUNLOCK(other);
+			continue;
+		}
+		for(l = other->lifc; l != nil; l = l->next){
+			maskip(ip, l->mask, net);
+			if(ipcmp(net, l->remote) != 0)
+				continue;
+			if(v6){
+				/* add solicited-node multicast addr */
+				ipv62smcast(net, ip);
+				addselfcache(f, other, l, net, Rmulti);
+				arpenter(f, V6, ip, other->mac, 6, 0);
+			}else
+				(*med->areg)(other, ip);
+			break;
+		}
+		RUNLOCK(other);
+	}
+}
+
+
+/*
+ * added for new v6 mesg types
+ *
+ * install gate as the v6 default route with tag "ra".  Unless force
+ * is set, an existing default route with any other tag is left alone.
+ */
+static void
+adddefroute6(Fs *f, uchar *gate, int force)
+{
+	Route *cur;
+	int replace;
+
+	cur = v6lookup(f, v6Unspecified, nil);
+
+	/*
+	 * route entries generated by all other means take precedence
+	 * over router announcements.
+	 */
+	replace = cur == nil || force || strcmp(cur->tag, "ra") == 0;
+	if(!replace)
+		return;
+
+	v6delroute(f, v6Unspecified, v6Unspecified, 1);
+	v6addroute(f, "ra", v6Unspecified, v6Unspecified, gate, 0);
+}
+
+/* NOTE(review): Ngates is not referenced in the code visible here; confirm it is still needed */
+enum {
+ Ngates = 3,
+};
+
+/*
+ * handle an "add6" request: argv[1] is a v6 prefix, optionally
+ * followed by prefix length, onlink flag, autoflag, valid lifetime
+ * and preferred lifetime (argv[2..6]).  The prefix is turned into a
+ * full address with the medium's pref2addr hook and the MAC address,
+ * and handed to ipifcadd together with a freshly filled-in Iplifc.
+ *
+ * Returns nil on success or an error string; Ebadarg for malformed
+ * arguments or when the interface's medium cannot build addresses.
+ */
+char*
+ipifcadd6(Ipifc *ifc, char**argv, int argc)
+{
+	int plen = 64;
+	long origint = NOW / 1000, preflt = ~0L, validlt = ~0L;
+	char addr[40], preflen[6];
+	char *params[3];
+	uchar autoflag = 1, onlink = 1;
+	uchar prefix[IPaddrlen];
+	Iplifc *lifc;
+
+	/* later arguments are optional; each case picks up one and falls into the next */
+	switch(argc) {
+	case 7:
+		preflt = atoi(argv[6]);
+		/* fall through */
+	case 6:
+		validlt = atoi(argv[5]);
+		/* fall through */
+	case 5:
+		autoflag = atoi(argv[4]);
+		/* fall through */
+	case 4:
+		onlink = atoi(argv[3]);
+		/* fall through */
+	case 3:
+		plen = atoi(argv[2]);
+		/* fall through */
+	case 2:
+		break;
+	default:
+		return Ebadarg;
+	}
+
+	if (parseip(prefix, argv[1]) != 6 || validlt < preflt || plen < 0 ||
+	    plen > 64 || islinklocal(prefix))
+		return Ebadarg;
+
+	/*
+	 * check the medium before allocating, so that a failure here
+	 * cannot leak lifc (and an interface without a medium does not
+	 * cause a nil dereference).
+	 */
+	if(ifc->m == nil || ifc->m->pref2addr == nil)
+		return Ebadarg;
+
+	lifc = smalloc(sizeof(Iplifc));
+	lifc->onlink = (onlink != 0);
+	lifc->autoflag = (autoflag != 0);
+	lifc->validlt = validlt;
+	lifc->preflt = preflt;
+	lifc->origint = origint;
+
+	/* issue "add" ctl msg for v6 link-local addr and prefix len */
+	ifc->m->pref2addr(prefix, ifc->mac);	/* mac → v6 link-local addr */
+	sprint(addr, "%I", prefix);
+	sprint(preflen, "/%d", plen);
+	params[0] = "add";
+	params[1] = addr;
+	params[2] = preflen;
+
+	return ipifcadd(ifc, params, 3, 0, lifc);
+}
diff --git a/src/9vx/a/ip/ipmux.c b/src/9vx/a/ip/ipmux.c
@@ -0,0 +1,842 @@
+/*
+ * IP packet filter
+ */
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+
+typedef struct Ipmuxrock Ipmuxrock;
+typedef struct Ipmux Ipmux;
+
+/*
+ * layout of an IPv4 header as it appears at the start of a packet,
+ * followed by the first data byte.
+ */
+typedef struct Myip4hdr Myip4hdr;
+struct Myip4hdr
+{
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* ip->identification */
+ uchar frag[2]; /* Fragment information */
+ uchar ttl; /* Time to live */
+ uchar proto; /* Protocol */
+ uchar cksum[2]; /* Header checksum */
+ uchar src[4]; /* IP source */
+ uchar dst[4]; /* IP destination */
+
+ uchar data[1]; /* start of data */
+};
+/*
+ * nil header pointer; it is never dereferenced, only used so that
+ * the address of a member (e.g. ipoff->dst) yields that field's
+ * byte offset within the header.
+ */
+Myip4hdr *ipoff = 0;
+
+/*
+ * Txxxx: which field a filter node looks at (Ipmux.type); ipmuxcmp
+ * treats lesser values as more important.
+ * Cxxxx: how the comparison is done (Ipmux.ctype), chosen by
+ * parsemux from the item length and whether a mask was given.
+ */
+enum
+{
+ Tproto,
+ Tdata,
+ Tiph,
+ Tdst,
+ Tsrc,
+ Tifc,
+
+ Cother = 0, /* generic byte-by-byte masked compare */
+ Cbyte, /* single byte */
+ Cmbyte, /* single byte with mask */
+ Cshort, /* single short */
+ Cmshort, /* single short with mask */
+ Clong, /* single long */
+ Cmlong, /* single long with mask */
+ Cifc, /* interface address */
+ Cmifc, /* interface address with mask */
+};
+
+/* printable name of each field type, indexed by Txxxx */
+char *ftname[] =
+{
+[Tproto] "proto",
+[Tdata] "data",
+[Tiph] "iph",
+[Tdst] "dst",
+[Tsrc] "src",
+[Tifc] "ifc",
+};
+
+/*
+ * a node in the decision tree
+ *
+ * Each node compares one field of the packet against one or more
+ * values; ipmuxiput follows yes when the comparison succeeds and no
+ * when it fails.
+ */
+struct Ipmux
+{
+ Ipmux *yes; /* next node to try if this one matches */
+ Ipmux *no; /* next node to try if it does not */
+ uchar type; /* type of field(Txxxx) */
+ uchar ctype; /* type of comparison(Cxxxx) */
+ uchar len; /* length in bytes of item to compare */
+ uchar n; /* number of items val points to */
+ short off; /* offset of comparison */
+ short eoff; /* end offset of comparison */
+ uchar skiphdr; /* should offset start after ipheader */
+ uchar *val; /* n values of len bytes each */
+ uchar *mask; /* len bytes, applied to the packet before comparing */
+ uchar *e; /* val+n*len*/
+
+ int ref; /* so we can garbage collect */
+ Conv *conv; /* conversation that receives matching packets, if any */
+};
+
+/*
+ * someplace to hold per conversation data
+ *
+ * chain is the conversation's own copy of its filter chain, kept so
+ * that ipmuxclose can remove it from the shared tree and ipmuxstate
+ * can print it.
+ */
+struct Ipmuxrock
+{
+ Ipmux *chain;
+};
+
+static int ipmuxsprint(Ipmux*, int, char*, int);
+static void ipmuxkick(void *x);
+
+/*
+ * return a pointer to the first character of p that is neither a
+ * blank nor a tab (possibly the terminating NUL).
+ */
+static char*
+skipwhite(char *p)
+{
+	char *q;
+
+	for(q = p; *q == ' ' || *q == '\t'; q++)
+		;
+	return q;
+}
+
+/*
+ * split p at the first occurrence of c: the separator is overwritten
+ * with NUL and a pointer to the first non-white character after it
+ * is returned.  Returns nil if c does not occur or nothing but white
+ * space follows it (the separator has still been overwritten in the
+ * latter case).
+ */
+static char*
+follows(char *p, char c)
+{
+	char *sep, *rest;
+
+	sep = strchr(p, c);
+	if(sep == nil)
+		return nil;
+	*sep = 0;
+	rest = skipwhite(sep + 1);
+	return *rest != 0 ? rest : nil;
+}
+
+/*
+ * parse the operand (field selector) at the start of a filter term:
+ * "dst", "src", "ifc", "proto", "data[off]", "data[off:end]",
+ * "iph[off]" or "iph[off:end]".  Returns a newly allocated Ipmux
+ * with type, off, len and skiphdr filled in, n = 1, ref = 1 and no
+ * val or mask; returns nil on a syntax error.
+ *
+ * NOTE(review): p is advanced past the operand but never stored
+ * back through pp, so the caller's pointer is left unchanged.
+ */
+static Ipmux*
+parseop(char **pp)
+{
+ char *p = *pp;
+ int type, off, end, len;
+ Ipmux *f;
+
+ p = skipwhite(p);
+ if(strncmp(p, "dst", 3) == 0){
+ type = Tdst;
+ /* member address relative to the nil ipoff is the field's byte offset */
+ off = (ulong)(ipoff->dst);
+ len = IPv4addrlen;
+ p += 3;
+ }
+ else if(strncmp(p, "src", 3) == 0){
+ type = Tsrc;
+ off = (ulong)(ipoff->src);
+ len = IPv4addrlen;
+ p += 3;
+ }
+ else if(strncmp(p, "ifc", 3) == 0){
+ type = Tifc;
+ /* ipmuxiput compares ifc nodes with the interface address, not packet bytes */
+ off = -IPv4addrlen;
+ len = IPv4addrlen;
+ p += 3;
+ }
+ else if(strncmp(p, "proto", 5) == 0){
+ type = Tproto;
+ off = (ulong)&(ipoff->proto);
+ len = 1;
+ p += 5;
+ }
+ else if(strncmp(p, "data", 4) == 0 || strncmp(p, "iph", 3) == 0){
+ if(strncmp(p, "data", 4) == 0) {
+ type = Tdata;
+ p += 4;
+ }
+ else {
+ type = Tiph;
+ p += 3;
+ }
+ /* expect "[off]" or "[off:end]"; both bounds are inclusive */
+ p = skipwhite(p);
+ if(*p != '[')
+ return nil;
+ p++;
+ off = strtoul(p, &p, 0);
+ if(off < 0 || off > (64-IP4HDR))
+ return nil;
+ p = skipwhite(p);
+ if(*p != ':')
+ end = off;
+ else {
+ p++;
+ p = skipwhite(p);
+ end = strtoul(p, &p, 0);
+ if(end < off)
+ return nil;
+ p = skipwhite(p);
+ }
+ if(*p != ']')
+ return nil;
+ p++;
+ /*
+ * NOTE(review): end has no upper bound here, while len is
+ * later stored in the uchar f->len -- confirm large ranges
+ * cannot be requested or should be rejected.
+ */
+ len = end - off + 1;
+ }
+ else
+ return nil;
+
+ f = smalloc(sizeof(*f));
+ f->type = type;
+ f->len = len;
+ f->off = off;
+ f->val = nil;
+ f->mask = nil;
+ f->n = 1;
+ f->ref = 1;
+ /* data offsets are relative to the end of the ip header */
+ if(type == Tdata)
+ f->skiphdr = 1;
+ else
+ f->skiphdr = 0;
+
+ return f;
+}
+
+/*
+ * value of one hexadecimal digit (either case); any other character
+ * yields 0.
+ */
+static int
+htoi(char x)
+{
+	if('0' <= x && x <= '9')
+		return x - '0';
+	if('a' <= x && x <= 'f')
+		return x - 'a' + 10;
+	if('A' <= x && x <= 'F')
+		return x - 'A' + 10;
+	return 0;
+}
+
+/*
+ * convert the two hex digits at p[0] and p[1] to a byte value;
+ * p[0] is the high nibble.
+ */
+static int
+hextoi(char *p)
+{
+	int hi, lo;
+
+	hi = htoi(p[0]);
+	lo = htoi(p[1]);
+	return (hi << 4) | lo;
+}
+
+/*
+ * convert the hex string p into at most len bytes at v, two digits
+ * per byte.  Conversion stops at the end of the string; bytes of v
+ * that are not reached are left untouched.  A trailing single digit
+ * is taken as the high nibble of a final byte.
+ */
+static void
+parseval(uchar *v, char *p, int len)
+{
+	while(*p && len-- > 0){
+		*v++ = hextoi(p);
+		/*
+		 * with an odd number of digits p[1] is the terminator;
+		 * stepping by two would skip it and run off the string.
+		 */
+		if(p[1] == 0)
+			break;
+		p += 2;
+	}
+}
+
+/*
+ * parse one filter term of the form  operand[&mask]=val[|val...]
+ * into a newly allocated Ipmux node.  p is modified in place (the
+ * separators are overwritten).  Returns nil on a syntax error, with
+ * everything allocated so far freed.
+ *
+ * Addresses (src, dst, ifc) are parsed with v4parseip; proto, data
+ * and iph values and masks are parsed as hex digit pairs by
+ * parseval.  A mask is not accepted for proto.  When no mask is
+ * given an all-ones mask is installed.
+ */
+static Ipmux*
+parsemux(char *p)
+{
+ int n, nomask;
+ Ipmux *f;
+ char *val;
+ char *mask;
+ char *vals[20];
+ uchar *v;
+
+ /* parse operand */
+ f = parseop(&p);
+ if(f == nil)
+ return nil;
+
+ /* find value */
+ val = follows(p, '=');
+ if(val == nil)
+ goto parseerror;
+
+ /* parse mask */
+ /* the '=' is now a NUL, so only the operand part is searched for '&' */
+ mask = follows(p, '&');
+ if(mask != nil){
+ switch(f->type){
+ case Tsrc:
+ case Tdst:
+ case Tifc:
+ f->mask = smalloc(f->len);
+ v4parseip(f->mask, mask);
+ break;
+ case Tdata:
+ case Tiph:
+ f->mask = smalloc(f->len);
+ parseval(f->mask, mask, f->len);
+ break;
+ default:
+ goto parseerror;
+ }
+ nomask = 0;
+ } else {
+ nomask = 1;
+ f->mask = smalloc(f->len);
+ memset(f->mask, 0xff, f->len);
+ }
+
+ /* parse vals */
+ /* at most nelem(vals) alternatives, separated by '|' */
+ f->n = getfields(val, vals, sizeof(vals)/sizeof(char*), 1, "|");
+ if(f->n == 0)
+ goto parseerror;
+ f->val = smalloc(f->n*f->len);
+ v = f->val;
+ for(n = 0; n < f->n; n++){
+ switch(f->type){
+ case Tsrc:
+ case Tdst:
+ case Tifc:
+ v4parseip(v, vals[n]);
+ break;
+ case Tproto:
+ case Tdata:
+ case Tiph:
+ parseval(v, vals[n], f->len);
+ break;
+ }
+ v += f->len;
+ }
+
+ f->eoff = f->off + f->len;
+ f->e = f->val + f->n*f->len;
+ /*
+ * a single value of 1, 2 or 4 bytes gets a specialized
+ * comparison; everything else uses the generic Cother loop.
+ */
+ f->ctype = Cother;
+ if(f->n == 1){
+ switch(f->len){
+ case 1:
+ f->ctype = nomask ? Cbyte : Cmbyte;
+ break;
+ case 2:
+ f->ctype = nomask ? Cshort : Cmshort;
+ break;
+ case 4:
+ if(f->type == Tifc)
+ f->ctype = nomask ? Cifc : Cmifc;
+ else
+ f->ctype = nomask ? Clong : Cmlong;
+ break;
+ }
+ }
+ return f;
+
+parseerror:
+ if(f->mask)
+ free(f->mask);
+ if(f->val)
+ free(f->val);
+ free(f);
+ return nil;
+}
+
+/*
+ * Compare relative ordering of two ipmuxs. This doesn't compare the
+ * values, just the fields being looked at.
+ *
+ * The keys, in order: field type, absolute offset (data offsets are
+ * shifted by the offset of the data member of Myip4hdr), match
+ * length (longer first), then the masks.
+ *
+ * returns: <0 if a is a more specific match
+ * 0 if a and b are matching on the same fields
+ * >0 if b is a more specific match
+ */
+static int
+ipmuxcmp(Ipmux *a, Ipmux *b)
+{
+ int n;
+
+ /* compare types, lesser ones are more important */
+ n = a->type - b->type;
+ if(n != 0)
+ return n;
+
+ /* compare offsets, call earlier ones more specific */
+ n = (a->off+((int)a->skiphdr)*(ulong)ipoff->data) -
+ (b->off+((int)b->skiphdr)*(ulong)ipoff->data);
+ if(n != 0)
+ return n;
+
+ /* compare match lengths, longer ones are more specific */
+ n = b->len - a->len;
+ if(n != 0)
+ return n;
+
+ /*
+ * if we get here we have two entries matching
+ * the same bytes of the record. Now check
+ * the mask for equality. Longer masks are
+ * more specific.
+ */
+ if(a->mask != nil && b->mask == nil)
+ return -1;
+ if(a->mask == nil && b->mask != nil)
+ return 1;
+ if(a->mask != nil && b->mask != nil){
+ /* lengths are equal at this point, so a->len covers both masks */
+ n = memcmp(b->mask, a->mask, a->len);
+ if(n != 0)
+ return n;
+ }
+ return 0;
+}
+
+/*
+ * Compare the value sets of two ipmuxs for which ipmuxcmp returned
+ * 0.  Sets of different total size differ (b's size minus a's is
+ * returned); otherwise the result is that of memcmp on the values.
+ */
+static int
+ipmuxvalcmp(Ipmux *a, Ipmux *b)
+{
+	int asize, bsize;
+
+	asize = a->len*a->n;
+	bsize = b->len*b->n;
+	if(bsize != asize)
+		return bsize - asize;
+	return memcmp(a->val, b->val, asize);
+}
+
+/*
+ * insert f into the chain at *l, which is linked through the yes
+ * pointers and kept in canonical ipmuxcmp order: f goes in front of
+ * the first node it is strictly more specific than.
+ */
+static void
+ipmuxchain(Ipmux **l, Ipmux *f)
+{
+	Ipmux *cur;
+
+	while((cur = *l) != nil && ipmuxcmp(f, cur) >= 0)
+		l = &cur->yes;
+	f->yes = *l;
+	*l = f;
+}
+
+/*
+ * copy a tree
+ *
+ * Every node reachable through yes and no is duplicated, with its
+ * own copy of the values.  The mask pointer, like all other fields,
+ * is copied verbatim by the structure assignment, so the copy and
+ * the original share one mask buffer.
+ */
+static Ipmux*
+ipmuxcopy(Ipmux *f)
+{
+ Ipmux *nf;
+
+ if(f == nil)
+ return nil;
+ nf = smalloc(sizeof *nf);
+ *nf = *f;
+ nf->no = ipmuxcopy(f->no);
+ nf->yes = ipmuxcopy(f->yes);
+ /* give the copy its own values and fix up the end pointer */
+ nf->val = smalloc(f->n*f->len);
+ nf->e = nf->val + f->len*f->n;
+ memmove(nf->val, f->val, f->n*f->len);
+ return nf;
+}
+
+/*
+ * free a single node and its values.  f must not be nil.  The nodes
+ * it links to are not touched, and the mask is not freed
+ * (ipmuxcopy shares mask buffers between nodes).
+ */
+static void
+ipmuxfree(Ipmux *f)
+{
+ if(f->val != nil)
+ free(f->val);
+ free(f);
+}
+
+/*
+ * free a whole tree: f and every node reachable from it through
+ * the yes and no links.  A nil f is accepted.
+ *
+ * The descent is recursive so that chains and subtrees more than
+ * one level deep are released as well.
+ */
+static void
+ipmuxtreefree(Ipmux *f)
+{
+	if(f == nil)
+		return;
+	ipmuxtreefree(f->no);
+	ipmuxtreefree(f->yes);
+	ipmuxfree(f);
+}
+
+/*
+ * merge two trees
+ *
+ * Returns the root of the merged tree, built out of the nodes of a
+ * and b themselves (plus copies where a subtree has to appear on
+ * both sides of a test).
+ */
+static Ipmux*
+ipmuxmerge(Ipmux *a, Ipmux *b)
+{
+ int n;
+ Ipmux *f;
+
+ if(a == nil)
+ return b;
+ if(b == nil)
+ return a;
+ n = ipmuxcmp(a, b);
+ if(n < 0){
+ /* a tests an earlier field: b must be reachable whichever way a's test goes */
+ f = ipmuxcopy(b);
+ a->yes = ipmuxmerge(a->yes, b);
+ a->no = ipmuxmerge(a->no, f);
+ return a;
+ }
+ if(n > 0){
+ /* the mirror case: b becomes the root and a goes under both branches */
+ f = ipmuxcopy(a);
+ b->yes = ipmuxmerge(b->yes, a);
+ b->no = ipmuxmerge(b->no, f);
+ return b;
+ }
+ if(ipmuxvalcmp(a, b) == 0){
+ /* identical test: share a, count the extra user and drop b */
+ a->yes = ipmuxmerge(a->yes, b->yes);
+ a->no = ipmuxmerge(a->no, b->no);
+ a->ref++;
+ ipmuxfree(b);
+ return a;
+ }
+ /* same field, different values: b is tried when a fails */
+ a->no = ipmuxmerge(a->no, b);
+ return a;
+}
+
+/*
+ * remove a chain from a demux tree. This is like merging except that
+ * we remove instead of insert.
+ *
+ * l points at the link holding the (sub)tree, f at the chain to
+ * remove.  Returns 0 once the whole chain has been removed and a
+ * negative value if it could not be found.
+ */
+static int
+ipmuxremove(Ipmux **l, Ipmux *f)
+{
+ int n, rv;
+ Ipmux *ft;
+
+ if(f == nil)
+ return 0; /* we've removed it all */
+ if(*l == nil)
+ return -1;
+
+ ft = *l;
+ n = ipmuxcmp(ft, f);
+ if(n < 0){
+ /* *l is matching an earlier field, descend both paths */
+ rv = ipmuxremove(&ft->yes, f);
+ rv += ipmuxremove(&ft->no, f);
+ return rv;
+ }
+ if(n > 0){
+ /* f represents an earlier field than *l, this should be impossible */
+ return -1;
+ }
+
+ /* if we get here f and *l are comparing the same fields */
+ if(ipmuxvalcmp(ft, f) != 0){
+ /* different values mean mutually exclusive */
+ return ipmuxremove(&ft->no, f);
+ }
+
+ /* we found a match */
+ if(--(ft->ref) == 0){
+ /*
+ * a dead node implies the whole yes side is also dead.
+ * since our chain is constrained to be on that side,
+ * we're done.
+ */
+ ipmuxtreefree(ft->yes);
+ /* splice the no side into the place of the dead node */
+ *l = ft->no;
+ ipmuxfree(ft);
+ return 0;
+ }
+
+ /*
+ * free the rest of the chain. it is constrained to match the
+ * yes side.
+ */
+ return ipmuxremove(&ft->yes, f->yes);
+}
+
+/*
+ * connection request is a semicolon-separated list of filters
+ * e.g. proto=17;data[0:4]=11aa22bb;ifc=135.104.9.2&255.255.255.0
+ *
+ * there's no protection against overlapping specs.
+ *
+ * The filters are parsed into a chain kept with the conversation,
+ * and a copy of the chain is merged into the protocol's tree.
+ * Returns nil on success, Ebadarg on a malformed request.
+ */
+static char*
+ipmuxconnect(Conv *c, char **argv, int argc)
+{
+ int i, n;
+ char *field[10];
+ Ipmux *mux, *chain;
+ Ipmuxrock *r;
+ Fs *f;
+
+ f = c->p->f;
+
+ if(argc != 2)
+ return Ebadarg;
+
+ n = getfields(argv[1], field, nelem(field), 1, ";");
+ if(n <= 0)
+ return Ebadarg;
+
+ chain = nil;
+ mux = nil;
+ for(i = 0; i < n; i++){
+ mux = parsemux(field[i]);
+ if(mux == nil){
+ ipmuxtreefree(chain);
+ return Ebadarg;
+ }
+ ipmuxchain(&chain, mux);
+ }
+ if(chain == nil)
+ return Ebadarg;
+ /*
+ * NOTE(review): mux is the node parsed last, which ipmuxchain
+ * may have placed anywhere in the sorted chain, not necessarily
+ * at its end -- confirm that is where conv is meant to go.
+ */
+ mux->conv = c;
+
+ /* save a copy of the chain so we can later remove it */
+ mux = ipmuxcopy(chain);
+ r = (Ipmuxrock*)(c->ptcl);
+ r->chain = chain;
+
+ /* add the chain to the protocol demultiplexor tree */
+ WLOCK(f);
+ f->ipmux->priv = ipmuxmerge(f->ipmux->priv, mux);
+ WUNLOCK(f);
+
+ Fsconnected(c, nil);
+ return nil;
+}
+
+/*
+ * format the conversation's own filter chain into state (at most n
+ * bytes); returns what ipmuxsprint returns.
+ */
+static int
+ipmuxstate(Conv *c, char *state, int n)
+{
+	Ipmux *chain;
+
+	chain = ((Ipmuxrock*)(c->ptcl))->chain;
+	return ipmuxsprint(chain, 0, state, n);
+}
+
+/*
+ * set up a new conversation: 64K read and write queues (writes are
+ * drained by ipmuxkick) and an empty filter chain.
+ */
+static void
+ipmuxcreate(Conv *c)
+{
+	enum { Qlimit = 64*1024 };
+	Ipmuxrock *rock;
+
+	c->rq = qopen(Qlimit, Qmsg, 0, c);
+	c->wq = qopen(Qlimit, Qkick, ipmuxkick, c);
+
+	rock = (Ipmuxrock*)(c->ptcl);
+	rock->chain = nil;
+}
+
+/* announce is not supported: always returns an error string, ignoring its arguments */
+static char*
+ipmuxannounce(Conv* _, char** __, int ___)
+{
+ return "ipmux does not support announce";
+}
+
+/*
+ * close a conversation: shut its queues, clear its addresses and
+ * ports, remove its filter chain from the protocol's tree and free
+ * the conversation's own copy of the chain.
+ */
+static void
+ipmuxclose(Conv *c)
+{
+	Ipmux *root;
+	Ipmuxrock *r;
+	Fs *f = c->p->f;
+
+	r = (Ipmuxrock*)(c->ptcl);
+
+	qclose(c->rq);
+	qclose(c->wq);
+	qclose(c->eq);
+	ipmove(c->laddr, IPnoaddr);
+	ipmove(c->raddr, IPnoaddr);
+	c->lport = 0;
+	c->rport = 0;
+
+	WLOCK(f);
+	root = (Ipmux*)c->p->priv;
+	ipmuxremove(&root, r->chain);
+	/*
+	 * ipmuxremove may unlink and free the root node itself; store
+	 * the possibly new root back so that priv never points at
+	 * freed memory.
+	 */
+	c->p->priv = root;
+	WUNLOCK(f);
+	ipmuxtreefree(r->chain);
+	r->chain = nil;
+}
+
+/*
+ * write-queue kick routine: take one fully formed ip packet off the
+ * conversation's write queue and pass it down the stack, to ipoput6
+ * if the version nibble says v6 and to ipoput4 otherwise.
+ */
+static void
+ipmuxkick(void *x)
+{
+	Conv *c;
+	Block *bp;
+	Myip4hdr *hdr;
+
+	c = x;
+	bp = qget(c->wq);
+	if(bp == nil)
+		return;
+
+	hdr = (Myip4hdr*)(bp->rp);
+	if((hdr->vihl & 0xF0) == IP_VER6)
+		ipoput6(c->p->f, bp, 0, ((Ip6hdr*)hdr)->ttl, 0, nil);
+	else
+		ipoput4(c->p->f, bp, 0, hdr->ttl, hdr->tos, nil);
+}
+
+/*
+ * input routine: run the packet in bp through the filter tree.  If
+ * a node with a conversation matches, the packet, prefixed with the
+ * receiving interface's address, is queued to the last such
+ * conversation found on the path.  Otherwise the packet is handed to
+ * the handler registered for its protocol, or freed if none exists.
+ *
+ * Data filters (skiphdr set) are offset by the ip header length
+ * taken from the packet, both for the bounds check and for every
+ * kind of comparison.
+ *
+ * NOTE(review): the Cifc/Cmifc cases and the delivery path use
+ * ifc->lifc without checking it for nil, and an ifc node with
+ * several values falls into the generic loop with a negative offset
+ * -- confirm these cannot occur.
+ */
+static void
+ipmuxiput(Proto *p, Ipifc *ifc, Block *bp)
+{
+	int len, hl, skip;
+	Fs *f = p->f;
+	uchar *m, *h, *v, *e, *ve, *hp, *base;
+	Conv *c;
+	Ipmux *mux;
+	Myip4hdr *ip;
+	Ip6hdr *ip6;
+
+	ip = (Myip4hdr*)bp->rp;
+	hl = (ip->vihl&0x0F)<<2;
+
+	if(p->priv == nil)
+		goto nomatch;
+
+	h = bp->rp;
+	len = BLEN(bp);
+
+	/* run the v4 filter */
+	RLOCK(f);
+	c = nil;
+	mux = f->ipmux->priv;
+	while(mux != nil){
+		/* data offsets count from the end of the header */
+		skip = ((int)mux->skiphdr)*hl;
+		if(mux->eoff + skip > len){
+			/* packet too short to hold the field */
+			mux = mux->no;
+			continue;
+		}
+		base = h + mux->off + skip;
+		hp = base;
+		switch(mux->ctype){
+		case Cbyte:
+			if(*mux->val == *hp)
+				goto yes;
+			break;
+		case Cmbyte:
+			if((*hp & *mux->mask) == *mux->val)
+				goto yes;
+			break;
+		case Cshort:
+			if(*((ushort*)mux->val) == *(ushort*)hp)
+				goto yes;
+			break;
+		case Cmshort:
+			if((*(ushort*)hp & (*((ushort*)mux->mask))) == *((ushort*)mux->val))
+				goto yes;
+			break;
+		case Clong:
+			if(*((ulong*)mux->val) == *(ulong*)hp)
+				goto yes;
+			break;
+		case Cmlong:
+			if((*(ulong*)hp & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
+				goto yes;
+			break;
+		case Cifc:
+			if(*((ulong*)mux->val) == *(ulong*)(ifc->lifc->local + IPv4off))
+				goto yes;
+			break;
+		case Cmifc:
+			if((*(ulong*)(ifc->lifc->local + IPv4off) & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
+				goto yes;
+			break;
+		default:
+			/* generic case: try each value in turn, byte by byte under the mask */
+			v = mux->val;
+			for(e = mux->e; v < e; v = ve){
+				m = mux->mask;
+				/* restart at the field, including the header skip */
+				hp = base;
+				for(ve = v + mux->len; v < ve; v++){
+					if((*hp++ & *m++) != *v)
+						break;
+				}
+				if(v == ve)
+					goto yes;
+			}
+		}
+		mux = mux->no;
+		continue;
+yes:
+		if(mux->conv != nil)
+			c = mux->conv;
+		mux = mux->yes;
+	}
+	RUNLOCK(f);
+
+	if(c != nil){
+		/* tack on interface address */
+		bp = padblock(bp, IPaddrlen);
+		ipmove(bp->rp, ifc->lifc->local);
+		bp = concatblock(bp);
+		if(bp != nil)
+			if(qpass(c->rq, bp) < 0)
+				print("Q");
+		return;
+	}
+
+nomatch:
+	/* doesn't match any filter, hand it to the specific protocol handler */
+	ip = (Myip4hdr*)bp->rp;
+	if((ip->vihl & 0xF0) == IP_VER4) {
+		p = f->t2p[ip->proto];
+	} else {
+		ip6 = (Ip6hdr*)bp->rp;
+		p = f->t2p[ip6->proto];
+	}
+	if(p && p->rcv)
+		(*p->rcv)(p, ifc, bp);
+	else
+		freeblist(bp);
+	return;
+}
+
+/*
+ * format the tree rooted at mux into buf (at most len bytes), one
+ * node per line, indented by one space per level:
+ *	h[first:last]&mask=val|val|...
+ * with mask and values in hex.  A nil tree prints as an empty
+ * line.  Each node is followed by its no subtree and then its yes
+ * subtree, one level deeper.  Returns the sum of the snprint results.
+ */
+static int
+ipmuxsprint(Ipmux *mux, int level, char *buf, int len)
+{
+ int i, j, n;
+ uchar *v;
+
+ n = 0;
+ for(i = 0; i < level; i++)
+ n += snprint(buf+n, len-n, " ");
+ if(mux == nil){
+ n += snprint(buf+n, len-n, "\n");
+ return n;
+ }
+ /* data offsets are shown relative to the start of the header */
+ n += snprint(buf+n, len-n, "h[%d:%d]&",
+ mux->off+((int)mux->skiphdr)*((int)ipoff->data),
+ mux->off+(((int)mux->skiphdr)*((int)ipoff->data))+mux->len-1);
+ for(i = 0; i < mux->len; i++)
+ n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
+ n += snprint(buf+n, len-n, "=");
+ v = mux->val;
+ for(j = 0; j < mux->n; j++){
+ for(i = 0; i < mux->len; i++)
+ n += snprint(buf+n, len - n, "%2.2ux", *v++);
+ n += snprint(buf+n, len-n, "|");
+ }
+ n += snprint(buf+n, len-n, "\n");
+ level++;
+ n += ipmuxsprint(mux->no, level, buf+n, len-n);
+ n += ipmuxsprint(mux->yes, level, buf+n, len-n);
+ return n;
+}
+
+/*
+ * format the protocol's whole filter tree into buf while holding
+ * the read lock on the Fs; returns what ipmuxsprint returns.
+ */
+static int
+ipmuxstats(Proto *p, char *buf, int len)
+{
+	Fs *fs;
+	int used;
+
+	fs = p->f;
+	RLOCK(fs);
+	used = ipmuxsprint(p->priv, 0, buf, len);
+	RUNLOCK(fs);
+	return used;
+}
+
+/*
+ * create the ipmux protocol, fill in its operations and register it
+ * with the Fs.
+ */
+void
+ipmuxinit(Fs *f)
+{
+	Proto *pr;
+
+	pr = smalloc(sizeof(Proto));
+
+	/* identity and sizing */
+	pr->priv = nil;
+	pr->name = "ipmux";
+	pr->ipproto = -1;
+	pr->nc = 64;
+	pr->ptclsize = sizeof(Ipmuxrock);
+
+	/* operations */
+	pr->connect = ipmuxconnect;
+	pr->announce = ipmuxannounce;
+	pr->state = ipmuxstate;
+	pr->create = ipmuxcreate;
+	pr->close = ipmuxclose;
+	pr->rcv = ipmuxiput;
+	pr->ctl = nil;
+	pr->advise = nil;
+	pr->stats = ipmuxstats;
+
+	f->ipmux = pr;	/* hack for Fsrcvpcol */
+
+	Fsproto(f, pr);
+}
diff --git a/src/9vx/a/ip/iproute.c b/src/9vx/a/ip/iproute.c
@@ -0,0 +1,854 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+static void walkadd(Fs*, Route**, Route*);
+static void addnode(Fs*, Route**, Route*);
+static void calcd(Route*);
+
+/*
+ * these are used for all instances of IP
+ *
+ * The free lists hold recycled route nodes linked through mid (see
+ * freeroute/allocroute).  routelock is write-locked around changes
+ * to the route trees.  The generation counters are bumped by every
+ * route addition and deletion; v4lookup compares v4routegeneration
+ * with a conversation's rgen to validate its cached route.
+ */
+static Route* v4freelist;
+static Route* v6freelist;
+static RWlock routelock;
+static ulong v4routegeneration, v6routegeneration;
+
+/*
+ * put r on the free list for its address family (chosen by the Rv4
+ * bit of r->type), linked through mid.
+ */
+static void
+freeroute(Route *r)
+{
+	Route **head;
+
+	head = (r->type & Rv4) ? &v4freelist : &v6freelist;
+
+	r->left = nil;
+	r->right = nil;
+	r->mid = *head;
+	*head = r;
+}
+
+/*
+ * get a zeroed route node of the given type, with ref set to 1,
+ * from the matching free list or, if that is empty, from malloc.
+ * Panics when out of memory.
+ */
+static Route*
+allocroute(int type)
+{
+	Route *r, **head;
+	int size;
+
+	if(type & Rv4){
+		head = &v4freelist;
+		size = sizeof(RouteTree) + sizeof(V4route);
+	} else {
+		head = &v6freelist;
+		size = sizeof(RouteTree) + sizeof(V6route);
+	}
+
+	r = *head;
+	if(r == nil){
+		r = malloc(size);
+		if(r == nil)
+			panic("out of routing nodes");
+	} else
+		*head = r->mid;
+
+	memset(r, 0, size);
+	r->type = type;
+	r->ifc = nil;
+	r->ref = 1;
+
+	return r;
+}
+
+/*
+ * push the subtree r onto the queue *q of subtrees waiting to be
+ * added back to a tree.  Each entry is a scratch route node whose
+ * left points at the subtree and whose mid links to the next entry.
+ * A nil r is ignored.
+ */
+static void
+addqueue(Route **q, Route *r)
+{
+	Route *ent;
+
+	if(r == nil)
+		return;
+
+	ent = allocroute(r->type);
+	ent->left = r;
+	ent->mid = *q;
+	*q = ent;
+}
+
+/*
+ * compare 2 v6 addresses held as IPllen longs, most significant
+ * first; returns 1, -1 or 0 as a is greater than, less than or
+ * equal to b.
+ */
+static int
+lcmp(ulong *a, ulong *b)
+{
+	int i;
+
+	for(i = 0; i < IPllen; i++)
+		if(a[i] != b[i])
+			return a[i] > b[i] ? 1 : -1;
+	return 0;
+}
+
+/*
+ * compare 2 v4 or v6 ranges
+ *
+ * results of rangecompare(a, b), describing a relative to b
+ */
+enum
+{
+ Rpreceeds, /* a ends before b starts */
+ Rfollows, /* a starts after b ends */
+ Requals, /* same start and end */
+ Rcontains, /* a covers all of b and more */
+ Rcontained, /* anything else */
+};
+
+/*
+ * classify the address range of a relative to that of b; the
+ * address family is taken from a->type.  Returns one of Rpreceeds,
+ * Rfollows, Requals, Rcontains or Rcontained.
+ */
+static int
+rangecompare(Route *a, Route *b)
+{
+	int lo, hi;
+
+	if(a->type & Rv4){
+		if(a->v4.endaddress < b->v4.address)
+			return Rpreceeds;
+		if(a->v4.address > b->v4.endaddress)
+			return Rfollows;
+		/* overlapping: a sticking inside b at either end means contained */
+		if(a->v4.address > b->v4.address
+		|| a->v4.endaddress < b->v4.endaddress)
+			return Rcontained;
+		if(a->v4.address == b->v4.address
+		&& a->v4.endaddress == b->v4.endaddress)
+			return Requals;
+		return Rcontains;
+	}
+
+	if(lcmp(a->v6.endaddress, b->v6.address) < 0)
+		return Rpreceeds;
+	if(lcmp(a->v6.address, b->v6.endaddress) > 0)
+		return Rfollows;
+
+	lo = lcmp(a->v6.address, b->v6.address);
+	hi = lcmp(a->v6.endaddress, b->v6.endaddress);
+	if(lo > 0 || hi < 0)
+		return Rcontained;
+	if(lo == 0 && hi == 0)
+		return Requals;
+	return Rcontains;
+}
+
+/*
+ * copy the gateway address of new into old; the address family is
+ * taken from new->type.
+ */
+static void
+copygate(Route *old, Route *new)
+{
+	if(new->type & Rv4){
+		memmove(old->v4.gate, new->v4.gate, IPv4addrlen);
+		return;
+	}
+	memmove(old->v6.gate, new->v6.gate, IPaddrlen);
+}
+
+/*
+ * add every node of the subtree p back into the tree at root:
+ * p itself first, detached from its children, then its left and
+ * right subtrees.  The mid link stays attached to p.
+ */
+static void
+walkadd(Fs *f, Route **root, Route *p)
+{
+	Route *lt, *rt;
+
+	lt = p->left;
+	rt = p->right;
+	p->left = 0;
+	p->right = 0;
+
+	addnode(f, root, p);
+	if(lt != nil)
+		walkadd(f, root, lt);
+	if(rt != nil)
+		walkadd(f, root, rt);
+}
+
+/*
+ * recompute the depth of p: one more than the depth of the deepest
+ * of its left, right and mid children (1 for a leaf).  A nil p is
+ * ignored.
+ */
+static void
+calcd(Route *p)
+{
+	int deepest;
+
+	if(p == nil)
+		return;
+
+	deepest = 0;
+	if(p->left != nil)
+		deepest = p->left->depth;
+	if(p->right != nil && p->right->depth > deepest)
+		deepest = p->right->depth;
+	if(p->mid != nil && p->mid->depth > deepest)
+		deepest = p->mid->depth;
+	p->depth = deepest+1;
+}
+
+/*
+ * balance the tree at the current node
+ *
+ * If the left and right depths differ by more than one, a single
+ * rotation lifts the deeper child into *cur.  The depths of the
+ * nodes involved are recomputed either way.  *cur must not be nil.
+ */
+static void
+balancetree(Route **cur)
+{
+ Route *p, *l, *r;
+ int dl, dr;
+
+ /*
+ * if left and right are
+ * too out of balance,
+ * rotate tree node
+ */
+ p = *cur;
+ dl = 0; if((l = p->left) != nil) dl = l->depth;
+ dr = 0; if((r = p->right) != nil) dr = r->depth;
+
+ if(dl > dr+1) {
+ /* left side too deep: l becomes the root with p as its right child */
+ p->left = l->right;
+ l->right = p;
+ *cur = l;
+ calcd(p);
+ calcd(l);
+ } else
+ if(dr > dl+1) {
+ /* right side too deep: r becomes the root with p as its left child */
+ p->right = r->left;
+ r->left = p;
+ *cur = r;
+ calcd(p);
+ calcd(r);
+ } else
+ calcd(p);
+}
+
+/*
+ * add a new node to the tree
+ *
+ * cur points at the link holding the (sub)tree.  Ranges entirely
+ * below or above the node at *cur go left or right, ranges inside
+ * it go into its mid subtree.  The tree is rebalanced at every
+ * level on the way back up.
+ */
+static void
+addnode(Fs *f, Route **cur, Route *new)
+{
+ Route *p;
+
+ p = *cur;
+ if(p == 0) {
+ *cur = new;
+ new->depth = 1;
+ return;
+ }
+
+ switch(rangecompare(new, p)){
+ case Rpreceeds:
+ addnode(f, &p->left, new);
+ break;
+ case Rfollows:
+ addnode(f, &p->right, new);
+ break;
+ case Rcontains:
+ /*
+ * if new node is superset
+ * of tree node,
+ * replace tree node and
+ * queue tree node to be
+ * merged into root.
+ */
+ *cur = new;
+ new->depth = 1;
+ addqueue(&f->queue, p);
+ break;
+ case Requals:
+ /*
+ * supersede the old entry if the old one isn't
+ * a local interface.
+ */
+ if((p->type & Rifc) == 0){
+ p->type = new->type;
+ /* invalidate the cached interface of the old entry */
+ p->ifcid = -1;
+ copygate(p, new);
+ } else if(new->type & Rifc)
+ p->ref++;
+ /* the existing node stays in the tree; new is not needed */
+ freeroute(new);
+ break;
+ case Rcontained:
+ addnode(f, &p->mid, new);
+ break;
+ }
+
+ balancetree(cur);
+}
+
+/*
+ * index of the v4 root tree for address a (host byte order): the
+ * low 27 bits of a shifted down by 32-Lroot-5.  The argument is
+ * parenthesized so that any expression may be passed.
+ */
+#define V4H(a) (((a)&0x07ffffff)>>(32-Lroot-5))
+
+/*
+ * add a v4 route for network a/mask via gate, labelled with tag.
+ * a, mask and gate are 4-byte addresses in network order.  A node
+ * is added to every root tree that the range [a&mask, a|~mask]
+ * hashes to, the route generation is bumped, and ipifcaddroute is
+ * notified.
+ */
+void
+v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+{
+	Route *p;
+	ulong sa;
+	ulong m;
+	ulong ea;
+	int h, eh;
+
+	m = nhgetl(mask);
+	sa = nhgetl(a) & m;
+	ea = sa | ~m;
+
+	eh = V4H(ea);
+	for(h=V4H(sa); h<=eh; h++) {
+		p = allocroute(Rv4 | type);
+		p->v4.address = sa;
+		p->v4.endaddress = ea;
+		memmove(p->v4.gate, gate, sizeof(p->v4.gate));
+		/*
+		 * tag is a string that may be shorter than p->tag (e.g.
+		 * "ra"); strncpy stops at its terminator instead of
+		 * reading sizeof(p->tag) bytes regardless.
+		 */
+		strncpy(p->tag, tag, sizeof(p->tag));
+
+		wlock(&routelock);
+		addnode(f, &f->v4root[h], p);
+		/* put back the subtrees that addnode displaced */
+		while((p = f->queue) != nil) {
+			f->queue = p->mid;
+			walkadd(f, &f->v4root[h], p->left);
+			freeroute(p);
+		}
+		wunlock(&routelock);
+	}
+	v4routegeneration++;
+
+	ipifcaddroute(f, Rv4, a, mask, gate, type);
+}
+
+/*
+ * V6H: index of the v6 root tree for an address held as IPllen
+ * longs, computed from the last long the same way V4H does.
+ * ISDFLT: true for a default route (all-zero address and mask)
+ * whose tag is not "ra"; its only use visible here is in
+ * commented-out code in v6addroute.
+ */
+#define V6H(a) (((a)[IPllen-1] & 0x07ffffff)>>(32-Lroot-5))
+#define ISDFLT(a, mask, tag) ((ipcmp((a),v6Unspecified)==0) && (ipcmp((mask),v6Unspecified)==0) && (strcmp((tag), "ra")!=0))
+
+/*
+ * add a v6 route for network a/mask via gate, labelled with tag.
+ * a, mask and gate are 16-byte addresses in network order.  A node
+ * is added to every root tree that the range [a&mask, a|~mask]
+ * hashes to, the route generation is bumped, and ipifcaddroute is
+ * notified.
+ */
+void
+v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+{
+	Route *p;
+	ulong sa[IPllen], ea[IPllen];
+	ulong x, y;
+	int h, eh;
+
+	/*
+	if(ISDFLT(a, mask, tag))
+		f->v6p->cdrouter = -1;
+	*/
+
+	/* start and end of the range, as host-order longs */
+	for(h = 0; h < IPllen; h++){
+		x = nhgetl(a+4*h);
+		y = nhgetl(mask+4*h);
+		sa[h] = x & y;
+		ea[h] = x | ~y;
+	}
+
+	eh = V6H(ea);
+	for(h = V6H(sa); h <= eh; h++) {
+		p = allocroute(type);
+		memmove(p->v6.address, sa, IPaddrlen);
+		memmove(p->v6.endaddress, ea, IPaddrlen);
+		memmove(p->v6.gate, gate, IPaddrlen);
+		/*
+		 * tag is a string that may be shorter than p->tag (e.g.
+		 * "ra"); strncpy stops at its terminator instead of
+		 * reading sizeof(p->tag) bytes regardless.
+		 */
+		strncpy(p->tag, tag, sizeof(p->tag));
+
+		wlock(&routelock);
+		addnode(f, &f->v6root[h], p);
+		/* put back the subtrees that addnode displaced */
+		while((p = f->queue) != nil) {
+			f->queue = p->mid;
+			walkadd(f, &f->v6root[h], p->left);
+			freeroute(p);
+		}
+		wunlock(&routelock);
+	}
+	v6routegeneration++;
+
+	ipifcaddroute(f, 0, a, mask, gate, type);
+}
+
+/*
+ * find the node whose range equals that of r in the tree at *cur.
+ * Returns the address of the link pointing at it, or nil if there
+ * is no such node.
+ */
+Route**
+looknode(Route **cur, Route *r)
+{
+	Route *p;
+
+	while((p = *cur) != nil){
+		switch(rangecompare(r, p)){
+		case Requals:
+			return cur;
+		case Rcontains:
+			/* anything equal to r would have to sit above p */
+			return nil;
+		case Rpreceeds:
+			cur = &p->left;
+			break;
+		case Rfollows:
+			cur = &p->right;
+			break;
+		case Rcontained:
+			cur = &p->mid;
+			break;
+		}
+	}
+	return nil;
+}
+
+/*
+ * drop one reference to the v4 route for network a/mask in every
+ * root tree its range hashes to.  When a node's count reaches zero
+ * it is unlinked and its subtrees are added back to the tree.
+ * routelock is write-locked only if dolock is set.  The route
+ * generation is bumped and ipifcremroute notified whether or not a
+ * route was found.
+ */
+void
+v4delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+{
+ Route **r, *p;
+ Route rt;
+ int h, eh;
+ ulong m;
+
+ /* rt is only a search key: the range and address family are all that is set */
+ m = nhgetl(mask);
+ rt.v4.address = nhgetl(a) & m;
+ rt.v4.endaddress = rt.v4.address | ~m;
+ rt.type = Rv4;
+
+ eh = V4H(rt.v4.endaddress);
+ for(h=V4H(rt.v4.address); h<=eh; h++) {
+ if(dolock)
+ wlock(&routelock);
+ r = looknode(&f->v4root[h], &rt);
+ if(r) {
+ p = *r;
+ if(--(p->ref) == 0){
+ /* cut the node out, then re-insert everything that hung below it */
+ *r = 0;
+ addqueue(&f->queue, p->left);
+ addqueue(&f->queue, p->mid);
+ addqueue(&f->queue, p->right);
+ freeroute(p);
+ while((p = f->queue) != nil) {
+ f->queue = p->mid;
+ walkadd(f, &f->v4root[h], p->left);
+ freeroute(p);
+ }
+ }
+ }
+ if(dolock)
+ wunlock(&routelock);
+ }
+ v4routegeneration++;
+
+ ipifcremroute(f, Rv4, a, mask);
+}
+
+/*
+ * v6 counterpart of v4delroute: drop one reference to the route whose range
+ * is exactly a/mask in every root slot it hashes to, removing nodes whose
+ * count reaches zero. routelock is taken per slot only when dolock is non-zero.
+ */
+void
+v6delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+{
+ Route **r, *p;
+ Route rt;
+ int h, eh;
+ ulong x, y;
+
+ /* template route: address = a & mask, endaddress = a | ~mask, one long at a time */
+ for(h = 0; h < IPllen; h++){
+ x = nhgetl(a+4*h);
+ y = nhgetl(mask+4*h);
+ rt.v6.address[h] = x & y;
+ rt.v6.endaddress[h] = x | ~y;
+ }
+ rt.type = 0; /* Rv4 bit clear, i.e. a v6 route */
+
+ eh = V6H(rt.v6.endaddress);
+ for(h=V6H(rt.v6.address); h<=eh; h++) {
+ if(dolock)
+ wlock(&routelock);
+ r = looknode(&f->v6root[h], &rt);
+ if(r) {
+ p = *r;
+ if(--(p->ref) == 0){
+ /* last reference: unlink the node, queue its three subtrees, free it, then drain the queue through walkadd */
+ *r = 0;
+ addqueue(&f->queue, p->left);
+ addqueue(&f->queue, p->mid);
+ addqueue(&f->queue, p->right);
+ freeroute(p);
+ while((p = f->queue) != nil) {
+ f->queue = p->mid;
+ walkadd(f, &f->v6root[h], p->left);
+ freeroute(p);
+ }
+ }
+ }
+ if(dolock)
+ wunlock(&routelock);
+ }
+ /* incremented outside the lock; v6lookup compares it with Conv.rgen to validate cached routes */
+ v6routegeneration++;
+
+ ipifcremroute(f, 0, a, mask);
+}
+
+/*
+ * Find the most specific v4 route covering the 4-byte address a.
+ * If c is non-nil, its cached route is returned while c->rgen still equals
+ * v4routegeneration, and the result of a fresh search is stored back in c.
+ * Returns nil when no interface can be found for the chosen route; returns
+ * q (possibly nil) otherwise.
+ */
+Route*
+v4lookup(Fs *f, uchar *a, Conv *c)
+{
+ Route *p, *q;
+ ulong la;
+ uchar gate[IPaddrlen];
+ Ipifc *ifc;
+
+ if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v4routegeneration)
+ return c->r;
+
+ la = nhgetl(a);
+ q = nil;
+ /* each match is remembered in q and the search continues down ->mid, so the last match wins */
+ for(p=f->v4root[V4H(la)]; p;)
+ if(la >= p->v4.address) {
+ if(la <= p->v4.endaddress) {
+ q = p;
+ p = p->mid;
+ } else
+ p = p->right;
+ } else
+ p = p->left;
+
+ /* (re)bind the route to an interface when it has none or the recorded ifcid no longer matches */
+ if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
+ if(q->type & Rifc) {
+ /* interface route: look the interface up by the route's own address in v4-in-v6 form */
+ hnputl(gate+IPv4off, q->v4.address);
+ memmove(gate, v4prefix, IPv4off);
+ } else
+ v4tov6(gate, q->v4.gate);
+ ifc = findipifc(f, gate, q->type);
+ if(ifc == nil)
+ return nil;
+ q->ifc = ifc;
+ q->ifcid = ifc->ifcid;
+ }
+
+ if(c != nil){
+ c->r = q;
+ c->rgen = v4routegeneration;
+ }
+
+ return q;
+}
+
+/*
+ * Find the most specific route covering the 16-byte address a.
+ * Addresses starting with v4prefix are tried in the v4 tables first; if that
+ * finds nothing the v6 tables are searched. Caching through c works as in
+ * v4lookup but is validated against v6routegeneration.
+ */
+Route*
+v6lookup(Fs *f, uchar *a, Conv *c)
+{
+ Route *p, *q;
+ ulong la[IPllen];
+ int h;
+ ulong x, y;
+ uchar gate[IPaddrlen];
+ Ipifc *ifc;
+
+ if(memcmp(a, v4prefix, IPv4off) == 0){
+ q = v4lookup(f, a+IPv4off, c);
+ if(q != nil)
+ return q;
+ }
+
+ if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v6routegeneration)
+ return c->r;
+
+ for(h = 0; h < IPllen; h++)
+ la[h] = nhgetl(a+4*h);
+
+ q = 0;
+ for(p=f->v6root[V6H(la)]; p;){
+ /* la < p's start address (compared long by long): go left */
+ for(h = 0; h < IPllen; h++){
+ x = la[h];
+ y = p->v6.address[h];
+ if(x == y)
+ continue;
+ if(x < y){
+ p = p->left;
+ goto next;
+ }
+ break;
+ }
+ /* la > p's end address: go right */
+ for(h = 0; h < IPllen; h++){
+ x = la[h];
+ y = p->v6.endaddress[h];
+ if(x == y)
+ continue;
+ if(x > y){
+ p = p->right;
+ goto next;
+ }
+ break;
+ }
+ /* la lies within p's range: remember it and look for a narrower match under ->mid */
+ q = p;
+ p = p->mid;
+next: ;
+ }
+
+ /* (re)bind the route to an interface when it has none or the recorded ifcid no longer matches */
+ if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
+ if(q->type & Rifc) {
+ for(h = 0; h < IPllen; h++)
+ hnputl(gate+4*h, q->v6.address[h]);
+ ifc = findipifc(f, gate, q->type);
+ } else
+ ifc = findipifc(f, q->v6.gate, q->type);
+ if(ifc == nil)
+ return nil;
+ q->ifc = ifc;
+ q->ifcid = ifc->ifcid;
+ }
+ if(c != nil){
+ c->r = q;
+ c->rgen = v6routegeneration;
+ }
+
+ return q;
+}
+
+/*
+ * Render the route type bits as a 4-character, blank-padded, NUL-terminated
+ * string in p (which must hold 5 bytes): '4' or '6', then optionally 'i',
+ * then one of 'u', 'b' or 'm', then optionally 'p'.
+ */
+void
+routetype(int type, char *p)
+{
+	int n;
+
+	/* start from four blanks plus the terminator; flags fill in from the left */
+	memset(p, ' ', 4);
+	p[4] = 0;
+
+	n = 0;
+	p[n++] = (type & Rv4) ? '4' : '6';
+	if(type & Rifc)
+		p[n++] = 'i';
+	if(type & Runi)
+		p[n++] = 'u';
+	else if(type & Rbcast)
+		p[n++] = 'b';
+	else if(type & Rmulti)
+		p[n++] = 'm';
+	if(type & Rptpt)
+		p[n] = 'p';
+}
+
+/* one line of route output; sprintroute passes addr, mask, gate, type string, tag and interface name */
+static char *rformat = "%-15I %-4M %-15I %4.4s %4.4s %3s\n";
+
+/*
+ * Convert route r into 16-byte addr, mask and gate buffers, a type string t
+ * (filled by routetype, 5 bytes) and an interface number *nifc (-1 if the
+ * route has no interface). v4 routes are written in v4prefix-mapped form.
+ */
+void
+convroute(Route *r, uchar *addr, uchar *mask, uchar *gate, char *t, int *nifc)
+{
+ int i;
+
+ if(r->type & Rv4){
+ memmove(addr, v4prefix, IPv4off);
+ hnputl(addr+IPv4off, r->v4.address);
+ memset(mask, 0xff, IPv4off);
+ /* the mask is recovered from the range: bits where start and end agree */
+ hnputl(mask+IPv4off, ~(r->v4.endaddress ^ r->v4.address));
+ memmove(gate, v4prefix, IPv4off);
+ memmove(gate+IPv4off, r->v4.gate, IPv4addrlen);
+ } else {
+ for(i = 0; i < IPllen; i++){
+ hnputl(addr + 4*i, r->v6.address[i]);
+ hnputl(mask + 4*i, ~(r->v6.endaddress[i] ^ r->v6.address[i]));
+ }
+ memmove(gate, r->v6.gate, IPaddrlen);
+ }
+
+ routetype(r->type, t);
+
+ if(r->ifc)
+ *nifc = r->ifc->conv->x;
+ else
+ *nifc = -1;
+}
+
+/*
+ * this code is not in rr to reduce stack size
+ *
+ * Format route r at rw->p (bounded by rw->e). While rw->o is negative,
+ * -rw->o bytes of output still have to be skipped: a line that ends before
+ * the skip point is discarded, and a line that crosses it has its tail
+ * moved down to rw->p.
+ */
+static void
+sprintroute(Route *r, Routewalk *rw)
+{
+ int nifc, n;
+ char t[5], *iname, ifbuf[5];
+ uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen];
+ char *p;
+
+ convroute(r, addr, mask, gate, t, &nifc);
+ iname = "-";
+ if(nifc != -1) {
+ iname = ifbuf;
+ snprint(ifbuf, sizeof ifbuf, "%d", nifc);
+ }
+ p = seprint(rw->p, rw->e, rformat, addr, mask, gate, t, r->tag, iname);
+ if(rw->o < 0){
+ n = p - rw->p; /* bytes just formatted */
+ if(n > -rw->o){
+ /* line crosses the skip point: keep only the n+rw->o bytes beyond it */
+ memmove(rw->p, rw->p-rw->o, n+rw->o);
+ rw->p = p + rw->o;
+ }
+ rw->o += n;
+ } else
+ rw->p = p;
+}
+
+/*
+ * In-order walk (left, node, mid, right) of the tree under r, calling
+ * rw->walk on every node whose start address hashes to slot rw->h.
+ * Returns 0 as soon as the output buffer is exhausted (rw->p has reached
+ * rw->e), 1 otherwise.
+ */
+static int
+rr(Route *r, Routewalk *rw)
+{
+	int slot;
+
+	if(rw->p >= rw->e)
+		return 0;
+	if(r == nil)
+		return 1;
+
+	if(!rr(r->left, rw))
+		return 0;
+
+	/* a route can sit in several slots; only report it from the slot of its start address */
+	if(r->type & Rv4)
+		slot = V4H(r->v4.address);
+	else
+		slot = V6H(r->v6.address);
+	if(slot == rw->h)
+		rw->walk(r, rw);
+
+	return rr(r->mid, rw) && rr(r->right, rw);
+}
+
+/*
+ * Apply rw->walk to every route of f, v4 slots first and then v6 slots,
+ * under a read lock on routelock. Each pass stops once the buffer
+ * described by rw->p / rw->e is full.
+ */
+void
+ipwalkroutes(Fs *f, Routewalk *rw)
+{
+	rlock(&routelock);
+
+	if(rw->p < rw->e){
+		for(rw->h = 0; rw->h < nelem(f->v4root); rw->h++){
+			if(!rr(f->v4root[rw->h], rw))
+				break;
+		}
+	}
+
+	if(rw->p < rw->e){
+		for(rw->h = 0; rw->h < nelem(f->v6root); rw->h++){
+			if(!rr(f->v6root[rw->h], rw))
+				break;
+		}
+	}
+
+	runlock(&routelock);
+}
+
+/*
+ * Format the routing table of f into p, skipping the first offset bytes
+ * of output and writing at most n bytes. Returns the number of bytes
+ * placed in p.
+ */
+long
+routeread(Fs *f, char *p, ulong offset, int n)
+{
+	Routewalk walk;
+
+	walk.walk = sprintroute;
+	walk.o = -offset;	/* negative: bytes still to be skipped */
+	walk.p = p;
+	walk.e = p+n;
+	ipwalkroutes(f, &walk);
+
+	return walk.p - p;
+}
+
+/*
+ * this code is not in routeflush to reduce stack size
+ *
+ * Delete route r from f by converting it back to address/mask form and
+ * handing it to v4delroute or v6delroute; dolock is passed through.
+ */
+void
+delroute(Fs *f, Route *r, int dolock)
+{
+	int ifcno;
+	char typ[5];
+	uchar a[IPaddrlen], m[IPaddrlen], g[IPaddrlen];
+
+	convroute(r, a, m, g, typ, &ifcno);
+	if((r->type & Rv4) == 0){
+		v6delroute(f, a, m, dolock);
+		return;
+	}
+	v4delroute(f, a+IPv4off, m+IPv4off, dolock);
+}
+
+/*
+ * recurse until one route is deleted
+ * returns 0 if nothing is deleted, 1 otherwise
+ *
+ * Subtrees are tried in the order mid, left, right before r itself.
+ * Interface routes (Rifc) are never deleted; a nil tag matches every route.
+ * delroute is called with dolock 0.
+ */
+int
+routeflush(Fs *f, Route *r, char *tag)
+{
+	if(r == nil)
+		return 0;
+
+	if(routeflush(f, r->mid, tag)
+	|| routeflush(f, r->left, tag)
+	|| routeflush(f, r->right, tag))
+		return 1;
+
+	if(r->type & Rifc)
+		return 0;
+	if(tag != nil && strncmp(tag, r->tag, sizeof(r->tag)) != 0)
+		return 0;
+
+	delroute(f, r, 0);
+	return 1;
+}
+
+/*
+ * Handle a control message written to the route file. Commands:
+ *	flush [tag]		delete all non-interface routes (with that tag)
+ *	remove addr mask	delete one route
+ *	add addr mask gate	add a route, tagged with the channel's tag or "none"
+ *	tag name		replace the tag kept in c->aux
+ * Unrecognised commands are accepted and ignored. Returns n.
+ * Raises Ebadarg for an empty command, too few arguments, or "tag" without
+ * a channel; raises Ebadip for an unparsable address.
+ */
+long
+routewrite(Fs *f, Chan *c, char *p, int n)
+{
+	int h, changed;
+	char *tag;
+	Cmdbuf *cb;
+	uchar addr[IPaddrlen];
+	uchar mask[IPaddrlen];
+	uchar gate[IPaddrlen];
+	IPaux *a, *na;
+
+	cb = parsecmd(p, n);
+	if(waserror()){
+		free(cb);
+		nexterror();
+	}
+
+	/* an empty or all-blank write has no command word to compare */
+	if(cb->nf < 1)
+		error(Ebadarg);
+
+	if(strcmp(cb->f[0], "flush") == 0){
+		/* no tag argument means flush everything: routeflush treats nil as a wildcard */
+		tag = nil;
+		if(cb->nf > 1)
+			tag = cb->f[1];
+		/* routeflush deletes at most one route per call, so repeat until nothing changes */
+		for(h = 0; h < nelem(f->v4root); h++)
+			for(changed = 1; changed;){
+				wlock(&routelock);
+				changed = routeflush(f, f->v4root[h], tag);
+				wunlock(&routelock);
+			}
+		for(h = 0; h < nelem(f->v6root); h++)
+			for(changed = 1; changed;){
+				wlock(&routelock);
+				changed = routeflush(f, f->v6root[h], tag);
+				wunlock(&routelock);
+			}
+	} else if(strcmp(cb->f[0], "remove") == 0){
+		if(cb->nf < 3)
+			error(Ebadarg);
+		if (parseip(addr, cb->f[1]) == -1)
+			error(Ebadip);
+		parseipmask(mask, cb->f[2]);
+		if(memcmp(addr, v4prefix, IPv4off) == 0)
+			v4delroute(f, addr+IPv4off, mask+IPv4off, 1);
+		else
+			v6delroute(f, addr, mask, 1);
+	} else if(strcmp(cb->f[0], "add") == 0){
+		if(cb->nf < 4)
+			error(Ebadarg);
+		if(parseip(addr, cb->f[1]) == -1 ||
+		   parseip(gate, cb->f[3]) == -1)
+			error(Ebadip);
+		parseipmask(mask, cb->f[2]);
+		tag = "none";
+		if(c != nil){
+			a = c->aux;
+			tag = a->tag;
+		}
+		if(memcmp(addr, v4prefix, IPv4off) == 0)
+			v4addroute(f, tag, addr+IPv4off, mask+IPv4off, gate+IPv4off, 0);
+		else
+			v6addroute(f, tag, addr, mask, gate, 0);
+	} else if(strcmp(cb->f[0], "tag") == 0) {
+		/* the tag lives in c->aux, so there is nothing to retag without a channel */
+		if(cb->nf < 2 || c == nil)
+			error(Ebadarg);
+
+		a = c->aux;
+		na = newipaux(a->owner, cb->f[1]);
+		c->aux = na;
+		free(a);
+	}
+
+	poperror();
+	free(cb);
+	return n;
+}
diff --git a/src/9vx/a/ip/ipv6.c b/src/9vx/a/ip/ipv6.c
@@ -0,0 +1,718 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+
+/* size in bytes of the IPv6 fragment extension header */
+enum
+{
+ IP6FHDR = 8, /* sizeof(Fraghdr6) */
+};
+
+/*
+ * IPV6CLASS extracts the 8-bit traffic class: its high nibble is the low
+ * nibble of vcf[0] and its low nibble is the high nibble of vcf[1]. This is
+ * the inverse of the packing done in ipoput6 (vcf[0] |= tos>>4, vcf[1] = tos<<4).
+ * BLKIPVER yields the version nibble of a block's first byte, still in the
+ * high four bits, for comparison with IP_VER6.
+ */
+#define IPV6CLASS(hdr) (((hdr)->vcf[0]&0x0F)<<4 | ((hdr)->vcf[1]&0xF0)>>4)
+#define BLKIPVER(xp) (((Ip6hdr*)((xp)->rp))->vcf[0] & 0xF0)
+/*
+ * This sleazy macro is stolen shamelessly from ip.c, see comment there.
+ * It overlays an Ipfrag (fragment offset and length) on the start of a
+ * block's buffer; ip6reassemble makes room for it before using it.
+ */
+#define BKFG(xp) ((Ipfrag*)((xp)->base))
+
+typedef struct Fragment4 Fragment4;
+typedef struct Fragment6 Fragment6;
+typedef struct Ipfrag Ipfrag;
+
+/* forward declarations for the routines defined later in this file */
+Block* ip6reassemble(IP*, int, Block*, Ip6hdr*);
+Fragment6* ipfragallo6(IP*);
+void ipfragfree6(IP*, Fragment6*);
+Block* procopts(Block *bp);
+static Block* procxtns(IP *ip, Block *bp, int doreasm);
+int unfraglen(Block *bp, uchar *nexthdr, int setfh);
+
+/* MIB II counters: indices into IP.stats and statnames; Nstats is the count */
+enum
+{
+ Forwarding,
+ DefaultTTL,
+ InReceives,
+ InHdrErrors,
+ InAddrErrors,
+ ForwDatagrams,
+ InUnknownProtos,
+ InDiscards,
+ InDelivers,
+ OutRequests,
+ OutDiscards,
+ OutNoRoutes,
+ ReasmTimeout,
+ ReasmReqds,
+ ReasmOKs,
+ ReasmFails,
+ FragOKs,
+ FragFails,
+ FragCreates,
+
+ Nstats,
+};
+
+/* printable name of each MIB II counter, indexed by the enum above; no use of it is visible in this part of the file */
+static char *statnames[] =
+{
+[Forwarding] "Forwarding",
+[DefaultTTL] "DefaultTTL",
+[InReceives] "InReceives",
+[InHdrErrors] "InHdrErrors",
+[InAddrErrors] "InAddrErrors",
+[ForwDatagrams] "ForwDatagrams",
+[InUnknownProtos] "InUnknownProtos",
+[InDiscards] "InDiscards",
+[InDelivers] "InDelivers",
+[OutRequests] "OutRequests",
+[OutDiscards] "OutDiscards",
+[OutNoRoutes] "OutNoRoutes",
+[ReasmTimeout] "ReasmTimeout",
+[ReasmReqds] "ReasmReqds",
+[ReasmOKs] "ReasmOKs",
+[ReasmFails] "ReasmFails",
+[FragOKs] "FragOKs",
+[FragFails] "FragFails",
+[FragCreates] "FragCreates",
+};
+
+/* one v4 reassembly queue; not used by the code in view */
+struct Fragment4
+{
+ Block* blist;
+ Fragment4* next;
+ ulong src;
+ ulong dst;
+ ushort id;
+ ulong age;
+};
+
+/* one v6 reassembly queue, keyed by (src, dst, id) in ip6reassemble */
+struct Fragment6
+{
+ Block* blist; /* fragments received so far, kept sorted by offset */
+ Fragment6* next; /* link on IP.flisthead6 or IP.fragfree6 */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+ uint id;
+ ulong age; /* expiry time; set to NOW + 30000 by ipfragallo6 */
+};
+
+/* per-fragment bookkeeping stored at a block's base (see BKFG) */
+struct Ipfrag
+{
+ ushort foff; /* offset of this fragment's data within the packet */
+ ushort flen; /* length of this fragment's data */
+};
+
+/* an instance of IP */
+struct IP
+{
+ ulong stats[Nstats]; /* MIB II counters */
+
+ /* v4 reassembly state; not touched by the code in view */
+ QLock fraglock4;
+ Fragment4* flisthead4;
+ Fragment4* fragfree4;
+ Ref id4;
+
+ QLock fraglock6; /* held by ip6reassemble around the two lists below */
+ Fragment6* flisthead6; /* reassembly queues in use */
+ Fragment6* fragfree6; /* free Fragment6 structures */
+ Ref id6; /* source of fragment ids in ipoput6 */
+
+ int iprouting; /* true if we route like a gateway */
+};
+
+/*
+ * Send the IPv6 packet in bp, whose first block starts with an Ip6hdr.
+ * gating is non-zero for forwarded packets, in which case the version and
+ * traffic class bytes are left alone and the length is taken from the header.
+ * The packet is fragmented if it does not fit the interface's MTU.
+ * bp is consumed on every path. Returns 0, or -1 when there is no route.
+ */
+int
+ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+{
+ int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff;
+ int morefrags, blklen, rv = 0, tentative;
+ uchar *gate, nexthdr;
+ Block *xp, *nb;
+ Fraghdr6 fraghdr;
+ IP *ip;
+ Ip6hdr *eh;
+ Ipifc *ifc;
+ Route *r, *sr;
+
+ ip = f->ip;
+
+ /* Fill out the ip header */
+ eh = (Ip6hdr*)(bp->rp);
+
+ ip->stats[OutRequests]++;
+
+ /* Number of uchars in data and ip header to write */
+ len = blocklen(bp);
+
+ tentative = iptentative(f, eh->src);
+ if(tentative){
+ netlog(f, Logip, "reject tx of packet with tentative src address %I\n",
+ eh->src);
+ goto free;
+ }
+
+ if(gating){
+ /* trust the header's payload length, trimming any trailing bytes beyond it */
+ chunk = nhgets(eh->ploadlen);
+ if(chunk > len){
+ ip->stats[OutDiscards]++;
+ netlog(f, Logip, "short gated packet\n");
+ goto free;
+ }
+ if(chunk + IP6HDR < len)
+ len = chunk + IP6HDR;
+ }
+
+ if(len >= IP_MAX){
+ ip->stats[OutDiscards]++;
+ netlog(f, Logip, "exceeded ip max size %I\n", eh->dst);
+ goto free;
+ }
+
+ r = v6lookup(f, eh->dst, c);
+ if(r == nil){
+// print("no route for %I, src %I free\n", eh->dst, eh->src);
+ ip->stats[OutNoRoutes]++;
+ netlog(f, Logip, "no interface %I\n", eh->dst);
+ rv = -1;
+ goto free;
+ }
+
+ /* next hop: the destination itself for local, broadcast and multicast routes, else the route's gateway */
+ ifc = r->ifc;
+ if(r->type & (Rifc|Runi))
+ gate = eh->dst;
+ else if(r->type & (Rbcast|Rmulti)) {
+ gate = eh->dst;
+ /* prefer the interface that owns the source address, if it is one of ours */
+ sr = v6lookup(f, eh->src, nil);
+ if(sr && (sr->type & Runi))
+ ifc = sr->ifc;
+ }
+ else
+ gate = r->v6.gate;
+
+ if(!gating)
+ eh->vcf[0] = IP_VER6;
+ eh->ttl = ttl;
+ if(!gating) {
+ /* traffic class straddles the low nibble of vcf[0] and the high nibble of vcf[1] */
+ eh->vcf[0] |= tos >> 4;
+ eh->vcf[1] = tos << 4;
+ }
+
+ if(!CANRLOCK(ifc))
+ goto free;
+
+ /* from here on the interface read lock must be released on every exit */
+ if(waserror()){
+ RUNLOCK(ifc);
+ nexterror();
+ }
+
+ if(ifc->m == nil)
+ goto raise;
+
+ /* If we dont need to fragment just send it */
+ medialen = ifc->maxtu - ifc->m->hsize;
+ if(len <= medialen) {
+ hnputs(eh->ploadlen, len - IP6HDR);
+ /* bp is handed to the medium here; it is not freed on this path */
+ ifc->m->bwrite(ifc, bp, V6, gate);
+ RUNLOCK(ifc);
+ poperror();
+ return 0;
+ }
+
+ if(gating && ifc->reassemble <= 0) {
+ /*
+ * v6 intermediate nodes are not supposed to fragment pkts;
+ * we fragment if ifc->reassemble is turned on; an exception
+ * needed for nat.
+ */
+ ip->stats[OutDiscards]++;
+ icmppkttoobig6(f, ifc, bp);
+ netlog(f, Logip, "%I: gated pkts not fragmented\n", eh->dst);
+ goto raise;
+ }
+
+ /* start v6 fragmentation */
+ /* uflen covers the headers repeated in every fragment; setfh=1 also marks the last of them as followed by a fragment header */
+ uflen = unfraglen(bp, &nexthdr, 1);
+ if(uflen > medialen) {
+ ip->stats[FragFails]++;
+ ip->stats[OutDiscards]++;
+ netlog(f, Logip, "%I: unfragmentable part too big\n", eh->dst);
+ goto raise;
+ }
+
+ flen = len - uflen;
+ /* data bytes per fragment, rounded down to a multiple of 8 */
+ seglen = (medialen - (uflen + IP6FHDR)) & ~7;
+ if(seglen < 8) {
+ ip->stats[FragFails]++;
+ ip->stats[OutDiscards]++;
+ netlog(f, Logip, "%I: seglen < 8\n", eh->dst);
+ goto raise;
+ }
+
+ lid = incref(&ip->id6);
+ fraghdr.nexthdr = nexthdr;
+ fraghdr.res = 0;
+ hnputl(fraghdr.id, lid);
+
+ /* advance xp/rp past the unfragmentable part so xp->rp is the first byte to be fragmented */
+ xp = bp;
+ offset = uflen;
+ while (xp && offset && offset >= BLEN(xp)) {
+ offset -= BLEN(xp);
+ xp = xp->next;
+ }
+ xp->rp += offset;
+
+ fragoff = 0;
+ morefrags = 1;
+
+ for(; fragoff < flen; fragoff += seglen) {
+ nb = allocb(uflen + IP6FHDR + seglen);
+
+ /* the last fragment carries whatever is left and clears the M flag */
+ if(fragoff + seglen >= flen) {
+ seglen = flen - fragoff;
+ morefrags = 0;
+ }
+
+ /* eh still points at the original header, which is patched and copied into each fragment */
+ hnputs(eh->ploadlen, seglen+IP6FHDR);
+ memmove(nb->wp, eh, uflen);
+ nb->wp += uflen;
+
+ hnputs(fraghdr.offsetRM, fragoff); /* last 3 bits must be 0 */
+ fraghdr.offsetRM[1] |= morefrags;
+ memmove(nb->wp, &fraghdr, IP6FHDR);
+ nb->wp += IP6FHDR;
+
+ /* Copy data */
+ chunk = seglen;
+ while (chunk) {
+ if(!xp) {
+ ip->stats[OutDiscards]++;
+ ip->stats[FragFails]++;
+ freeblist(nb);
+ netlog(f, Logip, "!xp: chunk in v6%d\n", chunk);
+ goto raise;
+ }
+ blklen = chunk;
+ if(BLEN(xp) < chunk)
+ blklen = BLEN(xp);
+ memmove(nb->wp, xp->rp, blklen);
+
+ nb->wp += blklen;
+ xp->rp += blklen;
+ chunk -= blklen;
+ if(xp->rp == xp->wp)
+ xp = xp->next;
+ }
+
+ ifc->m->bwrite(ifc, nb, V6, gate);
+ ip->stats[FragCreates]++;
+ }
+ ip->stats[FragOKs]++;
+
+ /* despite its name this label does not raise an error: it drops the lock and error label, then falls into free */
+raise:
+ RUNLOCK(ifc);
+ poperror();
+free:
+ freeblist(bp);
+ return rv;
+}
+
+/*
+ * Receive one IPv6 packet, bp, that arrived on ifc.
+ * Packets not addressed to this host are forwarded through ipoput6 when
+ * ip->iprouting is set and dropped otherwise; packets for this host are
+ * reassembled if necessary and handed to the protocol's rcv routine.
+ * bp is consumed on every path.
+ */
+void
+ipiput6(Fs *f, Ipifc *ifc, Block *bp)
+{
+	int hl, hop, tos, notforme, tentative;
+	uchar proto;
+	uchar v6dst[IPaddrlen];
+	IP *ip;
+	Ip6hdr *h;
+	Proto *p;
+	Route *r, *sr;
+
+	ip = f->ip;
+	ip->stats[InReceives]++;
+
+	/*
+	 * Ensure we have all the header info in the first
+	 * block. Make life easier for other protocols by
+	 * collecting up to the first 64 bytes in the first block.
+	 */
+	if(BLEN(bp) < 64) {
+		hl = blocklen(bp);
+		if(hl < IP6HDR)
+			hl = IP6HDR;
+		if(hl > 64)
+			hl = 64;
+		bp = pullupblock(bp, hl);
+		if(bp == nil)
+			return;
+	}
+
+	h = (Ip6hdr *)bp->rp;
+
+	memmove(&v6dst[0], &h->dst[0], IPaddrlen);
+	notforme = ipforme(f, v6dst) == 0;
+	tentative = iptentative(f, v6dst);
+
+	/* while an address is tentative only ICMPv6 is accepted for it */
+	if(tentative && h->proto != ICMPv6) {
+		print("tentative addr, drop\n");
+		freeblist(bp);
+		return;
+	}
+
+	/* Check header version */
+	if(BLKIPVER(bp) != IP_VER6) {
+		ip->stats[InHdrErrors]++;
+		/* the version is the high nibble of the first byte */
+		netlog(f, Logip, "ip: bad version %ux\n", (h->vcf[0]&0xF0)>>4);
+		freeblist(bp);
+		return;
+	}
+
+	/* route */
+	if(notforme) {
+		if(!ip->iprouting){
+			/* bp may still be a chain (only the first 64 bytes were pulled up), so free the whole list */
+			freeblist(bp);
+			return;
+		}
+
+		/* don't forward to link-local destinations */
+		if(islinklocal(h->dst) ||
+		   (isv6mcast(h->dst) && (h->dst[1]&0xF) <= Link_local_scop)){
+			ip->stats[OutDiscards]++;
+			freeblist(bp);
+			return;
+		}
+
+		/* don't forward to source's network */
+		sr = v6lookup(f, h->src, nil);
+		r = v6lookup(f, h->dst, nil);
+
+		if(r == nil || sr == r){
+			ip->stats[OutDiscards]++;
+			freeblist(bp);
+			return;
+		}
+
+		/* don't forward if packet has timed out */
+		hop = h->ttl;
+		if(hop < 1) {
+			ip->stats[InHdrErrors]++;
+			icmpttlexceeded6(f, ifc, bp);
+			freeblist(bp);
+			return;
+		}
+
+		/* process headers & reassemble if the interface expects it */
+		bp = procxtns(ip, bp, r->ifc->reassemble);
+		if(bp == nil)
+			return;
+
+		ip->stats[ForwDatagrams]++;
+		/* procxtns may have returned a different block, so re-read the header */
+		h = (Ip6hdr *)bp->rp;
+		tos = IPV6CLASS(h);
+		hop = h->ttl;
+		ipoput6(f, bp, 1, hop-1, tos, nil);
+		return;
+	}
+
+	/* reassemble & process headers if needed */
+	bp = procxtns(ip, bp, 1);
+	if(bp == nil)
+		return;
+
+	h = (Ip6hdr *) (bp->rp);
+	proto = h->proto;
+	p = Fsrcvpcol(f, proto);
+	if(p && p->rcv) {
+		ip->stats[InDelivers]++;
+		(*p->rcv)(p, ifc, bp);
+		return;
+	}
+
+	ip->stats[InDiscards]++;
+	ip->stats[InUnknownProtos]++;
+	freeblist(bp);
+}
+
+/*
+ * ipfragfree6 - copied from ipfragfree4 - assume hold fraglock6
+ *
+ * Release frag: free any blocks it holds, clear its key, unlink it from
+ * ip->flisthead6 if it is there, and push it on ip->fragfree6.
+ */
+void
+ipfragfree6(IP *ip, Fragment6 *frag)
+{
+ Fragment6 *fl, **l;
+
+ if(frag->blist)
+ freeblist(frag->blist);
+
+ /* only src and id are cleared; dst is left as it was */
+ memset(frag->src, 0, IPaddrlen);
+ frag->id = 0;
+ frag->blist = nil;
+
+ /* l trails fl so the matching entry can be spliced out */
+ l = &ip->flisthead6;
+ for(fl = *l; fl; fl = fl->next) {
+ if(fl == frag) {
+ *l = frag->next;
+ break;
+ }
+ l = &fl->next;
+ }
+
+ frag->next = ip->fragfree6;
+ ip->fragfree6 = frag;
+}
+
+/*
+ * ipfragallo6 - copied from ipfragalloc4
+ *
+ * Take a Fragment6 off the free list, recycling the last entry of the
+ * in-use list if the free list is empty, put it at the head of
+ * ip->flisthead6 and give it an expiry time of NOW + 30000.
+ * NOTE(review): if both lists are empty the recycling loop dereferences
+ * nil; presumably the free list is populated at initialisation - confirm.
+ */
+Fragment6*
+ipfragallo6(IP *ip)
+{
+ Fragment6 *f;
+
+ while(ip->fragfree6 == nil) {
+ /* free last entry on fraglist */
+ for(f = ip->flisthead6; f->next; f = f->next)
+ ;
+ ipfragfree6(ip, f);
+ }
+ f = ip->fragfree6;
+ ip->fragfree6 = f->next;
+ f->next = ip->flisthead6;
+ ip->flisthead6 = f;
+ f->age = NOW + 30000;
+
+ return f;
+}
+
+/*
+ * Process the extension headers of the packet in bp. If the headers lead
+ * to a fragment header and doreasm is non-zero, the packet goes through
+ * ip6reassemble first (nil is returned while reassembly is incomplete).
+ * Packets with a destination-options header or any header beyond the
+ * fixed one are then passed through procopts.
+ */
+static Block*
+procxtns(IP *ip, Block *bp, int doreasm)
+{
+	int uflen;
+	uchar nxt;
+	Ip6hdr *hdr;
+
+	hdr = (Ip6hdr *)bp->rp;
+	uflen = unfraglen(bp, &nxt, 0);
+
+	if(doreasm != 0 && nxt == FH){
+		bp = ip6reassemble(ip, uflen, bp, hdr);
+		if(bp == nil)
+			return nil;
+		/* the reassembled packet has a new header chain; measure it again */
+		uflen = unfraglen(bp, &nxt, 0);
+	}
+
+	if(nxt != DOH && uflen <= IP6HDR)
+		return bp;
+	return procopts(bp);
+}
+
+/*
+ * returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
+ * hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
+ * of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
+ * field of the last header in the "Unfragmentable part" is set to FH.
+ *
+ * Note that the packet is modified even when setfh == 0: if the chain ends
+ * at a fragment header, the last unfragmentable header's nexthdr field is
+ * overwritten with the fragment header's own nexthdr value.
+ * NOTE(review): the walk reads only through bp->rp and does no length
+ * checks; it presumably relies on the caller having the headers in the
+ * first block - confirm.
+ */
+int
+unfraglen(Block *bp, uchar *nexthdr, int setfh)
+{
+ uchar *p, *q;
+ int ufl, hs;
+
+ p = bp->rp;
+ q = p+6; /* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
+ *nexthdr = *q;
+ ufl = IP6HDR;
+ p += ufl;
+
+ /* p is the header being examined, q the nexthdr byte that announced it */
+ while (*nexthdr == HBH || *nexthdr == RH) {
+ *nexthdr = *p;
+ hs = ((int)*(p+1) + 1) * 8; /* length byte counts 8-byte units beyond the first */
+ ufl += hs;
+ q = p;
+ p += hs;
+ }
+
+ if(*nexthdr == FH)
+ *q = *p;
+ if(setfh)
+ *q = FH;
+ return ufl;
+}
+
+/* placeholder for option processing: currently returns bp unchanged */
+Block*
+procopts(Block *bp)
+{
+ return bp;
+}
+
+/*
+ * Add the fragment in bp to the reassembly queue for its (src, dst, id).
+ * uflen is the length of the unfragmentable part, so the fragment header
+ * sits at bp->rp + uflen; ih points at the packet's Ip6hdr.
+ * Returns the complete packet once every fragment has arrived, bp itself
+ * if it turns out not to be fragmented, and nil otherwise (the fragment was
+ * queued, dropped as a duplicate, or could not be pulled up).
+ */
+Block*
+ip6reassemble(IP* ip, int uflen, Block* bp, Ip6hdr* ih)
+{
+	int fend, offset, ovlap, len, fragsize, pktposn;
+	uint id;
+	uchar src[IPaddrlen], dst[IPaddrlen];
+	Block *bl, **l, *last, *prev;
+	Fraghdr6 *fraghdr;
+	Fragment6 *f, *fnext;
+
+	fraghdr = (Fraghdr6 *)(bp->rp + uflen);
+	memmove(src, ih->src, IPaddrlen);
+	memmove(dst, ih->dst, IPaddrlen);
+	id = nhgetl(fraghdr->id);
+	offset = nhgets(fraghdr->offsetRM) & ~7;
+
+	/*
+	 * block lists are too hard, pullupblock into a single block
+	 */
+	if(bp->next){
+		bp = pullupblock(bp, blocklen(bp));
+		if(bp == nil)
+			return nil;
+		/* bp may be a different block now; refresh every pointer into it */
+		ih = (Ip6hdr *)bp->rp;
+		fraghdr = (Fraghdr6 *)(bp->rp + uflen);
+	}
+
+	qlock(&ip->fraglock6);
+
+	/*
+	 * find a reassembly queue for this fragment
+	 */
+	for(f = ip->flisthead6; f; f = fnext){
+		fnext = f->next;
+		if(ipcmp(f->src, src)==0 && ipcmp(f->dst, dst)==0 && f->id == id)
+			break;
+		/* expire stale queues met along the way */
+		if(f->age < NOW){
+			ip->stats[ReasmTimeout]++;
+			ipfragfree6(ip, f);
+		}
+	}
+
+	/*
+	 * if this isn't a fragmented packet, accept it
+	 * and get rid of any fragments that might go
+	 * with it.
+	 */
+	if(nhgets(fraghdr->offsetRM) == 0) {	/* 1st frag is also last */
+		if(f) {
+			ipfragfree6(ip, f);
+			ip->stats[ReasmFails]++;
+		}
+		qunlock(&ip->fraglock6);
+		return bp;
+	}
+
+	/* make room at the block's base for the Ipfrag overlay used by BKFG */
+	if(bp->base+sizeof(Ipfrag) >= bp->rp){
+		bp = padblock(bp, sizeof(Ipfrag));
+		bp->rp += sizeof(Ipfrag);
+		/* padblock returns the block to use from here on; re-derive ih from it */
+		ih = (Ip6hdr *)bp->rp;
+	}
+
+	BKFG(bp)->foff = offset;
+	BKFG(bp)->flen = nhgets(ih->ploadlen) + IP6HDR - uflen - IP6FHDR;
+
+	/* First fragment allocates a reassembly queue */
+	if(f == nil) {
+		f = ipfragallo6(ip);
+		f->id = id;
+		memmove(f->src, src, IPaddrlen);
+		memmove(f->dst, dst, IPaddrlen);
+
+		f->blist = bp;
+
+		qunlock(&ip->fraglock6);
+		ip->stats[ReasmReqds]++;
+		return nil;
+	}
+
+	/*
+	 * find the new fragment's position in the queue
+	 */
+	prev = nil;
+	l = &f->blist;
+	bl = f->blist;
+	while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+		prev = bl;
+		l = &bl->next;
+		bl = bl->next;
+	}
+
+	/* Check overlap of a previous fragment - trim away as necessary */
+	if(prev) {
+		ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+		if(ovlap > 0) {
+			/* the new fragment lies entirely inside the previous one: drop it */
+			if(ovlap >= BKFG(bp)->flen) {
+				freeblist(bp);
+				qunlock(&ip->fraglock6);
+				return nil;
+			}
+			BKFG(prev)->flen -= ovlap;
+		}
+	}
+
+	/* Link onto assembly queue */
+	bp->next = *l;
+	*l = bp;
+
+	/* Check to see if succeeding segments overlap */
+	if(bp->next) {
+		l = &bp->next;
+		fend = BKFG(bp)->foff + BKFG(bp)->flen;
+
+		/* Take completely covered segments out */
+		while(*l) {
+			ovlap = fend - BKFG(*l)->foff;
+			if(ovlap <= 0)
+				break;
+			if(ovlap < BKFG(*l)->flen) {
+				BKFG(*l)->flen -= ovlap;
+				BKFG(*l)->foff += ovlap;
+				/* move up ih hdrs */
+				memmove((*l)->rp + ovlap, (*l)->rp, uflen);
+				(*l)->rp += ovlap;
+				break;
+			}
+			last = (*l)->next;
+			(*l)->next = nil;
+			freeblist(*l);
+			*l = last;
+		}
+	}
+
+	/*
+	 * look for a complete packet. if we get to a fragment
+	 * with the trailing bit of fraghdr->offsetRM[1] set, we're done.
+	 */
+	pktposn = 0;
+	for(bl = f->blist; bl && BKFG(bl)->foff == pktposn; bl = bl->next) {
+		fraghdr = (Fraghdr6 *)(bl->rp + uflen);
+		/* a clear M flag marks the last fragment of a gap-free run */
+		if((fraghdr->offsetRM[1] & 1) == 0) {
+			bl = f->blist;
+
+			/* get rid of frag header in first fragment */
+			memmove(bl->rp + IP6FHDR, bl->rp, uflen);
+			bl->rp += IP6FHDR;
+			len = nhgets(((Ip6hdr*)bl->rp)->ploadlen) - IP6FHDR;
+			bl->wp = bl->rp + len + IP6HDR;
+			/*
+			 * Pullup all the fragment headers and
+			 * return a complete packet
+			 */
+			for(bl = bl->next; bl; bl = bl->next) {
+				fragsize = BKFG(bl)->flen;
+				len += fragsize;
+				bl->rp += uflen + IP6FHDR;
+				bl->wp = bl->rp + fragsize;
+			}
+
+			/* detach the list before freeing the queue so the blocks survive */
+			bl = f->blist;
+			f->blist = nil;
+			ipfragfree6(ip, f);
+			ih = (Ip6hdr*)bl->rp;
+			hnputs(ih->ploadlen, len);
+			qunlock(&ip->fraglock6);
+			ip->stats[ReasmOKs]++;
+			return bl;
+		}
+		pktposn += BKFG(bl)->flen;
+	}
+	qunlock(&ip->fraglock6);
+	return nil;
+}
diff --git a/src/9vx/a/ip/ipv6.h b/src/9vx/a/ip/ipv6.h
@@ -0,0 +1,185 @@
+/*
+ * Internet Protocol Version 6
+ *
+ * rfc2460 defines the protocol, rfc2461 neighbour discovery, and
+ * rfc2462 address autoconfiguration. rfc4443 defines ICMP; was rfc2463.
+ * rfc4291 defines the address architecture (including prefixes), was rfc3513.
+ * rfc4007 defines the scoped address architecture.
+ *
+ * global unicast is anything but unspecified (::), loopback (::1),
+ * multicast (ff00::/8), and link-local unicast (fe80::/10).
+ *
+ * site-local (fec0::/10) is now deprecated, originally by rfc3879.
+ *
+ * Unique Local IPv6 Unicast Addresses are defined by rfc4193.
+ * prefix is fc00::/7, scope is global, routing is limited to roughly a site.
+ */
+/* address class tests on a 16-byte address: ff00::/8 and fe80::/10 */
+#define isv6mcast(addr) ((addr)[0] == 0xff)
+#define islinklocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0x80)
+
+/* optexsts: true when the payload length of np exceeds 24 bytes */
+#define optexsts(np) (nhgets((np)->ploadlen) > 24)
+/* issmcast: true when the first 13 bytes of addr match v6solicitednode */
+#define issmcast(addr) (memcmp((addr), v6solicitednode, 13) == 0)
+
+#ifndef MIN
+#define MIN(a, b) ((a) <= (b)? (a): (b))
+#endif
+
+/* drop any earlier macro named ESP so the enum constant of that name below can be declared */
+#undef ESP
+
+/*
+ * Values of the IPv6 next-header field (the same number space as the IPv4
+ * protocol field, assigned by IANA).
+ */
+enum { /* Header Types */
+	HBH = 0, /* hop-by-hop options header */
+	ICMP = 1,
+	IGMP = 2,
+	GGP = 3,
+	IPINIP = 4,
+	ST = 5,
+	TCP = 6,
+	UDP = 17,
+	ISO_TP4 = 29,
+	RH = 43, /* routing header */
+	FH = 44, /* fragment header */
+	IDRP = 45,
+	RSVP = 46,
+	ESP = 50, /* encapsulating security payload; IANA assigns 50, not 52 */
+	AH = 51, /* authentication header */
+	ICMPv6 = 58,
+	NNH = 59, /* no next header */
+	DOH = 60, /* destination options header */
+	ISO_IP = 80,
+	IGRP = 88,
+	OSPF = 89,
+
+	Maxhdrtype = 256,
+};
+
+/*
+ * Miscellaneous IPv6 constants: multicast scopes, ICMPv6 unreachability
+ * codes, header sizes and neighbour discovery option types.
+ */
+enum {
+	/* multicast flags and scopes */
+
+//	Well_known_flg = 0,
+//	Transient_flg = 1,
+
+//	Interface_local_scop = 1,
+	Link_local_scop = 2,
+//	Site_local_scop = 5,
+//	Org_local_scop = 8,
+	Global_scop = 14,
+
+	/* various prefix lengths */
+	SOLN_PREF_LEN = 13,
+
+	/* icmpv6 unreachability codes */
+	Icmp6_no_route = 0,
+	Icmp6_ad_prohib = 1,
+	Icmp6_out_src_scope = 2,
+	Icmp6_adr_unreach = 3,
+	Icmp6_port_unreach = 4,
+	Icmp6_gress_src_fail = 5,
+	Icmp6_rej_route = 6,
+	Icmp6_unknown = 7, /* our own invention for internal use */
+
+	/* various flags & constants */
+	v6MINTU = 1280,
+	HOP_LIMIT = 255,
+	/*
+	 * sizeof(Ip6hdr) = 8 + 2*16. The payload length field is
+	 * "packet length - 40", so this must be 40 for the ploadlen
+	 * arithmetic and the extension header walk to line up.
+	 */
+	IP6HDR = 40,
+
+	/* option types */
+
+	/* neighbour discovery */
+	SRC_LLADDR = 1,
+	TARGET_LLADDR = 2,
+	PREFIX_INFO = 3,
+	REDIR_HEADER = 4,
+	MTU_OPTION = 5,
+	/* new since rfc2461; see iana.org/assignments/icmpv6-parameters */
+	V6nd_home = 8,
+	V6nd_srcaddrs = 9, /* rfc3122 */
+	V6nd_ip = 17,
+	/* /lib/rfc/drafts/draft-jeong-dnsop-ipv6-dns-discovery-12.txt */
+	V6nd_rdns = 25,
+	/* plan 9 extensions */
+	V6nd_9fs = 250,
+	V6nd_9auth = 251,
+
+	SRC_UNSPEC = 0,
+	SRC_UNI = 1,
+	TARG_UNI = 2,
+	TARG_MULTI = 3,
+
+	Tunitent = 1,
+	Tuniproxy = 2,
+	Tunirany = 3,
+
+	/* Node constants */
+	MAX_MULTICAST_SOLICIT = 3,
+	RETRANS_TIMER = 1000,
+};
+
+typedef struct Ip6hdr Ip6hdr;
+typedef struct Opthdr Opthdr;
+typedef struct Routinghdr Routinghdr;
+typedef struct Fraghdr6 Fraghdr6;
+
+/* the fixed IPv6 header; every field is a byte array, so multi-byte values are read with nhgets/nhgetl */
+struct Ip6hdr {
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */
+ uchar ploadlen[2]; /* payload length: packet length - 40 */
+ uchar proto; /* next header type */
+ uchar ttl; /* hop limit */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+};
+
+/* common start of an extension header; unfraglen computes a header's size as (len+1)*8 bytes */
+struct Opthdr {
+ uchar nexthdr;
+ uchar len;
+};
+
+/*
+ * Beware routing header type 0 (loose source routing); see
+ * http://www.secdev.org/conf/IPv6_RH_security-csw07.pdf.
+ * Type 1 is unused. Type 2 is for MIPv6 (mobile IPv6) filtering
+ * against type 0 header.
+ */
+struct Routinghdr {
+ uchar nexthdr;
+ uchar len;
+ uchar rtetype;
+ uchar segrem;
+};
+
+/* fragment extension header, IP6FHDR (8) bytes */
+struct Fraghdr6 {
+ uchar nexthdr;
+ uchar res;
+ uchar offsetRM[2]; /* Offset, Res, M flag */
+ uchar id[4];
+};
+
+/* 16-byte address and mask constants, defined outside this header */
+extern uchar v6allnodesN[IPaddrlen];
+extern uchar v6allnodesL[IPaddrlen];
+extern uchar v6allroutersN[IPaddrlen];
+extern uchar v6allroutersL[IPaddrlen];
+extern uchar v6allnodesNmask[IPaddrlen];
+extern uchar v6allnodesLmask[IPaddrlen];
+extern uchar v6solicitednode[IPaddrlen];
+extern uchar v6solicitednodemask[IPaddrlen];
+extern uchar v6Unspecified[IPaddrlen];
+extern uchar v6loopback[IPaddrlen];
+extern uchar v6loopbackmask[IPaddrlen];
+extern uchar v6linklocal[IPaddrlen];
+extern uchar v6linklocalmask[IPaddrlen];
+extern uchar v6multicast[IPaddrlen];
+extern uchar v6multicastmask[IPaddrlen];
+
+/* NOTE(review): presumably the prefix lengths matching the masks above - confirm against their definitions */
+extern int v6llpreflen;
+extern int v6mcpreflen;
+extern int v6snpreflen;
+extern int v6aNpreflen;
+extern int v6aLpreflen;
+
+extern int ReTransTimer;
+
+/* ICMPv6 helpers implemented elsewhere; ipv6.c calls icmpttlexceeded6 and icmppkttoobig6 */
+void ipv62smcast(uchar *, uchar *);
+void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
+void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
+void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
+void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
+void icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free);
diff --git a/src/9vx/a/ip/loopbackmedium.c b/src/9vx/a/ip/loopbackmedium.c
@@ -0,0 +1,120 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+enum
+{
+ Maxtu= 16*1024, /* largest read from the loopback queue; also the medium's maxtu */
+};
+
+/* per-interface loopback state, hung off ifc->arg */
+typedef struct LB LB;
+struct LB
+{
+ Proc *readp; /* reader kproc; zero once it has exited */
+ Queue *q; /* packets written by loopbackbwrite, read by loopbackread */
+ Fs *f;
+};
+
+static void loopbackread(void *a);
+
+/*
+ * Bind ifc to the loopback medium: allocate its LB state with a 1MB
+ * message queue and start the reader kproc. The argument list is unused.
+ */
+static void
+loopbackbind(Ipifc *ifc, int _, char** __)
+{
+ LB *lb;
+
+ lb = smalloc(sizeof(*lb));
+ lb->f = ifc->conv->p->f;
+ lb->q = qopen(1024*1024, Qmsg, nil, nil);
+ ifc->arg = lb;
+ ifc->mbps = 1000;
+
+ /* ifc->arg must be set before this: loopbackread fetches lb from it */
+ kproc("loopbackread", loopbackread, ifc);
+
+}
+
+/*
+ * Unbind ifc from the loopback medium: post an "unbind" note to the reader
+ * kproc, poll until it has cleared lb->readp, then free the queue and state.
+ */
+static void
+loopbackunbind(Ipifc *ifc)
+{
+ LB *lb = ifc->arg;
+
+ if(lb->readp)
+ postnote(lb->readp, 1, "unbind", 0);
+
+ /* wait for reader to die */
+ while(lb->readp != 0)
+ tsleep(&up->sleep, return0, 0, 300);
+
+ /* clean up */
+ qfree(lb->q);
+ free(lb);
+}
+
+/*
+ * Transmit bp on the loopback interface by queueing it for the reader
+ * kproc. A failed qpass is counted in ifc->outerr; ifc->out is counted
+ * either way. The version and next-hop arguments are unused.
+ */
+static void
+loopbackbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
+{
+	LB *lb = ifc->arg;
+
+	if(qpass(lb->q, bp) < 0)
+		ifc->outerr++;
+	ifc->out++;
+}
+
+/*
+ * Reader kproc for a loopback interface: take packets off lb->q and feed
+ * them to ipiput4 while holding the interface read lock. Any error that
+ * reaches the outer handler clears lb->readp and ends the process.
+ */
+static void
+loopbackread(void *a)
+{
+ Ipifc *ifc;
+ Block *bp;
+ LB *lb;
+
+ ifc = a;
+ lb = ifc->arg;
+ lb->readp = up; /* hide identity under a rock for unbind */
+ if(waserror()){
+ lb->readp = 0;
+ pexit("hangup", 1);
+ }
+ for(;;){
+ bp = qbread(lb->q, Maxtu);
+ if(bp == nil)
+ continue;
+ ifc->in++;
+ if(!CANRLOCK(ifc)){
+ freeb(bp);
+ continue;
+ }
+ /* inner handler only releases the lock, then passes the error to the outer one */
+ if(waserror()){
+ RUNLOCK(ifc);
+ nexterror();
+ }
+ /* drop the packet when ifc->lifc is nil */
+ if(ifc->lifc == nil)
+ freeb(bp);
+ else
+ ipiput4(lb->f, ifc, bp);
+ RUNLOCK(ifc);
+ poperror();
+ }
+}
+
+/* medium descriptor for loopback: no link header, no MAC address, 16K maximum transfer unit */
+Medium loopbackmedium =
+{
+.hsize= 0,
+.mintu= 0,
+.maxtu= Maxtu,
+.maclen= 0,
+.name= "loopback",
+.bind= loopbackbind,
+.unbind= loopbackunbind,
+.bwrite= loopbackbwrite,
+};
+
+/* register the loopback medium with the IP stack */
+void
+loopbackmediumlink(void)
+{
+ addipmedium(&loopbackmedium);
+}
diff --git a/src/9vx/a/ip/netdevmedium.c b/src/9vx/a/ip/netdevmedium.c
@@ -0,0 +1,153 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+static void netdevbind(Ipifc *ifc, int argc, char **argv);
+static void netdevunbind(Ipifc *ifc);
+static void netdevbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
+static void netdevread(void *a);
+
+/* per-interface state for the netdev medium, hung off ifc->arg */
+typedef struct Netdevrock Netdevrock;
+struct Netdevrock
+{
+ Fs *f; /* file system we belong to */
+ Proc *readp; /* reading process */
+ Chan *mchan; /* Data channel */
+};
+
+/* medium descriptor for a generic device: no link header, no MAC address */
+Medium netdevmedium =
+{
+.name= "netdev",
+.hsize= 0,
+.mintu= 0,
+.maxtu= 64000,
+.maclen= 0,
+.bind= netdevbind,
+.unbind= netdevunbind,
+.bwrite= netdevbwrite,
+.unbindonclose= 0,
+};
+
+/*
+ * called to bind an IP ifc to a generic network device
+ * called with ifc qlock'd
+ *
+ * argv[2] names the device file, which is opened read/write and used as
+ * the data channel; a reader kproc is then started on it.
+ * Raises Ebadarg when fewer than three arguments are given.
+ */
+static void
+netdevbind(Ipifc *ifc, int argc, char **argv)
+{
+	Chan *mchan;
+	Netdevrock *er;
+
+	/* argv[2] is used below, so argc must be at least 3 */
+	if(argc < 3)
+		error(Ebadarg);
+
+	mchan = namec(argv[2], Aopen, ORDWR, 0);
+
+	er = smalloc(sizeof(*er));
+	er->mchan = mchan;
+	er->f = ifc->conv->p->f;
+
+	/* ifc->arg must be set before the reader starts: netdevread fetches er from it */
+	ifc->arg = er;
+
+	kproc("netdevread", netdevread, ifc);
+}
+
+/*
+ * called with ifc wlock'd
+ *
+ * Post an "unbind" note to the reader, poll until it has cleared
+ * er->readp, then close the data channel and free the state.
+ */
+static void
+netdevunbind(Ipifc *ifc)
+{
+ Netdevrock *er = ifc->arg;
+
+ if(er->readp != nil)
+ postnote(er->readp, 1, "unbind", 0);
+
+ /* wait for readers to die */
+ while(er->readp != nil)
+ tsleep(&up->sleep, return0, 0, 300);
+
+ if(er->mchan != nil)
+ cclose(er->mchan);
+
+ free(er);
+}
+
+/*
+ * called by ipoput with a single block to write
+ *
+ * A chained block is first flattened with concatblock, and a block shorter
+ * than ifc->mintu is extended with adjustblock, before it is written to
+ * the data channel. The version and next-hop arguments are unused.
+ */
+static void
+netdevbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
+{
+ Netdevrock *er = ifc->arg;
+
+ if(bp->next)
+ bp = concatblock(bp);
+ if(BLEN(bp) < ifc->mintu)
+ bp = adjustblock(bp, ifc->mintu);
+
+ devtab[er->mchan->type]->bwrite(er->mchan, bp, 0);
+ ifc->out++;
+}
+
+/*
+ * process to read from the device
+ *
+ * Reads blocks of up to ifc->maxtu bytes from the data channel and feeds
+ * them to ipiput4 while holding the interface read lock. Any error that
+ * reaches the outer handler clears er->readp and ends the process.
+ */
+static void
+netdevread(void *a)
+{
+ Ipifc *ifc;
+ Block *bp;
+ Netdevrock *er;
+ char *argv[1];
+
+ ifc = a;
+ er = ifc->arg;
+ er->readp = up; /* hide identity under a rock for unbind */
+ if(waserror()){
+ er->readp = nil;
+ pexit("hangup", 1);
+ }
+ for(;;){
+ bp = devtab[er->mchan->type]->bread(er->mchan, ifc->maxtu, 0);
+ if(bp == nil){
+ /*
+ * get here if mchan is a pipe and other side hangs up
+ * clean up this interface & get out
+ZZZ is this a good idea?
+ */
+ /* drop the outer handler and clear readp first, so netdevunbind does not wait for this process */
+ poperror();
+ er->readp = nil;
+ argv[0] = "unbind";
+ /* an error raised by the ctl call lands here and is ignored; the process exits either way */
+ if(!waserror())
+ ifc->conv->p->ctl(ifc->conv, argv, 1);
+ pexit("hangup", 1);
+ }
+ if(!CANRLOCK(ifc)){
+ freeb(bp);
+ continue;
+ }
+ /* inner handler only releases the lock, then passes the error to the outer one */
+ if(waserror()){
+ RUNLOCK(ifc);
+ nexterror();
+ }
+ ifc->in++;
+ /* drop the packet when ifc->lifc is nil */
+ if(ifc->lifc == nil)
+ freeb(bp);
+ else
+ ipiput4(er->f, ifc, bp);
+ RUNLOCK(ifc);
+ poperror();
+ }
+}
+
+/* register the netdev medium with the IP stack */
+void
+netdevmediumlink(void)
+{
+ addipmedium(&netdevmedium);
+}
diff --git a/src/9vx/a/ip/netlog.c b/src/9vx/a/ip/netlog.c
@@ -0,0 +1,261 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+#include "ip/ip.h"
+
+/* size in bytes of the circular log buffer allocated by netlogopen */
+enum {
+ Nlog = 16*1024,
+};
+
+/*
+ * action log
+ *
+ * a circular buffer of Nlog bytes: netlog() appends formatted text,
+ * netlogread() consumes it starting at rptr.
+ */
+struct Netlog {
+ Lock lk; /* presumably what LOCK()/UNLOCK() take -- confirm against the macros */
+ int opens; /* open count; buf is freed when it drops to 0 */
+ char* buf; /* start of the circular buffer (nil when closed) */
+ char *end; /* buf + Nlog */
+ char *rptr; /* next byte to hand to a reader */
+ int len; /* number of unread bytes in the buffer */
+
+ int logmask; /* mask of things to debug */
+ uchar iponly[IPaddrlen]; /* ip address to print debugging for */
+ int iponlyset; /* non-zero when iponly holds a real address */
+
+ QLock qlock; /* presumably what QLOCK()/QUNLOCK() take; held across netlogread */
+ Rendez rendez; /* readers sleep here until len is non-zero */
+};
+
+/* maps a keyword accepted by netlogctl to the logmask bits it controls */
+typedef struct Netlogflag {
+ char* name;
+ int mask;
+} Netlogflag;
+
+/*
+ * keyword table searched by netlogctl; terminated by a nil name.
+ * some keywords (tcpwin, tcprxmt, udpmsg, ipmsg) carry two bits.
+ */
+static Netlogflag flags[] =
+{
+ { "ppp", Logppp, },
+ { "ip", Logip, },
+ { "fs", Logfs, },
+ { "tcp", Logtcp, },
+ { "icmp", Logicmp, },
+ { "udp", Logudp, },
+ { "compress", Logcompress, },
+ { "gre", Loggre, },
+ { "tcpwin", Logtcp|Logtcpwin, },
+ { "tcprxmt", Logtcp|Logtcprxmt, },
+ { "udpmsg", Logudp|Logudpmsg, },
+ { "ipmsg", Logip|Logipmsg, },
+ { "esp", Logesp, },
+ { nil, 0, },
+};
+
+/* error raised by netlogctl when a control message has fewer than two fields */
+char Ebadnetctl[] = "too few arguments for netlog control message";
+
+/* indices of the netlog control verbs, used in routecmd below */
+enum
+{
+ CMset,
+ CMclear,
+ CMonly,
+};
+
+/*
+ * command table handed to lookupcmd by netlogctl.
+ * despite the name, these are the netlog verbs, not routing commands.
+ */
+static
+Cmdtab routecmd[] = {
+ CMset, "set", 0,
+ CMclear, "clear", 0,
+ CMonly, "only", 0,
+};
+
+/*
+ * allocate the Netlog state for f with smalloc.
+ * the log buffer itself is allocated later, by netlogopen.
+ */
+void
+netloginit(Fs *f)
+{
+ f->alog = smalloc(sizeof(Netlog));
+}
+
+/*
+ * account for one more opener of the log.
+ * the first opener allocates the Nlog-byte circular buffer (if it is
+ * not already there) and resets the read pointer, end pointer and
+ * unread count.
+ * raises Enomem if the buffer cannot be allocated; the open count is
+ * left unchanged in that case.
+ */
+void
+netlogopen(Fs *f)
+{
+	LOCK(f->alog);
+	if(waserror()){
+		UNLOCK(f->alog);
+		nexterror();
+	}
+	if(f->alog->opens == 0){
+		if(f->alog->buf == nil){
+			f->alog->buf = malloc(Nlog);
+			/* malloc can fail; don't build rptr/end from nil */
+			if(f->alog->buf == nil)
+				error(Enomem);
+		}
+		f->alog->rptr = f->alog->buf;
+		f->alog->end = f->alog->buf + Nlog;
+		/*
+		 * netlogclose frees buf without touching len; a stale count
+		 * would let a reader copy out uninitialised buffer contents.
+		 */
+		f->alog->len = 0;
+	}
+	f->alog->opens++;
+	UNLOCK(f->alog);
+	poperror();
+}
+
+/*
+ * drop one opener of the log; when the count reaches zero the
+ * buffer is freed and buf is set to nil.
+ */
+void
+netlogclose(Fs *f)
+{
+ LOCK(f->alog);
+ if(waserror()){
+ UNLOCK(f->alog);
+ nexterror();
+ }
+ f->alog->opens--;
+ if(f->alog->opens == 0){
+ free(f->alog->buf);
+ f->alog->buf = nil;
+ }
+ UNLOCK(f->alog);
+ poperror();
+}
+
+/*
+ * sleep condition used by netlogread: non-zero once the log
+ * holds unread bytes.
+ */
+static int
+netlogready(void *a)
+{
+ Fs *f = a;
+
+ return f->alog->len;
+}
+
+/*
+ * copy up to n unread bytes of log text into a, sleeping until
+ * at least one byte is available.  the offset argument is ignored.
+ * returns the number of bytes copied.
+ */
+long
+netlogread(Fs *f, void *a, ulong _, long n)
+{
+ int i, d;
+ char *p, *rptr;
+
+ QLOCK(f->alog);
+ if(waserror()){
+ QUNLOCK(f->alog);
+ nexterror();
+ }
+
+ for(;;){
+ LOCK(f->alog);
+ if(f->alog->len){
+ if(n > f->alog->len)
+ n = f->alog->len;
+ /* d = number of bytes that wrap around to the start of buf */
+ d = 0;
+ rptr = f->alog->rptr;
+ f->alog->rptr += n;
+ if(f->alog->rptr >= f->alog->end){
+ d = f->alog->rptr - f->alog->end;
+ f->alog->rptr = f->alog->buf + d;
+ }
+ f->alog->len -= n;
+ UNLOCK(f->alog);
+
+ /* the copy itself is done after the lock is dropped */
+ i = n-d;
+ p = a;
+ memmove(p, rptr, i); /* tail of buffer */
+ memmove(p+i, f->alog->buf, d); /* wrapped part, if any */
+ break;
+ }
+ else
+ UNLOCK(f->alog);
+
+ sleep(&f->alog->rendez, netlogready, f);
+ }
+
+ QUNLOCK(f->alog);
+ poperror();
+
+ return n;
+}
+
+/*
+ * handle a control message written to the log file.
+ *	set flag ...	turn on the named logging flags
+ *	clear flag ...	turn off the named logging flags
+ *	only addr	record addr in iponly; iponlyset is cleared
+ *			when addr equals IPnoaddr
+ * unknown flag names are silently skipped.
+ * raises Ebadnetctl when fewer than two fields are given; the
+ * parsed command buffer is freed on every path.
+ */
+void
+netlogctl(Fs *f, char* s, int n)
+{
+	int i, set;
+	Netlogflag *fp;
+	Cmdbuf *cb;
+	Cmdtab *ct;
+
+	cb = parsecmd(s, n);
+	if(waserror()){
+		free(cb);
+		nexterror();
+	}
+
+	if(cb->nf < 2)
+		error(Ebadnetctl);
+
+	ct = lookupcmd(cb, routecmd, nelem(routecmd));
+
+	set = 1;
+
+	switch(ct->index){
+	case CMset:
+		set = 1;
+		break;
+
+	case CMclear:
+		set = 0;
+		break;
+
+	case CMonly:
+		parseip(f->alog->iponly, cb->f[1]);
+		if(ipcmp(f->alog->iponly, IPnoaddr) == 0)
+			f->alog->iponlyset = 0;
+		else
+			f->alog->iponlyset = 1;
+		free(cb);
+		/* pop the waserror frame pushed above before leaving */
+		poperror();
+		return;
+
+	default:
+		cmderror(cb, "unknown ip control message");
+	}
+
+	for(i = 1; i < cb->nf; i++){
+		for(fp = flags; fp->name; fp++)
+			if(strcmp(fp->name, cb->f[i]) == 0)
+				break;
+		if(fp->name == nil)
+			continue;
+		if(set)
+			f->alog->logmask |= fp->mask;
+		else
+			f->alog->logmask &= ~fp->mask;
+	}
+
+	free(cb);
+	poperror();
+}
+
+/*
+ * append a formatted message to the circular log if any bit of mask
+ * is enabled in logmask and the log is open.  the message is
+ * formatted into a 128-byte stack buffer, so longer output is cut
+ * off there.  when the buffer would overflow, the oldest unread
+ * bytes are discarded.  wakes any sleeping reader.
+ */
+void
+netlog(Fs *f, int mask, char *fmt, ...)
+{
+ char buf[128], *t, *fp;
+ int i, n;
+ va_list arg;
+
+ if(!(f->alog->logmask & mask))
+ return;
+
+ /* NOTE(review): opens is tested before the lock is taken -- confirm this is safe against a concurrent netlogclose */
+ if(f->alog->opens == 0)
+ return;
+
+ va_start(arg, fmt);
+ n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf;
+ va_end(arg);
+
+ LOCK(f->alog);
+ /* i = number of bytes by which the new text would overflow the buffer */
+ i = f->alog->len + n - Nlog;
+ if(i > 0){
+ /* make room by advancing rptr past the i oldest bytes */
+ f->alog->len -= i;
+ f->alog->rptr += i;
+ if(f->alog->rptr >= f->alog->end)
+ f->alog->rptr = f->alog->buf + (f->alog->rptr - f->alog->end);
+ }
+ /* t = write position: just past the unread data (may need wrapping) */
+ t = f->alog->rptr + f->alog->len;
+ fp = buf;
+ f->alog->len += n;
+ while(n-- > 0){
+ if(t >= f->alog->end)
+ t = f->alog->buf + (t - f->alog->end);
+ *t++ = *fp++;
+ }
+ UNLOCK(f->alog);
+
+ wakeup(&f->alog->rendez);
+}
diff --git a/src/9vx/a/ip/nullmedium.c b/src/9vx/a/ip/nullmedium.c
@@ -0,0 +1,39 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+/* bind hook of the null medium: always fails with an error */
+static void
+nullbind(Ipifc* _, int __, char** ___)
+{
+ error("cannot bind null device");
+}
+
+/* unbind hook of the null medium: nothing to undo */
+static void
+nullunbind(Ipifc* _)
+{
+}
+
+/* write hook of the null medium: always raises "nullbwrite"; the block is not touched here */
+static void
+nullbwrite(Ipifc* _, Block* __, int ___, uchar* ____)
+{
+ error("nullbwrite");
+}
+
+/*
+ * the "null" medium: bind and bwrite raise errors, unbind is a no-op.
+ * all other Medium fields are left zero.
+ */
+Medium nullmedium =
+{
+.name= "null",
+.bind= nullbind,
+.unbind= nullunbind,
+.bwrite= nullbwrite,
+};
+
+/* link-time hook: registers nullmedium with the IP stack via addipmedium */
+void
+nullmediumlink(void)
+{
+ addipmedium(&nullmedium);
+}
diff --git a/src/9vx/a/ip/pktmedium.c b/src/9vx/a/ip/pktmedium.c
@@ -0,0 +1,78 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+
+/* hooks of the pkt medium, defined below */
+static void pktbind(Ipifc*, int, char**);
+static void pktunbind(Ipifc*);
+static void pktbwrite(Ipifc*, Block*, int, uchar*);
+static void pktin(Fs*, Ipifc*, Block*);
+
+/*
+ * the "pkt" medium: outgoing packets are queued on the owning
+ * conversation's read queue (see pktbwrite) and packets handed to
+ * pktin are passed to ipiput4.
+ */
+Medium pktmedium =
+{
+.name= "pkt",
+.hsize= 14,
+.mintu= 40,
+.maxtu= 4*1024,
+.maclen= 6,
+.bind= pktbind,
+.unbind= pktunbind,
+.bwrite= pktbwrite,
+.pktin= pktin,
+};
+
+/*
+ * bind hook of the pkt medium: there is no device to attach to,
+ * so this does nothing and ignores its arguments.
+ * called with ifc wlock'd
+ */
+static void
+pktbind(Ipifc* _, int argc, char **argv)
+{
+}
+
+/*
+ * unbind hook of the pkt medium: nothing to undo.
+ * called with ifc wlock'd
+ */
+static void
+pktunbind(Ipifc* _)
+{
+}
+
+/*
+ * output hook: called by ipoput with a single packet to write.
+ * the packet is flattened and queued on the conversation's read
+ * queue; a copy goes to the snoop queue when anyone is snooping.
+ */
+static void
+pktbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
+{
+	Conv *cv;
+
+	bp = concatblock(bp);
+
+	cv = ifc->conv;
+	if(cv->snoopers.ref > 0)
+		qpass(cv->sq, copyblock(bp, BLEN(bp)));
+
+	/* enqueue onto the conversation's rq */
+	qpass(cv->rq, bp);
+}
+
+/*
+ * input hook: called with ifc rlocked when someone writes to 'data'.
+ * the block is discarded when ifc->lifc is nil; otherwise it is
+ * copied to any snoopers and passed to ipiput4.
+ */
+static void
+pktin(Fs *f, Ipifc *ifc, Block *bp)
+{
+	if(ifc->lifc == nil){
+		freeb(bp);
+		return;
+	}
+
+	if(ifc->conv->snoopers.ref > 0)
+		qpass(ifc->conv->sq, copyblock(bp, BLEN(bp)));
+	ipiput4(f, ifc, bp);
+}
+
+/* link-time hook: registers pktmedium with the IP stack via addipmedium */
+void
+pktmediumlink(void)
+{
+ addipmedium(&pktmedium);
+}
diff --git a/src/9vx/a/ip/ptclbsum.c b/src/9vx/a/ip/ptclbsum.c
@@ -0,0 +1,72 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+#include "ip.h"
+
+/*
+ * run-time byte-order probe: LITTLE reads the first byte of a short
+ * holding 1, so it is non-zero exactly when the low-order byte is
+ * stored first.
+ */
+static short endian = 1;
+static uchar* aendian = (uchar*)&endian;
+#define LITTLE *aendian
+
+/*
+ * sum len bytes at addr as 16-bit quantities and fold the result
+ * into 16 bits with end-around carry (presumably the partial
+ * Internet checksum used by the protocol code -- confirm at callers).
+ * bytes at even offsets from the original addr contribute to the
+ * high byte of the result, bytes at odd offsets to the low byte,
+ * regardless of addr's alignment or the host byte order.
+ */
+ushort
+ptclbsum(uchar *addr, int len)
+{
+ ulong losum, hisum, mdsum, x;
+ ulong t1, t2;
+
+ losum = 0; /* bytes destined for the low half */
+ hisum = 0; /* bytes destined for the high half */
+ mdsum = 0; /* sum of whole 16-bit words read in host order */
+
+ /* x records that addr was odd: one byte is consumed to align it */
+ x = 0;
+ if((ulong)addr & 1) {
+ if(len) {
+ hisum += addr[0];
+ len--;
+ addr++;
+ }
+ x = 1;
+ }
+ /* main loop: eight 16-bit loads per iteration, loads interleaved with adds */
+ while(len >= 16) {
+ t1 = *(ushort*)(addr+0);
+ t2 = *(ushort*)(addr+2); mdsum += t1;
+ t1 = *(ushort*)(addr+4); mdsum += t2;
+ t2 = *(ushort*)(addr+6); mdsum += t1;
+ t1 = *(ushort*)(addr+8); mdsum += t2;
+ t2 = *(ushort*)(addr+10); mdsum += t1;
+ t1 = *(ushort*)(addr+12); mdsum += t2;
+ t2 = *(ushort*)(addr+14); mdsum += t1;
+ mdsum += t2;
+ len -= 16;
+ addr += 16;
+ }
+ /* remaining whole words */
+ while(len >= 2) {
+ mdsum += *(ushort*)addr;
+ len -= 2;
+ addr += 2;
+ }
+ /*
+ * add the trailing odd byte, then fold mdsum into the half that
+ * matches the byte order and the initial alignment shift
+ */
+ if(x) {
+ if(len)
+ losum += addr[0];
+ if(LITTLE)
+ losum += mdsum;
+ else
+ hisum += mdsum;
+ } else {
+ if(len)
+ hisum += addr[0];
+ if(LITTLE)
+ hisum += mdsum;
+ else
+ losum += mdsum;
+ }
+
+ /* byte-swap hisum into losum, then fold carries above bit 15 back in */
+ losum += hisum >> 8;
+ losum += (hisum & 0xff) << 8;
+ while((hisum = losum>>16))
+ losum = hisum + (losum & 0xffff);
+
+ return losum & 0xffff;
+}
diff --git a/src/9vx/a/ip/rudp.c b/src/9vx/a/ip/rudp.c
@@ -0,0 +1,1055 @@
+/*
+ * Reliable User Datagram Protocol, currently only for IPv4.
+ * This protocol is compatible with UDP's packet format.
+ * It could be done over UDP if need be.
+ */
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+/* set DEBUG non-zero to turn the DPRINT calls below into print calls */
+#define DEBUG 0
+#define DPRINT if(DEBUG)print
+
+/*
+ * distance from sequence number b forward to a.  when a is below b
+ * the result is 0xffffffff minus the gap, matching a sequence space
+ * in which NEXTSEQ never produces 0.
+ */
+#define SEQDIFF(a,b) ( (a)>=(b)?\
+ (a)-(b):\
+ 0xffffffffUL-((b)-(a)) )
+/* true when a lies in the half-open window (start, end], allowing for wrap-around */
+#define INSEQ(a,start,end) ( (start)<=(end)?\
+ ((a)>(start)&&(a)<=(end)):\
+ ((a)>(start)||(a)<=(end)) )
+/* number of messages sent to r's peer that have not been acked; r must be a plain identifier */
+#define UNACKED(r) SEQDIFF(r->sndseq, r->ackrcvd)
+/* successor of sequence number a; yields 1 when a+1 wraps to 0.  evaluates a twice */
+#define NEXTSEQ(a) ( (a)+1 == 0 ? 1 : (a)+1 )
+
+/*
+ * header sizes and tuning constants for rudp.
+ * the three header sizes correspond to the byte counts of the
+ * sections of struct Rudphdr: 8 bytes of ip header, 12 of pseudo
+ * header, then 8 of udp header and 16 of rudp header.
+ */
+enum
+{
+ UDP_PHDRSIZE = 12, /* pseudo header */
+// UDP_HDRSIZE = 20, /* pseudo header + udp header */
+ UDP_RHDRSIZE = 36, /* pseudo header + udp header + rudp header */
+ UDP_IPHDR = 8, /* ip header */
+ IP_UDPPROTO = 254,
+ UDP_USEAD7 = 52, /* size of new ipv6 headers struct */
+
+ Rudprxms = 200, /* retransmit once timeout exceeds this times xmits (see relackproc) */
+ Rudptickms = 50, /* period in ms of the relackproc loop */
+ Rudpmaxxmit = 10, /* relrexmit hangs up once xmits has passed this */
+ Maxunacked = 100, /* rudpkick blocks the writer above this many unacked msgs */
+};
+
+#define Hangupgen 0xffffffff /* used only in hangup messages */
+
+/*
+ * layout of the start of a packet as seen by this file: the first
+ * 8 bytes of the ip header, a 12-byte pseudo header, then the udp
+ * header.  Rudphdr below has the same prefix.
+ */
+typedef struct Udphdr Udphdr;
+struct Udphdr
+{
+ /* ip header */
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+
+ /* pseudo header starts here */
+ uchar Unused;
+ uchar udpproto; /* Protocol */
+ uchar udpplen[2]; /* Header plus data length */
+ uchar udpsrc[4]; /* Ip source */
+ uchar udpdst[4]; /* Ip destination */
+
+ /* udp header */
+ uchar udpsport[2]; /* Source port */
+ uchar udpdport[2]; /* Destination port */
+ uchar udplen[2]; /* data length */
+ uchar udpcksum[2]; /* Checksum */
+};
+
+/*
+ * Udphdr followed by the 16-byte rudp header carrying the sequence
+ * number and generation of this message and of the message being
+ * acknowledged.  all multi-byte fields are accessed with the
+ * nhget and hnput helpers.
+ */
+typedef struct Rudphdr Rudphdr;
+struct Rudphdr
+{
+ /* ip header */
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+
+ /* pseudo header starts here */
+ uchar Unused;
+ uchar udpproto; /* Protocol */
+ uchar udpplen[2]; /* Header plus data length */
+ uchar udpsrc[4]; /* Ip source */
+ uchar udpdst[4]; /* Ip destination */
+
+ /* udp header */
+ uchar udpsport[2]; /* Source port */
+ uchar udpdport[2]; /* Destination port */
+ uchar udplen[2]; /* data length (includes rudp header) */
+ uchar udpcksum[2]; /* Checksum */
+
+ /* rudp header */
+ uchar relseq[4]; /* id of this packet (or 0) */
+ uchar relsgen[4]; /* generation/time stamp */
+ uchar relack[4]; /* packet being acked (or 0) */
+ uchar relagen[4]; /* generation/time stamp */
+};
+
+
+/*
+ * one state structure per destination
+ *
+ * records are kept on the singly linked list Rudpcb.r and are
+ * reference counted: relstate takes a reference, relput drops one
+ * and frees the record when the count reaches zero.
+ */
+typedef struct Reliable Reliable;
+struct Reliable
+{
+ Ref;
+
+ Reliable *next;
+
+ uchar addr[IPaddrlen]; /* always V6 when put here */
+ ushort port;
+
+ Block *unacked; /* unacked msg list */
+ Block *unackedtail; /* and its tail */
+
+ int timeout; /* time since first unacked msg sent */
+ int xmits; /* number of times first unacked msg sent */
+
+ ulong sndseq; /* next packet to be sent */
+ ulong sndgen; /* and its generation */
+
+ ulong rcvseq; /* last packet received */
+ ulong rcvgen; /* and its generation */
+
+ ulong acksent; /* last ack sent */
+ ulong ackrcvd; /* last msg for which ack was rcvd */
+
+ /* flow control */
+ QLock lock; /* held by rudpkick while it checks and waits on the window */
+ Rendez vous; /* rudpkick sleeps here; woken from reliput and relhangup */
+ int blocked; /* set while a writer is asleep on vous */
+};
+
+
+
+/* MIB II counters */
+typedef struct Rudpstats Rudpstats;
+struct Rudpstats
+{
+ ulong rudpInDatagrams; /* bumped for every packet entering rudpiput */
+ ulong rudpNoPorts; /* input packets with no matching conversation */
+ ulong rudpInErrors; /* input packets failing the checksum */
+ ulong rudpOutDatagrams; /* bumped for every message sent by rudpkick */
+};
+
+/* per-protocol private state, hung off Proto.priv by rudpinit */
+typedef struct Rudppriv Rudppriv;
+struct Rudppriv
+{
+ Ipht ht; /* conversation lookup table used by rudpiput */
+
+ /* MIB counters */
+ Rudpstats ustats;
+
+ /* non-MIB stats */
+ ulong csumerr; /* checksum errors */
+ ulong lenerr; /* short packet */
+ ulong rxmits; /* # of retransmissions */
+ ulong orders; /* # of out of order pkts */
+
+ /* keeping track of the ack kproc */
+ int ackprocstarted; /* set once relackproc has been started */
+ QLock apl; /* serializes rudpstartackproc */
+};
+
+
+/* next generation number to hand out; seeded from rand() on first use in relstate */
+static ulong generation = 0;
+/* NOTE(review): rend is not referenced anywhere in the visible part of this file */
+static Rendez rend;
+
+/*
+ * protocol specific part of Conv
+ *
+ * the embedded QLock protects the list of Reliable records.
+ */
+typedef struct Rudpcb Rudpcb;
+struct Rudpcb
+{
+ QLock;
+ uchar headers; /* 7 once the "headers" ctl has been written, else 0 */
+ uchar randdrop; /* percentage of outgoing packets to drop (see doipoput) */
+ Reliable *r; /* list of per-destination states */
+};
+
+/*
+ * forward declarations for functions defined later in this file
+ */
+void rudpkick(void *x);
+
+/* per-destination state management */
+Reliable *relstate(Rudpcb*, uchar*, ushort, char*);
+void relput(Reliable*);
+void relforget(Conv *, uchar*, int, int);
+void relhangup(Conv *, Reliable*);
+
+/* input, acknowledgement and retransmission */
+int reliput(Conv*, Block*, uchar*, ushort);
+void relsendack(Conv*, Reliable*, int);
+void relackq(Reliable *, Block*);
+void relackproc(void *);
+void relrexmit(Conv *, Reliable*);
+
+/*
+ * start the retransmit/ack kproc (relackproc) for this protocol
+ * instance the first time it is needed.  the flag is tested once
+ * without the lock as a fast path and again under rpriv->apl so
+ * that only one kproc is ever created.
+ */
+static void
+rudpstartackproc(Proto *rudp)
+{
+	Rudppriv *rpriv;
+	char kpname[KNAMELEN];
+
+	rpriv = rudp->priv;
+	if(rpriv->ackprocstarted == 0){
+		qlock(&rpriv->apl);
+		if(rpriv->ackprocstarted == 0){
+			/* bounded format: kpname is only KNAMELEN bytes */
+			snprint(kpname, sizeof(kpname), "#I%drudpack", rudp->f->dev);
+			kproc(kpname, relackproc, rudp);
+			rpriv->ackprocstarted = 1;
+		}
+		qunlock(&rpriv->apl);
+	}
+}
+
+/*
+ * connect ctl: make sure the ack kproc is running, run the standard
+ * connect, report the outcome with Fsconnected, and on success enter
+ * the conversation in the protocol's lookup table.
+ * returns nil on success or the error string from Fsstdconnect.
+ */
+static char*
+rudpconnect(Conv *c, char **argv, int argc)
+{
+	char *e;
+	Rudppriv *upriv;
+
+	upriv = c->p->priv;
+	rudpstartackproc(c->p);
+	e = Fsstdconnect(c, argv, argc);
+	Fsconnected(c, e);
+	/* a failed connect must not be entered in the table (cf. rudpannounce) */
+	if(e != nil)
+		return e;
+	iphtadd(&upriv->ht, c);
+
+	return nil;
+}
+
+
+/*
+ * format the conversation's status line into state (at most n bytes):
+ * "Open" or "Closed", then " addr/unacked" for every known
+ * destination, then a newline.  returns the number of bytes written.
+ */
+static int
+rudpstate(Conv *c, char *state, int n)
+{
+	Rudpcb *cb;
+	Reliable *peer;
+	int used;
+
+	cb = (Rudpcb*)c->ptcl;
+	used = snprint(state, n, "%s", c->inuse?"Open":"Closed");
+
+	qlock(cb);
+	peer = cb->r;
+	while(peer != nil){
+		used += snprint(state+used, n-used, " %I/%ld", peer->addr, UNACKED(peer));
+		peer = peer->next;
+	}
+	used += snprint(state+used, n-used, "\n");
+	qunlock(cb);
+
+	return used;
+}
+
+/*
+ * announce ctl: make sure the ack kproc is running and run the
+ * standard announce; on success mark the conversation connected and
+ * enter it in the lookup table.
+ * returns nil on success or the error string from Fsstdannounce.
+ */
+static char*
+rudpannounce(Conv *c, char** argv, int argc)
+{
+	Rudppriv *priv;
+	char *err;
+
+	priv = c->p->priv;
+	rudpstartackproc(c->p);
+
+	err = Fsstdannounce(c, argv, argc);
+	if(err == nil){
+		Fsconnected(c, nil);
+		iphtadd(&priv->ht, c);
+	}
+	return err;
+}
+
+/*
+ * create hook: give the conversation a 64K message-mode read queue
+ * and a 64K write queue whose kick routine is rudpkick.
+ */
+static void
+rudpcreate(Conv *c)
+{
+ c->rq = qopen(64*1024, Qmsg, 0, 0);
+ c->wq = qopen(64*1024, Qkick, rudpkick, c);
+}
+
+/*
+ * close hook: remove the conversation from the lookup table, flush
+ * pending acks, close its queues, reset its addresses and options,
+ * and hang up and release every per-destination state.
+ */
+static void
+rudpclose(Conv *c)
+{
+ Rudpcb *ucb;
+ Reliable *r, *nr;
+ Rudppriv *upriv;
+
+ upriv = c->p->priv;
+ iphtrem(&upriv->ht, c);
+
+ /* force out any delayed acks */
+ ucb = (Rudpcb*)c->ptcl;
+ qlock(ucb);
+ for(r = ucb->r; r; r = r->next){
+ if(r->acksent != r->rcvseq)
+ relsendack(c, r, 0);
+ }
+ qunlock(ucb);
+
+ qclose(c->rq);
+ qclose(c->wq);
+ qclose(c->eq);
+ ipmove(c->laddr, IPnoaddr);
+ ipmove(c->raddr, IPnoaddr);
+ c->lport = 0;
+ c->rport = 0;
+
+ /* note: these two fields are reset before the lock is retaken */
+ ucb->headers = 0;
+ ucb->randdrop = 0;
+ qlock(ucb);
+ /* nr is fetched before relput, which may free r */
+ for(r = ucb->r; r; r = nr){
+ if(r->acksent != r->rcvseq)
+ relsendack(c, r, 0);
+ nr = r->next;
+ relhangup(c, r);
+ relput(r);
+ }
+ ucb->r = 0;
+
+ qunlock(ucb);
+}
+
+/*
+ * send bp with ipoput4, except that when the conversation's randdrop
+ * percentage is set the packet is instead freed with that
+ * probability (randomly don't send packets).
+ */
+static void
+doipoput(Conv *c, Fs *f, Block *bp, int x, int ttl, int tos)
+{
+	Rudpcb *cb;
+	int drop;
+
+	cb = (Rudpcb*)c->ptcl;
+
+	/* nrand is only consulted when a drop rate has been set */
+	drop = cb->randdrop && nrand(100) < cb->randdrop;
+	if(drop){
+		freeblist(bp);
+		return;
+	}
+	ipoput4(f, bp, x, ttl, tos, nil);
+}
+
+/*
+ * sleep condition for rudpkick's flow control: true once the number
+ * of unacked messages for the destination is at most Maxunacked.
+ */
+int
+flow(void *v)
+{
+ Reliable *r = v;
+
+ return UNACKED(r) <= Maxunacked;
+}
+
+/*
+ * kick routine of the write queue: take one message off c->wq,
+ * prepend the ip/udp/rudp headers, stamp it with the next sequence
+ * number for its destination (piggy-backing an ack of the last
+ * packet received), queue a copy for retransmission and send it.
+ * afterwards the caller is put to sleep while more than Maxunacked
+ * messages to that destination remain unacknowledged.
+ *
+ * with the "headers" option (ucb->headers == 7) the destination
+ * address and port are read from the front of the message; otherwise
+ * the conversation's remote address and port are used.
+ */
+void
+rudpkick(void *x)
+{
+	Conv *c = x;
+	Udphdr *uh;
+	ushort rport;
+	uchar laddr[IPaddrlen], raddr[IPaddrlen];
+	Block *bp;
+	Rudpcb *ucb;
+	Rudphdr *rh;
+	Reliable *r;
+	int dlen, ptcllen;
+	Rudppriv *upriv;
+	Fs *f;
+
+	upriv = c->p->priv;
+	f = c->p->f;
+
+	netlog(c->p->f, Logrudp, "rudp: kick\n");
+	bp = qget(c->wq);
+	if(bp == nil)
+		return;
+
+	ucb = (Rudpcb*)c->ptcl;
+	switch(ucb->headers) {
+	case 7:
+		/* get user specified addresses */
+		bp = pullupblock(bp, UDP_USEAD7);
+		if(bp == nil)
+			return;
+		ipmove(raddr, bp->rp);
+		bp->rp += IPaddrlen;
+		ipmove(laddr, bp->rp);
+		bp->rp += IPaddrlen;
+		/* pick interface closest to dest */
+		if(ipforme(f, laddr) != Runi)
+			findlocalip(f, laddr, raddr);
+		bp->rp += IPaddrlen;		/* Ignore ifc address */
+		rport = nhgets(bp->rp);
+		bp->rp += 2+2;			/* Ignore local port */
+		break;
+	default:
+		ipmove(raddr, c->raddr);
+		ipmove(laddr, c->laddr);
+		rport = c->rport;
+		break;
+	}
+
+	dlen = blocklen(bp);
+
+	/* Make space to fit rudp & ip header */
+	bp = padblock(bp, UDP_IPHDR+UDP_RHDRSIZE);
+	if(bp == nil)
+		return;
+
+	uh = (Udphdr *)(bp->rp);
+	uh->vihl = IP_VER4;
+
+	rh = (Rudphdr*)uh;
+
+	/* length covered by the udp header: udp + rudp headers + data */
+	ptcllen = dlen + (UDP_RHDRSIZE-UDP_PHDRSIZE);
+	uh->Unused = 0;
+	uh->udpproto = IP_UDPPROTO;
+	uh->frag[0] = 0;
+	uh->frag[1] = 0;
+	hnputs(uh->udpplen, ptcllen);
+	switch(ucb->headers){
+	case 7:
+		v6tov4(uh->udpdst, raddr);
+		hnputs(uh->udpdport, rport);
+		v6tov4(uh->udpsrc, laddr);
+		break;
+	default:
+		v6tov4(uh->udpdst, c->raddr);
+		hnputs(uh->udpdport, c->rport);
+		if(ipcmp(c->laddr, IPnoaddr) == 0)
+			findlocalip(f, c->laddr, c->raddr);
+		v6tov4(uh->udpsrc, c->laddr);
+		break;
+	}
+	hnputs(uh->udpsport, c->lport);
+	hnputs(uh->udplen, ptcllen);
+	uh->udpcksum[0] = 0;
+	uh->udpcksum[1] = 0;
+
+	qlock(ucb);
+	/* relstate returns r with a reference held for us; dropped below */
+	r = relstate(ucb, raddr, rport, "kick");
+	r->sndseq = NEXTSEQ(r->sndseq);
+	hnputl(rh->relseq, r->sndseq);
+	hnputl(rh->relsgen, r->sndgen);
+
+	hnputl(rh->relack, r->rcvseq);	/* ACK last rcvd packet */
+	hnputl(rh->relagen, r->rcvgen);
+
+	if(r->rcvseq != r->acksent)
+		r->acksent = r->rcvseq;
+
+	hnputs(uh->udpcksum, ptclcsum(bp, UDP_IPHDR, dlen+UDP_RHDRSIZE));
+
+	/* keep a copy for retransmission until it is acked */
+	relackq(r, bp);
+	qunlock(ucb);
+
+	upriv->ustats.rudpOutDatagrams++;
+
+	DPRINT("sent: %lud/%lud, %lud/%lud\n",
+		r->sndseq, r->sndgen, r->rcvseq, r->rcvgen);
+
+	doipoput(c, f, bp, 0, c->ttl, c->tos);
+
+	if(waserror()) {
+		/*
+		 * release the lock before dropping our reference:
+		 * relput may free r, and the lock lives inside it.
+		 */
+		qunlock(&r->lock);
+		relput(r);
+		nexterror();
+	}
+
+	/* flow control of sorts */
+	qlock(&r->lock);
+	if(UNACKED(r) > Maxunacked){
+		r->blocked = 1;
+		sleep(&r->vous, flow, r);
+		r->blocked = 0;
+	}
+
+	qunlock(&r->lock);
+	relput(r);
+	poperror();
+}
+
+/*
+ * receive hook: verify the checksum of an incoming packet, find the
+ * conversation it belongs to, run it through reliput for ack and
+ * sequence processing, and, if it carries in-order data, strip the
+ * headers and queue the data on the conversation's read queue.
+ * packets for which no conversation exists are handed to icmpnoconv.
+ * the conversation's ucb lock is held from the lookup until the
+ * packet has been queued or dropped, and is released on every path.
+ */
+void
+rudpiput(Proto *rudp, Ipifc *ifc, Block *bp)
+{
+	int len, olen, ottl;
+	Udphdr *uh;
+	Conv *c;
+	Rudpcb *ucb;
+	uchar raddr[IPaddrlen], laddr[IPaddrlen];
+	ushort rport, lport;
+	Rudppriv *upriv;
+	Fs *f;
+	uchar *p;
+
+	upriv = rudp->priv;
+	f = rudp->f;
+
+	upriv->ustats.rudpInDatagrams++;
+
+	uh = (Udphdr*)(bp->rp);
+
+	/* Put back pseudo header for checksum
+	 * (remember old values for icmpnoconv())
+	 */
+	ottl = uh->Unused;
+	uh->Unused = 0;
+	len = nhgets(uh->udplen);
+	olen = nhgets(uh->udpplen);
+	hnputs(uh->udpplen, len);
+
+	v4tov6(raddr, uh->udpsrc);
+	v4tov6(laddr, uh->udpdst);
+	lport = nhgets(uh->udpdport);
+	rport = nhgets(uh->udpsport);
+
+	/* a zero checksum field means the sender did not compute one */
+	if(nhgets(uh->udpcksum)) {
+		if(ptclcsum(bp, UDP_IPHDR, len+UDP_PHDRSIZE)) {
+			upriv->ustats.rudpInErrors++;
+			upriv->csumerr++;
+			netlog(f, Logrudp, "rudp: checksum error %I\n", raddr);
+			DPRINT("rudp: checksum error %I\n", raddr);
+			freeblist(bp);
+			return;
+		}
+	}
+
+	qlock(rudp);
+
+	c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
+	if(c == nil){
+		/* no conversation found */
+		upriv->ustats.rudpNoPorts++;
+		qunlock(rudp);
+		netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
+			laddr, lport);
+		/* restore the fields overwritten above before reporting */
+		uh->Unused = ottl;
+		hnputs(uh->udpplen, olen);
+		icmpnoconv(f, bp);
+		freeblist(bp);
+		return;
+	}
+	ucb = (Rudpcb*)c->ptcl;
+	qlock(ucb);
+	qunlock(rudp);
+
+	if(reliput(c, bp, raddr, rport) < 0){
+		qunlock(ucb);
+		/* free the whole chain, as every other drop path here does */
+		freeblist(bp);
+		return;
+	}
+
+	/*
+	 * Trim the packet down to data size
+	 */
+
+	len -= (UDP_RHDRSIZE-UDP_PHDRSIZE);
+	bp = trimblock(bp, UDP_IPHDR+UDP_RHDRSIZE, len);
+	if(bp == nil) {
+		netlog(f, Logrudp, "rudp: len err %I.%d -> %I.%d\n",
+			raddr, rport, laddr, lport);
+		DPRINT("rudp: len err %I.%d -> %I.%d\n",
+			raddr, rport, laddr, lport);
+		upriv->lenerr++;
+		/* don't leave the conversation locked on this error path */
+		qunlock(ucb);
+		return;
+	}
+
+	netlog(f, Logrudpmsg, "rudp: %I.%d -> %I.%d l %d\n",
+		raddr, rport, laddr, lport, len);
+
+	switch(ucb->headers){
+	case 7:
+		/* pass the src address */
+		bp = padblock(bp, UDP_USEAD7);
+		p = bp->rp;
+		ipmove(p, raddr); p += IPaddrlen;
+		ipmove(p, laddr); p += IPaddrlen;
+		ipmove(p, ifc->lifc->local); p += IPaddrlen;
+		hnputs(p, rport); p += 2;
+		hnputs(p, lport);
+		break;
+	default:
+		/* connection oriented rudp */
+		if(ipcmp(c->raddr, IPnoaddr) == 0){
+			/* save the src address in the conversation */
+			ipmove(c->raddr, raddr);
+			c->rport = rport;
+
+			/* reply with the same ip address (if not broadcast) */
+			if(ipforme(f, laddr) == Runi)
+				ipmove(c->laddr, laddr);
+			else
+				v4tov6(c->laddr, ifc->lifc->local);
+		}
+		break;
+	}
+	if(bp->next)
+		bp = concatblock(bp);
+
+	if(qfull(c->rq)) {
+		netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n", raddr, rport,
+			laddr, lport);
+		freeblist(bp);
+	}
+	else
+		qpass(c->rq, bp);
+
+	qunlock(ucb);
+}
+
+/* returned by rudpctl for an empty or unrecognised request */
+static char *rudpunknown = "unknown rudp ctl request";
+
+/*
+ * protocol-specific ctl requests:
+ *	headers			switch to the new (7) headers format
+ *	hangup addr port	forget the state for addr!port and tell the peer
+ *	randdrop [pct]		drop pct% of outgoing packets (default 10)
+ * returns nil on success or an error string.
+ */
+char*
+rudpctl(Conv *c, char **f, int n)
+{
+	Rudpcb *cb;
+	uchar addr[IPaddrlen];
+	char *verb;
+	int val;
+
+	cb = (Rudpcb*)c->ptcl;
+	if(n < 1)
+		return rudpunknown;
+	verb = f[0];
+
+	if(strcmp(verb, "headers") == 0){
+		cb->headers = 7;	/* new headers format */
+		return nil;
+	}
+
+	if(strcmp(verb, "hangup") == 0){
+		if(n < 3)
+			return "bad syntax";
+		if(parseip(addr, f[1]) == -1)
+			return Ebadip;
+		val = atoi(f[2]);
+		qlock(cb);
+		relforget(c, addr, val, 1);
+		qunlock(cb);
+		return nil;
+	}
+
+	if(strcmp(verb, "randdrop") == 0){
+		val = 10;		/* default is 10% */
+		if(n > 1)
+			val = atoi(f[1]);
+		if(val < 0 || val > 100)
+			return "illegal rudp drop rate";
+		cb->randdrop = val;
+		return nil;
+	}
+
+	return rudpunknown;
+}
+
+/*
+ * advise hook: bp holds the header of a packet we sent that drew an
+ * error report.  hang up the read and write queues of the first
+ * conversation whose local/remote address and port match that
+ * packet's source/destination, passing msg as the reason.
+ * bp is always freed.
+ */
+void
+rudpadvise(Proto *rudp, Block *bp, char *msg)
+{
+	Udphdr *hdr;
+	uchar src[IPaddrlen], dst[IPaddrlen];
+	ushort sport, dport;
+	Conv **cp, *cv;
+
+	hdr = (Udphdr*)(bp->rp);
+
+	v4tov6(dst, hdr->udpdst);
+	v4tov6(src, hdr->udpsrc);
+	sport = nhgets(hdr->udpsport);
+	dport = nhgets(hdr->udpdport);
+
+	/* Look for a connection */
+	for(cp = rudp->conv; *cp != nil; cp++){
+		cv = *cp;
+		if(cv->rport != dport || cv->lport != sport)
+			continue;
+		if(ipcmp(cv->raddr, dst) != 0 || ipcmp(cv->laddr, src) != 0)
+			continue;
+		qhangup(cv->rq, msg);
+		qhangup(cv->wq, msg);
+		break;
+	}
+	freeblist(bp);
+}
+
+/*
+ * stats hook: print the four MIB counters followed by the
+ * retransmission and out-of-order counts, space separated on one
+ * line, into buf.  returns snprint's result.
+ */
+int
+rudpstats(Proto *rudp, char *buf, int len)
+{
+ Rudppriv *upriv;
+
+ upriv = rudp->priv;
+ return snprint(buf, len, "%lud %lud %lud %lud %lud %lud\n",
+ upriv->ustats.rudpInDatagrams,
+ upriv->ustats.rudpNoPorts,
+ upriv->ustats.rudpInErrors,
+ upriv->ustats.rudpOutDatagrams,
+ upriv->rxmits,
+ upriv->orders);
+}
+
+/*
+ * allocate the rudp Proto and its private state, fill in the
+ * protocol hooks defined in this file, and register it with fs
+ * through Fsproto.
+ */
+void
+rudpinit(Fs *fs)
+{
+
+ Proto *rudp;
+
+ rudp = smalloc(sizeof(Proto));
+ rudp->priv = smalloc(sizeof(Rudppriv));
+ rudp->name = "rudp";
+ rudp->connect = rudpconnect;
+ rudp->announce = rudpannounce;
+ rudp->ctl = rudpctl;
+ rudp->state = rudpstate;
+ rudp->create = rudpcreate;
+ rudp->close = rudpclose;
+ rudp->rcv = rudpiput;
+ rudp->advise = rudpadvise;
+ rudp->stats = rudpstats;
+ rudp->ipproto = IP_UDPPROTO;
+ rudp->nc = 16;
+ rudp->ptclsize = sizeof(Rudpcb); /* size of the per-conversation Rudpcb */
+
+ Fsproto(fs, rudp);
+}
+
+/*********************************************/
+/* Here start the reliable helper functions */
+/*********************************************/
+/*
+ * Append a copy of bp to r's list of unacknowledged messages so it
+ * can be retransmitted later.  When the list was empty the
+ * retransmit timer and transmit count are restarted.
+ */
+void
+relackq(Reliable *r, Block *bp)
+{
+	Block *copy;
+
+	copy = copyblock(bp, blocklen(bp));
+	copy->list = nil;
+
+	if(r->unacked == nil){
+		/* restart timer */
+		r->timeout = 0;
+		r->xmits = 1;
+		r->unacked = copy;
+	}else
+		r->unackedtail->list = copy;
+	r->unackedtail = copy;
+}
+
+/*
+ * kproc body: every Rudptickms milliseconds walk all conversations
+ * of the protocol and, for each destination, retransmit the oldest
+ * unacked block when its timer has expired and send any ack that is
+ * still owed.  never returns.
+ */
+void
+relackproc(void *a)
+{
+	Proto *rudp;
+	Conv **cp, *cv;
+	Rudpcb *cb;
+	Reliable *rel;
+
+	rudp = (Proto *)a;
+
+	for(;;){
+		tsleep(&up->sleep, return0, 0, Rudptickms);
+
+		for(cp = rudp->conv; *cp != nil; cp++){
+			cv = *cp;
+			cb = (Rudpcb*)cv->ptcl;
+
+			qlock(cb);
+			for(rel = cb->r; rel != nil; rel = rel->next){
+				if(rel->unacked != nil){
+					/* back off linearly with the transmit count */
+					rel->timeout += Rudptickms;
+					if(rel->timeout > Rudprxms*rel->xmits)
+						relrexmit(cv, rel);
+				}
+				if(rel->acksent != rel->rcvseq)
+					relsendack(cv, rel, 0);
+			}
+			qunlock(cb);
+		}
+	}
+}
+
+/*
+ * get the state record for a conversation
+ *
+ * looks up the Reliable record for addr/port on ucb's list, creating
+ * and appending a fresh one (with a new send generation) when none
+ * exists.  the record is returned with an extra reference that the
+ * caller must drop with relput.  from is only used in debug output.
+ */
+Reliable*
+relstate(Rudpcb *ucb, uchar *addr, ushort port, char *from)
+{
+ Reliable *r, **l;
+
+ /* l trails r so that a new record can be linked at the tail */
+ l = &ucb->r;
+ for(r = *l; r; r = *l){
+ if(memcmp(addr, r->addr, IPaddrlen) == 0 &&
+ port == r->port)
+ break;
+ l = &r->next;
+ }
+
+ /* no state for this addr/port, create some */
+ if(r == nil){
+ /* first use: seed the generation counter with a non-zero value */
+ while(generation == 0)
+ generation = rand();
+
+ DPRINT("from %s new state %lud for %I!%ud\n",
+ from, generation, addr, port);
+
+ r = smalloc(sizeof(Reliable));
+ memmove(r->addr, addr, IPaddrlen);
+ r->port = port;
+ r->unacked = 0;
+ /* Hangupgen is reserved for hangup messages; skip it */
+ if(generation == Hangupgen)
+ generation++;
+ r->sndgen = generation++;
+ r->sndseq = 0;
+ r->ackrcvd = 0;
+ r->rcvgen = 0;
+ r->rcvseq = 0;
+ r->acksent = 0;
+ r->xmits = 0;
+ r->timeout = 0;
+ r->ref = 0;
+ incref(r); /* one reference for being in the list */
+
+ *l = r;
+ }
+
+ incref(r); /* and one for the caller */
+ return r;
+}
+
+/* drop one reference to r, freeing the record when the count reaches zero */
+void
+relput(Reliable *r)
+{
+ if(decref(r) == 0)
+ free(r);
+}
+
+/*
+ * forget a Reliable state: unlink the record for ip/port from the
+ * conversation's list, hang it up and drop the list's reference.
+ * when originator is set a hangup message is sent to the peer first.
+ * does nothing if no such record exists.
+ */
+void
+relforget(Conv *c, uchar *ip, int port, int originator)
+{
+	Rudpcb *cb;
+	Reliable *cur, **link;
+
+	cb = (Rudpcb*)c->ptcl;
+
+	for(link = &cb->r; (cur = *link) != nil; link = &cur->next){
+		if(ipcmp(ip, cur->addr) != 0 || port != cur->port)
+			continue;
+
+		*link = cur->next;
+		if(originator)
+			relsendack(c, cur, 1);
+		relhangup(c, cur);
+		relput(cur);	/* remove from the list */
+		break;
+	}
+}
+
+/*
+ * process a rcvd reliable packet. return -1 if not to be passed to user process,
+ * 0 otherwise.
+ *
+ * handles, in order: acks for a stale generation, hangup messages,
+ * a change of the remote generation, dequeueing of acked blocks
+ * (with flow-control wakeup and fast retransmit), and finally the
+ * in-order check on the packet's own sequence number.
+ *
+ * called with ucb locked.
+ */
+int
+reliput(Conv *c, Block *bp, uchar *addr, ushort port)
+{
+ Block *nbp;
+ Rudpcb *ucb;
+ Rudppriv *upriv;
+ Udphdr *uh;
+ Reliable *r;
+ Rudphdr *rh;
+ ulong seq, ack, sgen, agen, ackreal;
+ int rv = -1;
+
+ /* get fields */
+ uh = (Udphdr*)(bp->rp);
+ rh = (Rudphdr*)uh;
+ seq = nhgetl(rh->relseq);
+ sgen = nhgetl(rh->relsgen);
+ ack = nhgetl(rh->relack);
+ agen = nhgetl(rh->relagen);
+
+ upriv = c->p->priv;
+ ucb = (Rudpcb*)c->ptcl;
+ /* takes a reference on r; released at out: */
+ r = relstate(ucb, addr, port, "input");
+
+ DPRINT("rcvd %lud/%lud, %lud/%lud, r->sndgen = %lud\n",
+ seq, sgen, ack, agen, r->sndgen);
+
+ /* if acking an incorrect generation, ignore */
+ if(ack && agen != r->sndgen)
+ goto out;
+
+ /* Look for a hangup */
+ if(sgen == Hangupgen) {
+ if(agen == r->sndgen)
+ relforget(c, addr, port, 0);
+ goto out;
+ }
+
+ /* make sure we're not talking to a new remote side */
+ if(r->rcvgen != sgen){
+ /* a new generation is only accepted at the start of a sequence */
+ if(seq != 0 && seq != 1)
+ goto out;
+
+ /* new connection */
+ if(r->rcvgen != 0){
+ DPRINT("new con r->rcvgen = %lud, sgen = %lud\n", r->rcvgen, sgen);
+ relhangup(c, r);
+ }
+ r->rcvgen = sgen;
+ }
+
+ /* dequeue acked packets */
+ if(ack && agen == r->sndgen){
+ ackreal = 0;
+ /* free blocks from the head of the list up to and including ack */
+ while(r->unacked != nil && INSEQ(ack, r->ackrcvd, r->sndseq)){
+ nbp = r->unacked;
+ r->unacked = nbp->list;
+ DPRINT("%lud/%lud acked, r->sndgen = %lud\n",
+ ack, agen, r->sndgen);
+ freeb(nbp);
+ r->ackrcvd = NEXTSEQ(r->ackrcvd);
+ ackreal = 1;
+ }
+
+ /* flow control */
+ if(UNACKED(r) < Maxunacked/8 && r->blocked)
+ wakeup(&r->vous);
+
+ /*
+ * retransmit next packet if the acked packet
+ * was transmitted more than once
+ */
+ if(ackreal && r->unacked != nil){
+ r->timeout = 0;
+ if(r->xmits > 1){
+ r->xmits = 1;
+ relrexmit(c, r);
+ }
+ }
+
+ }
+
+ /* no message or input queue full */
+ if(seq == 0 || qfull(c->rq))
+ goto out;
+
+ /* refuse out of order delivery */
+ if(seq != NEXTSEQ(r->rcvseq)){
+ relsendack(c, r, 0); /* tell him we got it already */
+ upriv->orders++;
+ DPRINT("out of sequence %lud not %lud\n", seq, NEXTSEQ(r->rcvseq));
+ goto out;
+ }
+ r->rcvseq = seq;
+
+ rv = 0;
+out:
+ relput(r);
+ return rv;
+}
+
+/*
+ * build and send a data-less packet (sequence number 0) to r's
+ * address acknowledging the last packet received from it.  when
+ * hangup is non-zero the send generation field carries Hangupgen
+ * instead of r->sndgen.
+ */
+void
+relsendack(Conv *c, Reliable *r, int hangup)
+{
+ Udphdr *uh;
+ Block *bp;
+ Rudphdr *rh;
+ int ptcllen;
+ Fs *f;
+
+ bp = allocb(UDP_IPHDR + UDP_RHDRSIZE);
+ if(bp == nil)
+ return;
+ bp->wp += UDP_IPHDR + UDP_RHDRSIZE;
+ f = c->p->f;
+ uh = (Udphdr *)(bp->rp);
+ uh->vihl = IP_VER4;
+ rh = (Rudphdr*)uh;
+
+ /* headers only: udp header + rudp header, no data */
+ ptcllen = (UDP_RHDRSIZE-UDP_PHDRSIZE);
+ uh->Unused = 0;
+ uh->udpproto = IP_UDPPROTO;
+ uh->frag[0] = 0;
+ uh->frag[1] = 0;
+ hnputs(uh->udpplen, ptcllen);
+
+ v6tov4(uh->udpdst, r->addr);
+ hnputs(uh->udpdport, r->port);
+ hnputs(uh->udpsport, c->lport);
+ if(ipcmp(c->laddr, IPnoaddr) == 0)
+ findlocalip(f, c->laddr, c->raddr);
+ v6tov4(uh->udpsrc, c->laddr);
+ hnputs(uh->udplen, ptcllen);
+
+ if(hangup)
+ hnputl(rh->relsgen, Hangupgen);
+ else
+ hnputl(rh->relsgen, r->sndgen);
+ hnputl(rh->relseq, 0);
+ hnputl(rh->relagen, r->rcvgen);
+ hnputl(rh->relack, r->rcvseq);
+
+ /* remember what has been acked so it is not sent again */
+ if(r->acksent < r->rcvseq)
+ r->acksent = r->rcvseq;
+
+ uh->udpcksum[0] = 0;
+ uh->udpcksum[1] = 0;
+ hnputs(uh->udpcksum, ptclcsum(bp, UDP_IPHDR, UDP_RHDRSIZE));
+
+ DPRINT("sendack: %lud/%lud, %lud/%lud\n", 0L, r->sndgen, r->rcvseq, r->rcvgen);
+ doipoput(c, f, bp, 0, c->ttl, c->tos);
+}
+
+
+/*
+ * reset the state for one destination: post "hangup addr!port" on
+ * the conversation's event queue, discard all unacked messages,
+ * zero the sequence state, pick a fresh send generation and wake
+ * any writer blocked on flow control.
+ *
+ * called with ucb locked (and c locked if user initiated close)
+ */
+void
+relhangup(Conv *c, Reliable *r)
+{
+	char msg[ERRMAX];
+	Block *head;
+	int len;
+
+	len = snprint(msg, sizeof(msg), "hangup %I!%d", r->addr, r->port);
+	qproduce(c->eq, msg, len);
+
+	/*
+	 * dump any unacked outgoing messages
+	 */
+	while((head = r->unacked) != nil){
+		r->unacked = head->list;
+		head->list = nil;
+		freeb(head);
+	}
+
+	/* receive side */
+	r->rcvgen = 0;
+	r->rcvseq = 0;
+	r->acksent = 0;
+
+	/* send side: new generation, never Hangupgen */
+	if(generation == Hangupgen)
+		generation++;
+	r->sndgen = generation++;
+	r->sndseq = 0;
+	r->ackrcvd = 0;
+
+	/* retransmit timer */
+	r->xmits = 0;
+	r->timeout = 0;
+
+	wakeup(&r->vous);
+}
+
+/*
+ * retransmit the oldest unacked block for r, or hang the
+ * destination up once it has been sent too many times.
+ * r->unacked is dereferenced without a nil check; both visible
+ * callers test it first.
+ *
+ * called with ucb locked
+ */
+void
+relrexmit(Conv *c, Reliable *r)
+{
+ Rudppriv *upriv;
+ Block *np;
+ Fs *f;
+
+ upriv = c->p->priv;
+ f = c->p->f;
+ r->timeout = 0;
+ /* xmits is bumped on every call; give up once it has passed Rudpmaxxmit */
+ if(r->xmits++ > Rudpmaxxmit){
+ relhangup(c, r);
+ return;
+ }
+
+ upriv->rxmits++;
+ /* send a copy; the original stays queued until it is acked */
+ np = copyblock(r->unacked, blocklen(r->unacked));
+ DPRINT("rxmit r->ackrvcd+1 = %lud\n", r->ackrcvd+1);
+ doipoput(c, f, np, 0, c->ttl, c->tos);
+}
diff --git a/src/9vx/a/ip/tcp.c b/src/9vx/a/ip/tcp.c
@@ -0,0 +1,3209 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+enum
+{
+ QMAX = 64*1024-1,
+ IP_TCPPROTO = 6,
+
+ TCP4_IPLEN = 8,
+ TCP4_PHDRSIZE = 12,
+ TCP4_HDRSIZE = 20,
+ TCP4_TCBPHDRSZ = 40,
+ TCP4_PKT = TCP4_IPLEN+TCP4_PHDRSIZE,
+
+ TCP6_IPLEN = 0,
+ TCP6_PHDRSIZE = 40,
+ TCP6_HDRSIZE = 20,
+ TCP6_TCBPHDRSZ = 60,
+ TCP6_PKT = TCP6_IPLEN+TCP6_PHDRSIZE,
+
+ TcptimerOFF = 0,
+ TcptimerON = 1,
+ TcptimerDONE = 2,
+ MAX_TIME = (1<<20), /* Forever */
+ TCP_ACK = 50, /* Timed ack sequence in ms */
+ MAXBACKMS = 9*60*1000, /* longest backoff time (ms) before hangup */
+
+ URG = 0x20, /* Data marked urgent */
+ ACK = 0x10, /* Acknowledge is valid */
+ PSH = 0x08, /* Whole data pipe is pushed */
+ RST = 0x04, /* Reset connection */
+ SYN = 0x02, /* Pkt. is synchronise */
+ FIN = 0x01, /* Start close down */
+
+ EOLOPT = 0,
+ NOOPOPT = 1,
+ MSSOPT = 2,
+ MSS_LENGTH = 4, /* Length in bytes of the maximum segment size option */
+ WSOPT = 3,
+ WS_LENGTH = 3, /* Length in bytes of the window scale option */
+ MSL2 = 10,
+ MSPTICK = 50, /* Milliseconds per timer tick */
+ DEF_MSS = 1460, /* Default maximum segment size */
+ DEF_MSS6 = 1280, /* Default maximum segment size (min) for v6 */
+ DEF_RTT = 500, /* Default round trip */
+ DEF_KAT = 120000, /* Default time (ms) between keep alives */
+ TCP_LISTEN = 0, /* Listen connection */
+ TCP_CONNECT = 1, /* Outgoing connection */
+ SYNACK_RXTIMER = 250, /* ms between SYNACK retransmits */
+
+ TCPREXMTTHRESH = 3, /* dupack threshold for rxt */
+
+ FORCE = 1,
+ CLONE = 2,
+ RETRAN = 4,
+ ACTIVE = 8,
+ SYNACK = 16,
+
+ LOGAGAIN = 3,
+ LOGDGAIN = 2,
+
+ Closed = 0, /* Connection states */
+ Listen,
+ Syn_sent,
+ Syn_received,
+ Established,
+ Finwait1,
+ Finwait2,
+ Close_wait,
+ Closing,
+ Last_ack,
+ Time_wait,
+
+ Maxlimbo = 1000, /* maximum procs waiting for response to SYN ACK */
+ NLHT = 256, /* hash table size, must be a power of 2 */
+ LHTMASK = NLHT-1,
+
+ HaveWS = 1<<8, /* or'd into a window scale value to mark it as present */
+};
+
+/* State names indexed by connection state; order must match Closed..Time_wait in the enumeration above */
+char *tcpstates[] =
+{
+ "Closed", "Listen", "Syn_sent", "Syn_received",
+ "Established", "Finwait1", "Finwait2", "Close_wait",
+ "Closing", "Last_ack", "Time_wait"
+};
+
+typedef struct Tcptimer Tcptimer;
+struct Tcptimer
+{
+ Tcptimer *next; /* links on the Tcppriv.timers list while state is TcptimerON */
+ Tcptimer *prev;
+ Tcptimer *readynext; /* chains timers that expired in the same tcpackproc pass */
+ int state; /* TcptimerOFF, TcptimerON or TcptimerDONE */
+ int start; /* value loaded into count when the timer is started */
+ int count; /* decremented by tcpackproc on each pass; expires at 0 */
+ void (*func)(void*); /* called with arg when the timer expires */
+ void *arg;
+};
+
+/*
+ * v4 and v6 pseudo headers used for
+ * checksumming tcp
+ */
+typedef struct Tcp4hdr Tcp4hdr;
+struct Tcp4hdr
+{
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+ uchar Unused;
+ uchar proto;
+ uchar tcplen[2]; /* TCP header plus data length, filled in by htontcp4 */
+ uchar tcpsrc[4];
+ uchar tcpdst[4];
+ uchar tcpsport[2];
+ uchar tcpdport[2];
+ uchar tcpseq[4];
+ uchar tcpack[4];
+ uchar tcpflag[2]; /* header length<<10 | flag bits */
+ uchar tcpwin[2];
+ uchar tcpcksum[2];
+ uchar tcpurg[2];
+ /* Options segment */
+ uchar tcpopt[1];
+};
+
+typedef struct Tcp6hdr Tcp6hdr;
+struct Tcp6hdr
+{
+ uchar vcf[4]; /* holds the TCP length while htontcp6 checksums, then the IP version */
+ uchar ploadlen[2]; /* zero while htontcp6 checksums, then the payload length */
+ uchar proto; /* zero while htontcp6 checksums, then the protocol */
+ uchar ttl; /* holds the protocol while htontcp6 checksums */
+ uchar tcpsrc[IPaddrlen];
+ uchar tcpdst[IPaddrlen];
+ uchar tcpsport[2];
+ uchar tcpdport[2];
+ uchar tcpseq[4];
+ uchar tcpack[4];
+ uchar tcpflag[2]; /* header length<<10 | flag bits */
+ uchar tcpwin[2];
+ uchar tcpcksum[2];
+ uchar tcpurg[2];
+ /* Options segment */
+ uchar tcpopt[1];
+};
+
+/*
+ * this represents the control info
+ * for a single packet. It is derived from
+ * a packet in ntohtcp{4,6}() and stuck into
+ * a packet in htontcp{4,6}().
+ */
+typedef struct Tcp Tcp;
+struct Tcp
+{
+ ushort source; /* source port */
+ ushort dest; /* destination port */
+ ulong seq;
+ ulong ack;
+ uchar flags; /* URG, ACK, PSH, RST, SYN, FIN bits */
+ ushort ws; /* window scale option (if not zero) */
+ ulong wnd;
+ ushort urg;
+ ushort mss; /* max segment size option (if not zero) */
+ ushort len; /* size of data */
+};
+
+/*
+ * this header is malloc'd to thread together fragments
+ * waiting to be coalesced
+ */
+typedef struct Reseq Reseq;
+struct Reseq
+{
+ Reseq *next; /* next entry on the Tcpctl.reseq list */
+ Tcp seg;
+ Block *bp; /* released with freeblist when the queue is flushed */
+ ushort length;
+};
+
+/*
+ * the QLOCK in the Conv locks this structure
+ */
+typedef struct Tcpctl Tcpctl;
+struct Tcpctl
+{
+ uchar state; /* Connection state */
+ uchar type; /* Listening or active connection */
+ uchar code; /* Icmp code */
+ struct {
+ ulong una; /* Unacked data pointer */
+ ulong nxt; /* Next sequence expected */
+ ulong ptr; /* Data pointer */
+ ulong wnd; /* Tcp send window */
+ ulong urg; /* Urgent data pointer */
+ ulong wl2;
+ int scale; /* how much to right shift window in xmitted packets */
+ /* to implement tahoe and reno TCP */
+ ulong dupacks; /* number of duplicate acks rcvd */
+ int recovery; /* loss recovery flag */
+ ulong rxt; /* right window marker for recovery */
+ } snd;
+ struct {
+ ulong nxt; /* Receive pointer to next uchar slot */
+ ulong wnd; /* Receive window incoming */
+ ulong urg; /* Urgent pointer */
+ int blocked;
+ int una; /* unacked data segs */
+ int scale; /* how much to left shift window in rcved packets */
+ } rcv;
+ ulong iss; /* Initial sequence number */
+ int sawwsopt; /* true if we saw a wsopt on the incoming SYN */
+ ulong cwind; /* Congestion window */
+ int scale; /* desired snd.scale */
+ ushort ssthresh; /* Slow start threshold */
+ int resent; /* Bytes just resent */
+ int irs; /* Initial received sequence */
+ ushort mss; /* Maximum segment size */
+ int rerecv; /* Overlap of data rereceived */
+ ulong window; /* Receive window */
+ uchar backoff; /* Exponential backoff counter */
+ int backedoff; /* ms we've backed off for rexmits */
+ uchar flags; /* State flags */
+ Reseq *reseq; /* Resequencing queue */
+ Tcptimer timer; /* Activity timer */
+ Tcptimer acktimer; /* Acknowledge timer */
+ Tcptimer rtt_timer; /* Round trip timer */
+ Tcptimer katimer; /* keep alive timer */
+ ulong rttseq; /* Round trip sequence */
+ int srtt; /* Shortened round trip */
+ int mdev; /* Mean deviation of round trip */
+ int kacounter; /* count down for keep alive */
+ uint sndsyntime; /* time syn sent */
+ ulong time; /* time Finwait2 or Syn_received was sent */
+ int nochecksum; /* non-zero means don't send checksums */
+ int flgcnt; /* number of flags in the sequence (FIN,SYN) */
+
+ union {
+ Tcp4hdr tcp4hdr;
+ Tcp6hdr tcp6hdr;
+ } protohdr; /* prototype header */
+};
+
+/*
+ * New calls are put in limbo rather than having a conversation structure
+ * allocated. Thus, a SYN attack results in lots of limbo'd calls but not
+ * any real Conv structures mucking things up. Calls in limbo rexmit their
+ * SYN ACK every SYNACK_RXTIMER ms up to 4 times, i.e., they disappear after 1 second.
+ * (NOTE(review): limborexmit waits (rexmits+1)*SYNACK_RXTIMER and drops at rexmits > 5 - confirm.)
+ * In particular they aren't on a listener's queue so that they don't figure
+ * in the input queue limit.
+ *
+ * If 1/2 of a T3 was attacking SYN packets, we'd have a permanent queue
+ * of 70000 limbo'd calls. Not great for a linear list but doable. Therefore
+ * there is no hashing of this list.
+ */
+typedef struct Limbo Limbo;
+struct Limbo
+{
+ Limbo *next;
+
+ uchar laddr[IPaddrlen];
+ uchar raddr[IPaddrlen];
+ ushort lport;
+ ushort rport;
+ ulong irs; /* initial received sequence */
+ ulong iss; /* initial sent sequence */
+ ushort mss; /* mss from the other end */
+ ushort rcvscale; /* how much to scale rcvd windows */
+ ushort sndscale; /* how much to scale sent windows */
+ ulong lastsend; /* last time we sent a synack */
+ uchar version; /* v4 or v6 */
+ uchar rexmits; /* number of retransmissions */
+};
+
+int tcp_irtt = DEF_RTT; /* Initial guess at round trip time */
+ushort tcp_mss = DEF_MSS; /* Maximum segment size to be sent */
+
+enum {
+ /* MIB stats */
+ MaxConn,
+ ActiveOpens,
+ PassiveOpens,
+ EstabResets,
+ CurrEstab,
+ InSegs,
+ OutSegs,
+ RetransSegs,
+ RetransTimeouts,
+ InErrs,
+ OutRsts,
+
+ /* non-MIB stats */
+ CsumErrs,
+ HlenErrs,
+ LenErrs,
+ OutOfOrder,
+
+ Nstats
+};
+
+static char *statnames[] = /* printable name of each counter, indexed by the stats enum */
+{
+[MaxConn] "MaxConn",
+[ActiveOpens] "ActiveOpens",
+[PassiveOpens] "PassiveOpens",
+[EstabResets] "EstabResets",
+[CurrEstab] "CurrEstab",
+[InSegs] "InSegs",
+[OutSegs] "OutSegs",
+[RetransSegs] "RetransSegs",
+[RetransTimeouts] "RetransTimeouts",
+[InErrs] "InErrs",
+[OutRsts] "OutRsts",
+[CsumErrs] "CsumErrs",
+[HlenErrs] "HlenErrs",
+[LenErrs] "LenErrs",
+[OutOfOrder] "OutOfOrder",
+};
+
+typedef struct Tcppriv Tcppriv;
+struct Tcppriv
+{
+ /* List of active timers */
+ QLock tl; /* held while the timers list is walked or changed */
+ Tcptimer *timers;
+
+ /* hash table for matching conversations */
+ Ipht ht;
+
+ /* calls in limbo waiting for an ACK to our SYN ACK */
+ int nlimbo; /* number of entries over all lht chains */
+ Limbo *lht[NLHT]; /* chains indexed by hashipa() */
+
+ /* for keeping track of tcpackproc */
+ QLock apl;
+ int ackprocstarted; /* set once the tcpackproc kproc has been created */
+
+ ulong stats[Nstats];
+};
+
+/*
+ * Setting tcpporthogdefense to non-zero enables Dong Lin's
+ * solution to hijacked systems staking out ports as a form
+ * of DoS attack.
+ *
+ * To avoid stateless Conv hogs, we pick a sequence number at random. If
+ * that number gets acked by the other end, we shut down the connection.
+ * Look for tcpporthogdefense in the code.
+ */
+int tcpporthogdefense = 0;
+
+int addreseq(Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
+void getreseq(Tcpctl*, Tcp*, Block**, ushort*);
+void localclose(Conv*, char*);
+void procsyn(Conv*, Tcp*);
+void tcpiput(Proto*, Ipifc*, Block*);
+void tcpoutput(Conv*);
+int tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
+void tcpstart(Conv*, int);
+void tcptimeout(void*);
+void tcpsndsyn(Conv*, Tcpctl*);
+void tcprcvwin(Conv*);
+void tcpacktimer(void*);
+void tcpkeepalive(void*);
+void tcpsetkacounter(Tcpctl*);
+void tcprxmit(Conv*);
+void tcpsettimer(Tcpctl*);
+void tcpsynackrtt(Conv*);
+void tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
+
+static void limborexmit(Proto*);
+static void limbo(Conv*, uchar*, uchar*, Tcp*, int);
+
+void
+tcpsetstate(Conv *s, uchar newstate) /* move s to newstate, with the side effects of the transition */
+{
+ Tcpctl *tcb;
+ uchar oldstate;
+ Tcppriv *tpriv;
+
+ tpriv = s->p->priv;
+
+ tcb = (Tcpctl*)s->ptcl;
+
+ oldstate = tcb->state;
+ if(oldstate == newstate)
+ return;
+
+ if(oldstate == Established) /* keep the CurrEstab counter in step */
+ tpriv->stats[CurrEstab]--;
+ if(newstate == Established)
+ tpriv->stats[CurrEstab]++;
+
+ /**
+ print( "%d/%d %s->%s CurrEstab=%d\n", s->lport, s->rport,
+ tcpstates[oldstate], tcpstates[newstate], tpriv->tstats.tcpCurrEstab );
+ **/
+
+ switch(newstate) {
+ case Closed: /* close the read, write and error queues */
+ qclose(s->rq);
+ qclose(s->wq);
+ qclose(s->eq);
+ break;
+
+ case Close_wait: /* Remote closes */
+ qhangup(s->rq, nil);
+ break;
+ }
+
+ tcb->state = newstate;
+
+ if(oldstate == Syn_sent && newstate != Closed) /* an active open left Syn_sent without closing */
+ Fsconnected(s, nil);
+}
+
+static char*	/* nil on success, else an error string */
+tcpconnect(Conv *c, char **argv, int argc)
+{
+	char *err;
+	Tcpctl *ctl;
+
+	/* only a Closed conversation may start a new connection */
+	ctl = (Tcpctl*)c->ptcl;
+	if(ctl->state != Closed)
+		return Econinuse;
+
+	/* hand the arguments to Fsstdconnect; open actively only if it accepts them */
+	err = Fsstdconnect(c, argv, argc);
+	if(err == nil)
+		tcpstart(c, TCP_CONNECT);
+	return err;
+}
+
+static int /* formats a one-line status of c into state[0..n); returns snprint's result */
+tcpstate(Conv *c, char *state, int n)
+{
+ Tcpctl *s;
+
+ s = (Tcpctl*)(c->ptcl);
+
+ return snprint(state, n,
+ "%s qin %d qout %d srtt %d mdev %d cwin %lud swin %lud>>%d rwin %lud>>%d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
+ tcpstates[s->state],
+ c->rq ? qlen(c->rq) : 0, /* a missing queue is reported as length 0 */
+ c->wq ? qlen(c->wq) : 0,
+ s->srtt, s->mdev,
+ s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd, s->snd.scale, /* snd.wnd is paired with rcv.scale, rcv.wnd with snd.scale */
+ s->timer.start, s->timer.count, s->rerecv,
+ s->katimer.start, s->katimer.count);
+}
+
+static int
+tcpinuse(Conv *c)	/* nonzero unless the TCP state is Closed */
+{
+	Tcpctl *tcb = (Tcpctl*)c->ptcl;
+
+	/* every state other than Closed counts as in use */
+	return tcb->state != Closed;
+}
+
+static char*	/* nil on success, else an error string */
+tcpannounce(Conv *c, char **argv, int argc)
+{
+	char *err;
+	Tcpctl *ctl;
+
+	/* only a Closed conversation may be turned into a listener */
+	ctl = (Tcpctl*)c->ptcl;
+	if(ctl->state != Closed)
+		return Econinuse;
+
+	err = Fsstdannounce(c, argv, argc);
+	if(err == nil){
+		tcpstart(c, TCP_LISTEN);	/* enters the Listen state */
+		Fsconnected(c, nil);
+	}
+	return err;
+}
+
+/*
+ * tcpclose is always called with the q locked
+ */
+static void
+tcpclose(Conv *c)
+{
+ Tcpctl *tcb;
+
+ tcb = (Tcpctl*)c->ptcl;
+
+ qhangup(c->rq, nil);
+ qhangup(c->wq, nil);
+ qhangup(c->eq, nil);
+ qflush(c->rq);
+
+ switch(tcb->state) {
+ case Listen:
+ /*
+ * reset any incoming calls to this listener
+ */
+ Fsconnected(c, "Hangup");
+
+ localclose(c, nil);
+ break;
+ case Closed:
+ case Syn_sent:
+ localclose(c, nil);
+ break;
+ case Syn_received:
+ case Established:
+ tcb->flgcnt++; /* one more flag in the sequence, presumably the FIN - confirm in tcpoutput */
+ tcb->snd.nxt++;
+ tcpsetstate(c, Finwait1);
+ tcpoutput(c);
+ break;
+ case Close_wait:
+ tcb->flgcnt++;
+ tcb->snd.nxt++;
+ tcpsetstate(c, Last_ack);
+ tcpoutput(c);
+ break;
+ } /* in any other state only the queues above are touched */
+}
+
+void
+tcpkick(void *x) /* kick routine given to qopen for the write queue; x is the Conv */
+{
+ Conv *s = x;
+ Tcpctl *tcb;
+
+ tcb = (Tcpctl*)s->ptcl;
+
+ if(waserror()){ /* on error: release the lock and pass the error on */
+ QUNLOCK(s);
+ nexterror();
+ }
+ QLOCK(s);
+
+ switch(tcb->state) {
+ case Syn_sent:
+ case Syn_received:
+ case Established:
+ case Close_wait:
+ /*
+ * Push data
+ */
+ tcprcvwin(s);
+ tcpoutput(s);
+ break;
+ default: /* no state in which data may be sent */
+ localclose(s, "Hangup");
+ break;
+ }
+
+ QUNLOCK(s);
+ poperror();
+}
+
+void
+tcprcvwin(Conv *s)	/* Call with tcb locked */
+{
+	Tcpctl *tcb = (Tcpctl*)s->ptcl;
+	int room;
+
+	/* space left in the window after what is already queued for reading */
+	room = tcb->window - qlen(s->rq);
+	if(room <= 0){
+		room = 0;
+		tcb->rcv.blocked = 1;	/* nothing left to advertise */
+	}
+	tcb->rcv.wnd = room;
+}
+
+void
+tcpacktimer(void *v) /* ack timer callback; v is the Conv */
+{
+ Tcpctl *tcb;
+ Conv *s;
+
+ s = v;
+ tcb = (Tcpctl*)s->ptcl;
+
+ if(waserror()){ /* on error: release the lock and pass the error on */
+ QUNLOCK(s);
+ nexterror();
+ }
+ QLOCK(s);
+ if(tcb->state != Closed){
+ tcb->flags |= FORCE; /* set FORCE before calling tcpoutput */
+ tcprcvwin(s);
+ tcpoutput(s);
+ }
+ QUNLOCK(s);
+ poperror();
+}
+
+static void
+tcpcreate(Conv *c) /* allocate the read and write queues of a conversation */
+{
+ c->rq = qopen(QMAX, Qcoalesce, tcpacktimer, c); /* tcpacktimer is the queue's callback */
+ c->wq = qopen((3*QMAX)/2, Qkick, tcpkick, c); /* tcpkick is the queue's kick routine */
+}
+
+static void
+timerstate(Tcppriv *priv, Tcptimer *t, int newstate) /* set t's state, keeping priv->timers == the ON timers */
+{
+ if(newstate != TcptimerON){
+ if(t->state == TcptimerON){
+ /* unchain */
+ if(priv->timers == t){
+ priv->timers = t->next;
+ if(t->prev != nil) /* the list head must have no predecessor */
+ panic("timerstate1");
+ }
+ if(t->next)
+ t->next->prev = t->prev;
+ if(t->prev)
+ t->prev->next = t->next;
+ t->next = t->prev = nil;
+ }
+ } else {
+ if(t->state != TcptimerON){
+ /* chain */
+ if(t->prev != nil || t->next != nil) /* a timer that is not ON must be unlinked */
+ panic("timerstate2");
+ t->prev = nil;
+ t->next = priv->timers; /* insert at the head */
+ if(t->next)
+ t->next->prev = t;
+ priv->timers = t;
+ }
+ }
+ t->state = newstate;
+}
+
+void
+tcpackproc(void *a) /* timer process: a is the tcp Proto; never returns */
+{
+ Tcptimer *t, *tp, *timeo;
+ Proto *tcp;
+ Tcppriv *priv;
+ int loop;
+
+ tcp = a;
+ priv = tcp->priv;
+
+ for(;;) {
+ tsleep(&up->sleep, return0, 0, MSPTICK); /* one pass per MSPTICK ms */
+
+ qlock(&priv->tl);
+ timeo = nil; /* list of timers that expire in this pass */
+ loop = 0;
+ for(t = priv->timers; t != nil; t = tp) {
+ if(loop++ > 10000) /* guard against a corrupted (looping) list */
+ panic("tcpackproc1");
+ tp = t->next; /* saved first: timerstate below unlinks t */
+ if(t->state == TcptimerON) {
+ t->count--;
+ if(t->count == 0) {
+ timerstate(priv, t, TcptimerDONE);
+ t->readynext = timeo;
+ timeo = t;
+ }
+ }
+ }
+ qunlock(&priv->tl);
+
+ loop = 0; /* callbacks run after priv->tl has been released */
+ for(t = timeo; t != nil; t = t->readynext) {
+ if(loop++ > 10000)
+ panic("tcpackproc2");
+ if(t->state == TcptimerDONE && t->func != nil && !waserror()){ /* skip timers whose state changed since they expired */
+ (*t->func)(t->arg);
+ poperror();
+ }
+ }
+
+ limborexmit(tcp);
+ }
+}
+
+void
+tcpgo(Tcppriv *priv, Tcptimer *t)	/* (re)start t with a count of t->start */
+{
+	/* a nil timer or one with a zero start value is left alone */
+	if(t != nil && t->start != 0){
+		qlock(&priv->tl);
+		t->count = t->start;
+		timerstate(priv, t, TcptimerON);
+		qunlock(&priv->tl);
+	}
+}
+
+void
+tcphalt(Tcppriv *priv, Tcptimer *t)	/* stop t; a nil timer is ignored */
+{
+	if(t != nil){
+		/* timerstate edits the list of active timers, so hold priv->tl */
+		qlock(&priv->tl);
+		timerstate(priv, t, TcptimerOFF);
+		qunlock(&priv->tl);
+	}
+}
+
+int
+backoff(int n) /* returns 2 to the power n; NOTE(review): n is not range checked here */
+{
+ return 1 << n;
+}
+
+void
+localclose(Conv *s, char *reason) /* called with tcb locked */
+{
+ Tcpctl *tcb;
+ Reseq *rp,*rp1;
+ Tcppriv *tpriv;
+
+ tpriv = s->p->priv;
+ tcb = (Tcpctl*)s->ptcl;
+
+ iphtrem(&tpriv->ht, s); /* take s out of the conversation hash table */
+
+ tcphalt(tpriv, &tcb->timer); /* stop all four timers */
+ tcphalt(tpriv, &tcb->rtt_timer);
+ tcphalt(tpriv, &tcb->acktimer);
+ tcphalt(tpriv, &tcb->katimer);
+
+ /* Flush reassembly queue; nothing more can arrive */
+ for(rp = tcb->reseq; rp != nil; rp = rp1) {
+ rp1 = rp->next;
+ freeblist(rp->bp);
+ free(rp);
+ }
+ tcb->reseq = nil;
+
+ if(tcb->state == Syn_sent) /* a connect was pending: report reason to it */
+ Fsconnected(s, reason);
+ if(s->state == Announced)
+ wakeup(&s->listenr);
+
+ qhangup(s->rq, reason);
+ qhangup(s->wq, reason);
+
+ tcpsetstate(s, Closed);
+}
+
+/* mtu (- TCP + IP hdr len) of 1st hop */
+int
+tcpmtu(Proto *tcp, uchar *addr, int version, int *scale)
+{
+	Ipifc *ifc;
+	int mtu, hdrs, ws;
+
+	/* defaults for when no interface is found; non-V6 is handled as V4 */
+	if(version == V6){
+		mtu = DEF_MSS6;
+		hdrs = TCP6_PKT + TCP6_HDRSIZE;
+	} else {
+		mtu = DEF_MSS;
+		hdrs = TCP4_PKT + TCP4_HDRSIZE;
+	}
+
+	ws = 0;
+	ifc = findipifc(tcp->f, addr, 0);
+	if(ifc != nil){
+		/* what is left of the interface mtu after medium, IP and TCP headers */
+		mtu = ifc->maxtu - ifc->m->hsize - hdrs;
+
+		/* faster interfaces get a larger window scale */
+		if(ifc->mbps > 1000)
+			ws = 4;
+		else if(ifc->mbps > 100)
+			ws = 3;
+		else if(ifc->mbps > 10)
+			ws = 1;
+	}
+
+	/* HaveWS is always or'd in, even when the scale itself is 0 */
+	*scale = HaveWS | ws;
+	return mtu;
+}
+
+void
+inittcpctl(Conv *s, int mode) /* reset s's Tcpctl; mode is TCP_LISTEN or TCP_CONNECT */
+{
+ Tcpctl *tcb;
+ Tcp4hdr* h4;
+ Tcp6hdr* h6;
+ int mss;
+
+ tcb = (Tcpctl*)s->ptcl;
+
+ memset(tcb, 0, sizeof(Tcpctl));
+
+ tcb->ssthresh = 65535;
+ tcb->srtt = tcp_irtt<<LOGAGAIN;
+ tcb->mdev = 0;
+
+ /* setup timers */
+ tcb->timer.start = tcp_irtt / MSPTICK; /* start values are in ticks of MSPTICK ms */
+ tcb->timer.func = tcptimeout;
+ tcb->timer.arg = s;
+ tcb->rtt_timer.start = MAX_TIME; /* no func is set for the round trip timer */
+ tcb->acktimer.start = TCP_ACK / MSPTICK;
+ tcb->acktimer.func = tcpacktimer;
+ tcb->acktimer.arg = s;
+ tcb->katimer.start = DEF_KAT / MSPTICK;
+ tcb->katimer.func = tcpkeepalive;
+ tcb->katimer.arg = s;
+
+ mss = DEF_MSS;
+
+ /* create a prototype(pseudo) header */
+ if(mode != TCP_LISTEN){
+ if(ipcmp(s->laddr, IPnoaddr) == 0) /* no local address yet: pick one for this remote */
+ findlocalip(s->p->f, s->laddr, s->raddr);
+
+ switch(s->ipversion){
+ case V4:
+ h4 = &tcb->protohdr.tcp4hdr;
+ memset(h4, 0, sizeof(*h4));
+ h4->proto = IP_TCPPROTO;
+ hnputs(h4->tcpsport, s->lport);
+ hnputs(h4->tcpdport, s->rport);
+ v6tov4(h4->tcpsrc, s->laddr);
+ v6tov4(h4->tcpdst, s->raddr);
+ break;
+ case V6:
+ h6 = &tcb->protohdr.tcp6hdr;
+ memset(h6, 0, sizeof(*h6));
+ h6->proto = IP_TCPPROTO;
+ hnputs(h6->tcpsport, s->lport);
+ hnputs(h6->tcpdport, s->rport);
+ ipmove(h6->tcpsrc, s->laddr);
+ ipmove(h6->tcpdst, s->raddr);
+ mss = DEF_MSS6;
+ break;
+ default:
+ panic("inittcpctl: version %d", s->ipversion);
+ }
+ }
+
+ tcb->mss = tcb->cwind = mss; /* the congestion window starts at one segment */
+
+ /* default is no window scaling */
+ tcb->window = QMAX;
+ tcb->rcv.wnd = QMAX;
+ tcb->rcv.scale = 0;
+ tcb->snd.scale = 0;
+ qsetlimit(s->rq, QMAX);
+}
+
+/*
+ * start s as a listener (TCP_LISTEN) or an active open (TCP_CONNECT); called with s QLOCKed
+ */
+void
+tcpstart(Conv *s, int mode)
+{
+ Tcpctl *tcb;
+ Tcppriv *tpriv;
+ char kpname[KNAMELEN];
+
+ tpriv = s->p->priv;
+
+ if(tpriv->ackprocstarted == 0){ /* the first start creates the tcpackproc kproc */
+ qlock(&tpriv->apl);
+ if(tpriv->ackprocstarted == 0){ /* checked again now that apl is held */
+ sprint(kpname, "#I%dtcpack", s->p->f->dev);
+ kproc(kpname, tcpackproc, s->p);
+ tpriv->ackprocstarted = 1;
+ }
+ qunlock(&tpriv->apl);
+ }
+
+ tcb = (Tcpctl*)s->ptcl;
+
+ inittcpctl(s, mode);
+
+ iphtadd(&tpriv->ht, s); /* enter s in the conversation hash table */
+ switch(mode) {
+ case TCP_LISTEN:
+ tpriv->stats[PassiveOpens]++;
+ tcb->flags |= CLONE;
+ tcpsetstate(s, Listen);
+ break;
+
+ case TCP_CONNECT:
+ tpriv->stats[ActiveOpens]++;
+ tcb->flags |= ACTIVE;
+ tcpsndsyn(s, tcb);
+ tcpsetstate(s, Syn_sent);
+ tcpoutput(s);
+ break;
+ }
+}
+
+static char*
+tcpflag(ushort flag) /* text form of a tcpflag word; the result is a static buffer reused by each call */
+{
+ static char buf[128];
+
+ sprint(buf, "%d", flag>>10); /* Head len */
+ if(flag & URG)
+ strcat(buf, " URG");
+ if(flag & ACK)
+ strcat(buf, " ACK");
+ if(flag & PSH)
+ strcat(buf, " PSH");
+ if(flag & RST)
+ strcat(buf, " RST");
+ if(flag & SYN)
+ strcat(buf, " SYN");
+ if(flag & FIN)
+ strcat(buf, " FIN");
+
+ return buf;
+}
+
+Block * /* data with an IPv6+TCP header for tcph in front (a new block if data is nil); nil on failure */
+htontcp6(Tcp *tcph, Block *data, Tcp6hdr *ph, Tcpctl *tcb)
+{
+ int dlen;
+ Tcp6hdr *h;
+ ushort csum;
+ ushort hdrlen, optpad = 0;
+ uchar *opt;
+
+ hdrlen = TCP6_HDRSIZE;
+ if(tcph->flags & SYN){ /* options are only sent on SYN segments */
+ if(tcph->mss)
+ hdrlen += MSS_LENGTH;
+ if(tcph->ws)
+ hdrlen += WS_LENGTH;
+ optpad = hdrlen & 3; /* pad the options to a multiple of 4 bytes */
+ if(optpad)
+ optpad = 4 - optpad;
+ hdrlen += optpad;
+ }
+
+ if(data) {
+ dlen = blocklen(data);
+ data = padblock(data, hdrlen + TCP6_PKT);
+ if(data == nil)
+ return nil;
+ }
+ else {
+ dlen = 0;
+ data = allocb(hdrlen + TCP6_PKT + 64); /* the 64 pad is to meet mintu's */
+ if(data == nil)
+ return nil;
+ data->wp += hdrlen + TCP6_PKT;
+ }
+
+ /* copy in pseudo ip header plus port numbers */
+ h = (Tcp6hdr *)(data->rp);
+ memmove(h, ph, TCP6_TCBPHDRSZ);
+
+ /* compose pseudo tcp header, do cksum calculation */
+ hnputl(h->vcf, hdrlen + dlen);
+ h->ploadlen[0] = h->ploadlen[1] = h->proto = 0;
+ h->ttl = ph->proto;
+
+ /* copy in variable bits */
+ hnputl(h->tcpseq, tcph->seq);
+ hnputl(h->tcpack, tcph->ack);
+ hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags);
+ hnputs(h->tcpwin, tcph->wnd>>(tcb != nil ? tcb->snd.scale : 0)); /* unscaled when there is no tcb */
+ hnputs(h->tcpurg, tcph->urg);
+
+ if(tcph->flags & SYN){
+ opt = h->tcpopt;
+ if(tcph->mss != 0){
+ *opt++ = MSSOPT;
+ *opt++ = MSS_LENGTH;
+ hnputs(opt, tcph->mss);
+ opt += 2;
+ }
+ if(tcph->ws != 0){
+ *opt++ = WSOPT;
+ *opt++ = WS_LENGTH;
+ *opt++ = tcph->ws; /* only the low byte is stored, so the HaveWS bit is dropped */
+ }
+ while(optpad-- > 0)
+ *opt++ = NOOPOPT;
+ }
+
+ if(tcb != nil && tcb->nochecksum){
+ h->tcpcksum[0] = h->tcpcksum[1] = 0;
+ } else {
+ csum = ptclcsum(data, TCP6_IPLEN, hdrlen+dlen+TCP6_PHDRSIZE);
+ hnputs(h->tcpcksum, csum);
+ }
+
+ /* move from pseudo header back to normal ip header */
+ memset(h->vcf, 0, 4);
+ h->vcf[0] = IP_VER6;
+ hnputs(h->ploadlen, hdrlen+dlen);
+ h->proto = ph->proto;
+
+ return data;
+}
+
+Block * /* data with an IPv4 pseudo header + TCP header for tcph in front (a new block if data is nil); nil on failure */
+htontcp4(Tcp *tcph, Block *data, Tcp4hdr *ph, Tcpctl *tcb)
+{
+ int dlen;
+ Tcp4hdr *h;
+ ushort csum;
+ ushort hdrlen, optpad = 0;
+ uchar *opt;
+
+ hdrlen = TCP4_HDRSIZE;
+ if(tcph->flags & SYN){ /* options are only sent on SYN segments */
+ if(tcph->mss)
+ hdrlen += MSS_LENGTH;
+ if(tcph->ws)
+ hdrlen += WS_LENGTH;
+ optpad = hdrlen & 3; /* pad the options to a multiple of 4 bytes */
+ if(optpad)
+ optpad = 4 - optpad;
+ hdrlen += optpad;
+ }
+
+ if(data) {
+ dlen = blocklen(data);
+ data = padblock(data, hdrlen + TCP4_PKT);
+ if(data == nil)
+ return nil;
+ }
+ else {
+ dlen = 0;
+ data = allocb(hdrlen + TCP4_PKT + 64); /* the 64 pad is to meet mintu's */
+ if(data == nil)
+ return nil;
+ data->wp += hdrlen + TCP4_PKT;
+ }
+
+ /* copy in pseudo ip header plus port numbers */
+ h = (Tcp4hdr *)(data->rp);
+ memmove(h, ph, TCP4_TCBPHDRSZ);
+
+ /* copy in variable bits */
+ hnputs(h->tcplen, hdrlen + dlen);
+ hnputl(h->tcpseq, tcph->seq);
+ hnputl(h->tcpack, tcph->ack);
+ hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags);
+ hnputs(h->tcpwin, tcph->wnd>>(tcb != nil ? tcb->snd.scale : 0)); /* unscaled when there is no tcb */
+ hnputs(h->tcpurg, tcph->urg);
+
+ if(tcph->flags & SYN){
+ opt = h->tcpopt;
+ if(tcph->mss != 0){
+ *opt++ = MSSOPT;
+ *opt++ = MSS_LENGTH;
+ hnputs(opt, tcph->mss);
+ opt += 2;
+ }
+ if(tcph->ws != 0){
+ *opt++ = WSOPT;
+ *opt++ = WS_LENGTH;
+ *opt++ = tcph->ws; /* only the low byte is stored, so the HaveWS bit is dropped */
+ }
+ while(optpad-- > 0)
+ *opt++ = NOOPOPT;
+ }
+
+ if(tcb != nil && tcb->nochecksum){
+ h->tcpcksum[0] = h->tcpcksum[1] = 0;
+ } else {
+ csum = ptclcsum(data, TCP4_IPLEN, hdrlen+dlen+TCP4_PHDRSIZE);
+ hnputs(h->tcpcksum, csum);
+ }
+
+ return data;
+}
+
+int	/* TCP header length in bytes, or -1 if the header cannot be used */
+ntohtcp6(Tcp *tcph, Block **bpp)
+{
+	Tcp6hdr *h;
+	uchar *optr;
+	ushort hdrlen;
+	ushort optlen;
+	int n;
+
+	*bpp = pullupblock(*bpp, TCP6_PKT+TCP6_HDRSIZE);
+	if(*bpp == nil)
+		return -1;
+
+	h = (Tcp6hdr *)((*bpp)->rp);
+	tcph->source = nhgets(h->tcpsport);
+	tcph->dest = nhgets(h->tcpdport);
+	tcph->seq = nhgetl(h->tcpseq);
+	tcph->ack = nhgetl(h->tcpack);
+	hdrlen = (h->tcpflag[0]>>2) & ~3;	/* data offset, converted to bytes */
+	if(hdrlen < TCP6_HDRSIZE) {
+		freeblist(*bpp);
+		return -1;
+	}
+
+	tcph->flags = h->tcpflag[1];
+	tcph->wnd = nhgets(h->tcpwin);
+	tcph->urg = nhgets(h->tcpurg);
+	tcph->mss = 0;
+	tcph->ws = 0;
+	tcph->len = nhgets(h->ploadlen) - hdrlen;
+
+	*bpp = pullupblock(*bpp, hdrlen+TCP6_PKT);
+	if(*bpp == nil)
+		return -1;
+	/* pullupblock may hand back a different block: refetch the header pointer */
+	h = (Tcp6hdr *)((*bpp)->rp);
+	optr = h->tcpopt;
+	n = hdrlen - TCP6_HDRSIZE;
+	while(n > 0 && *optr != EOLOPT) {
+		if(*optr == NOOPOPT) {
+			n--;
+			optr++;
+			continue;
+		}
+		if(n < 2 || (optlen = optr[1]) < 2 || optlen > n)
+			break;	/* no room for a length byte, or a bad length */
+		switch(*optr) {
+		case MSSOPT:
+			if(optlen == MSS_LENGTH)
+				tcph->mss = nhgets(optr+2);
+			break;
+		case WSOPT:
+			if(optlen == WS_LENGTH && *(optr+2) <= 14)
+				tcph->ws = HaveWS | *(optr+2);
+			break;
+		}
+		n -= optlen;
+		optr += optlen;
+	}
+	return hdrlen;
+}
+
+int	/* TCP header length in bytes, or -1 if the header cannot be used */
+ntohtcp4(Tcp *tcph, Block **bpp)
+{
+	Tcp4hdr *h;
+	uchar *optr;
+	ushort hdrlen;
+	ushort optlen;
+	int n;
+
+	*bpp = pullupblock(*bpp, TCP4_PKT+TCP4_HDRSIZE);
+	if(*bpp == nil)
+		return -1;
+
+	h = (Tcp4hdr *)((*bpp)->rp);
+	tcph->source = nhgets(h->tcpsport);
+	tcph->dest = nhgets(h->tcpdport);
+	tcph->seq = nhgetl(h->tcpseq);
+	tcph->ack = nhgetl(h->tcpack);
+
+	hdrlen = (h->tcpflag[0]>>2) & ~3;	/* data offset, converted to bytes */
+	if(hdrlen < TCP4_HDRSIZE) {
+		freeblist(*bpp);
+		return -1;
+	}
+
+	tcph->flags = h->tcpflag[1];
+	tcph->wnd = nhgets(h->tcpwin);
+	tcph->urg = nhgets(h->tcpurg);
+	tcph->mss = 0;
+	tcph->ws = 0;
+	tcph->len = nhgets(h->length) - (hdrlen + TCP4_PKT);
+
+	*bpp = pullupblock(*bpp, hdrlen+TCP4_PKT);
+	if(*bpp == nil)
+		return -1;
+	/* pullupblock may hand back a different block: refetch the header pointer */
+	h = (Tcp4hdr *)((*bpp)->rp);
+	optr = h->tcpopt;
+	n = hdrlen - TCP4_HDRSIZE;
+	while(n > 0 && *optr != EOLOPT) {
+		if(*optr == NOOPOPT) {
+			n--;
+			optr++;
+			continue;
+		}
+		if(n < 2 || (optlen = optr[1]) < 2 || optlen > n)
+			break;	/* no room for a length byte, or a bad length */
+		switch(*optr) {
+		case MSSOPT:
+			if(optlen == MSS_LENGTH)
+				tcph->mss = nhgets(optr+2);
+			break;
+		case WSOPT:
+			if(optlen == WS_LENGTH && *(optr+2) <= 14)
+				tcph->ws = HaveWS | *(optr+2);
+			break;
+		}
+		n -= optlen;
+		optr += optlen;
+	}
+	return hdrlen;
+}
+
+/*
+ * For outgoing calls, generate an initial sequence
+ * number and put a SYN on the send queue
+ */
+void
+tcpsndsyn(Conv *s, Tcpctl *tcb)
+{
+ tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16); /* two 16-bit random halves */
+ tcb->rttseq = tcb->iss;
+ tcb->snd.wl2 = tcb->iss;
+ tcb->snd.una = tcb->iss;
+ tcb->snd.ptr = tcb->rttseq;
+ tcb->snd.nxt = tcb->rttseq;
+ tcb->flgcnt++; /* one more flag in the sequence: the SYN */
+ tcb->flags |= FORCE;
+ tcb->sndsyntime = NOW;
+
+ /* set desired mss and scale */
+ tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale);
+}
+
+void /* answer segment seg with a RST; seg is overwritten and reused as the reply; length is added to the ack */
+sndrst(Proto *tcp, uchar *source, uchar *dest, ushort length, Tcp *seg, uchar version, char *reason)
+{
+ Block *hbp;
+ uchar rflags;
+ Tcppriv *tpriv;
+ Tcp4hdr ph4;
+ Tcp6hdr ph6;
+
+ netlog(tcp->f, Logtcp, "sndrst: %s\n", reason);
+
+ tpriv = tcp->priv;
+
+ if(seg->flags & RST) /* never answer a reset with a reset */
+ return;
+
+ /* make pseudo header */
+ switch(version) {
+ case V4:
+ memset(&ph4, 0, sizeof(ph4));
+ ph4.vihl = IP_VER4;
+ v6tov4(ph4.tcpsrc, dest); /* addresses and ports are swapped for the reply */
+ v6tov4(ph4.tcpdst, source);
+ ph4.proto = IP_TCPPROTO;
+ hnputs(ph4.tcplen, TCP4_HDRSIZE);
+ hnputs(ph4.tcpsport, seg->dest);
+ hnputs(ph4.tcpdport, seg->source);
+ break;
+ case V6:
+ memset(&ph6, 0, sizeof(ph6));
+ ph6.vcf[0] = IP_VER6;
+ ipmove(ph6.tcpsrc, dest);
+ ipmove(ph6.tcpdst, source);
+ ph6.proto = IP_TCPPROTO;
+ hnputs(ph6.ploadlen, TCP6_HDRSIZE);
+ hnputs(ph6.tcpsport, seg->dest);
+ hnputs(ph6.tcpdport, seg->source);
+ break;
+ default:
+ panic("sndrst: version %d", version);
+ }
+
+ tpriv->stats[OutRsts]++;
+ rflags = RST;
+
+ /* convince the other end that this reset is in band */
+ if(seg->flags & ACK) {
+ seg->seq = seg->ack;
+ seg->ack = 0;
+ }
+ else {
+ rflags |= ACK;
+ seg->ack = seg->seq;
+ seg->seq = 0;
+ if(seg->flags & SYN) /* SYN and FIN each add one to the ack */
+ seg->ack++;
+ seg->ack += length;
+ if(seg->flags & FIN)
+ seg->ack++;
+ }
+ seg->flags = rflags;
+ seg->wnd = 0;
+ seg->urg = 0;
+ seg->mss = 0;
+ seg->ws = 0;
+ switch(version) {
+ case V4:
+ hbp = htontcp4(seg, nil, &ph4, nil);
+ if(hbp == nil)
+ return;
+ ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
+ break;
+ case V6:
+ hbp = htontcp6(seg, nil, &ph6, nil);
+ if(hbp == nil)
+ return;
+ ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
+ break;
+ default:
+ panic("sndrst2: version %d", version);
+ }
+}
+
+/*
+ * send a reset to the remote side and close the conversation
+ * called with s QLOCKed
+ */
+char*
+tcphangup(Conv *s)
+{
+ Tcp seg;
+ Tcpctl *tcb;
+ Block *hbp;
+
+ tcb = (Tcpctl*)s->ptcl;
+ if(waserror())
+ return commonerror();
+ if(ipcmp(s->raddr, IPnoaddr) != 0) { /* only when a remote address is set */
+ if(!waserror()){ /* an error while sending the reset does not prevent the close below */
+ seg.flags = RST | ACK;
+ seg.ack = tcb->rcv.nxt;
+ tcb->rcv.una = 0;
+ seg.seq = tcb->snd.ptr;
+ seg.wnd = 0;
+ seg.urg = 0;
+ seg.mss = 0;
+ seg.ws = 0;
+ switch(s->ipversion) {
+ case V4:
+ tcb->protohdr.tcp4hdr.vihl = IP_VER4;
+ hbp = htontcp4(&seg, nil, &tcb->protohdr.tcp4hdr, tcb); /* NOTE(review): hbp is not checked for nil here, unlike in sndrst */
+ ipoput4(s->p->f, hbp, 0, s->ttl, s->tos, s);
+ break;
+ case V6:
+ tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
+ hbp = htontcp6(&seg, nil, &tcb->protohdr.tcp6hdr, tcb); /* NOTE(review): same missing nil check */
+ ipoput6(s->p->f, hbp, 0, s->ttl, s->tos, s);
+ break;
+ default:
+ panic("tcphangup: version %d", s->ipversion);
+ }
+ poperror();
+ }
+ }
+ localclose(s, nil);
+ poperror();
+ return nil;
+}
+
+/*
+ * (re)send a SYN ACK for the call lp; returns 0, or -1 if no packet could be built
+ */
+int
+sndsynack(Proto *tcp, Limbo *lp)
+{
+	Block *hbp;
+	Tcp4hdr ph4;
+	Tcp6hdr ph6;
+	Tcp seg;
+	int scale;
+
+	/* make pseudo header */
+	switch(lp->version) {
+	case V4:
+		memset(&ph4, 0, sizeof(ph4));
+		ph4.vihl = IP_VER4;
+		v6tov4(ph4.tcpsrc, lp->laddr);
+		v6tov4(ph4.tcpdst, lp->raddr);
+		ph4.proto = IP_TCPPROTO;
+		hnputs(ph4.tcplen, TCP4_HDRSIZE);
+		hnputs(ph4.tcpsport, lp->lport);
+		hnputs(ph4.tcpdport, lp->rport);
+		break;
+	case V6:
+		memset(&ph6, 0, sizeof(ph6));
+		ph6.vcf[0] = IP_VER6;
+		ipmove(ph6.tcpsrc, lp->laddr);
+		ipmove(ph6.tcpdst, lp->raddr);
+		ph6.proto = IP_TCPPROTO;
+		hnputs(ph6.ploadlen, TCP6_HDRSIZE);
+		hnputs(ph6.tcpsport, lp->lport);
+		hnputs(ph6.tcpdport, lp->rport);
+		break;
+	default:
+		panic("sndsynack: version %d", lp->version);
+	}
+
+	seg.seq = lp->iss;
+	seg.ack = lp->irs+1;	/* acknowledges the peer's SYN */
+	seg.flags = SYN|ACK;
+	seg.urg = 0;
+	seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale);
+	seg.wnd = QMAX;
+
+	/* if the other side set scale, we should too */
+	if(lp->rcvscale){
+		seg.ws = scale;
+		lp->sndscale = scale;
+	} else {
+		seg.ws = 0;
+		lp->sndscale = 0;
+	}
+
+	switch(lp->version) {
+	case V4:
+		hbp = htontcp4(&seg, nil, &ph4, nil);
+		if(hbp == nil)
+			return -1;
+		ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
+		break;
+	case V6:
+		hbp = htontcp6(&seg, nil, &ph6, nil);
+		if(hbp == nil)
+			return -1;
+		ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
+		break;
+	default:
+		panic("sndsynack2: version %d", lp->version);
+	}
+	lp->lastsend = NOW;	/* limborexmit times the next retransmit from this */
+	return 0;
+}
+
+#define hashipa(a, p) ( ( (a)[IPaddrlen-2] + (a)[IPaddrlen-1] + (p) )&LHTMASK )	/* limbo hash: last two address bytes plus port */
+
+/*
+ * put a call into limbo and respond with a SYN ACK
+ *
+ * called with proto locked
+ */
+static void
+limbo(Conv *s, uchar *source, uchar *dest, Tcp *seg, int version)
+{
+	Limbo *lp, **l;
+	Tcppriv *tpriv;
+	int h;
+
+	tpriv = s->p->priv;
+	h = hashipa(source, seg->source);
+
+	for(l = &tpriv->lht[h]; *l != nil; l = &lp->next){
+		lp = *l;
+		if(lp->lport != seg->dest || lp->rport != seg->source || lp->version != version
+		|| ipcmp(lp->raddr, source) != 0 || ipcmp(lp->laddr, dest) != 0)
+			continue;
+
+		/* each new SYN restarts the retransmits */
+		lp->irs = seg->seq;
+		break;
+	}
+	lp = *l;
+	if(lp == nil){
+		/* no entry for this call; l is the link at the end of the chain */
+		if(tpriv->nlimbo >= Maxlimbo && tpriv->lht[h]){
+			lp = tpriv->lht[h];
+			tpriv->lht[h] = lp->next;
+			/* the recycled head was also the tail: l points into it, relink at the head */
+			if(l == &lp->next)
+				l = &tpriv->lht[h];
+		} else {
+			lp = malloc(sizeof(*lp));
+			if(lp == nil)
+				return;
+			tpriv->nlimbo++;
+		}
+		*l = lp;
+		lp->next = nil;
+		lp->version = version;
+		ipmove(lp->laddr, dest);
+		ipmove(lp->raddr, source);
+		lp->lport = seg->dest;
+		lp->rport = seg->source;
+		lp->mss = seg->mss;
+		lp->rcvscale = seg->ws;
+		lp->irs = seg->seq;
+		lp->iss = (nrand(1<<16)<<16)|nrand(1<<16);
+		lp->rexmits = 0;	/* a recycled entry would carry a stale count */
+	}
+	if(sndsynack(s->p, lp) < 0){
+		*l = lp->next;
+		tpriv->nlimbo--;
+		free(lp);
+	}
+}
+
+/*
+ * resend SYN ACK's once every SYNACK_RXTIMER ms.
+ *
+ * every path through the inner loop either unlinks the entry or steps
+ * past it, so no entry is handled twice in one pass; seen counts the
+ * entries left in place, which lets the scan stop early.
+ * returns at once if CANQLOCK(tcp) fails.
+ */
+static void
+limborexmit(Proto *tcp)
+{
+	Tcppriv *tpriv;
+	Limbo **l, *lp;
+	int h, seen;
+	ulong now;
+
+	tpriv = tcp->priv;
+	if(!CANQLOCK(tcp))
+		return;
+	seen = 0;
+	now = NOW;
+	for(h = 0; h < NLHT && seen < tpriv->nlimbo; h++){
+		for(l = &tpriv->lht[h]; (lp = *l) != nil && seen < tpriv->nlimbo; ){
+			/* not due yet; the wait grows with each retransmit */
+			if(now - lp->lastsend < (lp->rexmits+1)*SYNACK_RXTIMER){
+				seen++;
+				l = &lp->next;
+				continue;
+			}
+
+			/*
+			 * drop the call after 5 retransmits or if the SYN ACK
+			 * cannot be built.  if we're being attacked (more than
+			 * 100 calls in limbo), don't bother resending SYN ACK's.
+			 */
+			if(++(lp->rexmits) > 5
+			|| (tpriv->nlimbo <= 100 && sndsynack(tcp, lp) < 0)){
+				tpriv->nlimbo--;
+				*l = lp->next;
+				free(lp);
+				continue;
+			}
+
+			seen++;
+			l = &lp->next;
+		}
+	}
+	QUNLOCK(tcp);
+}
+
+/*
+ * lookup call in limbo. if found, throw it out.
+ *
+ * called with proto locked
+ */
+static void
+limborst(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version)
+{
+	Tcppriv *tpriv;
+	Limbo **link, *call;
+
+	tpriv = s->p->priv;
+
+	/* walk the hash chain selected by the remote address and port */
+	link = &tpriv->lht[hashipa(src, segp->source)];
+	while((call = *link) != nil){
+		if(call->lport == segp->dest && call->rport == segp->source
+		&& call->version == version
+		&& ipcmp(call->laddr, dst) == 0 && ipcmp(call->raddr, src) == 0){
+
+			/* RST can only follow the SYN */
+			if(segp->seq == call->irs+1){
+				tpriv->nlimbo--;
+				*link = call->next;
+				free(call);
+			}
+
+			/* the first matching call ends the search, removed or not */
+			return;
+		}
+
+		link = &call->next;
+	}
+}
+
+/*
+ * come here when we finally get an ACK to our SYN-ACK.
+ * lookup call in limbo. if found, create a new conversation and
+ * return it in state Established; return nil otherwise.
+ * called with proto locked
+ */
+static Conv*
+tcpincoming(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version)
+{
+	Conv *new;
+	Tcpctl *tcb;
+	Tcppriv *tpriv;
+	Tcp4hdr *h4;
+	Tcp6hdr *h6;
+	Limbo *lp, **l;
+	int h;
+
+	/* unless it's just an ack, it can't be someone coming out of limbo */
+	if((segp->flags & SYN) || (segp->flags & ACK) == 0)
+		return nil;
+
+	tpriv = s->p->priv;
+	/* find a call in limbo */
+	h = hashipa(src, segp->source);
+	for(l = &tpriv->lht[h]; (lp = *l) != nil; l = &lp->next){
+		netlog(s->p->f, Logtcp, "tcpincoming s %I,%ux/%I,%ux d %I,%ux/%I,%ux v %d/%d\n",
+			src, segp->source, lp->raddr, lp->rport,
+			dst, segp->dest, lp->laddr, lp->lport,
+			version, lp->version
+		);
+		if(lp->lport != segp->dest || lp->rport != segp->source || lp->version != version)
+			continue;
+		if(ipcmp(lp->laddr, dst) != 0)
+			continue;
+		if(ipcmp(lp->raddr, src) != 0)
+			continue;
+
+		/* we're assuming no data with the initial SYN */
+		if(segp->seq != lp->irs+1 || segp->ack != lp->iss+1){
+			netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux\n",
+				segp->seq, lp->irs+1, segp->ack, lp->iss+1);
+			lp = nil;
+		} else {
+			tpriv->nlimbo--;
+			*l = lp->next;
+		}
+		break;
+	}
+	if(lp == nil)
+		return nil;
+
+	new = Fsnewcall(s, src, segp->source, dst, segp->dest, version);
+	if(new == nil){
+		free(lp);	/* already unlinked from limbo above, so it would leak */
+		return nil;
+	}
+
+	memmove(new->ptcl, s->ptcl, sizeof(Tcpctl));
+	tcb = (Tcpctl*)new->ptcl;
+	tcb->flags &= ~CLONE;
+	tcb->timer.arg = new;
+	tcb->timer.state = TcptimerOFF;
+	tcb->acktimer.arg = new;
+	tcb->acktimer.state = TcptimerOFF;
+	tcb->katimer.arg = new;
+	tcb->katimer.state = TcptimerOFF;
+	tcb->rtt_timer.arg = new;
+	tcb->rtt_timer.state = TcptimerOFF;
+
+	tcb->irs = lp->irs;
+	tcb->rcv.nxt = tcb->irs+1;
+	tcb->rcv.urg = tcb->rcv.nxt;
+
+	tcb->iss = lp->iss;
+	tcb->rttseq = tcb->iss;
+	tcb->snd.wl2 = tcb->iss;
+	tcb->snd.una = tcb->iss+1;
+	tcb->snd.ptr = tcb->iss+1;
+	tcb->snd.nxt = tcb->iss+1;
+	tcb->flgcnt = 0;
+	tcb->flags |= SYNACK;
+
+	/* our sending max segment size cannot be bigger than what he asked for */
+	if(lp->mss != 0 && lp->mss < tcb->mss)
+		tcb->mss = lp->mss;
+
+	/* window scaling */
+	tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale);
+
+	/* the congestion window always starts out as a single segment */
+	tcb->snd.wnd = segp->wnd;
+	tcb->cwind = tcb->mss;
+
+	/* set initial round trip time */
+	tcb->sndsyntime = lp->lastsend+lp->rexmits*SYNACK_RXTIMER;
+	tcpsynackrtt(new);
+
+	free(lp);
+
+	/* set up proto header */
+	switch(version){
+	case V4:
+		h4 = &tcb->protohdr.tcp4hdr;
+		memset(h4, 0, sizeof(*h4));
+		h4->proto = IP_TCPPROTO;
+		hnputs(h4->tcpsport, new->lport);
+		hnputs(h4->tcpdport, new->rport);
+		v6tov4(h4->tcpsrc, dst);
+		v6tov4(h4->tcpdst, src);
+		break;
+	case V6:
+		h6 = &tcb->protohdr.tcp6hdr;
+		memset(h6, 0, sizeof(*h6));
+		h6->proto = IP_TCPPROTO;
+		hnputs(h6->tcpsport, new->lport);
+		hnputs(h6->tcpdport, new->rport);
+		ipmove(h6->tcpsrc, dst);
+		ipmove(h6->tcpdst, src);
+		break;
+	default:
+		panic("tcpincoming: version %d", new->ipversion);
+	}
+
+	tcpsetstate(new, Established);
+
+	iphtadd(&tpriv->ht, new);
+
+	return new;
+}
+
+/*
+ * return 1 if x lies in the closed range low..high of sequence
+ * space, 0 otherwise.  low > high means the range wraps around
+ * through zero.
+ */
+int
+seq_within(ulong x, ulong low, ulong high)
+{
+	if(low <= high)
+		return low <= x && x <= high;
+	/* wrapped range */
+	return x >= low || x <= high;
+}
+
+int
+seq_lt(ulong x, ulong y)
+{
+ return (int)(x-y) < 0; /* x precedes y: the unsigned difference, read as an int, is negative */
+}
+
+int
+seq_le(ulong x, ulong y)
+{
+ return (int)(x-y) <= 0; /* x precedes or equals y: the difference, read as an int, is not positive */
+}
+
+int
+seq_gt(ulong x, ulong y)
+{
+ return (int)(x-y) > 0; /* x follows y: the unsigned difference, read as an int, is positive */
+}
+
+int
+seq_ge(ulong x, ulong y)
+{
+ return (int)(x-y) >= 0; /* x follows or equals y: the difference, read as an int, is not negative */
+}
+
+/*
+ * use the time between the first SYN and its ack (measured from
+ * tcb->sndsyntime) as the initial round trip time, then halt the
+ * round trip timer.
+ */
+void
+tcpsynackrtt(Conv *s)
+{
+	Tcpctl *ctl;
+	int elapsed;
+
+	ctl = (Tcpctl*)s->ptcl;
+	elapsed = NOW - ctl->sndsyntime;
+
+	/* both estimates start from this one sample, each with its own shift */
+	ctl->srtt = elapsed<<LOGAGAIN;
+	ctl->mdev = elapsed<<LOGDGAIN;
+
+	/* halt round trip timer */
+	tcphalt(s->p->priv, &ctl->rtt_timer);
+}
+
+void
+update(Conv *s, Tcp *seg)
+{ /* apply the ack number and window carried by incoming segment seg to s's send side state */
+ int rtt, delta;
+ Tcpctl *tcb;
+ ulong acked;
+ ulong expand;
+ Tcppriv *tpriv;
+
+ tpriv = s->p->priv;
+ tcb = (Tcpctl*)s->ptcl;
+
+ /* the ack is beyond snd.nxt, i.e. past anything sent: ignore it and force output(?) */
+ if(seq_gt(seg->ack, tcb->snd.nxt)) {
+ tcb->flags |= FORCE;
+ return;
+ }
+
+ /* added by Dong Lin for fast retransmission */
+ if(seg->ack == tcb->snd.una
+ && tcb->snd.una != tcb->snd.nxt
+ && seg->len == 0
+ && seg->wnd == tcb->snd.wnd) {
+
+ /* this is a pure ack w/o window update */
+ netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %d advwin %d\n",
+ tcb->snd.dupacks, seg->ack, tcb->snd.wnd, seg->wnd);
+
+ if(++tcb->snd.dupacks == TCPREXMTTHRESH) {
+ /*
+ * tahoe tcp rxt the packet, half sshthresh,
+ * and set cwnd to one packet
+ */
+ tcb->snd.recovery = 1;
+ tcb->snd.rxt = tcb->snd.nxt; /* recovery lasts until an ack reaches this point (see below) */
+ netlog(s->p->f, Logtcprxmt, "fast rxt %lud, nxt %lud\n", tcb->snd.una, tcb->snd.nxt);
+ tcprxmit(s);
+ } else {
+ /* do reno tcp here. */
+ }
+ }
+
+ /*
+ * update window
+ */
+ if(seq_gt(seg->ack, tcb->snd.wl2)
+ || (tcb->snd.wl2 == seg->ack && seg->wnd > tcb->snd.wnd)){
+ tcb->snd.wnd = seg->wnd;
+ tcb->snd.wl2 = seg->ack;
+ }
+
+ if(!seq_gt(seg->ack, tcb->snd.una)){ /* nothing new is acknowledged */
+ /*
+ * don't let us hangup if sending into a closed window and
+ * we're still getting acks
+ */
+ if((tcb->flags&RETRAN) && tcb->snd.wnd == 0){
+ tcb->backedoff = MAXBACKMS/4;
+ }
+ return;
+ }
+
+ /*
+ * any positive ack turns off fast rxt,
+ * (should we do new-reno on partial acks?)
+ */
+ if(!tcb->snd.recovery || seq_ge(seg->ack, tcb->snd.rxt)) {
+ tcb->snd.dupacks = 0;
+ tcb->snd.recovery = 0;
+ } else
+ netlog(s->p->f, Logtcp, "rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind);
+
+ /* Compute the new send window size */
+ acked = seg->ack - tcb->snd.una;
+
+ /* avoid slow start and timers for SYN acks */
+ if((tcb->flags & SYNACK) == 0) {
+ tcb->flags |= SYNACK; /* remember that our SYN has now been acked */
+ acked--;
+ tcb->flgcnt--;
+ goto done;
+ }
+
+ /* slow start as long as we're not recovering from lost packets */
+ if(tcb->cwind < tcb->snd.wnd && !tcb->snd.recovery) {
+ if(tcb->cwind < tcb->ssthresh) {
+ expand = tcb->mss;
+ if(acked < expand)
+ expand = acked;
+ }
+ else
+ expand = ((int)tcb->mss * tcb->mss) / tcb->cwind;
+
+ if(tcb->cwind + expand < tcb->cwind) /* the sum wrapped around */
+ expand = tcb->snd.wnd - tcb->cwind;
+ if(tcb->cwind + expand > tcb->snd.wnd) /* never grow past the offered window */
+ expand = tcb->snd.wnd - tcb->cwind;
+ tcb->cwind += expand;
+ }
+
+ /* Adjust the timers according to the round trip time */
+ if(tcb->rtt_timer.state == TcptimerON && seq_ge(seg->ack, tcb->rttseq)) {
+ tcphalt(tpriv, &tcb->rtt_timer);
+ if((tcb->flags&RETRAN) == 0) { /* samples taken while retransmitting are not used */
+ tcb->backoff = 0;
+ tcb->backedoff = 0;
+ rtt = tcb->rtt_timer.start - tcb->rtt_timer.count;
+ if(rtt == 0)
+ rtt = 1; /* otherwise all close systems will rexmit in 0 time */
+ rtt *= MSPTICK;
+ if(tcb->srtt == 0) {
+ tcb->srtt = rtt << LOGAGAIN;
+ tcb->mdev = rtt << LOGDGAIN;
+ } else {
+ delta = rtt - (tcb->srtt>>LOGAGAIN);
+ tcb->srtt += delta;
+ if(tcb->srtt <= 0)
+ tcb->srtt = 1;
+
+ delta = abs(delta) - (tcb->mdev>>LOGDGAIN);
+ tcb->mdev += delta;
+ if(tcb->mdev <= 0)
+ tcb->mdev = 1;
+ }
+ tcpsettimer(tcb);
+ }
+ }
+
+done:
+ if(qdiscard(s->wq, acked) < acked) /* the ack covers more than was discarded from wq */
+ tcb->flgcnt--;
+
+ tcb->snd.una = seg->ack;
+ if(seq_gt(seg->ack, tcb->snd.urg))
+ tcb->snd.urg = seg->ack;
+
+ if(tcb->snd.una != tcb->snd.nxt) /* still something unacked: keep the timer going */
+ tcpgo(tpriv, &tcb->timer);
+ else
+ tcphalt(tpriv, &tcb->timer);
+
+ if(seq_lt(tcb->snd.ptr, tcb->snd.una))
+ tcb->snd.ptr = tcb->snd.una;
+
+ tcb->flags &= ~RETRAN;
+ tcb->backoff = 0;
+ tcb->backedoff = 0;
+}
+
+void
+tcpiput(Proto *tcp, Ipifc* _, Block *bp)
+{ /* receive one TCP packet (IPv4 or IPv6, headers included) held in bp; the Ipifc argument is unused */
+ Tcp seg;
+ Tcp4hdr *h4;
+ Tcp6hdr *h6;
+ int hdrlen;
+ Tcpctl *tcb;
+ ushort length, csum;
+ uchar source[IPaddrlen], dest[IPaddrlen];
+ Conv *s;
+ Fs *f;
+ Tcppriv *tpriv;
+ uchar version;
+
+ f = tcp->f;
+ tpriv = tcp->priv;
+
+ tpriv->stats[InSegs]++;
+
+ h4 = (Tcp4hdr*)(bp->rp);
+ h6 = (Tcp6hdr*)(bp->rp);
+
+ if((h4->vihl&0xF0)==IP_VER4) {
+ version = V4;
+ length = nhgets(h4->length);
+ v4tov6(dest, h4->tcpdst);
+ v4tov6(source, h4->tcpsrc);
+
+ h4->Unused = 0;
+ hnputs(h4->tcplen, length-TCP4_PKT);
+ if(!(bp->flag & Btcpck) && (h4->tcpcksum[0] || h4->tcpcksum[1]) &&
+ ptclcsum(bp, TCP4_IPLEN, length-TCP4_IPLEN)) {
+ tpriv->stats[CsumErrs]++;
+ tpriv->stats[InErrs]++;
+ netlog(f, Logtcp, "bad tcp proto cksum\n");
+ freeblist(bp);
+ return;
+ }
+
+ hdrlen = ntohtcp4(&seg, &bp);
+ if(hdrlen < 0){
+ tpriv->stats[HlenErrs]++;
+ tpriv->stats[InErrs]++;
+ netlog(f, Logtcp, "bad tcp hdr len\n");
+ return;
+ }
+
+ /* trim the packet to the size claimed by the datagram */
+ length -= hdrlen+TCP4_PKT;
+ bp = trimblock(bp, hdrlen+TCP4_PKT, length);
+ if(bp == nil){
+ tpriv->stats[LenErrs]++;
+ tpriv->stats[InErrs]++;
+ netlog(f, Logtcp, "tcp len < 0 after trim\n");
+ return;
+ }
+ }
+ else {
+ int ttl = h6->ttl;
+ int proto = h6->proto;
+
+ version = V6;
+ length = nhgets(h6->ploadlen);
+ ipmove(dest, h6->tcpdst);
+ ipmove(source, h6->tcpsrc);
+
+ h6->ploadlen[0] = h6->ploadlen[1] = h6->proto = 0; /* header fields are overwritten for the checksum and restored below */
+ h6->ttl = proto;
+ hnputl(h6->vcf, length);
+ if((h6->tcpcksum[0] || h6->tcpcksum[1]) &&
+ (csum = ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) != 0) {
+ tpriv->stats[CsumErrs]++;
+ tpriv->stats[InErrs]++;
+ netlog(f, Logtcp,
+ "bad tcpv6 proto cksum: got %#ux, computed %#ux\n",
+ h6->tcpcksum[0]<<8 | h6->tcpcksum[1], csum);
+ freeblist(bp);
+ return;
+ }
+ h6->ttl = ttl;
+ h6->proto = proto;
+ hnputs(h6->ploadlen, length);
+
+ hdrlen = ntohtcp6(&seg, &bp);
+ if(hdrlen < 0){
+ tpriv->stats[HlenErrs]++;
+ tpriv->stats[InErrs]++;
+ netlog(f, Logtcp, "bad tcpv6 hdr len\n");
+ return;
+ }
+
+ /* trim the packet to the size claimed by the datagram */
+ length -= hdrlen;
+ bp = trimblock(bp, hdrlen+TCP6_PKT, length);
+ if(bp == nil){
+ tpriv->stats[LenErrs]++;
+ tpriv->stats[InErrs]++;
+ netlog(f, Logtcp, "tcpv6 len < 0 after trim\n");
+ return;
+ }
+ }
+
+ /* lock protocol while searching for a conversation */
+ QLOCK(tcp);
+
+ /* Look for a matching conversation */
+ s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest);
+ if(s == nil){
+ netlog(f, Logtcp, "iphtlook failed\n");
+reset: /* also reached from the Listen case below; the proto lock is held here */
+ QUNLOCK(tcp);
+ sndrst(tcp, source, dest, length, &seg, version, "no conversation");
+ freeblist(bp);
+ return;
+ }
+
+ /* if it's a listener, look for the right flags and get a new conv */
+ tcb = (Tcpctl*)s->ptcl;
+ if(tcb->state == Listen){
+ if(seg.flags & RST){
+ limborst(s, &seg, source, dest, version);
+ QUNLOCK(tcp);
+ freeblist(bp);
+ return;
+ }
+
+ /* if this is a new SYN, put the call into limbo */
+ if((seg.flags & SYN) && (seg.flags & ACK) == 0){
+ limbo(s, source, dest, &seg, version);
+ QUNLOCK(tcp);
+ freeblist(bp);
+ return;
+ }
+
+ /*
+ * if there's a matching call in limbo, tcpincoming will
+ * return it in state Syn_received
+ */
+ s = tcpincoming(s, &seg, source, dest, version);
+ if(s == nil)
+ goto reset;
+ }
+
+ /* The rest of the input state machine is run with the control block
+ * locked and implements the state machine directly out of the RFC.
+ * Out-of-band data is ignored - it was always a bad idea.
+ */
+ tcb = (Tcpctl*)s->ptcl;
+ if(waserror()){
+ QUNLOCK(s);
+ nexterror();
+ }
+ QLOCK(s); /* take the conversation lock before letting go of the proto lock */
+ QUNLOCK(tcp);
+
+ /* fix up window */
+ seg.wnd <<= tcb->rcv.scale;
+
+ /* every input packet in puts off the keep alive time out */
+ tcpsetkacounter(tcb);
+
+ switch(tcb->state) {
+ case Closed:
+ sndrst(tcp, source, dest, length, &seg, version, "sending to Closed");
+ goto raise;
+ case Syn_sent:
+ if(seg.flags & ACK) {
+ if(!seq_within(seg.ack, tcb->iss+1, tcb->snd.nxt)) {
+ sndrst(tcp, source, dest, length, &seg, version,
+ "bad seq in Syn_sent");
+ goto raise;
+ }
+ }
+ if(seg.flags & RST) {
+ if(seg.flags & ACK)
+ localclose(s, Econrefused);
+ goto raise;
+ }
+
+ if(seg.flags & SYN) {
+ procsyn(s, &seg);
+ if(seg.flags & ACK){
+ update(s, &seg);
+ tcpsynackrtt(s);
+ tcpsetstate(s, Established);
+ tcpsetscale(s, tcb, seg.ws, tcb->scale);
+ }
+ else {
+ tcb->time = NOW;
+ tcpsetstate(s, Syn_received); /* DLP - shouldn't this be a reset? */
+ }
+
+ if(length != 0 || (seg.flags & FIN))
+ break;
+
+ freeblist(bp);
+ goto output;
+ }
+ else
+ freeblist(bp);
+
+ QUNLOCK(s);
+ poperror();
+ return;
+ case Syn_received:
+ /* doesn't matter if it's the correct ack, we're just trying to set timing */
+ if(seg.flags & ACK)
+ tcpsynackrtt(s);
+ break;
+ }
+
+ /*
+ * One DOS attack is to open connections to us and then forget about them,
+ * thereby tying up a conv at no long term cost to the attacker.
+ * This is an attempt to defeat these stateless DOS attacks. See
+ * corresponding code in tcpsendka().
+ */
+ if(tcb->state != Syn_received && (seg.flags & RST) == 0){
+ if(tcpporthogdefense
+ && seq_within(seg.ack, tcb->snd.una-(1<<31), tcb->snd.una-(1<<29))){
+ print("stateless hog %I.%d->%I.%d f %ux %lux - %lux - %lux\n",
+ source, seg.source, dest, seg.dest, seg.flags,
+ tcb->snd.una-(1<<31), seg.ack, tcb->snd.una-(1<<29));
+ localclose(s, "stateless hog");
+ }
+ }
+
+ /* Cut the data to fit the receive window */
+ if(tcptrim(tcb, &seg, &bp, &length) == -1) {
+ netlog(f, Logtcp, "tcp len < 0, %lud %d\n", seg.seq, length);
+ update(s, &seg);
+ if(qlen(s->wq)+tcb->flgcnt == 0 && tcb->state == Closing) {
+ tcphalt(tpriv, &tcb->rtt_timer);
+ tcphalt(tpriv, &tcb->acktimer);
+ tcphalt(tpriv, &tcb->katimer);
+ tcpsetstate(s, Time_wait);
+ tcb->timer.start = MSL2*(1000 / MSPTICK);
+ tcpgo(tpriv, &tcb->timer);
+ }
+ if(!(seg.flags & RST)) {
+ tcb->flags |= FORCE;
+ goto output;
+ }
+ QUNLOCK(s);
+ poperror();
+ return;
+ }
+
+ /* Cannot accept so answer with a rst */
+ if(length && tcb->state == Closed) {
+ sndrst(tcp, source, dest, length, &seg, version, "sending to Closed");
+ goto raise;
+ }
+
+ /* The segment is beyond the current receive pointer so
+ * queue the data in the resequence queue
+ */
+ if(seg.seq != tcb->rcv.nxt)
+ if(length != 0 || (seg.flags & (SYN|FIN))) {
+ update(s, &seg);
+ if(addreseq(tcb, tpriv, &seg, bp, length) < 0)
+ print("reseq %I.%d -> %I.%d\n", s->raddr, s->rport, s->laddr, s->lport);
+ tcb->flags |= FORCE;
+ goto output;
+ }
+
+ /*
+ * keep looping till we've processed this packet plus any
+ * adjacent packets in the resequence queue
+ */
+ for(;;) {
+ if(seg.flags & RST) {
+ if(tcb->state == Established) {
+ tpriv->stats[EstabResets]++;
+ if(tcb->rcv.nxt != seg.seq)
+ print("out of order RST rcvd: %I.%d -> %I.%d, rcv.nxt %lux seq %lux\n", s->raddr, s->rport, s->laddr, s->lport, tcb->rcv.nxt, seg.seq);
+ }
+ localclose(s, Econrefused);
+ goto raise;
+ }
+
+ if((seg.flags&ACK) == 0)
+ goto raise;
+
+ switch(tcb->state) {
+ case Syn_received:
+ if(!seq_within(seg.ack, tcb->snd.una+1, tcb->snd.nxt)){
+ sndrst(tcp, source, dest, length, &seg, version,
+ "bad seq in Syn_received");
+ goto raise;
+ }
+ update(s, &seg);
+ tcpsetstate(s, Established); /* no break: falls through to Established */
+ case Established:
+ case Close_wait:
+ update(s, &seg);
+ break;
+ case Finwait1:
+ update(s, &seg);
+ if(qlen(s->wq)+tcb->flgcnt == 0){
+ tcphalt(tpriv, &tcb->rtt_timer);
+ tcphalt(tpriv, &tcb->acktimer);
+ tcpsetkacounter(tcb);
+ tcb->time = NOW;
+ tcpsetstate(s, Finwait2);
+ tcb->katimer.start = MSL2 * (1000 / MSPTICK);
+ tcpgo(tpriv, &tcb->katimer);
+ }
+ break;
+ case Finwait2:
+ update(s, &seg);
+ break;
+ case Closing:
+ update(s, &seg);
+ if(qlen(s->wq)+tcb->flgcnt == 0) {
+ tcphalt(tpriv, &tcb->rtt_timer);
+ tcphalt(tpriv, &tcb->acktimer);
+ tcphalt(tpriv, &tcb->katimer);
+ tcpsetstate(s, Time_wait);
+ tcb->timer.start = MSL2*(1000 / MSPTICK);
+ tcpgo(tpriv, &tcb->timer);
+ }
+ break;
+ case Last_ack:
+ update(s, &seg);
+ if(qlen(s->wq)+tcb->flgcnt == 0) {
+ localclose(s, nil);
+ goto raise;
+ } /* no break: otherwise falls through to Time_wait */
+ case Time_wait:
+ tcb->flags |= FORCE;
+ if(tcb->timer.state != TcptimerON)
+ tcpgo(tpriv, &tcb->timer);
+ }
+
+ if((seg.flags&URG) && seg.urg) {
+ if(seq_gt(seg.urg + seg.seq, tcb->rcv.urg)) {
+ tcb->rcv.urg = seg.urg + seg.seq;
+ pullblock(&bp, seg.urg);
+ }
+ }
+ else
+ if(seq_gt(tcb->rcv.nxt, tcb->rcv.urg))
+ tcb->rcv.urg = tcb->rcv.nxt;
+
+ if(length == 0) {
+ if(bp != nil)
+ freeblist(bp);
+ }
+ else {
+ switch(tcb->state){
+ default:
+ /* Ignore segment text */
+ if(bp != nil)
+ freeblist(bp);
+ break;
+
+ case Syn_received:
+ case Established:
+ case Finwait1:
+ /* If we still have some data place on
+ * receive queue
+ */
+ if(bp) {
+ bp = packblock(bp);
+ if(bp == nil)
+ panic("tcp packblock");
+ qpassnolim(s->rq, bp);
+ bp = nil;
+
+ /*
+ * Force an ack every 2 data messages. This is
+ * a hack for rob to make his home system run
+ * faster.
+ *
+ * this also keeps the standard TCP congestion
+ * control working since it needs an ack every
+ * 2 max segs worth. This is not quite that,
+ * but under a real stream is equivalent since
+ * every packet has a max seg in it.
+ */
+ if(++(tcb->rcv.una) >= 2)
+ tcb->flags |= FORCE;
+ }
+ tcb->rcv.nxt += length;
+
+ /*
+ * update our rcv window
+ */
+ tcprcvwin(s);
+
+ /*
+ * turn on the acktimer if there's something
+ * to ack
+ */
+ if(tcb->acktimer.state != TcptimerON)
+ tcpgo(tpriv, &tcb->acktimer);
+
+ break;
+ case Finwait2:
+ /* no process to read the data, send a reset */
+ if(bp != nil)
+ freeblist(bp);
+ sndrst(tcp, source, dest, length, &seg, version,
+ "send to Finwait2");
+ QUNLOCK(s);
+ poperror();
+ return;
+ }
+ }
+
+ if(seg.flags & FIN) {
+ tcb->flags |= FORCE;
+
+ switch(tcb->state) {
+ case Syn_received:
+ case Established:
+ tcb->rcv.nxt++;
+ tcpsetstate(s, Close_wait);
+ break;
+ case Finwait1:
+ tcb->rcv.nxt++;
+ if(qlen(s->wq)+tcb->flgcnt == 0) {
+ tcphalt(tpriv, &tcb->rtt_timer);
+ tcphalt(tpriv, &tcb->acktimer);
+ tcphalt(tpriv, &tcb->katimer);
+ tcpsetstate(s, Time_wait);
+ tcb->timer.start = MSL2*(1000/MSPTICK);
+ tcpgo(tpriv, &tcb->timer);
+ }
+ else
+ tcpsetstate(s, Closing);
+ break;
+ case Finwait2:
+ tcb->rcv.nxt++;
+ tcphalt(tpriv, &tcb->rtt_timer);
+ tcphalt(tpriv, &tcb->acktimer);
+ tcphalt(tpriv, &tcb->katimer);
+ tcpsetstate(s, Time_wait);
+ tcb->timer.start = MSL2 * (1000/MSPTICK);
+ tcpgo(tpriv, &tcb->timer);
+ break;
+ case Close_wait:
+ case Closing:
+ case Last_ack:
+ break;
+ case Time_wait:
+ tcpgo(tpriv, &tcb->timer);
+ break;
+ }
+ }
+
+ /*
+ * get next adjacent segment from the resequence queue.
+ * dump/trim any overlapping segments
+ */
+ for(;;) {
+ if(tcb->reseq == nil)
+ goto output;
+
+ if(seq_ge(tcb->rcv.nxt, tcb->reseq->seg.seq) == 0)
+ goto output;
+
+ getreseq(tcb, &seg, &bp, &length);
+
+ if(tcptrim(tcb, &seg, &bp, &length) == 0)
+ break;
+ }
+ }
+output: /* send whatever is due, then unlock and return */
+ tcpoutput(s);
+ QUNLOCK(s);
+ poperror();
+ return;
+raise: /* unlock, drop the packet and kick the conversation */
+ QUNLOCK(s);
+ poperror();
+ freeblist(bp);
+ tcpkick(s);
+}
+
+/*
+ * always enters and exits with the s locked. The lock is
+ * dropped around sched() after some of the packets sent, so
+ * some care has to be taken by callers. Sends at most 100 packets.
+ */
+void
+tcpoutput(Conv *s)
+{
+ Tcp seg;
+ int msgs;
+ Tcpctl *tcb;
+ Block *hbp, *bp;
+ int sndcnt, n;
+ ulong ssize, dsize, usable, sent;
+ Fs *f;
+ Tcppriv *tpriv;
+ uchar version;
+
+ f = s->p->f;
+ tpriv = s->p->priv;
+ version = s->ipversion;
+
+ for(msgs = 0; msgs < 100; msgs++) {
+ tcb = (Tcpctl*)s->ptcl;
+
+ switch(tcb->state) {
+ case Listen:
+ case Closed:
+ case Finwait2:
+ return;
+ }
+
+ /* force an ack when a window has opened up */
+ if(tcb->rcv.blocked && tcb->rcv.wnd > 0){
+ tcb->rcv.blocked = 0;
+ tcb->flags |= FORCE;
+ }
+
+ sndcnt = qlen(s->wq)+tcb->flgcnt; /* everything waiting to be sent or acked */
+ sent = tcb->snd.ptr - tcb->snd.una; /* the part of it already sent */
+
+ /* Don't send anything else until our SYN has been acked */
+ if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
+ break;
+
+ /* Compute usable segment based on offered window and limit
+ * window probes to one
+ */
+ if(tcb->snd.wnd == 0){
+ if(sent != 0) {
+ if((tcb->flags&FORCE) == 0)
+ break;
+// tcb->snd.ptr = tcb->snd.una;
+ }
+ usable = 1;
+ }
+ else {
+ usable = tcb->cwind;
+ if(tcb->snd.wnd < usable)
+ usable = tcb->snd.wnd;
+ usable -= sent;
+ }
+ ssize = sndcnt-sent;
+ if(ssize && usable < 2)
+ netlog(s->p->f, Logtcp, "throttled snd.wnd %lud cwind %lud\n",
+ tcb->snd.wnd, tcb->cwind);
+ if(usable < ssize)
+ ssize = usable;
+ if(tcb->mss < ssize)
+ ssize = tcb->mss;
+ dsize = ssize; /* ssize counts sequence space, dsize the data bytes to copy */
+ seg.urg = 0;
+
+ if(ssize == 0)
+ if((tcb->flags&FORCE) == 0)
+ break;
+
+ tcb->flags &= ~FORCE;
+ tcprcvwin(s);
+
+ /* By default we will generate an ack */
+ tcphalt(tpriv, &tcb->acktimer);
+ tcb->rcv.una = 0;
+ seg.source = s->lport;
+ seg.dest = s->rport;
+ seg.flags = ACK;
+ seg.mss = 0;
+ seg.ws = 0;
+ switch(tcb->state){
+ case Syn_sent:
+ seg.flags = 0;
+ if(tcb->snd.ptr == tcb->iss){
+ seg.flags |= SYN;
+ dsize--;
+ seg.mss = tcb->mss;
+ seg.ws = tcb->scale;
+ }
+ break;
+ case Syn_received:
+ /*
+ * don't send any data with a SYN/ACK packet
+ * because Linux rejects the packet in its
+ * attempt to solve the SYN attack problem
+ */
+ if(tcb->snd.ptr == tcb->iss){
+ seg.flags |= SYN;
+ dsize = 0;
+ ssize = 1;
+ seg.mss = tcb->mss;
+ seg.ws = tcb->scale;
+ }
+ break;
+ }
+ seg.seq = tcb->snd.ptr;
+ seg.ack = tcb->rcv.nxt;
+ seg.wnd = tcb->rcv.wnd;
+
+ /* Pull out data to send */
+ bp = nil;
+ if(dsize != 0) {
+ bp = qcopy(s->wq, dsize, sent);
+ if(BLEN(bp) != dsize) { /* the queue held less than dsize: the missing unit is sent as FIN */
+ seg.flags |= FIN;
+ dsize--;
+ }
+ }
+
+ if(sent+dsize == sndcnt)
+ seg.flags |= PSH;
+
+ /* keep track of balance of resent data */
+ if(seq_lt(tcb->snd.ptr, tcb->snd.nxt)) {
+ n = tcb->snd.nxt - tcb->snd.ptr;
+ if(ssize < n)
+ n = ssize;
+ tcb->resent += n;
+ netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr %lux nxt %lux\n",
+ s->raddr, s->rport, s->laddr, s->lport, tcb->snd.ptr, tcb->snd.nxt);
+ tpriv->stats[RetransSegs]++;
+ }
+
+ tcb->snd.ptr += ssize;
+
+ /* Pull up the send pointer so we can accept acks
+ * for this window
+ */
+ if(seq_gt(tcb->snd.ptr,tcb->snd.nxt))
+ tcb->snd.nxt = tcb->snd.ptr;
+
+ /* Build header, link data and compute cksum */
+ switch(version){
+ case V4:
+ tcb->protohdr.tcp4hdr.vihl = IP_VER4;
+ hbp = htontcp4(&seg, bp, &tcb->protohdr.tcp4hdr, tcb);
+ if(hbp == nil) {
+ freeblist(bp);
+ return;
+ }
+ break;
+ case V6:
+ tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
+ hbp = htontcp6(&seg, bp, &tcb->protohdr.tcp6hdr, tcb);
+ if(hbp == nil) {
+ freeblist(bp);
+ return;
+ }
+ break;
+ default:
+ hbp = nil; /* to suppress a warning */
+ panic("tcpoutput: version %d", version);
+ }
+
+ /* Start the transmission timers if there is new data and we
+ * expect acknowledges
+ */
+ if(ssize != 0){
+ if(tcb->timer.state != TcptimerON)
+ tcpgo(tpriv, &tcb->timer);
+
+ /* If round trip timer isn't running, start it.
+ * measure the longest packet only in case the
+ * transmission time dominates RTT
+ */
+ if(tcb->rtt_timer.state != TcptimerON)
+ if(ssize == tcb->mss) {
+ tcpgo(tpriv, &tcb->rtt_timer);
+ tcb->rttseq = tcb->snd.ptr;
+ }
+ }
+
+ tpriv->stats[OutSegs]++;
+
+ /* put off the next keep alive */
+ tcpgo(tpriv, &tcb->katimer);
+
+ switch(version){
+ case V4:
+ if(ipoput4(f, hbp, 0, s->ttl, s->tos, s) < 0){
+ /* a negative return means no route */
+ localclose(s, "no route");
+ }
+ break;
+ case V6:
+ if(ipoput6(f, hbp, 0, s->ttl, s->tos, s) < 0){
+ /* a negative return means no route */
+ localclose(s, "no route");
+ }
+ break;
+ default:
+ panic("tcpoutput2: version %d", version);
+ }
+ if((uint)(msgs%4) == 1){ /* after the 2nd, 6th, 10th... packet let others run, with s unlocked */
+ QUNLOCK(s);
+ sched();
+ QLOCK(s);
+ }
+ }
+}
+
+/*
+ * the BSD convention (hack?) for keep alives: send an ACK|PSH segment
+ * whose sequence number is below snd.una.  it carries one byte, or FIN in Finwait2.
+ */
+void
+tcpsendka(Conv *s)
+{
+	Tcp probe;
+	Tcpctl *ctl;
+	Block *pkt, *data;
+	int v4;
+
+	ctl = (Tcpctl*)s->ptcl;
+	/* header fields that do not depend on the connection state */
+	probe.source = s->lport;
+	probe.dest = s->rport;
+	probe.flags = ACK|PSH;
+	probe.urg = 0;
+	probe.mss = 0;
+	probe.ws = 0;
+
+	/* a sequence number below snd.una; far below it with tcpporthogdefense on */
+	probe.seq = ctl->snd.una-1;
+	if(tcpporthogdefense)
+		probe.seq = ctl->snd.una-(1<<30)-nrand(1<<20);
+	probe.ack = ctl->rcv.nxt;
+	ctl->rcv.una = 0;
+	probe.wnd = ctl->rcv.wnd;
+
+	data = nil;
+	if(ctl->state != Finwait2){
+		data = allocb(1);
+		data->wp++;
+	} else
+		probe.flags |= FIN;
+
+	/* Build header, link data and compute cksum */
+	v4 = isv4(s->raddr);
+	if(v4){
+		ctl->protohdr.tcp4hdr.vihl = IP_VER4;
+		pkt = htontcp4(&probe, data, &ctl->protohdr.tcp4hdr, ctl);
+	} else {
+		ctl->protohdr.tcp6hdr.vcf[0] = IP_VER6;
+		pkt = htontcp6(&probe, data, &ctl->protohdr.tcp6hdr, ctl);
+	}
+	if(pkt == nil){
+		freeblist(data);
+		return;
+	}
+	if(v4)
+		ipoput4(s->p->f, pkt, 0, s->ttl, s->tos, s);
+	else
+		ipoput6(s->p->f, pkt, 0, s->ttl, s->tos, s);
+}
+
+/*
+ * set connection to time out after 12 minutes of keep alive periods (at least 3 of them)
+ */
+void
+tcpsetkacounter(Tcpctl *tcb)
+{
+ tcb->kacounter = (12 * 60 * 1000) / (tcb->katimer.start*MSPTICK); /* 12 minutes in ms over one period in ms */
+ if(tcb->kacounter < 3)
+ tcb->kacounter = 3;
+}
+
+/*
+ * v is the Conv.  count one keep alive period down; if the counter
+ * is used up close the connection, otherwise send a keepalive and
+ * restart the timer.  a Closed connection is left alone.
+ */
+void
+tcpkeepalive(void *v)
+{
+	Conv *conv;
+	Tcpctl *ctl;
+
+	conv = v;
+	ctl = (Tcpctl*)conv->ptcl;
+	if(waserror()){
+		QUNLOCK(conv);
+		nexterror();
+	}
+	QLOCK(conv);
+	if(ctl->state == Closed){
+		/* nothing to do */
+	} else if(--(ctl->kacounter) > 0){
+		tcpsendka(conv);
+		tcpgo(conv->p->priv, &ctl->katimer);
+	} else
+		localclose(conv, Etimedout);
+	QUNLOCK(conv);
+	poperror();
+}
+
+/*
+ * start keepalive timer.  f[1], if present, is the keepalive period
+ * in ms; values under MSPTICK are ignored.  returns nil or an error string.
+ */
+char*
+tcpstartka(Conv *s, char **f, int n)
+{
+	Tcpctl *tcb;
+	int x;
+
+	tcb = (Tcpctl*)s->ptcl;
+	if(tcb->state != Established)
+		return "connection must be in Established state";
+	if(n > 1){
+		x = atoi(f[1]);
+		if(x >= MSPTICK)
+			tcb->katimer.start = x/MSPTICK;
+	}
+	tcpsetkacounter(tcb);
+	tcpgo(s->p->priv, &tcb->katimer);
+	return nil;
+}
+
+/*
+ * turn checksums on/off: a zero f[1] turns them off, any other
+ * number turns them on.  n is the number of fields in f.
+ */
+char*
+tcpsetchecksum(Conv *s, char **f, int n)
+{
+	if(n < 2)
+		return "checksum requires an argument";
+	((Tcpctl*)s->ptcl)->nochecksum = !atoi(f[1]);
+
+	return nil;
+}
+
+/*
+ * retransmit: rewind snd.ptr to snd.una, mark the connection
+ * RETRAN|FORCE, shrink the congestion state and call tcpoutput.
+ */
+void
+tcprxmit(Conv *s)
+{
+	Tcpctl *ctl;
+
+	ctl = (Tcpctl*)s->ptcl;
+	/*
+	 * We should be halving the slow start threshhold (down to one
+	 * mss) but leaving it at mss seems to work well enough.
+	 * The window is pulled down to a single packet.
+	 */
+	ctl->cwind = ctl->mss;
+	ctl->ssthresh = ctl->mss;
+
+	ctl->snd.ptr = ctl->snd.una;
+	ctl->flags |= RETRAN|FORCE;
+	tcpoutput(s);
+}
+
+/*
+ * timeout handling for the Conv passed as arg.  Time_wait is closed,
+ * Closed is ignored; any other state backs off and retransmits, or is
+ * closed with Etimedout once backedoff reaches its limit.
+ */
+void
+tcptimeout(void *arg)
+{
+	Conv *conv;
+	Tcpctl *ctl;
+	Tcppriv *priv;
+	int limit;
+
+	conv = (Conv*)arg;
+	priv = conv->p->priv;
+	ctl = (Tcpctl*)conv->ptcl;
+
+	if(waserror()){
+		QUNLOCK(conv);
+		nexterror();
+	}
+	QLOCK(conv);
+	if(ctl->state == Time_wait)
+		localclose(conv, nil);
+	else if(ctl->state != Closed){
+		/* a connection still in Syn_sent gets half the usual limit */
+		limit = MAXBACKMS;
+		if(ctl->state == Syn_sent)
+			limit = MAXBACKMS/2;
+		ctl->backoff++;
+		ctl->backedoff += ctl->timer.start * MSPTICK;
+		if(ctl->backedoff >= limit)
+			localclose(conv, Etimedout);
+		else {
+			netlog(conv->p->f, Logtcprxmt, "timeout rexmit 0x%lux %d/%d\n", ctl->snd.una, ctl->timer.start, NOW);
+			tcpsettimer(ctl);
+			tcprxmit(conv);
+			priv->stats[RetransTimeouts]++;
+			ctl->snd.dupacks = 0;
+		}
+	}
+	QUNLOCK(conv);
+	poperror();
+}
+
+int
+inwindow(Tcpctl *tcb, int seq)
+{
+ return seq_within(seq, tcb->rcv.nxt, tcb->rcv.nxt+tcb->rcv.wnd-1); /* 1 if seq is one of the rcv.wnd numbers starting at rcv.nxt */
+}
+
+/*
+ * set up state for a received SYN (or SYN ACK) packet: the peer's
+ * initial sequence number, its segment size limit and its window.
+ */
+void
+procsyn(Conv *s, Tcp *seg)
+{
+	Tcpctl *ctl;
+
+	ctl = (Tcpctl*)s->ptcl;
+	/* the receive side starts just past the peer's initial sequence number */
+	ctl->irs = seg->seq;
+	ctl->rcv.nxt = seg->seq + 1;
+	ctl->rcv.urg = ctl->rcv.nxt;
+
+	/* our sending max segment size cannot be bigger than what he asked for */
+	if(seg->mss != 0 && ctl->mss > seg->mss)
+		ctl->mss = seg->mss;
+	/* the congestion window always starts out as a single segment */
+	ctl->cwind = ctl->mss;
+	ctl->snd.wnd = seg->wnd;
+	ctl->flags |= FORCE;
+}
+
+int
+addreseq(Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
+{ /* insert seg/bp into tcb->reseq in seq order; returns -1 if the whole queue was then purged, else 0 */
+ Reseq *rp, *rp1;
+ int i, rqlen, qmax;
+
+ rp = malloc(sizeof(Reseq));
+ if(rp == nil){
+ freeblist(bp); /* bp always consumed by add_reseq */
+ return 0;
+ }
+
+ rp->seg = *seg;
+ rp->bp = bp;
+ rp->length = length;
+
+ /* Place on reassembly list sorting by starting seq number */
+ rp1 = tcb->reseq;
+ if(rp1 == nil || seq_lt(seg->seq, rp1->seg.seq)) { /* the new entry becomes the head */
+ rp->next = rp1;
+ tcb->reseq = rp;
+ if(rp->next != nil)
+ tpriv->stats[OutOfOrder]++;
+ return 0;
+ }
+
+ rqlen = 0; /* sums the lengths of the entries ahead of the insertion point */
+ for(i = 0;; i++) {
+ rqlen += rp1->length;
+ if(rp1->next == nil || seq_lt(seg->seq, rp1->next->seg.seq)) {
+ rp->next = rp1->next;
+ rp1->next = rp;
+ if(rp->next != nil)
+ tpriv->stats[OutOfOrder]++;
+ break;
+ }
+ rp1 = rp1->next;
+ }
+ qmax = QMAX<<tcb->rcv.scale;
+ if(rqlen > qmax){
+ print("resequence queue > window: %d > %d\n", rqlen, qmax);
+ i = 0;
+ for(rp1 = tcb->reseq; rp1 != nil; rp1 = rp1->next){ /* print at most the first dozen entries */
+ print("%#lux %#lux %#ux\n", rp1->seg.seq,
+ rp1->seg.ack, rp1->seg.flags);
+ if(i++ > 10){
+ print("...\n");
+ break;
+ }
+ }
+
+ /*
+ * delete entire reassembly queue; wait for retransmit.
+ * - should we be smarter and only delete the tail?
+ */
+ for(rp = tcb->reseq; rp != nil; rp = rp1){ /* this frees the entry just added as well */
+ rp1 = rp->next;
+ freeblist(rp->bp);
+ free(rp);
+ }
+ tcb->reseq = nil;
+
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * pop the first entry off the resequence queue into *seg, *bp and
+ * *length.  the outputs are left untouched if the queue is empty.
+ */
+void
+getreseq(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
+{
+	Reseq *first;
+
+	if((first = tcb->reseq) == nil)
+		return;
+	*length = first->length;
+	*bp = first->bp;
+	*seg = first->seg;
+	tcb->reseq = first->next;
+	free(first);
+}
+
+int
+tcptrim(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
+{ /* cut seg/*bp down to the part inside the receive window; returns -1 (and frees *bp) if none of it is acceptable, else 0 */
+ ushort len;
+ uchar accept;
+ int dupcnt, excess;
+
+ accept = 0;
+ len = *length; /* len is the data length plus one each for SYN and FIN */
+ if(seg->flags & SYN)
+ len++;
+ if(seg->flags & FIN)
+ len++;
+
+ if(tcb->rcv.wnd == 0) { /* closed window: only an empty segment exactly at rcv.nxt is taken */
+ if(len == 0 && seg->seq == tcb->rcv.nxt)
+ return 0;
+ }
+ else {
+ /* Some part of the segment should be in the window */
+ if(inwindow(tcb,seg->seq))
+ accept++;
+ else
+ if(len != 0) {
+ if(inwindow(tcb, seg->seq+len-1) ||
+ seq_within(tcb->rcv.nxt, seg->seq,seg->seq+len-1))
+ accept++;
+ }
+ }
+ if(!accept) {
+ freeblist(*bp);
+ return -1;
+ }
+ dupcnt = tcb->rcv.nxt - seg->seq; /* how much at the front was already received */
+ if(dupcnt > 0){
+ tcb->rerecv += dupcnt;
+ if(seg->flags & SYN){ /* the SYN accounts for the first duplicate unit */
+ seg->flags &= ~SYN;
+ seg->seq++;
+
+ if(seg->urg > 1)
+ seg->urg--;
+ else
+ seg->flags &= ~URG;
+ dupcnt--;
+ }
+ if(dupcnt > 0){
+ pullblock(bp, (ushort)dupcnt);
+ seg->seq += dupcnt;
+ *length -= dupcnt;
+
+ if(seg->urg > dupcnt)
+ seg->urg -= dupcnt;
+ else {
+ seg->flags &= ~URG;
+ seg->urg = 0;
+ }
+ }
+ }
+ excess = seg->seq + *length - (tcb->rcv.nxt + tcb->rcv.wnd); /* how much lies past the end of the window */
+ if(excess > 0) {
+ tcb->rerecv += excess;
+ *length -= excess;
+ *bp = trimblock(*bp, 0, *length);
+ if(*bp == nil)
+ panic("presotto is a boofhead");
+ seg->flags &= ~FIN; /* the FIN was in the part cut off */
+ }
+ return 0;
+}
+
+/*
+ * find the open conversation whose local address/port equal the source
+ * in bp's header and whose remote ones equal its destination; if it is
+ * in Syn_sent, close it with msg.  bp is freed either way.
+ */
+void
+tcpadvise(Proto *tcp, Block *bp, char *msg)
+{
+	Tcp4hdr *h4;
+	Tcp6hdr *h6;
+	Tcpctl *ctl;
+	uchar source[IPaddrlen];
+	uchar dest[IPaddrlen];
+	ushort psource, pdest;
+	Conv *c, **cp;
+
+	h4 = (Tcp4hdr*)(bp->rp);
+	if((h4->vihl&0xF0)==IP_VER4) {
+		v4tov6(dest, h4->tcpdst);
+		v4tov6(source, h4->tcpsrc);
+		psource = nhgets(h4->tcpsport);
+		pdest = nhgets(h4->tcpdport);
+	} else {
+		h6 = (Tcp6hdr*)(bp->rp);
+		ipmove(dest, h6->tcpdst);
+		ipmove(source, h6->tcpsrc);
+		psource = nhgets(h6->tcpsport);
+		pdest = nhgets(h6->tcpdport);
+	}
+
+	/* Look for a connection */
+	QLOCK(tcp);
+	for(cp = tcp->conv; (c = *cp) != nil; cp++) {
+		ctl = (Tcpctl*)c->ptcl;
+		if(c->rport != pdest || c->lport != psource)
+			continue;
+		if(ctl->state == Closed)
+			continue;
+		if(ipcmp(c->raddr, dest) != 0 || ipcmp(c->laddr, source) != 0)
+			continue;
+		/* found it: trade the proto lock for the conversation's */
+		QLOCK(c);
+		QUNLOCK(tcp);
+		if(ctl->state == Syn_sent)
+			localclose(c, msg);
+		QUNLOCK(c);
+		freeblist(bp);
+		return;
+	}
+	QUNLOCK(tcp);
+	freeblist(bp);
+}
+
+/* set tcpporthogdefense from "on" or "off"; returns nil, or an error string for any other val */
+static char*
+tcpporthogdefensectl(char *val)
+{
+	int on;
+	on = strcmp(val, "on") == 0;
+	if(!on && strcmp(val, "off") != 0)
+		return "unknown value for tcpporthogdefense";
+	tcpporthogdefense = on;
+	return nil;
+}
+
+/* called with c QLOCKed. f holds n fields; requests whose handlers read f[1] need n >= 2 */
+char*
+tcpctl(Conv* c, char** f, int n)
+{
+	if(n == 1 && strcmp(f[0], "hangup") == 0)
+		return tcphangup(c);
+	if(n >= 1 && strcmp(f[0], "keepalive") == 0)
+		return tcpstartka(c, f, n);
+	if(n >= 2 && strcmp(f[0], "checksum") == 0)
+		return tcpsetchecksum(c, f, n);
+	if(n >= 2 && strcmp(f[0], "tcpporthogdefense") == 0)
+		return tcpporthogdefensectl(f[1]);
+	return "unknown control request";
+}
+
+/* print every counter as "name: value\n" into buf (len bytes); returns the number of bytes used */
+int
+tcpstats(Proto *tcp, char *buf, int len)
+{
+	Tcppriv *priv;
+	char *out, *end;
+	int stat;
+
+	priv = tcp->priv;
+	end = buf+len;
+	for(out = buf, stat = 0; stat < Nstats; stat++)
+		out = seprint(out, end, "%s: %lud\n", statnames[stat], priv->stats[stat]);
+	return out - buf;
+}
+
+/*
+ * garbage collect any stale conversations:
+ * - Syn_received with tcb->time more than 5 seconds old (could be the SYN attack)
+ * - Finwait2 with tcb->time more than 5 minutes old
+ * returns the number of conversations closed.
+ * this is called whenever we run out of channels. Both checks are
+ * of questionable validity so we try to use them only when we're
+ * up against the wall.
+ */
+int
+tcpgc(Proto *tcp)
+{
+	Conv **slot, **end, *c;
+	Tcpctl *ctl;
+	int closed, stale;
+
+	closed = 0;
+	end = &tcp->conv[tcp->nc];
+	for(slot = tcp->conv; slot < end && (c = *slot) != nil; slot++) {
+		/* skip the conversation if CANQLOCK fails */
+		if(!CANQLOCK(c))
+			continue;
+		ctl = (Tcpctl*)c->ptcl;
+
+		/* stale is set when ctl->time is too old for the state */
+		stale = 0;
+		switch(ctl->state){
+		case Syn_received:
+			stale = NOW - ctl->time > 5000;
+			break;
+		case Finwait2:
+			stale = NOW - ctl->time > 5*60*1000;
+			break;
+		}
+
+		if(stale){
+			localclose(c, "timed out");
+			closed++;
+		}
+		QUNLOCK(c);
+	}
+
+	return closed;
+}
+
+/* set tcb->timer.start from the current backoff and the round trip estimates */
+void
+tcpsettimer(Tcpctl *tcb)
+{
+	int ticks;
+
+	/* round trip dependency */
+	ticks = backoff(tcb->backoff) *
+		(tcb->mdev + (tcb->srtt>>LOGAGAIN) + MSPTICK) / MSPTICK;
+	/* bounded twixt 1/2 and 64 seconds */
+	if(ticks > (64000/MSPTICK))
+		ticks = 64000/MSPTICK;
+	if(ticks < 500/MSPTICK)
+		ticks = 500/MSPTICK;
+	tcb->timer.start = ticks;
+}
+
+/* allocate the tcp Proto and its Tcppriv, fill in the entry points and hand it to Fsproto */
+void
+tcpinit(Fs *fs)
+{
+	Proto *p;
+	Tcppriv *priv;
+
+	p = smalloc(sizeof(Proto));
+	priv = p->priv = smalloc(sizeof(Tcppriv));
+	p->name = "tcp";
+	p->ipproto = IP_TCPPROTO;
+	p->ptclsize = sizeof(Tcpctl);
+	p->nc = scalednconv();
+	priv->stats[MaxConn] = p->nc;
+	p->create = tcpcreate;
+	p->connect = tcpconnect;
+	p->announce = tcpannounce;
+	p->close = tcpclose;
+	p->ctl = tcpctl;
+	p->state = tcpstate;
+	p->rcv = tcpiput;
+	p->advise = tcpadvise;
+	p->stats = tcpstats;
+	p->inuse = tcpinuse;
+	p->gc = tcpgc;
+	Fsproto(fs, p);
+}
+
+/* record the window scales (none at all if rcvscale is 0) and set the read queue limit to match */
+void
+tcpsetscale(Conv *s, Tcpctl *tcb, ushort rcvscale, ushort sndscale)
+{
+	if(rcvscale == 0){
+		tcb->rcv.scale = 0;
+		tcb->snd.scale = 0;
+		tcb->window = QMAX;
+	} else {
+		tcb->rcv.scale = rcvscale & 0xff;
+		tcb->snd.scale = sndscale & 0xff;
+		tcb->window = QMAX<<tcb->snd.scale;
+	}
+	qsetlimit(s->rq, tcb->window);
+}
diff --git a/src/9vx/a/ip/tripmedium.c b/src/9vx/a/ip/tripmedium.c
@@ -0,0 +1,398 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+#include "trip.h"
+
+static void tripread(void *a);
+static void tripbind(Ipifc *ifc, int argc, char **argv);
+static void tripunbind(Ipifc *ifc);
+static void tripbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
+static void tripaddmulti(Ipifc *ifc, uchar*, uchar*);
+static void tripremmulti(Ipifc *ifc, uchar*, uchar*);
+static void tripaddroute(Ipifc *ifc, int, uchar*, uchar*, uchar*, int);
+static void tripremroute(Ipifc *ifc, int, uchar*, uchar*);
+static void tripares(Fs*, int, uchar*, uchar*, int, int);
+
+/*
+ * Medium descriptor for the "trip" medium: its name, transfer unit
+ * limits, MAC length and the handlers defined in this file.
+ * It is registered with the IP stack by tripmediumlink().
+ */
+Medium tripmedium =
+{
+.name=		"trip",
+.mintu=		20,
+.maxtu=		64*1024,
+.maclen=	LCIMACSIZE,
+.bind=		tripbind,
+.unbind=	tripunbind,
+.bwrite=	tripbwrite,
+.addmulti=	tripaddmulti,
+.remmulti=	tripremmulti,
+.addroute=	tripaddroute,
+.remroute=	tripremroute,
+.ares=		tripares,
+};
+
+/*
+ * Per-interface state for a bound trip medium; allocated in tripbind
+ * and stored in ifc->arg.
+ */
+typedef struct Tripinfo Tripinfo;
+struct Tripinfo
+{
+	Fs*	fs;		/* my instance of the IP stack */
+	Ipifc*	ifc;		/* IP interface */
+	Card*	dev;		/* card returned by tripsetifc() at bind time */
+	Proc*	readp;		/* reading process (set by tripread itself) */
+	Chan*	mchan;		/* Data channel */
+};
+
+/*
+ * called to bind an IP ifc to a trip device
+ * called with ifc qlock'd
+ *
+ * argv[2] names the device file.  It is opened read/write, converted
+ * to a Chan, and checked to belong to the device whose devtab entry
+ * has dc 'T'.  On success a Tripinfo is hung off ifc->arg and a
+ * "tripread" kproc is started to feed received packets to IP.
+ * Failures are reported through error().
+ */
+static void
+tripbind(Ipifc *ifc, int argc, char **argv)
+{
+	int fd;
+	Chan *mchan;
+	Tripinfo *er;
+
+	/* argv[2] is used below, so three arguments are the minimum */
+	if(argc < 3)
+		error(Ebadarg);
+
+	fd = kopen(argv[2], ORDWR);
+	if(fd < 0)
+		error("trip open failed");
+
+	/* keep the channel, drop the descriptor */
+	mchan = fdtochan(up->env->fgrp, fd, ORDWR, 0, 1);
+	kclose(fd);
+
+	/* only the 'T' device may back this medium */
+	if(devtab[mchan->type]->dc != 'T') {
+		cclose(mchan);
+		error(Enoport);
+	}
+
+	er = smalloc(sizeof(*er));
+	er->mchan = mchan;
+	er->ifc = ifc;
+	er->dev = tripsetifc(mchan, ifc);
+	er->fs = ifc->conv->p->f;
+
+	ifc->arg = er;
+
+	kproc("tripread", tripread, ifc);
+}
+
+/*
+ * called with ifc qlock'd
+ *
+ * Undo tripbind: close the data channel and free the Tripinfo.
+ * NOTE(review): the code that would notify the reader process is
+ * commented out, so nothing visible here stops the tripread kproc
+ * before er is freed; the tsleep below only delays.  Confirm that the
+ * reader cannot touch er after this returns.
+ */
+static void
+tripunbind(Ipifc *ifc)
+{
+	Tripinfo *er = ifc->arg;
+/*
+	if(er->readp)
+		postnote(er->readp, 1, "unbind", 0);
+*/
+	/* pause before tearing down (300, presumably milliseconds) */
+	tsleep(&up->sleep, return0, 0, 300);
+
+	if(er->mchan != nil)
+		cclose(er->mchan);
+
+	free(er);
+}
+
+/*
+ * called by ipoput with a single block to write
+ *
+ * Passes bp to the bwrite routine of the device behind er->mchan and
+ * counts the packet in ifc->out.  If the device write raises an
+ * error, a message is printed and the packet is not counted.
+ */
+static void
+tripbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip)
+{
+	Tripinfo *er = ifc->arg;
+
+	/*
+	 * Packet is rerouted at linecard
+	 * so the gateway is ignored
+	 */
+	USED(ip);
+	USED(version);
+
+	/* catch errors from the device instead of propagating them */
+	if(waserror()) {
+		print("tripwrite failed\n");
+		return;
+	}
+
+	devtab[er->mchan->type]->bwrite(er->mchan, bp, 0);
+	poperror();
+	ifc->out++;
+}
+
+/*
+ * process to read from the trip interface
+ *
+ * Started by tripbind as a kproc with the Ipifc as argument.  Loops
+ * forever reading blocks of up to ifc->maxtu bytes from the device
+ * and handing each one to ipiput4.
+ */
+static void
+tripread(void *a)
+{
+	Ipifc *ifc;
+	Block *bp;
+	Tripinfo *er;
+
+	ifc = a;
+	er = ifc->arg;
+	er->readp = up;	/* hide identity under a rock for unbind */
+
+	for(;;) {
+		bp = devtab[er->mchan->type]->bread(er->mchan, ifc->maxtu, 0);
+		ifc->in++;
+		ipiput4(er->fs, ifc, bp);
+	}
+
+	/* the loop above has no break, so this line is not reached by normal flow */
+	pexit("hangup", 1);
+}
+
+/*
+ * Medium addroute hook: tell the line card about a new route.
+ *
+ * v carries the route flags (Rv4 selects a 4-byte address form,
+ * otherwise the full IPaddrlen form is used); addr, mask and gate
+ * are in that form; t is passed through as the route type.
+ * Nothing is sent when the card is not routing or when addr is
+ * a multicast address.
+ */
+static void
+tripaddroute(Ipifc *ifc, int v, uchar *addr, uchar *mask, uchar *gate, int t)
+{
+	int alen;
+	MTroute mtr;
+	Tripinfo *tinfo;
+
+	tinfo = ifc->arg;
+	if(!tinfo->dev->routing)
+		return;
+
+	/*
+	 * Multicast addresses are handled on the linecard by
+	 * the multicast port driver, so the route load is dumped.
+	 *	loaded by addmulti/remmulti for SBC routes
+	 *		  joinmulti/leavemulti for inter LC
+	 */
+	if(ipismulticast(addr))
+		return;
+
+	/*
+	 * Start from a zeroed message: in the v4 case only alen bytes of
+	 * each address field are filled in, and the whole struct is sent.
+	 */
+	memset(&mtr, 0, sizeof(mtr));
+	mtr.type = T_ROUTEADMIN;
+	if(v & Rv4) {
+		mtr.op = RTADD4;
+		alen = IPv4addrlen;
+	}
+	else {
+		mtr.op = RTADD6;
+		alen = IPaddrlen;
+	}
+	mtr.rtype = t;
+	memmove(mtr.addr, addr, alen);
+	memmove(mtr.mask, mask, alen);
+	memmove(mtr.gate, gate, alen);
+
+	i2osend(tinfo->dev, &mtr, sizeof(mtr));
+}
+
+/*
+ * Medium remroute hook: tell the line card to drop a route.
+ *
+ * v carries the route flags (Rv4 selects the 4-byte address form);
+ * addr and mask are in that form.  Nothing is sent when the card is
+ * not routing or when addr is a multicast address.
+ */
+static void
+tripremroute(Ipifc *ifc, int v, uchar *addr, uchar *mask)
+{
+	int alen;
+	MTroute mtr;
+	Tripinfo *tinfo;
+
+	tinfo = ifc->arg;
+	if(!tinfo->dev->routing)
+		return;
+
+	if(ipismulticast(addr))
+		return;
+
+	/*
+	 * Zero the message first: rtype and gate are not used for a
+	 * delete, and the whole struct is sent to the card.
+	 */
+	memset(&mtr, 0, sizeof(mtr));
+	mtr.type = T_ROUTEADMIN;
+	if(v & Rv4) {
+		mtr.op = RTDEL4;
+		alen = IPv4addrlen;
+	}
+	else {
+		mtr.op = RTDEL6;
+		alen = IPaddrlen;
+	}
+	memmove(mtr.addr, addr, alen);
+	memmove(mtr.mask, mask, alen);
+
+	i2osend(tinfo->dev, &mtr, sizeof(mtr));
+}
+
+/*
+ * Routewalk callback used by tripsync: convert one route entry and
+ * push it to the interface held in rw->state via tripaddroute.
+ */
+static void
+tripxmitroute(Route *r, Routewalk *rw)
+{
+	uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen];
+	char type[5];
+	int nifc;
+
+	convroute(r, addr, mask, gate, type, &nifc);
+	if(r->type & Rv4)
+		/* v4 routes are passed in their 4-byte form */
+		tripaddroute(rw->state, Rv4, addr+IPv4off, mask+IPv4off, gate+IPv4off, r->type);
+	else
+		tripaddroute(rw->state, 0, addr, mask, gate, r->type);
+}
+
+/*
+ * Tell the line card behind dest about every other interface in the
+ * same IP stack: one IFADD4/IFADD6 message per local address.
+ * For interfaces that are themselves trip interfaces the message
+ * carries the other card's bar[0].bar value as the port; otherwise
+ * the port is 0.
+ */
+static void
+sendifcinfo(Ipifc *dest)
+{
+	Conv **cp, **e;
+	Iplifc *l;
+	Ipifc *ifc;
+	MTifctl mtc;
+	Tripinfo *tinfo, *oinfo;
+	Proto *p;
+
+	tinfo = dest->arg;
+
+	/* Install interfaces */
+	p = tinfo->fs->ipifc;
+	e = &p->conv[p->nc];
+	for(cp = p->conv; cp < e; cp++) {
+
+		if(*cp == nil)
+			continue;
+
+		ifc = (Ipifc*)(*cp)->ptcl;
+		if(dest == ifc)
+			continue;
+
+		/* never send uninitialised stack bytes to the card */
+		memset(&mtc, 0, sizeof(mtc));
+		mtc.type = T_CTLIFADMIN;
+		mtc.maxtu = ifc->maxtu;
+		mtc.mintu = ifc->mintu;
+
+		mtc.port = 0;
+		if(ifc->m == &tripmedium) {
+			oinfo = ifc->arg;
+			mtc.port = oinfo->dev->bar[0].bar;
+		}
+
+		for(l = ifc->lifc; l != nil; l = l->next) {
+			/*
+			 * Clear the address fields for each message so a short
+			 * v4 address does not carry the tail of an earlier v6 one.
+			 */
+			memset(mtc.addr, 0, sizeof(mtc.addr));
+			memset(mtc.mask, 0, sizeof(mtc.mask));
+			if(isv4(l->local)) {
+				mtc.op = IFADD4;
+				memmove(mtc.addr, l->local+IPv4off, IPv4addrlen);
+				memmove(mtc.mask, l->mask+IPv4off, IPv4addrlen);
+			}
+			else {
+				mtc.op = IFADD6;
+				memmove(mtc.addr, l->local, sizeof(mtc.addr));
+				memmove(mtc.mask, l->mask, sizeof(mtc.mask));
+			}
+
+			i2osend(tinfo->dev, &mtc, sizeof(mtc));
+		}
+	}
+}
+
+/*
+ * Bring a line card up to date: replay the IP stack's route table to
+ * it and then describe the other interfaces.  Prints a message and
+ * does nothing when ifc is nil.
+ */
+void
+tripsync(Ipifc *ifc)
+{
+	Routewalk rw;
+
+	if(ifc == nil) {
+		print("tripsync: interface not bound\n");
+		return;
+	}
+
+	/* Mirror the route table into the linecard */
+	rw.o = 0;
+	rw.n = (1<<22);
+	rw.state = ifc;		/* handed to tripxmitroute for each route */
+	rw.walk = tripxmitroute;
+
+	ipwalkroutes(ifc->conv->p->f, &rw);
+
+	/*
+	 * Tell the linecard about interfaces that already
+	 * exist elsewhere
+	 */
+	sendifcinfo(ifc);
+}
+
+/*
+ * Medium addmulti hook: tell a line card that the SBC wants to
+ * listen to multicast address addr on interface address ifca.
+ * Does nothing when the card is not routing.
+ */
+static void
+tripaddmulti(Ipifc *ifc, uchar *addr, uchar *ifca)
+{
+	Tripinfo *ti;
+	MTmultiears msg;
+
+	ti = ifc->arg;
+	if(ti->dev->routing){
+		msg.type = T_MULTIEAR;
+		msg.op = ADDMULTI;
+		memmove(msg.addr, addr, sizeof(msg.addr));
+		memmove(msg.ifca, ifca, sizeof(msg.ifca));
+		i2osend(ti->dev, &msg, sizeof(msg));
+	}
+}
+
+/*
+ * Medium remmulti hook: tell a line card that the SBC no longer
+ * wants to listen to multicast address addr on interface address ifca.
+ * Does nothing when the card is not routing.
+ */
+static void
+tripremmulti(Ipifc *ifc, uchar *addr, uchar *ifca)
+{
+	Tripinfo *ti;
+	MTmultiears msg;
+
+	ti = ifc->arg;
+	if(ti->dev->routing){
+		msg.type = T_MULTIEAR;
+		msg.op = REMMULTI;
+		memmove(msg.addr, addr, sizeof(msg.addr));
+		memmove(msg.ifca, ifca, sizeof(msg.ifca));
+		i2osend(ti->dev, &msg, sizeof(msg));
+	}
+}
+
+/*
+ * Medium ares hook: pass a resolved address (ip -> mac, l bytes of
+ * MAC) to the line card serving the route for ip.
+ *
+ * vers is V4 or something else (treated as v6); a V4 ip is given in
+ * its 4-byte form and is sent to the card in 16-byte form.  Nothing
+ * is sent when no route exists or the card is not routing.
+ * NOTE(review): r->ifc->arg is used as a Tripinfo without checking
+ * that the route's interface is a trip interface; confirm callers
+ * guarantee that.
+ */
+static void
+tripares(Fs *fs, int vers, uchar *ip, uchar *mac, int l, int)
+{
+	Route *r;
+	Ipifc *ifc;
+	MTaresenter ta;
+	Tripinfo *tinfo;
+	uchar v6ip[IPaddrlen];
+
+	if(vers == V4) {
+		r = v4lookup(fs, ip);
+		/* from here on ip is the 16-byte form, converted exactly once */
+		v4tov6(v6ip, ip);
+		ip = v6ip;
+	}
+	else
+		r = v6lookup(fs, ip);
+
+	if(r == nil) {
+		print("tripares: no route for entry\n");
+		return;
+	}
+
+	ifc = r->ifc;
+
+	tinfo = ifc->arg;
+	if(!tinfo->dev->routing)
+		return;
+
+	/* only l bytes of amac are filled in; don't send stack garbage */
+	memset(&ta, 0, sizeof(ta));
+	ta.type = T_ARESENTER;
+	ta.maclen = l;
+	memmove(ta.addr, ip, IPaddrlen);
+	memmove(ta.amac, mac, l);
+
+	i2osend(tinfo->dev, &ta, sizeof(ta));
+}
+
+/*
+ * Link-time hook: register the trip medium with the IP stack.
+ */
+void
+tripmediumlink(void)
+{
+	addipmedium(&tripmedium);
+}
diff --git a/src/9vx/a/ip/udp.c b/src/9vx/a/ip/udp.c
@@ -0,0 +1,619 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+
+
+#define DPRINT if(0)print
+
+/*
+ * Header sizes and offsets used when building and checking UDP
+ * packets, plus protocol constants.
+ */
+enum
+{
+	UDP_UDPHDR_SZ	= 8,	/* bytes in the udp header proper */
+
+	UDP4_PHDR_OFF = 8,	/* offset of Udp4hdr.Unused, where the v4 pseudo header starts */
+	UDP4_PHDR_SZ = 12,	/* Unused, udpproto, udpplen, udpsrc, udpdst */
+	UDP4_IPHDR_SZ = 20,
+	UDP6_IPHDR_SZ = 40,
+	UDP6_PHDR_SZ = 40,
+	UDP6_PHDR_OFF = 0,	/* the v6 pseudo header overlays the whole ip header */
+
+	IP_UDPPROTO	= 17,
+	UDP_USEAD7	= 52,	/* "headers" prefix: 3 addresses of IPaddrlen + 2 ports */
+
+	Udprxms		= 200,
+	Udptickms	= 100,
+	Udpmaxxmit	= 10,
+};
+
+/*
+ * IPv4 + UDP header as laid out in a packet.  The fields from Unused
+ * through udpdst double as the checksum pseudo header: udpkick and
+ * udpiput write the protocol and UDP length there while computing
+ * the checksum (udpiput saves and restores the original bytes).
+ */
+typedef struct Udp4hdr Udp4hdr;
+struct Udp4hdr
+{
+	/* ip header */
+	uchar	vihl;		/* Version and header length */
+	uchar	tos;		/* Type of service */
+	uchar	length[2];	/* packet length */
+	uchar	id[2];		/* Identification */
+	uchar	frag[2];	/* Fragment information */
+	uchar	Unused;		/* zeroed for the checksum; udpiput saves it as the old ttl */
+	uchar	udpproto;	/* Protocol */
+	uchar	udpplen[2];	/* Header plus data length */
+	uchar	udpsrc[IPv4addrlen];	/* Ip source */
+	uchar	udpdst[IPv4addrlen];	/* Ip destination */
+
+	/* udp header */
+	uchar	udpsport[2];	/* Source port */
+	uchar	udpdport[2];	/* Destination port */
+	uchar	udplen[2];	/* data length */
+	uchar	udpcksum[2];	/* Checksum */
+};
+
+/*
+ * IPv6 + UDP header as laid out in a packet.  For checksumming, the
+ * first 8 bytes are temporarily rewritten as a pseudo header: the UDP
+ * length goes in viclfl and the protocol number in hoplimit (see
+ * udpkick and udpiput).
+ */
+typedef struct Udp6hdr Udp6hdr;
+struct Udp6hdr {
+	uchar viclfl[4];	/* first header word; byte 0 is set to IP_VER6 on output */
+	uchar len[2];		/* payload length */
+	uchar nextheader;	/* set to IP_UDPPROTO */
+	uchar hoplimit;
+	uchar udpsrc[IPaddrlen];
+	uchar udpdst[IPaddrlen];
+
+	/* udp header */
+	uchar	udpsport[2];	/* Source port */
+	uchar	udpdport[2];	/* Destination port */
+	uchar	udplen[2];	/* data length */
+	uchar	udpcksum[2];	/* Checksum */
+};
+
+/* MIB II counters; reported by udpstats() */
+typedef struct Udpstats Udpstats;
+struct Udpstats
+{
+	ulong	udpInDatagrams;		/* incremented for every packet entering udpiput */
+	ulong	udpNoPorts;		/* no matching conversation found */
+	ulong	udpInErrors;		/* incremented on checksum failure */
+	ulong	udpOutDatagrams;	/* packets sent by udpkick */
+};
+
+/*
+ * Per-protocol private state, hung off Proto.priv by udpinit.
+ */
+typedef struct Udppriv Udppriv;
+struct Udppriv
+{
+	Ipht		ht;	/* hash table of conversations, searched by udpiput */
+
+	/* MIB counters */
+	Udpstats	ustats;
+
+	/* non-MIB stats */
+	ulong		csumerr;		/* checksum errors */
+	ulong		lenerr;			/* short packet */
+};
+
+void (*etherprofiler)(char *name, int qlen);
+void udpkick(void *x, Block *bp);
+
+/*
+ *  protocol specific part of Conv
+ */
+typedef struct Udpcb Udpcb;
+struct Udpcb
+{
+	QLock	qlock;
+	uchar	headers;	/* 0, or 7 once the "headers" ctl has been written (see udpctl) */
+};
+
+/*
+ * Proto connect hook.  Runs the standard connect, reports the outcome
+ * to the conversation, and on success enters it in the hash table.
+ * Returns nil on success or the error string.
+ */
+static char*
+udpconnect(Conv *c, char **argv, int argc)
+{
+	Udppriv *priv;
+	char *err;
+
+	priv = c->p->priv;
+	err = Fsstdconnect(c, argv, argc);
+	/* the outcome is reported whether or not the connect worked */
+	Fsconnected(c, err);
+	if(err == nil)
+		iphtadd(&priv->ht, c);
+	return err;
+}
+
+
+/*
+ * Proto state hook: format "Open"/"Closed" and the read/write queue
+ * lengths into state (at most n bytes).  Returns snprint's result.
+ */
+static int
+udpstate(Conv *c, char *state, int n)
+{
+	char *status;
+	int nin, nout;
+
+	status = "Closed";
+	if(c->inuse)
+		status = "Open";
+
+	/* a missing queue counts as empty */
+	nin = 0;
+	if(c->rq)
+		nin = qlen(c->rq);
+	nout = 0;
+	if(c->wq)
+		nout = qlen(c->wq);
+
+	return snprint(state, n, "%s qin %d qout %d\n", status, nin, nout);
+}
+
+/*
+ * Proto announce hook.  Runs the standard announce; on success marks
+ * the conversation connected and enters it in the hash table.
+ * Returns nil on success or the error string.
+ */
+static char*
+udpannounce(Conv *c, char** argv, int argc)
+{
+	Udppriv *priv;
+	char *err;
+
+	priv = c->p->priv;
+	err = Fsstdannounce(c, argv, argc);
+	if(err == nil){
+		Fsconnected(c, nil);
+		iphtadd(&priv->ht, c);
+	}
+	return err;
+}
+
+/*
+ * Proto create hook: give a new conversation its queues.
+ */
+static void
+udpcreate(Conv *c)
+{
+	/* receive side: a message queue with a 128*1024 limit */
+	c->rq = qopen(128*1024, Qmsg, 0, 0);
+	/* send side: writes bypass queueing and go straight to udpkick */
+	c->wq = qbypass(udpkick, c);
+}
+
+/*
+ * Proto close hook: take the conversation out of the hash table,
+ * close its queues and reset addresses, ports and header mode.
+ */
+static void
+udpclose(Conv *c)
+{
+	Udppriv *priv;
+	Udpcb *cb;
+
+	priv = c->p->priv;
+	cb = (Udpcb*)c->ptcl;
+
+	iphtrem(&priv->ht, c);
+	c->state = 0;
+
+	qclose(c->rq);
+	qclose(c->wq);
+	qclose(c->eq);
+
+	/* forget both endpoints */
+	ipmove(c->laddr, IPnoaddr);
+	ipmove(c->raddr, IPnoaddr);
+	c->lport = 0;
+	c->rport = 0;
+
+	cb->headers = 0;
+}
+
+/*
+ * Output routine for a conversation's write queue (installed with
+ * qbypass in udpcreate): x is the Conv, bp the data to send.
+ *
+ * In "headers" mode (ucb->headers == 7) the data starts with a
+ * UDP_USEAD7-byte prefix giving remote address, local address,
+ * interface address (ignored), remote port and local port (ignored).
+ * Otherwise the conversation's own addresses and ports are used.
+ * The IP+UDP header is prepended, the checksum computed over the
+ * pseudo header, and the packet passed to ipoput4 or ipoput6.
+ */
+void
+udpkick(void *x, Block *bp)
+{
+	Conv *c = x;
+	Udp4hdr *uh4;
+	Udp6hdr *uh6;
+	ushort rport;
+	uchar laddr[IPaddrlen], raddr[IPaddrlen];
+	Udpcb *ucb;
+	int dlen, ptcllen;
+	Udppriv *upriv;
+	Fs *f;
+	int version;
+	Conv *rc;
+
+	upriv = c->p->priv;
+	f = c->p->f;
+
+	netlog(c->p->f, Logudp, "udp: kick\n");
+	if(bp == nil)
+		return;
+
+	ucb = (Udpcb*)c->ptcl;
+	switch(ucb->headers) {
+	case 7:
+		/* get user specified addresses */
+		bp = pullupblock(bp, UDP_USEAD7);
+		if(bp == nil)
+			return;
+		ipmove(raddr, bp->rp);
+		bp->rp += IPaddrlen;
+		ipmove(laddr, bp->rp);
+		bp->rp += IPaddrlen;
+		/* pick interface closest to dest */
+		if(ipforme(f, laddr) != Runi)
+			findlocalip(f, laddr, raddr);
+		bp->rp += IPaddrlen;		/* Ignore ifc address */
+		rport = nhgets(bp->rp);
+		bp->rp += 2+2;			/* Ignore local port */
+		break;
+	default:
+		rport = 0;
+		break;
+	}
+
+	/*
+	 * Choose the IP version from the addresses: v4 when they carry
+	 * the v4 prefix, or when the deciding address is unset.
+	 */
+	if(ucb->headers) {
+		if(memcmp(laddr, v4prefix, IPv4off) == 0
+		|| ipcmp(laddr, IPnoaddr) == 0)
+			version = 4;
+		else
+			version = 6;
+	} else {
+		if( (memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
+			memcmp(c->laddr, v4prefix, IPv4off) == 0)
+			|| ipcmp(c->raddr, IPnoaddr) == 0)
+			version = 4;
+		else
+			version = 6;
+	}
+
+	/* payload length, measured before the headers are prepended */
+	dlen = blocklen(bp);
+
+	/* fill in pseudo header and compute checksum */
+	switch(version){
+	case V4:
+		bp = padblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ);
+		if(bp == nil)
+			return;
+
+		uh4 = (Udp4hdr *)(bp->rp);
+		ptcllen = dlen + UDP_UDPHDR_SZ;
+		uh4->Unused = 0;
+		uh4->udpproto = IP_UDPPROTO;
+		uh4->frag[0] = 0;
+		uh4->frag[1] = 0;
+		hnputs(uh4->udpplen, ptcllen);
+		if(ucb->headers) {
+			v6tov4(uh4->udpdst, raddr);
+			hnputs(uh4->udpdport, rport);
+			v6tov4(uh4->udpsrc, laddr);
+			rc = nil;
+		} else {
+			v6tov4(uh4->udpdst, c->raddr);
+			hnputs(uh4->udpdport, c->rport);
+			/* bind a local address on first use */
+			if(ipcmp(c->laddr, IPnoaddr) == 0)
+				findlocalip(f, c->laddr, c->raddr);
+			v6tov4(uh4->udpsrc, c->laddr);
+			rc = c;
+		}
+		hnputs(uh4->udpsport, c->lport);
+		hnputs(uh4->udplen, ptcllen);
+		/* the checksum field must be zero while the sum is computed */
+		uh4->udpcksum[0] = 0;
+		uh4->udpcksum[1] = 0;
+		hnputs(uh4->udpcksum,
+		       ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ));
+		uh4->vihl = IP_VER4;
+		ipoput4(f, bp, 0, c->ttl, c->tos, rc);
+		break;
+
+	case V6:
+		bp = padblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ);
+		if(bp == nil)
+			return;
+
+		/*
+		 * using the v6 ip header to create pseudo header
+		 * first then reset it to the normal ip header
+		 */
+		uh6 = (Udp6hdr *)(bp->rp);
+		memset(uh6, 0, 8);
+		ptcllen = dlen + UDP_UDPHDR_SZ;
+		hnputl(uh6->viclfl, ptcllen);
+		uh6->hoplimit = IP_UDPPROTO;
+		if(ucb->headers) {
+			ipmove(uh6->udpdst, raddr);
+			hnputs(uh6->udpdport, rport);
+			ipmove(uh6->udpsrc, laddr);
+			rc = nil;
+		} else {
+			ipmove(uh6->udpdst, c->raddr);
+			hnputs(uh6->udpdport, c->rport);
+			/* bind a local address on first use */
+			if(ipcmp(c->laddr, IPnoaddr) == 0)
+				findlocalip(f, c->laddr, c->raddr);
+			ipmove(uh6->udpsrc, c->laddr);
+			rc = c;
+		}
+		hnputs(uh6->udpsport, c->lport);
+		hnputs(uh6->udplen, ptcllen);
+		uh6->udpcksum[0] = 0;
+		uh6->udpcksum[1] = 0;
+		hnputs(uh6->udpcksum,
+		       ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ));
+		/* turn the pseudo header back into a real v6 header */
+		memset(uh6, 0, 8);
+		uh6->viclfl[0] = IP_VER6;
+		hnputs(uh6->len, ptcllen);
+		uh6->nextheader = IP_UDPPROTO;
+		ipoput6(f, bp, 0, c->ttl, c->tos, rc);
+		break;
+
+	default:
+		panic("udpkick: version %d", version);
+	}
+	upriv->ustats.udpOutDatagrams++;
+}
+
+/*
+ * Proto rcv hook: handle one incoming UDP packet bp that arrived on
+ * interface ifc.
+ *
+ * Steps: verify the checksum over the pseudo header (restoring the IP
+ * header bytes afterwards), look up the conversation, create a new
+ * one for an announced conversation that is not in "headers" mode,
+ * strip the headers, optionally prepend the UDP_USEAD7 address
+ * prefix, and queue the data on the conversation's read queue.
+ * The packet is freed on every path that does not queue it.
+ */
+void
+udpiput(Proto *udp, Ipifc *ifc, Block *bp)
+{
+	int len;
+	Udp4hdr *uh4;
+	Udp6hdr *uh6;
+	Conv *c;
+	Udpcb *ucb;
+	uchar raddr[IPaddrlen], laddr[IPaddrlen];
+	ushort rport, lport;
+	Udppriv *upriv;
+	Fs *f;
+	int version;
+	int ottl, oviclfl, olen;
+	uchar *p;
+
+	upriv = udp->priv;
+	f = udp->f;
+	upriv->ustats.udpInDatagrams++;
+
+	/* the version nibble sits in the first byte for both header layouts */
+	uh4 = (Udp4hdr*)(bp->rp);
+	version = ((uh4->vihl&0xF0)==IP_VER6) ? 6 : 4;
+
+	/* Put back pseudo header for checksum
+	 * (remember old values for icmpnoconv()) */
+	switch(version) {
+	case V4:
+		ottl = uh4->Unused;
+		uh4->Unused = 0;
+		len = nhgets(uh4->udplen);
+		olen = nhgets(uh4->udpplen);
+		hnputs(uh4->udpplen, len);
+
+		v4tov6(raddr, uh4->udpsrc);
+		v4tov6(laddr, uh4->udpdst);
+		lport = nhgets(uh4->udpdport);
+		rport = nhgets(uh4->udpsport);
+
+		/* a zero checksum field skips verification */
+		if(nhgets(uh4->udpcksum)) {
+			if(ptclcsum(bp, UDP4_PHDR_OFF, len+UDP4_PHDR_SZ)) {
+				upriv->ustats.udpInErrors++;
+				netlog(f, Logudp, "udp: checksum error %I\n", raddr);
+				DPRINT("udp: checksum error %I\n", raddr);
+				freeblist(bp);
+				return;
+			}
+		}
+		/* restore the bytes borrowed for the pseudo header */
+		uh4->Unused = ottl;
+		hnputs(uh4->udpplen, olen);
+		break;
+	case V6:
+		uh6 = (Udp6hdr*)(bp->rp);
+		len = nhgets(uh6->udplen);
+		oviclfl = nhgetl(uh6->viclfl);
+		olen = nhgets(uh6->len);
+		ottl = uh6->hoplimit;
+		ipmove(raddr, uh6->udpsrc);
+		ipmove(laddr, uh6->udpdst);
+		lport = nhgets(uh6->udpdport);
+		rport = nhgets(uh6->udpsport);
+		/* overwrite the first 8 header bytes with the pseudo header */
+		memset(uh6, 0, 8);
+		hnputl(uh6->viclfl, len);
+		uh6->hoplimit = IP_UDPPROTO;
+		if(ptclcsum(bp, UDP6_PHDR_OFF, len+UDP6_PHDR_SZ)) {
+			upriv->ustats.udpInErrors++;
+			netlog(f, Logudp, "udp: checksum error %I\n", raddr);
+			DPRINT("udp: checksum error %I\n", raddr);
+			freeblist(bp);
+			return;
+		}
+		/* restore the real header */
+		hnputl(uh6->viclfl, oviclfl);
+		hnputs(uh6->len, olen);
+		uh6->nextheader = IP_UDPPROTO;
+		uh6->hoplimit = ottl;
+		break;
+	default:
+		panic("udpiput: version %d", version);
+		return;	/* to avoid a warning */
+	}
+
+	/* the protocol lock is held across lookup and any new-call creation */
+	QLOCK(udp);
+
+	c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
+	if(c == nil){
+		/* no conversation found */
+		upriv->ustats.udpNoPorts++;
+		QUNLOCK(udp);
+		netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
+		       laddr, lport);
+
+		switch(version){
+		case V4:
+			icmpnoconv(f, bp);
+			break;
+		case V6:
+			icmphostunr(f, ifc, bp, Icmp6_port_unreach, 0);
+			break;
+		default:
+			panic("udpiput2: version %d", version);
+		}
+
+		freeblist(bp);
+		return;
+	}
+	ucb = (Udpcb*)c->ptcl;
+
+	if(c->state == Announced){
+		if(ucb->headers == 0){
+			/* create a new conversation */
+			if(ipforme(f, laddr) != Runi) {
+				/*
+				 * NOTE(review): ifc->lifc->local is treated as a full
+				 * IPaddrlen address by the ipmove calls in this function,
+				 * yet the V4 case passes it to v4tov6 -- confirm intended.
+				 */
+				switch(version){
+				case V4:
+					v4tov6(laddr, ifc->lifc->local);
+					break;
+				case V6:
+					ipmove(laddr, ifc->lifc->local);
+					break;
+				default:
+					panic("udpiput3: version %d", version);
+				}
+			}
+			c = Fsnewcall(c, raddr, rport, laddr, lport, version);
+			if(c == nil){
+				QUNLOCK(udp);
+				freeblist(bp);
+				return;
+			}
+			iphtadd(&upriv->ht, c);
+			ucb = (Udpcb*)c->ptcl;
+		}
+	}
+
+	/* take the conversation lock before letting go of the protocol lock */
+	QLOCK(c);
+	QUNLOCK(udp);
+
+	/*
+	 * Trim the packet down to data size
+	 */
+	len -= UDP_UDPHDR_SZ;
+	switch(version){
+	case V4:
+		bp = trimblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ, len);
+		break;
+	case V6:
+		bp = trimblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ, len);
+		break;
+	default:
+		bp = nil;
+		panic("udpiput4: version %d", version);
+	}
+	if(bp == nil){
+		QUNLOCK(c);
+		netlog(f, Logudp, "udp: len err %I.%d -> %I.%d\n", raddr, rport,
+		       laddr, lport);
+		upriv->lenerr++;
+		return;
+	}
+
+	netlog(f, Logudpmsg, "udp: %I.%d -> %I.%d l %d\n", raddr, rport,
+	       laddr, lport, len);
+
+	switch(ucb->headers){
+	case 7:
+		/* pass the src address */
+		/* NOTE(review): padblock's result is not checked for nil here, unlike in udpkick */
+		bp = padblock(bp, UDP_USEAD7);
+		p = bp->rp;
+		ipmove(p, raddr); p += IPaddrlen;
+		ipmove(p, laddr); p += IPaddrlen;
+		ipmove(p, ifc->lifc->local); p += IPaddrlen;
+		hnputs(p, rport); p += 2;
+		hnputs(p, lport);
+		break;
+	}
+
+	/* hand the reader a single contiguous block */
+	if(bp->next)
+		bp = concatblock(bp);
+
+	/* drop the packet rather than overfill the read queue */
+	if(qfull(c->rq)){
+		QUNLOCK(c);
+		netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n", raddr, rport,
+		       laddr, lport);
+		freeblist(bp);
+		return;
+	}
+
+	qpass(c->rq, bp);
+	QUNLOCK(c);
+
+}
+
+/*
+ * Proto ctl hook.  The only request understood is the single word
+ * "headers", which switches the conversation to header mode 7.
+ * Returns nil on success, otherwise an error string.
+ */
+char*
+udpctl(Conv *c, char **f, int n)
+{
+	Udpcb *cb;
+
+	cb = (Udpcb*)c->ptcl;
+	if(n != 1 || strcmp(f[0], "headers") != 0)
+		return "unknown control request";
+
+	cb->headers = 7;	/* new headers format */
+	return nil;
+}
+
+/*
+ * Proto advise hook: bp holds the IP+UDP header of a packet we sent
+ * earlier and msg describes the problem reported for it.  The
+ * conversation that sent the packet is found by matching its
+ * addresses and ports against the header, and both of its queues
+ * are hung up with msg unless it has ignoreadvice set.
+ * bp is always freed.
+ */
+void
+udpadvise(Proto *udp, Block *bp, char *msg)
+{
+	Udp4hdr *h4;
+	Udp6hdr *h6;
+	uchar source[IPaddrlen], dest[IPaddrlen];
+	ushort psource, pdest;
+	Conv *s, **p;
+	int version;
+
+	h4 = (Udp4hdr*)(bp->rp);
+	version = ((h4->vihl&0xF0)==IP_VER6) ? 6 : 4;
+
+	switch(version) {
+	case V4:
+		v4tov6(dest, h4->udpdst);
+		v4tov6(source, h4->udpsrc);
+		psource = nhgets(h4->udpsport);
+		pdest = nhgets(h4->udpdport);
+		break;
+	case V6:
+		h6 = (Udp6hdr*)(bp->rp);
+		ipmove(dest, h6->udpdst);
+		ipmove(source, h6->udpsrc);
+		psource = nhgets(h6->udpsport);
+		pdest = nhgets(h6->udpdport);
+		break;
+	default:
+		panic("udpadvise: version %d", version);
+		return;  /* to avoid a warning */
+	}
+
+	/* Look for a connection */
+	QLOCK(udp);
+	/* the scan ends at the first nil entry of udp->conv */
+	for(p = udp->conv; *p; p++) {
+		s = *p;
+		/* the header is one we sent: its source is our local end */
+		if(s->rport == pdest)
+		if(s->lport == psource)
+		if(ipcmp(s->raddr, dest) == 0)
+		if(ipcmp(s->laddr, source) == 0){
+			if(s->ignoreadvice)
+				break;
+			/* lock the conversation before dropping the protocol lock */
+			QLOCK(s);
+			QUNLOCK(udp);
+			qhangup(s->rq, msg);
+			qhangup(s->wq, msg);
+			QUNLOCK(s);
+			freeblist(bp);
+			return;
+		}
+	}
+	QUNLOCK(udp);
+	freeblist(bp);
+}
+
+/*
+ * Proto stats hook: format the four MIB counters into buf (at most
+ * len bytes).  Returns snprint's result.
+ */
+int
+udpstats(Proto *udp, char *buf, int len)
+{
+	Udppriv *priv;
+	Udpstats *st;
+
+	priv = udp->priv;
+	st = &priv->ustats;
+	return snprint(buf, len, "InDatagrams: %lud\nNoPorts: %lud\nInErrors: %lud\nOutDatagrams: %lud\n",
+		st->udpInDatagrams,
+		st->udpNoPorts,
+		st->udpInErrors,
+		st->udpOutDatagrams);
+}
+
+/*
+ * Build the udp Proto, fill in its method table and sizes,
+ * and hand it to the IP stack instance fs.
+ */
+void
+udpinit(Fs *fs)
+{
+	Proto *p;
+
+	p = smalloc(sizeof(Proto));
+	p->priv = smalloc(sizeof(Udppriv));
+
+	/* identity and sizing */
+	p->name = "udp";
+	p->ipproto = IP_UDPPROTO;
+	p->nc = Nchans;
+	p->ptclsize = sizeof(Udpcb);
+
+	/* protocol methods */
+	p->connect = udpconnect;
+	p->announce = udpannounce;
+	p->ctl = udpctl;
+	p->state = udpstate;
+	p->create = udpcreate;
+	p->close = udpclose;
+	p->rcv = udpiput;
+	p->advise = udpadvise;
+	p->stats = udpstats;
+
+	Fsproto(fs, p);
+}
diff --git a/src/9vx/a/kfs.h b/src/9vx/a/kfs.h
@@ -0,0 +1,57 @@
+typedef struct Qid9p1 Qid9p1;
+typedef struct Dentry Dentry;
+typedef struct Kfsfile Kfsfile;
+typedef struct Kfs Kfs;
+
+/*
+ * DONT TOUCH, this is the disk structure
+ * NOTE(review): the fields are declared `long`, whose width depends
+ * on the host ABI; confirm the layout still matches the on-disk
+ * format on hosts where long is 8 bytes.
+ */
+struct Qid9p1
+{
+	long	path;
+	long	version;
+};
+
+#define NAMELEN 28 /* size of names */
+#define NDBLOCK 6 /* number of direct blocks in Dentry */
+
+/*
+ * DONT TOUCH, this is the disk structure
+ * One directory entry: name, ownership, mode bits, qid, size, block
+ * pointers (NDBLOCK direct, one indirect, one double indirect) and
+ * times.
+ * NOTE(review): several fields are `long`; confirm the layout matches
+ * the on-disk format on hosts where long is 8 bytes.
+ */
+struct Dentry
+{
+	char	name[NAMELEN];
+	short	uid;
+	short	gid;
+	ushort	mode;
+/*
+	mode bits (kept here for reference only; not compiled):
+		#define	DALLOC	0x8000
+		#define	DDIR	0x4000
+		#define	DAPND	0x2000
+		#define	DLOCK	0x1000
+		#define	DREAD	0x4
+		#define	DWRITE	0x2
+		#define	DEXEC	0x1
+*/
+	Qid9p1	qid;
+	long	size;
+	long	dblock[NDBLOCK];	/* direct blocks */
+	long	iblock;
+	long	diblock;
+	long	atime;
+	long	mtime;
+};
+
+/*
+ * A directory entry together with an offset.
+ * (presumably the state of an open kfs file -- confirm against users)
+ */
+struct Kfsfile
+{
+	Dentry	_;
+	long off;
+};
+
+/*
+ * Size parameters of a kfs file system.
+ * (presumably filled in by kfsinit and derived from the block size --
+ * confirm against the implementation)
+ */
+struct Kfs
+{
+	int	RBUFSIZE;
+	int	BUFSIZE;
+	int	DIRPERBUF;
+	int	INDPERBUF;
+	int	INDPERBUF2;
+};
+
+extern int kfsinit(Fs*);
+
diff --git a/src/9vx/a/netif.c b/src/9vx/a/netif.c
@@ -0,0 +1,761 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+#include "netif.h"
+
+static int netown(Netfile*, char*, int);
+static int openfile(Netif*, int);
+static char* matchtoken(char*, char*);
+static char* netmulti(Netif*, Netfile*, uchar*, int);
+static int parseaddr(uchar*, char*, int);
+
+int netifdebug;
+#define dprint(...) if(netifdebug)print(__VA_ARGS__); else USED(netifdebug)
+
+/*
+ *  set up a new network interface
+ *
+ *  name is copied (truncated to KNAMELEN-1 characters), nfile is the
+ *  number of file slots to allocate and limit is the queue limit later
+ *  used for each file's input queue.  Panics if the slot table
+ *  cannot be allocated.
+ */
+void
+netifinit(Netif *nif, char *name, int nfile, ulong limit)
+{
+	nif->nfile = nfile;
+	nif->limit = limit;
+
+	strncpy(nif->name, name, KNAMELEN-1);
+	nif->name[KNAMELEN-1] = 0;
+
+	nif->f = xalloc(nfile*sizeof(Netfile*));
+	if(nif->f == nil)
+		panic("netifinit: no memory");
+}
+
+#define DD(c,q,nam,n,owner,perm,dp) dprint("%lux.%llux %s\n", q.type, q.path, nam); devdir(c,q,nam,n,owner,perm,dp)
+
+/*
+ *  generate a 3 level directory
+ *
+ *  Directory generator handed to devwalk/devstat/devdirread: fill in
+ *  *dp for entry i of the directory that c refers to.  vp is really
+ *  the Netif (cast from Dirtab*).  Returns 1 when an entry was
+ *  produced, 0 for an empty slot that should be skipped, and -1 when
+ *  there is no entry i.
+ *
+ *  Levels:
+ *	top	(qid.path == 0): one directory named after the interface
+ *	second:	clone, addr, stats, ifstats, then one directory per
+ *		allocated Netfile, named by its index
+ *	third:	data, ctl, stats, type, ifstats for one Netfile
+ */
+static int
+netifgen(Chan *c, char *dummy, Dirtab *vp, int dummy1, int i, Dir *dp)
+{
+	Qid q;
+	Netif *nif = (Netif*)vp;
+	Netfile *f;
+	int t, perm;
+	char *o;
+
+	memset(&q, 0, sizeof q);
+	q.type = QTFILE;
+	q.vers = 0;
+
+	dprint("gen %d %llud %.2d	", c->dri, c->qid.path, i);
+	/* top level directory contains the name of the network */
+	if(c->qid.path == 0){
+		switch(i){
+		case DEVDOTDOT:
+			q.path = 0;
+			q.type = QTDIR;
+			DD(c, q, ".", 0, eve, 0555, dp);
+			break;
+		case 0:
+			q.path = N2ndqid;
+			q.type = QTDIR;
+			strcpy(up->genbuf, nif->name);
+			DD(c, q, up->genbuf, 0, eve, 0555, dp);
+			break;
+		default:
+			dprint("-> -1 (top)\n");
+			return -1;
+		}
+		return 1;
+	}
+
+	/* second level contains clone plus all the conversations */
+	t = NETTYPE(c->qid.path);
+	if(t == N2ndqid || t == Ncloneqid || t == Naddrqid || t == Nstatqid || t == Nifstatqid){
+		switch(i){
+		case DEVDOTDOT:
+			q.type = QTDIR;
+			q.path = 0;
+			DD(c, q, ".", 0, eve, DMDIR|0555, dp);
+			break;
+		case 0:
+			q.path = Ncloneqid;
+			DD(c, q, "clone", 0, eve, 0666, dp);
+			break;
+		case 1:
+			q.path = Naddrqid;
+			DD(c, q, "addr", 0, eve, 0666, dp);
+			break;
+		case 2:
+			q.path = Nstatqid;
+			DD(c, q, "stats", 0, eve, 0444, dp);
+			break;
+		case 3:
+			q.path = Nifstatqid;
+			DD(c, q, "ifstats", 0, eve, 0444, dp);
+			break;
+		default:
+			/* entries past the four fixed files map onto nif->f[] */
+			i -= 4;
+			if(i >= nif->nfile){
+				dprint("-> -1 (2d): %d %d\n", i, nif->nfile);
+				return -1;
+			}
+			if(nif->f[i] == 0){
+				dprint("nif->f[%d] -> 0\n", i);
+				return 0;
+			}
+			q.type = QTDIR;
+			q.path = NETQID(i, N3rdqid);
+			sprint(up->genbuf, "%d", i);
+			DD(c, q, up->genbuf, 0, eve, DMDIR|0555, dp);
+			break;
+		}
+		return 1;
+	}
+
+	/* third level */
+	f = nif->f[NETID(c->qid.path)];
+	if(f == 0){
+		dprint("->f 0\n");
+		return -1;
+	}
+	/* data and ctl belong to the file's owner if it has one, else to eve */
+	if(*f->owner){
+		o = f->owner;
+		perm = f->mode;
+	} else {
+		o = eve;
+		perm = 0666;
+	}
+	switch(i){
+	case DEVDOTDOT:
+		q.type = QTDIR;
+		q.path = N2ndqid;
+		strcpy(up->genbuf, nif->name);
+		DD(c, q, up->genbuf, 0, eve, DMDIR|0555, dp);
+		break;
+	case 0:
+		q.path = NETQID(NETID(c->qid.path), Ndataqid);
+		DD(c, q, "data", 0, o, perm, dp);
+		break;
+	case 1:
+		q.path = NETQID(NETID(c->qid.path), Nctlqid);
+		DD(c, q, "ctl", 0, o, perm, dp);
+		break;
+	case 2:
+		q.path = NETQID(NETID(c->qid.path), Nstatqid);
+		DD(c, q, "stats", 0, eve, 0444, dp);
+		break;
+	case 3:
+		q.path = NETQID(NETID(c->qid.path), Ntypeqid);
+		DD(c, q, "type", 0, eve, 0444, dp);
+		break;
+	case 4:
+		q.path = NETQID(NETID(c->qid.path), Nifstatqid);
+		DD(c, q, "ifstats", 0, eve, 0444, dp);
+		break;
+	default:
+		dprint("-> -1 (third)\n");
+		return -1;
+	}
+	return 1;
+}
+
+/*
+ * Debug aid for netifwalk: when netifdebug is set, print the qid path
+ * being walked from and the list of names, separated by spaces.
+ */
+static void
+prwalk(Netif *nif, Chan *c, Chan *nc, char **name, int nname)
+{
+	char line[512], *w, *lim;
+	int k;
+
+	if(netifdebug == 0)
+		return;
+
+	lim = line + sizeof line;
+	w = line;
+	k = 0;
+	while(k < nname){
+		w = seprint(w, lim, "%s ", name[k]);
+		k++;
+	}
+	/* back up over the last character written (normally the trailing space) */
+	if(w > line)
+		w--;
+	*w = 0;
+	print("netifwalk %lld [%s]\n", c->qid.path, line);
+}
+
+/*
+ * Walk within an interface's directory tree: logs the walk when
+ * debugging and then defers to devwalk with netifgen as generator.
+ */
+Walkqid*
+netifwalk(Netif *nif, Chan *c, Chan *nc, char **name, int nname)
+{
+	prwalk(nif, c, nc, name, nname);
+	/* the Netif travels to netifgen disguised as the Dirtab pointer */
+	return devwalk(c, nc, name, nname, (Dirtab *)nif, 0, netifgen);
+}
+
+/*
+ * Open a file in an interface's tree.
+ *
+ * Directories may only be opened OREAD.  Opening data or ctl takes a
+ * reference on the Netfile; opening clone allocates a free Netfile
+ * and turns the channel into that file's ctl.  Other files may only
+ * be opened OREAD.  For data and ctl the caller's right to open with
+ * omode is checked with netown.  Errors are raised with error().
+ * Returns c, marked open.
+ */
+Chan*
+netifopen(Netif *nif, Chan *c, int omode)
+{
+	int id;
+	Netfile *f;
+
+	/* qid.path is 64 bits wide: print it as such */
+	dprint("netifopen %p %lld\n", nif, c? (vlong)c->qid.path: -1LL);
+	id = 0;
+	if(c->qid.type & QTDIR){
+		if(omode != OREAD)
+			error(Eperm);
+	} else {
+		switch(NETTYPE(c->qid.path)){
+		case Ndataqid:
+		case Nctlqid:
+			id = NETID(c->qid.path);
+			openfile(nif, id);
+			break;
+		case Ncloneqid:
+			id = openfile(nif, -1);
+			c->qid.path = NETQID(id, Nctlqid);
+			break;
+		default:
+			if(omode != OREAD)
+				error(Ebadarg);
+		}
+		/* the type is looked at again because clone has just become ctl */
+		switch(NETTYPE(c->qid.path)){
+		case Ndataqid:
+		case Nctlqid:
+			f = nif->f[id];
+			if(netown(f, up->user, omode&7) < 0)
+				error(Eperm);
+			break;
+		}
+	}
+	c->mode = openmode(omode);
+	c->flag |= COPEN;
+	c->offset = 0;
+	c->iounit = qiomaxatomic;
+	return c;
+}
+
+/*
+ * Read from a file in an interface's tree.
+ *
+ * Directories are generated with netifgen.  data reads from the
+ * file's input queue, ctl returns the file's index, stats returns
+ * the interface counters as text, addr returns the interface address
+ * in hex, type returns the file's packet type and ifstats is empty.
+ * Any other file raises Ebadarg.  Returns the byte count.
+ */
+long
+netifread(Netif *nif, Chan *c, void *a, long n, ulong offset)
+{
+	int i, j;
+	Netfile *f;
+	char *p;
+
+	/* qid.path is 64 bits wide: print it as such */
+	dprint("netifread %llud %lud\n", c->qid.path, NETTYPE(c->qid.path));
+	if(c->qid.type&QTDIR)
+		return devdirread(c, a, n, (Dirtab*)nif, 0, netifgen);
+
+	switch(NETTYPE(c->qid.path)){
+	case Ndataqid:
+		f = nif->f[NETID(c->qid.path)];
+		return qread(f->in, a, n);
+	case Nctlqid:
+		return readnum(offset, a, n, NETID(c->qid.path), NUMSIZE);
+	case Nstatqid:
+		dprint("netstatqid\n");
+		p = smalloc(READSTR);
+		j = snprint(p, READSTR, "in: %llud\n", nif->inpackets);
+		j += snprint(p+j, READSTR-j, "link: %d\n", nif->link);
+		j += snprint(p+j, READSTR-j, "out: %llud\n", nif->outpackets);
+		j += snprint(p+j, READSTR-j, "crc errs: %d\n", nif->crcs);
+		j += snprint(p+j, READSTR-j, "overflows: %d\n", nif->overflows);
+		j += snprint(p+j, READSTR-j, "soft overflows: %d\n", nif->soverflows);
+		j += snprint(p+j, READSTR-j, "framing errs: %d\n", nif->frames);
+		j += snprint(p+j, READSTR-j, "buffer errs: %d\n", nif->buffs);
+		j += snprint(p+j, READSTR-j, "output errs: %d\n", nif->oerrs);
+		j += snprint(p+j, READSTR-j, "prom: %d\n", nif->prom);
+		j += snprint(p+j, READSTR-j, "mbps: %d\n", nif->mbps);
+		j += snprint(p+j, READSTR-j, "addr: ");
+		for(i = 0; i < nif->alen; i++)
+			j += snprint(p+j, READSTR-j, "%2.2ux", nif->addr[i]);
+		snprint(p+j, READSTR-j, "\n");
+		n = readstr(offset, a, n, p);
+		free(p);
+		return n;
+	case Naddrqid:
+		/*
+		 * smalloc, as in the stats case above, so the buffer pointer is
+		 * never nil; start with an empty string in case alen is 0.
+		 */
+		p = smalloc(READSTR);
+		p[0] = 0;
+		j = 0;
+		for(i = 0; i < nif->alen; i++)
+			j += snprint(p+j, READSTR-j, "%2.2ux", nif->addr[i]);
+		n = readstr(offset, a, n, p);
+		free(p);
+		return n;
+	case Ntypeqid:
+		f = nif->f[NETID(c->qid.path)];
+		return readnum(offset, a, n, f->type, NUMSIZE);
+	case Nifstatqid:
+		return 0;
+	}
+	error(Ebadarg);
+	return -1;	/* not reached */
+}
+
+/*
+ * Block read: data files are read straight from the file's input
+ * queue; everything else goes through devbread.
+ */
+Block*
+netifbread(Netif *nif, Chan *c, long n, ulong offset)
+{
+	if(!(c->qid.type & QTDIR) && NETTYPE(c->qid.path) == Ndataqid)
+		return qbread(nif->f[NETID(c->qid.path)]->in, n);
+
+	return devbread(c, n, offset);
+}
+
+/*
+ *  make sure this type isn't already in use on this device
+ *
+ *  Returns 1 if some allocated file already has this type, else 0.
+ *  Types <= 0 are never considered in use.
+ */
+static int
+typeinuse(Netif *nif, int type)
+{
+	int i;
+
+	if(type <= 0)
+		return 0;
+
+	for(i = 0; i < nif->nfile; i++)
+		if(nif->f[i] != 0 && nif->f[i]->type == type)
+			return 1;
+	return 0;
+}
+
+/*
+ *  the devxxx.c that calls us handles writing data, it knows best
+ *
+ *  Handle a write to a ctl file.  The request is copied into a local
+ *  buffer (truncated to 63 bytes) and matched against the known
+ *  commands: connect, promiscuous, scanbs, bridge, headersonly,
+ *  addmulti and remmulti.  Returns n, or -1 for an unrecognised
+ *  command; other failures are raised with error().
+ */
+long
+netifwrite(Netif *nif, Chan *c, void *a, long n)
+{
+	Netfile *f;
+	int type;
+	char *p, buf[64];
+	uchar binaddr[Nmaxaddr];
+
+	if(NETTYPE(c->qid.path) != Nctlqid)
+		error(Eperm);
+
+	if(n >= sizeof(buf))
+		n = sizeof(buf)-1;
+	memmove(buf, a, n);
+	buf[n] = 0;
+
+	/* error handler: release the interface lock taken just below */
+	if(waserror()){
+		QUNLOCK(nif);
+		nexterror();
+	}
+
+	QLOCK(nif);
+	f = nif->f[NETID(c->qid.path)];
+	if((p = matchtoken(buf, "connect")) != 0){
+		type = atoi(p);
+		if(typeinuse(nif, type))
+			error(Einuse);
+		f->type = type;
+		/* a negative type is counted in nif->all */
+		if(f->type < 0)
+			nif->all++;
+	} else if(matchtoken(buf, "promiscuous")){
+		if(f->prom == 0){
+			/* only the first promiscuous file switches the hardware */
+			if(nif->prom == 0 && nif->promiscuous != nil)
+				nif->promiscuous(nif->arg, 1);
+			f->prom = 1;
+			nif->prom++;
+		}
+	} else if((p = matchtoken(buf, "scanbs")) != 0){
+		/* scan for base stations */
+		if(f->scan == 0){
+			type = atoi(p);
+			if(type < 5)
+				type = 5;
+			if(nif->scanbs != nil)
+				nif->scanbs(nif->arg, type);
+			f->scan = type;
+			nif->scan++;
+		}
+	} else if(matchtoken(buf, "bridge")){
+		f->bridge = 1;
+	} else if(matchtoken(buf, "headersonly")){
+		f->headersonly = 1;
+	} else if((p = matchtoken(buf, "addmulti")) != 0){
+		if(parseaddr(binaddr, p, nif->alen) < 0)
+			error("bad address");
+		p = netmulti(nif, f, binaddr, 1);
+		if(p)
+			error(p);
+	} else if((p = matchtoken(buf, "remmulti")) != 0){
+		if(parseaddr(binaddr, p, nif->alen) < 0)
+			error("bad address");
+		p = netmulti(nif, f, binaddr, 0);
+		if(p)
+			error(p);
+	} else
+		n = -1;
+	QUNLOCK(nif);
+	poperror();
+	return n;
+}
+
+/*
+ * Change the owner and/or mode of a Netfile from a wstat message
+ * (db, n bytes).  The caller must pass netown's OWRITE check.
+ * Returns the number of bytes of db consumed; raises Enonexist,
+ * Eperm or Eshortstat on failure.
+ */
+int
+netifwstat(Netif *nif, Chan *c, uchar *db, int n)
+{
+	Dir *dir;
+	Netfile *f;
+	int m;
+
+	f = nif->f[NETID(c->qid.path)];
+	if(f == 0)
+		error(Enonexist);
+
+	if(netown(f, up->user, OWRITE) < 0)
+		error(Eperm);
+
+	/* room for the Dir plus the strings convM2D unpacks after it */
+	dir = smalloc(sizeof(Dir)+n);
+	m = convM2D(db, n, &dir[0], (char*)&dir[1]);
+	if(m == 0){
+		free(dir);
+		error(Eshortstat);
+	}
+	/* NOTE(review): strncpy leaves f->owner unterminated if uid has KNAMELEN or more characters */
+	if(!emptystr(dir[0].uid))
+		strncpy(f->owner, dir[0].uid, KNAMELEN);
+	/* an all-ones mode leaves the mode unchanged */
+	if(dir[0].mode != ~0UL)
+		f->mode = dir[0].mode;
+	free(dir);
+	return m;
+}
+
+/*
+ * Stat a file in an interface's tree: logs when debugging and defers
+ * to devstat with netifgen as generator.
+ */
+int
+netifstat(Netif *nif, Chan *c, uchar *db, int n)
+{
+	dprint("netifstat %s nfile %d %lld type=%d\n", nif->name, nif->nfile, c->qid.path, c->type);
+	return devstat(c, db, n, (Dirtab *)nif, 0, netifgen);
+}
+
+/*
+ * Close a data or ctl file.  Drops one reference on the Netfile; when
+ * the last reference goes, everything the file had asked of the
+ * interface is undone (promiscuous mode, scanning, multicast
+ * addresses, the count of negative-type files) and the file is reset
+ * for reuse.  Channels that were never opened, and other file types,
+ * are ignored.
+ */
+void
+netifclose(Netif *nif, Chan *c)
+{
+	Netfile *f;
+	int t;
+	Netaddr *ap;
+
+	if((c->flag & COPEN) == 0)
+		return;
+
+	t = NETTYPE(c->qid.path);
+	if(t != Ndataqid && t != Nctlqid)
+		return;
+
+	f = nif->f[NETID(c->qid.path)];
+	QLOCK(f);
+	if(--(f->inuse) == 0){
+		if(f->prom){
+			QLOCK(nif);
+			/* the last promiscuous file switches the hardware back */
+			if(--(nif->prom) == 0 && nif->promiscuous != nil)
+				nif->promiscuous(nif->arg, 0);
+			QUNLOCK(nif);
+			f->prom = 0;
+		}
+		if(f->scan){
+			QLOCK(nif);
+			if(--(nif->scan) == 0 && nif->scanbs != nil)
+				nif->scanbs(nif->arg, 0);
+			QUNLOCK(nif);
+			f->prom = 0;
+			f->scan = 0;
+		}
+		if(f->nmaddr){
+			QLOCK(nif);
+			/*
+			 * Bit t of f->maddr stands for the t'th entry of the
+			 * nif->maddr list (the numbering netmulti uses), so the
+			 * index must advance together with the list pointer.
+			 * netmulti never unlinks entries, so the positions stay
+			 * valid while we release them.
+			 */
+			t = 0;
+			for(ap = nif->maddr; ap; ap = ap->next){
+				if(t >= 8*sizeof(f->maddr))
+					break;
+				if(f->maddr[t/8] & (1<<(t%8)))
+					netmulti(nif, f, ap->addr, 0);
+				t++;
+			}
+			QUNLOCK(nif);
+			f->nmaddr = 0;
+		}
+		if(f->type < 0){
+			QLOCK(nif);
+			--(nif->all);
+			QUNLOCK(nif);
+		}
+		f->owner[0] = 0;
+print("drop type %.4ux\n", f->type);
+		f->type = 0;
+		f->bridge = 0;
+		f->headersonly = 0;
+		qclose(f->in);
+	}
+	QUNLOCK(f);
+}
+
+Lock netlock;
+
+/*
+ * Ownership check for a Netfile.  An unowned file is claimed by o
+ * with mode 0660 and the check succeeds.  Otherwise the permission
+ * bits that apply to o (owner; eve is treated as the group; anyone
+ * else as other) must include everything omode asks for.
+ * Returns 0 if allowed, -1 if not.
+ */
+static int
+netown(Netfile *p, char *o, int omode)
+{
+	static int access[] = { 0400, 0200, 0600, 0100 };
+	int perm, want, rv;
+
+	lock(&netlock);
+	if(*p->owner == 0){
+		strncpy(p->owner, o, KNAMELEN);
+		p->mode = 0660;
+		rv = 0;
+	} else {
+		/* shift the relevant permission triple up to the owner position */
+		if(strncmp(o, p->owner, KNAMELEN) == 0)	/* User */
+			perm = p->mode;
+		else if(strncmp(o, eve, KNAMELEN) == 0)	/* Bootes is group */
+			perm = p->mode<<3;
+		else
+			perm = p->mode<<6;		/* Other */
+
+		want = access[omode&3];
+		rv = (want & perm) == want ? 0 : -1;
+	}
+	unlock(&netlock);
+	return rv;
+}
+
+/*
+ *  Increment the reference count of a network device.
+ *  If id < 0, return an unused ether device.
+ *
+ *  With id >= 0 the existing Netfile nif->f[id] gets one more
+ *  reference (Enodev if there is none).  With id < 0 the slot table
+ *  is searched for a slot that is empty (a Netfile is allocated) or
+ *  not in use; it is claimed for the current user and its index is
+ *  returned.  Raises Enodev when every slot is busy.
+ */
+static int
+openfile(Netif *nif, int id)
+{
+	Netfile *f, **fp, **efp;
+
+	if(id >= 0){
+		f = nif->f[id];
+		if(f == 0)
+			error(Enodev);
+		QLOCK(f);
+		qreopen(f->in);
+		f->inuse++;
+		QUNLOCK(f);
+		return id;
+	}
+
+	QLOCK(nif);
+	/* error handler: release the interface lock on any failure below */
+	if(waserror()){
+		QUNLOCK(nif);
+		nexterror();
+	}
+	efp = &nif->f[nif->nfile];
+	for(fp = nif->f; fp < efp; fp++){
+		f = *fp;
+		if(f == 0){
+			/* empty slot: make a new Netfile with its input queue */
+			f = malloc(sizeof(Netfile));
+			if(f == 0)
+				exhausted("memory");
+			f->in = qopen(nif->limit, Qmsg, 0, 0);
+			if(f->in == nil){
+				free(f);
+				exhausted("memory");
+			}
+			*fp = f;
+			QLOCK(f);
+		} else {
+			QLOCK(f);
+			if(f->inuse){
+				QUNLOCK(f);
+				continue;
+			}
+		}
+		/* f is locked and free: claim it */
+		f->inuse = 1;
+		qreopen(f->in);
+		netown(f, up->user, 0);
+		QUNLOCK(f);
+		QUNLOCK(nif);
+		poperror();
+		return fp - nif->f;
+	}
+	error(Enodev);
+	return -1;	/* not reached */
+}
+
+/*
+ *  look for a token starting a string,
+ *  return a pointer to first non-space char after it
+ *
+ *  Returns 0 if p does not start with token, or if token is followed
+ *  by something other than end of string, space, tab or newline.
+ */
+static char*
+matchtoken(char *p, char *token)
+{
+	int len;
+	char *rest;
+
+	len = strlen(token);
+	if(strncmp(p, token, len) != 0)
+		return 0;
+
+	rest = p + len;
+	switch(*rest){
+	case 0:
+		/* token is the whole string */
+		return rest;
+	case ' ':
+	case '\t':
+	case '\n':
+		break;
+	default:
+		/* token is only a prefix of a longer word */
+		return 0;
+	}
+	do
+		rest++;
+	while(*rest == ' ' || *rest == '\t' || *rest == '\n');
+	return rest;
+}
+
+/*
+ * Store the 64-bit value v at p, most significant byte first.
+ */
+void
+hnputv(void *p, uvlong v)
+{
+	uchar *out = p;
+
+	hnputl(out, v>>32);	/* high word first */
+	hnputl(out+4, v);
+}
+
+/*
+ * Store the 32-bit value v at p, most significant byte first.
+ */
+void
+hnputl(void *p, uint v)
+{
+	uchar *out = p;
+
+	out[3] = v;
+	out[2] = v>>8;
+	out[1] = v>>16;
+	out[0] = v>>24;
+}
+
+/*
+ * Store the 16-bit value v at p, most significant byte first.
+ */
+void
+hnputs(void *p, ushort v)
+{
+	uchar *out = p;
+
+	out[1] = v;
+	out[0] = v>>8;
+}
+
+/*
+ * Fetch a 64-bit value stored at p most significant byte first.
+ */
+uvlong
+nhgetv(void *p)
+{
+	uchar *a;
+
+	a = p;
+	/* shift as unsigned: the high word may have its top bit set */
+	return ((uvlong)nhgetl(a) << 32) | nhgetl(a+4);
+}
+
+/*
+ * Fetch a 32-bit value stored at p most significant byte first.
+ */
+uint
+nhgetl(void *p)
+{
+	uchar *a;
+
+	a = p;
+	/* a[0] promotes to int; shift it as unsigned so bit 31 is not a sign bit */
+	return ((uint)a[0]<<24)|(a[1]<<16)|(a[2]<<8)|(a[3]<<0);
+}
+
+/*
+ * Fetch a 16-bit value stored at p most significant byte first.
+ */
+ushort
+nhgets(void *p)
+{
+	uchar *in = p;
+
+	return (in[1]<<0)|(in[0]<<8);
+}
+
+/*
+ * Hash len bytes at a into a bucket index in [0, Nmhash).
+ */
+static ulong
+hash(uchar *a, int len)
+{
+	ulong h;
+	int i;
+
+	h = 0;
+	for(i = 0; i < len; i++)
+		h = (h << 1) + a[i];
+	return h%Nmhash;
+}
+
+/*
+ * Report whether multicast address addr (alen bytes) is currently
+ * active on the interface: 1 if its hash-table entry exists and has
+ * a non-zero reference count, 0 otherwise.
+ */
+int
+activemulti(Netif *nif, uchar *addr, int alen)
+{
+	Netaddr *ap;
+
+	for(ap = nif->mhash[hash(addr, alen)]; ap != nil; ap = ap->hnext){
+		if(memcmp(addr, ap->addr, alen) != 0)
+			continue;
+		/* the first matching entry decides */
+		return ap->ref ? 1 : 0;
+	}
+	return 0;
+}
+
+/*
+ * Convert a textual address (pairs of hex digits, each pair
+ * optionally followed by ':') into alen binary bytes at to.
+ * Returns 0 on success, -1 if the text ends before alen pairs
+ * have been read.
+ */
+static int
+parseaddr(uchar *to, char *from, int alen)
+{
+	char pair[4];
+	char *s;
+	int k;
+
+	s = from;
+	for(k = 0; k < alen; k++){
+		/* need two more characters for this byte */
+		if(s[0] == 0 || s[1] == 0)
+			return -1;
+		pair[0] = s[0];
+		pair[1] = s[1];
+		pair[2] = 0;
+		s += 2;
+		to[k] = strtoul(pair, 0, 16);
+		if(*s == ':')
+			s++;
+	}
+	return 0;
+}
+
+/*
+ *  keep track of multicast addresses
+ *
+ *  Add (add != 0) or remove one reference to multicast address addr
+ *  on behalf of file f.  Addresses live on the nif->maddr list and in
+ *  the nif->mhash table; an address's position i on the list is also
+ *  its bit number in f->maddr.  The interface's multicast routine is
+ *  called when an address gains its first or loses its last reference.
+ *  Returns 0 on success or an error string.
+ */
+static char*
+netmulti(Netif *nif, Netfile *f, uchar *addr, int add)
+{
+	Netaddr **l, *ap;
+	int i;
+	ulong h;
+
+	if(nif->multicast == nil)
+		return "interface does not support multicast";
+
+	/* find addr on the list; i counts its position, l is where to append */
+	l = &nif->maddr;
+	i = 0;
+	for(ap = *l; ap; ap = *l){
+		if(memcmp(addr, ap->addr, nif->alen) == 0)
+			break;
+		i++;
+		l = &ap->next;
+	}
+
+	if(add){
+		if(ap == 0){
+			/* new address: append to the list and enter in the hash table */
+			*l = ap = smalloc(sizeof(*ap));
+			memmove(ap->addr, addr, nif->alen);
+			ap->next = 0;
+			ap->ref = 1;
+			h = hash(addr, nif->alen);
+			ap->hnext = nif->mhash[h];
+			nif->mhash[h] = ap;
+		} else {
+			ap->ref++;
+		}
+		if(ap->ref == 1){
+			nif->nmaddr++;
+			nif->multicast(nif->arg, addr, 1);
+		}
+		/* remember in the file's bitmap that it holds this address */
+		if(i < 8*sizeof(f->maddr)){
+			if((f->maddr[i/8] & (1<<(i%8))) == 0)
+				f->nmaddr++;
+			f->maddr[i/8] |= 1<<(i%8);
+		}
+	} else {
+		if(ap == 0 || ap->ref == 0)
+			return 0;
+		/* the entry stays on the list even when its count drops to 0 */
+		ap->ref--;
+		if(ap->ref == 0){
+			nif->nmaddr--;
+			nif->multicast(nif->arg, addr, 0);
+		}
+		if(i < 8*sizeof(f->maddr)){
+			if((f->maddr[i/8] & (1<<(i%8))) != 0)
+				f->nmaddr--;
+			f->maddr[i/8] &= ~(1<<(i%8));
+		}
+	}
+	return 0;
+}
diff --git a/src/9vx/a/netif.h b/src/9vx/a/netif.h
@@ -31,7 +31,7 @@ enum
*/
struct Netfile
{
- QLock lk;
+ QLock qlock;
int inuse;
ulong mode;
@@ -64,7 +64,7 @@ struct Netaddr
*/
struct Netif
{
- QLock lk;
+ QLock qlock;
/* multiplexing */
char name[KNAMELEN]; /* for top level directory */
@@ -87,8 +87,8 @@ struct Netif
/* statistics */
int misses;
- int inpackets;
- int outpackets;
+ uvlong inpackets;
+ uvlong outpackets;
int crcs; /* input crc errors */
int oerrs; /* output errors */
int frames; /* framing errors */
diff --git a/src/9vx/a/part.c b/src/9vx/a/part.c
@@ -0,0 +1,341 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#include "sd.h"
+#include "fs.h"
+
+enum {
+ Npart = 32
+};
+
+uchar *mbrbuf, *partbuf;
+int nbuf;
+#define trace 0
+
+int
+tsdbio(SDunit *unit, SDpart *part, void *a, vlong off, int mbr)
+{
+ uchar *b;
+
+ if(unit->dev->ifc->bio(unit, 0, 0, a, 1, (off/unit->secsize) + part->start) != unit->secsize){
+ if(trace)
+ print("%s: read %lud at %lld failed\n", unit->dev->name,
+ unit->secsize, (vlong)part->start*unit->secsize+off);
+ return -1;
+ }
+ b = a;
+ if(mbr && (b[0x1FE] != 0x55 || b[0x1FF] != 0xAA)){
+ if(trace)
+ print("%s: bad magic %.2ux %.2ux at %lld\n",
+ unit->dev->name, b[0x1FE], b[0x1FF],
+ (vlong)part->start*unit->secsize+off);
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * read partition table. The partition table is just ascii strings.
+ */
+#define MAGIC "plan9 partitions"
+static void
+oldp9part(SDunit *unit)
+{
+ SDpart *pp;
+ char *field[3], *line[Npart+1];
+ ulong n, start, end;
+ int i;
+
+ /*
+ * We have some partitions already.
+ */
+ pp = &unit->part[unit->npart];
+
+ /*
+ * We prefer partition tables on the second to last sector,
+ * but some old disks use the last sector instead.
+ */
+ pp->start = unit->sectors - 2;
+ pp->end = unit->sectors - 1;
+
+ if(tsdbio(unit, pp, partbuf, 0, 0) < 0)
+ return;
+
+ if(strncmp((char*)partbuf, MAGIC, sizeof(MAGIC)-1) != 0) {
+ /* not found on 2nd last sector; look on last sector */
+ pp->start++;
+ pp->end++;
+ if(tsdbio(unit, pp, partbuf, 0, 0) < 0)
+ return;
+ if(strncmp((char*)partbuf, MAGIC, sizeof(MAGIC)-1) != 0)
+ return;
+ print("%s: using old plan9 partition table on last sector\n", unit->dev->name);
+ }else
+ print("%s: using old plan9 partition table on 2nd-to-last sector\n", unit->dev->name);
+
+ /* we found a partition table, so add a partition partition */
+ unit->npart++;
+ partbuf[unit->secsize-1] = '\0';
+
+ /*
+ * parse partition table
+ */
+ n = getfields((char*)partbuf, line, Npart+1, 0, "\n");
+ if(n && strncmp(line[0], MAGIC, sizeof(MAGIC)-1) == 0){
+ for(i = 1; i < n && unit->npart < SDnpart; i++){
+ if(getfields(line[i], field, 3, 0, " ") != 3)
+ break;
+ start = strtoull(field[1], 0, 0);
+ end = strtoull(field[2], 0, 0);
+ if(start >= end || end > unit->sectors)
+ break;
+ sdaddpart(unit, field[0], start, end);
+ }
+ }
+}
+
+static void
+p9part(SDunit *unit, char *name)
+{
+ SDpart *p;
+ char *field[4], *line[Npart+1];
+ uvlong start, end;
+ int i, n;
+
+ p = sdfindpart(unit, name);
+ if(p == nil)
+ return;
+
+ if(tsdbio(unit, p, partbuf, unit->secsize, 0) < 0)
+ return;
+ partbuf[unit->secsize-1] = '\0';
+
+ if(strncmp((char*)partbuf, "part ", 5) != 0)
+ return;
+
+ n = getfields((char*)partbuf, line, Npart+1, 0, "\n");
+ if(n == 0)
+ return;
+ for(i = 0; i < n /* && unit->npart < SDnpart */; i++){
+ if(strncmp(line[i], "part ", 5) != 0)
+ break;
+ if(getfields(line[i], field, 4, 0, " ") != 4)
+ break;
+ start = strtoull(field[2], 0, 0);
+ end = strtoull(field[3], 0, 0);
+ if(start >= end || end > unit->sectors)
+ break;
+ sdaddpart(unit, field[1], p->start+start, p->start+end);
+ }
+}
+
+/* Return 1 if partition type t is one of the FAT types tested here, else 0. */
+int
+isdos(int t)
+{
+ if(t==FAT12 || t==FAT16 || t==FATHUGE)
+ return 1;
+ return t==FAT32 || t==FAT32X;
+}
+
+/* Return 1 if partition type t is one of the extended-partition types, else 0. */
+int
+isextend(int t)
+{
+ if(t == EXTEND)
+ return 1;
+ if(t == EXTHUGE)
+ return 1;
+ return t == LEXTEND;
+}
+
+/*
+ * Fetch the first dos and all plan9 partitions out of the MBR partition table.
+ * We return -1 if we did not find a plan9 partition.
+ */
+static int
+mbrpart(SDunit *unit)
+{
+ Dospart *dp;
+ ulong taboffset, start, end;
+ ulong firstxpart, nxtxpart;
+ int havedos, i, nplan9;
+ char name[10];
+
+ taboffset = 0;
+ dp = (Dospart*)&mbrbuf[0x1BE];
+ if(1) {
+ /* get the MBR (allowing for DMDDO) */
+ if(tsdbio(unit, &unit->part[0], mbrbuf, (vlong)taboffset*unit->secsize, 1) < 0)
+ return -1;
+ for(i=0; i<4; i++)
+ /*
+  * Only relocate once: if the table at sector 63 also carries a
+  * DMDDO entry, restarting again would re-read it forever.
+  */
+ if(dp[i].type == DMDDO && taboffset == 0) {
+ if(trace)
+ print("DMDDO partition found\n");
+ taboffset = 63;
+ if(tsdbio(unit, &unit->part[0], mbrbuf, (vlong)taboffset*unit->secsize, 1) < 0)
+ return -1;
+ i = -1; /* start over */
+ }
+ }
+
+ /*
+ * Read the partitions, first from the MBR and then
+ * from successive extended partition tables.
+ */
+ nplan9 = 0;
+ havedos = 0;
+ firstxpart = 0;
+ for(;;) {
+ if(tsdbio(unit, &unit->part[0], mbrbuf, (vlong)taboffset*unit->secsize, 1) < 0)
+ return -1;
+ if(trace) {
+ if(firstxpart)
+ print("%s ext %lud ", unit->dev->name, taboffset);
+ else
+ print("%s mbr ", unit->dev->name);
+ }
+ nxtxpart = 0;
+ for(i=0; i<4; i++) {
+ if(trace)
+ print("dp %d...", dp[i].type);
+ start = taboffset+GLONG(dp[i].start);
+ end = start+GLONG(dp[i].len);
+
+ if(dp[i].type == PLAN9) {
+ if(nplan9 == 0)
+ strcpy(name, "plan9");
+ else
+ /* bounded: a long chain of tables must not overrun name[] */
+ snprint(name, sizeof name, "plan9.%d", nplan9);
+ sdaddpart(unit, name, start, end);
+ p9part(unit, name);
+ nplan9++;
+ }
+
+ /*
+ * We used to take the active partition (and then the first
+ * when none are active). We have to take the first here,
+ * so that the partition we call ``dos'' agrees with the
+ * partition disk/fdisk calls ``dos''.
+ */
+ if(havedos==0 && isdos(dp[i].type)){
+ havedos = 1;
+ sdaddpart(unit, "dos", start, end);
+ }
+
+ /* nxtxpart is relative to firstxpart (or 0), not taboffset */
+ if(isextend(dp[i].type)){
+ nxtxpart = start-taboffset+firstxpart;
+ if(trace)
+ print("link %lud...", nxtxpart);
+ }
+ }
+ if(trace)
+ print("\n");
+
+ if(!nxtxpart)
+ break;
+ if(!firstxpart)
+ firstxpart = nxtxpart;
+ taboffset = nxtxpart;
+ }
+ return nplan9 ? 0 : -1;
+}
+
+/*
+ * To facilitate booting from CDs, we create a partition for
+ * the boot floppy image embedded in a bootable CD.
+ */
+static int
+part9660(SDunit *unit)
+{
+ uchar buf[2048];
+ ulong a, n;
+ uchar *p;
+
+ if(unit->secsize != 2048)
+ return -1;
+
+ /* sector 17 is expected to hold the El Torito boot record checked below */
+ if(unit->dev->ifc->bio(unit, 0, 0, buf, 2048/unit->secsize, (17*2048)/unit->secsize) < 0)
+ return -1;
+
+ /*
+  * The literal is split after \x01 on purpose: a hex escape consumes every
+  * following hex digit, so "\x01EL" would be read as the byte 0x1E then "L"
+  * and the signature would never match.
+  */
+ if(buf[0] || strcmp((char*)buf+1, "CD001\x01" "EL TORITO SPECIFICATION") != 0)
+ return -1;
+
+
+ /* little-endian sector number of the next structure to read */
+ p = buf+0x47;
+ a = p[0] | (p[1]<<8) | (p[2]<<16) | (p[3]<<24);
+
+ if(unit->dev->ifc->bio(unit, 0, 0, buf, 2048/unit->secsize, (a*2048)/unit->secsize) < 0)
+ return -1;
+
+ if(memcmp(buf, "\x01\x00\x00\x00", 4) != 0
+ || memcmp(buf+30, "\x55\xAA", 2) != 0
+ || buf[0x20] != 0x88)
+ return -1;
+
+ /* little-endian start sector of the embedded boot image */
+ p = buf+0x28;
+ a = p[0] | (p[1]<<8) | (p[2]<<16) | (p[3]<<24);
+
+ /* media type byte selects the emulated floppy size in bytes */
+ switch(buf[0x21]){
+ case 0x01:
+ n = 1200*1024;
+ break;
+ case 0x02:
+ n = 1440*1024;
+ break;
+ case 0x03:
+ n = 2880*1024;
+ break;
+ default:
+ return -1;
+ }
+ n /= 2048;
+
+ print("found partition %s!cdboot; %lud+%lud\n", unit->dev->name, a, n);
+ sdaddpart(unit, "cdboot", a, a+n);
+ return 0;
+}
+
+enum {
+ NEW = 1<<0,
+ OLD = 1<<1
+};
+
+void
+partition(SDunit *unit)
+{
+ int type;
+ char *p;
+
+ if(unit->part == 0)
+ return;
+
+ if(part9660(unit) == 0)
+ return;
+
+ p = "new";
+
+ if(p != nil && strncmp(p, "new", 3) == 0)
+ type = NEW;
+ else if(p != nil && strncmp(p, "old", 3) == 0)
+ type = OLD;
+ else
+ type = NEW|OLD;
+
+ if(nbuf < unit->secsize) {
+ free(mbrbuf);
+ free(partbuf);
+ mbrbuf = malloc(unit->secsize);
+ partbuf = malloc(unit->secsize);
+ if(mbrbuf==nil || partbuf==nil) {
+ free(mbrbuf);
+ free(partbuf);
+ partbuf = mbrbuf = nil;
+ nbuf = 0;
+ return;
+ }
+ nbuf = unit->secsize;
+ }
+
+ if((type & NEW) && mbrpart(unit) >= 0){
+ /* nothing to do */;
+ }
+ else if(type & OLD)
+ oldp9part(unit);
+}
diff --git a/src/9vx/a/pgrp.c b/src/9vx/a/pgrp.c
@@ -180,7 +180,7 @@ dupfgrp(Fgrp *f)
lock(&f->ref.lk);
/* Make new fd list shorter if possible, preserving quantization */
new->nfd = f->maxfd+1;
- i = new->nfd%DELTAFD;
+ i = (uint)new->nfd%DELTAFD;
if(i != 0)
new->nfd += DELTAFD - i;
new->fd = malloc(new->nfd*sizeof(Chan*));
diff --git a/src/9vx/a/portfns.h b/src/9vx/a/portfns.h
@@ -32,8 +32,8 @@ void callwithureg(void(*)(Ureg*));
char* chanpath(Chan*);
int canlock(Lock*);
int canpage(Proc*);
-int canqlock(QLock*);
-int canrlock(RWlock*);
+int __canqlock(QLock*);
+int __canrlock(RWlock*);
void chandevinit(void);
void chandevreset(void);
void chandevshutdown(void);
@@ -166,7 +166,7 @@ void ksetenv(char*, char*, int);
void kstrcpy(char*, char*, int);
void kstrdup(char**, char*);
long latin1(Rune*, int);
-int lock(Lock*);
+int __lock(Lock*);
void logopen(Log*);
void logclose(Log*);
char* logctl(Log*, int, char**, Logflag*);
@@ -277,7 +277,7 @@ void qhangup(Queue*, char*);
int qisclosed(Queue*);
int qiwrite(Queue*, void*, int);
int qlen(Queue*);
-void qlock(QLock*);
+void __qlock(QLock*);
Queue* qopen(int, int, void (*)(void*), void*);
int qpass(Queue*, Block*);
int qpassnolim(Queue*, Block*);
@@ -287,7 +287,7 @@ long qread(Queue*, void*, int);
Block* qremove(Queue*);
void qreopen(Queue*);
void qsetlimit(Queue*, int);
-void qunlock(QLock*);
+void __qunlock(QLock*);
int qwindow(Queue*);
int qwrite(Queue*, void*, int);
void qnoblock(Queue*, int);
@@ -305,9 +305,9 @@ void renameuser(char*, char*);
void resched(char*);
void resrcwait(char*);
int return0(void*);
-void rlock(RWlock*);
+void __rlock(RWlock*);
long rtctime(void);
-void runlock(RWlock*);
+void __runlock(RWlock*);
Proc* runproc(void);
void savefpregs(FPsave*);
void sched(void);
@@ -361,7 +361,7 @@ int uartstageoutput(Uart*);
void unbreak(Proc*);
void uncachepage(Page*);
long unionread(Chan*, void*, long);
-void unlock(Lock*);
+void __unlock(Lock*);
uvlong us2fastticks(uvlong);
void userinit(void);
ulong userpc(void);
@@ -372,8 +372,8 @@ void validstat(uchar*, int);
void* vmemchr(void*, int, int);
Proc* wakeup(Rendez*);
int walk(Chan**, char**, int, int, int*);
-void wlock(RWlock*);
-void wunlock(RWlock*);
+void __wlock(RWlock*);
+void __wunlock(RWlock*);
void* xalloc(ulong);
void* xallocz(ulong, int);
void xfree(void*);
diff --git a/src/9vx/a/qlock.c b/src/9vx/a/qlock.c
@@ -5,6 +5,8 @@
#include "dat.h"
#include "fns.h"
+int tracelock = 0;
+
struct {
ulong rlock;
ulong rlockq;
@@ -15,7 +17,7 @@ struct {
} rwstats;
void
-qlock(QLock *q)
+__qlock(QLock *q)
{
Proc *p;
@@ -50,7 +52,7 @@ qlock(QLock *q)
}
int
-canqlock(QLock *q)
+__canqlock(QLock *q)
{
if(!canlock(&q->use))
return 0;
@@ -64,7 +66,7 @@ canqlock(QLock *q)
}
void
-qunlock(QLock *q)
+__qunlock(QLock *q)
{
Proc *p;
@@ -86,7 +88,7 @@ qunlock(QLock *q)
}
void
-rlock(RWlock *q)
+__rlock(RWlock *q)
{
Proc *p;
@@ -115,7 +117,7 @@ rlock(RWlock *q)
}
void
-runlock(RWlock *q)
+__runlock(RWlock *q)
{
Proc *p;
@@ -138,7 +140,7 @@ runlock(RWlock *q)
}
void
-wlock(RWlock *q)
+__wlock(RWlock *q)
{
Proc *p;
@@ -170,7 +172,7 @@ wlock(RWlock *q)
}
void
-wunlock(RWlock *q)
+__wunlock(RWlock *q)
{
Proc *p;
@@ -209,7 +211,7 @@ wunlock(RWlock *q)
/* same as rlock but punts if there are any writers waiting */
int
-canrlock(RWlock *q)
+__canrlock(RWlock *q)
{
lock(&q->use);
rwstats.rlock++;
diff --git a/src/9vx/a/sd.h b/src/9vx/a/sd.h
@@ -129,9 +129,14 @@ extern void sdadddevs(SDev*);
extern int sdsetsense(SDreq*, int, int, int, int);
extern int sdmodesense(SDreq*, uchar*, void*, int);
extern int sdfakescsi(SDreq*, void*, int);
+extern void sdaddpart(SDunit*, char*, uvlong, uvlong);
+extern SDpart* sdfindpart(SDunit*, char*);
/* sdscsi.c */
extern int scsiverify(SDunit*);
extern int scsionline(SDunit*);
extern long scsibio(SDunit*, int, int, void*, long, uvlong);
extern SDev* scsiid(SDev*, SDifc*);
+
+/* part.c */
+extern void partition(SDunit*);
diff --git a/src/9vx/a/sdaoe.c b/src/9vx/a/sdaoe.c
@@ -0,0 +1,652 @@
+/*
+ * aoe sd driver, copyright © 2007 coraid
+ */
+
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "error.h"
+#include "sd.h"
+#include "netif.h"
+#include "aoe.h"
+
+extern char Echange[];
+extern char Enotup[];
+
+#define uprint(...) snprint(up->genbuf, sizeof up->genbuf, __VA_ARGS__);
+
+enum {
+ Nctlr = 32,
+ Maxpath = 128,
+};
+
+enum {
+ /* sync with ahci.h */
+ Dllba = 1<<0,
+ Dsmart = 1<<1,
+ Dpower = 1<<2,
+ Dnop = 1<<3,
+ Datapi = 1<<4,
+ Datapi16= 1<<5,
+};
+
+static char *flagname[] = {
+ "llba",
+ "smart",
+ "power",
+ "nop",
+ "atapi",
+ "atapi16",
+};
+
+typedef struct Ctlr Ctlr;
+struct Ctlr{
+ QLock qlock;
+
+ Ctlr *next;
+ SDunit *unit;
+
+ char path[Maxpath];
+ Chan *c;
+
+ ulong vers;
+ uchar mediachange;
+ uchar flag;
+ uchar smart;
+ uchar smartrs;
+ uchar feat;
+
+ uvlong sectors;
+ char serial[20+1];
+ char firmware[8+1];
+ char model[40+1];
+ char ident[0x100];
+};
+
+static Lock ctlrlock;
+static Ctlr *head;
+static Ctlr *tail;
+
+SDifc sdaoeifc;
+
+static void
+idmove(char *p, ushort *a, int n)
+{
+ int i;
+ char *op, *e;
+
+ op = p;
+ for(i = 0; i < n/2; i++){
+ *p++ = a[i] >> 8;
+ *p++ = a[i];
+ }
+ *p = 0;
+ while(p > op && *--p == ' ')
+ *p = 0;
+ e = p;
+ p = op;
+ while(*p == ' ')
+ p++;
+ memmove(op, p, n - (e - p));
+}
+
+static ushort
+gbit16(void *a)
+{
+ uchar *i;
+
+ i = a;
+ return i[1] << 8 | i[0];
+}
+
+static ulong
+gbit32(void *a)
+{
+ ulong j;
+ uchar *i;
+
+ i = a;
+ j = i[3] << 24;
+ j |= i[2] << 16;
+ j |= i[1] << 8;
+ j |= i[0];
+ return j;
+}
+
+static uvlong
+gbit64(void *a)
+{
+ uchar *i;
+
+ i = a;
+ return (uvlong)gbit32(i+4)<<32 | gbit32(i);
+}
+
+static int
+identify(Ctlr *c, ushort *id)
+{
+ int i;
+ uchar oserial[21];
+ uvlong osectors, s;
+
+ osectors = c->sectors;
+ memmove(oserial, c->serial, sizeof c->serial);
+
+ c->feat &= ~(Dllba|Dpower|Dsmart|Dnop);
+ i = gbit16(id+83) | gbit16(id+86);
+ if(i & (1<<10)){
+ c->feat |= Dllba;
+ s = gbit64(id+100);
+ }else
+ s = gbit32(id+60);
+
+ i = gbit16(id+83);
+ if((i>>14) == 1) {
+ if(i & (1<<3))
+ c->feat |= Dpower;
+ i = gbit16(id+82);
+ if(i & 1)
+ c->feat |= Dsmart;
+ if(i & (1<<14))
+ c->feat |= Dnop;
+ }
+
+ idmove(c->serial, id+10, 20);
+ idmove(c->firmware, id+23, 8);
+ idmove(c->model, id+27, 40);
+
+ if((osectors == 0 || osectors != s) &&
+ memcmp(oserial, c->serial, sizeof oserial) != 0){
+ c->sectors = s;
+ c->mediachange = 1;
+ c->vers++;
+ }
+ return 0;
+}
+
+/* must call with d qlocked */
+static int
+aoeidentify(Ctlr *d, SDunit *u)
+{
+ Chan *c;
+
+ c = nil;
+ if(waserror()){
+ if(c)
+ cclose(c);
+ iprint("aoeidentify: %s\n", up->errstr);
+ nexterror();
+ }
+
+ uprint("%s/ident", d->path);
+ c = namec(up->genbuf, Aopen, OREAD, 0);
+ devtab[c->type]->read(c, d->ident, sizeof d->ident, 0);
+
+ poperror();
+ cclose(c);
+
+ d->feat = 0;
+ d->smart = 0;
+ identify(d, (ushort*)d->ident);
+
+ memset(u->inquiry, 0, sizeof u->inquiry);
+ u->inquiry[2] = 2;
+ u->inquiry[3] = 2;
+ u->inquiry[4] = sizeof u->inquiry - 4;
+ memmove(u->inquiry+8, d->model, 40);
+
+ return 0;
+}
+
+static Ctlr*
+ctlrlookup(char *path)
+{
+ Ctlr *c;
+
+ lock(&ctlrlock);
+ for(c = head; c; c = c->next)
+ if(strcmp(c->path, path) == 0)
+ break;
+ unlock(&ctlrlock);
+ return c;
+}
+
+static Ctlr*
+newctlr(char *path)
+{
+ Ctlr *c;
+
+ /* race? */
+ if(ctlrlookup(path))
+ error(Eexist);
+
+ if((c = malloc(sizeof *c)) == nil)
+ return 0;
+ kstrcpy(c->path, path, sizeof c->path);
+ lock(&ctlrlock);
+ if(head != nil)
+ tail->next = c;
+ else
+ head = c;
+ tail = c;
+ unlock(&ctlrlock);
+ return c;
+}
+
+/*
+ * Unlink the controller whose path equals c->path from the global list,
+ * close its channel if one is open, and free it.
+ * Raises Enonexist if no such controller is on the list.
+ */
+static void
+delctlr(Ctlr *c)
+{
+ Ctlr *x, *prev;
+
+ lock(&ctlrlock);
+
+ /*
+  * Advance from x, not from c: stepping with c->next skipped list
+  * entries and could loop forever with ctlrlock held.
+  */
+ for(prev = 0, x = head; x; prev = x, x = x->next)
+ if(strcmp(c->path, x->path) == 0)
+ break;
+ if(x == 0){
+ unlock(&ctlrlock);
+ error(Enonexist);
+ }
+
+ if(prev)
+ prev->next = x->next;
+ else
+ head = x->next;
+ if(x->next == nil)
+ tail = prev;
+ unlock(&ctlrlock);
+
+ if(x->c)
+ cclose(x->c);
+ free(x);
+}
+
+static SDev*
+aoeprobe(char *path, SDev *s)
+{
+ int n, i;
+ char *p;
+ Chan *c;
+ Ctlr *ctlr;
+
+ if((p = strrchr(path, '/')) == 0)
+ error(Ebadarg);
+ *p = 0;
+ uprint("%s/ctl", path);
+ *p = '/';
+
+ c = namec(up->genbuf, Aopen, OWRITE, 0);
+ if(waserror()) {
+ cclose(c);
+ nexterror();
+ }
+ n = uprint("discover %s", p+1);
+ devtab[c->type]->write(c, up->genbuf, n, 0);
+ poperror();
+ cclose(c);
+
+ for(i = 0;; i += 200){
+ if(i > 8000 || waserror())
+ error(Etimedout);
+ tsleep(&up->sleep, return0, 0, 200);
+ poperror();
+
+ uprint("%s/ident", path);
+ if(waserror())
+ continue;
+ c = namec(up->genbuf, Aopen, OREAD, 0);
+ poperror();
+ cclose(c);
+
+ ctlr = newctlr(path);
+ break;
+ }
+
+ if(s == nil && (s = malloc(sizeof *s)) == nil)
+ return nil;
+ s->ctlr = ctlr;
+ s->ifc = &sdaoeifc;
+ s->nunit = 1;
+ return s;
+}
+
+static char *probef[32];
+static int nprobe;
+
+static int
+pnpprobeid(char *s)
+{
+ int id;
+
+ if(strlen(s) < 2)
+ return 0;
+ id = 'e';
+ if(s[1] == '!')
+ id = s[0];
+ return id;
+}
+
+static SDev*
+aoepnp(void)
+{
+ int i, id;
+ char *p;
+ SDev *h, *t, *s;
+
+// if((p = getconf("aoedev")) == 0)
+ if(1)
+ return 0;
+ nprobe = tokenize(p, probef, nelem(probef));
+ h = t = 0;
+ for(i = 0; i < nprobe; i++){
+ id = pnpprobeid(probef[i]);
+ if(id == 0)
+ continue;
+ s = malloc(sizeof *s);
+ if(s == nil)
+ break;
+ s->ctlr = 0;
+ s->idno = id;
+ s->ifc = &sdaoeifc;
+ s->nunit = 1;
+
+ if(h)
+ t->next = s;
+ else
+ h = s;
+ t = s;
+ }
+ return h;
+}
+
+/*
+ * Probe the next configured target (one probef[] entry per call) for sd,
+ * retrying every 200ms and giving up after 8000ms.
+ * Returns the resulting controller, or 0 on failure.
+ */
+static Ctlr*
+pnpprobe(SDev *sd)
+{
+ int j;
+ char *p;
+ static int i;
+
+ /* probef[] holds nprobe entries, so index nprobe is already past the end */
+ if(i >= nprobe)
+ return 0;
+ p = probef[i++];
+ if(strlen(p) < 2)
+ return 0;
+ /* skip an "x!" prefix, matching the test in pnpprobeid */
+ if(p[1] == '!')
+ p += 2;
+
+ for(j = 0;; j += 200){
+ if(j > 8000){
+ print("#æ: pnpprobe: %s: %s\n", probef[i-1], up->errstr);
+ return 0;
+ }
+ if(waserror()){
+ tsleep(&up->sleep, return0, 0, 200);
+ continue;
+ }
+ sd = aoeprobe(p, sd);
+ poperror();
+ break;
+ }
+ print("#æ: pnpprobe establishes %sin %dms\n", probef[i-1], j);
+ return sd->ctlr;
+}
+
+
+static int
+aoeverify(SDunit *u)
+{
+ SDev *s;
+ Ctlr *c;
+
+ s = u->dev;
+ c = s->ctlr;
+ if(c == nil && (s->ctlr = c = pnpprobe(s)) == nil)
+ return 0;
+ c->mediachange = 1;
+ return 1;
+}
+
+static int
+aoeconnect(SDunit *u, Ctlr *c)
+{
+ QLOCK(c);
+ if(waserror()){
+ QUNLOCK(c);
+ return -1;
+ }
+
+ aoeidentify(u->dev->ctlr, u);
+ if(c->c)
+ cclose(c->c);
+ c->c = 0;
+ uprint("%s/data", c->path);
+ c->c = namec(up->genbuf, Aopen, ORDWR, 0);
+ QUNLOCK(c);
+ poperror();
+
+ return 0;
+}
+
+static int
+aoeonline(SDunit *u)
+{
+ Ctlr *c;
+ int r;
+
+ c = u->dev->ctlr;
+ r = 0;
+
+ if((c->feat&Datapi) && c->mediachange){
+ if(aoeconnect(u, c) == 0 && (r = scsionline(u)) > 0)
+ c->mediachange = 0;
+ return r;
+ }
+
+ if(c->mediachange){
+ if(aoeconnect(u, c) == -1)
+ return 0;
+ r = 2;
+ c->mediachange = 0;
+ u->sectors = c->sectors;
+ u->secsize = Aoesectsz;
+ } else
+ r = 1;
+
+ return r;
+}
+
+/*
+ * Execute request r on the unit's aoe data channel.
+ * Commands 0x35/0x91 are rejected (the flush code is commented out),
+ * sdfakescsi answers what it can, and 0x28/0x88 and 0x2a/0x8a become a
+ * read or write on the controller channel at byte offset lba*Aoesectsz.
+ * Returns an SD status code; channel errors are re-raised.
+ */
+static int
+aoerio(SDreq *r)
+{
+ int i, count;
+ uvlong lba;
+ char *name;
+ uchar *cmd;
+ long (*rio)(Chan*, void*, long, vlong);
+ Ctlr *c;
+ SDunit *unit;
+
+ unit = r->unit;
+ c = unit->dev->ctlr;
+// if(c->feat & Datapi)
+// return aoeriopkt(r, d);
+
+ cmd = r->cmd;
+ name = unit->perm.name;
+
+ if(r->cmd[0] == 0x35 || r->cmd[0] == 0x91){
+// QLOCK(c);
+// i = flushcache();
+// QUNLOCK(c);
+// if(i == 0)
+// return sdsetsense(r, SDok, 0, 0, 0);
+ return sdsetsense(r, SDcheck, 3, 0xc, 2);
+ }
+
+ if((i = sdfakescsi(r, c->ident, sizeof c->ident)) != SDnostatus){
+ r->status = i;
+ return i;
+ }
+
+ switch(*cmd){
+ case 0x88:
+ case 0x28:
+ rio = devtab[c->c->type]->read;
+ break;
+ case 0x8a:
+ case 0x2a:
+ rio = devtab[c->c->type]->write;
+ break;
+ default:
+ print("%s: bad cmd %#.2ux\n", name, cmd[0]);
+ r->status = SDcheck;
+ return SDcheck;
+ }
+
+ if(r->data == nil)
+ return SDok;
+
+ /*
+  * The byte shifted by 24 is widened first: as a plain int it turns
+  * negative for values >= 0x80 and would sign-extend into the upper
+  * half of the 64-bit lba.
+  */
+ if(r->clen == 16){
+ if(cmd[2] || cmd[3])
+ return sdsetsense(r, SDcheck, 3, 0xc, 2);
+ lba = (uvlong)cmd[4]<<40 | (uvlong)cmd[5]<<32;
+ lba |= (uvlong)cmd[6]<<24 | cmd[7]<<16 | cmd[8]<<8 | cmd[9];
+ count = cmd[10]<<24 | cmd[11]<<16 | cmd[12]<<8 | cmd[13];
+ }else{
+ lba = (uvlong)cmd[2]<<24 | cmd[3]<<16 | cmd[4]<<8 | cmd[5];
+ count = cmd[7]<<8 | cmd[8];
+ }
+
+ count *= Aoesectsz;
+
+ /* never transfer more than the caller's buffer, rounded down to 512 bytes */
+ if(r->dlen < count)
+ count = r->dlen & ~0x1ff;
+
+ if(waserror()){
+ if(strcmp(up->errstr, Echange) == 0 ||
+ strcmp(up->errstr, Enotup) == 0)
+ unit->sectors = 0;
+ nexterror();
+ }
+ r->rlen = rio(c->c, r->data, count, Aoesectsz * lba);
+ poperror();
+ r->status = SDok;
+ return SDok;
+}
+
+static char *smarttab[] = {
+ "unset",
+ "error",
+ "threshold exceeded",
+ "normal"
+};
+
+/*
+ * Append the names of the feature bits set in f to the buffer [s, e),
+ * followed by a newline.  Returns the new end of the text.
+ */
+static char *
+pflag(char *s, char *e, uchar f)
+{
+ uchar i, m;
+
+ /* stop at the last named bit: indexing flagname[] by bits 6 and 7 read past the table */
+ for(i = 0; i < nelem(flagname); i++){
+ m = 1 << i;
+ if(f & m)
+ s = seprint(s, e, "%s ", flagname[i]);
+ }
+ return seprint(s, e, "\n");
+}
+
+static int
+aoerctl(SDunit *u, char *p, int l)
+{
+ Ctlr *c;
+ char *e, *op;
+
+ if((c = u->dev->ctlr) == nil)
+ return 0;
+ e = p+l;
+ op = p;
+
+ p = seprint(p, e, "model\t%s\n", c->model);
+ p = seprint(p, e, "serial\t%s\n", c->serial);
+ p = seprint(p, e, "firm %s\n", c->firmware);
+ if(c->smartrs == 0xff)
+ p = seprint(p, e, "smart\tenable error\n");
+ else if(c->smartrs == 0)
+ p = seprint(p, e, "smart\tdisabled\n");
+ else
+ p = seprint(p, e, "smart\t%s\n", smarttab[c->smart]);
+ p = seprint(p, e, "flag ");
+ p = pflag(p, e, c->feat);
+ p = seprint(p, e, "geometry %llud %d\n", c->sectors, Aoesectsz);
+ return p-op;
+}
+
+static int
+aoewctl(SDunit *d1, Cmdbuf *cmd)
+{
+ cmderror(cmd, Ebadarg);
+ return 0;
+}
+
+static SDev*
+aoeprobew(DevConf *c)
+{
+ char *p;
+
+ p = strchr(c->type, '/');
+ if(p == nil || strlen(p) > Maxpath - 11)
+ error(Ebadarg);
+ if(p[1] == '#')
+ p++; /* hack */
+ if(ctlrlookup(p))
+ error(Einuse);
+ return aoeprobe(p, 0);
+}
+
+static void
+aoeclear(SDev *s)
+{
+ delctlr((Ctlr *)s->ctlr);
+}
+
+static char*
+aoertopctl(SDev *s, char *p, char *e)
+{
+ Ctlr *c;
+
+ c = s->ctlr;
+ return seprint(p, e, "%s aoe %s\n", s->name, c->path);
+}
+
+static int
+aoewtopctl(SDev *d1, Cmdbuf *cmd)
+{
+ switch(cmd->nf){
+ default:
+ cmderror(cmd, Ebadarg);
+ }
+ return 0;
+}
+
+SDifc sdaoeifc = {
+ "aoe",
+
+ aoepnp,
+ nil, /* legacy */
+ nil, /* enable */
+ nil, /* disable */
+
+ aoeverify,
+ aoeonline,
+ aoerio,
+ aoerctl,
+ aoewctl,
+
+ scsibio,
+ aoeprobew, /* probe */
+ aoeclear, /* clear */
+ aoertopctl,
+ aoewtopctl,
+};
diff --git a/src/9vx/bootcode.9 b/src/9vx/bootcode.9
Binary files differ.
diff --git a/src/9vx/devip.c b/src/9vx/devip.c
@@ -883,7 +883,7 @@ cswrite(Chan *c, void *a, long n, vlong offset)
return n;
}
-Dev ipdevtab =
+Dev pipdevtab =
{
'I',
"ip",
diff --git a/src/9vx/devtab.c b/src/9vx/devtab.c
@@ -5,6 +5,7 @@
#include "fns.h"
#include "error.h"
+extern Dev aoedevtab;
extern Dev consdevtab;
extern Dev rootdevtab;
extern Dev pipedevtab;
@@ -24,14 +25,18 @@ extern Dev mntloopdevtab;
extern Dev dupdevtab;
extern Dev sddevtab;
extern Dev capdevtab;
+extern Dev etherdevtab;
Dev *devtab[] = {
&rootdevtab, /* must be first */
+ &aoedevtab,
&audiodevtab,
+ &capdevtab,
&consdevtab,
&drawdevtab,
&dupdevtab,
&envdevtab,
+ &etherdevtab,
&fsdevtab,
&ipdevtab,
&mntdevtab,
@@ -40,11 +45,9 @@ Dev *devtab[] = {
&pipedevtab,
&procdevtab,
&ramdevtab,
+ &sddevtab,
&srvdevtab,
&ssldevtab,
&tlsdevtab,
- &sddevtab,
- &capdevtab,
0
};
-
diff --git a/src/9vx/etherpcap.c b/src/9vx/etherpcap.c
@@ -0,0 +1,189 @@
+/*
+ * etherpcap - portable Virtual Ethernet driver for 9vx.
+ *
+ * Copyright (c) 2008 Devon H. O'Dell
+ * copyright © 2008 erik quanstrom
+ * copyright © 2010 Jesus Galan Lopez
+ *
+ * Released under 2-clause BSD license.
+ */
+
+#include "u.h"
+
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "error.h"
+#include "netif.h"
+#include "etherif.h"
+#include "vether.h"
+
+#include <pcap.h>
+
+static uvlong txerrs;
+
+extern int eafrom(char *ma, uchar ea[6]);
+
+typedef struct Ctlr Ctlr;
+struct Ctlr {
+ pcap_t *pd;
+};
+
+static void *
+veerror(char* err)
+{
+ iprint("ve: %s\n", err);
+ return nil;
+}
+
+/*
+ * Open dev (or the device pcap picks when dev is nil) for live capture
+ * and install a filter passing only frames whose destination is ea.
+ * Returns the pcap handle, or nil on failure.
+ */
+static pcap_t *
+setup(char *dev, uchar *ea)
+{
+ char filter[30];
+ char errbuf[PCAP_ERRBUF_SIZE];
+ pcap_t *pd;
+ struct bpf_program prog;
+ bpf_u_int32 net;
+ bpf_u_int32 mask;
+
+ if(sprint(filter, "ether dst %2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux:%2.2ux",
+ ea[0], ea[1], ea[2],ea[3], ea[4], ea[5]) == -1)
+ return veerror("cannot create pcap filter");
+
+ if (!dev && (dev = pcap_lookupdev(errbuf)) == nil)
+ return veerror("cannot find network device");
+
+ if ((pd = pcap_open_live(dev, 65000, 1, 1, errbuf)) == nil)
+ return nil;
+
+ /* the lookup can fail; do not hand pcap_compile an uninitialized value */
+ if (pcap_lookupnet(dev, &net, &mask, errbuf) == -1)
+ net = 0;
+
+ /* a failed compile leaves prog unusable, so only the handle is released */
+ if (pcap_compile(pd, &prog, filter, 0, net) == -1) {
+ pcap_close(pd);
+ return nil;
+ }
+
+ /* on failure drop the handle too instead of leaking it */
+ if (pcap_setfilter(pd, &prog) == -1) {
+ pcap_close(pd);
+ pd = nil;
+ }
+ pcap_freecode(&prog);
+
+ return pd;
+}
+
+static Block *
+pcappkt(Ctlr *c)
+{
+ struct pcap_pkthdr hdr;
+ uchar *p;
+ Block *b;
+
+ while ((p = pcap_next(c->pd, &hdr)) == nil);
+
+ b = allocb(hdr.caplen);
+ memcpy(b->rp, p, hdr.caplen);
+ b->wp += hdr.caplen;
+ b->flag |= Btcpck|Budpck|Bpktck;
+
+/*
+ iprint("+++++++++++ packet %d (len %d):\n", ++fn, hdr.caplen);
+ int i=0; uchar* u;
+ static int fn=0;
+
+ for(u=b->rp; u<b->wp; u++){
+ if (i%16 == 0) iprint("%.4ux", i);
+ if (i%8 == 0) iprint(" ");
+ iprint("%2.2ux ", *u);
+ if (++i%16 == 0) iprint("\n");
+ }
+ iprint("\n-------------\n");
+*/
+
+ return b;
+
+}
+
+/* Receive loop: hand every packet returned by pcappkt to etheriq until it returns nil. */
+static void
+pcaprecvkproc(void *v)
+{
+ Ether *ether;
+ Block *pkt;
+
+ ether = v;
+ for(;;){
+ pkt = pcappkt(ether->ctlr);
+ if(pkt == nil)
+ break;
+ etheriq(ether, pkt, 1);
+ }
+}
+
+static void
+pcaptransmit(Ether* e)
+{
+ const u_char *u;
+ Block *b;
+ Ctlr *c;
+
+ c = e->ctlr;
+ while ((b = qget(e->oq)) != nil) {
+ int wlen;
+
+ u = (const u_char*)b->rp;
+
+ wlen = pcap_inject(c->pd, u, BLEN(b));
+ // iprint("injected packet len %d\n", wlen);
+ if (wlen == -1)
+ txerrs++;
+
+ freeb(b);
+ }
+}
+
+/* Format the transmit error counter and copy it out with readstr. */
+static long
+pcapifstat(Ether *e, void *a, long n, ulong offset)
+{
+ char buf[128];
+
+ /* txerrs is a uvlong, so it needs the ll length modifier */
+ snprint(buf, sizeof buf, "txerrors: %llud\n", txerrs);
+ return readstr(offset, a, n, buf);
+}
+
+static void
+pcapattach(Ether* e)
+{
+ kproc("pcaprecv", pcaprecvkproc, e);
+}
+
+static int
+pcappnp(Ether* e)
+{
+ Ctlr c;
+ static int cve = 0;
+
+ while(cve < nve && ve[cve].tap == 1)
+ cve++;
+ if(cve >= nve)
+ return -1;
+
+ memset(&c, 0, sizeof(c));
+ c.pd = setup(ve[cve].dev, ve[cve].ea);
+ if (c.pd == nil) {
+ iprint("ve: pcap failed to initialize\n");
+ cve++;
+ return -1;
+ }
+ e->ctlr = malloc(sizeof(c));
+ memcpy(e->ctlr, &c, sizeof(c));
+ e->tbdf = BUSUNKNOWN;
+ memcpy(e->ea, ve[cve].ea, Eaddrlen);
+ e->attach = pcapattach;
+ e->transmit = pcaptransmit;
+ e->ifstat = pcapifstat;
+ e->ni.arg = e;
+ e->ni.link = 1;
+ cve++;
+ return 0;
+}
+
+void
+etherpcaplink(void)
+{
+ addethercard("pcap", pcappnp);
+}
diff --git a/src/9vx/ethertap.c b/src/9vx/ethertap.c
@@ -0,0 +1,185 @@
+/*
+ * ethertap: tap device ethernet driver
+ * copyright © 2008 erik quanstrom
+ * copyright © 2010 Tully Gray
+ * copyright © 2010 Jesus Galan Lopez
+ */
+
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "error.h"
+#include "netif.h"
+#include "etherif.h"
+#include "vether.h"
+
+#include <net/if.h>
+#include <sys/ioctl.h>
+
+#ifdef linux
+#include <netpacket/packet.h>
+#include <linux/if_tun.h>
+#elif defined(__FreeBSD__)
+#include <net/if_tun.h>
+#endif
+
+typedef struct Ctlr Ctlr;
+struct Ctlr {
+ int fd;
+ int txerrs;
+ uchar ea[Eaddrlen];
+};
+
+static uchar anyea[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff,};
+
+#ifdef linux
+/*
+ * Open /dev/net/tun and bind it to tap interface dev ("tap0" when dev
+ * is nil).  Returns the file descriptor, or -1 on failure.
+ */
+static int
+opentap(char *dev)
+{
+ int fd;
+ char *tap0 = "tap0";
+ struct ifreq ifr;
+
+ if(dev == nil)
+ dev = tap0;
+ if((fd = open("/dev/net/tun", O_RDWR)) < 0)
+ return -1;
+ memset(&ifr, 0, sizeof ifr);
+ /* copy at most size-1 bytes so the zeroed last byte keeps ifr_name terminated */
+ strncpy(ifr.ifr_name, dev, sizeof ifr.ifr_name - 1);
+ ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+ if(ioctl(fd, TUNSETIFF, &ifr) < 0){
+ close(fd);
+ return -1;
+ }
+ return fd;
+}
+#elif defined(__FreeBSD__)
+static int
+opentap(char *dev)
+{
+ int fd;
+ struct stat s;
+
+ if((fd = open("/dev/tap", O_RDWR)) < 0)
+ return -1;
+ return fd;
+}
+#endif
+
+static int
+setup(char *dev)
+{
+ return opentap(dev);
+}
+
+Block*
+tappkt(Ctlr *c)
+{
+ int n;
+ Block *b;
+
+ b = allocb(1514);
+ for(;;){
+ n = read(c->fd, b->rp, BALLOC(b));
+ if(n <= 0)
+ panic("fd %d read %d", c->fd, n);
+ if(memcmp(b->rp + 0, anyea, 6) == 0
+ || memcmp(b->rp + 0, c->ea, 6) == 0)
+ break;
+ }
+ b->wp += n;
+ b->flag |= Btcpck|Budpck|Bpktck;
+ return b;
+}
+
+static void
+taprecvkproc(void *v)
+{
+ Block *b;
+ Ether *e;
+
+ e = v;
+ while((b = tappkt(e->ctlr)))
+ etheriq(e, b, 1);
+ pexit("read fail", 1);
+}
+
+/*
+ * Drain the output queue: write each block to the tap descriptor,
+ * counting failed writes in txerrs, and free it.
+ */
+static void
+taptransmit(Ether* e)
+{
+ Block *b, *h;
+ Ctlr *c;
+
+ c = e->ctlr;
+ while ((b = qget(e->oq)) != nil) {
+ /*
+  * NOTE(review): the first test compares bytes 6..11 with the
+  * broadcast address while tappkt tests bytes 0..5; confirm intent.
+  */
+ if(memcmp(b->rp + 6, anyea, 6) == 0 ||
+ memcmp(b->rp + 0, c->ea, 6) == 0){
+ h = allocb(BLEN(b));
+ /* the data lives in [rp, wp); copying from wp read past it */
+ memcpy(h->rp, b->rp, BLEN(b));
+ h->wp += BLEN(b);
+ h->flag |= Btcpck|Budpck|Bpktck;
+ etheriq(e, h, 1);
+ }
+ if(write(c->fd, b->rp, BLEN(b)) == -1)
+ c->txerrs++;
+ freeb(b);
+ }
+}
+
+/* Format this interface's transmit error counter and copy it out with readstr. */
+static long
+tapifstat(Ether *e, void *a, long n, ulong offset)
+{
+ char buf[128];
+ Ctlr *c;
+
+ /* the controller hangs off the Ether; a is the caller's output buffer */
+ c = e->ctlr;
+ /* txerrs is a plain int */
+ snprint(buf, sizeof buf, "txerrors: %d\n", c->txerrs);
+ return readstr(offset, a, n, buf);
+}
+
+static void
+tapattach(Ether* e)
+{
+ kproc("taprecv", taprecvkproc, e);
+}
+
+static int
+tappnp(Ether* e)
+{
+ Ctlr c;
+ static int cve = 0;
+
+ while(cve < nve && ve[cve].tap == 0)
+ cve++;
+ if(cve == nve)
+ return -1;
+
+ memset(&c, 0, sizeof c);
+ c.fd = setup(ve[cve].dev);
+ memcpy(c.ea, ve[cve].ea, Eaddrlen);
+ if(c.fd== -1){
+ iprint("ve: tap failed to initialize\n");
+ cve++;
+ return -1;
+ }
+ e->ctlr = malloc(sizeof c);
+ memcpy(e->ctlr, &c, sizeof c);
+ e->tbdf = BUSUNKNOWN;
+ memcpy(e->ea, ve[cve].ea, Eaddrlen);
+ e->attach = tapattach;
+ e->transmit = taptransmit;
+ e->ifstat = tapifstat;
+ e->ni.arg = e;
+ e->ni.link = 1;
+ cve++;
+ return 0;
+}
+
+void
+ethertaplink(void)
+{
+ addethercard("tap", tappnp);
+}
diff --git a/src/9vx/fossil.9 b/src/9vx/fossil.9
Binary files differ.
diff --git a/src/9vx/main.c b/src/9vx/main.c
@@ -25,13 +25,24 @@
#include "arg.h"
#include "tos.h"
+#include "fs.h"
+
+#include "netif.h"
+#include "etherif.h"
+#include "vether.h"
+
#define Image IMAGE
#include "draw.h"
#include "memdraw.h"
#include "cursor.h"
#include "screen.h"
+#define BOOTLINELEN 64
+#define BOOTARGSLEN (3584-0x200-BOOTLINELEN)
+#define MAXCONF 100
+
extern Dev ipdevtab;
+extern Dev pipdevtab;
extern Dev drawdevtab;
extern Dev fsdevtab;
extern Dev audiodevtab;
@@ -42,8 +53,14 @@ char* argv0;
char* conffile = "9vx";
Conf conf;
+static char* inifile;
+static char inibuf[BOOTARGSLEN];
+static char *iniline[MAXCONF];
static int bootboot; /* run /boot/boot instead of bootscript */
+static int nofork; /* do not fork at init */
static int initrc; /* run rc instead of init */
+static int nogui; /* do not start the gui */
+static int usetty; /* use tty for input/output */
static char* username;
static Mach mach0;
@@ -56,13 +73,19 @@ static int singlethread;
static void bootinit(void);
static void siginit(void);
+static int readini(char *fn);
+static void inifields(void (*fp)(char*, char*));
+static void iniopt(char *name, char *value);
+static void inienv(char *name, char *value);
+
static char* getuser(void);
static char* findroot(void);
void
usage(void)
{
- fprint(2, "usage: 9vx [-gt] [-r root] [-u user]\n");
+ // TODO(yy): add debug and other options by ron
+ fprint(2, "usage: 9vx [-p file.ini] [-bfgit] [-n [tap] [netdev]] [-m macaddr] [-r root] [-u user]\n");
exit(1);
}
@@ -74,9 +97,8 @@ nop(void)
int
main(int argc, char **argv)
{
- int usetty;
- int nogui;
- int nofork;
+ int vetap;
+ char *vedev;
char buf[1024];
/* Minimal set up to make print work. */
@@ -87,6 +109,7 @@ main(int argc, char **argv)
nogui = 0;
nofork = 0;
usetty = 0;
+ nve = 0;
localroot = nil;
ARGBEGIN{
/* debugging options */
@@ -102,9 +125,6 @@ main(int argc, char **argv)
case 'K':
tracekdev++;
break;
- case 'F':
- nofork = 1;
- break;
case 'M':
tracemmu++;
break;
@@ -125,6 +145,9 @@ main(int argc, char **argv)
case 'b':
bootboot = 1;
break;
+ case 'f':
+ nofork = 1;
+ break;
case 'g':
nogui = 1;
usetty = 1;
@@ -132,6 +155,26 @@ main(int argc, char **argv)
case 'i':
initrc = 1;
break;
+ case 'p':
+ inifile = EARGF(usage());
+ break;
+ case 'm':
+ setmac(EARGF(usage()));
+ break;
+ case 'n':
+ vetap = 0;
+ vedev = ARGF();
+ if(vedev != nil && strcmp(vedev, "tap") == 0){
+ vetap = 1;
+ vedev = ARGF();
+ }
+ if(vedev != nil && vedev[0] == '-'){
+ vedev = nil;
+ argc++;
+ argv--;
+ }
+ addve(vedev, vetap);
+ break;
case 'r':
localroot = EARGF(usage());
break;
@@ -148,6 +191,13 @@ main(int argc, char **argv)
if(argc != 0)
usage();
+ if(inifile){
+ if(readini(inifile) != 0)
+ panic("error reading config file %s", inifile);
+ conffile=inifile;
+ inifields(&iniopt);
+ }
+
if(!bootboot){
if(localroot == nil && (localroot = findroot()) == nil)
panic("cannot find plan 9 root; use -r");
@@ -188,14 +238,34 @@ main(int argc, char **argv)
/*
* Debugging: tell user what options we guessed.
*/
- print("9vx %s-r %s -u %s\n", usetty ? "-t " : "", localroot, username);
+ print("9vx ");
+ if(inifile)
+ print("-p %s ", inifile);
+ if(bootboot | nofork | nogui | initrc | usetty)
+ print("-%s%s%s%s%s ", bootboot ? "b" : "", nofork ? "f " : "",
+ nogui ? "g" : "", initrc ? "i " : "", usetty ? "t " : "");
+ for(int i=0; i<nve; i++){
+ print("-n %s", ve[i].tap ? "tap ": "");
+ if(ve[i].dev != nil)
+ print("%s ", ve[i].dev);
+ if(ve[i].mac != nil)
+ print("-m %s ", ve[i].mac);
+ }
+ print("-r %s -u %s\n", localroot, username);
+
+ if(nve == 0)
+ ipdevtab = pipdevtab;
printinit();
procinit0();
initseg();
+ if(nve > 0)
+ links();
+
chandevreset();
if(!singlethread){
- makekprocdev(&ipdevtab);
+ if(nve == 0)
+ makekprocdev(&ipdevtab);
makekprocdev(&fsdevtab);
makekprocdev(&drawdevtab);
makekprocdev(&audiodevtab);
@@ -218,6 +288,144 @@ main(int argc, char **argv)
}
/*
+ * Read configuration file fn ("-" is standard input) into inibuf, clean it up and split it into iniline[]; returns 0, or -1 if nothing could be read.
+ */
+int
+readini(char *fn)
+{
+	int blankline, incomment, inspace, n, fd;
+	char *cp, *p, *q;
+
+	if(strcmp(fn, "-") == 0)
+		fd = 0;	/* "-" selects standard input, which is descriptor 0 */
+	else if((fd = open(fn, OREAD)) < 0)
+		return -1;
+
+	cp = inibuf;
+	*cp = 0;
+	n = read(fd, cp, BOOTARGSLEN-2);	/* leave room for the '\n' and the NUL appended below */
+	close(fd);
+	if(n <= 0)
+		return -1;
+
+	cp[n] = 0;
+
+	/*
+	 * Clean the text up in place (p writes, q reads, p <= q):
+	 * strip out '\r', change '\t' -> ' ',
+	 * change runs of spaces into single spaces,
+	 * strip out leading and trailing spaces, blank lines and
+	 * whole lines whose first non-blank character is '#'.
+	 *
+	 * The main need is in the case when the ini file has been
+	 * padded with lots of trailing spaces, as is the case
+	 * for plan9.ini files created during a distribution install.
+	 */
+	p = cp;
+	blankline = 1;
+	incomment = inspace = 0;
+	for(q = cp; *q; q++){
+		if(*q == '\r')
+			continue;
+		if(*q == '\t')
+			*q = ' ';
+		if(*q == ' '){
+			inspace = 1;	/* remember the gap; it is emitted only if text follows */
+			continue;
+		}
+		if(*q == '\n'){
+			if(!blankline){
+				if(!incomment)
+					*p++ = '\n';
+				blankline = 1;
+			}
+			incomment = inspace = 0;
+			continue;
+		}
+		if(inspace){
+			if(!blankline && !incomment)
+				*p++ = ' ';
+			inspace = 0;
+		}
+		if(blankline && *q == '#')
+			incomment = 1;	/* '#' starts a comment only as first text on a line */
+		blankline = 0;
+		if(!incomment)
+			*p++ = *q;
+	}
+	if(p > cp && p[-1] != '\n')
+		*p++ = '\n';
+	*p++ = 0;
+
+	getfields(cp, iniline, MAXCONF, 0, "\n");	/* one iniline[] entry per cleaned line */
+
+	return 0;
+}
+
+void
+inifields(void (*fp)(char*, char*))	/* call fp(name, value) for every name=value line in iniline[] */
+{
+	int i;
+	char *cp, *cut, saved;
+
+	for(i = 0; i < MAXCONF && iniline[i]; i++){
+		cp = strchr(iniline[i], '=');
+		if(cp == 0)
+			continue;	/* not an assignment; the line is ignored */
+		*cp++ = 0;	/* split in place: the name now ends where '=' was */
+		cut = cp - iniline[i] >= NAMELEN+1 ? iniline[i]+NAMELEN-1 : cp-1;	/* over-long names are clipped to NAMELEN-1 chars */
+		saved = *cut;
+		*cut = 0;
+		(fp)(iniline[i], cp);
+		*cut = saved;	/* undo the clipping so a later pass still finds the '=' */
+		*(cp-1) = '=';
+	}
+}
+
+void
+iniopt(char *name, char *value)	/* inifields callback: apply one ini entry to the startup options */
+{
+	char *vedev;
+	int vetap;
+
+	if(*name == '*')	/* a leading '*' is skipped here; inienv uses it to keep the entry out of the environment */
+		name++;
+	if(strcmp(name, "bootboot") == 0)
+		bootboot = 1;
+	else if(strcmp(name, "initrc") == 0)
+		initrc = 1;
+	else if(strcmp(name, "nofork") == 0)
+		nofork = 1;
+	else if(strcmp(name, "localroot") == 0 && !localroot)	/* only when not already set */
+		localroot = value;
+	else if(strcmp(name, "user") == 0 && !username)	/* only when not already set */
+		username = value;
+	else if(strcmp(name, "usetty") == 0)
+		usetty = 1;
+	else if(strcmp(name, "macaddr") == 0)
+		setmac(value);
+	else if(strcmp(name, "netdev") == 0){
+		/* a leading "tap" selects a tap device; a device name may follow one separator */
+		vetap = strncmp(value, "tap", 3) == 0;
+		if(vetap)
+			value += value[3] ? 4 : 3;	/* never step over the terminating NUL */
+		vedev = *value ? value : nil;	/* no name given: nil, as for a bare -n */
+		addve(vedev, vetap);
+	}
+	else if(strcmp(name, "nogui") == 0){
+		nogui = 1;
+		usetty = 1;
+	}
+}
+
+void
+inienv(char *name, char *value)	/* inifields callback: export one ini entry as an environment variable */
+{
+ if(*name != '*')	/* names starting with '*' are not exported */
+ ksetenv(name, value, 0);
+}
+
+/*
* Search for Plan 9 /386/bin/rc to find root.
*/
static char*
@@ -228,8 +436,7 @@ findroot(void)
char buf[1024];
char *dir[] = {
cwd,
- "/Users/rsc/9vx",
- "/home/rsc/plan9/4e"
+ "/usr/local/9vx"
};
if(getcwd(cwd, sizeof cwd) == nil){
@@ -304,6 +511,10 @@ bootinit(void)
*/
extern uchar factotumcode[];
extern long factotumlen;
+ extern uchar fossilcode[];
+ extern long fossillen;
+ extern uchar venticode[];
+ extern long ventilen;
if(bootboot){
extern uchar bootcode[];
@@ -314,6 +525,8 @@ bootinit(void)
else
addbootfile("boot", (uchar*)bootscript, strlen(bootscript));
addbootfile("factotum", factotumcode, factotumlen);
+ addbootfile("fossil", fossilcode, fossillen);
+ addbootfile("venti", venticode, ventilen);
}
static uchar *sp; /* user stack of init proc */
@@ -484,7 +697,8 @@ init0(void)
ksetenv("service", "terminal", 0);
ksetenv("user", username, 0);
ksetenv("sysname", "vx32", 0);
-
+ inifields(&inienv);
+
/* if we're not running /boot/boot, mount / and create /srv/boot */
if(!bootboot){
kbind("#Zplan9/", "/", MAFTER);
@@ -556,8 +770,13 @@ sigsegv(int signo, siginfo_t *info, void *v)
#elif defined(__FreeBSD__)
mcontext_t *mc;
mc = &uc->uc_mcontext;
+#ifdef __i386__
eip = mc->mc_eip;
esp = mc->mc_esp;
+#elif defined(__amd64__)
+ eip = mc->mc_rip;
+ esp = mc->mc_rsp;
+#endif
addr = (ulong)info->si_addr;
if(__FreeBSD__ < 7){
/*
diff --git a/src/9vx/mmu.c b/src/9vx/mmu.c
@@ -26,7 +26,7 @@ int tracemmu;
* Plan 9 assumes this, and while it's not a ton of work to break that
* assumption, it was easier not to.
*/
-#define MEMSIZE (256<<20)
+#define MEMSIZE (256<<20) // same as ../a/devether.c:13 (TODO: var)
static int pagefile;
static char* pagebase;
@@ -35,6 +35,19 @@ static Uspace uspace[16];
static Uspace *ulist[nelem(uspace)];
int nuspace = 1;
+#ifdef __i386__	/* BIT32: extra mmap flags, HINT: mmap address argument; both are read by mapzero */
+#define BIT32 0
+#define HINT nil
+#elif defined(__amd64__)
+#ifdef linux
+#define BIT32 MAP_32BIT
+#define HINT nil
+#elif defined(__FreeBSD__)
+#define BIT32 MAP_FIXED	/* NOTE(review): presumably stands in for a missing MAP_32BIT on FreeBSD - confirm */
+#define HINT (caddr_t)0x40000000
+#endif	/* NOTE(review): amd64 hosts other than linux/FreeBSD get no BIT32/HINT */
+#endif	/* NOTE(review): neither macro is defined for other architectures */
+
int
isuaddr(void *v)
{
@@ -56,15 +69,14 @@ mapzero(void)
{
int fd, bit32;
void *v;
+ void *hint;
-#ifdef i386
- bit32 = 0;
-#else
- bit32 = MAP_32BIT;
-#endif
+ bit32 = BIT32;
+ hint = HINT;
+
/* First try mmaping /dev/zero. Some OS'es don't allow this. */
if((fd = open("/dev/zero", O_RDONLY)) >= 0){
- v = mmap(nil, USTKTOP, PROT_NONE, bit32|MAP_PRIVATE, fd, 0);
+ v = mmap(hint, USTKTOP, PROT_NONE, bit32|MAP_PRIVATE, fd, 0);
if(v != MAP_FAILED) {
if((uint32_t)(uintptr)v != (uintptr)v) {
iprint("mmap returned 64-bit pointer %p\n", v);
@@ -75,7 +87,7 @@ mapzero(void)
}
/* Next try an anonymous map. */
- v = mmap(nil, USTKTOP, PROT_NONE, bit32|MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ v = mmap(hint, USTKTOP, PROT_NONE, bit32|MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if(v != MAP_FAILED) {
if((uint32_t)(uintptr)v != (uintptr)v) {
iprint("mmap returned 64-bit pointer %p\n", v);
diff --git a/src/9vx/sched.c b/src/9vx/sched.c
@@ -174,7 +174,7 @@ struct Pwaiter
};
void
-plock(Psleep *p)
+__plock(Psleep *p)
{
int r;
@@ -193,7 +193,7 @@ plock(Psleep *p)
}
void
-punlock(Psleep *p)
+__punlock(Psleep *p)
{
int r;
@@ -202,7 +202,7 @@ punlock(Psleep *p)
}
void
-psleep(Psleep *p)
+__psleep(Psleep *p)
{
int r;
Pwaiter w;
@@ -218,7 +218,7 @@ psleep(Psleep *p)
}
void
-pwakeup(Psleep *p)
+__pwakeup(Psleep *p)
{
int r;
Pwaiter *w;
diff --git a/src/9vx/sdloop.c b/src/9vx/sdloop.c
@@ -22,6 +22,7 @@ struct Ctlr{
Chan *c;
int mode;
uvlong qidpath;
+ char fn[20];
};
static Lock ctlrlock;
@@ -30,9 +31,47 @@ static Ctlr *ctlrtail;
SDifc sdloopifc;
+static void
+loopopen(Ctlr *c)	/* open the file named c->fn with mode c->mode unless a channel is already held */
+{
+ if(c->c == nil)	/* nothing opened yet */
+ c->c = namec(c->fn, Aopen, c->mode, 0);
+}
+
static SDev*
looppnp(void)
{
+ struct stat sbuf;
+ char c, c2;
+ char fn[20];	/* the longest name built below, "#Z/dev/cciss/cNdN", is 17 chars */
+
+ for(c = 'a'; c <= 'j'; ++c){	/* /dev/sda .. /dev/sdj */
+ sprint(fn, "#Z/dev/sd%c", c);
+ if(stat(fn+2, &sbuf) == 0)	/* fn+2 skips the "#Z" prefix to get the path passed to stat */
+ loopdev(fn, ORDWR);
+ }
+ for(c = '0'; c <= '9'; ++c){	/* /dev/sd0 .. /dev/sd9 */
+ sprint(fn, "#Z/dev/sd%c",c);
+ if(stat(fn+2, &sbuf) == 0)
+ loopdev(fn, ORDWR);
+ }
+ for(c = 'a'; c <= 'j'; ++c){	/* /dev/hda .. /dev/hdj */
+ sprint(fn, "#Z/dev/hd%c", c);
+ if(stat(fn+2, &sbuf) == 0)
+ loopdev(fn, ORDWR);
+ }
+ for(c = '0'; c <= '9'; ++c){	/* /dev/wd0 .. /dev/wd9 */
+ sprint(fn, "#Z/dev/wd%c", c);
+ if(stat(fn+2, &sbuf) == 0)
+ loopdev(fn, ORDWR);
+ }
+ for(c = '0'; c <= '8'; ++c){	/* /dev/cciss/c0d0 .. /dev/cciss/c8d8 */
+ for(c2 = '0'; c2 <= '8'; ++c2){
+ sprint(fn, "#Z/dev/cciss/c%cd%c", c, c2);
+ if(stat(fn+2, &sbuf) == 0)
+ loopdev(fn, ORDWR);
+ }
+ }
return nil;
}
@@ -69,6 +108,7 @@ looponline(SDunit *unit)
sdev = unit->dev;
ctlr = sdev->ctlr;
+ loopopen(ctlr);
c = ctlr->c;
n = devtab[c->type]->stat(c, buf, sizeof buf);
if(convM2D(buf, n, &dir, nil) == 0)
@@ -99,6 +139,7 @@ looprio(SDreq *r)
unit = r->unit;
sdev = unit->dev;
ctlr = sdev->ctlr;
+ loopopen(ctlr);
cmd = r->cmd;
if((status = sdfakescsi(r, nil, 0)) != SDnostatus){
@@ -141,6 +182,7 @@ looprctl(SDunit *unit, char *p, int l)
char *e, *op;
ctlr = unit->dev->ctlr;
+ loopopen(ctlr);
e = p+l;
op = p;
@@ -170,7 +212,8 @@ loopclear1(Ctlr *ctlr)
ctlrtail = ctlr->prev;
unlock(&ctlrlock);
- cclose(ctlr->c);
+ if(ctlr->c)
+ cclose(ctlr->c);
free(ctlr);
}
@@ -187,6 +230,7 @@ looprtopctl(SDev *s, char *p, char *e)
char *r;
c = s->ctlr;
+ loopopen(c);
r = "ro";
if(c->mode == ORDWR)
r = "rw";
@@ -219,9 +263,9 @@ loopdev(char *name, int mode)
Ctlr *volatile ctlr;
SDev *volatile sdev;
- c = namec(name, Aopen, mode, 0);
ctlr = nil;
sdev = nil;
+/*
if(waserror()){
cclose(c);
if(ctlr)
@@ -230,6 +274,7 @@ loopdev(char *name, int mode)
free(sdev);
nexterror();
}
+*/
ctlr = smalloc(sizeof *ctlr);
sdev = smalloc(sizeof *sdev);
@@ -238,9 +283,11 @@ loopdev(char *name, int mode)
sdev->nunit = 1;
sdev->idno = '0';
ctlr->sdev = sdev;
- ctlr->c = c;
+ strcpy(ctlr->fn, name);
ctlr->mode = mode;
+/*
poperror();
+*/
lock(&ctlrlock);
ctlr->next = nil;
@@ -277,11 +324,5 @@ SDifc sdloopifc = {
loopwtopctl,
};
-SDifc *sdifc[] =
-{
- &sdloopifc,
- nil
-};
-
diff --git a/src/9vx/u.h b/src/9vx/u.h
@@ -17,3 +17,4 @@ typedef int socklen_t;
#define nil ((void*)0)
#define sleep _ksleep
#define syscall _ksyscall
+#define atoi(x) strtol(x, 0, 0)
diff --git a/src/9vx/venti.9 b/src/9vx/venti.9
Binary files differ.
diff --git a/src/9vx/vether.c b/src/9vx/vether.c
@@ -0,0 +1,122 @@
+#include "u.h"
+#include "mem.h"
+#include "lib.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+#include "ip/ip.h"
+#include "netif.h"
+#include "etherif.h"
+#include "vether.h"
+#include "sd.h"
+
+extern int nettap;
+extern void ethertaplink(void);
+extern void etherpcaplink(void);
+extern void ethermediumlink(void);
+extern void loopbackmediumlink(void);
+extern void netdevmediumlink(void);
+
+extern void ilinit(Fs*);
+extern void tcpinit(Fs*);
+extern void udpinit(Fs*);
+extern void ipifcinit(Fs*);
+extern void icmpinit(Fs*);
+extern void icmp6init(Fs*);
+extern void greinit(Fs*);
+extern void ipmuxinit(Fs*);
+extern void espinit(Fs*);
+
+extern SDifc sdloopifc;
+extern SDifc sdaoeifc;
+
+void
+setmac(char *macaddr)	/* give the most recently added card (see addve) the MAC address written in macaddr */
+{
+	int i;
+	char *end;
+
+	if(nve == 0)
+		return;	/* no card has been added yet: the address is ignored */
+	ve[nve-1].mac = macaddr;	/* keep the text; a non-nil mac marks the address as explicit */
+	for(i = 0; i < Eaddrlen; i++){
+		ve[nve-1].ea[i] = (uchar)strtoul(macaddr, &end, 16);	/* one hex byte */
+		macaddr = *end ? end+1 : end;	/* skip one separator, but never the terminating NUL */
+	}
+}
+
+static int
+eainuse(int n, uchar ea[Eaddrlen])	/* -1 if ea equals the address of a card below index n or of any card with an explicit mac, else 0 */
+{
+	Vether *v;
+
+	for(v = ve; v < ve+nve; v++)
+		if(memcmp(ea, v->ea, Eaddrlen) == 0 && (v-ve < n || v->mac != nil))
+			return -1;
+	return 0;
+}
+
+void
+addve(char *dev, int tap)	/* append a virtual ethernet card to ve[]; panics once MaxEther cards exist */
+{
+ if(nve == MaxEther)	/* table is full */
+ panic("too many virtual ether cards");
+ ve[nve].tap = tap;
+ ve[nve].dev = dev;	/* the pointer is stored, the string is not copied */
+ ve[nve].mac = nil;	/* no explicit MAC yet; setmac fills it in for the last card added */
+ nve++;
+}
+
+void links(void) {	/* call the medium link functions, then ethertaplink or etherpcaplink once per configured card */
+ static uchar ea[Eaddrlen] = {0x00, 0x00, 0x09, 0x00, 0x00, 0x00};	/* candidate for automatically assigned addresses; static, so it carries over between cards */
+
+ ethermediumlink();
+ loopbackmediumlink();
+ netdevmediumlink();
+ for(int i=0; i<nve; i++){
+ if(ve[i].mac == nil){	/* no explicit MAC was set for this card */
+ while(eainuse(i, ea))	/* bump the last byte until the candidate is free */
+ ea[5]++;
+ memcpy(ve[i].ea, ea, Eaddrlen);
+ }
+ if(ve[i].tap == 1)
+ ethertaplink();
+ else
+ etherpcaplink();
+ }
+}
+
+void (*ipprotoinit[])(Fs*) = {	/* nil-terminated table of the protocol init functions declared extern above */
+ ilinit,
+ tcpinit,
+ udpinit,
+ ipifcinit,
+ icmpinit,
+ icmp6init,
+ greinit,
+ ipmuxinit,
+ espinit,
+ nil,	/* terminator */
+};
+
+int
+eafrom(char *ma, uchar ea[6])	/* parse six hex bytes, each followed by one separator character, from ma into ea; 0 on success, -1 if ma is nil or ends early */
+{
+	int i;
+	char *end;
+
+	for(i = 0; i < 6; i++){
+		if(ma == nil || *ma == 0)
+			return -1;	/* fewer than six bytes present */
+		ea[i] = (uchar)strtoul(ma, &end, 16);
+		ma = *end ? end+1 : end;	/* skip one separator, but never the terminating NUL */
+	}
+	return 0;
+}
+
+SDifc *sdifc[] =	/* 0-terminated table of storage interfaces; this patch removes the definition from sdloop.c and adds sdaoeifc */
+{
+ &sdloopifc,
+ &sdaoeifc,
+ 0,	/* terminator */
+};
diff --git a/src/9vx/vether.h b/src/9vx/vether.h
@@ -0,0 +1,15 @@
+typedef struct Vether Vether;
+struct Vether	/* one virtual ethernet card recorded by addve */
+{
+ int tap;	/* 1: links() calls ethertaplink for this card; otherwise etherpcaplink */
+ char *dev;	/* device name as given by the caller of addve; may be nil */
+ char *mac;	/* MAC address text passed to setmac, or nil when links() is to pick one */
+ uchar ea[Eaddrlen];	/* MAC address bytes, set by setmac or by links() */
+};
+
+Vether ve[MaxEther+1];	/* NOTE(review): defined, not declared extern, in a header that main.c and vether.c both include */
+int nve;	/* number of entries of ve[] in use; same note as for ve */
+
+void setmac(char*);	/* set the MAC address of the last card added */
+void addve(char*, int);	/* add a card: device name, tap flag */
+void links();	/* defined in vether.c as links(void) */
diff --git a/src/libvx32/Makefrag b/src/libvx32/Makefrag
@@ -1,8 +1,12 @@
ifeq ($(ARCH),x86_64)
VX32_RUN = run64.o
else
+ifeq ($(ARCH),amd64)
+VX32_RUN = run64.o
+else
VX32_RUN = run32.o
endif
+endif
ifeq ($(OS),darwin)
VX32_RUN := $(VX32_RUN) darwin-asm.o
diff --git a/src/libvx32/freebsd.c b/src/libvx32/freebsd.c
@@ -20,18 +20,34 @@
#warning "libvx32 and FreeBSD 5 and 6's libpthread are not compatible."
#endif
+#ifdef __i386__	/* only the descriptor type in the signature differs per architecture; the body is shared */
static void setbase(struct segment_descriptor *desc, unsigned long base)
+#elif defined __amd64__
+static void setbase(struct user_segment_descriptor *desc, unsigned long base)
+#endif	/* NOTE(review): no signature is emitted when neither macro is defined */
{
desc->sd_lobase = base & 0xffffff;
desc->sd_hibase = base >> 24;
}
+#ifdef __i386__	/* only the descriptor type in the signature differs per architecture; the body is shared */
static void setlimit(struct segment_descriptor *desc, unsigned long limit)
+#elif defined __amd64__
+static void setlimit(struct user_segment_descriptor *desc, unsigned long limit)
+#endif	/* NOTE(review): no signature is emitted when neither macro is defined */
{
desc->sd_lolimit = limit & 0xffff;
desc->sd_hilimit = limit >> 16;
}
+/*
+#ifdef __amd64__
+union descriptor {
+ struct user_segment_descriptor sd;
+ struct gate_descriptor gd;
+};
+#endif
+*/
int vxemu_map(vxemu *emu, vxmmap *mm)
{
@@ -52,27 +68,44 @@ int vxemu_map(vxemu *emu, vxmmap *mm)
desc.sd.sd_def32 = 1;
desc.sd.sd_gran = 1;
if(emu->datasel == 0){
+#ifdef __i386__
if ((s = i386_set_ldt(LDT_AUTO_ALLOC, &desc, 1)) < 0)
+#elif defined __amd64__
+ if ((s = sysarch(I386_SET_GSBASE, &desc)) < 0)
+#endif
return -1;
emu->datasel = (s<<3) + 4 + 3; // 4=LDT, 3=RPL
- }else if(i386_set_ldt(emu->datasel >> 3, &desc, 1) < 0)
+#ifdef __i386__
+ }else if (i386_set_ldt(emu->datasel >> 3, &desc, 1) < 0)
+#elif defined __amd64__
+ }else if (sysarch(I386_SET_GSBASE, &desc) < 0)
+#endif
return -1;
// Set up the process's vxemu segment selector (for FS).
setbase(&desc.sd, (unsigned long)emu);
setlimit(&desc.sd, (VXCODEBUFSIZE - 1) >> VXPAGESHIFT);
if(emu->emusel == 0){
+#ifdef __i386__
if ((s = i386_set_ldt(LDT_AUTO_ALLOC, &desc, 1)) < 0)
+#elif defined __amd64__
+ if ((s = sysarch(I386_SET_GSBASE, &desc)) < 0)
+#endif
return -1;
emu->emusel = (s<<3) + 4 + 3; // 4=LDT, 3=RPL
- }else if(i386_set_ldt(emu->emusel >> 3, &desc, 1) < 0)
+#ifdef __i386__
+ }else if (i386_set_ldt(emu->emusel >> 3, &desc, 1) < 0)
+#elif defined __amd64__
+ }else if (sysarch(I386_SET_GSBASE, &desc) < 0)
+#endif
return -1;
emu->ldt_base = (uintptr_t)mm->base;
emu->ldt_size = mm->size;
}
-#ifdef __x86_64
+#ifdef __amd64__
+/*
// Set up 32-bit mode code and data segments (not vxproc-specific),
// giving access to the full low 32-bit of linear address space.
// The code segment is necessary to get into 32-bit compatibility mode;
@@ -80,11 +113,9 @@ int vxemu_map(vxemu *emu, vxmmap *mm)
// doesn't give 64-bit processes a "real" data segment by default
// but instead just loads zero into the data segment selectors!
emu->runptr.sel = FLATCODE;
- desc.entry_number = emu->runptr.sel / 8;
- desc.base_addr = 0;
- desc.limit = 0xfffff;
- desc.contents = MODIFY_LDT_CONTENTS_CODE;
- if (modify_ldt(1, &desc, sizeof(desc)) < 0)
+ setbase(&desc.sd, 0);
+ setlimit(&desc.sd, 0xfffff);
+ if ((s = sysarch(I386_SET_GSBASE, &desc)) < 0)
return -1;
desc.entry_number = FLATDATA / 8;
@@ -97,6 +128,7 @@ int vxemu_map(vxemu *emu, vxmmap *mm)
extern void vxrun_return();
asm volatile("movw %%cs,%0" : "=r" (emu->retptr.sel));
emu->retptr.ofs = (uint32_t)(intptr_t)vxrun_return;
+*/
#endif
return 0;
@@ -122,28 +154,35 @@ static void dumpmcontext(mcontext_t *ctx, uint32_t cr2)
"r12 %016lx r13 %016lx\nr14 %016lx r15 %016lx\n"
"rip %016lx efl %016lx cs %04x ss %04x\n"
"err %016lx trapno %016lx cr2 %016lx\n",
- ctx->rax, ctx->rbx, ctx->rcx, ctx->rdx,
- ctx->rsi, ctx->rdi, ctx->rbp, ctx->rsp,
- ctx->r8, ctx->r9, ctx->r10, ctx->r11,
- ctx->r12, ctx->r13, ctx->r14, ctx->r15,
- ctx->rip, ctx->eflags, ctx->cs, ctx->__pad0,
- ctx->err, ctx->trapno, ctx->cr2);
+ ctx->mc_rax, ctx->mc_rbx, ctx->mc_rcx, ctx->mc_rdx,
+ ctx->mc_rsi, ctx->mc_rdi, ctx->mc_rbp, ctx->mc_rsp,
+ ctx->mc_r8, ctx->mc_r9, ctx->mc_r10, ctx->mc_r11,
+ ctx->mc_r12, ctx->mc_r13, ctx->mc_r14, ctx->mc_r15,
+ ctx->mc_rip, ctx->mc_rflags, ctx->mc_cs, ctx->mc_ss,
+ ctx->mc_err, ctx->mc_trapno, cr2);
#endif
}
static void
fprestore(int *state, int fmt)
{
+#ifdef __i386__
if(fmt == _MC_FPFMT_387)
asm volatile("frstor 0(%%eax); fwait\n" : : "a" (state) : "memory");
- else if(fmt == _MC_FPFMT_XMM){
+ else
+#endif
+ if(fmt == _MC_FPFMT_XMM){
/* Have to 16-align the 512-byte state */
char buf[512+16], *p;
p = buf;
if((long)p&15)
p += 16 - (long)p&15;
memmove(p, state, 512);
+#ifdef __i386__
asm volatile("fxrstor 0(%%eax); fwait\n" : : "a" (p) : "memory");
+#elif defined(__amd64__)
+ asm volatile("fxrstor 0(%%rax); fwait\n" : : "a" (p) : "memory");
+#endif
}else
abort();
}
@@ -167,12 +206,22 @@ int vx32_sighandler(int signo, siginfo_t *si, void *v)
// First sanity check vxproc segment number.
// FreeBSD reset the register before entering the handler!
+#ifdef __i386__
asm("movw %"VSEGSTR",%0"
: "=r" (oldvs));
vs = mc->mc_vs & 0xFFFF; /* mc_vs #defined in os.h */
+#elif defined(__amd64__)
+ if (sysarch(I386_GET_GSBASE, &vs) < 0)
+ return 0;
+#endif
+#ifdef __i386__
if(0) vxprint("vx32_sighandler signo=%d eip=%#x esp=%#x vs=%#x currentvs=%#x\n",
signo, mc->mc_eip, mc->mc_esp, vs, oldvs);
+#elif defined(__amd64__)
+ if(0) vxprint("vx32_sighandler signo=%d rip=%#x rsp=%#x vs=%#x currentvs=%#x\n",
+ signo, mc->mc_rip, mc->mc_rsp, vs, oldvs);
+#endif
if ((vs & 7) != 7) // LDT, RPL=3
return 0;
@@ -192,12 +241,21 @@ int vx32_sighandler(int signo, siginfo_t *si, void *v)
// Okay, we're convinced.
// Find current vxproc and vxemu.
+#ifdef __i386__
asm("movw %"VSEGSTR",%1\n"
"movw %2,%"VSEGSTR"\n"
"movl %"VSEGSTR":%3,%0\n"
"movw %1,%"VSEGSTR"\n"
: "=r" (vxp), "=r" (oldvs)
: "r" (vs), "m" (((vxemu*)0)->proc));
+#elif defined(__amd64__)
+ asm("movw %"VSEGSTR",%1\n"
+ "movw %2,%"VSEGSTR"\n"
+ "movw %"VSEGSTR":%3,%0\n"
+ "movw %1,%"VSEGSTR"\n"
+ : "=r" (vxp), "=r" (oldvs)
+ : "r" (vs), "m" (((vxemu*)0)->proc));
+#endif
emu = vxp->emu;
// Get back our regular host segment register state,
@@ -212,7 +270,11 @@ int vx32_sighandler(int signo, siginfo_t *si, void *v)
switch(signo){
case SIGSEGV:
newtrap = VXTRAP_PAGEFAULT;
+#ifdef __i386__
addr = (uint32_t)si->si_addr;
+#elif defined(__amd64__)
+ addr = (uint64_t)si->si_addr;
+#endif
break;
case SIGBUS:
/*
@@ -242,7 +304,11 @@ int vx32_sighandler(int signo, siginfo_t *si, void *v)
// before entering the signal handler.
addr = 0;
newtrap = VXTRAP_SINGLESTEP;
+#ifdef __i386__
mc->mc_eflags &= ~EFLAGS_TF; // Just in case.
+#elif defined(__amd64__)
+ mc->mc_rflags &= ~EFLAGS_TF; // Just in case.
+#endif
break;
default:
@@ -264,51 +330,111 @@ int vx32_sighandler(int signo, siginfo_t *si, void *v)
}
emu->cpu_trap = newtrap;
+#ifdef __i386__
r = vxemu_sighandler(emu, mc->mc_eip);
+#elif defined(__amd64__)
+ r = vxemu_sighandler(emu, mc->mc_rip);
+#endif
if (r == VXSIG_SINGLESTEP){
// Vxemu_sighandler wants us to single step.
// Execution state is in intermediate state - don't touch.
+#ifdef __i386__
mc->mc_eflags |= EFLAGS_TF; // x86 TF (single-step) bit
+#elif defined(__amd64__)
+ mc->mc_rflags |= EFLAGS_TF;
+#endif
vxrun_setup(emu);
return 1;
}
// Copy execution state into emu.
if ((r & VXSIG_SAVE_ALL) == VXSIG_SAVE_ALL) {
+#ifdef __i386__
emu->cpu.reg[EAX] = mc->mc_eax;
emu->cpu.reg[EBX] = mc->mc_ebx;
emu->cpu.reg[ECX] = mc->mc_ecx;
emu->cpu.reg[EDX] = mc->mc_edx;
- emu->cpu.reg[ESI] = mc->mc_esi;
+ emu->cpu.reg[ESI] = mc->mc_esi;
emu->cpu.reg[EDI] = mc->mc_edi;
emu->cpu.reg[ESP] = mc->mc_esp; // or esp_at_signal ???
emu->cpu.reg[EBP] = mc->mc_ebp;
emu->cpu.eflags = mc->mc_eflags;
+#elif defined(__amd64__)
+ emu->cpu.reg[EAX] = mc->mc_rax;
+ emu->cpu.reg[EBX] = mc->mc_rbx;
+ emu->cpu.reg[ECX] = mc->mc_rcx;
+ emu->cpu.reg[EDX] = mc->mc_rdx;
+ emu->cpu.reg[ESI] = mc->mc_rsi;
+ emu->cpu.reg[EDI] = mc->mc_rdi;
+ emu->cpu.reg[ESP] = mc->mc_rsp; // or esp_at_signal ???
+ emu->cpu.reg[EBP] = mc->mc_rbp;
+ emu->cpu.eflags = mc->mc_rflags;
+#endif
} else if (r & VXSIG_SAVE_ALL) {
if (r & VXSIG_SAVE_EAX)
+#ifdef __i386__
emu->cpu.reg[EAX] = mc->mc_eax;
+#elif defined(__amd64__)
+ emu->cpu.reg[EAX] = mc->mc_rax;
+#endif
if (r & VXSIG_SAVE_EBX)
+#ifdef __i386__
emu->cpu.reg[EBX] = mc->mc_ebx;
+#elif defined(__amd64__)
+ emu->cpu.reg[EBX] = mc->mc_rbx;
+#endif
if (r & VXSIG_SAVE_ECX)
+#ifdef __i386__
emu->cpu.reg[ECX] = mc->mc_ecx;
+#elif defined(__amd64__)
+ emu->cpu.reg[ECX] = mc->mc_rcx;
+#endif
if (r & VXSIG_SAVE_EDX)
+#ifdef __i386__
emu->cpu.reg[EDX] = mc->mc_edx;
+#elif defined(__amd64__)
+ emu->cpu.reg[EDX] = mc->mc_rdx;
+#endif
if (r & VXSIG_SAVE_ESI)
+#ifdef __i386__
emu->cpu.reg[ESI] = mc->mc_esi;
+#elif defined(__amd64__)
+ emu->cpu.reg[ESI] = mc->mc_rsi;
+#endif
if (r & VXSIG_SAVE_EDI)
+#ifdef __i386__
emu->cpu.reg[EDI] = mc->mc_edi;
+#elif defined(__amd64__)
+ emu->cpu.reg[EDI] = mc->mc_rdi;
+#endif
if (r & VXSIG_SAVE_ESP)
+#ifdef __i386__
emu->cpu.reg[ESP] = mc->mc_esp; // or esp_at_signal ???
+#elif defined(__amd64__)
+ emu->cpu.reg[ESP] = mc->mc_rsp; // or esp_at_signal ???
+#endif
if (r & VXSIG_SAVE_EBP)
+#ifdef __i386__
emu->cpu.reg[EBP] = mc->mc_ebp;
+#elif defined(__amd64__)
+ emu->cpu.reg[EBP] = mc->mc_rbp;
+#endif
if (r & VXSIG_SAVE_EFLAGS)
+#ifdef __i386__
emu->cpu.eflags = mc->mc_eflags;
+#elif defined(__amd64__)
+ emu->cpu.eflags = mc->mc_rflags;
+#endif
}
r &= ~VXSIG_SAVE_ALL;
if (r & VXSIG_SAVE_EBX_AS_EIP)
+#ifdef __i386__
emu->cpu.eip = mc->mc_ebx;
+#elif defined(__amd64__)
+ emu->cpu.eip = mc->mc_rbx;
+#endif
r &= ~VXSIG_SAVE_EBX_AS_EIP;
if (r & VXSIG_ADD_COUNT_TO_ESP) {
@@ -327,7 +453,11 @@ int vx32_sighandler(int signo, siginfo_t *si, void *v)
return 0;
emu->cpu.traperr = mc->mc_err;
emu->cpu.trapva = addr;
+#ifdef __i386__
memmove(&mc->mc_gs, &emu->trapenv->mc_gs, 19*4);
+#elif defined(__amd64__)
+ memmove(&mc->mc_onstack, &emu->trapenv->mc_onstack, sizeof(mcontext_t));
+#endif
return 1;
}
diff --git a/src/libvx32/run64.S b/src/libvx32/run64.S
@@ -79,7 +79,11 @@ vxrun:
movl VXEMU_EDI(%r8),%edi
// Run translated code
+#ifndef __FreeBSD__
ljmpl *VXEMU_RUNPTR(%r8) // 'ljmpq' doesn't work - gas bug??
+#else
+ ljmpq *VXEMU_RUNPTR(%r8)
+#endif
// Return from running translated code to the normal host environment.