commit ff994e189d20894dfe7276dd12ac5d5317ae3e8c
parent f5ea57b1369e2efa8381150f0b4c3a8acf8e5076
Author: Jesus Galan Lopez (yiyus) <yiyu.jgl@gmail.com>
Date: Tue, 1 Jun 2010 23:15:23 +0200
plan9 net stack, not compiling yet
Diffstat:
src/9vx/a/ip.ed | | | 2297 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/arp.c | | | 684 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/chandial.c | | | 124 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/devip.c | | | 1439 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/eipconvtest.c | | | 152 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/esp.c | | | 951 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/ethermedium.c | | | 766 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/gre.c | | | 283 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/icmp.c | | | 490 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/icmp6.c | | | 946 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/igmp.c | | | 294 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/il.c | | | 1408 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/inferno.c | | | 46 | ++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/ip.c | | | 776 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/ip.h | | | 677 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/ipaux.c | | | 368 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/ipifc.c | | | 1654 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/ipmux.c | | | 842 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/iproute.c | | | 854 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/ipv6.c | | | 718 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/ipv6.h | | | 185 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/loopbackmedium.c | | | 120 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/netdevmedium.c | | | 153 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/netlog.c | | | 261 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/nullmedium.c | | | 39 | +++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/pktmedium.c | | | 78 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/ptclbsum.c | | | 72 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/rudp.c | | | 1055 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/tcp.c | | | 3209 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/tripmedium.c | | | 398 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
src/9vx/a/ip/udp.c | | | 619 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
31 files changed, 21958 insertions(+), 0 deletions(-)
diff --git a/src/9vx/a/ip.ed b/src/9vx/a/ip.ed
@@ -0,0 +1,2297 @@
+diff -e ip.orig/arp.c ip/arp.c
+643c
+ QUNLOCK(arp);
+.
+613,614c
+ RUNLOCK(ifc);
+ QLOCK(arp);
+.
+609c
+ QUNLOCK(arp); /* for icmpns */
+.
+589c
+ if((a->rxtsrem <= 0) || !(CANRLOCK(ifc)) || (a->ifcid != ifc->ifcid)){
+.
+574c
+ QLOCK(arp);
+.
+557c
+ QUNLOCK(arp);
+.
+554c
+ QLOCK(arp);
+.
+511c
+ QUNLOCK(arp);
+.
+481c
+ QLOCK(arp);
+.
+444c
+ QUNLOCK(arp);
+.
+426c
+ QLOCK(arp);
+.
+398c
+ QUNLOCK(arp);
+.
+380c
+ RUNLOCK(ifc);
+.
+375c
+ RLOCK(ifc);
+.
+372c
+ RUNLOCK(ifc);
+.
+366c
+ QUNLOCK(arp);
+.
+337c
+ QLOCK(arp);
+.
+292c
+ QUNLOCK(arp);
+.
+260c
+ QUNLOCK(arp);
+.
+258c
+arprelease(Arp *arp, Arpent* ae)
+.
+250c
+ QUNLOCK(arp);
+.
+219c
+ QLOCK(arp);
+.
+50c
+int ReTransTimer = RETRANS_TIMER;
+.
+48c
+#define haship(s) ((ulong)((s)[IPaddrlen-1])%NHASH)
+.
+36c
+ QLock qlock;
+.
+14d
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/chandial.c ip/chandial.c
+6,7c
+#include "error.h"
+#include "ip/ip.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/devip.c ip/devip.c
+1430c
+ QUNLOCK(c);
+.
+1418c
+ QUNLOCK(c);
+.
+1404,1411c
+ QUNLOCK(c);
+.
+1399c
+ QLOCK(c);
+.
+1349c
+ QUNLOCK(c);
+.
+1326,1328d
+1322,1323d
+1318c
+ QUNLOCK(c);
+.
+1310c
+ if(CANQLOCK(c)){
+.
+1294c
+ QLOCK(c);
+.
+1185c
+ QUNLOCK(c);
+.
+1130c
+ QUNLOCK(c);
+.
+1128c
+ QLOCK(c);
+.
+1033c
+ QLOCK(c);
+.
+1029c
+ QLOCK(c);
+.
+1027c
+ QUNLOCK(c);
+.
+980c
+ QLOCK(c);
+.
+976c
+ QLOCK(c);
+.
+974c
+ QUNLOCK(c);
+.
+831c
+ QUNLOCK(p);
+.
+820,826c
+ QUNLOCK(p);
+.
+793c
+ QLOCK(p);
+.
+765c
+ QUNLOCK(p);
+.
+760c
+ QUNLOCK(p);
+.
+748c
+ QLOCK(p);
+.
+582c
+ QUNLOCK(cv);
+.
+561c
+ QUNLOCK(cv);
+.
+558c
+ QLOCK(cv);
+.
+516c
+ipremove(Chan* _)
+.
+510c
+ipcreate(Chan* _, char* __, int ___, ulong ____)
+.
+494c
+ QUNLOCK(cv);
+.
+487c
+ QLOCK(cv);
+.
+470c
+ QUNLOCK(cv);
+.
+468c
+ QLOCK(cv);
+.
+447,448c
+ QUNLOCK(cv);
+ QUNLOCK(p);
+.
+431,432c
+ QUNLOCK(cv);
+ QUNLOCK(p);
+.
+429c
+ QLOCK(cv);
+.
+427c
+ QLOCK(p);
+.
+415c
+ QUNLOCK(p);
+.
+411c
+ QUNLOCK(p);
+.
+409c
+ QLOCK(p);
+.
+174c
+ipgen(Chan *c, char* __ch, Dirtab* __dt, int __i, int s, Dir *dp)
+.
+50c
+#define QID(p, c, y) ( ((uint)(p)<<(Shiftproto)) | ((uint)(c)<<Shiftconv) | (y) )
+.
+6,7c
+#include "error.h"
+#include "ip/ip.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/esp.c ip/esp.c
+1106a
+
+
+#ifdef notdef
+enum {
+ RC4forward= 10*1024*1024, /* maximum skip forward */
+ RC4back = 100*1024, /* maximum look back */
+};
+
+typedef struct Esprc4 Esprc4;
+struct Esprc4
+{
+ ulong cseq; /* current byte sequence number */
+ RC4state current;
+
+ int ovalid; /* old is valid */
+ ulong lgseq; /* last good sequence */
+ ulong oseq; /* old byte sequence number */
+ RC4state old;
+};
+
+static void rc4espinit(Espcb *ecb, char *name, uchar *k, int n);
+
+static int /* ESP RC4 cipher callback (compiled out under #ifdef notdef); returns 1 on success, 0 when the packet is rejected */
+rc4cipher(Espcb *ecb, uchar *p, int n)
+{
+ Esprc4 *esprc4;
+ RC4state tmpstate;
+ ulong seq;
+ long d, dd;
+
+ if(n < 4) /* too short to hold the 4-byte sequence number */
+ return 0;
+
+ esprc4 = ecb->espstate;
+ if(ecb->incoming) {
+ seq = nhgetl(p); /* sequence number carried in the first 4 bytes */
+ p += 4;
+ n -= 4;
+ d = seq-esprc4->cseq; /* distance from the sequence number we expected */
+ if(d == 0) { /* in order: run the current state over the payload */
+ rc4(&esprc4->current, p, n);
+ esprc4->cseq += n;
+ if(esprc4->ovalid) {
+ dd = esprc4->cseq - esprc4->lgseq;
+ if(dd > RC4back) /* saved state is now more than RC4back behind the last gap: drop it */
+ esprc4->ovalid = 0;
+ }
+ } else if(d > 0) { /* packet is ahead of the expected position */
+print("esp rc4cipher: missing packet: %uld %ld\n", seq, d); /* this link is hosed */
+ if(d > RC4forward) {
+ strcpy(up->errstr, "rc4cipher: skipped too much");
+ return 0;
+ }
+ esprc4->lgseq = seq;
+ if(!esprc4->ovalid) { /* snapshot the current state into old before skipping ahead */
+ esprc4->ovalid = 1;
+ esprc4->oseq = esprc4->cseq;
+ memmove(&esprc4->old, &esprc4->current,
+ sizeof(RC4state));
+ }
+ rc4skip(&esprc4->current, d);
+ rc4(&esprc4->current, p, n);
+ esprc4->cseq = seq+n;
+ } else { /* d < 0: packet is behind the expected position */
+print("esp rc4cipher: reordered packet: %uld %ld\n", seq, d);
+ dd = seq - esprc4->oseq;
+ if(!esprc4->ovalid || -d > RC4back || dd < 0) {
+ strcpy(up->errstr, "rc4cipher: too far back");
+ return 0;
+ }
+ memmove(&tmpstate, &esprc4->old, sizeof(RC4state)); /* work on a copy so the stored states stay untouched */
+ rc4skip(&tmpstate, dd);
+ rc4(&tmpstate, p, n);
+ return 1;
+ }
+
+ /* move old state up */
+ if(esprc4->ovalid) {
+ dd = esprc4->cseq - RC4back - esprc4->oseq;
+ if(dd > 0) { /* keep old no more than RC4back behind cseq */
+ rc4skip(&esprc4->old, dd);
+ esprc4->oseq += dd;
+ }
+ }
+ } else { /* outgoing: write our sequence number, then run rc4 over the rest */
+ hnputl(p, esprc4->cseq);
+ p += 4;
+ n -= 4;
+ rc4(&esprc4->current, p, n);
+ esprc4->cseq += n;
+ }
+ return 1;
+}
+
+static void /* allocates a zeroed Esprc4, keys it from k, and installs rc4cipher on ecb (compiled out under #ifdef notdef) */
+rc4espinit(Espcb *ecb, char *name, uchar *k, int n)
+{
+ Esprc4 *esprc4;
+
+ /* bits to bytes */
+ n = (n+7)>>3; /* rounds up to a whole number of bytes */
+ esprc4 = smalloc(sizeof(Esprc4));
+ memset(esprc4, 0, sizeof(Esprc4));
+ setupRC4state(&esprc4->current, k, n);
+ ecb->espalg = name; /* stores the caller's pointer, not a copy */
+ ecb->espblklen = 4;
+ ecb->espivlen = 4; /* NOTE(review): presumably the 4-byte sequence number rc4cipher reads/writes — confirm */
+ ecb->cipher = rc4cipher;
+ ecb->espstate = esprc4;
+}
+#endif
+.
+1056,1081d
+1048,1050c
+ ecb->espblklen = 8;
+ ecb->espivlen = 8;
+.
+1045c
+ for(i=0; i<8; i++)
+.
+1040,1042c
+ /* bits to bytes */
+ n = (n+7)>>3;
+ if(n > 8)
+ n = 8;
+.
+1037c
+ uchar key[8], ivec[8];
+.
+1035c
+desespinit(Espcb *ecb, char *name, uchar *k, int n)
+.
+1019,1033d
+1013,1014c
+ memmove(p, ds->ivec, 8);
+ for(p += 8; p < ep; p += 8){
+ pp = p;
+ ip = ds->ivec;
+ for(eip = ip+8; ip < eip; )
+ *pp++ ^= *ip++;
+ block_cipher(ds->expanded, p, 0);
+ memmove(ds->ivec, p, 8);
+ }
+.
+1010,1011c
+ memmove(ds->ivec, p, 8);
+ p += 8;
+ while(p < ep){
+ memmove(tmp, p, 8);
+ block_cipher(ds->expanded, p, 1);
+ tp = tmp;
+ ip = ds->ivec;
+ for(eip = ip+8; ip < eip; ){
+ *p++ ^= *ip;
+ *ip++ = *tp++;
+ }
+ }
+.
+1008a
+ ep = p + n;
+.
+1006a
+ uchar tmp[8];
+ uchar *pp, *tp, *ip, *eip, *ep;
+.
+999,1003d
+993c
+ ecb->ahlen = 12;
+.
+990c
+ klen >>= 3; /* convert to bytes */
+
+.
+986c
+md5ahinit(Espcb *ecb, char *name, uchar *key, int klen)
+.
+979c
+ seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+.
+968c
+ digest = md5(opad, 64, nil, nil);
+.
+966c
+ digest = md5(ipad, 64, nil, nil);
+.
+959,962c
+ for(i=0; i<64; i++){
+ ipad[i] = 0x36;
+ opad[i] = 0x5c;
+ }
+ ipad[64] = opad[64] = 0;
+ for(i=0; i<klen; i++){
+.
+957a
+ uchar innerhash[MD5dlen];
+.
+956d
+954a
+ uchar ipad[65], opad[65];
+.
+796,952c
+void
+.
+790c
+ ecb->ahlen = 12;
+.
+786c
+ klen >>= 3; /* convert bits to bytes (divide by 8), as md5ahinit does; >>= 8 turned a 128-bit key into 0 bytes */
+.
+782c
+shaahinit(Espcb *ecb, char *name, uchar *key, int klen)
+.
+775c
+ seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+.
+772a
+ int r;
+.
+771d
+764c
+ digest = sha1(opad, 64, nil, nil);
+.
+762c
+ digest = sha1(ipad, 64, nil, nil);
+.
+755,758c
+ for(i=0; i<64; i++){
+ ipad[i] = 0x36;
+ opad[i] = 0x5c;
+ }
+ ipad[64] = opad[64] = 0;
+ for(i=0; i<klen; i++){
+.
+753a
+ uchar innerhash[SHA1dlen];
+.
+752d
+750a
+ uchar ipad[65], opad[65];
+.
+743,748c
+void
+.
+735c
+nullahinit(Espcb *ecb, char *name, uchar* _, int __)
+.
+729c
+nullauth(Espcb* _, uchar* __, int ___, uchar* ____)
+.
+720c
+nullespinit(Espcb *ecb, char *name, uchar* _, int __)
+.
+714c
+nullcipher(Espcb* _, uchar* __, int ___)
+.
+708,712d
+647c
+ QUNLOCK(c);
+.
+642c
+ QLOCK(c);
+.
+632c
+ QUNLOCK(c);
+.
+627c
+ QLOCK(c);
+.
+606c
+ QUNLOCK(esp);
+.
+600,601c
+ spi = nhgets(h->espspi);
+ QLOCK(esp);
+ c = convlookup(esp, spi);
+.
+597,598c
+ h = (Esp4hdr*)(bp->rp);
+.
+595c
+ ulong spi;
+.
+593a
+ Esp4hdr *h;
+.
+590d
+568c
+ QUNLOCK(c);
+.
+565c
+ qpass(c->rq, bp);
+.
+560,561c
+ netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", raddr,
+ laddr, spi);
+.
+557,558d
+547c
+ bp->rp += hdrlen + ecb->espivlen;
+.
+539,541c
+ QUNLOCK(c);
+ netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%d\n",
+ raddr, laddr, spi);
+.
+535c
+ et = (Esptail*)(bp->rp + hdrlen + payload);
+.
+523,529c
+ if(!ecb->cipher(ecb, bp->rp + hdrlen, payload)) {
+ QUNLOCK(c);
+print("esp: cipher failed %I -> %I!%ld: %s\n", raddr, laddr, spi, up->errstr);
+ netlog(f, Logesp, "esp: cipher failed %I -> %I!%d: %s\n", raddr,
+ laddr, spi, up->errstr);
+.
+517,519c
+ QUNLOCK(c);
+ netlog(f, Logesp, "esp: bad length %I -> %I!%d payload=%d BLEN=%d\n",
+ raddr, laddr, spi, payload, BLEN(bp));
+.
+515c
+ payload = BLEN(bp) - hdrlen - ecb->ahlen;
+.
+507,510c
+ QUNLOCK(c);
+print("esp: bad auth %I -> %I!%ld\n", raddr, laddr, spi);
+ netlog(f, Logesp, "esp: bad auth %I -> %I!%d\n", raddr,
+ laddr, spi);
+.
+502,505c
+ espspi = version == V4? ((Esp4hdr*)bp->rp)->espspi:
+ ((Esp6hdr*)bp->rp)->espspi;
+.
+493,496c
+ if(BLEN(bp) < hdrlen + ecb->espivlen + Esptaillen + ecb->ahlen) {
+ QUNLOCK(c);
+ netlog(f, Logesp, "esp: short block %I -> %I!%d\n", raddr,
+ laddr, spi);
+.
+485,486c
+ QLOCK(c);
+ QUNLOCK(esp);
+.
+477,479c
+ QUNLOCK(esp);
+ netlog(f, Logesp, "esp: no conv %I -> %I!%d\n", raddr,
+ laddr, spi);
+.
+475c
+ c = convlookup(esp, spi);
+.
+473c
+ if (version == V4) {
+ eh4 = (Esp4hdr*)bp->rp;
+ spi = nhgetl(eh4->espspi);
+ v4tov6(raddr, eh4->espsrc);
+ v4tov6(laddr, eh4->espdst);
+ } else {
+ eh6 = (Esp6hdr*)bp->rp;
+ spi = nhgetl(eh6->espspi);
+ ipmove(raddr, eh6->src);
+ ipmove(laddr, eh6->dst);
+ }
+
+ QLOCK(esp);
+.
+471d
+464,466c
+ bp = pullupblock(bp, hdrlen + Esptaillen);
+.
+462a
+ if (bp == nil || BLEN(bp) == 0) {
+ /* get enough to identify the IP version */
+ bp = pullupblock(bp, IP4HDR);
+ if(bp == nil) {
+ netlog(f, Logesp, "esp: short packet\n");
+ return;
+ }
+ }
+ eh4 = (Esp4hdr*)bp->rp;
+ version = ((eh4->vihl & 0xf0) == IP_VER4? V4: V6);
+ hdrlen = version == V4? Esp4hdrlen: Esp6hdrlen;
+.
+459,460c
+ uchar *auth, *espspi;
+ ulong spi;
+ int payload, nexthdr, version, hdrlen;
+.
+457c
+ uchar raddr[IPaddrlen], laddr[IPaddrlen];
+.
+453,454c
+ Esp4hdr *eh4;
+ Esp6hdr *eh6;
+ Esptail *et;
+ Userhdr *uh;
+.
+451c
+espiput(Proto *esp, Ipifc* _, Block *bp)
+.
+446,449d
+440c
+ if (version == V4)
+.
+438c
+ QUNLOCK(c);
+.
+434,435c
+ ecb->auth(ecb, bp->rp + iphdrlen, (hdrlen - iphdrlen) +
+.
+429,431d
+425a
+ hnputl(eh6->espspi, ecb->spi);
+ hnputl(eh6->espseq, ++ecb->seq);
+.
+424d
+420,422d
+414a
+ hnputl(eh4->espspi, ecb->spi);
+ hnputl(eh4->espseq, ++ecb->seq);
+.
+411,413c
+ /* fill in head */
+ if (version == V4) {
+.
+407,409c
+ ecb->cipher(ecb, bp->rp + hdrlen, payload + pad + Esptaillen);
+ auth = bp->rp + hdrlen + payload + pad + Esptaillen;
+.
+401c
+ eh4 = (Esp4hdr *)bp->rp;
+ eh6 = (Esp6hdr *)bp->rp;
+ et = (Esptail*)(bp->rp + hdrlen + payload + pad);
+.
+383,384c
+ bp = padblock(bp, hdrlen + ecb->espivlen);
+.
+370c
+ QUNLOCK(c);
+.
+363c
+ QLOCK(c);
+.
+358c
+ version = ipvers(c);
+ iphdrlen = version == V4? IP4HDR: IP6HDR;
+ hdrlen = version == V4? Esp4hdrlen: Esp6hdrlen;
+
+.
+356c
+ Espcb *ecb;
+ Block *bp;
+ int nexthdr, payload, pad, align, version, hdrlen, iphdrlen;
+ uchar *auth;
+.
+353d
+347,349d
+299,344d
+284,297d
+274c
+ipvers(Conv *c)
+.
+221c
+ QUNLOCK(c->p);
+.
+215c
+ QLOCK(c->p);
+.
+207,210c
+ parseip(c->raddr, argv[1]);
+.
+192c
+ char *p, *pp;
+ char *e = nil;
+.
+182,186c
+ "null", 0, nullahinit,
+ "hmac_sha1_96", 128, shaahinit, /* rfc2404 */
+// "aes_xcbc_mac_96", 128, aesahinit, /* rfc3566 */
+ "hmac_md5_96", 128, md5ahinit, /* rfc2403 */
+ nil, 0, nil,
+.
+170,177c
+ "null", 0, nullespinit,
+// "des3_cbc", 192, des3espinit, /* rfc2451 */
+// "aes_128_cbc", 128, aescbcespinit, /* rfc3602 */
+// "aes_ctr", 128, aesctrespinit, /* rfc3686 */
+ "des_56_cbc", 64, desespinit, /* rfc2405, deprecated */
+// "rc4_128", 128, rc4espinit, /* gone in rfc4305 */
+ nil, 0, nil,
+.
+163,166c
+static void nullahinit(Espcb*, char*, uchar *key, int keylen);
+static void shaahinit(Espcb*, char*, uchar *key, int keylen);
+static void md5ahinit(Espcb*, char*, uchar *key, int keylen);
+.
+157,161c
+static void nullespinit(Espcb*, char*, uchar *key, int keylen);
+static void desespinit(Espcb *ecb, char *name, uchar *k, int n);
+.
+150c
+ void (*init)(Espcb*, char* name, uchar *key, int keylen);
+.
+143d
+137d
+131d
+127c
+ int header; /* user level header */
+.
+96,107d
+86,87c
+ /* Ip6hdr; */
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */
+ uchar ploadlen[2]; /* payload length: packet length - 40 */
+ uchar proto; /* next header type */
+ uchar ttl; /* hop limit */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+
+ /* Esphdr; */
+ uchar espspi[4]; /* Security parameter index */
+ uchar espseq[4]; /* Sequence number */
+.
+80c
+ /* Esphdr; */
+ uchar espspi[4]; /* Security parameter index */
+ uchar espseq[4]; /* Sequence number */
+.
+58,64c
+ * tunnel-mode layout: IP | ESP | TCP/UDP | user data.
+ * transport-mode layout is: ESP | IP | TCP/UDP | user data.
+.
+54d
+42,47d
+32,35c
+enum
+{
+.
+30a
+typedef struct Esppriv Esppriv;
+typedef struct Espcb Espcb;
+typedef struct Algorithm Algorithm;
+.
+26,28d
+20,23c
+typedef struct Esphdr Esphdr;
+.
+14c
+#include "error.h"
+.
+10c
+#include "lib.h"
+.
+6,7c
+ * TODO: update to match rfc4303.
+.
+3,4d
+diff -e ip.orig/ethermedium.c ip/ethermedium.c
+536c
+ if((sflag = ipv6anylocal(ifc, ipsrc)) != 0)
+.
+429c
+etherremmulti(Ipifc *ifc, uchar *a, uchar *_)
+.
+407c
+etheraddmulti(Ipifc *ifc, uchar *a, uchar *_)
+.
+401c
+ RUNLOCK(ifc);
+.
+392c
+ RUNLOCK(ifc);
+.
+387c
+ if(!CANRLOCK(ifc)){
+.
+362c
+ RUNLOCK(ifc);
+.
+353c
+ RUNLOCK(ifc);
+.
+348c
+ if(!CANRLOCK(ifc)){
+.
+269c
+ * called by ipoput with a single block to write with ifc RLOCK'd
+.
+123a
+
+.
+8c
+#include "netif.h"
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/gre.c ip/gre.c
+968c
+ gre->ptclsize = 0;
+.
+919,948d
+894,916c
+ return "unknown control request";
+.
+885,892d
+881,883c
+ else if(strcmp(f[0], "cooked") == 0){
+ gpriv->raw = 0;
+ return nil;
+.
+696,879c
+ gpriv = c->p->priv;
+ if(n == 1){
+ if(strcmp(f[0], "raw") == 0){
+ gpriv->raw = 1;
+ return nil;
+.
+694c
+ GREpriv *gpriv;
+.
+691,692c
+char*
+grectl(Conv *c, char **f, int n)
+.
+681,688c
+ return snprint(buf, len, "gre: len %lud\n", gpriv->lenerr);
+.
+675,679d
+659,660c
+ if(qlen(c->rq) > 64*1024)
+ freeblist(bp);
+.
+651d
+648d
+645c
+ freeblist(bp);
+.
+643c
+ len = nhgets(ghp->len) - GRE_IPONLY;
+.
+639a
+ QUNLOCK(gre);
+
+.
+633,636c
+ if(*p == nil) {
+ QUNLOCK(gre);
+ freeblist(bp);
+.
+590,629c
+ if(c->rport == eproto &&
+ (gpriv->raw || ipcmp(c->raddr, raddr) == 0))
+.
+587d
+553,585c
+ /* Look for a conversation structure for this port and address */
+ c = nil;
+ for(p = gre->conv; *p; p++) {
+.
+547,551c
+ v4tov6(raddr, ghp->src);
+ eproto = nhgets(ghp->eproto);
+ QLOCK(gre);
+.
+536,545c
+ gpriv = gre->priv;
+ ghp = (GREhdr*)(bp->rp);
+.
+534d
+531,532c
+ ushort eproto;
+ uchar raddr[IPaddrlen];
+.
+336,529c
+ int len;
+ GREhdr *ghp;
+.
+334c
+greiput(Proto *gre, Ipifc* __, Block *bp)
+.
+328,329d
+325,326c
+ ghp->proto = IP_GREPROTO;
+ ghp->frag[0] = 0;
+ ghp->frag[1] = 0;
+.
+322c
+ hnputs(ghp->eproto, c->rport);
+.
+318,320c
+ findlocalip(c->p->f, c->laddr, raddr); /* pick interface closest to dest */
+ memmove(ghp->src, c->laddr + IPv4off, IPv4addrlen);
+.
+314,315c
+ memmove(ghp->dst, c->raddr + IPv4off, IPv4addrlen);
+ v4tov6(laddr, ghp->src);
+.
+311,312c
+ if(!((GREpriv*)c->p->priv)->raw){
+ v4tov6(raddr, ghp->dst);
+.
+308,309c
+ ghp = (GREhdr *)(bp->rp);
+ ghp->vihl = IP_VER4;
+.
+295,297d
+287,289c
+ Conv *c = x;
+ GREhdr *ghp;
+.
+283a
+int drop;
+
+.
+281c
+ c->lport = 0;
+ c->rport = 0;
+.
+247,278c
+ qclose(c->rq);
+ qclose(c->wq);
+ qclose(c->eq);
+.
+241c
+ return "pktifc does not support announce";
+.
+239c
+greannounce(Conv* _, char** __, int ___)
+.
+218,235c
+ USED(c);
+ return snprint(state, n, "%s\n", "Datagram");
+.
+211c
+ c->rq = qopen(64*1024, Qmsg, 0, c);
+.
+199c
+ QUNLOCK(p);
+.
+184c
+ QLOCK(p);
+.
+138,171c
+static char*
+.
+136d
+71,134d
+68c
+ ulong csumerr; /* checksum errors */
+ ulong lenerr; /* short packet */
+.
+66c
+struct GREpriv
+{
+ int raw; /* Raw GRE mode */
+
+.
+63c
+} GREhdr;
+.
+54c
+ uchar Unused;
+.
+46,47c
+typedef struct GREhdr
+{
+.
+21,43d
+13c
+enum
+{
+.
+9c
+#include "error.h"
+.
+5c
+#include "lib.h"
+.
+diff -e ip.orig/icmp.c ip/icmp.c
+350c
+ if(iplen > n || ((uint)iplen % 1)){ /* NOTE(review): x % 1 is always 0, so only iplen > n can trigger — confirm whether & 1 was intended */
+.
+339,341c
+ netlog(icmp->f, Logicmp, "icmpiput %d %d\n", p->type, p->code);
+.
+324c
+icmpiput(Proto *icmp, Ipifc* __, Block *bp)
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/icmp6.c ip/icmp6.c
+781c
+ bp->rp -= sizeof(IPICMP);
+.
+770c
+ bp->rp += sizeof(IPICMP);
+.
+762c
+ bp->rp -= sizeof(IPICMP);
+.
+750c
+ bp->rp += sizeof(IPICMP);
+.
+711c
+ RUNLOCK(ifc);
+.
+707c
+ RUNLOCK(ifc);
+.
+700c
+ RUNLOCK(ifc);
+.
+698c
+ RLOCK(ifc);
+.
+666c
+ sz = sizeof(IPICMP) + 8;
+.
+661c
+ if(pktsz - sizeof(Ip6hdr) < 8) {
+.
+649c
+ sz = sizeof(IPICMP) + 8;
+.
+641c
+ if(pktsz - sizeof(Ip6hdr) < 16) {
+.
+575c
+ if(iplen > n - IP6HDR || ((uint)iplen % 1) != 0) { /* NOTE(review): x % 1 is always 0, so the second test never fires — confirm whether & 1 was intended */
+.
+568c
+ if(n < sizeof(IPICMP)) {
+.
+546c
+ memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+.
+537c
+ netlog(f, Logicmp, "icmppkttoobig6 fail -> s%I d%I\n",
+.
+534c
+ netlog(f, Logicmp, "send icmppkttoobig6 -> s%I d%I\n",
+.
+518c
+ int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+.
+506c
+ memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+.
+498c
+ netlog(f, Logicmp, "icmpttlexceeded6 fail -> s%I d%I\n",
+.
+495c
+ netlog(f, Logicmp, "send icmpttlexceeded6 -> s%I d%I\n",
+.
+479c
+ int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+.
+471c
+ RUNLOCK(ifc);
+.
+457c
+ memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+.
+445c
+ netlog(f, Logicmp, "icmphostunr fail -> s%I d%I\n",
+.
+442c
+ netlog(f, Logicmp, "send icmphostunr -> s%I d%I\n",
+.
+440c
+ RLOCK(ifc);
+.
+425c
+ int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+.
+397c
+ nbp = newIPICMP(sizeof(Ndpkt));
+.
+375c
+ nbp->wp -= sizeof(Ndpkt) - sizeof(NdiscC);
+.
+354c
+ nbp = newIPICMP(sizeof(Ndpkt));
+.
+260c
+ if(blocklen(bp) < sizeof(IPICMP)){
+.
+257c
+ bp = padblock(bp, sizeof(Ip6hdr));
+.
+122c
+ QLock qlock;
+.
+109,110d
+106d
+101a
+
+.
+99,100c
+ /* ICMPpkt; */
+ uchar type;
+ uchar code;
+ uchar cksum[2];
+ uchar icmpid[2];
+ uchar seq[2];
+
+.
+97c
+struct Ndpkt
+{
+ /* NdiscC; */
+ /* IPICMP; */
+ /* Ip6hdr; */
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */
+ uchar ploadlen[2]; /* payload length: packet length - 40 */
+ uchar proto; /* next header type */
+ uchar ttl; /* hop limit */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+.
+94d
+91,92c
+ /* ICMPpkt; */
+ uchar type;
+ uchar code;
+ uchar cksum[2];
+ uchar icmpid[2];
+ uchar seq[2];
+
+.
+89c
+struct NdiscC
+{
+ /* IPICMP; */
+ /* Ip6hdr; */
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */
+ uchar ploadlen[2]; /* payload length: packet length - 40 */
+ uchar proto; /* next header type */
+ uchar ttl; /* hop limit */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+.
+85,86c
+ /* Ip6hdr; */
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */
+ uchar ploadlen[2]; /* payload length: packet length - 40 */
+ uchar proto; /* next header type */
+ uchar ttl; /* hop limit */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+
+ /* ICMPpkt; */
+ uchar type;
+ uchar code;
+ uchar cksum[2];
+ uchar icmpid[2];
+ uchar seq[2];
+.
+75,82c
+struct ICMPpkt {
+ uchar type;
+ uchar code;
+ uchar cksum[2];
+ uchar icmpid[2];
+ uchar seq[2];
+};
+.
+70c
+typedef struct ICMPpkt ICMPpkt;
+.
+9c
+#include "error.h"
+.
+5c
+#include "lib.h"
+.
+diff -e ip.orig/igmp.c ip/igmp.c
+217c
+ mp = Mediacopymulti(m);
+.
+177c
+igmpiput(Media *m, Ipifc *, Block *bp)
+.
+123c
+ byte ip[IPaddrlen];
+.
+97,99c
+ bp->wp += sizeof(IGMPpkt);
+ memset(bp->rp, 0, sizeof(IGMPpkt));
+ hnputl(p->src, Mediagetaddr(m));
+.
+87c
+igmpsendreport(Media *m, byte *addr)
+.
+68c
+ Lock lk;
+
+.
+60c
+ Media *m;
+.
+51,52d
+43,48c
+ byte vertype; /* version and type */
+ byte unused;
+ byte igmpcksum[2]; /* checksum of igmp portion */
+ byte group[IPaddrlen]; /* multicast group */
+.
+31,40c
+ byte vihl; /* Version and header length */
+ byte tos; /* Type of service */
+ byte len[2]; /* packet length (including headers) */
+ byte id[2]; /* Identification */
+ byte frag[2]; /* Fragment information */
+ byte Unused;
+ byte proto; /* Protocol */
+ byte cksum[2]; /* checksum of ip portion */
+ byte src[IPaddrlen]; /* Ip source */
+ byte dst[IPaddrlen]; /* Ip destination */
+.
+27a
+typedef char byte;
+
+.
+10c
+#include "error.h"
+.
+6c
+#include "lib.h"
+.
+1,4d
+diff -e ip.orig/inferno.c ip/inferno.c
+28a
+
+Medium tripmedium =
+{
+ "trip",
+};
+.
+25c
+bootpread(char* _, ulong __, int ___)
+.
+23a
+char* /* stub: ignores its argument and always returns the same fixed string */
+bootp(Ipifc* _)
+{
+ return "unimplmented";
+}
+
+.
+17a
+Chan* /* thin wrapper: forwards all four arguments to fdtochan unchanged and returns its result */
+commonfdtochan(int fd, int mode, int a, int b)
+{
+ return fdtochan(fd, mode, a, b);
+}
+
+.
+6c
+#include "error.h"
+#include "ip.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/ip.c ip/ip.c
+522,524c
+ if(bp->base+sizeof(Ipfrag) >= bp->rp){
+ bp = padblock(bp, sizeof(Ipfrag));
+ bp->rp += sizeof(Ipfrag);
+.
+466,467c
+ for(i = 0; i < Nstats; i++)
+ p = seprint(p, e, "%s: %lud\n", statnames[i], ip->stats[i]);
+.
+383c
+ freeb(bp);
+.
+381a
+ Conv conv;
+
+.
+322d
+320d
+301c
+ RUNLOCK(ifc);
+.
+213c
+ RUNLOCK(ifc);
+.
+211d
+196,199c
+ medialen = ifc->maxtu - ifc->m->hsize;
+.
+189c
+ RUNLOCK(ifc);
+.
+186c
+ if(!CANRLOCK(ifc))
+.
+11a
+/* MIB II counters */
+enum
+{
+ Forwarding,
+ DefaultTTL,
+ InReceives,
+ InHdrErrors,
+ InAddrErrors,
+ ForwDatagrams,
+ InUnknownProtos,
+ InDiscards,
+ InDelivers,
+ OutRequests,
+ OutDiscards,
+ OutNoRoutes,
+ ReasmTimeout,
+ ReasmReqds,
+ ReasmOKs,
+ ReasmFails,
+ FragOKs,
+ FragFails,
+ FragCreates,
+
+ Nstats,
+};
+
+struct Fragment4
+{
+ Block* blist;
+ Fragment4* next;
+ ulong src;
+ ulong dst;
+ ushort id;
+ ulong age;
+};
+
+struct Fragment6
+{
+ Block* blist;
+ Fragment6* next;
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+ uint id;
+ ulong age;
+};
+
+struct Ipfrag
+{
+ ushort foff;
+ ushort flen;
+};
+
+/* an instance of IP */
+struct IP
+{
+ ulong stats[Nstats];
+
+ QLock fraglock4;
+ Fragment4* flisthead4;
+ Fragment4* fragfree4;
+ Ref id4;
+
+ QLock fraglock6;
+ Fragment6* flisthead6;
+ Fragment6* fragfree6;
+ Ref id6;
+
+ int iprouting; /* true if we route like a gateway */
+};
+
+.
+9a
+typedef struct Fragment4 Fragment4;
+typedef struct Fragment6 Fragment6;
+typedef struct Ipfrag Ipfrag;
+
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/ip.h ip/ip.h
+732a
+Chan* commonfdtochan(int, int, int, int);
+.
+727a
+extern char* bootp(Ipifc*);
+.
+676a
+extern Medium tripmedium;
+.
+669c
+#define NOW msec()
+.
+578c
+/* RouteTree; */
+ Route* right;
+ Route* left;
+ Route* mid;
+ uchar depth;
+ uchar type;
+ uchar ifcid; /* must match ifc->id */
+ Ipifc *ifc;
+ char tag[4];
+ int ref;
+.
+516,517d
+491a
+ Logilmsg= 1<<8,
+.
+488a
+ Logil= 1<<4,
+.
+423c
+ RWlock rwlock;
+
+ Conv *conv; /* link to its conversation structure */
+.
+386c
+ QLock qlock;
+
+.
+374c
+ Lock lk;
+
+.
+312c
+ RWlock rwlock;
+.
+173c
+ QLock qlock;
+.
+153a
+typedef struct Ip4hdr Ip4hdr;
+.
+79,152d
+41c
+ Maxincall= 5,
+.
+30,35d
+8,9d
+2,3d
+diff -e ip.orig/ipaux.c ip/ipaux.c
+366c
+ UNLOCK(ht);
+.
+363c
+ UNLOCK(ht);
+.
+352c
+ UNLOCK(ht);
+.
+340c
+ UNLOCK(ht);
+.
+328c
+ UNLOCK(ht);
+.
+316c
+ UNLOCK(ht);
+.
+309c
+ LOCK(ht);
+.
+290c
+ UNLOCK(ht);
+.
+282c
+ LOCK(ht);
+.
+272c
+ UNLOCK(ht);
+.
+269c
+ LOCK(ht);
+.
+241c
+ return (ulong)(sa[IPaddrlen-1]<<24 ^ sp<< 16 ^ da[IPaddrlen-1]<<8 ^ dp) % Nhash;
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/ipifc.c ip/ipifc.c
+1575c
+ RUNLOCK(nifc);
+.
+1565c
+ RUNLOCK(nifc);
+.
+1562c
+ RLOCK(nifc);
+.
+1555c
+ RUNLOCK(nifc);
+.
+1541c
+ RUNLOCK(nifc);
+.
+1538c
+ RLOCK(nifc);
+.
+1518d
+1511d
+1498c
+ WUNLOCK(ifc);
+.
+1494c
+ WLOCK(ifc);
+.
+1491c
+ WUNLOCK(ifc);
+.
+1455c
+ WUNLOCK(ifc);
+.
+1451c
+ WLOCK(ifc);
+.
+1448c
+ WUNLOCK(ifc);
+.
+1301c
+ QUNLOCK(f->ipifc);
+.
+1265,1266c
+ if((atypel > atype && atype < atyper) ||
+ (atypel < atype && atype > atyper)){
+.
+1232,1234c
+ QLOCK(f->ipifc);
+.
+1154c
+ (isv6mcast(addr) && (addr[1] & 0xF) <= Link_local_scop))
+.
+1054c
+ QUNLOCK(f->self);
+.
+1040c
+ QLOCK(f->self);
+.
+1021c
+ QUNLOCK(f->self);
+.
+951c
+ QLOCK(f->self);
+.
+888c
+ QUNLOCK(f->self);
+.
+839c
+ QLOCK(f->self);
+.
+689c
+ WUNLOCK(ifc);
+.
+683c
+ WLOCK(ifc);
+.
+680c
+ WUNLOCK(ifc);
+.
+619c
+ WUNLOCK(ifc);
+.
+604c
+ WLOCK(ifc);
+.
+539c
+ * always called with ifc WLOCK'd
+.
+531c
+ WUNLOCK(ifc);
+.
+417c
+ WLOCK(ifc);
+.
+319c
+ c->sq = qopen(2*QMAX, 0, 0, 0);
+.
+306c
+ RUNLOCK(ifc);
+.
+299c
+ RUNLOCK(ifc);
+.
+294c
+ if(!CANRLOCK(ifc)){
+.
+266c
+ RUNLOCK(ifc);
+.
+259c
+ RLOCK(ifc);
+.
+244c
+ RUNLOCK(ifc);
+.
+238c
+ RLOCK(ifc);
+.
+212c
+ WUNLOCK(ifc);
+.
+181c
+ WLOCK(ifc);
+.
+178c
+ WUNLOCK(ifc);
+.
+162c
+ WUNLOCK(ifc);
+.
+124c
+ WUNLOCK(ifc);
+.
+120c
+ WUNLOCK(ifc);
+.
+118c
+ WLOCK(ifc);
+.
+58c
+#define hashipa(a) ( (ulong)(((a)[IPaddrlen-2]<<8) | (a)[IPaddrlen-1])%NHASH )
+.
+39c
+ QLock qlock;
+.
+18c
+ QMAX = 64*1024-1,
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/ipmux.c ip/ipmux.c
+811c
+ RUNLOCK(f);
+.
+809c
+ RLOCK(f);
+.
+742c
+ RUNLOCK(f);
+.
+680c
+ RLOCK(f);
+.
+631,633c
+ WLOCK(f);
+ i = (Ipmux *)c->p->priv;
+ ipmuxremove(&i, r->chain);
+ WUNLOCK(f);
+.
+617a
+ Ipmux *i;
+.
+610c
+ipmuxannounce(Conv* _, char** __, int ___)
+.
+583c
+ WUNLOCK(f);
+.
+581c
+ WLOCK(f);
+.
+9c
+#include "error.h"
+.
+5c
+#include "lib.h"
+.
+diff -e ip.orig/iproute.c ip/iproute.c
+469c
+ while((p = f->queue) != nil) {
+.
+425c
+ while((p = f->queue) != nil) {
+.
+359c
+ while((p = f->queue) != nil) {
+.
+313c
+ while((p = f->queue) != nil) {
+.
+213,214c
+ dl = 0; if((l = p->left) != nil) dl = l->depth;
+ dr = 0; if((r = p->right) != nil) dr = r->depth;
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/ipv6.c ip/ipv6.c
+506,508c
+ if(bp->base+sizeof(Ipfrag) >= bp->rp){
+ bp = padblock(bp, sizeof(Ipfrag));
+ bp->rp += sizeof(Ipfrag);
+.
+218c
+ RUNLOCK(ifc);
+.
+122c
+ RUNLOCK(ifc);
+.
+110c
+ RUNLOCK(ifc);
+.
+106c
+ if(!CANRLOCK(ifc))
+.
+29a
+/* MIB II counters */
+enum
+{
+ Forwarding,
+ DefaultTTL,
+ InReceives,
+ InHdrErrors,
+ InAddrErrors,
+ ForwDatagrams,
+ InUnknownProtos,
+ InDiscards,
+ InDelivers,
+ OutRequests,
+ OutDiscards,
+ OutNoRoutes,
+ ReasmTimeout,
+ ReasmReqds,
+ ReasmOKs,
+ ReasmFails,
+ FragOKs,
+ FragFails,
+ FragCreates,
+
+ Nstats,
+};
+
+static char *statnames[] =
+{
+[Forwarding] "Forwarding",
+[DefaultTTL] "DefaultTTL",
+[InReceives] "InReceives",
+[InHdrErrors] "InHdrErrors",
+[InAddrErrors] "InAddrErrors",
+[ForwDatagrams] "ForwDatagrams",
+[InUnknownProtos] "InUnknownProtos",
+[InDiscards] "InDiscards",
+[InDelivers] "InDelivers",
+[OutRequests] "OutRequests",
+[OutDiscards] "OutDiscards",
+[OutNoRoutes] "OutNoRoutes",
+[ReasmTimeout] "ReasmTimeout",
+[ReasmReqds] "ReasmReqds",
+[ReasmOKs] "ReasmOKs",
+[ReasmFails] "ReasmFails",
+[FragOKs] "FragOKs",
+[FragFails] "FragFails",
+[FragCreates] "FragCreates",
+};
+
+struct Fragment4
+{
+ Block* blist;
+ Fragment4* next;
+ ulong src;
+ ulong dst;
+ ushort id;
+ ulong age;
+};
+
+struct Fragment6
+{
+ Block* blist;
+ Fragment6* next;
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+ uint id;
+ ulong age;
+};
+
+struct Ipfrag
+{
+ ushort foff;
+ ushort flen;
+};
+
+/* an instance of IP */
+struct IP
+{
+ ulong stats[Nstats];
+
+ QLock fraglock4;
+ Fragment4* flisthead4;
+ Fragment4* fragfree4;
+ Ref id4;
+
+ QLock fraglock6;
+ Fragment6* flisthead6;
+ Fragment6* fragfree6;
+ Ref id6;
+
+ int iprouting; /* true if we route like a gateway */
+};
+
+.
+22a
+typedef struct Fragment4 Fragment4;
+typedef struct Fragment6 Fragment6;
+typedef struct Ipfrag Ipfrag;
+
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/ipv6.h ip/ipv6.h
+145c
+struct Routinghdr {
+.
+134c
+struct Opthdr {
+.
+130,131c
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */
+ uchar ploadlen[2]; /* payload length: packet length - 40 */
+ uchar proto; /* next header type */
+ uchar ttl; /* hop limit */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+.
+120,128d
+81c
+ IP6HDR = 20, /* sizeof(Ip6hdr) -- NOTE(review): the Ip6hdr fields spelled out in this patch total 8 + 2*IPaddrlen bytes, so 20 looks too small; confirm */
+.
+26a
+#undef ESP
+
+.
+diff -e ip.orig/loopbackmedium.c ip/loopbackmedium.c
+99c
+ RUNLOCK(ifc);
+.
+92c
+ RUNLOCK(ifc);
+.
+87c
+ if(!CANRLOCK(ifc)){
+.
+58c
+loopbackbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
+.
+26c
+loopbackbind(Ipifc *ifc, int _, char** __)
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/netdevmedium.c ip/netdevmedium.c
+144c
+ RUNLOCK(ifc);
+.
+136c
+ RUNLOCK(ifc);
+.
+131c
+ if(!CANRLOCK(ifc)){
+.
+85c
+netdevbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/netlog.c ip/netlog.c
+260c
+ wakeup(&f->alog->rendez);
+.
+258c
+ UNLOCK(f->alog);
+.
+242c
+ LOCK(f->alog);
+.
+228c
+ char buf[128], *t, *fp;
+.
+185c
+ set = 1;
+.
+160c
+ QUNLOCK(f->alog);
+.
+157c
+ sleep(&f->alog->rendez, netlogready, f);
+.
+155c
+ UNLOCK(f->alog);
+.
+146c
+ UNLOCK(f->alog);
+.
+134c
+ LOCK(f->alog);
+.
+129c
+ QUNLOCK(f->alog);
+.
+127c
+ QLOCK(f->alog);
+.
+122c
+netlogread(Fs *f, void *a, ulong _, long n)
+.
+109c
+ UNLOCK(f->alog);
+.
+101c
+ UNLOCK(f->alog);
+.
+99c
+ LOCK(f->alog);
+.
+92c
+ UNLOCK(f->alog);
+.
+82c
+ UNLOCK(f->alog);
+.
+80c
+ LOCK(f->alog);
+.
+28,29c
+ QLock qlock;
+ Rendez rendez;
+.
+17c
+ Lock lk;
+.
+6,7c
+#include "error.h"
+#include "ip/ip.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/nullmedium.c ip/nullmedium.c
+22c
+nullbwrite(Ipifc* _, Block* __, int ___, uchar* ____)
+.
+17c
+nullunbind(Ipifc* _)
+.
+11c
+nullbind(Ipifc* _, int __, char** ___)
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/pktmedium.c ip/pktmedium.c
+51c
+pktbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
+.
+43c
+pktunbind(Ipifc* _)
+.
+36d
+34c
+pktbind(Ipifc* _, int argc, char **argv)
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/ptclbsum.c ip/ptclbsum.c
+68c
+ while((hisum = losum>>16))
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/rudp.c ip/rudp.c
+693c
+ rudp->nc = 16;
+.
+11c
+#include "error.h"
+.
+7c
+#include "lib.h"
+.
+diff -e ip.orig/tcp.c ip/tcp.c
+3171c
+ QUNLOCK(c);
+.
+3154c
+ if(!CANQLOCK(c))
+.
+3127c
+ p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
+.
+3101c
+/* called with c QLOCKed */
+.
+3085c
+ QUNLOCK(tcp);
+.
+3080c
+ QUNLOCK(s);
+.
+3073,3074c
+ QLOCK(s);
+ QUNLOCK(tcp);
+.
+3064c
+ QLOCK(tcp);
+.
+2871,2873d
+2869c
+ if(seg->mss != 0 && seg->mss < tcb->mss)
+.
+2859d
+2842c
+ QUNLOCK(s);
+.
+2830c
+ netlog(s->p->f, Logtcprxmt, "timeout rexmit 0x%lux %d/%d\n", tcb->snd.una, tcb->timer.start, NOW);
+.
+2817c
+ QLOCK(s);
+.
+2814c
+ QUNLOCK(s);
+.
+2768c
+tcpsetchecksum(Conv *s, char **f, int _)
+.
+2737c
+ QUNLOCK(s);
+.
+2728c
+ QLOCK(s);
+.
+2725c
+ QUNLOCK(s);
+.
+2641c
+ QLOCK(s);
+.
+2638,2639c
+ if((uint)(msgs%4) == 1){
+ QUNLOCK(s);
+.
+2563c
+ netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr %lux nxt %lux\n",
+.
+2421c
+ QUNLOCK(s);
+.
+2417c
+ QUNLOCK(s);
+.
+2351c
+ QUNLOCK(s);
+.
+2189c
+ QUNLOCK(s);
+.
+2172,2174d
+2144c
+ QUNLOCK(s);
+.
+2095,2096c
+ QLOCK(s);
+ QUNLOCK(tcp);
+.
+2092c
+ QUNLOCK(s);
+.
+2072c
+ QUNLOCK(tcp);
+.
+2064c
+ QUNLOCK(tcp);
+.
+2053c
+ QUNLOCK(tcp);
+.
+2050,2051c
+ netlog(f, Logtcp, "iphtlook failed\n");
+.
+2045c
+ QLOCK(tcp);
+.
+1942c
+tcpiput(Proto *tcp, Ipifc* _, Block *bp)
+.
+1862c
+ netlog(s->p->f, Logtcp, "rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind);
+.
+1817c
+ netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %d advwin %d\n",
+.
+1685,1686d
+1683c
+ if(lp->mss != 0 && lp->mss < tcb->mss)
+.
+1626c
+ netlog(s->p->f, Logtcp, "tcpincoming s %I,%ux/%I,%ux d %I,%ux/%I,%ux v %d/%d\n",
+.
+1562c
+ QUNLOCK(tcp);
+.
+1529c
+ if(!CANQLOCK(tcp))
+.
+1421,1422d
+1334c
+ * called with s QLOCKed
+.
+1245,1246d
+1231,1232d
+1210,1211d
+1208c
+ if(optlen == MSS_LENGTH)
+.
+995d
+873c
+ * called with s QLOCKed
+.
+861,862d
+805d
+609c
+ QUNLOCK(s);
+.
+603c
+ QLOCK(s);
+.
+600c
+ QUNLOCK(s);
+.
+583,584d
+569c
+ QUNLOCK(s);
+.
+551c
+ QLOCK(s);
+.
+548c
+ QUNLOCK(s);
+.
+352c
+ ulong stats[Nstats];
+.
+317d
+293d
+231c
+ ulong window; /* Recevive window */
+.
+229c
+ ushort mss; /* Mean segment size */
+.
+193c
+ * the QLOCK in the Conv locks this structure
+.
+49,50c
+ DEF_MSS = 1460, /* Default mean segment */
+ DEF_MSS6 = 1280, /* Default mean segment (min) for v6 */
+.
+44c
+ MSS_LENGTH = 4, /* Mean segment size */
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
+diff -e ip.orig/udp.c ip/udp.c
+590,591c
+ return snprint(buf, len, "InDatagrams: %lud\nNoPorts: %lud\nInErrors: %lud\nOutDatagrams: %lud\n",
+.
+580c
+ QUNLOCK(udp);
+.
+575c
+ QUNLOCK(s);
+.
+571,572c
+ QLOCK(s);
+ QUNLOCK(udp);
+.
+562c
+ QLOCK(udp);
+.
+510c
+ QUNLOCK(c);
+.
+502c
+ QUNLOCK(c);
+.
+475c
+ QUNLOCK(c);
+.
+456,457c
+ QLOCK(c);
+ QUNLOCK(udp);
+.
+447c
+ QUNLOCK(udp);
+.
+410c
+ QUNLOCK(udp);
+.
+404c
+ QLOCK(udp);
+.
+197c
+ netlog(c->p->f, Logudp, "udp: kick\n");
+.
+103c
+ QLock qlock;
+.
+78c
+ ulong udpOutDatagrams;
+.
+75c
+ ulong udpInDatagrams;
+.
+6c
+#include "error.h"
+.
+2c
+#include "lib.h"
+.
diff --git a/src/9vx/a/ip/arp.c b/src/9vx/a/ip/arp.c
@@ -0,0 +1,684 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+
+/*
+ * address resolution tables
+ */
+enum
+{
+ NHASH = (1<<6),
+ NCACHE = 256,
+
+ AOK = 1,
+ AWAIT = 2,
+};
+
+char *arpstate[] =
+{
+ "UNUSED",
+ "OK",
+ "WAIT",
+};
+
+/*
+ * one per Fs
+ */
+struct Arp
+{
+ QLock qlock;
+ Fs *f;
+ Arpent *hash[NHASH];
+ Arpent cache[NCACHE];
+ Arpent *rxmt;
+ Proc *rxmitp; /* neib sol re-transmit proc */
+ Rendez rxmtq;
+ Block *dropf, *dropl;
+};
+
+char *Ebadarp = "bad arp";
+
+#define haship(s) ((ulong)((s)[IPaddrlen-1])%NHASH)
+
+int ReTransTimer = RETRANS_TIMER;
+
+static void rxmitproc(void *v);
+
+/*
+ * Allocate the address-resolution state for f and start the
+ * kproc (rxmitproc) that services its re-transmit and drop lists.
+ */
+void
+arpinit(Fs *f)
+{
+	Arp *arp;
+
+	arp = smalloc(sizeof(Arp));
+	f->arp = arp;
+	arp->f = f;
+	arp->rxmt = nil;
+	arp->dropf = nil;
+	arp->dropl = nil;
+	kproc("rxmitproc", rxmitproc, arp);
+}
+
+/*
+ * create a new arp entry for an ip address.
+ */
+static Arpent*
+newarp6(Arp *arp, uchar *ip, Ipifc *ifc, int addrxt)
+{
+ uint t;
+ Block *next, *xp;
+ Arpent *a, *e, *f, **l;
+ Medium *m = ifc->m;
+ int empty;
+
+ /* find oldest entry */
+ e = &arp->cache[NCACHE];
+ a = arp->cache;
+ t = a->utime;
+ for(f = a; f < e; f++){
+ if(f->utime < t){
+ t = f->utime;
+ a = f;
+ }
+ }
+
+ /* dump waiting packets */
+ xp = a->hold;
+ a->hold = nil;
+
+ if(isv4(a->ip)){
+ while(xp){
+ next = xp->list;
+ freeblist(xp);
+ xp = next;
+ }
+ }
+ else { /* queue icmp unreachable for rxmitproc later on, w/o arp lock */
+ if(xp){
+ if(arp->dropl == nil)
+ arp->dropf = xp;
+ else
+ arp->dropl->list = xp;
+
+ for(next = xp->list; next; next = next->list)
+ xp = next;
+ arp->dropl = xp;
+ wakeup(&arp->rxmtq);
+ }
+ }
+
+ /* take out of current chain */
+ l = &arp->hash[haship(a->ip)];
+ for(f = *l; f; f = f->hash){
+ if(f == a){
+ *l = a->hash;
+ break;
+ }
+ l = &f->hash;
+ }
+
+ /* insert into new chain */
+ l = &arp->hash[haship(ip)];
+ a->hash = *l;
+ *l = a;
+
+ memmove(a->ip, ip, sizeof(a->ip));
+ a->utime = NOW;
+ a->ctime = 0;
+ a->type = m;
+
+ a->rtime = NOW + ReTransTimer;
+ a->rxtsrem = MAX_MULTICAST_SOLICIT;
+ a->ifc = ifc;
+ a->ifcid = ifc->ifcid;
+
+ /* put to the end of re-transmit chain; addrxt is 0 when isv4(a->ip) */
+ if(!ipismulticast(a->ip) && addrxt){
+ l = &arp->rxmt;
+ empty = (*l==nil);
+
+ for(f = *l; f; f = f->nextrxt){
+ if(f == a){
+ *l = a->nextrxt;
+ break;
+ }
+ l = &f->nextrxt;
+ }
+ for(f = *l; f; f = f->nextrxt){
+ l = &f->nextrxt;
+ }
+ *l = a;
+ if(empty)
+ wakeup(&arp->rxmtq);
+ }
+
+ a->nextrxt = nil;
+
+ return a;
+}
+
+/*
+ * Return Arpent a to the unused state: zero its timestamps, type and
+ * state, and unlink it from its hash chain and from the re-transmit
+ * chain.  The hold/last pointers are only cleared, nothing is freed
+ * here.
+ *
+ * called with arp qlocked
+ */
+
+void
+cleanarpent(Arp *arp, Arpent *a)
+{
+ Arpent *f, **l;
+
+ a->utime = 0;
+ a->ctime = 0;
+ a->type = 0;
+ a->state = 0;
+
+ /* take out of current chain */
+ l = &arp->hash[haship(a->ip)];
+ for(f = *l; f; f = f->hash){
+ if(f == a){
+ *l = a->hash;
+ break;
+ }
+ l = &f->hash;
+ }
+
+ /* take out of re-transmit chain */
+ l = &arp->rxmt;
+ for(f = *l; f; f = f->nextrxt){
+ if(f == a){
+ *l = a->nextrxt;
+ break;
+ }
+ l = &f->nextrxt;
+ }
+ /* a->nextrxt becomes nil: a caller walking the rxmt chain cannot continue through a */
+ a->nextrxt = nil;
+ a->hash = nil;
+ a->hold = nil;
+ a->last = nil;
+ a->ifc = nil;
+}
+
+/*
+ * fill in the media address if we have it. Otherwise return an
+ * Arpent that represents the state of the address resolution FSM
+ * for ip. Add the packet to be sent onto the list of packets
+ * waiting for ip->mac to be resolved.
+ *
+ * Locking: when an Arpent is returned (the entry is in AWAIT) the arp
+ * qlock is still held; arprelease() and arpresolve() both release it.
+ * When nil is returned, mac has been filled in and the qlock has
+ * already been dropped.
+ */
+Arpent*
+arpget(Arp *arp, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *mac)
+{
+ int hash;
+ Arpent *a;
+ Medium *type = ifc->m;
+ uchar v6ip[IPaddrlen];
+
+ /* the cache is keyed on IPaddrlen-byte addresses; convert v4 keys */
+ if(version == V4){
+ v4tov6(v6ip, ip);
+ ip = v6ip;
+ }
+
+ QLOCK(arp);
+ hash = haship(ip);
+ /* an entry matches only if both the address and the medium agree */
+ for(a = arp->hash[hash]; a; a = a->hash){
+ if(memcmp(ip, a->ip, sizeof(a->ip)) == 0)
+ if(type == a->type)
+ break;
+ }
+
+ if(a == nil){
+ /* only non-v4 entries are put on the re-transmit chain */
+ a = newarp6(arp, ip, ifc, (version != V4));
+ a->state = AWAIT;
+ }
+ a->utime = NOW;
+ if(a->state == AWAIT){
+ /* append bp to the tail of the entry's hold list */
+ if(bp != nil){
+ if(a->hold)
+ a->last->list = bp;
+ else
+ a->hold = bp;
+ a->last = bp;
+ bp->list = nil;
+ }
+ return a; /* return with arp qlocked */
+ }
+
+ memmove(mac, a->mac, a->type->maclen);
+
+ /* remove old entries */
+ if(NOW - a->ctime > 15*60*1000)
+ cleanarpent(arp, a);
+
+ QUNLOCK(arp);
+ return nil;
+}
+
+/*
+ * Release the arp qlock that arpget() left held when it returned
+ * an Arpent.  The Arpent argument itself is not used.
+ *
+ * called with arp locked
+ */
+void
+arprelease(Arp *arp, Arpent* ae)
+{
+ QUNLOCK(arp);
+}
+
+/*
+ * Record mac as the resolved media address of Arpent a, mark the
+ * entry AOK, and return the list of blocks that were held waiting
+ * for the resolution (nil if none).  The hold list is detached
+ * from the entry before it is returned.
+ *
+ * called with arp locked; the lock is released before returning
+ */
+Block*
+arpresolve(Arp *arp, Arpent *a, Medium *type, uchar *mac)
+{
+ Block *bp;
+ Arpent *f, **l;
+
+ /* non-v4 entries may sit on the re-transmit chain; unlink a if so */
+ if(!isv4(a->ip)){
+ l = &arp->rxmt;
+ for(f = *l; f; f = f->nextrxt){
+ if(f == a){
+ *l = a->nextrxt;
+ break;
+ }
+ l = &f->nextrxt;
+ }
+ }
+
+ memmove(a->mac, mac, type->maclen);
+ a->type = type;
+ a->state = AOK;
+ a->utime = NOW;
+ bp = a->hold;
+ a->hold = nil;
+ QUNLOCK(arp);
+
+ return bp;
+}
+
+void
+arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, int refresh)
+{
+ Arp *arp;
+ Route *r;
+ Arpent *a, *f, **l;
+ Ipifc *ifc;
+ Medium *type;
+ Block *bp, *next;
+ uchar v6ip[IPaddrlen];
+
+ arp = fs->arp;
+
+ if(n != 6){
+// print("arp: len = %d\n", n);
+ return;
+ }
+
+ switch(version){
+ case V4:
+ r = v4lookup(fs, ip, nil);
+ v4tov6(v6ip, ip);
+ ip = v6ip;
+ break;
+ case V6:
+ r = v6lookup(fs, ip, nil);
+ break;
+ default:
+ panic("arpenter: version %d", version);
+ return; /* to supress warnings */
+ }
+
+ if(r == nil){
+// print("arp: no route for entry\n");
+ return;
+ }
+
+ ifc = r->ifc;
+ type = ifc->m;
+
+ QLOCK(arp);
+ for(a = arp->hash[haship(ip)]; a; a = a->hash){
+ if(a->type != type || (a->state != AWAIT && a->state != AOK))
+ continue;
+
+ if(ipcmp(a->ip, ip) == 0){
+ a->state = AOK;
+ memmove(a->mac, mac, type->maclen);
+
+ if(version == V6){
+ /* take out of re-transmit chain */
+ l = &arp->rxmt;
+ for(f = *l; f; f = f->nextrxt){
+ if(f == a){
+ *l = a->nextrxt;
+ break;
+ }
+ l = &f->nextrxt;
+ }
+ }
+
+ a->ifc = ifc;
+ a->ifcid = ifc->ifcid;
+ bp = a->hold;
+ a->hold = nil;
+ if(version == V4)
+ ip += IPv4off;
+ a->utime = NOW;
+ a->ctime = a->utime;
+ QUNLOCK(arp);
+
+ while(bp){
+ next = bp->list;
+ if(ifc != nil){
+ if(waserror()){
+ RUNLOCK(ifc);
+ nexterror();
+ }
+ RLOCK(ifc);
+ if(ifc->m != nil)
+ ifc->m->bwrite(ifc, bp, version, ip);
+ else
+ freeb(bp);
+ RUNLOCK(ifc);
+ poperror();
+ } else
+ freeb(bp);
+ bp = next;
+ }
+ return;
+ }
+ }
+
+ if(refresh == 0){
+ a = newarp6(arp, ip, ifc, 0);
+ a->state = AOK;
+ a->type = type;
+ a->ctime = NOW;
+ memmove(a->mac, mac, type->maclen);
+ }
+
+ QUNLOCK(arp);
+}
+
+/*
+ * Handle a request written to the arp file.  The request is copied
+ * (clipped to the local buffer), a trailing newline is removed, and
+ * it is split into at most four space-separated fields:
+ *	flush			empty the whole cache
+ *	add [medium] ip mac	enter a mapping via the medium's ares hook
+ *	del ip			unlink and clear the entry for ip
+ * Returns the (possibly clipped) number of bytes consumed.
+ * Malformed requests raise an error instead of returning.
+ */
+int
+arpwrite(Fs *fs, char *s, int len)
+{
+	int n;
+	Route *r;
+	Arp *arp;
+	Block *bp;
+	Arpent *a, *fl, **l;
+	Medium *m;
+	char *f[4], buf[256];
+	uchar ip[IPaddrlen], mac[MAClen];
+
+	arp = fs->arp;
+
+	if(len == 0)
+		error(Ebadarp);
+	if(len >= sizeof(buf))
+		len = sizeof(buf)-1;
+	strncpy(buf, s, len);
+	buf[len] = 0;
+	if(len > 0 && buf[len-1] == '\n')
+		buf[len-1] = 0;
+
+	n = getfields(buf, f, 4, 1, " ");
+	/*
+	 * A blank request (e.g. just "\n" or only spaces) yields no
+	 * fields, leaving f[0] unset; reject it before looking at f[0].
+	 */
+	if(n < 1)
+		error(Ebadarp);
+	if(strcmp(f[0], "flush") == 0){
+		QLOCK(arp);
+		for(a = arp->cache; a < &arp->cache[NCACHE]; a++){
+			memset(a->ip, 0, sizeof(a->ip));
+			memset(a->mac, 0, sizeof(a->mac));
+			a->hash = nil;
+			a->state = 0;
+			a->utime = 0;
+			/* free every packet still waiting on this entry */
+			while(a->hold != nil){
+				bp = a->hold->list;
+				freeblist(a->hold);
+				a->hold = bp;
+			}
+		}
+		memset(arp->hash, 0, sizeof(arp->hash));
+		/* clear all pkts on these lists (rxmt, dropf/l) */
+		arp->rxmt = nil;
+		arp->dropf = nil;
+		arp->dropl = nil;
+		QUNLOCK(arp);
+	} else if(strcmp(f[0], "add") == 0){
+		switch(n){
+		default:
+			error(Ebadarg);
+		case 3:
+			/* "add ip mac": the medium comes from the route to ip */
+			if (parseip(ip, f[1]) == -1)
+				error(Ebadip);
+			if(isv4(ip))
+				r = v4lookup(fs, ip+IPv4off, nil);
+			else
+				r = v6lookup(fs, ip, nil);
+			if(r == nil)
+				error("Destination unreachable");
+			m = r->ifc->m;
+			n = parsemac(mac, f[2], m->maclen);
+			break;
+		case 4:
+			/* "add medium ip mac": the medium is named explicitly */
+			m = ipfindmedium(f[1]);
+			if(m == nil)
+				error(Ebadarp);
+			if (parseip(ip, f[2]) == -1)
+				error(Ebadip);
+			n = parsemac(mac, f[3], m->maclen);
+			break;
+		}
+
+		if(m->ares == nil)
+			error(Ebadarp);
+
+		m->ares(fs, V6, ip, mac, n, 0);
+	} else if(strcmp(f[0], "del") == 0){
+		if(n != 2)
+			error(Ebadarg);
+
+		if (parseip(ip, f[1]) == -1)
+			error(Ebadip);
+		QLOCK(arp);
+
+		/* unlink the first entry with this address from its hash chain */
+		l = &arp->hash[haship(ip)];
+		for(a = *l; a; a = a->hash){
+			if(memcmp(ip, a->ip, sizeof(a->ip)) == 0){
+				*l = a->hash;
+				break;
+			}
+			l = &a->hash;
+		}
+
+		if(a){
+			/* take out of re-transmit chain */
+			l = &arp->rxmt;
+			for(fl = *l; fl; fl = fl->nextrxt){
+				if(fl == a){
+					*l = a->nextrxt;
+					break;
+				}
+				l = &fl->nextrxt;
+			}
+
+			a->nextrxt = nil;
+			a->hash = nil;
+			/* NOTE(review): unlike flush, held blocks are dropped here without being freed -- confirm */
+			a->hold = nil;
+			a->last = nil;
+			a->ifc = nil;
+			memset(a->ip, 0, sizeof(a->ip));
+			memset(a->mac, 0, sizeof(a->mac));
+		}
+		QUNLOCK(arp);
+	} else
+		error(Ebadarp);
+
+	return len;
+}
+
+/*
+ * arpread() works in whole lines of Alinelen bytes.  The fixed field
+ * widths in aformat (6+1+8+1+40+1+32 plus the newline) add up to the
+ * same 90, so the two must be changed together.
+ */
+enum
+{
+ Alinelen= 90,
+};
+
+char *aformat = "%-6.6s %-8.8s %-40.40I %-32.32s\n";
+
+/*
+ * Write the n bytes at mac into p as text, two hex digits per byte.
+ * p needs room for 2*n+1 bytes.  The result is always a valid
+ * (possibly empty) string, even when n <= 0.
+ */
+static void
+convmac(char *p, uchar *mac, int n)
+{
+	*p = 0;		/* nothing is printed for n <= 0; still terminate */
+	while(n-- > 0)
+		p += sprint(p, "%2.2ux", *mac++);
+}
+
+/*
+ * Format in-use cache entries into p, one Alinelen-byte line each.
+ * offset must be a multiple of Alinelen (otherwise nothing is read)
+ * and selects how many in-use entries to skip; len is rounded down
+ * to whole lines.  Returns the number of bytes written.
+ *
+ * The qlock is held across the whole scan so that the state test and
+ * the later a->type dereference see the same entry: cleanarpent()
+ * zeroes a->type, so testing the state without the lock could be
+ * followed by a nil dereference.
+ */
+int
+arpread(Arp *arp, char *p, ulong offset, int len)
+{
+	Arpent *a;
+	int n;
+	char mac[2*MAClen+1];
+
+	if(offset % Alinelen)
+		return 0;
+
+	offset = offset/Alinelen;
+	len = len/Alinelen;
+
+	n = 0;
+	QLOCK(arp);
+	for(a = arp->cache; len > 0 && a < &arp->cache[NCACHE]; a++){
+		if(a->state == 0)
+			continue;
+		if(offset > 0){
+			offset--;
+			continue;
+		}
+		len--;
+		convmac(mac, a->mac, a->type->maclen);
+		n += sprint(p+n, aformat, a->type->name, arpstate[a->state], a->ip, mac);
+	}
+	QUNLOCK(arp);
+
+	return n;
+}
+
+/*
+ * Service the head of the re-transmit chain and the drop list.
+ *
+ * If the first entry on arp->rxmt is due (less than 3/4 of
+ * ReTransTimer remains), send a neighbour solicitation for it with
+ * icmpns() and move it to the tail of the chain.  An entry that has
+ * no re-transmits left, whose interface cannot be read-locked, or
+ * whose interface id has changed is cleaned instead, and its held
+ * packets are moved to the drop list.  Finally every block on the
+ * drop list is handed to icmphostunr().
+ *
+ * Returns the time until the (new) head of the chain is due, or 0
+ * when the chain is empty.
+ */
+extern int
+rxmitsols(Arp *arp)
+{
+	uint sflag;
+	Block *next, *xp;
+	Arpent *a, *b, **l;
+	Fs *f;
+	uchar ipsrc[IPaddrlen];
+	Ipifc *ifc = nil;
+	long nrxt;
+
+	QLOCK(arp);
+	f = arp->f;
+
+	a = arp->rxmt;
+	if(a==nil){
+		nrxt = 0;
+		goto dodrops;		/* return nrxt; */
+	}
+	nrxt = a->rtime - NOW;
+	if(nrxt > 3*ReTransTimer/4)
+		goto dodrops;		/* return nrxt; */
+
+	/*
+	 * NOTE(review): cleanarpent() sets a->nextrxt to nil, so after
+	 * one entry is dropped the loop step ends the walk; at most one
+	 * entry is discarded per call.
+	 */
+	for(; a; a = a->nextrxt){
+		ifc = a->ifc;
+		assert(ifc != nil);
+		if(a->rxtsrem > 0 && CANRLOCK(ifc)){
+			if(a->ifcid == ifc->ifcid)
+				break;		/* usable: leave the loop holding ifc's read lock */
+			/* interface changed underneath us: give the read lock back before dropping */
+			RUNLOCK(ifc);
+		}
+
+		xp = a->hold;
+		a->hold = nil;
+
+		if(xp){
+			if(arp->dropl == nil)
+				arp->dropf = xp;
+			else
+				arp->dropl->list = xp;
+		}
+
+		cleanarpent(arp, a);
+	}
+	if(a == nil)
+		goto dodrops;
+
+
+	QUNLOCK(arp);	/* for icmpns */
+	if((sflag = ipv6anylocal(ifc, ipsrc)) != SRC_UNSPEC)
+		icmpns(f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
+
+	RUNLOCK(ifc);
+	QLOCK(arp);
+
+	/* put to the end of re-transmit chain */
+	l = &arp->rxmt;
+	for(b = *l; b; b = b->nextrxt){
+		if(b == a){
+			*l = a->nextrxt;
+			break;
+		}
+		l = &b->nextrxt;
+	}
+	for(b = *l; b; b = b->nextrxt){
+		l = &b->nextrxt;
+	}
+	*l = a;
+	a->rxtsrem--;
+	a->nextrxt = nil;
+	a->rtime = NOW + ReTransTimer;
+
+	a = arp->rxmt;
+	if(a==nil)
+		nrxt = 0;
+	else
+		nrxt = a->rtime - NOW;
+
+dodrops:
+	/* detach the drop list under the lock, report on it without the lock */
+	xp = arp->dropf;
+	arp->dropf = nil;
+	arp->dropl = nil;
+	QUNLOCK(arp);
+
+	for(; xp; xp = next){
+		next = xp->list;
+		icmphostunr(f, ifc, xp, Icmp6_adr_unreach, 1);
+	}
+
+	return nrxt;
+
+}
+
+/*
+ * Sleep condition for rxmitproc: true once there is something on
+ * the re-transmit chain or on the drop list.
+ */
+static int
+rxready(void *v)
+{
+	Arp *arp;
+
+	arp = (Arp *) v;
+	if(arp->rxmt != nil)
+		return 1;
+	return arp->dropf != nil;
+}
+
+/*
+ * Body of the kproc started by arpinit().  It calls rxmitsols() in a
+ * loop and sleeps between calls according to its return value.
+ */
+static void
+rxmitproc(void *v)
+{
+ Arp *arp = v;
+ long wakeupat;
+
+ arp->rxmitp = up;
+ //print("arp rxmitproc started\n");
+ /* on any error raised below, deregister and exit the proc */
+ if(waserror()){
+ arp->rxmitp = 0;
+ pexit("hangup", 1);
+ }
+ for(;;){
+ wakeupat = rxmitsols(arp);
+ /* 0: re-transmit chain empty, wait until rxready() reports work */
+ if(wakeupat == 0)
+ sleep(&arp->rxmtq, rxready, v);
+ /* otherwise sleep only if the wait exceeds ReTransTimer/4; shorter waits loop straight back */
+ else if(wakeupat > ReTransTimer/4)
+ tsleep(&arp->rxmtq, return0, 0, wakeupat);
+ }
+}
+
diff --git a/src/9vx/a/ip/chandial.c b/src/9vx/a/ip/chandial.c
@@ -0,0 +1,124 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+#include "ip/ip.h"
+
+typedef struct DS DS;
+static Chan* call(char*, char*, DS*);
+static void _dial_string_parse(char*, DS*);
+
+enum
+{
+ Maxstring= 128,
+};
+
+/*
+ * A parsed dial string plus the extra arguments of chandial().
+ * netdir, proto and rem are set by _dial_string_parse(); netdir may be 0
+ * and proto may point at a string literal, otherwise they point into buf.
+ * local, dir and ctlp are copied from chandial()'s arguments.
+ */
+struct DS
+{
+ char buf[Maxstring]; /* private copy of the dial string, cut up in place */
+ char *netdir;
+ char *proto;
+ char *rem;
+ char *local; /* other args */
+ char *dir;
+ Chan **ctlp;
+};
+
+/*
+ * Dial dest from inside the kernel and return the open data channel.
+ * The dialstring is of the form '[/net/]proto!dest'; when it names
+ * no network directory, "/net" is used.  local, dir and ctlp are
+ * passed through to call().
+ */
+Chan*
+chandial(char *dest, char *local, char *dir, Chan **ctlp)
+{
+	char clone[Maxpath];
+	DS ds;
+
+	_dial_string_parse(dest, &ds);
+	ds.ctlp = ctlp;
+	ds.dir = dir;
+	ds.local = local;
+	if(ds.netdir == 0)
+		ds.netdir = "/net";
+
+	/* no connection server, don't translate */
+	snprint(clone, sizeof(clone), "%s/%s/clone", ds.netdir, ds.proto);
+
+	return call(clone, ds.rem, &ds);
+}
+
+/*
+ * Open the clone file, read the conversation number from it, write
+ * "connect dest [local]" to it, then open and return the data file
+ * of that conversation.
+ *
+ * clone is modified in place (cut at its last '/').  If ds->dir is
+ * set, the conversation directory name is written there with a
+ * bound of Maxpath.  If ds->ctlp is set, the still-open clone
+ * channel is stored through it; otherwise that channel is closed.
+ * On an error after the clone file is open, it is closed and the
+ * error is re-raised.
+ */
+static Chan*
+call(char *clone, char *dest, DS *ds)
+{
+ int n;
+ Chan *dchan, *cchan;
+ char name[Maxpath], data[Maxpath], *p;
+
+ cchan = namec(clone, Aopen, ORDWR, 0);
+
+ /* get directory name */
+ if(waserror()){
+ cclose(cchan);
+ nexterror();
+ }
+ n = devtab[cchan->type]->read(cchan, name, sizeof(name)-1, 0);
+ name[n] = 0;
+ for(p = name; *p == ' '; p++)
+ ;
+ /* rewrite name as the bare decimal conversation number */
+ sprint(name, "%lud", strtoul(p, 0, 0));
+ /* cut "/clone" off, leaving the protocol directory in clone */
+ p = strrchr(clone, '/');
+ *p = 0;
+ if(ds->dir)
+ snprint(ds->dir, Maxpath, "%s/%s", clone, name);
+ snprint(data, sizeof(data), "%s/%s/data", clone, name);
+
+ /* connect */
+ if(ds->local)
+ snprint(name, sizeof(name), "connect %s %s", dest, ds->local);
+ else
+ snprint(name, sizeof(name), "connect %s", dest);
+ devtab[cchan->type]->write(cchan, name, strlen(name), 0);
+
+ /* open data connection */
+ dchan = namec(data, Aopen, ORDWR, 0);
+ if(ds->ctlp)
+ *ds->ctlp = cchan;
+ else
+ cclose(cchan);
+ poperror();
+ return dchan;
+
+}
+
+/*
+ * parse a dial string
+ *
+ * str is copied (truncated to Maxstring-1 bytes) into ds->buf and cut
+ * up in place:
+ *	no '!'			proto "net", rem = whole string
+ *	"proto!rem"		netdir 0
+ *	"/dir/proto!rem"	netdir = text before the last '/' that
+ *	"#x/proto!rem"		precedes the first '!'
+ * A string that starts with '#' but has no '/' before the '!' is
+ * treated like "proto!rem"; the backwards scan is bounded by the
+ * start of the buffer so it can never run off the front.
+ */
+static void
+_dial_string_parse(char *str, DS *ds)
+{
+	char *p, *p2;
+
+	strncpy(ds->buf, str, Maxstring);
+	ds->buf[Maxstring-1] = 0;
+
+	p = strchr(ds->buf, '!');
+	if(p == 0) {
+		ds->netdir = 0;
+		ds->proto = "net";
+		ds->rem = ds->buf;
+	} else {
+		if(*ds->buf != '/' && *ds->buf != '#'){
+			ds->netdir = 0;
+			ds->proto = ds->buf;
+		} else {
+			/* find the '/' that ends the network directory */
+			for(p2 = p; p2 > ds->buf && *p2 != '/'; p2--)
+				;
+			if(*p2 == '/'){
+				*p2++ = 0;
+				ds->netdir = ds->buf;
+				ds->proto = p2;
+			} else {
+				/* "#..." with no '/': nothing to split off */
+				ds->netdir = 0;
+				ds->proto = ds->buf;
+			}
+		}
+		*p = 0;
+		ds->rem = p + 1;
+	}
+}
diff --git a/src/9vx/a/ip/devip.c b/src/9vx/a/ip/devip.c
@@ -0,0 +1,1439 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+#include "ip/ip.h"
+
+enum
+{
+ Qtopdir= 1, /* top level directory */
+ Qtopbase,
+ Qarp= Qtopbase,
+ Qbootp,
+ Qndb,
+ Qiproute,
+ Qipselftab,
+ Qlog,
+
+ Qprotodir, /* directory for a protocol */
+ Qprotobase,
+ Qclone= Qprotobase,
+ Qstats,
+
+ Qconvdir, /* directory for a conversation */
+ Qconvbase,
+ Qctl= Qconvbase,
+ Qdata,
+ Qerr,
+ Qlisten,
+ Qlocal,
+ Qremote,
+ Qstatus,
+ Qsnoop,
+
+ Logtype= 5,
+ Masktype= (1<<Logtype)-1,
+ Logconv= 12,
+ Maskconv= (1<<Logconv)-1,
+ Shiftconv= Logtype,
+ Logproto= 8,
+ Maskproto= (1<<Logproto)-1,
+ Shiftproto= Logtype + Logconv,
+
+ Nfs= 128,
+};
+#define TYPE(x) ( ((ulong)(x).path) & Masktype )
+#define CONV(x) ( (((ulong)(x).path) >> Shiftconv) & Maskconv )
+#define PROTO(x) ( (((ulong)(x).path) >> Shiftproto) & Maskproto )
+#define QID(p, c, y) ( ((uint)(p)<<(Shiftproto)) | ((uint)(c)<<Shiftconv) | (y) )
+
+static char network[] = "network";
+
+QLock fslock;
+Fs *ipfs[Nfs]; /* attached fs's */
+Queue *qlog;
+
+extern void nullmediumlink(void);
+extern void pktmediumlink(void);
+ long ndbwrite(Fs *f, char *a, ulong off, int n);
+
+static int
+ip3gen(Chan *c, int i, Dir *dp)
+{
+ Qid q;
+ Conv *cv;
+ char *p;
+
+ cv = ipfs[c->dev]->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+ if(cv->owner == nil)
+ kstrdup(&cv->owner, eve);
+ mkqid(&q, QID(PROTO(c->qid), CONV(c->qid), i), 0, QTFILE);
+
+ switch(i) {
+ default:
+ return -1;
+ case Qctl:
+ devdir(c, q, "ctl", 0, cv->owner, cv->perm, dp);
+ return 1;
+ case Qdata:
+ devdir(c, q, "data", qlen(cv->rq), cv->owner, cv->perm, dp);
+ return 1;
+ case Qerr:
+ devdir(c, q, "err", qlen(cv->eq), cv->owner, cv->perm, dp);
+ return 1;
+ case Qlisten:
+ devdir(c, q, "listen", 0, cv->owner, cv->perm, dp);
+ return 1;
+ case Qlocal:
+ p = "local";
+ break;
+ case Qremote:
+ p = "remote";
+ break;
+ case Qsnoop:
+ if(strcmp(cv->p->name, "ipifc") != 0)
+ return -1;
+ devdir(c, q, "snoop", qlen(cv->sq), cv->owner, 0400, dp);
+ return 1;
+ case Qstatus:
+ p = "status";
+ break;
+ }
+ devdir(c, q, p, 0, cv->owner, 0444, dp);
+ return 1;
+}
+
+/*
+ * Generate the Dir entry for one of the per-protocol files
+ * ("clone" or "stats").  Returns 1 when dp was filled in and
+ * -1 when i names neither file.
+ */
+static int
+ip2gen(Chan *c, int i, Dir *dp)
+{
+	Qid q;
+	char *name;
+	int perm;
+
+	if(i == Qclone){
+		name = "clone";
+		perm = 0666;
+	}else if(i == Qstats){
+		name = "stats";
+		perm = 0444;
+	}else
+		return -1;
+
+	mkqid(&q, QID(PROTO(c->qid), 0, i), 0, QTFILE);
+	devdir(c, q, name, 0, network, perm, dp);
+	return 1;
+}
+
+static int
+ip1gen(Chan *c, int i, Dir *dp)
+{
+ Qid q;
+ char *p;
+ int prot;
+ int len = 0;
+ Fs *f;
+ extern ulong kerndate;
+
+ f = ipfs[c->dev];
+
+ prot = 0666;
+ mkqid(&q, QID(0, 0, i), 0, QTFILE);
+ switch(i) {
+ default:
+ return -1;
+ case Qarp:
+ p = "arp";
+ prot = 0664;
+ break;
+ case Qbootp:
+ p = "bootp";
+ break;
+ case Qndb:
+ p = "ndb";
+ len = strlen(f->ndb);
+ q.vers = f->ndbvers;
+ break;
+ case Qiproute:
+ p = "iproute";
+ prot = 0664;
+ break;
+ case Qipselftab:
+ p = "ipselftab";
+ prot = 0444;
+ break;
+ case Qlog:
+ p = "log";
+ break;
+ }
+ devdir(c, q, p, len, network, prot, dp);
+ if(i == Qndb && f->ndbmtime > kerndate)
+ dp->mtime = f->ndbmtime;
+ return 1;
+}
+
+static int
+ipgen(Chan *c, char* __ch, Dirtab* __dt, int __i, int s, Dir *dp)
+{
+ Qid q;
+ Conv *cv;
+ Fs *f;
+
+ f = ipfs[c->dev];
+
+ switch(TYPE(c->qid)) {
+ case Qtopdir:
+ if(s == DEVDOTDOT){
+ mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
+ sprint(up->genbuf, "#I%lud", c->dev);
+ devdir(c, q, up->genbuf, 0, network, 0555, dp);
+ return 1;
+ }
+ if(s < f->np) {
+ if(f->p[s]->connect == nil)
+ return 0; /* protocol with no user interface */
+ mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
+ devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
+ return 1;
+ }
+ s -= f->np;
+ return ip1gen(c, s+Qtopbase, dp);
+ case Qarp:
+ case Qbootp:
+ case Qndb:
+ case Qlog:
+ case Qiproute:
+ case Qipselftab:
+ return ip1gen(c, TYPE(c->qid), dp);
+ case Qprotodir:
+ if(s == DEVDOTDOT){
+ mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
+ sprint(up->genbuf, "#I%lud", c->dev);
+ devdir(c, q, up->genbuf, 0, network, 0555, dp);
+ return 1;
+ }
+ if(s < f->p[PROTO(c->qid)]->ac) {
+ cv = f->p[PROTO(c->qid)]->conv[s];
+ sprint(up->genbuf, "%d", s);
+ mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
+ devdir(c, q, up->genbuf, 0, cv->owner, 0555, dp);
+ return 1;
+ }
+ s -= f->p[PROTO(c->qid)]->ac;
+ return ip2gen(c, s+Qprotobase, dp);
+ case Qclone:
+ case Qstats:
+ return ip2gen(c, TYPE(c->qid), dp);
+ case Qconvdir:
+ if(s == DEVDOTDOT){
+ s = PROTO(c->qid);
+ mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
+ devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
+ return 1;
+ }
+ return ip3gen(c, s+Qconvbase, dp);
+ case Qctl:
+ case Qdata:
+ case Qerr:
+ case Qlisten:
+ case Qlocal:
+ case Qremote:
+ case Qstatus:
+ case Qsnoop:
+ return ip3gen(c, TYPE(c->qid), dp);
+ }
+ return -1;
+}
+
+/*
+ * Device reset: link in the null and pkt media and install eipfmt
+ * as the formatter for the i, I, E, V and M print verbs.
+ */
+static void
+ipreset(void)
+{
+	char *verb;
+
+	nullmediumlink();
+	pktmediumlink();
+
+	for(verb = "iIEVM"; *verb != 0; verb++)
+		fmtinstall(*verb, eipfmt);
+}
+
+/*
+ * Return the Fs for device number dev, creating and initializing it
+ * (ip, arp, netlog, then every protocol in ipprotoinit[]) the first
+ * time it is asked for.  Returns nil when dev is outside ipfs[].
+ */
+static Fs*
+ipgetfs(int dev)
+{
+	extern void (*ipprotoinit[])(Fs*);
+	Fs *f;
+	int i;
+
+	/* a negative dev would index ipfs[] before its start */
+	if(dev < 0 || dev >= Nfs)
+		return nil;
+
+	qlock(&fslock);
+	if(ipfs[dev] == nil){
+		f = smalloc(sizeof(Fs));
+		ip_init(f);
+		arpinit(f);
+		netloginit(f);
+		for(i = 0; ipprotoinit[i]; i++)
+			ipprotoinit[i](f);
+		f->dev = dev;
+		ipfs[dev] = f;
+	}
+	qunlock(&fslock);
+
+	return ipfs[dev];
+}
+
+/*
+ * Allocate an IPaux holding a copy of owner and a tag.  The tag
+ * field is filled with blanks and then overwritten with at most
+ * sizeof(a->tag) bytes of tag, so it is blank padded and is not
+ * NUL terminated when tag fills it.
+ */
+IPaux*
+newipaux(char *owner, char *tag)
+{
+ IPaux *a;
+ int n;
+
+ a = smalloc(sizeof(*a));
+ kstrdup(&a->owner, owner);
+ memset(a->tag, ' ', sizeof(a->tag));
+ n = strlen(tag);
+ if(n > sizeof(a->tag))
+ n = sizeof(a->tag);
+ memmove(a->tag, tag, n);
+ return a;
+}
+
+#define ATTACHER(c) (((IPaux*)((c)->aux))->owner)
+
+/*
+ * Attach to IP stack number spec (a decimal string).  The stack is
+ * created on first use.  The returned channel refers to the top
+ * directory and carries a fresh IPaux owned by commonuser().
+ * A spec outside 0..Nfs-1 raises "bad specification".
+ */
+static Chan*
+ipattach(char* spec)
+{
+	Chan *c;
+	int dev;
+
+	dev = atoi(spec);
+	/* atoi can yield a negative number, which must not reach ipfs[] */
+	if(dev < 0 || dev >= Nfs)
+		error("bad specification");
+
+	ipgetfs(dev);
+	c = devattach('I', spec);
+	mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
+	c->dev = dev;
+
+	c->aux = newipaux(commonuser(), "none");
+
+	return c;
+}
+
+/*
+ * Walk via devwalk() using ipgen.  When the walk produced a clone
+ * channel, give it its own IPaux copied from c's owner and tag.
+ */
+static Walkqid*
+ipwalk(Chan* c, Chan *nc, char **name, int nname)
+{
+ IPaux *a = c->aux;
+ Walkqid* w;
+
+ w = devwalk(c, nc, name, nname, nil, 0, ipgen);
+ if(w != nil && w->clone != nil)
+ w->clone->aux = newipaux(a->owner, a->tag);
+ return w;
+}
+
+
+/* stat: delegate to devstat(), with ipgen generating the entries */
+static int
+ipstat(Chan* c, uchar* db, int n)
+{
+ return devstat(c, db, n, nil, 0, ipgen);
+}
+
+/*
+ * Sleep condition used while listening: true once the conversation
+ * passed as arg has at least one queued incoming call.
+ */
+static int
+incoming(void* arg)
+{
+	return ((Conv*)arg)->incall != nil;
+}
+
+/*
+ * Map the low two bits of an open mode to the rwx permission bits
+ * it needs.  ipopen() indexes this with omode&3, so all four values
+ * need a slot; without the OEXEC entry that index read past the end
+ * of the table.
+ */
+static int m2p[] = {
+	[OREAD]		4,
+	[OWRITE]	2,
+	[ORDWR]		6,
+	[OEXEC]		1
+};
+
+static Chan*
+ipopen(Chan* c, int omode)
+{
+ Conv *cv, *nc;
+ Proto *p;
+ int perm;
+ Fs *f;
+
+ perm = m2p[omode&3];
+
+ f = ipfs[c->dev];
+
+ switch(TYPE(c->qid)) {
+ default:
+ break;
+ case Qndb:
+ if(omode & (OWRITE|OTRUNC) && !iseve())
+ error(Eperm);
+ if((omode & (OWRITE|OTRUNC)) == (OWRITE|OTRUNC))
+ f->ndb[0] = 0;
+ break;
+ case Qlog:
+ netlogopen(f);
+ break;
+ case Qiproute:
+ case Qarp:
+ if(omode != OREAD && !iseve())
+ error(Eperm);
+ break;
+ case Qtopdir:
+ case Qprotodir:
+ case Qconvdir:
+ case Qstatus:
+ case Qremote:
+ case Qlocal:
+ case Qstats:
+ case Qbootp:
+ case Qipselftab:
+ if(omode != OREAD)
+ error(Eperm);
+ break;
+ case Qsnoop:
+ if(omode != OREAD)
+ error(Eperm);
+ p = f->p[PROTO(c->qid)];
+ cv = p->conv[CONV(c->qid)];
+ if(strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
+ error(Eperm);
+ incref(&cv->snoopers);
+ break;
+ case Qclone:
+ p = f->p[PROTO(c->qid)];
+ QLOCK(p);
+ if(waserror()){
+ QUNLOCK(p);
+ nexterror();
+ }
+ cv = Fsprotoclone(p, ATTACHER(c));
+ QUNLOCK(p);
+ poperror();
+ if(cv == nil) {
+ error(Enodev);
+ break;
+ }
+ mkqid(&c->qid, QID(p->x, cv->x, Qctl), 0, QTFILE);
+ break;
+ case Qdata:
+ case Qctl:
+ case Qerr:
+ p = f->p[PROTO(c->qid)];
+ QLOCK(p);
+ cv = p->conv[CONV(c->qid)];
+ QLOCK(cv);
+ if(waserror()) {
+ QUNLOCK(cv);
+ QUNLOCK(p);
+ nexterror();
+ }
+ if((perm & (cv->perm>>6)) != perm) {
+ if(strcmp(ATTACHER(c), cv->owner) != 0)
+ error(Eperm);
+ if((perm & cv->perm) != perm)
+ error(Eperm);
+
+ }
+ cv->inuse++;
+ if(cv->inuse == 1){
+ kstrdup(&cv->owner, ATTACHER(c));
+ cv->perm = 0660;
+ }
+ QUNLOCK(cv);
+ QUNLOCK(p);
+ poperror();
+ break;
+ case Qlisten:
+ cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+ if((perm & (cv->perm>>6)) != perm) {
+ if(strcmp(ATTACHER(c), cv->owner) != 0)
+ error(Eperm);
+ if((perm & cv->perm) != perm)
+ error(Eperm);
+
+ }
+
+ if(cv->state != Announced)
+ error("not announced");
+
+ if(waserror()){
+ closeconv(cv);
+ nexterror();
+ }
+ QLOCK(cv);
+ cv->inuse++;
+ QUNLOCK(cv);
+
+ nc = nil;
+ while(nc == nil) {
+ /* give up if we got a hangup */
+ if(qisclosed(cv->rq))
+ error("listen hungup");
+
+ qlock(&cv->listenq);
+ if(waserror()) {
+ qunlock(&cv->listenq);
+ nexterror();
+ }
+
+ /* wait for a connect */
+ sleep(&cv->listenr, incoming, cv);
+
+ QLOCK(cv);
+ nc = cv->incall;
+ if(nc != nil){
+ cv->incall = nc->next;
+ mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
+ kstrdup(&cv->owner, ATTACHER(c));
+ }
+ QUNLOCK(cv);
+
+ qunlock(&cv->listenq);
+ poperror();
+ }
+ closeconv(cv);
+ poperror();
+ break;
+ }
+ c->mode = openmode(omode);
+ c->flag |= COPEN;
+ c->offset = 0;
+ return c;
+}
+
+/* files cannot be created in this device: always raises Eperm */
+static void
+ipcreate(Chan* _, char* __, int ___, ulong ____)
+{
+ error(Eperm);
+}
+
+/* files cannot be removed from this device: always raises Eperm */
+static void
+ipremove(Chan* _)
+{
+ error(Eperm);
+}
+
+/*
+ * wstat is accepted only on a conversation's ctl and data files;
+ * anything else raises Eperm.  The caller must be eve or the
+ * conversation's owner.  A non-empty uid replaces the owner, and the
+ * low nine mode bits replace the conversation's permissions.
+ * Returns the result of convM2D().
+ */
+static int
+ipwstat(Chan *c, uchar *dp, int n)
+{
+ Dir d;
+ Conv *cv;
+ Fs *f;
+ Proto *p;
+
+ f = ipfs[c->dev];
+ switch(TYPE(c->qid)) {
+ default:
+ error(Eperm);
+ break;
+ case Qctl:
+ case Qdata:
+ break;
+ }
+
+ n = convM2D(dp, n, &d, nil);
+ if(n > 0){
+ p = f->p[PROTO(c->qid)];
+ cv = p->conv[CONV(c->qid)];
+ if(!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
+ error(Eperm);
+ if(d.uid[0])
+ kstrdup(&cv->owner, d.uid);
+ cv->perm = d.mode & 0777;
+ }
+ return n;
+}
+
+/*
+ * Drop one reference to conversation cv.  When the last one goes:
+ * close every queued incoming call, hand ownership back to
+ * "network" with mode 0660, leave all multicast groups, forget the
+ * cached route, call the protocol's close routine and mark the
+ * conversation Idle.
+ */
+void
+closeconv(Conv *cv)
+{
+ Conv *nc;
+ Ipmulti *mp;
+
+ QLOCK(cv);
+
+ /* still referenced elsewhere: nothing more to do */
+ if(--cv->inuse > 0) {
+ QUNLOCK(cv);
+ return;
+ }
+
+ /* close all incoming calls since no listen will ever happen */
+ for(nc = cv->incall; nc; nc = cv->incall){
+ cv->incall = nc->next;
+ closeconv(nc);
+ }
+ cv->incall = nil;
+
+ kstrdup(&cv->owner, network);
+ cv->perm = 0660;
+
+ /* the loop relies on ipifcremmulti() unlinking the entry from cv->multi */
+ while((mp = cv->multi) != nil)
+ ipifcremmulti(cv, mp->ma, mp->ia);
+
+ cv->r = nil;
+ cv->rgen = 0;
+ cv->p->close(cv);
+ cv->state = Idle;
+ QUNLOCK(cv);
+}
+
+/*
+ * Close a channel.  For channels that were actually opened (COPEN),
+ * undo what ipopen() did: close the log, drop the conversation
+ * reference, or drop the snooper count.  The channel's IPaux and
+ * its owner string are always freed.
+ */
+static void
+ipclose(Chan* c)
+{
+ Fs *f;
+
+ f = ipfs[c->dev];
+ switch(TYPE(c->qid)) {
+ default:
+ break;
+ case Qlog:
+ if(c->flag & COPEN)
+ netlogclose(f);
+ break;
+ case Qdata:
+ case Qctl:
+ case Qerr:
+ if(c->flag & COPEN)
+ closeconv(f->p[PROTO(c->qid)]->conv[CONV(c->qid)]);
+ break;
+ case Qsnoop:
+ if(c->flag & COPEN)
+ decref(&f->p[PROTO(c->qid)]->conv[CONV(c->qid)]->snoopers);
+ break;
+ }
+ free(((IPaux*)c->aux)->owner);
+ free(c->aux);
+}
+
+enum
+{
+ Statelen= 32*1024,
+};
+
+static long
+ipread(Chan *ch, void *a, long n, vlong off)
+{
+ Conv *c;
+ Proto *x;
+ char *buf, *p;
+ long rv;
+ Fs *f;
+ ulong offset = off;
+
+ f = ipfs[ch->dev];
+
+ p = a;
+ switch(TYPE(ch->qid)) {
+ default:
+ error(Eperm);
+ case Qtopdir:
+ case Qprotodir:
+ case Qconvdir:
+ return devdirread(ch, a, n, 0, 0, ipgen);
+ case Qarp:
+ return arpread(f->arp, a, offset, n);
+ case Qbootp:
+ return bootpread(a, offset, n);
+ case Qndb:
+ return readstr(offset, a, n, f->ndb);
+ case Qiproute:
+ return routeread(f, a, offset, n);
+ case Qipselftab:
+ return ipselftabread(f, a, offset, n);
+ case Qlog:
+ return netlogread(f, a, offset, n);
+ case Qctl:
+ buf = smalloc(16);
+ sprint(buf, "%lud", CONV(ch->qid));
+ rv = readstr(offset, p, n, buf);
+ free(buf);
+ return rv;
+ case Qremote:
+ buf = smalloc(Statelen);
+ x = f->p[PROTO(ch->qid)];
+ c = x->conv[CONV(ch->qid)];
+ if(x->remote == nil) {
+ sprint(buf, "%I!%d\n", c->raddr, c->rport);
+ } else {
+ (*x->remote)(c, buf, Statelen-2);
+ }
+ rv = readstr(offset, p, n, buf);
+ free(buf);
+ return rv;
+ case Qlocal:
+ buf = smalloc(Statelen);
+ x = f->p[PROTO(ch->qid)];
+ c = x->conv[CONV(ch->qid)];
+ if(x->local == nil) {
+ sprint(buf, "%I!%d\n", c->laddr, c->lport);
+ } else {
+ (*x->local)(c, buf, Statelen-2);
+ }
+ rv = readstr(offset, p, n, buf);
+ free(buf);
+ return rv;
+ case Qstatus:
+ buf = smalloc(Statelen);
+ x = f->p[PROTO(ch->qid)];
+ c = x->conv[CONV(ch->qid)];
+ (*x->state)(c, buf, Statelen-2);
+ rv = readstr(offset, p, n, buf);
+ free(buf);
+ return rv;
+ case Qdata:
+ c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
+ return qread(c->rq, a, n);
+ case Qerr:
+ c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
+ return qread(c->eq, a, n);
+ case Qsnoop:
+ c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
+ return qread(c->sq, a, n);
+ case Qstats:
+ x = f->p[PROTO(ch->qid)];
+ if(x->stats == nil)
+ error("stats not implemented");
+ buf = smalloc(Statelen);
+ (*x->stats)(x, buf, Statelen);
+ rv = readstr(offset, p, n, buf);
+ free(buf);
+ return rv;
+ }
+}
+
+/*
+ * Block read.  A data file is read straight from the conversation's
+ * receive queue with qbread(); every other file goes through
+ * devbread().
+ */
+static Block*
+ipbread(Chan* ch, long n, ulong offset)
+{
+ Conv *c;
+ Proto *x;
+ Fs *f;
+
+ switch(TYPE(ch->qid)){
+ case Qdata:
+ f = ipfs[ch->dev];
+ x = f->p[PROTO(ch->qid)];
+ c = x->conv[CONV(ch->qid)];
+ return qbread(c->rq, n);
+ default:
+ return devbread(ch, n, offset);
+ }
+}
+
+/*
+ * set local address to be that of the ifc closest to remote address
+ * (the choice itself is made by findlocalip(), which fills in c->laddr
+ * from c->raddr)
+ */
+static void
+setladdr(Conv* c)
+{
+ findlocalip(c->p->f, c->laddr, c->raddr);
+}
+
+/*
+ * give c the local port lport unless another Connected or Announced
+ * conversation of the same protocol already holds the identical
+ * raddr,rport,laddr,lport quad.  the protocol is locked for the scan.
+ * returns nil on success, otherwise an error string.
+ */
+char*
+setluniqueport(Conv* c, int lport)
+{
+	Proto *pr;
+	Conv *o;
+	char *err;
+	int i;
+
+	pr = c->p;
+	err = nil;
+
+	QLOCK(pr);
+	/* the conv table is filled from the front; the first nil ends it */
+	for(i = 0; i < pr->nc && (o = pr->conv[i]) != nil; i++){
+		if(o == c)
+			continue;
+		if(o->state != Connected && o->state != Announced)
+			continue;
+		if(o->lport != lport || o->rport != c->rport)
+			continue;
+		if(ipcmp(o->raddr, c->raddr) != 0 || ipcmp(o->laddr, c->laddr) != 0)
+			continue;
+		err = "address in use";
+		break;
+	}
+	if(err == nil)
+		c->lport = lport;
+	QUNLOCK(pr);
+	return err;
+}
+
+/*
+ * does any conversation of protocol p currently hold local port lport?
+ * takes no lock itself; setlport calls it with p locked.
+ */
+static int
+lportinuse(Proto *p, ushort lport)
+{
+	Conv **cp, **end;
+
+	end = p->conv + p->nc;
+	for(cp = p->conv; cp < end && *cp != nil; cp++)
+		if((*cp)->lport == lport)
+			return 1;
+	return 0;
+}
+
+/*
+ * pick an unused local port for c and set it.
+ * returns nil on success or "no ports available".
+ */
+char *
+setlport(Conv* c)
+{
+	Proto *pr;
+	char *err;
+	int tries, port;
+
+	pr = c->p;
+	err = "no ports available";
+
+	QLOCK(pr);
+	if(c->restricted){
+		/* Restricted ports cycle between 600 and 1024. */
+		for(tries = 1024-600; tries > 0; tries--){
+			if(pr->nextrport >= 1024 || pr->nextrport < 600)
+				pr->nextrport = 600;
+			port = pr->nextrport++;
+			if(!lportinuse(pr, port)){
+				c->lport = port;
+				err = nil;
+				break;
+			}
+		}
+	}else{
+		/*
+		 * Unrestricted ports are chosen randomly
+		 * between 2^15 and 2^16.  There are at most
+		 * 4*Nchan = 4096 ports in use at any given time,
+		 * so even in the worst case, a random probe has a
+		 * 1 - 4096/2^15 = 87% chance of success.
+		 * If 64 successive probes fail, there is a bug somewhere
+		 * (or a once in 10^58 event has happened, but that's
+		 * less likely than a venti collision).
+		 */
+		for(tries = 64; tries > 0; tries--){
+			port = (1<<15) + nrand(1<<15);
+			if(!lportinuse(pr, port)){
+				c->lport = port;
+				err = nil;
+				break;
+			}
+		}
+	}
+	QUNLOCK(pr);
+	return err;
+}
+
+/*
+ * set a local address and port from a string of the form
+ *	[address!]port[!r]
+ * str is modified in place (the first '!' is overwritten with a NUL).
+ * returns nil on success or an error string.
+ */
+char*
+setladdrport(Conv* c, char* str, int announcing)
+{
+	char *port, *bang;
+	ushort lport;
+	uchar ip[IPaddrlen];
+
+	/*
+	 * a trailing "r" (restricted) is meaningless on local
+	 * ports, so "port!r" is treated like a bare "port".
+	 */
+	port = nil;
+	bang = strchr(str, '!');
+	if(bang != nil){
+		*bang = 0;
+		if(strcmp(bang+1, "r") != 0)
+			port = bang+1;
+	}
+
+	c->lport = 0;
+	if(port != nil){
+		/* explicit address part in str */
+		if(strcmp(str, "*") == 0)
+			ipmove(c->laddr, IPnoaddr);
+		else if(parseip(ip, str) == -1)
+			return Ebadip;
+		else if(!ipforme(c->p->f, ip))
+			return "not a local IP address";
+		else
+			ipmove(c->laddr, ip);
+	}else{
+		/* no address given: wildcard when announcing, else derive it */
+		if(announcing)
+			ipmove(c->laddr, IPnoaddr);
+		else
+			setladdr(c);
+		port = str;
+	}
+
+	/* one process can get all connections */
+	if(announcing && strcmp(port, "*") == 0){
+		if(!iseve())
+			error(Eperm);
+		return setluniqueport(c, 0);
+	}
+
+	/* port 0 (or unparsable text) means "pick one for me" */
+	lport = atoi(port);
+	if(lport > 0)
+		return setluniqueport(c, lport);
+	return setlport(c);
+}
+
+/*
+ * set the remote address and port of c from "address!port[!r]".
+ * a "!r" suffix marks the conversation as restricted.
+ * str is modified in place; returns nil or an error string.
+ */
+static char*
+setraddrport(Conv* c, char* str)
+{
+	char *port, *rest;
+
+	port = strchr(str, '!');
+	if(port == nil)
+		return "malformed address";
+	*port++ = 0;
+	if(parseip(c->raddr, str) == -1)
+		return Ebadip;
+	c->rport = atoi(port);
+
+	rest = strchr(port, '!');
+	if(rest != nil && strstr(rest, "!r") != nil)
+		c->restricted = 1;
+	return nil;
+}
+
+/*
+ * called by protocol connect routine to set addresses.
+ * argv[1] is the remote address!port; the optional argv[2] is a
+ * local [address!]port.  also records whether the conversation is
+ * v4 or v6.  returns nil or an error string.
+ */
+char*
+Fsstdconnect(Conv *c, char *argv[], int argc)
+{
+	char *err;
+	int bothv4;
+
+	if(argc != 2 && argc != 3)
+		return "bad args to connect";
+
+	err = setraddrport(c, argv[1]);
+	if(err != nil)
+		return err;
+	if(argc == 2){
+		/* no local part given: derive address, pick any port */
+		setladdr(c);
+		err = setlport(c);
+	}else
+		err = setladdrport(c, argv[2], 0);
+	if(err != nil)
+		return err;
+
+	bothv4 = memcmp(c->raddr, v4prefix, IPv4off) == 0
+		&& memcmp(c->laddr, v4prefix, IPv4off) == 0;
+	if(bothv4 || ipcmp(c->raddr, IPnoaddr) == 0)
+		c->ipversion = V4;
+	else
+		c->ipversion = V6;
+
+	return nil;
+}
+/*
+ * initiate connection and sleep till its set up
+ */
+
+/* sleep condition: has the conversation reached Connected? */
+static int
+connected(void* a)
+{
+	Conv *c;
+
+	c = a;
+	return c->state == Connected;
+}
+/*
+ * handle a "connect" ctl message for conversation c of protocol x.
+ * ipwrite calls this with c locked; the lock is dropped while
+ * sleeping and is held again on every exit, normal or via error().
+ */
+static void
+connectctlmsg(Proto *x, Conv *c, Cmdbuf *cb)
+{
+ char *p;
+
+ /* only a conversation whose state is 0 may start connecting */
+ if(c->state != 0)
+ error(Econinuse);
+ c->state = Connecting;
+ c->cerr[0] = '\0';
+ if(x->connect == nil)
+ error("connect not supported");
+ p = x->connect(c, cb->f, cb->nf);
+ if(p != nil)
+ error(p);
+
+ /* release c while waiting; retake it if the sleep is interrupted */
+ QUNLOCK(c);
+ if(waserror()){
+ QLOCK(c);
+ nexterror();
+ }
+ sleep(&c->cr, connected, c);
+ QLOCK(c);
+ poperror();
+
+ /* Fsconnected leaves any failure message in c->cerr */
+ if(c->cerr[0] != '\0')
+ error(c->cerr);
+}
+
+/*
+ * called by protocol announce routine to set addresses.
+ * clears the remote address and port, then parses argv[1] as the
+ * local [address!]port.  returns nil or an error string.
+ */
+char*
+Fsstdannounce(Conv* c, char* argv[], int argc)
+{
+	memset(c->raddr, 0, sizeof(c->raddr));
+	c->rport = 0;
+	if(argc != 2)
+		return "bad args to announce";
+	return setladdrport(c, argv[1], 1);
+}
+
+/*
+ * initiate announcement and sleep till its set up
+ */
+
+/* sleep condition: has the conversation reached Announced? */
+static int
+announced(void* a)
+{
+	Conv *c;
+
+	c = a;
+	return c->state == Announced;
+}
+/*
+ * handle an "announce" ctl message for conversation c of protocol x.
+ * ipwrite calls this with c locked; the lock is dropped while
+ * sleeping and is held again on every exit, normal or via error().
+ */
+static void
+announcectlmsg(Proto *x, Conv *c, Cmdbuf *cb)
+{
+ char *p;
+
+ /* only a conversation whose state is 0 may start announcing */
+ if(c->state != 0)
+ error(Econinuse);
+ c->state = Announcing;
+ c->cerr[0] = '\0';
+ if(x->announce == nil)
+ error("announce not supported");
+ p = x->announce(c, cb->f, cb->nf);
+ if(p != nil)
+ error(p);
+
+ /* release c while waiting; retake it if the sleep is interrupted */
+ QUNLOCK(c);
+ if(waserror()){
+ QLOCK(c);
+ nexterror();
+ }
+ sleep(&c->cr, announced, c);
+ QLOCK(c);
+ poperror();
+
+ /* Fsconnected leaves any failure message in c->cerr */
+ if(c->cerr[0] != '\0')
+ error(c->cerr);
+}
+
+/*
+ * called by protocol bind routine to set addresses.
+ * argv[1] is the local [address!]port; returns nil or an error string.
+ */
+char*
+Fsstdbind(Conv* c, char* argv[], int argc)
+{
+	if(argc != 2)
+		return "bad args to bind";
+	return setladdrport(c, argv[1], 0);
+}
+
+/*
+ * handle a "bind" ctl message: use the protocol's own bind routine
+ * when it has one, Fsstdbind otherwise; raise any error it reports.
+ */
+static void
+bindctlmsg(Proto *x, Conv *c, Cmdbuf *cb)
+{
+	char *err;
+
+	if(x->bind != nil)
+		err = x->bind(c, cb->f, cb->nf);
+	else
+		err = Fsstdbind(c, cb->f, cb->nf);
+	if(err != nil)
+		error(err);
+}
+
+/*
+ * handle a "tos" ctl message: set c->tos from the argument,
+ * or to 0 when no argument is given.
+ */
+static void
+tosctlmsg(Conv *c, Cmdbuf *cb)
+{
+	c->tos = cb->nf < 2 ? 0 : atoi(cb->f[1]);
+}
+
+/*
+ * handle a "ttl" ctl message: set c->ttl from the argument,
+ * or to MAXTTL when no argument is given.
+ */
+static void
+ttlctlmsg(Conv *c, Cmdbuf *cb)
+{
+	c->ttl = cb->nf < 2 ? MAXTTL : atoi(cb->f[1]);
+}
+
+/*
+ * write entry point of the ip device.  dispatches on the file type
+ * encoded in ch->qid:
+ *	Qdata	queue the bytes on the conversation's write queue
+ *	Qarp, Qiproute, Qlog, Qndb	hand off to the matching subsystem
+ *	Qctl	parse and execute a control message with the conv locked
+ * anything else raises Eperm.  returns n (or the handler's result).
+ */
+static long
+ipwrite(Chan* ch, void *v, long n, vlong off)
+{
+ Conv *c;
+ Proto *x;
+ char *p;
+ Cmdbuf *cb;
+ uchar ia[IPaddrlen], ma[IPaddrlen];
+ Fs *f;
+ char *a;
+ ulong offset = off;
+
+ a = v;
+ f = ipfs[ch->dev];
+
+ switch(TYPE(ch->qid)){
+ default:
+ error(Eperm);
+ case Qdata:
+ x = f->p[PROTO(ch->qid)];
+ c = x->conv[CONV(ch->qid)];
+
+ /* a conversation without a write queue cannot be written */
+ if(c->wq == nil)
+ error(Eperm);
+
+ qwrite(c->wq, a, n);
+ break;
+ case Qarp:
+ return arpwrite(f, a, n);
+ case Qiproute:
+ return routewrite(f, ch, a, n);
+ case Qlog:
+ netlogctl(f, a, n);
+ return n;
+ case Qndb:
+ return ndbwrite(f, a, offset, n);
+ break;
+ case Qctl:
+ x = f->p[PROTO(ch->qid)];
+ c = x->conv[CONV(ch->qid)];
+ cb = parsecmd(a, n);
+
+ /* hold c for the whole command; the error path unlocks and frees cb */
+ QLOCK(c);
+ if(waserror()) {
+ QUNLOCK(c);
+ free(cb);
+ nexterror();
+ }
+ if(cb->nf < 1)
+ error("short control request");
+ if(strcmp(cb->f[0], "connect") == 0)
+ connectctlmsg(x, c, cb);
+ else if(strcmp(cb->f[0], "announce") == 0)
+ announcectlmsg(x, c, cb);
+ else if(strcmp(cb->f[0], "bind") == 0)
+ bindctlmsg(x, c, cb);
+ else if(strcmp(cb->f[0], "ttl") == 0)
+ ttlctlmsg(c, cb);
+ else if(strcmp(cb->f[0], "tos") == 0)
+ tosctlmsg(c, cb);
+ else if(strcmp(cb->f[0], "ignoreadvice") == 0)
+ c->ignoreadvice = 1;
+ else if(strcmp(cb->f[0], "addmulti") == 0){
+ /* "addmulti ifc-addr" joins c->raddr; "addmulti ifc-addr mcast-addr" joins mcast-addr */
+ if(cb->nf < 2)
+ error("addmulti needs interface address");
+ if(cb->nf == 2){
+ if(!ipismulticast(c->raddr))
+ error("addmulti for a non multicast address");
+ if (parseip(ia, cb->f[1]) == -1)
+ error(Ebadip);
+ ipifcaddmulti(c, c->raddr, ia);
+ } else {
+ if (parseip(ia, cb->f[1]) == -1 ||
+ parseip(ma, cb->f[2]) == -1)
+ error(Ebadip);
+ if(!ipismulticast(ma))
+ error("addmulti for a non multicast address");
+ ipifcaddmulti(c, ma, ia);
+ }
+ } else if(strcmp(cb->f[0], "remmulti") == 0){
+ /* "remmulti ifc-addr" always refers to c->raddr */
+ if(cb->nf < 2)
+ error("remmulti needs interface address");
+ if(!ipismulticast(c->raddr))
+ error("remmulti for a non multicast address");
+ if (parseip(ia, cb->f[1]) == -1)
+ error(Ebadip);
+ ipifcremmulti(c, c->raddr, ia);
+ } else if(strcmp(cb->f[0], "maxfragsize") == 0){
+ if(cb->nf < 2)
+ error("maxfragsize needs size");
+
+ c->maxfragsize = (int)strtol(cb->f[1], nil, 0);
+
+ } else if(x->ctl != nil) {
+ /* anything unrecognized goes to the protocol's own ctl routine */
+ p = x->ctl(c, cb->f, cb->nf);
+ if(p != nil)
+ error(p);
+ } else
+ error("unknown control request");
+ QUNLOCK(c);
+ free(cb);
+ poperror();
+ }
+ return n;
+}
+
+/*
+ * block write: a Block written to a data file is flattened if it
+ * is a list and queued on the conversation's write queue; the byte
+ * count is returned.  other file types go to the generic devbwrite.
+ */
+static long
+ipbwrite(Chan* ch, Block* bp, ulong offset)
+{
+	Conv *cv;
+	int len;
+
+	if(TYPE(ch->qid) != Qdata)
+		return devbwrite(ch, bp, offset);
+
+	cv = ipfs[ch->dev]->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
+	if(cv->wq == nil)
+		error(Eperm);
+
+	if(bp->next != nil)
+		bp = concatblock(bp);
+	/* take the length first: qbwrite takes over the block */
+	len = BLEN(bp);
+	qbwrite(cv->wq, bp);
+	return len;
+}
+
+/*
+ * device switch entry for the ip device: character 'I', name "ip".
+ * the initializer is positional, so the order of the routines
+ * below must follow the field order of Dev.
+ */
+Dev ipdevtab = {
+ 'I',
+ "ip",
+
+ ipreset,
+ devinit,
+ devshutdown,
+ ipattach,
+ ipwalk,
+ ipstat,
+ ipopen,
+ ipcreate,
+ ipclose,
+ ipread,
+ ipbread,
+ ipwrite,
+ ipbwrite,
+ ipremove,
+ ipwstat,
+};
+
+/*
+ * register protocol p with ip stack f.
+ * returns -1 if the protocol table is full or p's ip protocol
+ * number is already claimed, 0 on success.  allocates p's
+ * conversation table with nc+1 slots.
+ */
+int
+Fsproto(Fs *f, Proto *p)
+{
+	int slot;
+
+	slot = f->np;
+	if(slot >= Maxproto)
+		return -1;
+
+	p->f = f;
+
+	/* protocols with a number get an entry in the type-to-proto map */
+	if(p->ipproto > 0){
+		if(f->t2p[p->ipproto] != nil)
+			return -1;
+		f->t2p[p->ipproto] = p;
+	}
+
+	p->qid.type = QTDIR;
+	p->qid.path = QID(slot, 0, Qprotodir);
+	p->conv = malloc((p->nc+1) * sizeof(Conv*));
+	if(p->conv == nil)
+		panic("Fsproto");
+
+	p->x = slot;
+	p->nextrport = 600;
+	f->p[slot] = p;
+	f->np = slot+1;
+
+	return 0;
+}
+
+/*
+ * return true if a protocol is registered in f
+ * for ip protocol number proto, i.e. it is built in
+ */
+int
+Fsbuiltinproto(Fs* f, uchar proto)
+{
+	Proto *p;
+
+	p = f->t2p[proto];
+	return p != nil;
+}
+
+/*
+ * called with protocol locked
+ *
+ * find or create a free conversation of protocol p, reset it to a
+ * pristine Idle state owned by user, and return it; returns nil when
+ * every slot is busy and the protocol's gc routine (if any) could
+ * not free one.  the loop leaves with c locked, by QLOCK on a new
+ * conv or CANQLOCK on a reused one; it is unlocked before returning.
+ */
+Conv*
+Fsprotoclone(Proto *p, char *user)
+{
+ Conv *c, **pp, **ep;
+
+retry:
+ c = nil;
+ ep = &p->conv[p->nc];
+ for(pp = p->conv; pp < ep; pp++) {
+ c = *pp;
+ if(c == nil){
+ /* first empty slot: build a brand new conversation here */
+ c = malloc(sizeof(Conv));
+ if(c == nil)
+ error(Enomem);
+ QLOCK(c);
+ c->p = p;
+ c->x = pp - p->conv;
+ if(p->ptclsize != 0){
+ c->ptcl = malloc(p->ptclsize);
+ if(c->ptcl == nil) {
+ free(c);
+ error(Enomem);
+ }
+ }
+ *pp = c;
+ p->ac++;
+ c->eq = qopen(1024, Qmsg, 0, 0);
+ (*p->create)(c);
+ break;
+ }
+ /* existing slot: reusable only if we can lock it and nobody uses it */
+ if(CANQLOCK(c)){
+ /*
+ * make sure both processes and protocol
+ * are done with this Conv
+ */
+ if(c->inuse == 0 && (p->inuse == nil || (*p->inuse)(c) == 0))
+ break;
+
+ QUNLOCK(c);
+ }
+ }
+ if(pp >= ep) {
+ /* table exhausted: retry only if the protocol's gc reports progress */
+ if(p->gc != nil && (*p->gc)(p))
+ goto retry;
+ return nil;
+ }
+
+ /* reset per-conversation state to defaults */
+ c->inuse = 1;
+ kstrdup(&c->owner, user);
+ c->perm = 0660;
+ c->state = Idle;
+ ipmove(c->laddr, IPnoaddr);
+ ipmove(c->raddr, IPnoaddr);
+ c->r = nil;
+ c->rgen = 0;
+ c->lport = 0;
+ c->rport = 0;
+ c->restricted = 0;
+ c->maxfragsize = 0;
+ c->ttl = MAXTTL;
+ qreopen(c->rq);
+ qreopen(c->wq);
+ qreopen(c->eq);
+
+ QUNLOCK(c);
+ return c;
+}
+
+/*
+ * called by a protocol when a connect or announce attempt finishes.
+ * a non-empty msg is recorded in c->cerr as the failure reason;
+ * Announcing/Connecting advance to Announced/Connected and whoever
+ * sleeps on c->cr is woken.  always returns 0.
+ */
+int
+Fsconnected(Conv* c, char* msg)
+{
+	if(msg != nil && *msg != '\0')
+		strncpy(c->cerr, msg, ERRMAX-1);
+
+	if(c->state == Announcing)
+		c->state = Announced;
+	else if(c->state == Connecting)
+		c->state = Connected;
+
+	wakeup(&c->cr);
+	return 0;
+}
+
+/*
+ * protocol that should receive packets of type proto:
+ * f->ipmux when it is set, otherwise the registered protocol
+ */
+Proto*
+Fsrcvpcol(Fs* f, uchar proto)
+{
+	Proto *mux;
+
+	mux = f->ipmux;
+	if(mux != nil)
+		return mux;
+	return f->t2p[proto];
+}
+
+/*
+ * like Fsrcvpcol but never consults f->ipmux
+ */
+Proto*
+Fsrcvpcolx(Fs *f, uchar proto)
+{
+	Proto *p;
+
+	p = f->t2p[proto];
+	return p;
+}
+
+/*
+ * called with protocol locked
+ *
+ * create a Connected conversation for an incoming call on the
+ * listening conversation c, append it to c's incall list and wake
+ * the listener.  returns nil if Maxincall calls are already queued
+ * or no conversation is free.
+ */
+Conv*
+Fsnewcall(Conv *c, uchar *raddr, ushort rport, uchar *laddr, ushort lport, uchar version)
+{
+	Conv *nc, **tail;
+	int pending;
+
+	QLOCK(c);
+
+	/* count the queued calls, leaving tail at the end of the list */
+	pending = 0;
+	for(tail = &c->incall; *tail != nil; tail = &(*tail)->next)
+		pending++;
+
+	/* find a free conversation */
+	nc = nil;
+	if(pending < Maxincall)
+		nc = Fsprotoclone(c->p, network);
+	if(nc == nil){
+		QUNLOCK(c);
+		return nil;
+	}
+
+	ipmove(nc->raddr, raddr);
+	nc->rport = rport;
+	ipmove(nc->laddr, laddr);
+	nc->lport = lport;
+	nc->next = nil;
+	*tail = nc;
+	nc->state = Connected;
+	nc->ipversion = version;
+
+	QUNLOCK(c);
+
+	wakeup(&c->listenr);
+
+	return nc;
+}
+
+/*
+ * write n bytes from a into the stack's ndb text at offset off.
+ * the write may not start beyond the current end of the string and
+ * must leave room for the terminating NUL; otherwise Eio is raised.
+ * everything after the written bytes is cut off.
+ */
+long
+ndbwrite(Fs *f, char *a, ulong off, int n)
+{
+	ulong end;
+
+	if(off > strlen(f->ndb))
+		error(Eio);
+	end = off+n;
+	if(end >= sizeof(f->ndb))
+		error(Eio);
+
+	memmove(f->ndb+off, a, n);
+	f->ndb[end] = 0;
+	f->ndbvers++;
+	f->ndbmtime = seconds();
+	return n;
+}
+
+/*
+ * number of conversations to allow: Nchans, or four times that
+ * on a cpu server with at least 128MB of memory
+ */
+ulong
+scalednconv(void)
+{
+	int big;
+
+	big = cpuserver && conf.npage*BY2PG >= 128*MB;
+	if(big)
+		return Nchans*4;
+	return Nchans;
+}
diff --git a/src/9vx/a/ip/eipconvtest.c b/src/9vx/a/ip/eipconvtest.c
@@ -0,0 +1,152 @@
+#include <u.h>
+#include <libc.h>
+
+/* flag bit set in prefixvals entries that are valid mask bytes */
+enum
+{
+ Isprefix= 16,
+};
+
+/*
+ * indexed by a mask byte: for bytes made of leading one bits
+ * followed by zero bits, the number of one bits or'ed with
+ * Isprefix; 0 for every other byte value.
+ */
+uchar prefixvals[256] =
+{
+[0x00] 0 | Isprefix,
+[0x80] 1 | Isprefix,
+[0xC0] 2 | Isprefix,
+[0xE0] 3 | Isprefix,
+[0xF0] 4 | Isprefix,
+[0xF8] 5 | Isprefix,
+[0xFC] 6 | Isprefix,
+[0xFE] 7 | Isprefix,
+[0xFF] 8 | Isprefix,
+};
+
+/*
+ * the first 12 bytes are compared by eipconv to recognize a
+ * v4 address stored in 16 bytes (::ffff:a.b.c.d)
+ */
+uchar v4prefix[16] = {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0xff, 0xff,
+ 0, 0, 0, 0
+};
+
+/*
+ * store the low 32 bits of v at p, most significant byte first
+ */
+void
+hnputl(void *p, ulong v)
+{
+	uchar *b;
+	int i;
+
+	b = p;
+	for(i = 3; i >= 0; i--){
+		b[i] = v;
+		v >>= 8;
+	}
+}
+
+/*
+ * print-format handler for network addresses.  the verb in f->chr
+ * selects the conversion:
+ *	E	6-byte ethernet address as 12 hex digits
+ *	I	16-byte ip address; dotted quad if it has the v4 prefix,
+ *		otherwise colon-separated hex with the longest run of
+ *		zero groups elided as "::"
+ *	i	v6 address passed as 4 ulongs
+ *	V	4-byte v4 address
+ *	M	16-byte mask; "/n" when it is a prefix mask, else as for I
+ * the text is built in buf and handed to strconv.
+ */
+int
+eipconv(va_list *arg, Fconv *f)
+{
+ char buf[8*5];
+ static char *efmt = "%.2lux%.2lux%.2lux%.2lux%.2lux%.2lux";
+ static char *ifmt = "%d.%d.%d.%d";
+ uchar *p, ip[16];
+ ulong *lp;
+ ushort s;
+ int i, j, n, eln, eli;
+
+ switch(f->chr) {
+ case 'E': /* Ethernet address */
+ p = va_arg(*arg, uchar*);
+ sprint(buf, efmt, p[0], p[1], p[2], p[3], p[4], p[5]);
+ break;
+ case 'I': /* Ip address */
+ p = va_arg(*arg, uchar*);
+common:
+ /* also reached from 'i' and from 'M' when the mask is not a prefix */
+ if(memcmp(p, v4prefix, 12) == 0)
+ sprint(buf, ifmt, p[12], p[13], p[14], p[15]);
+ else {
+ /* find longest elision */
+ /* eli = byte offset where the run starts, eln = its length in bytes */
+ eln = eli = -1;
+ for(i = 0; i < 16; i += 2){
+ for(j = i; j < 16; j += 2)
+ if(p[j] != 0 || p[j+1] != 0)
+ break;
+ if(j > i && j - i > eln){
+ eli = i;
+ eln = j - i;
+ }
+ }
+
+ /* print with possible elision */
+ n = 0;
+ for(i = 0; i < 16; i += 2){
+ if(i == eli){
+ n += sprint(buf+n, "::");
+ i += eln;
+ if(i >= 16)
+ break;
+ } else if(i != 0)
+ n += sprint(buf+n, ":");
+ s = (p[i]<<8) + p[i+1];
+ n += sprint(buf+n, "%ux", s);
+ }
+ }
+ break;
+ case 'i': /* v6 address as 4 longs */
+ lp = va_arg(*arg, ulong*);
+ for(i = 0; i < 4; i++)
+ hnputl(ip+4*i, *lp++);
+ p = ip;
+ goto common;
+ case 'V': /* v4 ip address */
+ p = va_arg(*arg, uchar*);
+ sprint(buf, ifmt, p[0], p[1], p[2], p[3]);
+ break;
+ case 'M': /* ip mask */
+ p = va_arg(*arg, uchar*);
+
+ /* look for a prefix mask */
+ /* skip leading 0xff bytes; byte i, if any, is the partial one */
+ for(i = 0; i < 16; i++)
+ if(p[i] != 0xff)
+ break;
+ if(i < 16){
+ if((prefixvals[p[i]] & Isprefix) == 0)
+ goto common;
+ /* every byte after the partial one must be zero */
+ for(j = i+1; j < 16; j++)
+ if(p[j] != 0)
+ goto common;
+ n = 8*i + (prefixvals[p[i]] & ~Isprefix);
+ } else
+ n = 8*16;
+
+ /* got one, use /xx format */
+ sprint(buf, "/%d", n);
+ break;
+ default:
+ strcpy(buf, "(eipconv)");
+ }
+ strconv(buf, f);
+ return sizeof(uchar*);
+}
+
+/*
+ * sample 16-byte values that main prints with both %I and %M:
+ * a v4-mapped address, all-ones, several prefix-shaped masks,
+ * all-zero, and addresses with zero runs to exercise elision
+ */
+uchar testvec[11][16] =
+{
+ { 0,0,0,0, 0,0,0,0, 0,0,0xff,0xff, 1,3,4,5, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, },
+ { 0xff,0xff,0x80,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xc0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xe0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xf0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xff,0xf8,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, },
+ { 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0,0,0,0, 0,0x11,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0,0,0,0x11, 0,0,0,0, 0,0,0,0, 0,0,0,0x12, },
+};
+
+/*
+ * install eipconv for the I and M verbs and print every
+ * test vector both as an address and as a mask
+ */
+void
+main(void)
+{
+	uchar (*v)[16];
+
+	fmtinstall('I', eipconv);
+	fmtinstall('M', eipconv);
+	for(v = testvec; v < testvec+11; v++)
+		print("%I\n%M\n", *v, *v);
+	exits(0);
+}
diff --git a/src/9vx/a/ip/esp.c b/src/9vx/a/ip/esp.c
@@ -0,0 +1,951 @@
+/*
+ * Encapsulating Security Payload for IPsec for IPv4, rfc1827.
+ * currently only implements tunnel mode.
+ * TODO: update to match rfc4303.
+ */
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+#include "libsec.h"
+
+/* short names for the structures declared below */
+typedef struct Esphdr Esphdr;
+typedef struct Esp4hdr Esp4hdr;
+typedef struct Esp6hdr Esp6hdr;
+typedef struct Esptail Esptail;
+typedef struct Userhdr Userhdr;
+typedef struct Esppriv Esppriv;
+typedef struct Espcb Espcb;
+typedef struct Algorithm Algorithm;
+
+/*
+ * protocol number and fixed sizes.  the 8 added to each ip header
+ * length is the esp header itself: 4 bytes of spi plus 4 bytes of
+ * sequence number.
+ */
+enum
+{
+ IP_ESPPROTO = 50, /* IP v4 and v6 protocol number */
+ Esp4hdrlen = IP4HDR + 8,
+ Esp6hdrlen = IP6HDR + 8,
+
+ Esptaillen = 2, /* does not include pad or auth data */
+ Userhdrlen = 4, /* user-visible header size - if enabled */
+};
+
+/* the esp header proper, as it follows the ip header on the wire */
+struct Esphdr
+{
+ uchar espspi[4]; /* Security parameter index */
+ uchar espseq[4]; /* Sequence number */
+};
+
+/*
+ * tunnel-mode layout: IP | ESP | TCP/UDP | user data.
+ * transport-mode layout is: ESP | IP | TCP/UDP | user data.
+ *
+ * NOTE(review): the ESP RFCs describe transport mode as
+ * IP | ESP | payload and tunnel mode as IP | ESP | inner IP | payload;
+ * confirm which naming is intended here.
+ *
+ * Esp4hdr overlays an ipv4 header immediately followed by the
+ * esp header; all multi-byte fields are byte arrays.
+ */
+struct Esp4hdr
+{
+ /* ipv4 header */
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+ uchar Unused;
+ uchar espproto; /* Protocol */
+ uchar espplen[2]; /* Header plus data length */
+ uchar espsrc[4]; /* Ip source */
+ uchar espdst[4]; /* Ip destination */
+
+ /* Esphdr; */
+ uchar espspi[4]; /* Security parameter index */
+ uchar espseq[4]; /* Sequence number */
+};
+
+/* tunnel-mode layout */
+/* overlays an ipv6 header immediately followed by the esp header */
+struct Esp6hdr
+{
+ /* Ip6hdr; */
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */
+ uchar ploadlen[2]; /* payload length: packet length - 40 */
+ uchar proto; /* next header type */
+ uchar ttl; /* hop limit */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+
+ /* Esphdr; */
+ uchar espspi[4]; /* Security parameter index */
+ uchar espseq[4]; /* Sequence number */
+};
+
+/* trailer placed after the payload and padding, before any auth data */
+struct Esptail
+{
+ uchar pad; /* number of pad bytes preceding this trailer */
+ uchar nexthdr; /* protocol of the enclosed payload */
+};
+
+/* header as seen by the user */
+/* present on reads and writes only when the "header" ctl is in effect */
+struct Userhdr
+{
+ uchar nexthdr; /* next protocol */
+ uchar unused[3];
+};
+
+/* per-protocol counters, reported by espstats */
+struct Esppriv
+{
+ ulong in;
+ ulong inerrors;
+};
+
+/*
+ * protocol specific part of Conv
+ */
+struct Espcb
+{
+ int incoming; /* set by espconnect for a receiving ("*" spi) conversation */
+ int header; /* use user level header */
+ ulong spi;
+ ulong seq; /* last seq sent */
+ ulong window; /* for replay attacks */
+ char *espalg;
+ void *espstate; /* other state for esp */
+ int espivlen; /* in bytes */
+ int espblklen;
+ int (*cipher)(Espcb*, uchar *buf, int len); /* returns 0 on failure */
+ char *ahalg;
+ void *ahstate; /* other state for ah */
+ int ahlen; /* auth data length in bytes */
+ int ahblklen;
+ int (*auth)(Espcb*, uchar *buf, int len, uchar *hash); /* returns 0 on mismatch */
+};
+
+/* one entry of the cipher or authenticator tables searched by setalg */
+struct Algorithm
+{
+ char *name;
+ int keylen; /* in bits */
+ void (*init)(Espcb*, char* name, uchar *key, int keylen);
+};
+
+/* forward declarations for routines used before their definitions */
+static Conv* convlookup(Proto *esp, ulong spi);
+static char *setalg(Espcb *ecb, char **f, int n, Algorithm *alg);
+static void espkick(void *x);
+
+/* cipher initializers referenced by espalg[] */
+static void nullespinit(Espcb*, char*, uchar *key, int keylen);
+static void desespinit(Espcb *ecb, char *name, uchar *k, int n);
+
+/* authenticator initializers referenced by ahalg[] */
+static void nullahinit(Espcb*, char*, uchar *key, int keylen);
+static void shaahinit(Espcb*, char*, uchar *key, int keylen);
+static void md5ahinit(Espcb*, char*, uchar *key, int keylen);
+
+/*
+ * ciphers selectable with the "esp" ctl message: name, key length
+ * in bits, initializer.  terminated by a nil name.
+ */
+static Algorithm espalg[] =
+{
+ "null", 0, nullespinit,
+// "des3_cbc", 192, des3espinit, /* rfc2451 */
+// "aes_128_cbc", 128, aescbcespinit, /* rfc3602 */
+// "aes_ctr", 128, aesctrespinit, /* rfc3686 */
+ "des_56_cbc", 64, desespinit, /* rfc2405, deprecated */
+// "rc4_128", 128, rc4espinit, /* gone in rfc4305 */
+ nil, 0, nil,
+};
+
+/*
+ * authenticators selectable with the "ah" ctl message: name, key
+ * length in bits, initializer.  terminated by a nil name.
+ */
+static Algorithm ahalg[] =
+{
+ "null", 0, nullahinit,
+ "hmac_sha1_96", 128, shaahinit, /* rfc2404 */
+// "aes_xcbc_mac_96", 128, aesahinit, /* rfc3566 */
+ "hmac_md5_96", 128, md5ahinit, /* rfc2403 */
+ nil, 0, nil,
+};
+
+/*
+ * connect routine for esp.  argv[1] has the form address!spi:
+ *	address!*	receiving side; a random unused spi is chosen,
+ *			the conversation is marked incoming and its
+ *			write queue is hung up
+ *	address!n	sending side with spi n; the read queue is hung up
+ * both cipher and authenticator start out as "null".
+ * the outcome (nil or an error string) is reported through
+ * Fsconnected and also returned.
+ */
+static char*
+espconnect(Conv *c, char **argv, int argc)
+{
+	char *p, *pp;
+	char *e = nil;
+	ulong spi;
+	Espcb *ecb = (Espcb*)c->ptcl;
+
+	switch(argc) {
+	default:
+		e = "bad args to connect";
+		break;
+	case 2:
+		p = strchr(argv[1], '!');
+		if(p == nil){
+			e = "malformed address";
+			break;
+		}
+		*p++ = 0;
+		/* refuse an unparsable address rather than connect to garbage */
+		if(parseip(c->raddr, argv[1]) == -1){
+			e = Ebadip;
+			break;
+		}
+		findlocalip(c->p->f, c->laddr, c->raddr);
+		ecb->incoming = 0;
+		ecb->seq = 0;
+		if(strcmp(p, "*") == 0) {
+			/* pick a random spi no incoming conversation is using */
+			QLOCK(c->p);
+			for(;;) {
+				spi = nrand(1<<16) + 256;
+				if(convlookup(c->p, spi) == nil)
+					break;
+			}
+			QUNLOCK(c->p);
+			ecb->spi = spi;
+			ecb->incoming = 1;
+			qhangup(c->wq, nil);
+		} else {
+			spi = strtoul(p, &pp, 10);
+			if(pp == p) {
+				e = "malformed address";
+				break;
+			}
+			ecb->spi = spi;
+			qhangup(c->rq, nil);
+		}
+		nullespinit(ecb, "null", nil, 0);
+		nullahinit(ecb, "null", nil, 0);
+	}
+	Fsconnected(c, e);
+
+	return e;
+}
+
+
+/*
+ * status text for a conversation: "Open" while in use, else "Closed"
+ */
+static int
+espstate(Conv *c, char *state, int n)
+{
+	char *s;
+
+	s = c->inuse ? "Open\n" : "Closed\n";
+	return snprint(state, n, "%s", s);
+}
+
+/*
+ * create the queues of a new conversation: a message-mode read
+ * queue and a write queue that kicks espkick
+ */
+static void
+espcreate(Conv *c)
+{
+	enum { Qlimit = 64*1024 };
+
+	c->rq = qopen(Qlimit, Qmsg, 0, 0);
+	c->wq = qopen(Qlimit, Qkick, espkick, c);
+}
+
+/*
+ * close a conversation: close its queues, forget both addresses,
+ * release cipher and authenticator state and wipe the control block
+ */
+static void
+espclose(Conv *c)
+{
+	Espcb *cb;
+
+	qclose(c->rq);
+	qclose(c->wq);
+	qclose(c->eq);
+	ipmove(c->laddr, IPnoaddr);
+	ipmove(c->raddr, IPnoaddr);
+
+	cb = (Espcb*)c->ptcl;
+	free(cb->espstate);
+	free(cb->ahstate);
+	memset(cb, 0, sizeof *cb);
+}
+
+/*
+ * ip version of a conversation: V4 when both addresses carry the
+ * v4 prefix or the remote address is unset, V6 otherwise
+ */
+static int
+ipvers(Conv *c)
+{
+	int bothv4;
+
+	bothv4 = memcmp(c->raddr, v4prefix, IPv4off) == 0
+		&& memcmp(c->laddr, v4prefix, IPv4off) == 0;
+	if(bothv4 || ipcmp(c->raddr, IPnoaddr) == 0)
+		return V4;
+	return V6;
+}
+
+/*
+ * kick routine of the write queue (installed by espcreate): take one
+ * block from c->wq, wrap it as
+ *	ip header | esp header | iv | data | pad | tail | auth data
+ * run the conversation's cipher and authenticator over it and pass
+ * it to ipoput4 or ipoput6.
+ */
+static void
+espkick(void *x)
+{
+ Conv *c = x;
+ Esp4hdr *eh4;
+ Esp6hdr *eh6;
+ Esptail *et;
+ Userhdr *uh;
+ Espcb *ecb;
+ Block *bp;
+ int nexthdr, payload, pad, align, version, hdrlen, iphdrlen;
+ uchar *auth;
+
+ version = ipvers(c);
+ iphdrlen = version == V4? IP4HDR: IP6HDR;
+ hdrlen = version == V4? Esp4hdrlen: Esp6hdrlen;
+
+ bp = qget(c->wq);
+ if(bp == nil)
+ return;
+
+ QLOCK(c);
+ ecb = c->ptcl;
+
+ if(ecb->header) {
+ /* make sure the message has a User header */
+ bp = pullupblock(bp, Userhdrlen);
+ if(bp == nil) {
+ QUNLOCK(c);
+ return;
+ }
+ uh = (Userhdr*)bp->rp;
+ nexthdr = uh->nexthdr;
+ bp->rp += Userhdrlen;
+ } else {
+ nexthdr = 0; /* what should this be? */
+ }
+
+ /* payload counts the iv as well as the user data */
+ payload = BLEN(bp) + ecb->espivlen;
+
+ /* Make space to fit ip header */
+ bp = padblock(bp, hdrlen + ecb->espivlen);
+
+ /* pad so that payload+pad+tail is a multiple of max(4, cipher block) */
+ align = 4;
+ if(ecb->espblklen > align)
+ align = ecb->espblklen;
+ if(align % ecb->ahblklen != 0)
+ panic("espkick: ahblklen is important after all");
+ pad = (align-1) - (payload + Esptaillen-1)%align;
+
+ /*
+ * Make space for tail
+ * this is done by calling padblock with a negative size
+ * Padblock does not change bp->wp!
+ */
+ bp = padblock(bp, -(pad+Esptaillen+ecb->ahlen));
+ bp->wp += pad+Esptaillen+ecb->ahlen;
+
+ /* both header views alias the start of the block; version picks one */
+ eh4 = (Esp4hdr *)bp->rp;
+ eh6 = (Esp6hdr *)bp->rp;
+ et = (Esptail*)(bp->rp + hdrlen + payload + pad);
+
+ /* fill in tail */
+ et->pad = pad;
+ et->nexthdr = nexthdr;
+
+ /* the cipher covers iv, data, pad and tail; auth data follows it */
+ ecb->cipher(ecb, bp->rp + hdrlen, payload + pad + Esptaillen);
+ auth = bp->rp + hdrlen + payload + pad + Esptaillen;
+
+ /* fill in head */
+ if (version == V4) {
+ eh4->vihl = IP_VER4;
+ hnputl(eh4->espspi, ecb->spi);
+ hnputl(eh4->espseq, ++ecb->seq);
+ v6tov4(eh4->espsrc, c->laddr);
+ v6tov4(eh4->espdst, c->raddr);
+ eh4->espproto = IP_ESPPROTO;
+ eh4->frag[0] = 0;
+ eh4->frag[1] = 0;
+ } else {
+ eh6->vcf[0] = IP_VER6;
+ hnputl(eh6->espspi, ecb->spi);
+ hnputl(eh6->espseq, ++ecb->seq);
+ ipmove(eh6->src, c->laddr);
+ ipmove(eh6->dst, c->raddr);
+ eh6->proto = IP_ESPPROTO;
+ }
+
+ /* authenticate from the esp header through the tail, skipping the ip header */
+ ecb->auth(ecb, bp->rp + iphdrlen, (hdrlen - iphdrlen) +
+ payload + pad + Esptaillen, auth);
+
+ QUNLOCK(c);
+ /* print("esp: pass down: %uld\n", BLEN(bp)); */
+ if (version == V4)
+ ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+ else
+ ipoput6(c->p->f, bp, 0, c->ttl, c->tos, c);
+}
+
+/*
+ * receive routine (installed as esp->rcv by espinit): find the
+ * incoming conversation by spi, check the authenticator, decrypt,
+ * strip headers, padding and tail, and queue the payload on the
+ * conversation's read queue.  the block is freed on every failure.
+ */
+void
+espiput(Proto *esp, Ipifc* _, Block *bp)
+{
+ Esp4hdr *eh4;
+ Esp6hdr *eh6;
+ Esptail *et;
+ Userhdr *uh;
+ Conv *c;
+ Espcb *ecb;
+ uchar raddr[IPaddrlen], laddr[IPaddrlen];
+ Fs *f;
+ uchar *auth, *espspi;
+ ulong spi;
+ int payload, nexthdr, version, hdrlen;
+
+ f = esp->f;
+ /* NOTE(review): the pullup happens only for a nil or empty block — confirm that is intended */
+ if (bp == nil || BLEN(bp) == 0) {
+ /* get enough to identify the IP version */
+ bp = pullupblock(bp, IP4HDR);
+ if(bp == nil) {
+ netlog(f, Logesp, "esp: short packet\n");
+ return;
+ }
+ }
+ /* the version nibble is the first byte in both header layouts */
+ eh4 = (Esp4hdr*)bp->rp;
+ version = ((eh4->vihl & 0xf0) == IP_VER4? V4: V6);
+ hdrlen = version == V4? Esp4hdrlen: Esp6hdrlen;
+
+ bp = pullupblock(bp, hdrlen + Esptaillen);
+ if(bp == nil) {
+ netlog(f, Logesp, "esp: short packet\n");
+ return;
+ }
+
+ /* extract the spi and addresses; v4 addresses are widened to 16 bytes */
+ if (version == V4) {
+ eh4 = (Esp4hdr*)bp->rp;
+ spi = nhgetl(eh4->espspi);
+ v4tov6(raddr, eh4->espsrc);
+ v4tov6(laddr, eh4->espdst);
+ } else {
+ eh6 = (Esp6hdr*)bp->rp;
+ spi = nhgetl(eh6->espspi);
+ ipmove(raddr, eh6->src);
+ ipmove(laddr, eh6->dst);
+ }
+
+ QLOCK(esp);
+ /* Look for a conversation structure for this port */
+ c = convlookup(esp, spi);
+ if(c == nil) {
+ QUNLOCK(esp);
+ netlog(f, Logesp, "esp: no conv %I -> %I!%d\n", raddr,
+ laddr, spi);
+ icmpnoconv(f, bp);
+ freeblist(bp);
+ return;
+ }
+
+ /* take the conversation before letting go of the protocol */
+ QLOCK(c);
+ QUNLOCK(esp);
+
+ ecb = c->ptcl;
+ /* too hard to do decryption/authentication on block lists */
+ if(bp->next)
+ bp = concatblock(bp);
+
+ if(BLEN(bp) < hdrlen + ecb->espivlen + Esptaillen + ecb->ahlen) {
+ QUNLOCK(c);
+ netlog(f, Logesp, "esp: short block %I -> %I!%d\n", raddr,
+ laddr, spi);
+ freeb(bp);
+ return;
+ }
+
+ /* auth data is the last ahlen bytes; it covers spi through the tail */
+ auth = bp->wp - ecb->ahlen;
+ espspi = version == V4? ((Esp4hdr*)bp->rp)->espspi:
+ ((Esp6hdr*)bp->rp)->espspi;
+ if(!ecb->auth(ecb, espspi, auth - espspi, auth)) {
+ QUNLOCK(c);
+print("esp: bad auth %I -> %I!%ld\n", raddr, laddr, spi);
+ netlog(f, Logesp, "esp: bad auth %I -> %I!%d\n", raddr,
+ laddr, spi);
+ freeb(bp);
+ return;
+ }
+
+ /* what is left between header and auth data must be whole cipher blocks */
+ payload = BLEN(bp) - hdrlen - ecb->ahlen;
+ if(payload <= 0 || payload % 4 != 0 || payload % ecb->espblklen != 0) {
+ QUNLOCK(c);
+ netlog(f, Logesp, "esp: bad length %I -> %I!%d payload=%d BLEN=%d\n",
+ raddr, laddr, spi, payload, BLEN(bp));
+ freeb(bp);
+ return;
+ }
+ if(!ecb->cipher(ecb, bp->rp + hdrlen, payload)) {
+ QUNLOCK(c);
+print("esp: cipher failed %I -> %I!%ld: %s\n", raddr, laddr, spi, up->errstr);
+ netlog(f, Logesp, "esp: cipher failed %I -> %I!%d: %s\n", raddr,
+ laddr, spi, up->errstr);
+ freeb(bp);
+ return;
+ }
+
+ /* the tail sits at the end of the decrypted area; drop it, the pad and the iv */
+ payload -= Esptaillen;
+ et = (Esptail*)(bp->rp + hdrlen + payload);
+ payload -= et->pad + ecb->espivlen;
+ nexthdr = et->nexthdr;
+ if(payload <= 0) {
+ QUNLOCK(c);
+ netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%d\n",
+ raddr, laddr, spi);
+ freeb(bp);
+ return;
+ }
+
+ /* trim packet */
+ bp->rp += hdrlen + ecb->espivlen;
+ bp->wp = bp->rp + payload;
+ if(ecb->header) {
+ /* assume Userhdrlen < Esp4hdrlen < Esp6hdrlen */
+ /* the user header is built in the space just vacated by the esp header */
+ bp->rp -= Userhdrlen;
+ uh = (Userhdr*)bp->rp;
+ memset(uh, 0, Userhdrlen);
+ uh->nexthdr = nexthdr;
+ }
+
+ if(qfull(c->rq)){
+ netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", raddr,
+ laddr, spi);
+ freeblist(bp);
+ }else {
+// print("esp: pass up: %uld\n", BLEN(bp));
+ qpass(c->rq, bp);
+ }
+
+ QUNLOCK(c);
+}
+
+/*
+ * protocol-specific ctl messages:
+ *	esp alg key	choose cipher
+ *	ah alg key	choose authenticator
+ *	header		use the user-level header on reads and writes
+ *	noheader	do not use it
+ * returns nil or an error string.
+ */
+char*
+espctl(Conv *c, char **f, int n)
+{
+	Espcb *ecb;
+	char *cmd;
+
+	ecb = c->ptcl;
+	cmd = f[0];
+	if(strcmp(cmd, "esp") == 0)
+		return setalg(ecb, f, n, espalg);
+	if(strcmp(cmd, "ah") == 0)
+		return setalg(ecb, f, n, ahalg);
+	if(strcmp(cmd, "header") == 0){
+		ecb->header = 1;
+		return nil;
+	}
+	if(strcmp(cmd, "noheader") == 0){
+		ecb->header = 0;
+		return nil;
+	}
+	return "unknown control request";
+}
+
+/*
+ * advise routine (installed as esp->advise by espinit): hang up both
+ * queues of the conversation that owns the spi found in the packet
+ * at bp, passing msg as the reason, then free the packet.
+ * the packet is interpreted with the v4 header layout only.
+ */
+void
+espadvise(Proto *esp, Block *bp, char *msg)
+{
+	Esp4hdr *h;
+	Conv *c;
+	ulong spi;
+
+	h = (Esp4hdr*)(bp->rp);
+
+	/*
+	 * the spi is a 4-byte field; read all of it, as espiput does,
+	 * so the value can match what convlookup compares against
+	 */
+	spi = nhgetl(h->espspi);
+	QLOCK(esp);
+	c = convlookup(esp, spi);
+	if(c != nil) {
+		qhangup(c->rq, msg);
+		qhangup(c->wq, msg);
+	}
+	QUNLOCK(esp);
+	freeblist(bp);
+}
+
+/*
+ * format the protocol counters (in, inerrors) into buf
+ */
+int
+espstats(Proto *esp, char *buf, int len)
+{
+	Esppriv *priv;
+
+	priv = esp->priv;
+	return snprint(buf, len, "%lud %lud\n", priv->in, priv->inerrors);
+}
+
+/*
+ * text of the "local" file: the local address, followed by !spi
+ * on an incoming conversation
+ */
+static int
+esplocal(Conv *c, char *buf, int len)
+{
+	Espcb *cb = c->ptcl;
+	int nw;
+
+	QLOCK(c);
+	if(!cb->incoming)
+		nw = snprint(buf, len, "%I\n", c->laddr);
+	else
+		nw = snprint(buf, len, "%I!%uld\n", c->laddr, cb->spi);
+	QUNLOCK(c);
+	return nw;
+}
+
+/*
+ * text of the "remote" file: the remote address, followed by !spi
+ * on an outgoing conversation
+ */
+static int
+espremote(Conv *c, char *buf, int len)
+{
+	Espcb *cb = c->ptcl;
+	int nw;
+
+	QLOCK(c);
+	if(!cb->incoming)
+		nw = snprint(buf, len, "%I!%uld\n", c->raddr, cb->spi);
+	else
+		nw = snprint(buf, len, "%I\n", c->raddr);
+	QUNLOCK(c);
+	return nw;
+}
+
+/*
+ * find the incoming conversation with the given spi, or nil.
+ * walks the conversation table up to its first nil entry.
+ */
+static Conv*
+convlookup(Proto *esp, ulong spi)
+{
+	Conv **cp;
+	Espcb *cb;
+
+	for(cp = esp->conv; *cp != nil; cp++){
+		cb = (*cp)->ptcl;
+		if(cb->incoming && cb->spi == spi)
+			return *cp;
+	}
+	return nil;
+}
+
+/*
+ * handle "esp alg key" and "ah alg key": look alg (f[1]) up in the
+ * table, convert the hex key string f[2] to bytes and call the
+ * algorithm's initializer.  f[2] is overwritten during conversion.
+ * returns nil or an error string.
+ */
+static char *
+setalg(Espcb *ecb, char **f, int n, Algorithm *alg)
+{
+	uchar *key;
+	int c, i, nbyte, nchar;
+
+	if(n < 2)
+		return "bad format";
+	for(; alg->name; alg++)
+		if(strcmp(f[1], alg->name) == 0)
+			break;
+	if(alg->name == nil)
+		return "unknown algorithm";
+
+	if(n != 3)
+		return "bad format";
+	nbyte = (alg->keylen + 7) >> 3;
+	nchar = strlen(f[2]);
+
+	/* turn each hex digit into its value, in place */
+	for(i=0; i<nchar; i++) {
+		c = f[2][i];
+		if(c >= '0' && c <= '9')
+			f[2][i] -= '0';
+		else if(c >= 'a' && c <= 'f')
+			f[2][i] -= 'a'-10;
+		else if(c >= 'A' && c <= 'F')
+			f[2][i] -= 'A'-10;
+		else
+			return "bad character in key";
+	}
+
+	/*
+	 * pack nibbles into key, last digit of the string first.
+	 * i counts nibbles, so nbyte bytes take up to 2*nbyte of them;
+	 * the bound is therefore on i/2, the byte being filled.
+	 * the |= relies on smalloc returning zeroed memory (TODO confirm).
+	 */
+	key = smalloc(nbyte);
+	for(i=0; i<nchar && i/2<nbyte; i++) {
+		c = f[2][nchar-i-1];
+		if(i&1)
+			c <<= 4;
+		key[i>>1] |= c;
+	}
+
+	alg->init(ecb, alg->name, key, alg->keylen);
+	free(key);
+	return nil;
+}
+
+/* cipher for the "null" algorithm: leaves the data alone, always succeeds */
+static int
+nullcipher(Espcb* _, uchar* __, int ___)
+{
+ return 1;
+}
+
+/* select the "null" cipher: block length 1, no iv; the key is ignored */
+static void
+nullespinit(Espcb *ecb, char *name, uchar* _, int __)
+{
+ ecb->espalg = name;
+ ecb->espblklen = 1;
+ ecb->espivlen = 0;
+ ecb->cipher = nullcipher;
+}
+
+/* authenticator for the "null" algorithm: accepts everything */
+static int
+nullauth(Espcb* _, uchar* __, int ___, uchar* ____)
+{
+ return 1;
+}
+
+/* select the "null" authenticator: no auth data; the key is ignored */
+static void
+nullahinit(Espcb *ecb, char *name, uchar* _, int __)
+{
+ ecb->ahalg = name;
+ ecb->ahblklen = 1;
+ ecb->ahlen = 0;
+ ecb->auth = nullauth;
+}
+
+/*
+ * hmac-sha1 of the tlen bytes at t under key, written to hash:
+ *	sha1(key^opad, sha1(key^ipad, t))
+ * the pads are 64 bytes, so klen must not exceed 64.
+ */
+void
+seanq_hmac_sha1(uchar hash[SHA1dlen], uchar *t, long tlen, uchar *key, long klen)
+{
+	uchar ipad[65], opad[65], inner[SHA1dlen];
+	DigestState *ds;
+	long k;
+
+	memset(ipad, 0x36, 64);
+	memset(opad, 0x5c, 64);
+	ipad[64] = 0;
+	opad[64] = 0;
+	for(k = 0; k < klen; k++){
+		ipad[k] ^= key[k];
+		opad[k] ^= key[k];
+	}
+
+	/* inner digest */
+	ds = sha1(ipad, 64, nil, nil);
+	sha1(t, tlen, inner, ds);
+	/* outer digest */
+	ds = sha1(opad, 64, nil, nil);
+	sha1(inner, SHA1dlen, hash, ds);
+}
+
+/*
+ * hmac-sha1 authenticator.  computes the mac of t with the 16-byte
+ * key in ecb->ahstate, reports whether its first ahlen bytes equal
+ * what is at auth, and in any case stores them at auth.
+ */
+static int
+shaauth(Espcb *ecb, uchar *t, int tlen, uchar *auth)
+{
+	uchar mac[SHA1dlen];
+	int ok;
+
+	memset(mac, 0, sizeof mac);
+	seanq_hmac_sha1(mac, t, tlen, (uchar*)ecb->ahstate, 16);
+	ok = memcmp(auth, mac, ecb->ahlen) == 0;
+	memmove(auth, mac, ecb->ahlen);
+	return ok;
+}
+
+/*
+ * select hmac_sha1_96: 12 bytes of auth data, 128-bit key.
+ * klen arrives in bits; a copy of the key is kept in ecb->ahstate
+ * for shaauth, which reads 16 bytes from it.
+ */
+static void
+shaahinit(Espcb *ecb, char *name, uchar *key, int klen)
+{
+	if(klen != 128)
+		panic("shaahinit: bad keylen");
+	klen >>= 3;	/* convert to bytes: 8 bits each, as md5ahinit does */
+
+	ecb->ahalg = name;
+	ecb->ahblklen = 1;
+	ecb->ahlen = 12;
+	ecb->auth = shaauth;
+	ecb->ahstate = smalloc(klen);
+	memmove(ecb->ahstate, key, klen);
+}
+
+/*
+ * hmac-md5 of the tlen bytes at t under key, written to hash:
+ *	md5(key^opad, md5(key^ipad, t))
+ * the pads are 64 bytes, so klen must not exceed 64.
+ */
+void
+seanq_hmac_md5(uchar hash[MD5dlen], uchar *t, long tlen, uchar *key, long klen)
+{
+	uchar ipad[65], opad[65], inner[MD5dlen];
+	DigestState *ds;
+	long k;
+
+	memset(ipad, 0x36, 64);
+	memset(opad, 0x5c, 64);
+	ipad[64] = 0;
+	opad[64] = 0;
+	for(k = 0; k < klen; k++){
+		ipad[k] ^= key[k];
+		opad[k] ^= key[k];
+	}
+
+	/* inner digest */
+	ds = md5(ipad, 64, nil, nil);
+	md5(t, tlen, inner, ds);
+	/* outer digest */
+	ds = md5(opad, 64, nil, nil);
+	md5(inner, MD5dlen, hash, ds);
+}
+
+/*
+ * hmac-md5 authenticator.  computes the mac of t with the 16-byte
+ * key in ecb->ahstate, reports whether its first ahlen bytes equal
+ * what is at auth, and in any case stores them at auth.
+ */
+static int
+md5auth(Espcb *ecb, uchar *t, int tlen, uchar *auth)
+{
+	uchar mac[MD5dlen];
+	int ok;
+
+	memset(mac, 0, sizeof mac);
+	seanq_hmac_md5(mac, t, tlen, (uchar*)ecb->ahstate, 16);
+	ok = memcmp(auth, mac, ecb->ahlen) == 0;
+	memmove(auth, mac, ecb->ahlen);
+	return ok;
+}
+
+/*
+ * select hmac_md5_96: 12 bytes of auth data, 128-bit key.
+ * klen arrives in bits; a copy of the key is kept in ecb->ahstate.
+ */
+static void
+md5ahinit(Espcb *ecb, char *name, uchar *key, int klen)
+{
+	int nbytes;
+
+	if(klen != 128)
+		panic("md5ahinit: bad keylen");
+	nbytes = klen >> 3;	/* bits to bytes */
+
+	ecb->ahalg = name;
+	ecb->ahblklen = 1;
+	ecb->ahlen = 12;
+	ecb->auth = md5auth;
+	ecb->ahstate = smalloc(nbytes);
+	memmove(ecb->ahstate, key, nbytes);
+}
+
+/*
+ * des cipher with chaining done by hand, operating in place on the
+ * n bytes at p.  the first 8 bytes are the iv, the rest is data in
+ * 8-byte blocks.  incoming conversations decrypt, others encrypt.
+ * always returns 1.
+ */
+static int
+descipher(Espcb *ecb, uchar *p, int n)
+{
+ uchar tmp[8];
+ uchar *pp, *tp, *ip, *eip, *ep;
+ DESstate *ds = ecb->espstate;
+
+ ep = p + n;
+ if(ecb->incoming) {
+ /* decrypt: the iv comes from the packet */
+ memmove(ds->ivec, p, 8);
+ p += 8;
+ while(p < ep){
+ /* keep the ciphertext: it is the chaining value for the next block */
+ memmove(tmp, p, 8);
+ block_cipher(ds->expanded, p, 1);
+ tp = tmp;
+ ip = ds->ivec;
+ for(eip = ip+8; ip < eip; ){
+ *p++ ^= *ip;
+ *ip++ = *tp++;
+ }
+ }
+ } else {
+ /* encrypt: send the current iv, then chain each block into the next */
+ memmove(p, ds->ivec, 8);
+ for(p += 8; p < ep; p += 8){
+ pp = p;
+ ip = ds->ivec;
+ for(eip = ip+8; ip < eip; )
+ *pp++ ^= *ip++;
+ block_cipher(ds->expanded, p, 0);
+ memmove(ds->ivec, p, 8);
+ }
+ }
+ return 1;
+}
+
+/*
+ * select des_56_cbc: 8-byte blocks and an 8-byte iv.
+ * n is the key length in bits; at most 8 key bytes are used, short
+ * keys are zero padded, and the initial iv is random.
+ */
+static void
+desespinit(Espcb *ecb, char *name, uchar *k, int n)
+{
+	uchar key[8], ivec[8];
+	uchar *ip;
+	int nbytes;
+
+	/* bits to bytes */
+	nbytes = (n+7)>>3;
+	if(nbytes > 8)
+		nbytes = 8;
+	memset(key, 0, sizeof key);
+	memmove(key, k, nbytes);
+	for(ip = ivec; ip < ivec+8; ip++)
+		*ip = nrand(256);
+
+	ecb->espalg = name;
+	ecb->espblklen = 8;
+	ecb->espivlen = 8;
+	ecb->cipher = descipher;
+	ecb->espstate = smalloc(sizeof(DESstate));
+	setupDESstate(ecb->espstate, key, ivec);
+}
+
+/*
+ * build the esp Proto and register it with the ip stack fs.
+ * esp has no announce routine.
+ */
+void
+espinit(Fs *fs)
+{
+	Proto *pr;
+
+	pr = smalloc(sizeof(Proto));
+	pr->priv = smalloc(sizeof(Esppriv));
+
+	/* identity and sizing */
+	pr->name = "esp";
+	pr->ipproto = IP_ESPPROTO;
+	pr->nc = Nchans;
+	pr->ptclsize = sizeof(Espcb);
+
+	/* conversation life cycle */
+	pr->connect = espconnect;
+	pr->announce = nil;
+	pr->ctl = espctl;
+	pr->create = espcreate;
+	pr->close = espclose;
+
+	/* packet input and advice */
+	pr->rcv = espiput;
+	pr->advise = espadvise;
+
+	/* status files */
+	pr->state = espstate;
+	pr->stats = espstats;
+	pr->local = esplocal;
+	pr->remote = espremote;
+
+	Fsproto(fs, pr);
+}
+
+
+#ifdef notdef
+/* limits, in bytes of key stream, on how far rc4cipher will resynchronize */
+enum {
+ RC4forward= 10*1024*1024, /* maximum skip forward */
+ RC4back = 100*1024, /* maximum look back */
+};
+
+/*
+ * rc4 cipher state: the current key stream position plus an older
+ * snapshot kept so that reordered packets can still be decrypted
+ */
+typedef struct Esprc4 Esprc4;
+struct Esprc4
+{
+ ulong cseq; /* current byte sequence number */
+ RC4state current;
+
+ int ovalid; /* old is valid */
+ ulong lgseq; /* last good sequence */
+ ulong oseq; /* old byte sequence number */
+ RC4state old;
+};
+
+static void rc4espinit(Espcb *ecb, char *name, uchar *k, int n);
+
+/*
+ * rc4 cipher, operating in place on the n bytes at p.  the first 4
+ * bytes carry the key stream byte position of the packet.
+ * outgoing: write the current position, encrypt the rest.
+ * incoming: resynchronize to the position in the packet (skipping
+ * forward, or replaying from the saved old state for a reordered
+ * packet) and decrypt.  returns 0 with up->errstr set on failure.
+ */
+static int
+rc4cipher(Espcb *ecb, uchar *p, int n)
+{
+ Esprc4 *esprc4;
+ RC4state tmpstate;
+ ulong seq;
+ long d, dd;
+
+ if(n < 4)
+ return 0;
+
+ esprc4 = ecb->espstate;
+ if(ecb->incoming) {
+ seq = nhgetl(p);
+ p += 4;
+ n -= 4;
+ /* d: distance of this packet from where we expect the stream to be */
+ d = seq-esprc4->cseq;
+ if(d == 0) {
+ /* in order */
+ rc4(&esprc4->current, p, n);
+ esprc4->cseq += n;
+ if(esprc4->ovalid) {
+ dd = esprc4->cseq - esprc4->lgseq;
+ if(dd > RC4back)
+ esprc4->ovalid = 0;
+ }
+ } else if(d > 0) {
+ /* ahead of us: remember the old state once, then skip forward */
+print("esp rc4cipher: missing packet: %uld %ld\n", seq, d); /* this link is hosed */
+ if(d > RC4forward) {
+ strcpy(up->errstr, "rc4cipher: skipped too much");
+ return 0;
+ }
+ esprc4->lgseq = seq;
+ if(!esprc4->ovalid) {
+ esprc4->ovalid = 1;
+ esprc4->oseq = esprc4->cseq;
+ memmove(&esprc4->old, &esprc4->current,
+ sizeof(RC4state));
+ }
+ rc4skip(&esprc4->current, d);
+ rc4(&esprc4->current, p, n);
+ esprc4->cseq = seq+n;
+ } else {
+ /* behind us: decrypt with a scratch copy of the old state */
+print("esp rc4cipher: reordered packet: %uld %ld\n", seq, d);
+ dd = seq - esprc4->oseq;
+ if(!esprc4->ovalid || -d > RC4back || dd < 0) {
+ strcpy(up->errstr, "rc4cipher: too far back");
+ return 0;
+ }
+ memmove(&tmpstate, &esprc4->old, sizeof(RC4state));
+ rc4skip(&tmpstate, dd);
+ rc4(&tmpstate, p, n);
+ return 1;
+ }
+
+ /* move old state up */
+ if(esprc4->ovalid) {
+ dd = esprc4->cseq - RC4back - esprc4->oseq;
+ if(dd > 0) {
+ rc4skip(&esprc4->old, dd);
+ esprc4->oseq += dd;
+ }
+ }
+ } else {
+ hnputl(p, esprc4->cseq);
+ p += 4;
+ n -= 4;
+ rc4(&esprc4->current, p, n);
+ esprc4->cseq += n;
+ }
+ return 1;
+}
+
+/*
+ * select the rc4 cipher: 4-byte blocks and a 4-byte "iv" that holds
+ * the stream position.  n is the key length in bits.
+ */
+static void
+rc4espinit(Espcb *ecb, char *name, uchar *k, int n)
+{
+ Esprc4 *esprc4;
+
+ /* bits to bytes */
+ n = (n+7)>>3;
+ esprc4 = smalloc(sizeof(Esprc4));
+ memset(esprc4, 0, sizeof(Esprc4));
+ setupRC4state(&esprc4->current, k, n);
+ ecb->espalg = name;
+ ecb->espblklen = 4;
+ ecb->espivlen = 4;
+ ecb->cipher = rc4cipher;
+ ecb->espstate = esprc4;
+}
+#endif
diff --git a/src/9vx/a/ip/ethermedium.c b/src/9vx/a/ip/ethermedium.c
@@ -0,0 +1,766 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "netif.h"
+#include "ip.h"
+#include "ipv6.h"
+
+/* ethernet header that etherbwrite fills in at the front of each block */
+typedef struct Etherhdr Etherhdr;
+struct Etherhdr
+{
+ uchar d[6];	/* destination mac */
+ uchar s[6];	/* source mac */
+ uchar t[2];	/* ether type */
+};
+
+/* all-ones IP address; recvarp refuses to enter it into the arp table */
+static uchar ipbroadcast[IPaddrlen] = {
+ 0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,
+};
+
+/* all-ones mac address; recvarp also refuses to enter this one */
+static uchar etherbroadcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
+static void etherread4(void *a);
+static void etherread6(void *a);
+static void etherbind(Ipifc *ifc, int argc, char **argv);
+static void etherunbind(Ipifc *ifc);
+static void etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
+static void etheraddmulti(Ipifc *ifc, uchar *a, uchar *ia);
+static void etherremmulti(Ipifc *ifc, uchar *a, uchar *ia);
+static Block* multicastarp(Fs *f, Arpent *a, Medium*, uchar *mac);
+static void sendarp(Ipifc *ifc, Arpent *a);
+static void sendgarp(Ipifc *ifc, uchar*);
+static int multicastea(uchar *ea, uchar *ip);
+static void recvarpproc(void*);
+static void resolveaddr6(Ipifc *ifc, Arpent *a);
+static void etherpref2addr(uchar *pref, uchar *ea);
+
+/*
+ * medium description for ordinary ethernet; registered with the IP
+ * stack by ethermediumlink
+ */
+Medium ethermedium =
+{
+.name= "ether",
+.hsize= 14,	/* sizeof(Etherhdr): 6+6+2 */
+.mintu= 60,
+.maxtu= 1514,
+.maclen= 6,
+.bind= etherbind,
+.unbind= etherunbind,
+.bwrite= etherbwrite,
+.addmulti= etheraddmulti,
+.remmulti= etherremmulti,
+.ares= arpenter,
+.areg= sendgarp,
+.pref2addr= etherpref2addr,
+};
+
+/*
+ * same handlers as ethermedium; only the name and the larger
+ * maxtu (9014) differ
+ */
+Medium gbemedium =
+{
+.name= "gbe",
+.hsize= 14,	/* sizeof(Etherhdr): 6+6+2 */
+.mintu= 60,
+.maxtu= 9014,
+.maclen= 6,
+.bind= etherbind,
+.unbind= etherunbind,
+.bwrite= etherbwrite,
+.addmulti= etheraddmulti,
+.remmulti= etherremmulti,
+.ares= arpenter,
+.areg= sendgarp,
+.pref2addr= etherpref2addr,
+};
+
+/*
+ * per-interface state for this medium: allocated by etherbind,
+ * stored in ifc->arg and freed by etherunbind.  The Proc pointers
+ * are set by the reader processes themselves and cleared when
+ * they exit, which is what etherunbind waits for.
+ */
+typedef struct Etherrock Etherrock;
+struct Etherrock
+{
+ Fs *f; /* file system we belong to */
+ Proc *arpp; /* arp process */
+ Proc *read4p; /* reading process (v4)*/
+ Proc *read6p; /* reading process (v6)*/
+ Chan *mchan4; /* Data channel for v4 */
+ Chan *achan; /* Arp channel */
+ Chan *cchan4; /* Control channel for v4 */
+ Chan *mchan6; /* Data channel for v6 */
+ Chan *cchan6; /* Control channel for v6 */
+};
+
+/*
+ * ethernet arp request
+ */
+enum
+{
+ ARPREQUEST = 1,	/* values carried in Etherarp.op */
+ ARPREPLY = 2,
+};
+
+/* an arp packet including its ethernet header, as sent and received on achan */
+typedef struct Etherarp Etherarp;
+struct Etherarp
+{
+ uchar d[6];	/* ethernet destination */
+ uchar s[6];	/* ethernet source */
+ uchar type[2];	/* ether type, set to ETARP */
+ uchar hrd[2];	/* hardware type, set to 1 */
+ uchar pro[2];	/* protocol type, set to ETIP4 */
+ uchar hln;	/* hardware address length, sizeof(sha) */
+ uchar pln;	/* protocol address length, sizeof(spa) */
+ uchar op[2];	/* ARPREQUEST or ARPREPLY */
+ uchar sha[6];	/* sender hardware address */
+ uchar spa[4];	/* sender protocol (IPv4) address */
+ uchar tha[6];	/* target hardware address */
+ uchar tpa[4];	/* target protocol (IPv4) address */
+};
+
+/* written to the ether control channels by etherbind to make them non-blocking */
+static char *nbmsg = "nonblocking";
+
+/*
+ *  called to bind an IP ifc to an ethernet device
+ *  called with ifc wlock'd
+ *
+ *  argv[2] names the ethernet device (argv[0] and argv[1] are the
+ *  command words that led here), so at least three words are
+ *  required.  Opens the IPv4, ARP and IPv6 conversations on the
+ *  device, reads the mac address and speed from its stats file,
+ *  records everything in an Etherrock hung off ifc->arg and starts
+ *  the three reader processes.  On any failure every channel opened
+ *  so far is closed and the error is re-raised.
+ */
+
+static void
+etherbind(Ipifc *ifc, int argc, char **argv)
+{
+	Chan *mchan4, *cchan4, *achan, *mchan6, *cchan6, *schan;
+	char addr[Maxpath];	//char addr[2*KNAMELEN];
+	char dir[Maxpath];	//char dir[2*KNAMELEN];
+	char *buf;
+	int n;
+	char *ptr;
+	Etherrock *er;
+
+	/* argv[2] is dereferenced below, so two words are not enough */
+	if(argc < 3)
+		error(Ebadarg);
+
+	mchan4 = cchan4 = achan = mchan6 = cchan6 = nil;
+	buf = nil;
+	if(waserror()){
+		if(mchan4 != nil)
+			cclose(mchan4);
+		if(cchan4 != nil)
+			cclose(cchan4);
+		if(achan != nil)
+			cclose(achan);
+		if(mchan6 != nil)
+			cclose(mchan6);
+		if(cchan6 != nil)
+			cclose(cchan6);
+		if(buf != nil)
+			free(buf);
+		nexterror();
+	}
+
+	/*
+	 *  open ipv4 conversation
+	 *
+	 *  the dial will fail if the type is already open on
+	 *  this device.
+	 */
+	snprint(addr, sizeof(addr), "%s!0x800", argv[2]);	/* ETIP4 */
+	mchan4 = chandial(addr, nil, dir, &cchan4);
+
+	/*
+	 *  make it non-blocking
+	 */
+	devtab[cchan4->type]->write(cchan4, nbmsg, strlen(nbmsg), 0);
+
+	/*
+	 *  get mac address and speed
+	 */
+	snprint(addr, sizeof(addr), "%s/stats", argv[2]);
+	buf = smalloc(512);
+	schan = namec(addr, Aopen, OREAD, 0);
+	if(waserror()){
+		cclose(schan);
+		nexterror();
+	}
+	/* read at most 511 bytes so the terminator fits in buf */
+	n = devtab[schan->type]->read(schan, buf, 511, 0);
+	cclose(schan);
+	poperror();
+	buf[n] = 0;
+
+	ptr = strstr(buf, "addr: ");
+	if(!ptr)
+		error(Eio);
+	ptr += 6;
+	parsemac(ifc->mac, ptr, 6);
+
+	/* fall back to 100 when the device reports no speed */
+	ptr = strstr(buf, "mbps: ");
+	if(ptr){
+		ptr += 6;
+		ifc->mbps = atoi(ptr);
+	} else
+		ifc->mbps = 100;
+
+	/*
+ 	 *  open arp conversation
+	 */
+	snprint(addr, sizeof(addr), "%s!0x806", argv[2]);	/* ETARP */
+	achan = chandial(addr, nil, nil, nil);
+
+	/*
+	 *  open ipv6 conversation
+	 *
+	 *  the dial will fail if the type is already open on
+	 *  this device.
+	 */
+	snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]);	/* ETIP6 */
+	mchan6 = chandial(addr, nil, dir, &cchan6);
+
+	/*
+	 *  make it non-blocking
+	 */
+	devtab[cchan6->type]->write(cchan6, nbmsg, strlen(nbmsg), 0);
+
+	er = smalloc(sizeof(*er));
+	er->mchan4 = mchan4;
+	er->cchan4 = cchan4;
+	er->achan = achan;
+	er->mchan6 = mchan6;
+	er->cchan6 = cchan6;
+	er->f = ifc->conv->p->f;
+	ifc->arg = er;
+
+	free(buf);
+	poperror();
+
+	/* the readers find their channels through ifc->arg */
+	kproc("etherread4", etherread4, ifc);
+	kproc("recvarpproc", recvarpproc, ifc);
+	kproc("etherread6", etherread6, ifc);
+}
+
+/*
+ *  called with ifc wlock'd
+ *
+ *  Tells the three reader processes to exit, waits until each has
+ *  cleared its slot in the Etherrock, then closes the channels
+ *  and frees the rock.
+ */
+static void
+etherunbind(Ipifc *ifc)
+{
+	Etherrock *rock;
+
+	rock = ifc->arg;
+
+	if(rock->read4p)
+		postnote(rock->read4p, 1, "unbind", 0);
+	if(rock->read6p)
+		postnote(rock->read6p, 1, "unbind", 0);
+	if(rock->arpp)
+		postnote(rock->arpp, 1, "unbind", 0);
+
+	/* poll every 300ms until all readers have gone */
+	for(;;){
+		if(rock->arpp == 0 && rock->read4p == 0 && rock->read6p == 0)
+			break;
+		tsleep(&up->sleep, return0, 0, 300);
+	}
+
+	if(rock->mchan4 != nil)
+		cclose(rock->mchan4);
+	if(rock->achan != nil)
+		cclose(rock->achan);
+	if(rock->cchan4 != nil)
+		cclose(rock->cchan4);
+	if(rock->mchan6 != nil)
+		cclose(rock->mchan6);
+	if(rock->cchan6 != nil)
+		cclose(rock->cchan6);
+
+	free(rock);
+}
+
+/*
+ * called by ipoput with a single block to write with ifc RLOCK'd
+ *
+ * Looks up the destination mac for ip.  When arpget hands back an
+ * arp entry, the address is tried as broadcast/multicast via
+ * multicastarp; failing that an ARP request (V4) or neighbor
+ * solicitation (V6) is started and nothing is written now.
+ * Otherwise the ethernet header is prepended and the block is
+ * written to the data channel for the given IP version.
+ */
+static void
+etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip)
+{
+ Etherhdr *eh;
+ Arpent *a;
+ uchar mac[6];
+ Etherrock *er = ifc->arg;
+
+ /* get mac address of destination */
+ a = arpget(er->f->arp, bp, version, ifc, ip, mac);
+ if(a){
+ /* check for broadcast or multicast */
+ bp = multicastarp(er->f, a, ifc->m, mac);
+ if(bp==nil){
+ /* plain unicast with no mac yet: start resolution and return */
+ switch(version){
+ case V4:
+ sendarp(ifc, a);
+ break;
+ case V6:
+ resolveaddr6(ifc, a);
+ break;
+ default:
+ panic("etherbwrite: version %d", version);
+ }
+ return;
+ }
+ }
+
+ /* make it a single block with space for the ether header */
+ bp = padblock(bp, ifc->m->hsize);
+ if(bp->next)
+ bp = concatblock(bp);
+ /* pad short frames up to the interface minimum */
+ if(BLEN(bp) < ifc->mintu)
+ bp = adjustblock(bp, ifc->mintu);
+ eh = (Etherhdr*)bp->rp;
+
+ /* copy in mac addresses and ether type */
+ memmove(eh->s, ifc->mac, sizeof(eh->s));
+ memmove(eh->d, mac, sizeof(eh->d));
+
+ switch(version){
+ case V4:
+ eh->t[0] = 0x08;
+ eh->t[1] = 0x00;
+ devtab[er->mchan4->type]->bwrite(er->mchan4, bp, 0);
+ break;
+ case V6:
+ eh->t[0] = 0x86;
+ eh->t[1] = 0xDD;
+ devtab[er->mchan6->type]->bwrite(er->mchan6, bp, 0);
+ break;
+ default:
+ panic("etherbwrite2: version %d", version);
+ }
+ ifc->out++;
+}
+
+
+/*
+ * process to read from the ethernet
+ *
+ * Started by etherbind with the Ipifc as argument.  Loops reading
+ * blocks from the v4 data channel and passing them to ipiput4.
+ * Any error raised in the loop ends the process after clearing
+ * er->read4p, which is what etherunbind polls.
+ */
+static void
+etherread4(void *a)
+{
+ Ipifc *ifc;
+ Block *bp;
+ Etherrock *er;
+
+ ifc = a;
+ er = ifc->arg;
+ er->read4p = up; /* hide identity under a rock for unbind */
+ if(waserror()){
+ er->read4p = 0;
+ pexit("hangup", 1);
+ }
+ for(;;){
+ bp = devtab[er->mchan4->type]->bread(er->mchan4, ifc->maxtu, 0);
+ /* drop the packet if the read lock cannot be had right now */
+ if(!CANRLOCK(ifc)){
+ freeb(bp);
+ continue;
+ }
+ /* make sure the lock is released if input processing raises an error */
+ if(waserror()){
+ RUNLOCK(ifc);
+ nexterror();
+ }
+ ifc->in++;
+ /* step over the ethernet header */
+ bp->rp += ifc->m->hsize;
+ /* no local addresses configured: nothing to deliver to */
+ if(ifc->lifc == nil)
+ freeb(bp);
+ else
+ ipiput4(er->f, ifc, bp);
+ RUNLOCK(ifc);
+ poperror();
+ }
+}
+
+
+/*
+ * process to read from the ethernet, IPv6
+ *
+ * Same structure as etherread4, but reads the v6 data channel,
+ * hands blocks to ipiput6 and tracks itself in er->read6p.
+ */
+static void
+etherread6(void *a)
+{
+ Ipifc *ifc;
+ Block *bp;
+ Etherrock *er;
+
+ ifc = a;
+ er = ifc->arg;
+ er->read6p = up; /* hide identity under a rock for unbind */
+ if(waserror()){
+ er->read6p = 0;
+ pexit("hangup", 1);
+ }
+ for(;;){
+ bp = devtab[er->mchan6->type]->bread(er->mchan6, ifc->maxtu, 0);
+ /* drop the packet if the read lock cannot be had right now */
+ if(!CANRLOCK(ifc)){
+ freeb(bp);
+ continue;
+ }
+ /* make sure the lock is released if input processing raises an error */
+ if(waserror()){
+ RUNLOCK(ifc);
+ nexterror();
+ }
+ ifc->in++;
+ /* step over the ethernet header */
+ bp->rp += ifc->m->hsize;
+ /* no local addresses configured: nothing to deliver to */
+ if(ifc->lifc == nil)
+ freeb(bp);
+ else
+ ipiput6(er->f, ifc, bp);
+ RUNLOCK(ifc);
+ poperror();
+ }
+}
+
+/*
+ * Ask the ethernet device to accept the multicast mac that
+ * corresponds to IP multicast address a.  The request goes to the
+ * v4 or v6 control channel depending on what multicastea reports;
+ * any other answer is a panic.
+ */
+static void
+etheraddmulti(Ipifc *ifc, uchar *a, uchar *_)
+{
+	Etherrock *rock;
+	Chan *ctl;
+	uchar ea[6];
+	char cmd[64];
+	int v;
+
+	rock = ifc->arg;
+	v = multicastea(ea, a);
+	sprint(cmd, "addmulti %E", ea);
+
+	if(v == V4)
+		ctl = rock->cchan4;
+	else if(v == V6)
+		ctl = rock->cchan6;
+	else{
+		panic("etheraddmulti: version %d", v);
+		return;
+	}
+	devtab[ctl->type]->write(ctl, cmd, strlen(cmd), 0);
+}
+
+/*
+ * Ask the ethernet device to stop accepting the multicast mac
+ * that corresponds to IP multicast address a.  Counterpart of
+ * etheraddmulti; the control channel is picked the same way.
+ */
+static void
+etherremmulti(Ipifc *ifc, uchar *a, uchar *_)
+{
+	Etherrock *rock;
+	Chan *ctl;
+	uchar ea[6];
+	char cmd[64];
+	int v;
+
+	rock = ifc->arg;
+	v = multicastea(ea, a);
+	sprint(cmd, "remmulti %E", ea);
+
+	if(v == V4)
+		ctl = rock->cchan4;
+	else if(v == V6)
+		ctl = rock->cchan6;
+	else{
+		panic("etherremmulti: version %d", v);
+		return;
+	}
+	devtab[ctl->type]->write(ctl, cmd, strlen(cmd), 0);
+}
+
+/*
+ * send an ethernet arp
+ * (only v4, v6 uses the neighbor discovery, rfc1970)
+ *
+ * Called from etherbwrite with the arp entry a obtained from
+ * arpget; every path calls arprelease on it exactly once.
+ * Requests for the same entry are limited to one per second.
+ */
+static void
+sendarp(Ipifc *ifc, Arpent *a)
+{
+ int n;
+ Block *bp;
+ Etherarp *e;
+ Etherrock *er = ifc->arg;
+
+ /* don't do anything if it's been less than a second since the last */
+ if(NOW - a->ctime < 1000){
+ arprelease(er->f->arp, a);
+ return;
+ }
+
+ /* remove all but the last message */
+ while((bp = a->hold) != nil){
+ if(bp == a->last)
+ break;
+ a->hold = bp->list;
+ freeblist(bp);
+ }
+
+ /* try to keep it around for a second more */
+ a->ctime = NOW;
+ arprelease(er->f->arp, a);
+
+ /* build a zeroed request, padded to the medium's minimum size */
+ n = sizeof(Etherarp);
+ if(n < a->type->mintu)
+ n = a->type->mintu;
+ bp = allocb(n);
+ memset(bp->rp, 0, n);
+ e = (Etherarp*)bp->rp;
+ memmove(e->tpa, a->ip+IPv4off, sizeof(e->tpa));
+ ipv4local(ifc, e->spa);
+ memmove(e->sha, ifc->mac, sizeof(e->sha));
+ memset(e->d, 0xff, sizeof(e->d)); /* ethernet broadcast */
+ memmove(e->s, ifc->mac, sizeof(e->s));
+
+ hnputs(e->type, ETARP);
+ hnputs(e->hrd, 1);
+ hnputs(e->pro, ETIP4);
+ e->hln = sizeof(e->sha);
+ e->pln = sizeof(e->spa);
+ hnputs(e->op, ARPREQUEST);
+ bp->wp += n;
+
+ devtab[er->achan->type]->bwrite(er->achan, bp, 0);
+}
+
+/*
+ * IPv6 counterpart of sendarp: start (or retry) neighbor
+ * solicitation for arp entry a.  Every path calls arprelease on
+ * the entry exactly once.  Retries are spaced by ReTransTimer and
+ * limited by a->rxtsrem.
+ */
+static void
+resolveaddr6(Ipifc *ifc, Arpent *a)
+{
+ int sflag;
+ Block *bp;
+ Etherrock *er = ifc->arg;
+ uchar ipsrc[IPaddrlen];
+
+ /* don't do anything if it's been less than ReTransTimer since the last */
+ if(NOW - a->ctime < ReTransTimer){
+ arprelease(er->f->arp, a);
+ return;
+ }
+
+ /* remove all but the last message */
+ while((bp = a->hold) != nil){
+ if(bp == a->last)
+ break;
+ a->hold = bp->list;
+ freeblist(bp);
+ }
+
+ /* try to keep it around for a second more */
+ a->ctime = NOW;
+ a->rtime = NOW + ReTransTimer;
+ /* out of retransmissions: give up without soliciting */
+ if(a->rxtsrem <= 0) {
+ arprelease(er->f->arp, a);
+ return;
+ }
+
+ a->rxtsrem--;
+ arprelease(er->f->arp, a);
+
+ /* solicit only if the interface has a usable source address */
+ if((sflag = ipv6anylocal(ifc, ipsrc)) != 0)
+ icmpns(er->f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
+}
+
+/*
+ * send a gratuitous arp to refresh arp caches
+ *
+ * Installed as the medium's areg hook.  Broadcasts an ARP request
+ * whose sender and target protocol addresses are both the IPv4
+ * part of ip.
+ */
+static void
+sendgarp(Ipifc *ifc, uchar *ip)
+{
+ int n;
+ Block *bp;
+ Etherarp *e;
+ Etherrock *er = ifc->arg;
+
+ /* don't arp for our initial non address */
+ if(ipcmp(ip, IPnoaddr) == 0)
+ return;
+
+ /* build a zeroed request, padded to the medium's minimum size */
+ n = sizeof(Etherarp);
+ if(n < ifc->m->mintu)
+ n = ifc->m->mintu;
+ bp = allocb(n);
+ memset(bp->rp, 0, n);
+ e = (Etherarp*)bp->rp;
+ memmove(e->tpa, ip+IPv4off, sizeof(e->tpa));
+ memmove(e->spa, ip+IPv4off, sizeof(e->spa));
+ memmove(e->sha, ifc->mac, sizeof(e->sha));
+ memset(e->d, 0xff, sizeof(e->d)); /* ethernet broadcast */
+ memmove(e->s, ifc->mac, sizeof(e->s));
+
+ hnputs(e->type, ETARP);
+ hnputs(e->hrd, 1);
+ hnputs(e->pro, ETIP4);
+ e->hln = sizeof(e->sha);
+ e->pln = sizeof(e->spa);
+ hnputs(e->op, ARPREQUEST);
+ bp->wp += n;
+
+ devtab[er->achan->type]->bwrite(er->achan, bp, 0);
+}
+
+/*
+ * Read one packet from the arp channel and act on it.
+ *
+ * ARPREPLY: enter the sender's ip/mac pair in the arp table,
+ * unless the sender claims one of our addresses with a different
+ * mac or either address is a broadcast address.
+ * ARPREQUEST: refresh the sender's entry and, if the target is one
+ * of our addresses or one we proxy for, send back a reply.
+ * Other opcodes are ignored.  The received block is always freed.
+ */
+static void
+recvarp(Ipifc *ifc)
+{
+	int n;
+	Block *ebp, *rbp;
+	Etherarp *e, *r;
+	uchar ip[IPaddrlen];
+	static uchar eprinted[4];
+	Etherrock *er = ifc->arg;
+
+	ebp = devtab[er->achan->type]->bread(er->achan, ifc->maxtu, 0);
+	if(ebp == nil)
+		return;
+
+	e = (Etherarp*)ebp->rp;
+	switch(nhgets(e->op)) {
+	default:
+		break;
+
+	case ARPREPLY:
+		/* check for machine using my ip address */
+		v4tov6(ip, e->spa);
+		if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
+				print("arprep: 0x%E/0x%E also has ip addr %V\n",
+					e->s, e->sha, e->spa);
+				break;
+			}
+		}
+
+		/* make sure we're not entering broadcast addresses */
+		if(ipcmp(ip, ipbroadcast) == 0 ||
+			!memcmp(e->sha, etherbroadcast, sizeof(e->sha))){
+			/* e->spa is a 4-byte v4 address, so it must be printed with %V */
+			print("arprep: 0x%E/0x%E cannot register broadcast address %V\n",
+				e->s, e->sha, e->spa);
+			break;
+		}
+
+		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 0);
+		break;
+
+	case ARPREQUEST:
+		/* don't answer arps till we know who we are */
+		if(ifc->lifc == 0)
+			break;
+
+		/* check for machine using my ip or ether address */
+		v4tov6(ip, e->spa);
+		if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
+				if (memcmp(eprinted, e->spa, sizeof(e->spa))){
+					/* print only once */
+					print("arpreq: 0x%E also has ip addr %V\n", e->sha, e->spa);
+					memmove(eprinted, e->spa, sizeof(e->spa));
+				}
+			}
+		} else {
+			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) == 0){
+				print("arpreq: %V also has ether addr %E\n", e->spa, e->sha);
+				break;
+			}
+		}
+
+		/* refresh what we know about sender */
+		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 1);
+
+		/* answer only requests for our address or systems we're proxying for */
+		v4tov6(ip, e->tpa);
+		if(!iplocalonifc(ifc, ip))
+		if(!ipproxyifc(er->f, ifc, ip))
+			break;
+
+		n = sizeof(Etherarp);
+		if(n < ifc->mintu)
+			n = ifc->mintu;
+		rbp = allocb(n);
+		r = (Etherarp*)rbp->rp;
+		/*
+		 * clear all n bytes that will be sent, not just the arp
+		 * structure, so the padding never carries stale memory
+		 * (sendarp and sendgarp do the same)
+		 */
+		memset(r, 0, n);
+		hnputs(r->type, ETARP);
+		hnputs(r->hrd, 1);
+		hnputs(r->pro, ETIP4);
+		r->hln = sizeof(r->sha);
+		r->pln = sizeof(r->spa);
+		hnputs(r->op, ARPREPLY);
+		memmove(r->tha, e->sha, sizeof(r->tha));
+		memmove(r->tpa, e->spa, sizeof(r->tpa));
+		memmove(r->sha, ifc->mac, sizeof(r->sha));
+		memmove(r->spa, e->tpa, sizeof(r->spa));
+		memmove(r->d, e->sha, sizeof(r->d));
+		memmove(r->s, ifc->mac, sizeof(r->s));
+		rbp->wp += n;
+
+		devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
+	}
+	freeb(ebp);
+}
+
+/*
+ * arp reader process, started by etherbind with the Ipifc as
+ * argument.  Calls recvarp forever; any error ends the process
+ * after clearing er->arpp, which is what etherunbind polls.
+ */
+static void
+recvarpproc(void *v)
+{
+ Ipifc *ifc = v;
+ Etherrock *er = ifc->arg;
+
+ er->arpp = up;
+ if(waserror()){
+ er->arpp = 0;
+ pexit("hangup", 1);
+ }
+ for(;;)
+ recvarp(ifc);
+}
+
+/*
+ * Derive the ethernet multicast address for IP multicast address
+ * ip and store it in ea.  Returns whatever ipismulticast(ip)
+ * returned; ea is written only when that is V4 or V6.
+ */
+static int
+multicastea(uchar *ea, uchar *ip)
+{
+	int kind;
+
+	kind = ipismulticast(ip);
+	if(kind == V4){
+		/* 01:00:5e followed by the low 23 bits of the address */
+		ea[0] = 0x01;
+		ea[1] = 0x00;
+		ea[2] = 0x5e;
+		ea[3] = ip[13] & 0x7f;
+		ea[4] = ip[14];
+		ea[5] = ip[15];
+	}else if(kind == V6){
+		/* 33:33 followed by the last four bytes of the address */
+		ea[0] = 0x33;
+		ea[1] = 0x33;
+		ea[2] = ip[12];
+		ea[3] = ip[13];
+		ea[4] = ip[14];
+		ea[5] = ip[15];
+	}
+	return kind;
+}
+
+/*
+ *  fill in an arp entry for broadcast or multicast
+ *  addresses.  Return the first queued packet for the
+ *  IP address, or nil when the address is unicast and
+ *  has to be resolved by arp.
+ */
+static Block*
+multicastarp(Fs *f, Arpent *a, Medium *medium, uchar *mac)
+{
+	int how, v;
+
+	/* is it broadcast? */
+	how = ipforme(f, a->ip);
+	if(how == Runi)
+		return nil;
+	if(how == Rbcast){
+		memset(mac, 0xff, 6);
+		return arpresolve(f->arp, a, medium, mac);
+	}
+
+	/* if multicast, fill in mac */
+	v = multicastea(mac, a->ip);
+	if(v == V4 || v == V6)
+		return arpresolve(f->arp, a, medium, mac);
+
+	/* let arp take care of it */
+	return nil;
+}
+
+/*
+ * register the two ethernet media defined in this file with the
+ * IP stack
+ */
+void
+ethermediumlink(void)
+{
+	addipmedium(&ethermedium);
+	addipmedium(&gbemedium);
+}
+
+
+/*
+ * Fill the low 8 bytes of the IPv6 address in pref with the
+ * modified EUI-64 interface identifier derived from the 6-byte
+ * ethernet address ea: ff:fe is inserted in the middle and the
+ * universal/local bit of the first byte is inverted, as rfc4291
+ * appendix A requires.  (Setting the bit instead of inverting it
+ * gives the wrong identifier for locally administered addresses.)
+ */
+static void
+etherpref2addr(uchar *pref, uchar *ea)
+{
+	pref[8] = ea[0] ^ 0x2;
+	pref[9] = ea[1];
+	pref[10] = ea[2];
+	pref[11] = 0xFF;
+	pref[12] = 0xFE;
+	pref[13] = ea[3];
+	pref[14] = ea[4];
+	pref[15] = ea[5];
+}
diff --git a/src/9vx/a/ip/gre.c b/src/9vx/a/ip/gre.c
@@ -0,0 +1,283 @@
+/*
+ * Generic Routing Encapsulation over IPv4, rfc1702
+ */
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+/*
+ * NOTE(review): both sizes below are 12.  In GREhdr, 12 is the
+ * number of bytes before src (vihl through cksum) and also the
+ * number from src to the end (src, dst, flags, eproto), which is
+ * how grekick and greiput use them -- the comments "size of ip
+ * header" and "minimum size of GRE header" look imprecise; confirm.
+ */
+enum
+{
+ GRE_IPONLY = 12, /* size of ip header */
+ GRE_IPPLUSGRE = 12, /* minimum size of GRE header */
+ IP_GREPROTO = 47,
+
+ GRErxms = 200,
+ GREtickms = 100,
+ GREmaxxmit = 10,
+};
+
+/* IPv4 header followed by the first four bytes of the GRE header */
+typedef struct GREhdr
+{
+ /* ip header */
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar len[2]; /* packet length (including headers) */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+ uchar Unused;	/* presumably the IP ttl slot; never touched in this file */
+ uchar proto; /* Protocol */
+ uchar cksum[2]; /* checksum */
+ uchar src[4]; /* Ip source */
+ uchar dst[4]; /* Ip destination */
+
+ /* gre header */
+ uchar flags[2];
+ uchar eproto[2]; /* encapsulation protocol */
+} GREhdr;
+
+/* per-protocol private state, allocated in greinit */
+typedef struct GREpriv GREpriv;
+struct GREpriv
+{
+ int raw; /* Raw GRE mode */
+
+ /* non-MIB stats */
+ ulong csumerr; /* checksum errors */
+ ulong lenerr; /* short packet */
+};
+
+static void grekick(void *x, Block *bp);
+
+/*
+ * Connect conversation c, refusing if another conversation of
+ * this protocol already has the same remote address and remote
+ * port.  Returns nil on success or an error string; on a clash
+ * the addresses of c are reset to IPnoaddr.
+ */
+static char*
+greconnect(Conv *c, char **argv, int argc)
+{
+	Proto *gre;
+	Conv *other;
+	char *e;
+	int i;
+
+	e = Fsstdconnect(c, argv, argc);
+	if(e != nil)
+		return e;
+
+	/* make sure noone's already connected to this other sys */
+	gre = c->p;
+	QLOCK(gre);
+	for(i = 0; i < gre->nc; i++){
+		other = gre->conv[i];
+		if(other == nil)
+			break;
+		if(other == c)
+			continue;
+		if(other->rport != c->rport || ipcmp(other->raddr, c->raddr) != 0)
+			continue;
+		e = "already connected to that addr/proto";
+		ipmove(c->laddr, IPnoaddr);
+		ipmove(c->raddr, IPnoaddr);
+		break;
+	}
+	QUNLOCK(gre);
+
+	if(e == nil)
+		Fsconnected(c, nil);
+	return e;
+}
+
+/*
+ * set up the queues of a new conversation: a 64K message read
+ * queue and a bypass write queue that hands each block to grekick
+ */
+static void
+grecreate(Conv *c)
+{
+ c->rq = qopen(64*1024, Qmsg, 0, c);
+ c->wq = qbypass(grekick, c);
+}
+
+/*
+ * write the conversation state into state (at most n bytes);
+ * it is always "Datagram".  Returns what snprint returns.
+ */
+static int
+grestate(Conv *c, char *state, int n)
+{
+	int len;
+
+	USED(c);
+	len = snprint(state, n, "%s\n", "Datagram");
+	return len;
+}
+
+/* announce is not supported; always returns an error string */
+static char*
+greannounce(Conv* _, char** __, int ___)
+{
+ return "pktifc does not support announce";
+}
+
+/*
+ * close the conversation's queues and reset its addresses and
+ * ports
+ */
+static void
+greclose(Conv *c)
+{
+ qclose(c->rq);
+ qclose(c->wq);
+ qclose(c->eq);
+ ipmove(c->laddr, IPnoaddr);
+ ipmove(c->raddr, IPnoaddr);
+ c->lport = 0;
+ c->rport = 0;
+}
+
+int drop;
+
+/*
+ * Output routine installed on the write queue by grecreate.
+ * bp as written by the user starts at the src field of GREhdr;
+ * the GRE_IPONLY bytes in front of it are added here.  Unless the
+ * protocol is in raw mode, unset source and destination addresses
+ * are filled in from the conversation and eproto is set to the
+ * conversation's remote port.  The block is then passed to ipoput4.
+ */
+static void
+grekick(void *x, Block *bp)
+{
+ Conv *c = x;
+ GREhdr *ghp;
+ uchar laddr[IPaddrlen], raddr[IPaddrlen];
+
+ if(bp == nil)
+ return;
+
+ /* Make space to fit ip header (gre header already there) */
+ bp = padblock(bp, GRE_IPONLY);
+ if(bp == nil)
+ return;
+
+ /* make sure the message has a GRE header */
+ bp = pullupblock(bp, GRE_IPONLY+GRE_IPPLUSGRE);
+ if(bp == nil)
+ return;
+
+ ghp = (GREhdr *)(bp->rp);
+ ghp->vihl = IP_VER4;
+
+ if(!((GREpriv*)c->p->priv)->raw){
+ /* an all-zero address in the packet means "use the conversation's" */
+ v4tov6(raddr, ghp->dst);
+ if(ipcmp(raddr, v4prefix) == 0)
+ memmove(ghp->dst, c->raddr + IPv4off, IPv4addrlen);
+ v4tov6(laddr, ghp->src);
+ if(ipcmp(laddr, v4prefix) == 0){
+ if(ipcmp(c->laddr, IPnoaddr) == 0)
+ findlocalip(c->p->f, c->laddr, raddr); /* pick interface closest to dest */
+ memmove(ghp->src, c->laddr + IPv4off, IPv4addrlen);
+ }
+ hnputs(ghp->eproto, c->rport);
+ }
+
+ ghp->proto = IP_GREPROTO;
+ ghp->frag[0] = 0;
+ ghp->frag[1] = 0;
+
+ ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
+}
+
+/*
+ * Input routine for GRE packets.  Finds the in-use conversation
+ * whose remote port equals the packet's encapsulated protocol and,
+ * unless in raw mode, whose remote address equals the packet's
+ * source.  The first GRE_IPONLY bytes are trimmed off and the rest
+ * is queued on the conversation's read queue.  Packets with no
+ * matching conversation, a bad length, or arriving while the
+ * queue holds more than 64K are dropped.
+ */
+static void
+greiput(Proto *gre, Ipifc* __, Block *bp)
+{
+ int len;
+ GREhdr *ghp;
+ Conv *c, **p;
+ ushort eproto;
+ uchar raddr[IPaddrlen];
+ GREpriv *gpriv;
+
+ gpriv = gre->priv;
+ ghp = (GREhdr*)(bp->rp);
+
+ v4tov6(raddr, ghp->src);
+ eproto = nhgets(ghp->eproto);
+ QLOCK(gre);
+
+ /* Look for a conversation structure for this port and address */
+ c = nil;
+ for(p = gre->conv; *p; p++) {
+ c = *p;
+ if(c->inuse == 0)
+ continue;
+ if(c->rport == eproto &&
+ (gpriv->raw || ipcmp(c->raddr, raddr) == 0))
+ break;
+ }
+
+ /* ran off the end of the list: nobody wants this packet */
+ if(*p == nil) {
+ QUNLOCK(gre);
+ freeblist(bp);
+ return;
+ }
+
+ QUNLOCK(gre);
+
+ /*
+ * Trim the packet down to data size
+ */
+ len = nhgets(ghp->len) - GRE_IPONLY;
+ if(len < GRE_IPPLUSGRE){
+ freeblist(bp);
+ return;
+ }
+ bp = trimblock(bp, GRE_IPONLY, len);
+ if(bp == nil){
+ gpriv->lenerr++;
+ return;
+ }
+
+ /*
+ * Can't delimit packet so pull it all into one block.
+ */
+ if(qlen(c->rq) > 64*1024)
+ freeblist(bp);
+ else{
+ bp = concatblock(bp);
+ if(bp == 0)
+ panic("greiput");
+ qpass(c->rq, bp);
+ }
+}
+
+/*
+ * format the protocol statistics (only the length error count)
+ * into buf, at most len bytes; returns what snprint returns
+ */
+int
+grestats(Proto *gre, char *buf, int len)
+{
+	GREpriv *priv = gre->priv;
+
+	return snprint(buf, len, "gre: len %lud\n", priv->lenerr);
+}
+
+/*
+ * Handle a control request of n words in f.  "raw" and "cooked",
+ * given alone, set and clear raw mode for the whole protocol.
+ * Returns nil on success or an error string for anything else.
+ */
+char*
+grectl(Conv *c, char **f, int n)
+{
+	GREpriv *priv;
+
+	priv = c->p->priv;
+	if(n != 1)
+		return "unknown control request";
+
+	if(strcmp(f[0], "raw") == 0){
+		priv->raw = 1;
+		return nil;
+	}
+	if(strcmp(f[0], "cooked") == 0){
+		priv->raw = 0;
+		return nil;
+	}
+	return "unknown control request";
+}
+
+/*
+ * allocate the gre protocol description, fill in its handlers and
+ * limits (64 conversations, IP protocol 47) and register it with fs
+ */
+void
+greinit(Fs *fs)
+{
+ Proto *gre;
+
+ gre = smalloc(sizeof(Proto));
+ gre->priv = smalloc(sizeof(GREpriv));
+ gre->name = "gre";
+ gre->connect = greconnect;
+ gre->announce = greannounce;
+ gre->state = grestate;
+ gre->create = grecreate;
+ gre->close = greclose;
+ gre->rcv = greiput;
+ gre->ctl = grectl;
+ gre->advise = nil;
+ gre->stats = grestats;
+ gre->ipproto = IP_GREPROTO;
+ gre->nc = 64;
+ gre->ptclsize = 0;
+
+ Fsproto(fs, gre);
+}
diff --git a/src/9vx/a/ip/icmp.c b/src/9vx/a/ip/icmp.c
@@ -0,0 +1,490 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+/*
+ * an ICMP packet as handled here: 20-byte IPv4 header
+ * (ICMP_IPSIZE), 8-byte ICMP header (ICMP_HDRSIZE), then data
+ */
+typedef struct Icmp {
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+ uchar ttl; /* Time to live */
+ uchar proto; /* Protocol */
+ uchar ipcksum[2]; /* Header checksum */
+ uchar src[4]; /* Ip source */
+ uchar dst[4]; /* Ip destination */
+ uchar type;	/* one of the packet types below */
+ uchar code;
+ uchar cksum[2];	/* checksum over everything after the IP header */
+ uchar icmpid[2];	/* matched against a conversation's local port */
+ uchar seq[2];
+ uchar data[1];	/* start of the variable-length payload */
+} Icmp;
+
+enum { /* Packet Types */
+ EchoReply = 0,
+ Unreachable = 3,
+ SrcQuench = 4,
+ Redirect = 5,
+ EchoRequest = 8,
+ TimeExceed = 11,
+ InParmProblem = 12,
+ Timestamp = 13,
+ TimestampReply = 14,
+ InfoRequest = 15,
+ InfoReply = 16,
+ AddrMaskRequest = 17,
+ AddrMaskReply = 18,
+
+ Maxtype = 18,	/* highest type counted in Icmppriv.in/out */
+};
+
+enum
+{
+ MinAdvise = 24, /* minimum needed for us to advise another protocol */
+};
+
+/*
+ * printable names indexed by packet type, used by icmpstats;
+ * types without an entry are nil and are printed by number
+ */
+char *icmpnames[Maxtype+1] =
+{
+[EchoReply] "EchoReply",
+[Unreachable] "Unreachable",
+[SrcQuench] "SrcQuench",
+[Redirect] "Redirect",
+[EchoRequest] "EchoRequest",
+[TimeExceed] "TimeExceed",
+[InParmProblem] "InParmProblem",
+[Timestamp] "Timestamp",
+[TimestampReply] "TimestampReply",
+[InfoRequest] "InfoRequest",
+[InfoReply] "InfoReply",
+[AddrMaskRequest] "AddrMaskRequest",
+[AddrMaskReply ] "AddrMaskReply ",
+};
+
+enum {
+ IP_ICMPPROTO = 1,	/* IP protocol number for ICMP */
+ ICMP_IPSIZE = 20,	/* bytes of IP header at the front of Icmp */
+ ICMP_HDRSIZE = 8,	/* bytes from type through seq */
+};
+
+/* indices into Icmppriv.stats */
+enum
+{
+ InMsgs,
+ InErrors,
+ OutMsgs,
+ CsumErrs,
+ LenErrs,
+ HlenErrs,
+
+ Nstats,
+};
+
+/* names printed by icmpstats, in the same order as the indices above */
+static char *statnames[Nstats] =
+{
+[InMsgs] "InMsgs",
+[InErrors] "InErrors",
+[OutMsgs] "OutMsgs",
+[CsumErrs] "CsumErrs",
+[LenErrs] "LenErrs",
+[HlenErrs] "HlenErrs",
+};
+
+/* per-protocol private state, allocated in icmpinit */
+typedef struct Icmppriv Icmppriv;
+struct Icmppriv
+{
+ ulong stats[Nstats];
+
+ /* message counts */
+ ulong in[Maxtype+1];
+ ulong out[Maxtype+1];
+};
+
+static void icmpkick(void *x, Block*);
+
+/*
+ * set up the queues of a new conversation: a 64K message read
+ * queue and a bypass write queue that hands each block to icmpkick
+ */
+static void
+icmpcreate(Conv *c)
+{
+ c->rq = qopen(64*1024, Qmsg, 0, c);
+ c->wq = qbypass(icmpkick, c);
+}
+
+/*
+ * connect conversation c using the standard connect parsing;
+ * returns nil on success or the error string from Fsstdconnect
+ */
+extern char*
+icmpconnect(Conv *c, char **argv, int argc)
+{
+	char *err;
+
+	err = Fsstdconnect(c, argv, argc);
+	if(err == nil)
+		Fsconnected(c, nil);
+	return err;
+}
+
+/*
+ * format the conversation state into state (at most n bytes):
+ * the word "Datagram" and the lengths of the read and write
+ * queues, a missing queue counting as 0
+ */
+extern int
+icmpstate(Conv *c, char *state, int n)
+{
+ USED(c);
+ return snprint(state, n, "%s qin %d qout %d\n",
+ "Datagram",
+ c->rq ? qlen(c->rq) : 0,
+ c->wq ? qlen(c->wq) : 0
+ );
+}
+
+/*
+ * announce conversation c using the standard announce parsing;
+ * returns nil on success or the error string from Fsstdannounce
+ */
+extern char*
+icmpannounce(Conv *c, char **argv, int argc)
+{
+	char *err;
+
+	err = Fsstdannounce(c, argv, argc);
+	if(err == nil)
+		Fsconnected(c, nil);
+	return err;
+}
+
+/*
+ * close the conversation's read and write queues and reset its
+ * addresses and local port
+ */
+extern void
+icmpclose(Conv *c)
+{
+ qclose(c->rq);
+ qclose(c->wq);
+ ipmove(c->laddr, IPnoaddr);
+ ipmove(c->raddr, IPnoaddr);
+ c->lport = 0;
+}
+
+/*
+ * Output routine installed on the write queue by icmpcreate.
+ * bp must hold at least the IP and ICMP headers, otherwise it is
+ * dropped.  Addresses come from the conversation, the ICMP id is
+ * set to the local port and the checksum is computed over
+ * everything after the IP header before the block goes to ipoput4.
+ */
+static void
+icmpkick(void *x, Block *bp)
+{
+ Conv *c = x;
+ Icmp *p;
+ Icmppriv *ipriv;
+
+ if(bp == nil)
+ return;
+
+ if(blocklen(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){
+ freeblist(bp);
+ return;
+ }
+ p = (Icmp *)(bp->rp);
+ p->vihl = IP_VER4;
+ ipriv = c->p->priv;
+ /* count per type; types past Maxtype have no counter */
+ if(p->type <= Maxtype)
+ ipriv->out[p->type]++;
+
+ v6tov4(p->dst, c->raddr);
+ v6tov4(p->src, c->laddr);
+ p->proto = IP_ICMPPROTO;
+ hnputs(p->icmpid, c->lport);
+ /* the checksum field must be zero while the sum is taken */
+ memset(p->cksum, 0, sizeof(p->cksum));
+ hnputs(p->cksum, ptclcsum(bp, ICMP_IPSIZE, blocklen(bp) - ICMP_IPSIZE));
+ ipriv->stats[OutMsgs]++;
+ ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
+}
+
+/*
+ * Send a TimeExceed (code 0) message to the source of the packet
+ * in bp, using ia as our source address.  The message carries the
+ * first ICMP_IPSIZE+8 bytes of the offending packet.  bp itself is
+ * neither modified nor freed here.
+ */
+extern void
+icmpttlexceeded(Fs *f, uchar *ia, Block *bp)
+{
+ Block *nbp;
+ Icmp *p, *np;
+
+ p = (Icmp *)bp->rp;
+
+ netlog(f, Logicmp, "sending icmpttlexceeded -> %V\n", p->src);
+ nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
+ nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8;
+ np = (Icmp *)nbp->rp;
+ np->vihl = IP_VER4;
+ memmove(np->dst, p->src, sizeof(np->dst));
+ v6tov4(np->src, ia);
+ /* quote the offending packet's IP header plus 8 bytes */
+ memmove(np->data, bp->rp, ICMP_IPSIZE + 8);
+ np->type = TimeExceed;
+ np->code = 0;
+ np->proto = IP_ICMPPROTO;
+ hnputs(np->icmpid, 0);
+ hnputs(np->seq, 0);
+ /* the checksum field must be zero while the sum is taken */
+ memset(np->cksum, 0, sizeof(np->cksum));
+ hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE));
+ ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+
+}
+
+/*
+ * Send an Unreachable message with the given code to the source
+ * of the packet in bp; seq is stored in the message's seq field
+ * (icmpcantfrag passes the mtu there).  Nothing is sent unless the
+ * offending packet was addressed to one of our unicast addresses
+ * and its source is not one of our non-unicast addresses.  bp is
+ * neither modified nor freed here.
+ */
+static void
+icmpunreachable(Fs *f, Block *bp, int code, int seq)
+{
+ Block *nbp;
+ Icmp *p, *np;
+ int i;
+ uchar addr[IPaddrlen];
+
+ p = (Icmp *)bp->rp;
+
+ /* only do this for unicast sources and destinations */
+ v4tov6(addr, p->dst);
+ i = ipforme(f, addr);
+ if((i&Runi) == 0)
+ return;
+ v4tov6(addr, p->src);
+ i = ipforme(f, addr);
+ if(i != 0 && (i&Runi) == 0)
+ return;
+
+ netlog(f, Logicmp, "sending icmpnoconv -> %V\n", p->src);
+ nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
+ nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8;
+ np = (Icmp *)nbp->rp;
+ np->vihl = IP_VER4;
+ /* reply goes back the way the packet came */
+ memmove(np->dst, p->src, sizeof(np->dst));
+ memmove(np->src, p->dst, sizeof(np->src));
+ /* quote the offending packet's IP header plus 8 bytes */
+ memmove(np->data, bp->rp, ICMP_IPSIZE + 8);
+ np->type = Unreachable;
+ np->code = code;
+ np->proto = IP_ICMPPROTO;
+ hnputs(np->icmpid, 0);
+ hnputs(np->seq, seq);
+ /* the checksum field must be zero while the sum is taken */
+ memset(np->cksum, 0, sizeof(np->cksum));
+ hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE));
+ ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/* send an Unreachable with code 3 ("port unreachable") for the packet in bp */
+extern void
+icmpnoconv(Fs *f, Block *bp)
+{
+ icmpunreachable(f, bp, 3, 0);
+}
+
+/*
+ * send an Unreachable with code 4 ("fragmentation needed and DF
+ * set") for the packet in bp, passing mtu in the seq field
+ */
+extern void
+icmpcantfrag(Fs *f, Block *bp, int mtu)
+{
+ icmpunreachable(f, bp, 4, mtu);
+}
+
+/*
+ * Deliver an incoming ICMP packet to the first conversation whose
+ * local port equals the packet's ICMP id and whose remote address
+ * equals the packet's source.  The packet is freed if no
+ * conversation matches.
+ */
+static void
+goticmpkt(Proto *icmp, Block *bp)
+{
+	Conv **cp, *conv;
+	Icmp *h;
+	uchar from[IPaddrlen];
+	ushort id;
+
+	h = (Icmp *) bp->rp;
+	v4tov6(from, h->src);
+	id = nhgets(h->icmpid);
+
+	for(cp = icmp->conv; (conv = *cp) != nil; cp++){
+		if(conv->lport != id || ipcmp(conv->raddr, from) != 0)
+			continue;
+		bp = concatblock(bp);
+		if(bp != nil)
+			qpass(conv->rq, bp);
+		return;
+	}
+	freeblist(bp);
+}
+
+/*
+ * Turn the echo request in bp into an echo reply in place: swap
+ * the IP source and destination, set the type and recompute the
+ * ICMP checksum.  Returns bp.
+ */
+static Block *
+mkechoreply(Block *bp)
+{
+	Icmp *h;
+	uchar tmp[4];
+
+	h = (Icmp *)bp->rp;
+	h->vihl = IP_VER4;
+
+	/* swap source and destination */
+	memmove(tmp, h->src, sizeof(h->dst));
+	memmove(h->src, h->dst, sizeof(h->src));
+	memmove(h->dst, tmp, sizeof(h->dst));
+
+	h->type = EchoReply;
+	/* the checksum field must be zero while the sum is taken */
+	memset(h->cksum, 0, sizeof(h->cksum));
+	hnputs(h->cksum, ptclcsum(bp, ICMP_IPSIZE, blocklen(bp) - ICMP_IPSIZE));
+
+	return bp;
+}
+
+/*
+ * messages for Unreachable codes 0-5, passed to a protocol's
+ * advise routine by icmpiput; larger codes use entry 1
+ */
+static char *unreachcode[] =
+{
+[0] "net unreachable",
+[1] "host unreachable",
+[2] "protocol unreachable",
+[3] "port unreachable",
+[4] "fragmentation needed and DF set",
+[5] "source route failed",
+};
+
+/*
+ * Input routine for ICMP packets.
+ *
+ * The packet is dropped (and counted in the error statistics)
+ * if it is shorter than the IP and ICMP headers, if the length in
+ * its IP header is larger than what arrived or too small to hold
+ * an ICMP header, or if the ICMP checksum is wrong.
+ *
+ * Echo requests are answered directly.  For Unreachable and
+ * TimeExceed (code 0) the quoted packet's protocol is looked up
+ * and, if it has an advise routine, that routine takes over the
+ * block; otherwise, and for all other types, the packet is handed
+ * to the matching ICMP conversation via goticmpkt.
+ */
+static void
+icmpiput(Proto *icmp, Ipifc* __, Block *bp)
+{
+	int	n, iplen;
+	Icmp	*p;
+	Block	*r;
+	Proto	*pr;
+	char	*msg;
+	char	m2[128];
+	Icmppriv *ipriv;
+
+	ipriv = icmp->priv;
+
+	ipriv->stats[InMsgs]++;
+
+	p = (Icmp *)bp->rp;
+	netlog(icmp->f, Logicmp, "icmpiput %d %d\n", p->type, p->code);
+	n = blocklen(bp);
+	if(n < ICMP_IPSIZE+ICMP_HDRSIZE){
+		ipriv->stats[InErrors]++;
+		ipriv->stats[HlenErrs]++;
+		netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
+		goto raise;
+	}
+	iplen = nhgets(p->length);
+	/*
+	 * the claimed length must fit in what arrived and must cover
+	 * the ICMP header; otherwise the checksum below would be
+	 * taken over a negative or truncated length
+	 */
+	if(iplen > n || iplen < ICMP_IPSIZE+ICMP_HDRSIZE){
+		ipriv->stats[LenErrs]++;
+		ipriv->stats[InErrors]++;
+		netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
+		goto raise;
+	}
+	if(ptclcsum(bp, ICMP_IPSIZE, iplen - ICMP_IPSIZE)){
+		ipriv->stats[InErrors]++;
+		ipriv->stats[CsumErrs]++;
+		netlog(icmp->f, Logicmp, "icmp checksum error\n");
+		goto raise;
+	}
+	/* count per type; types past Maxtype have no counter */
+	if(p->type <= Maxtype)
+		ipriv->in[p->type]++;
+
+	switch(p->type) {
+	case EchoRequest:
+		/* drop anything beyond the IP length before echoing */
+		if (iplen < n)
+			bp = trimblock(bp, 0, iplen);
+		r = mkechoreply(bp);
+		ipriv->out[EchoReply]++;
+		ipoput4(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
+		break;
+	case Unreachable:
+		if(p->code > 5)
+			msg = unreachcode[1];
+		else
+			msg = unreachcode[p->code];
+
+		/* step to the quoted packet to find its protocol */
+		bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
+		if(blocklen(bp) < MinAdvise){
+			ipriv->stats[LenErrs]++;
+			goto raise;
+		}
+		p = (Icmp *)bp->rp;
+		pr = Fsrcvpcolx(icmp->f, p->proto);
+		if(pr != nil && pr->advise != nil) {
+			/* the advise routine now owns bp */
+			(*pr->advise)(pr, bp, msg);
+			return;
+		}
+
+		/* nobody to advise: restore rp and deliver as plain ICMP */
+		bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
+		goticmpkt(icmp, bp);
+		break;
+	case TimeExceed:
+		if(p->code == 0){
+			sprint(m2, "ttl exceeded at %V", p->src);
+
+			/* step to the quoted packet to find its protocol */
+			bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
+			if(blocklen(bp) < MinAdvise){
+				ipriv->stats[LenErrs]++;
+				goto raise;
+			}
+			p = (Icmp *)bp->rp;
+			pr = Fsrcvpcolx(icmp->f, p->proto);
+			if(pr != nil && pr->advise != nil) {
+				/* the advise routine now owns bp */
+				(*pr->advise)(pr, bp, m2);
+				return;
+			}
+			bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
+		}
+
+		goticmpkt(icmp, bp);
+		break;
+	default:
+		goticmpkt(icmp, bp);
+		break;
+	}
+	return;
+
+raise:
+	freeblist(bp);
+}
+
+/*
+ * Advise routine for ICMP itself: bp holds a quoted ICMP packet
+ * that we sent.  The first conversation whose local port equals
+ * the quoted ICMP id and whose remote address equals the quoted
+ * destination has both queues hung up with msg.  bp is always
+ * freed.
+ */
+void
+icmpadvise(Proto *icmp, Block *bp, char *msg)
+{
+	Conv **cp, *conv;
+	Icmp *h;
+	uchar to[IPaddrlen];
+	ushort id;
+
+	h = (Icmp *) bp->rp;
+	v4tov6(to, h->dst);
+	id = nhgets(h->icmpid);
+
+	for(cp = icmp->conv; (conv = *cp) != nil; cp++){
+		if(conv->lport != id || ipcmp(conv->raddr, to) != 0)
+			continue;
+		qhangup(conv->rq, msg);
+		qhangup(conv->wq, msg);
+		break;
+	}
+	freeblist(bp);
+}
+
+/*
+ * Format the protocol statistics into buf (len bytes): first the
+ * named counters, then one line of in/out counts per packet type,
+ * labelled by name where icmpnames has one and by number where it
+ * does not.  Returns the number of bytes written.
+ */
+int
+icmpstats(Proto *icmp, char *buf, int len)
+{
+	Icmppriv *ipriv;
+	char *s, *end;
+	int t;
+
+	ipriv = icmp->priv;
+	s = buf;
+	end = buf + len;
+
+	for(t = 0; t < Nstats; t++)
+		s = seprint(s, end, "%s: %lud\n", statnames[t], ipriv->stats[t]);
+
+	for(t = 0; t <= Maxtype; t++){
+		if(icmpnames[t]){
+			s = seprint(s, end, "%s: %lud %lud\n", icmpnames[t], ipriv->in[t], ipriv->out[t]);
+			continue;
+		}
+		s = seprint(s, end, "%d: %lud %lud\n", t, ipriv->in[t], ipriv->out[t]);
+	}
+	return s - buf;
+}
+
+/*
+ * allocate the icmp protocol description, fill in its handlers
+ * and limits (128 conversations, IP protocol 1) and register it
+ * with fs
+ */
+void
+icmpinit(Fs *fs)
+{
+ Proto *icmp;
+
+ icmp = smalloc(sizeof(Proto));
+ icmp->priv = smalloc(sizeof(Icmppriv));
+ icmp->name = "icmp";
+ icmp->connect = icmpconnect;
+ icmp->announce = icmpannounce;
+ icmp->state = icmpstate;
+ icmp->create = icmpcreate;
+ icmp->close = icmpclose;
+ icmp->rcv = icmpiput;
+ icmp->stats = icmpstats;
+ icmp->ctl = nil;
+ icmp->advise = icmpadvise;
+ icmp->gc = nil;
+ icmp->ipproto = IP_ICMPPROTO;
+ icmp->nc = 128;
+ icmp->ptclsize = 0;
+
+ Fsproto(fs, icmp);
+}
diff --git a/src/9vx/a/ip/icmp6.c b/src/9vx/a/ip/icmp6.c
@@ -0,0 +1,946 @@
+/*
+ * Internet Control Message Protocol for IPv6
+ */
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+#include "ip.h"
+#include "ipv6.h"
+
+/*
+ * Indices into Icmppriv6.stats; statnames6 holds the printable
+ * name for each one.
+ */
+enum
+{
+ InMsgs6,
+ InErrors6,
+ OutMsgs6,
+ CsumErrs6,
+ LenErrs6,
+ HlenErrs6,
+ HoplimErrs6,
+ IcmpCodeErrs6,
+ TargetErrs6,
+ OptlenErrs6,
+ AddrmxpErrs6,
+ RouterAddrErrs6,
+
+ Nstats6, /* number of counters above; sizes the stats arrays */
+};
+
+enum {
+ ICMP_USEAD6 = 40, /* bytes icmpkick6 pulls up to read user-supplied addresses */
+};
+
+/*
+ * Flag bits for neighbor advertisements: icmpiput6 collects them in
+ * pktflags and icmpna stores them in the first icmpid byte.
+ */
+enum {
+ Oflag = 1<<5,
+ Sflag = 1<<6,
+ Rflag = 1<<7,
+};
+
+/*
+ * Message type numbers.  The *V6 names and the router/neighbor
+ * discovery names are the ones this file switches on; the remaining
+ * names only label entries in icmpnames6.
+ * NOTE(review): the un-suffixed names look carried over from the
+ * ICMPv4 code -- confirm.  SrcQuench and ParamProblemV6 share value 4.
+ */
+enum {
+ /* ICMPv6 types */
+ EchoReply = 0,
+ UnreachableV6 = 1,
+ PacketTooBigV6 = 2,
+ TimeExceedV6 = 3,
+ SrcQuench = 4,
+ ParamProblemV6 = 4,
+ Redirect = 5,
+ EchoRequest = 8,
+ TimeExceed = 11,
+ InParmProblem = 12,
+ Timestamp = 13,
+ TimestampReply = 14,
+ InfoRequest = 15,
+ InfoReply = 16,
+ AddrMaskRequest = 17,
+ AddrMaskReply = 18,
+ EchoRequestV6 = 128,
+ EchoReplyV6 = 129,
+ RouterSolicit = 133,
+ RouterAdvert = 134,
+ NbrSolicit = 135,
+ NbrAdvert = 136,
+ RedirectV6 = 137,
+
+ Maxtype6 = 137, /* largest type counted in Icmppriv6.in/out */
+};
+
+typedef struct ICMPpkt ICMPpkt;
+typedef struct IPICMP IPICMP;
+typedef struct Ndpkt Ndpkt;
+typedef struct NdiscC NdiscC;
+
+/* ICMP header fields; the same fields are repeated inline in IPICMP */
+struct ICMPpkt {
+ uchar type;
+ uchar code;
+ uchar cksum[2];
+ uchar icmpid[2];
+ uchar seq[2];
+};
+
+/*
+ * An IPv6 header immediately followed by the ICMP header, written out
+ * field by field.  Packet buffers are cast to this layout throughout
+ * the file.
+ */
+struct IPICMP {
+ /* Ip6hdr; */
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */
+ uchar ploadlen[2]; /* payload length: packet length - 40 */
+ uchar proto; /* next header type */
+ uchar ttl; /* hop limit */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+
+ /* ICMPpkt; */
+ uchar type;
+ uchar code;
+ uchar cksum[2];
+ uchar icmpid[2];
+ uchar seq[2];
+};
+
+/*
+ * IPICMP followed by a target address: the part of a neighbor
+ * discovery packet that precedes any option (icmpns trims an Ndpkt
+ * back to this size when it sends no option).
+ */
+struct NdiscC
+{
+ /* IPICMP; */
+ /* Ip6hdr; */
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */
+ uchar ploadlen[2]; /* payload length: packet length - 40 */
+ uchar proto; /* next header type */
+ uchar ttl; /* hop limit */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+
+ /* ICMPpkt; */
+ uchar type;
+ uchar code;
+ uchar cksum[2];
+ uchar icmpid[2];
+ uchar seq[2];
+
+ uchar target[IPaddrlen];
+};
+
+/*
+ * NdiscC followed by a single link-layer address option; used for the
+ * neighbor solicitations and advertisements built and parsed here.
+ */
+struct Ndpkt
+{
+ /* NdiscC; */
+ /* IPICMP; */
+ /* Ip6hdr; */
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */
+ uchar ploadlen[2]; /* payload length: packet length - 40 */
+ uchar proto; /* next header type */
+ uchar ttl; /* hop limit */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+
+ /* ICMPpkt; */
+ uchar type;
+ uchar code;
+ uchar cksum[2];
+ uchar icmpid[2];
+ uchar seq[2];
+
+ uchar target[IPaddrlen];
+
+ uchar otype; /* option type, e.g. SRC_LLADDR or TARGET_LLADDR */
+ uchar olen; /* length in units of 8 octets(incl type, code),
+ * 1 for IEEE 802 addresses */
+ uchar lnaddr[6]; /* link-layer address */
+};
+
+/* per-protocol counters, hung off Proto.priv by icmp6init */
+typedef struct Icmppriv6
+{
+ ulong stats[Nstats6]; /* indexed by the *6 stat enum */
+
+ /* message counts */
+ ulong in[Maxtype6+1]; /* received, indexed by message type */
+ ulong out[Maxtype6+1]; /* sent, indexed by message type */
+} Icmppriv6;
+
+/* per-conversation control block (Conv.ptcl) */
+typedef struct Icmpcb6
+{
+ QLock qlock;
+ uchar headers; /* set to 6 by the "headers" ctl; icmpkick6 then reads addresses from the written data */
+} Icmpcb6;
+
+/*
+ * Printable names for message types, indexed by type number.  Entries
+ * without an initializer stay nil and are skipped by icmpstats6.
+ * Type 4 is labelled "SrcQuench" because SrcQuench and ParamProblemV6
+ * share that value.
+ */
+char *icmpnames6[Maxtype6+1] =
+{
+[EchoReply] "EchoReply",
+[UnreachableV6] "UnreachableV6",
+[PacketTooBigV6] "PacketTooBigV6",
+[TimeExceedV6] "TimeExceedV6",
+[SrcQuench] "SrcQuench",
+[Redirect] "Redirect",
+[EchoRequest] "EchoRequest",
+[TimeExceed] "TimeExceed",
+[InParmProblem] "InParmProblem",
+[Timestamp] "Timestamp",
+[TimestampReply] "TimestampReply",
+[InfoRequest] "InfoRequest",
+[InfoReply] "InfoReply",
+[AddrMaskRequest] "AddrMaskRequest",
+[AddrMaskReply] "AddrMaskReply",
+[EchoRequestV6] "EchoRequestV6",
+[EchoReplyV6] "EchoReplyV6",
+[RouterSolicit] "RouterSolicit",
+[RouterAdvert] "RouterAdvert",
+[NbrSolicit] "NbrSolicit",
+[NbrAdvert] "NbrAdvert",
+[RedirectV6] "RedirectV6",
+};
+
+/* printable names for Icmppriv6.stats, indexed by the *6 stat enum */
+static char *statnames6[Nstats6] =
+{
+[InMsgs6] "InMsgs",
+[InErrors6] "InErrors",
+[OutMsgs6] "OutMsgs",
+[CsumErrs6] "CsumErrs",
+[LenErrs6] "LenErrs",
+[HlenErrs6] "HlenErrs",
+[HoplimErrs6] "HoplimErrs",
+[IcmpCodeErrs6] "IcmpCodeErrs",
+[TargetErrs6] "TargetErrs",
+[OptlenErrs6] "OptlenErrs",
+[AddrmxpErrs6] "AddrmxpErrs",
+[RouterAddrErrs6] "RouterAddrErrs",
+};
+
+/*
+ * Messages for UnreachableV6, indexed by the packet's code field;
+ * icmpiput6 uses the Icmp6_unknown entry for out-of-range codes.
+ */
+static char *unreachcode[] =
+{
+[Icmp6_no_route] "no route to destination",
+[Icmp6_ad_prohib] "comm with destination administratively prohibited",
+[Icmp6_out_src_scope] "beyond scope of source address",
+[Icmp6_adr_unreach] "address unreachable",
+[Icmp6_port_unreach] "port unreachable",
+[Icmp6_gress_src_fail] "source address failed ingress/egress policy",
+[Icmp6_rej_route] "reject route to destination",
+[Icmp6_unknown] "icmp unreachable: unknown code",
+};
+
+static void icmpkick6(void *x, Block *bp);
+
+/*
+ * Set up the queues of a new conversation: a message-mode read queue
+ * and a write queue that hands every block straight to icmpkick6.
+ */
+static void
+icmpcreate6(Conv *c)
+{
+	enum { Rqlimit = 64*1024 };	/* read queue limit */
+
+	c->rq = qopen(Rqlimit, Qmsg, 0, c);
+	c->wq = qbypass(icmpkick6, c);
+}
+
+/*
+ * Compute and store the ICMPv6 checksum of the packet in bp.  The IPv6
+ * header is overwritten to serve as the pseudoheader: vcf is zeroed,
+ * the payload length is filled in, and the protocol number sits in the
+ * ttl byte while the sum is taken.  On return proto is ICMPv6; callers
+ * set vcf and ttl afterwards.
+ */
+static void
+set_cksum(Block *bp)
+{
+	IPICMP *h;
+	int len;
+
+	h = (IPICMP *)bp->rp;
+	len = blocklen(bp);
+
+	/* borrow IP header as pseudoheader */
+	hnputl(h->vcf, 0);
+	hnputs(h->ploadlen, len - IP6HDR);
+	h->proto = 0;
+	h->ttl = ICMPv6;	/* ttl gets set later */
+
+	/* checksum field must be zero while summing */
+	hnputs(h->cksum, 0);
+	hnputs(h->cksum, ptclcsum(bp, 0, len));
+
+	h->proto = ICMPv6;
+}
+
+/*
+ * Allocate a block holding packetlen zeroed bytes, with wp already
+ * advanced past them.
+ */
+static Block *
+newIPICMP(int packetlen)
+{
+	Block *b;
+
+	b = allocb(packetlen);
+	memset(b->rp, 0, packetlen);
+	b->wp += packetlen;
+	return b;
+}
+
+/*
+ * Hang up the ICMPv6 conversation matching the packet at bp->rp and
+ * pass msg to both of its queues.  A conversation matches when its
+ * local port equals the packet's icmp id and its remote address equals
+ * the packet's destination.  Only the first match is hung up; bp is
+ * always freed.
+ */
+void
+icmpadvise6(Proto *icmp, Block *bp, char *msg)
+{
+	Conv **cp, *conv;
+	IPICMP *h;
+	ushort id;
+
+	h = (IPICMP *)bp->rp;
+	id = nhgets(h->icmpid);
+
+	for(cp = icmp->conv; *cp; cp++) {
+		conv = *cp;
+		if(conv->lport != id)
+			continue;
+		if(ipcmp(conv->raddr, h->dst) != 0)
+			continue;
+		qhangup(conv->rq, msg);
+		qhangup(conv->wq, msg);
+		break;
+	}
+	freeblist(bp);
+}
+
+/*
+ * Write-queue bypass routine: send one user-written ICMPv6 packet.
+ * In "headers" mode (icb->headers == 6) the data starts with 8 bytes
+ * that are skipped, then a local and a remote address; these are
+ * stripped and room for an IPv6 header is prepended.  Otherwise the
+ * conversation's addresses and local port (as icmp id) are used.
+ * Packets shorter than IPICMP are dropped.
+ */
+static void
+icmpkick6(void *x, Block *bp)
+{
+	Conv *c = x;
+	Icmppriv6 *ipriv = c->p->priv;
+	Icmpcb6 *icb = (Icmpcb6*)c->ptcl;
+	uchar laddr[IPaddrlen], raddr[IPaddrlen];
+	IPICMP *h;
+
+	if(bp == nil)
+		return;
+
+	if(icb->headers == 6){
+		/* get user specified addresses */
+		bp = pullupblock(bp, ICMP_USEAD6);
+		if(bp == nil)
+			return;
+		ipmove(laddr, bp->rp + 8);
+		ipmove(raddr, bp->rp + 8 + IPaddrlen);
+		bp->rp += 8 + IPaddrlen + IPaddrlen;
+		bp = padblock(bp, sizeof(Ip6hdr));
+	}
+
+	if(blocklen(bp) < sizeof(IPICMP)){
+		freeblist(bp);
+		return;
+	}
+
+	h = (IPICMP *)bp->rp;
+	if(icb->headers != 6){
+		ipmove(h->dst, c->raddr);
+		ipmove(h->src, c->laddr);
+		hnputs(h->icmpid, c->lport);
+	} else {
+		ipmove(h->dst, raddr);
+		ipmove(h->src, laddr);
+	}
+
+	set_cksum(bp);
+	h->vcf[0] = 0x06 << 4;
+	if(h->type <= Maxtype6)
+		ipriv->out[h->type]++;
+	ipoput6(c->p->f, bp, 0, c->ttl, c->tos, nil);
+}
+
+/*
+ * Control-message handler.  The only request understood is the single
+ * word "headers", which switches the conversation to headers mode.
+ * Returns nil on success or an error string.
+ */
+char*
+icmpctl6(Conv *c, char **argv, int argc)
+{
+	Icmpcb6 *icb;
+
+	if(argc != 1 || strcmp(argv[0], "headers") != 0)
+		return "unknown control request";
+
+	icb = (Icmpcb6*) c->ptcl;
+	icb->headers = 6;
+	return nil;
+}
+
+/*
+ * Pass a received packet to the conversation that wants it.  With
+ * muxkey == 0 the conversation is chosen by the packet's icmp id and
+ * source address; otherwise by muxkey and the destination address.
+ * The block is queued on the first matching conversation's read queue,
+ * or freed if none matches.
+ */
+static void
+goticmpkt6(Proto *icmp, Block *bp, int muxkey)
+{
+	IPICMP *h;
+	Conv **cp, *conv;
+	uchar *peer;
+	ushort key;
+
+	h = (IPICMP *)bp->rp;
+	key = muxkey;
+	peer = h->dst;
+	if(muxkey == 0){
+		key = nhgets(h->icmpid);
+		peer = h->src;
+	}
+
+	for(cp = icmp->conv; *cp; cp++){
+		conv = *cp;
+		if(conv->lport != key || ipcmp(conv->raddr, peer) != 0)
+			continue;
+		bp = concatblock(bp);
+		if(bp != nil)
+			qpass(conv->rq, bp);
+		return;
+	}
+
+	freeblist(bp);
+}
+
+/*
+ * Turn the echo request in bp into an echo reply in place: swap the
+ * addresses, set the type and redo the checksum.  If the request was
+ * sent to a multicast address the reply source comes from
+ * ipv6anylocal instead; nil is returned when that fails (bp is not
+ * freed here).
+ */
+static Block *
+mkechoreply6(Block *bp, Ipifc *ifc)
+{
+	IPICMP *h;
+	uchar peer[IPaddrlen];
+
+	h = (IPICMP *)bp->rp;
+	ipmove(peer, h->src);
+
+	if(isv6mcast(h->dst)){
+		if(!ipv6anylocal(ifc, h->src))
+			return nil;
+	} else
+		ipmove(h->src, h->dst);
+
+	ipmove(h->dst, peer);
+	h->type = EchoReplyV6;
+	set_cksum(bp);
+	return bp;
+}
+
+/*
+ * Send an ICMPv6 neighbor solicitation for targ.
+ *	suni == SRC_UNSPEC: source is the unspecified address and no
+ *		link-layer option is sent; otherwise src is the source
+ *		and mac goes out in a source link-layer address option.
+ *	tuni == TARG_UNI: sent straight to targ (neighbor reachability);
+ *		otherwise to the address ipv62smcast derives from targ
+ *		(address resolution).
+ */
+extern void
+icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac)
+{
+	Proto *icmp;
+	Icmppriv6 *ipriv;
+	Block *nbp;
+	Ndpkt *np;
+	int hassrc;
+
+	icmp = f->t2p[ICMPv6];
+	ipriv = icmp->priv;
+	hassrc = suni != SRC_UNSPEC;
+
+	nbp = newIPICMP(sizeof(Ndpkt));
+	np = (Ndpkt*) nbp->rp;
+
+	memmove(np->src, hassrc? src: v6Unspecified, IPaddrlen);
+
+	if(tuni != TARG_UNI)
+		ipv62smcast(np->dst, targ);
+	else
+		memmove(np->dst, targ, IPaddrlen);
+
+	np->type = NbrSolicit;
+	np->code = 0;
+	memmove(np->target, targ, IPaddrlen);
+	if(hassrc){
+		np->otype = SRC_LLADDR;
+		np->olen = 1;		/* 1+1+6 = 8 = 1 8-octet */
+		memmove(np->lnaddr, mac, sizeof(np->lnaddr));
+	} else {
+		/* no option: trim the packet back to the fixed part */
+		nbp->wp -= sizeof(Ndpkt) - sizeof(NdiscC);
+	}
+
+	set_cksum(nbp);
+	np->ttl = HOP_LIMIT;
+	np->vcf[0] = 0x06 << 4;
+	ipriv->out[NbrSolicit]++;
+	netlog(f, Logicmp, "sending neighbor solicitation %I\n", targ);
+	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * Send an ICMPv6 neighbor advertisement for targ from src to dst.
+ * flags holds the RSO flag bits and is stored in the first icmpid
+ * byte; mac is sent in a target link-layer address option.
+ */
+extern void
+icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags)
+{
+	Proto *icmp;
+	Icmppriv6 *ipriv;
+	Block *nbp;
+	Ndpkt *np;
+
+	icmp = f->t2p[ICMPv6];
+	ipriv = icmp->priv;
+
+	nbp = newIPICMP(sizeof(Ndpkt));
+	np = (Ndpkt*)nbp->rp;
+
+	/* addresses */
+	memmove(np->src, src, IPaddrlen);
+	memmove(np->dst, dst, IPaddrlen);
+	memmove(np->target, targ, IPaddrlen);
+
+	/* icmp header */
+	np->type = NbrAdvert;
+	np->code = 0;
+	np->icmpid[0] = flags;
+
+	/* link-layer address option */
+	np->otype = TARGET_LLADDR;
+	np->olen = 1;
+	memmove(np->lnaddr, mac, sizeof(np->lnaddr));
+
+	set_cksum(nbp);
+	np->ttl = HOP_LIMIT;
+	np->vcf[0] = 0x06 << 4;
+	ipriv->out[NbrAdvert]++;
+	netlog(f, Logicmp, "sending neighbor advertisement %I\n", src);
+	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * Answer the IPv6 packet in bp with a destination-unreachable message
+ * carrying the given code.  The reply quotes as much of the first
+ * block of bp as fits in v6MINTU.
+ *
+ * free != 0: bp is consumed (freed on every path) and the reply is
+ *	injected with ipiput6 on ifc.
+ * free == 0: bp stays with the caller and the reply is sent with
+ *	ipoput6.
+ *
+ * Nothing is sent when the offending packet has a multicast source or
+ * when ipv6anylocal cannot supply a source address.
+ *
+ * The read lock on ifc is taken only around the part that needs it
+ * and is released on every path that took it.
+ */
+extern void
+icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free)
+{
+	int osz = BLEN(bp);
+	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+	Block *nbp;
+	IPICMP *np;
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
+	Icmppriv6 *ipriv = icmp->priv;
+
+	p = (Ip6hdr *)bp->rp;
+
+	/* ifc is not locked yet, so there is nothing to unlock here */
+	if(isv6mcast(p->src))
+		goto done;
+
+	nbp = newIPICMP(sz);
+	np = (IPICMP *)nbp->rp;
+
+	RLOCK(ifc);
+	if(!ipv6anylocal(ifc, np->src)){
+		RUNLOCK(ifc);
+		netlog(f, Logicmp, "icmphostunr fail -> s%I d%I\n",
+			p->src, p->dst);
+		freeblist(nbp);
+		goto done;
+	}
+	netlog(f, Logicmp, "send icmphostunr -> s%I d%I\n",
+		p->src, p->dst);
+
+	memmove(np->dst, p->src, IPaddrlen);
+	np->type = UnreachableV6;
+	np->code = code;
+	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	set_cksum(nbp);
+	np->ttl = HOP_LIMIT;
+	np->vcf[0] = 0x06 << 4;
+	ipriv->out[UnreachableV6]++;
+
+	if(free)
+		ipiput6(f, ifc, nbp);
+	else
+		ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+	RUNLOCK(ifc);
+
+done:
+	/* bp belongs to us only when the caller asked us to free it */
+	if(free)
+		freeblist(bp);
+}
+
+/*
+ * Answer the IPv6 packet in bp with a time-exceeded message (code 0).
+ * The reply quotes as much of the first block of bp as fits in
+ * v6MINTU.  bp itself is not freed here.  Nothing is sent when the
+ * offending packet has a multicast source or when ipv6anylocal cannot
+ * supply a source address.
+ */
+extern void
+icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp)
+{
+	int osz = BLEN(bp);
+	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+	Block *nbp;
+	IPICMP *np;
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
+	Icmppriv6 *ipriv = icmp->priv;
+
+	p = (Ip6hdr *)bp->rp;
+
+	if(isv6mcast(p->src))
+		return;
+
+	nbp = newIPICMP(sz);
+	np = (IPICMP *) nbp->rp;
+
+	if(ipv6anylocal(ifc, np->src))
+		netlog(f, Logicmp, "send icmpttlexceeded6 -> s%I d%I\n",
+			p->src, p->dst);
+	else {
+		netlog(f, Logicmp, "icmpttlexceeded6 fail -> s%I d%I\n",
+			p->src, p->dst);
+		/* the reply was never sent; don't leak it */
+		freeblist(nbp);
+		return;
+	}
+
+	memmove(np->dst, p->src, IPaddrlen);
+	np->type = TimeExceedV6;
+	np->code = 0;
+	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	set_cksum(nbp);
+	np->ttl = HOP_LIMIT;
+	np->vcf[0] = 0x06 << 4;
+	ipriv->out[TimeExceedV6]++;
+	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * Answer the IPv6 packet in bp with a packet-too-big message.  The
+ * value ifc->maxtu - ifc->m->hsize is stored as a 4-byte quantity
+ * starting at icmpid (so it also covers seq).  The reply quotes as
+ * much of the first block of bp as fits in v6MINTU.  bp itself is not
+ * freed here.  Nothing is sent when the offending packet has a
+ * multicast source or when ipv6anylocal cannot supply a source
+ * address.
+ */
+extern void
+icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp)
+{
+	int osz = BLEN(bp);
+	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+	Block *nbp;
+	IPICMP *np;
+	Ip6hdr *p;
+	Proto *icmp = f->t2p[ICMPv6];
+	Icmppriv6 *ipriv = icmp->priv;
+
+	p = (Ip6hdr *)bp->rp;
+
+	if(isv6mcast(p->src))
+		return;
+
+	nbp = newIPICMP(sz);
+	np = (IPICMP *)nbp->rp;
+
+	if(ipv6anylocal(ifc, np->src))
+		netlog(f, Logicmp, "send icmppkttoobig6 -> s%I d%I\n",
+			p->src, p->dst);
+	else {
+		netlog(f, Logicmp, "icmppkttoobig6 fail -> s%I d%I\n",
+			p->src, p->dst);
+		/* the reply was never sent; don't leak it */
+		freeblist(nbp);
+		return;
+	}
+
+	memmove(np->dst, p->src, IPaddrlen);
+	np->type = PacketTooBigV6;
+	np->code = 0;
+	hnputl(np->icmpid, ifc->maxtu - ifc->m->hsize);
+	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	set_cksum(nbp);
+	np->ttl = HOP_LIMIT;
+	np->vcf[0] = 0x06 << 4;
+	ipriv->out[PacketTooBigV6]++;
+	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * Sanity-check a received ICMPv6 packet; see RFC 2461, pages 39-40
+ * and pages 57-58.  Returns 1 if the packet may be processed and 0 if
+ * it must be dropped; every rejection bumps InErrors6 and most also
+ * bump a more specific counter.
+ *
+ * Checks: minimum length, payload length against the block length,
+ * no extension headers, checksum (the IPv6 header is temporarily
+ * overwritten to act as the pseudoheader and restored afterwards),
+ * and for neighbor/router discovery types the hop limit, code,
+ * minimum size and option lengths.
+ */
+static int
+valid(Proto *icmp, Ipifc *ifc, Block *bp, Icmppriv6 *ipriv)
+{
+	int sz, osz, unsp, n, ttl, iplen;
+	int pktsz = BLEN(bp);
+	uchar *packet = bp->rp;
+	IPICMP *p = (IPICMP *) packet;
+	Ndpkt *np;
+
+	USED(ifc);
+	n = blocklen(bp);
+	if(n < sizeof(IPICMP)) {
+		ipriv->stats[HlenErrs6]++;
+		netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
+		goto err;
+	}
+
+	iplen = nhgets(p->ploadlen);
+	if(iplen > n - IP6HDR) {
+		ipriv->stats[LenErrs6]++;
+		netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
+		goto err;
+	}
+
+	/* Rather than construct explicit pseudoheader, overwrite IPv6 header */
+	if(p->proto != ICMPv6) {
+		/* This code assumes no extension headers!!! */
+		netlog(icmp->f, Logicmp, "icmp error: extension header\n");
+		goto err;
+	}
+	memset(packet, 0, 4);
+	ttl = p->ttl;
+	p->ttl = p->proto;
+	p->proto = 0;
+	if(ptclcsum(bp, 0, iplen + IP6HDR)) {
+		ipriv->stats[CsumErrs6]++;
+		netlog(icmp->f, Logicmp, "icmp checksum error\n");
+		goto err;
+	}
+	/* put the real proto and hop limit back */
+	p->proto = p->ttl;
+	p->ttl = ttl;
+
+	/* additional tests for some pkt types */
+	if (p->type == NbrSolicit || p->type == NbrAdvert ||
+	    p->type == RouterAdvert || p->type == RouterSolicit ||
+	    p->type == RedirectV6) {
+		if(p->ttl != HOP_LIMIT) {
+			ipriv->stats[HoplimErrs6]++;
+			goto err;
+		}
+		if(p->code != 0) {
+			ipriv->stats[IcmpCodeErrs6]++;
+			goto err;
+		}
+
+		switch (p->type) {
+		case NbrSolicit:
+		case NbrAdvert:
+			/*
+			 * the target address is read through a pointer into
+			 * the first block, so the fixed part must be there
+			 */
+			if(pktsz < (int)sizeof(NdiscC)) {
+				ipriv->stats[HlenErrs6]++;
+				goto err;
+			}
+			np = (Ndpkt*) p;
+			if(isv6mcast(np->target)) {
+				ipriv->stats[TargetErrs6]++;
+				goto err;
+			}
+			if(optexsts(np) && np->olen == 0) {
+				ipriv->stats[OptlenErrs6]++;
+				goto err;
+			}
+
+			/*
+			 * a solicitation from the unspecified address must go
+			 * to a solicited-node multicast address and carry no
+			 * option
+			 */
+			if (p->type == NbrSolicit &&
+			    ipcmp(np->src, v6Unspecified) == 0)
+				if(!issmcast(np->dst) || optexsts(np)) {
+					ipriv->stats[AddrmxpErrs6]++;
+					goto err;
+				}
+
+			/* a multicast advertisement must not have Sflag set */
+			if(p->type == NbrAdvert)
+				if(isv6mcast(np->dst) &&
+				    (nhgets(np->icmpid) & Sflag)){
+					ipriv->stats[AddrmxpErrs6]++;
+					goto err;
+				}
+			break;
+
+		case RouterAdvert:
+			if(pktsz - sizeof(Ip6hdr) < 16) {
+				ipriv->stats[HlenErrs6]++;
+				goto err;
+			}
+			if(!islinklocal(p->src)) {
+				ipriv->stats[RouterAddrErrs6]++;
+				goto err;
+			}
+			/* walk the options; a zero length would never advance */
+			sz = sizeof(IPICMP) + 8;
+			while (sz+1 < pktsz) {
+				osz = packet[sz+1];
+				if(osz <= 0) {
+					ipriv->stats[OptlenErrs6]++;
+					goto err;
+				}
+				sz += 8*osz;
+			}
+			break;
+
+		case RouterSolicit:
+			if(pktsz - sizeof(Ip6hdr) < 8) {
+				ipriv->stats[HlenErrs6]++;
+				goto err;
+			}
+			unsp = (ipcmp(p->src, v6Unspecified) == 0);
+			/*
+			 * NOTE(review): options are assumed to start at the
+			 * same offset as in a router advertisement -- confirm.
+			 */
+			sz = sizeof(IPICMP) + 8;
+			while (sz+1 < pktsz) {
+				osz = packet[sz+1];
+				if(osz <= 0 ||
+				    (unsp && packet[sz] == SRC_LLADDR)) {
+					ipriv->stats[OptlenErrs6]++;
+					goto err;
+				}
+				sz += 8*osz;
+			}
+			break;
+
+		case RedirectV6:
+			/* to be filled in */
+			break;
+
+		default:
+			goto err;
+		}
+	}
+	return 1;
+err:
+	ipriv->stats[InErrors6]++;
+	return 0;
+}
+
+/*
+ * Classify target with respect to ifc, under the interface's read
+ * lock: Tuniproxy if ipproxyifc accepts it, Tunitent or Tunirany if it
+ * is one of the interface's local addresses (tentative or not), and 0
+ * otherwise.
+ */
+static int
+targettype(Fs *f, Ipifc *ifc, uchar *target)
+{
+	Iplifc *lifc;
+	int kind;
+
+	kind = 0;
+	RLOCK(ifc);
+	if(ipproxyifc(f, ifc, target))
+		kind = Tuniproxy;
+	else {
+		for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+			if(ipcmp(lifc->local, target) != 0)
+				continue;
+			if(lifc->tentative)
+				kind = Tunitent;
+			else
+				kind = Tunirany;
+			break;
+		}
+	}
+	RUNLOCK(ifc);
+	return kind;
+}
+
+/*
+ * Receive routine for ICMPv6.  Packets that fail valid() or carry a
+ * type above Maxtype6 are dropped.  Otherwise the per-type input
+ * counter is bumped and the packet is handled by type:
+ *	echo requests are answered in place;
+ *	unreachable and ttl-exceeded messages are handed to the advise
+ *		routine of the protocol named in the quoted packet, or to
+ *		a local conversation if there is none;
+ *	router solicitations/advertisements go to the conversation whose
+ *		local port equals the message type;
+ *	neighbor solicitations are answered with an advertisement when
+ *		the target is ours (or proxied), and neighbor
+ *		advertisements are entered in the arp table;
+ *	everything else goes to the matching conversation.
+ * bp is consumed on every path.
+ */
+static void
+icmpiput6(Proto *icmp, Ipifc *ipifc, Block *bp)
+{
+	int refresh = 1;
+	char *msg, m2[128];
+	uchar pktflags;
+	uchar *packet = bp->rp;
+	uchar lsrc[IPaddrlen];
+	Block *r;
+	IPICMP *p = (IPICMP *)packet;
+	Icmppriv6 *ipriv = icmp->priv;
+	Iplifc *lifc;
+	Ndpkt* np;
+	Proto *pr;
+
+	if(!valid(icmp, ipifc, bp, ipriv) || p->type > Maxtype6)
+		goto raise;
+
+	ipriv->in[p->type]++;
+
+	switch(p->type) {
+	case EchoRequestV6:
+		r = mkechoreply6(bp, ipifc);
+		if(r == nil)
+			goto raise;
+		/* count the reply under the type that is actually sent */
+		ipriv->out[EchoReplyV6]++;
+		ipoput6(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
+		break;
+
+	case UnreachableV6:
+		if(p->code >= nelem(unreachcode))
+			msg = unreachcode[Icmp6_unknown];
+		else
+			msg = unreachcode[p->code];
+
+		/* step over our headers to the quoted packet */
+		bp->rp += sizeof(IPICMP);
+		if(blocklen(bp) < 8){
+			ipriv->stats[LenErrs6]++;
+			goto raise;
+		}
+		p = (IPICMP *)bp->rp;
+		pr = Fsrcvpcolx(icmp->f, p->proto);
+		if(pr != nil && pr->advise != nil) {
+			(*pr->advise)(pr, bp, msg);
+			return;
+		}
+
+		bp->rp -= sizeof(IPICMP);
+		goticmpkt6(icmp, bp, 0);
+		break;
+
+	case TimeExceedV6:
+		if(p->code == 0){
+			sprint(m2, "ttl exceeded at %I", p->src);
+
+			/* step over our headers to the quoted packet */
+			bp->rp += sizeof(IPICMP);
+			if(blocklen(bp) < 8){
+				ipriv->stats[LenErrs6]++;
+				goto raise;
+			}
+			p = (IPICMP *)bp->rp;
+			pr = Fsrcvpcolx(icmp->f, p->proto);
+			if(pr && pr->advise) {
+				(*pr->advise)(pr, bp, m2);
+				return;
+			}
+			bp->rp -= sizeof(IPICMP);
+		}
+
+		goticmpkt6(icmp, bp, 0);
+		break;
+
+	case RouterAdvert:
+	case RouterSolicit:
+		/* using lsrc as a temp, munge hdr for goticmp6 */
+		if (0) {
+			memmove(lsrc, p->src, IPaddrlen);
+			memmove(p->src, p->dst, IPaddrlen);
+			memmove(p->dst, lsrc, IPaddrlen);
+		}
+		goticmpkt6(icmp, bp, p->type);
+		break;
+
+	case NbrSolicit:
+		np = (Ndpkt*) p;
+		pktflags = 0;
+		switch (targettype(icmp->f, ipifc, np->target)) {
+		case Tunirany:
+			pktflags |= Oflag;
+			/* fall through */
+
+		case Tuniproxy:
+			if(ipcmp(np->src, v6Unspecified) != 0) {
+				arpenter(icmp->f, V6, np->src, np->lnaddr,
+					8*np->olen-2, 0);
+				pktflags |= Sflag;
+			}
+			if(ipv6local(ipifc, lsrc))
+				icmpna(icmp->f, lsrc,
+					(ipcmp(np->src, v6Unspecified) == 0?
+						v6allnodesL: np->src),
+					np->target, ipifc->mac, pktflags);
+			break;
+
+		case Tunitent:
+			/* not clear what needs to be done. send up
+			 * an icmp mesg saying don't use this address? */
+		default:
+			break;
+		}
+		/*
+		 * icmpna builds its own block, so the solicitation is
+		 * finished with whether or not a reply went out
+		 */
+		freeblist(bp);
+		break;
+
+	case NbrAdvert:
+		np = (Ndpkt*) p;
+
+		/*
+		 * if the target address matches one of the local interface
+		 * addresses and the local interface address has tentative bit
+		 * set, insert into ARP table. this is so the duplicate address
+		 * detection part of ipconfig can discover duplication through
+		 * the arp table.
+		 */
+		lifc = iplocalonifc(ipifc, np->target);
+		if(lifc && lifc->tentative)
+			refresh = 0;
+		arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2,
+			refresh);
+		freeblist(bp);
+		break;
+
+	case PacketTooBigV6:
+	default:
+		goticmpkt6(icmp, bp, 0);
+		break;
+	}
+	return;
+raise:
+	freeblist(bp);
+}
+
+/*
+ * Format the ICMPv6 counters into buf (at most len bytes): first the
+ * general statistics, then one "name: in out" line for each message
+ * type that has a name.  Unnamed types are not reported.  Returns the
+ * number of bytes written.
+ */
+int
+icmpstats6(Proto *icmp6, char *buf, int len)
+{
+	Icmppriv6 *ipriv = icmp6->priv;
+	char *cur = buf;
+	char *end = buf + len;
+	int t;
+
+	for(t = 0; t < Nstats6; t++)
+		cur = seprint(cur, end, "%s: %lud\n", statnames6[t], ipriv->stats[t]);
+
+	for(t = 0; t <= Maxtype6; t++){
+		if(!icmpnames6[t])
+			continue;
+		cur = seprint(cur, end, "%s: %lud %lud\n", icmpnames6[t],
+			ipriv->in[t], ipriv->out[t]);
+	}
+	return cur - buf;
+}
+
+
+/* import from icmp.c */
+extern int icmpstate(Conv *c, char *state, int n);
+extern char* icmpannounce(Conv *c, char **argv, int argc);
+extern char* icmpconnect(Conv *c, char **argv, int argc);
+extern void icmpclose(Conv *c);
+
+/*
+ * Allocate the ICMPv6 protocol descriptor and its private counters,
+ * fill in the handlers and parameters, and register it with fs.
+ * connect, announce, state and close are shared with icmp.c.
+ */
+void
+icmp6init(Fs *fs)
+{
+	Proto *pr;
+
+	pr = smalloc(sizeof(Proto));
+	pr->priv = smalloc(sizeof(Icmppriv6));
+
+	/* identity and sizing */
+	pr->name = "icmpv6";
+	pr->ipproto = ICMPv6;
+	pr->nc = 16;
+	pr->ptclsize = sizeof(Icmpcb6);
+
+	/* handlers shared with icmp.c */
+	pr->connect = icmpconnect;
+	pr->announce = icmpannounce;
+	pr->state = icmpstate;
+	pr->close = icmpclose;
+
+	/* handlers from this file */
+	pr->create = icmpcreate6;
+	pr->rcv = icmpiput6;
+	pr->stats = icmpstats6;
+	pr->ctl = icmpctl6;
+	pr->advise = icmpadvise6;
+	pr->gc = nil;
+
+	Fsproto(fs, pr);
+}
diff --git a/src/9vx/a/ip/igmp.c b/src/9vx/a/ip/igmp.c
@@ -0,0 +1,294 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+/* IGMP sizes, protocol number, message types and report timing */
+enum
+{
+ IGMP_IPHDRSIZE = 20, /* size of ip header */
+ IGMP_HDRSIZE = 8, /* size of IGMP header */
+ IP_IGMPPROTO = 2,
+
+ IGMPquery = 1, /* low 4 bits of vertype */
+ IGMPreport = 2,
+
+ MSPTICK = 100, /* pause passed to tsleep between igmpproc passes */
+ MAXTIMEOUT = 10000/MSPTICK, /* at most 10 secs for a response */
+};
+
+typedef struct IGMPpkt IGMPpkt;
+typedef char byte;
+
+/*
+ * Layout that received and generated IGMP packets are cast to: an ip
+ * header followed by the igmp header.
+ * NOTE(review): src, dst and group are IPaddrlen bytes each while
+ * IGMP_IPHDRSIZE/IGMP_HDRSIZE are 20 and 8 -- confirm the layout
+ * matches what is on the wire.
+ */
+struct IGMPpkt
+{
+ /* ip header */
+ byte vihl; /* Version and header length */
+ byte tos; /* Type of service */
+ byte len[2]; /* packet length (including headers) */
+ byte id[2]; /* Identification */
+ byte frag[2]; /* Fragment information */
+ byte Unused;
+ byte proto; /* Protocol */
+ byte cksum[2]; /* checksum of ip portion */
+ byte src[IPaddrlen]; /* Ip source */
+ byte dst[IPaddrlen]; /* Ip destination */
+
+ /* igmp header */
+ byte vertype; /* version and type */
+ byte unused;
+ byte igmpcksum[2]; /* checksum of igmp portion */
+ byte group[IPaddrlen]; /* multicast group */
+};
+
+/*
+ * lists for group reports: one record per medium that has been
+ * queried, holding the groups still to be reported on it
+ */
+typedef struct IGMPrep IGMPrep;
+struct IGMPrep
+{
+ IGMPrep *next; /* next record on igmpalloc.reports */
+ Media *m; /* medium the query arrived on */
+ int ticks; /* incremented by igmpproc; compared with each group's timeout */
+ Multicast *multi; /* groups not yet reported */
+};
+
+/* global report state shared by igmpiput and igmpproc */
+typedef struct IGMP IGMP;
+struct IGMP
+{
+ Lock lk;
+
+ Rendez r; /* igmpproc sleeps here until reports is non-empty */
+ IGMPrep *reports; /* pending report records */
+};
+
+IGMP igmpalloc;
+
+ Proto igmp;
+extern Fs fs;
+
+/* message counters, printed by igmpstats */
+static struct Stats
+{
+ ulong inqueries;
+ ulong outqueries;
+ ulong inreports;
+ ulong outreports;
+} stats;
+
+/*
+ * Build and send a version 1 IGMP report for the group addr on medium
+ * m.  The packet is addressed to Ipallsys and sent with a TTL of 1.
+ * Nothing is sent if no block can be allocated.
+ */
+void
+igmpsendreport(Media *m, byte *addr)
+{
+	IGMPpkt *p;
+	Block *bp;
+
+	bp = allocb(sizeof(IGMPpkt));
+	if(bp == nil)
+		return;
+	p = (IGMPpkt*)bp->wp;
+	bp->wp += sizeof(IGMPpkt);
+
+	/* clear the packet first so the fields set below survive */
+	memset(p, 0, sizeof(IGMPpkt));
+	p->vihl = IP_VER4;
+	hnputl(p->src, Mediagetaddr(m));
+	hnputl(p->dst, Ipallsys);
+	p->vertype = (1<<4) | IGMPreport;
+	p->proto = IP_IGMPPROTO;
+	memmove(p->group, addr, IPaddrlen);
+	hnputs(p->igmpcksum, ptclcsum(bp, IGMP_IPHDRSIZE, IGMP_HDRSIZE));
+	netlog(Logigmp, "igmpreport %I\n", p->group);
+	stats.outreports++;
+	ipoput4(bp, 0, 1, DFLTTOS, nil);	/* TTL of 1 */
+}
+
+/* sleep condition for igmpproc: true while any report record is pending */
+static int
+isreport(void *a)
+{
+	USED(a);
+	if(igmpalloc.reports)
+		return 1;
+	return 0;
+}
+
+
+/*
+ * Body of the kernel process that sends pending group reports.  It
+ * sleeps until igmpalloc.reports is non-empty, then loops: under the
+ * lock it walks the report records, bumping each visited record's
+ * tick count, and unlinks the first group whose timeout has been
+ * reached; empty records are removed and freed.  A group found this
+ * way is reported and freed and the scan restarts at once; otherwise
+ * the process pauses MSPTICK before scanning again.
+ */
+void
+igmpproc(void *a)
+{
+ IGMPrep *rp, **lrp;
+ Multicast *mp, **lmp;
+ byte ip[IPaddrlen];
+
+ USED(a);
+
+ for(;;){
+ sleep(&igmpalloc.r, isreport, 0);
+ for(;;){
+ lock(&igmpalloc);
+
+ /* nothing pending: leave with the lock held; it is released after the loop */
+ if(igmpalloc.reports == nil)
+ break;
+
+ /* look for a single report */
+ lrp = &igmpalloc.reports;
+ mp = nil;
+ for(rp = *lrp; rp; rp = *lrp){
+ rp->ticks++;
+ lmp = &rp->multi;
+ for(mp = *lmp; mp; mp = *lmp){
+ if(rp->ticks >= mp->timeout){
+ /* due: unlink it from the record's list */
+ *lmp = mp->next;
+ break;
+ }
+ lmp = &mp->next;
+ }
+ if(mp != nil)
+ break;
+
+ if(rp->multi != nil){
+ lrp = &rp->next;
+ continue;
+ } else {
+ /* every group has been dealt with: drop the record */
+ *lrp = rp->next;
+ free(rp);
+ }
+ }
+ unlock(&igmpalloc);
+
+ if(mp){
+ /* do a single report and try again */
+ /* NOTE(review): rp is read here after the lock was dropped -- confirm nothing else can free it */
+ hnputl(ip, mp->addr);
+ igmpsendreport(rp->m, ip);
+ free(mp);
+ continue;
+ }
+
+ tsleep(&up->sleep, return0, 0, MSPTICK);
+ }
+ unlock(&igmpalloc);
+ }
+
+}
+
+/*
+ * Receive routine for IGMP.  Packets that are too short, are not
+ * version 1, or fail the checksum are dropped.
+ *
+ * A query starts reporting on medium m unless a report record for m
+ * already exists: the medium's group list is copied, each group gets
+ * a random timeout below MAXTIMEOUT, and igmpproc is woken.
+ *
+ * A report from someone else removes the reported group from m's
+ * pending list, since it no longer has to be reported by us.
+ *
+ * bp is always freed.
+ */
+void
+igmpiput(Media *m, Ipifc *, Block *bp)
+{
+	int n;
+	IGMPpkt *ghp;
+	Ipaddr group;
+	IGMPrep *rp, **lrp;
+	Multicast *mp, **lmp, *nmp;
+
+	/* check the length before looking at any header field */
+	n = blocklen(bp);
+	if(n < IGMP_IPHDRSIZE+IGMP_HDRSIZE){
+		netlog(Logigmp, "igmpiput: bad len\n");
+		goto error;
+	}
+
+	ghp = (IGMPpkt*)(bp->rp);
+	netlog(Logigmp, "igmpiput: %d %I\n", ghp->vertype, ghp->group);
+
+	if((ghp->vertype>>4) != 1){
+		netlog(Logigmp, "igmpiput: bad igmp type\n");
+		goto error;
+	}
+	if(ptclcsum(bp, IGMP_IPHDRSIZE, IGMP_HDRSIZE)){
+		netlog(Logigmp, "igmpiput: checksum error %I\n", ghp->src);
+		goto error;
+	}
+
+	group = nhgetl(ghp->group);
+
+	lock(&igmpalloc);
+	switch(ghp->vertype & 0xf){
+	case IGMPquery:
+		/*
+		 * start reporting groups that we're a member of.
+		 */
+		stats.inqueries++;
+		for(rp = igmpalloc.reports; rp; rp = rp->next)
+			if(rp->m == m)
+				break;
+		if(rp != nil)
+			break;	/* already reporting */
+
+		mp = Mediacopymulti(m);
+		if(mp == nil)
+			break;
+
+		rp = malloc(sizeof(*rp));
+		if(rp == nil){
+			/* no record to hang the copied list on: free it */
+			for(; mp; mp = nmp){
+				nmp = mp->next;
+				free(mp);
+			}
+			break;
+		}
+
+		rp->m = m;
+		rp->multi = mp;
+		rp->ticks = 0;
+		for(; mp; mp = mp->next)
+			mp->timeout = nrand(MAXTIMEOUT);
+		rp->next = igmpalloc.reports;
+		igmpalloc.reports = rp;
+
+		wakeup(&igmpalloc.r);
+
+		break;
+	case IGMPreport:
+		/*
+		 * find report list for this medium
+		 */
+		stats.inreports++;
+		lrp = &igmpalloc.reports;
+		for(rp = *lrp; rp; rp = *lrp){
+			if(rp->m == m)
+				break;
+			lrp = &rp->next;
+		}
+		if(rp == nil)
+			break;
+
+		/*
+		 * if someone else has reported a group,
+		 * we don't have to.
+		 */
+		lmp = &rp->multi;
+		for(mp = *lmp; mp; mp = *lmp){
+			if(mp->addr == group){
+				*lmp = mp->next;
+				free(mp);
+				break;
+			}
+			lmp = &mp->next;
+		}
+
+		break;
+	}
+	unlock(&igmpalloc);
+
+error:
+	freeb(bp);
+}
+
+/*
+ * Format the IGMP counters into buf (at most len bytes): queries and
+ * reports received, then queries and reports sent.  Returns what
+ * snprint returns.  The counters are ulong, hence %lud.
+ */
+int
+igmpstats(char *buf, int len)
+{
+	return snprint(buf, len, "\trcvd %lud %lud\n\tsent %lud %lud\n",
+		stats.inqueries, stats.inreports,
+		stats.outqueries, stats.outreports);
+}
+
+/*
+ * Fill in the global igmp protocol descriptor, install igmpsendreport
+ * as the report hook, start the igmpproc kernel process and register
+ * the protocol with fs.
+ */
+void
+igmpinit(Fs *fs)
+{
+	/* identity and sizing */
+	igmp.name = "igmp";
+	igmp.ipproto = IP_IGMPPROTO;
+	igmp.nc = 0;
+	igmp.ptclsize = 0;
+
+	/* no per-conversation handlers */
+	igmp.connect = nil;
+	igmp.announce = nil;
+	igmp.ctl = nil;
+	igmp.state = nil;
+	igmp.close = nil;
+
+	/* packet and reporting handlers */
+	igmp.rcv = igmpiput;
+	igmp.stats = igmpstats;
+
+	igmpreportfn = igmpsendreport;
+	kproc("igmpproc", igmpproc, 0);
+
+	Fsproto(fs, &igmp);
+}
diff --git a/src/9vx/a/ip/il.c b/src/9vx/a/ip/il.c
@@ -0,0 +1,1408 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+enum /* Connection state */
+{
+ Ilclosed,
+ Ilsyncer,
+ Ilsyncee,
+ Ilestablished,
+ Illistening,
+ Ilclosing,
+ Ilopening, /* only for file server */
+};
+
+char *ilstates[] =
+{
+ "Closed",
+ "Syncer",
+ "Syncee",
+ "Established",
+ "Listen",
+ "Closing",
+ "Opening", /* only for file server */
+};
+
+enum /* Packet types */
+{
+ Ilsync,
+ Ildata,
+ Ildataquery,
+ Ilack,
+ Ilquery,
+ Ilstate,
+ Ilclose,
+};
+
+char *iltype[] =
+{
+ "sync",
+ "data",
+ "dataquery",
+ "ack",
+ "query",
+ "state",
+ "close"
+};
+
+enum
+{
+	Seconds		= 1000,			/* timer units per second; presumably NOW counts ms - confirm */
+	Iltickms 	= 50,			/* time base */
+	AckDelay	= 2*Iltickms,		/* max time twixt message rcvd & ack sent */
+	MaxTimeout	= 30*Seconds,		/* max time between rexmit */
+	QueryTime	= 10*Seconds,		/* time between subsequent queries */
+	DeathTime	= 30*QueryTime,		/* silence after which ilackproc hangs up an established conversation */
+
+	MaxRexmit	= 16,			/* max retransmissions before hangup */
+	Defaultwin	= 20,			/* initial Ilcb window; iloutoforder accepts ids up to recvd+window */
+
+	LogAGain	= 3,			/* Ilcb delay and rate are kept scaled up by AGain */
+	AGain		= 1<<LogAGain,
+	LogDGain	= 2,			/* Ilcb mdev is kept scaled up by DGain */
+	DGain		= 1<<LogDGain,
+
+	DefByteRate	= 100,			/* assume a megabit link */
+	DefRtt		= 50,			/* cross country on a great day */
+
+	Maxrq		= 64*1024,		/* read queue limit passed to qopen; Ildata is dropped at this backlog */
+};
+
+enum
+{
+ Nqt= 8,
+};
+
+typedef struct Ilcb Ilcb;
+struct Ilcb /* Control block */
+{
+ int state; /* Connection state */
+ Conv *conv;
+ QLock ackq; /* Unacknowledged queue */
+ Block *unacked;
+ Block *unackedtail;
+ ulong unackedbytes;
+ QLock outo; /* Out of order packet queue */
+ Block *outoforder;
+ ulong next; /* Id of next to send */
+ ulong recvd; /* Last packet received */
+ ulong acksent; /* Last packet acked */
+ ulong start; /* Local start id */
+ ulong rstart; /* Remote start id */
+ int window; /* Maximum receive window */
+ int rxquery; /* number of queries on this connection */
+ int rxtot; /* number of retransmits on this connection */
+ int rexmit; /* number of retransmits of *unacked */
+ ulong qt[Nqt+1]; /* state table for query messages */
+ int qtx; /* ... index into qt */
+
+ /* if set, fasttimeout causes a connection request to terminate after 4*Iltickms */
+ int fasttimeout;
+
+ /* timers */
+ ulong lastxmit; /* time of last xmit */
+ ulong lastrecv; /* time of last recv */
+ ulong timeout; /* retransmission time for *unacked */
+ ulong acktime; /* time to send next ack */
+ ulong querytime; /* time to send next query */
+
+ /* adaptive measurements */
+ int delay; /* Average of the fixed rtt delay */
+ int rate; /* Average uchar rate */
+ int mdev; /* Mean deviation of rtt */
+ int maxrtt; /* largest rtt seen */
+ ulong rttack; /* The ack we are waiting for */
+ int rttlen; /* Length of rttack packet */
+ uvlong rttstart; /* Time we issued rttack packet */
+};
+
+enum
+{
+ IL_IPSIZE = 20,
+ IL_HDRSIZE = 18,
+ IL_LISTEN = 0,
+ IL_CONNECT = 1,
+ IP_ILPROTO = 40,
+};
+
+typedef struct Ilhdr Ilhdr;
+struct Ilhdr
+{
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+ uchar ttl; /* Time to live */
+ uchar proto; /* Protocol */
+ uchar cksum[2]; /* Header checksum */
+ uchar src[4]; /* Ip source */
+ uchar dst[4]; /* Ip destination */
+ uchar ilsum[2]; /* Checksum including header */
+ uchar illen[2]; /* Packet length */
+ uchar iltype; /* Packet type */
+ uchar ilspec; /* Special */
+ uchar ilsrc[2]; /* Src port */
+ uchar ildst[2]; /* Dst port */
+ uchar ilid[4]; /* Sequence id */
+ uchar ilack[4]; /* Acked sequence */
+};
+
+enum
+{
+ InMsgs,
+ OutMsgs,
+ CsumErrs, /* checksum errors */
+ HlenErrs, /* header length error */
+ LenErrs, /* short packet */
+ OutOfOrder, /* out of order */
+ Retrans, /* retransmissions */
+ DupMsg,
+ DupBytes,
+ DroppedMsgs,
+
+ Nstats,
+};
+
+static char *statnames[] =
+{
+[InMsgs] "InMsgs",
+[OutMsgs] "OutMsgs",
+[CsumErrs] "CsumErrs",
+[HlenErrs] "HlenErr",
+[LenErrs] "LenErrs",
+[OutOfOrder] "OutOfOrder",
+[Retrans] "Retrans",
+[DupMsg] "DupMsg",
+[DupBytes] "DupBytes",
+[DroppedMsgs] "DroppedMsgs",
+};
+
+typedef struct Ilpriv Ilpriv;
+struct Ilpriv
+{
+	Ipht	ht;			/* conversation lookup table (iphtadd/iphtlook/iphtrem) */
+
+	ulong	stats[Nstats];		/* counters printed by ilxstats, indexed by the enum above */
+	/* older per-field counters; ilxstats reports only stats[] */
+	ulong	csumerr;		/* checksum errors */
+	ulong	hlenerr;		/* header length error */
+	ulong	lenerr;			/* short packet */
+	ulong	order;			/* out of order */
+	ulong	rexmit;			/* retransmissions */
+	ulong	dup;
+	ulong	dupb;
+
+	/* keeping track of the ack kproc */
+	int	ackprocstarted;		/* set once ilstart has launched ilackproc */
+	QLock	apl;			/* held by ilstart while it checks and sets ackprocstarted */
+};
+
+/* state for query/dataquery messages */
+
+
+void ilrcvmsg(Conv*, Block*);
+void ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
+void ilackq(Ilcb*, Block*);
+void ilprocess(Conv*, Ilhdr*, Block*);
+void ilpullup(Conv*);
+void ilhangup(Conv*, char*);
+void ilfreeq(Ilcb*);
+void ilrexmit(Ilcb*);
+void ilbackoff(Ilcb*);
+void ilsettimeout(Ilcb*);
+char* ilstart(Conv*, int, int);
+void ilackproc(void*);
+void iloutoforder(Conv*, Ilhdr*, Block*);
+void iliput(Proto*, Ipifc*, Block*);
+void iladvise(Proto*, Block*, char*);
+int ilnextqt(Ilcb*);
+void ilcbinit(Ilcb*);
+int later(ulong, ulong, char*);
+void ilreject(Fs*, Ilhdr*);
+void illocalclose(Conv *c);
+ int ilcksum = 1;
+static int initseq = 25001;
+static ulong scalediv, scalemul;
+static char *etime = "connection timed out";
+
+/* Connect c per argv; a "!fasttimeout" suffix on argv[1] asks ilstart for a quick-fail connect. */
+static char*
+ilconnect(Conv *c, char **argv, int argc)
+{
+	char *err, *suffix;
+	int fast;
+
+	/* huge hack to quickly try an il connection: cut argv[1] at the suffix first */
+	suffix = nil;
+	if(argc > 1)
+		suffix = strstr(argv[1], "!fasttimeout");
+	fast = suffix != nil;
+	if(fast)
+		*suffix = 0;
+
+	err = Fsstdconnect(c, argv, argc);
+	/* only start the handshake when the standard connect step reported no error */
+	if(err == nil)
+		err = ilstart(c, IL_CONNECT, fast);
+	return err;
+}
+
+/* Format a one-line summary of c's IL state, queue lengths and timing estimates into state. */
+static int
+ilstate(Conv *c, char *state, int n)
+{
+	Ilcb *ic;
+	ic = (Ilcb*)(c->ptcl);
+	return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d\n",
+		ilstates[ic->state],
+		c->rq ? qlen(c->rq) : 0,	/* a missing queue is reported as length 0 */
+		c->wq ? qlen(c->wq) : 0,
+		ic->delay>>LogAGain, ic->rate>>LogAGain, ic->mdev>>LogDGain,	/* stored scaled; shift back down */
+		ic->unackedbytes, ic->rxtot, ic->rxquery, ic->maxrtt);
+}
+
+/* Report whether conversation c is in any state other than Ilclosed. */
+static int
+ilinuse(Conv *c)
+{
+	Ilcb *cb = (Ilcb*)(c->ptcl);
+	if(cb->state == Ilclosed)
+		return 0;
+	return 1;
+}
+
+/* Announce c (called with c locked): standard announce step, then start listening. */
+static char*
+ilannounce(Conv *c, char **argv, int argc)
+{
+	char *err;
+
+	err = Fsstdannounce(c, argv, argc);
+	if(err == nil)
+		err = ilstart(c, IL_LISTEN, 0);
+	if(err != nil)
+		return err;
+
+	/* both steps succeeded: report the conversation as connected with no error */
+	Fsconnected(c, nil);
+	return nil;
+}
+
+/* Mark c closed locally: set Ilclosed, drop it from the hash table, clear its local address and port. */
+void
+illocalclose(Conv *c)
+{
+	Ilcb *ic;
+	Ilpriv *ipriv;
+	ipriv = c->p->priv;
+	ic = (Ilcb*)c->ptcl;
+	ic->state = Ilclosed;
+	iphtrem(&ipriv->ht, c);		/* iliput can no longer find c via iphtlook */
+	ipmove(c->laddr, IPnoaddr);
+	c->lport = 0;
+}
+
+/* Close c: shut its queues, then by state either start the close handshake or close locally. */
+static void
+ilclose(Conv *c)
+{
+	Ilcb *ic;
+
+	ic = (Ilcb*)c->ptcl;
+	qclose(c->rq);
+	qclose(c->wq);
+	qclose(c->eq);
+
+	switch(ic->state) {
+	case Ilclosing:
+	case Ilclosed:
+		break;				/* nothing further to send */
+	case Ilsyncer:
+	case Ilsyncee:
+	case Ilestablished:
+		ic->state = Ilclosing;		/* ilackproc resends Ilclose from this state */
+		ilsettimeout(ic);
+		ilsendctl(c, nil, Ilclose, ic->next, ic->recvd, 0);
+		break;
+	case Illistening:
+		illocalclose(c);		/* no peer to notify */
+		break;
+	}
+	ilfreeq(ic);				/* drop unacked and out-of-order packets in every case */
+}
+
+/* Write-queue bypass routine (see ilcreate): wrap bp in IP+IL headers, queue a copy for rexmit, send it. */
+void
+ilkick(void *x, Block *bp)
+{
+	Conv *c = x;
+	Ilhdr *ih;
+	Ilcb *ic;
+	int dlen;
+	ulong id, ack;
+	Fs *f;
+	Ilpriv *priv;
+
+	f = c->p->f;
+	priv = c->p->priv;
+	ic = (Ilcb*)c->ptcl;
+	if(bp == nil)
+		return;
+
+	switch(ic->state) {
+	case Ilclosed:
+	case Illistening:
+	case Ilclosing:
+		freeblist(bp);			/* no data may be sent in these states */
+		qhangup(c->rq, nil);
+		return;
+	}
+
+	dlen = blocklen(bp);			/* payload length, measured before the headers are added */
+
+	/* Make space to fit il & ip */
+	bp = padblock(bp, IL_IPSIZE+IL_HDRSIZE);
+	ih = (Ilhdr *)(bp->rp);
+	ih->vihl = IP_VER4;
+
+	/* Ip fields */
+	ih->frag[0] = 0;
+	ih->frag[1] = 0;
+	v6tov4(ih->dst, c->raddr);
+	v6tov4(ih->src, c->laddr);
+	ih->proto = IP_ILPROTO;
+
+	/* Il fields */
+	hnputs(ih->illen, dlen+IL_HDRSIZE);
+	hnputs(ih->ilsrc, c->lport);
+	hnputs(ih->ildst, c->rport);
+
+	qlock(&ic->ackq);			/* id allocation and the unacked-queue append happen under ackq */
+	id = ic->next++;
+	hnputl(ih->ilid, id);
+	ack = ic->recvd;			/* every data packet carries an ack of what we have received */
+	hnputl(ih->ilack, ack);
+	ic->acksent = ack;
+	ic->acktime = NOW + AckDelay;
+	ih->iltype = Ildata;
+	ih->ilspec = 0;
+	ih->ilsum[0] = 0;			/* checksum field is zero while the sum is computed */
+	ih->ilsum[1] = 0;
+
+	/* Checksum of ilheader plus data (not ip & no pseudo header) */
+	if(ilcksum)
+		hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, dlen+IL_HDRSIZE));
+
+	ilackq(ic, bp);				/* keeps a copy; bp itself is sent below */
+	qunlock(&ic->ackq);
+
+	/* Start the round trip timer for this packet if the timer is free */
+	if(ic->rttack == 0) {
+		ic->rttack = id;
+		ic->rttstart = fastticks(nil);
+		ic->rttlen = dlen + IL_IPSIZE + IL_HDRSIZE;
+	}
+
+	if(later(NOW, ic->timeout, nil))	/* rearm the rexmit timer only if it has already expired */
+		ilsettimeout(ic);
+	ipoput4(f, bp, 0, c->ttl, c->tos, c);
+	priv->stats[OutMsgs]++;
+}
+/* Set up c's queues: a read queue opened with limit Maxrq and a write queue that bypasses to ilkick. */
+static void
+ilcreate(Conv *c)
+{
+	c->rq = qopen(Maxrq, 0, 0, c);
+	c->wq = qbypass(ilkick, c);	/* written blocks are handed to ilkick with c as its first argument */
+}
+
+/* Print one "name: value" line per IL counter into buf; returns how far seprint advanced past buf. */
+int
+ilxstats(Proto *il, char *buf, int len)
+{
+	Ilpriv *ipriv = il->priv;
+	char *cur, *end;
+	int n;
+
+	end = buf+len;
+	cur = buf;
+	for(n = 0; n < Nstats; n++)
+		cur = seprint(cur, end, "%s: %lud\n", statnames[n], ipriv->stats[n]);
+	return cur - buf;
+}
+
+/* Append a private copy of bp to ic's unacked list; ilkick calls this while holding ackq. */
+void
+ilackq(Ilcb *ic, Block *bp)
+{
+	Block *copy;
+	int len;
+
+	/* Enqueue a copy on the unacked queue in case this one gets lost */
+	len = blocklen(bp);
+	copy = copyblock(bp, len);
+	copy->list = nil;
+	if(ic->unacked == nil)
+		ic->unacked = copy;
+	else
+		ic->unackedtail->list = copy;
+	ic->unackedtail = copy;
+	ic->unackedbytes += len;
+}
+
+/* Fold the round trip of the timed packet (rttack) into ic's delay, rate, mdev and maxrtt estimates. */
+static
+void
+ilrttcalc(Ilcb *ic, Block *bp)
+{
+	int rtt, tt, pt, delay, rate;
+	uvlong ms;
+
+	/* scale in 64 bits: cutting the tick delta to int first lets long delays wrap into range */
+	ms = ((fastticks(nil) - ic->rttstart)*scalemul)/scalediv;
+	if(ms > 120000)		/* Guard against zero wrap (a backwards clock shows up as a huge value) */
+		return;
+	rtt = ms;
+	delay = ic->delay;
+	rate = ic->rate;
+	/* this block had to be transmitted after the one acked so count its size */
+	ic->rttlen += blocklen(bp) + IL_IPSIZE + IL_HDRSIZE;
+
+	if(ic->rttlen < 256){
+		/* guess fixed delay as rtt of small packets */
+		delay += rtt - (delay>>LogAGain);
+		if(delay < AGain)
+			delay = AGain;
+		ic->delay = delay;
+	} else {
+		/* if packet took longer than avg rtt delay, recalc rate */
+		tt = rtt - (delay>>LogAGain);
+		if(tt > 0){
+			rate += ic->rttlen/tt - (rate>>LogAGain);
+			if(rate < AGain)	/* keeps rate>>LogAGain non-zero for the divisions that use it */
+				rate = AGain;
+			ic->rate = rate;
+		}
+	}
+
+	/* mdev */
+	pt = ic->rttlen/(rate>>LogAGain) + (delay>>LogAGain);
+	ic->mdev += abs(rtt-pt) - (ic->mdev>>LogDGain);
+
+	if(rtt > ic->maxrtt)
+		ic->maxrtt = rtt;
+}
+
+/* Process an ack up to id ackto: update the rtt estimate if it was timed, free acked packets. */
+void
+ilackto(Ilcb *ic, ulong ackto, Block *bp)
+{
+	Ilhdr *h;
+	ulong id;
+
+	if(ic->rttack == ackto)		/* this ack is for the packet whose round trip is being timed */
+		ilrttcalc(ic, bp);
+	/* Cancel if we've passed the packet we were interested in */
+	if(ic->rttack <= ackto)
+		ic->rttack = 0;
+
+	qlock(&ic->ackq);
+	while(ic->unacked) {
+		h = (Ilhdr *)ic->unacked->rp;
+		id = nhgetl(h->ilid);
+		if(ackto < id)		/* list is in send order: the first unacked id ends the scan */
+			break;
+
+		bp = ic->unacked;	/* bp is reused as a cursor; the caller's block is not freed here */
+		ic->unacked = bp->list;
+		bp->list = nil;
+		ic->unackedbytes -= blocklen(bp);
+		freeblist(bp);
+		ic->rexmit = 0;		/* progress was made: reset the backoff count and timer */
+		ilsettimeout(ic);
+	}
+	qunlock(&ic->ackq);
+}
+
+/* IL receive entry: validate lengths and checksum, find or create the conversation, run ilprocess. */
+void
+iliput(Proto *il, Ipifc *dummy, Block *bp)
+{
+	char *st;
+	Ilcb *ic;
+	Ilhdr *ih;
+	uchar raddr[IPaddrlen];
+	uchar laddr[IPaddrlen];
+	ushort sp, dp, csum;
+	int plen, illen;
+	Conv *new, *s;
+	Ilpriv *ipriv;
+
+	ipriv = il->priv;
+	ih = (Ilhdr *)bp->rp;
+	plen = blocklen(bp);
+	if(plen < IL_IPSIZE+IL_HDRSIZE){
+		netlog(il->f, Logil, "il: hlenerr\n");
+		ipriv->stats[HlenErrs]++;
+		goto raise;
+	}
+
+	illen = nhgets(ih->illen);
+	if(illen < IL_HDRSIZE || illen+IL_IPSIZE > plen){	/* ilpullup computes illen-IL_HDRSIZE unsigned */
+		netlog(il->f, Logil, "il: lenerr\n");
+		ipriv->stats[LenErrs]++;
+		goto raise;
+	}
+
+	sp = nhgets(ih->ildst);		/* sp is our port (the packet's destination), dp the sender's */
+	dp = nhgets(ih->ilsrc);
+	v4tov6(raddr, ih->src);
+	v4tov6(laddr, ih->dst);
+
+	if((csum = ptclcsum(bp, IL_IPSIZE, illen)) != 0) {
+		if(ih->iltype > Ilclose)
+			st = "?";
+		else
+			st = iltype[ih->iltype];
+		ipriv->stats[CsumErrs]++;
+		netlog(il->f, Logil, "il: cksum %ux, pkt(%s id %lud ack %lud %I/%d->%d)\n",
+			csum, st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
+		goto raise;
+	}
+
+	QLOCK(il);
+	s = iphtlook(&ipriv->ht, raddr, dp, laddr, sp);
+	if(s == nil){
+		if(ih->iltype == Ilsync)
+			ilreject(il->f, ih);		/* no listener */
+		QUNLOCK(il);
+		goto raise;
+	}
+
+	ic = (Ilcb*)s->ptcl;
+	if(ic->state == Illistening){
+		if(ih->iltype != Ilsync){
+			QUNLOCK(il);
+			if(ih->iltype > Ilclose)
+				st = "?";
+			else
+				st = iltype[ih->iltype];
+			ilreject(il->f, ih);		/* no channel and not sync */
+			netlog(il->f, Logil, "il: no channel, pkt(%s id %lud ack %lud %I/%ud->%ud)\n",
+				st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
+			goto raise;
+		}
+
+		new = Fsnewcall(s, raddr, dp, laddr, sp, V4);
+		if(new == nil){
+			QUNLOCK(il);
+			netlog(il->f, Logil, "il: bad newcall %I/%ud->%ud\n", raddr, sp, dp);
+			ilsendctl(s, ih, Ilclose, 0, nhgetl(ih->ilid), 0);
+			goto raise;
+		}
+		s = new;			/* from here on work on the new call, not the listener */
+
+		ic = (Ilcb*)s->ptcl;
+
+		ic->conv = s;
+		ic->state = Ilsyncee;
+		ilcbinit(ic);
+		ic->rstart = nhgetl(ih->ilid);
+		iphtadd(&ipriv->ht, s);
+	}
+
+	QLOCK(s);			/* take the conversation lock before letting go of the protocol lock */
+	QUNLOCK(il);
+	if(waserror()){
+		QUNLOCK(s);
+		nexterror();
+	}
+	ilprocess(s, ih, bp);		/* ilprocess owns bp from here */
+	QUNLOCK(s);
+	poperror();
+	return;
+raise:
+	freeblist(bp);
+}
+
+/* IL state machine for one received packet; consumes bp (queued for delivery or freed) on every path. */
+void
+_ilprocess(Conv *s, Ilhdr *h, Block *bp)
+{
+	Ilcb *ic;
+	ulong id, ack;
+	int spec;
+	Ilpriv *priv;
+
+	id = nhgetl(h->ilid);
+	ack = nhgetl(h->ilack);
+	spec = h->ilspec;	/* h points into bp: copy what is still needed after bp is handed off */
+	ic = (Ilcb*)s->ptcl;
+	ic->lastrecv = NOW;
+	ic->querytime = NOW + QueryTime;
+	priv = s->p->priv;
+	priv->stats[InMsgs]++;
+
+	switch(ic->state) {
+	default:
+		netlog(s->p->f, Logil, "il: unknown state %d\n", ic->state);
+	case Ilclosed:		/* the default case above falls through to here on purpose */
+		freeblist(bp);
+		break;
+	case Ilsyncer:
+		switch(h->iltype) {
+		default:
+			break;
+		case Ilsync:
+			if(ack != ic->start)
+				ilhangup(s, "connection rejected");
+			else {
+				ic->recvd = id;
+				ic->rstart = id;
+				ilsendctl(s, nil, Ilack, ic->next, ic->recvd, 0);
+				ic->state = Ilestablished;
+				ic->fasttimeout = 0;
+				ic->rexmit = 0;
+				Fsconnected(s, nil);
+				ilpullup(s);
+			}
+			break;
+		case Ilclose:
+			if(ack == ic->start)
+				ilhangup(s, "connection rejected");
+			break;
+		}
+		freeblist(bp);
+		break;
+	case Ilsyncee:
+		switch(h->iltype) {
+		default:
+			break;
+		case Ilsync:
+			if(id != ic->rstart || ack != 0){
+				illocalclose(s);
+			} else {
+				ic->recvd = id;
+				ilsendctl(s, nil, Ilsync, ic->start, ic->recvd, 0);
+			}
+			break;
+		case Ilack:
+			if(ack == ic->start) {
+				ic->state = Ilestablished;
+				ic->fasttimeout = 0;
+				ic->rexmit = 0;
+				ilpullup(s);
+			}
+			break;
+		case Ildata:
+			if(ack == ic->start) {
+				ic->state = Ilestablished;
+				ic->fasttimeout = 0;
+				ic->rexmit = 0;
+				goto established;	/* data acking our sync completes the handshake */
+			}
+			break;
+		case Ilclose:
+			if(ack == ic->start)
+				ilhangup(s, "remote close");
+			break;
+		}
+		freeblist(bp);
+		break;
+	case Ilestablished:
+	established:
+		switch(h->iltype) {
+		case Ilsync:
+			if(id != ic->rstart)
+				ilhangup(s, "remote close");
+			else
+				ilsendctl(s, nil, Ilack, ic->next, ic->rstart, 0);
+			freeblist(bp);
+			break;
+		case Ildata:
+			/* avoid consuming all the mount rpc buffers in the system:
+			 * if the input queue is too long, drop this packet. */
+			if (s->rq && qlen(s->rq) >= Maxrq) {
+				priv->stats[DroppedMsgs]++;
+				freeblist(bp);
+				break;
+			}
+			ilackto(ic, ack, bp);
+			iloutoforder(s, h, bp);		/* takes over bp: queues it or frees it */
+			ilpullup(s);
+			break;
+		case Ildataquery:
+			ilackto(ic, ack, bp);
+			iloutoforder(s, h, bp);		/* bp, and with it h, may be gone after this */
+			ilpullup(s);
+			ilsendctl(s, nil, Ilstate, ic->next, ic->recvd, spec);
+			break;
+		case Ilack:
+			ilackto(ic, ack, bp);
+			freeblist(bp);
+			break;
+		case Ilquery:
+			ilackto(ic, ack, bp);
+			ilsendctl(s, nil, Ilstate, ic->next, ic->recvd, spec);
+			freeblist(bp);
+			break;
+		case Ilstate:
+			if(ack >= ic->rttack)
+				ic->rttack = 0;
+			ilackto(ic, ack, bp);
+			if(h->ilspec > Nqt)		/* out-of-range slot: fall back to the compatibility slot 0 */
+				h->ilspec = 0;
+			if(ic->qt[h->ilspec] > ack){
+				ilrexmit(ic);
+				ilsettimeout(ic);
+			}
+			freeblist(bp);
+			break;
+		case Ilclose:
+			freeblist(bp);
+			if(ack < ic->start || ack > ic->next)
+				break;
+			ic->recvd = id;
+			ilsendctl(s, nil, Ilclose, ic->next, ic->recvd, 0);
+			ic->state = Ilclosing;
+			ilsettimeout(ic);
+			ilfreeq(ic);
+			break;
+		default:		/* unknown packet type: nothing else would free bp */
+			freeblist(bp);
+			break;
+		}
+		break;
+	case Illistening:
+		freeblist(bp);
+		break;
+	case Ilclosing:
+		switch(h->iltype) {
+		case Ilclose:
+			ic->recvd = id;
+			ilsendctl(s, nil, Ilclose, ic->next, ic->recvd, 0);
+			if(ack == ic->next)
+				ilhangup(s, nil);
+			break;
+		default:
+			break;
+		}
+		freeblist(bp);
+		break;
+	}
+}
+
+/* Resend a copy of the oldest unacked packet as an Ildataquery and back off the rexmit timer. */
+void
+ilrexmit(Ilcb *ic)
+{
+	Ilhdr *h;
+	Block *nb;
+	Conv *c;
+	ulong id;
+	Ilpriv *priv;
+
+	nb = nil;
+	qlock(&ic->ackq);
+	if(ic->unacked)
+		nb = copyblock(ic->unacked, blocklen(ic->unacked));
+	qunlock(&ic->ackq);
+
+	if(nb == nil)
+		return;
+
+	h = (Ilhdr*)nb->rp;
+	h->vihl = IP_VER4;
+	h->iltype = Ildataquery;
+	hnputl(h->ilack, ic->recvd);
+	h->ilspec = ilnextqt(ic);	/* ilnextqt takes ackq itself, so it is called unlocked */
+	h->ilsum[0] = 0;
+	h->ilsum[1] = 0;
+	if(ilcksum)			/* same switch ilkick, ilsendctl and ilreject obey */
+		hnputs(h->ilsum, ptclcsum(nb, IL_IPSIZE, nhgets(h->illen)));
+
+	c = ic->conv;
+	id = nhgetl(h->ilid);
+	netlog(c->p->f, Logil, "il: rexmit %lud %lud: %d %lud: %I %d/%d\n", id, ic->recvd,
+		ic->rexmit, ic->timeout,
+		c->raddr, c->lport, c->rport);
+
+	ilbackoff(ic);
+	ipoput4(c->p->f, nb, 0, c->ttl, c->tos, c);
+
+	/* statistics: Retrans is the counter ilxstats reports */
+	ic->rxtot++;
+	priv = c->p->priv;
+	priv->stats[Retrans]++;
+}
+
+/* DEBUG wrapper: log the connection state before and after handing the packet to _ilprocess. */
+void
+ilprocess(Conv *s, Ilhdr *h, Block *bp)
+{
+	Ilcb *ic;
+
+	ic = (Ilcb*)s->ptcl;
+	USED(ic);
+	/* h->iltype comes off the wire: never index iltype[] with it unchecked */
+	netlog(s->p->f, Logilmsg, "%11s rcv %d/%d snt %d/%d pkt(%s id %d ack %d %d->%d) ",
+		ilstates[ic->state], ic->rstart, ic->recvd, ic->start, ic->next,
+		h->iltype > Ilclose ? "?" : iltype[h->iltype], nhgetl(h->ilid),
+		nhgetl(h->ilack), nhgets(h->ilsrc), nhgets(h->ildst));
+
+	_ilprocess(s, h, bp);
+
+	netlog(s->p->f, Logilmsg, "%11s rcv %d snt %d\n", ilstates[ic->state], ic->recvd, ic->next);
+}
+
+/* Log and close s locally, hang up both queues with msg, and fail a connect still in progress. */
+void
+ilhangup(Conv *s, char *msg)
+{
+	Ilcb *ic;
+	int callout;
+
+	netlog(s->p->f, Logil, "il: hangup! %I %d/%d: %s\n", s->raddr,
+		s->lport, s->rport, msg?msg:"no reason");
+
+	ic = (Ilcb*)s->ptcl;
+	callout = ic->state == Ilsyncer;	/* sample before illocalclose overwrites the state */
+	illocalclose(s);
+
+	qhangup(s->rq, msg);
+	qhangup(s->wq, msg);
+	if(callout)
+		Fsconnected(s, msg);		/* we were the caller: report msg as the connect result */
+}
+
+/* Deliver to s->rq every queued packet that is next in sequence; only runs when established. */
+void
+ilpullup(Conv *s)
+{
+	Ilcb *ic;
+	Ilhdr *oh;
+	Block *bp;
+	ulong oid, dlen;
+	Ilpriv *ipriv;
+
+	ic = (Ilcb*)s->ptcl;
+	if(ic->state != Ilestablished)
+		return;
+
+	qlock(&ic->outo);
+	while(ic->outoforder) {
+		bp = ic->outoforder;
+		oh = (Ilhdr*)bp->rp;
+		oid = nhgetl(oh->ilid);
+		if(oid <= ic->recvd) {		/* already delivered: discard */
+			ic->outoforder = bp->list;
+			freeblist(bp);
+			continue;
+		}
+		if(oid != ic->recvd+1){		/* gap: leave the rest queued until it is filled */
+			ipriv = s->p->priv;
+			ipriv->stats[OutOfOrder]++;
+			break;
+		}
+
+		ic->recvd = oid;
+		ic->outoforder = bp->list;
+		bp->list = nil;
+		dlen = nhgets(oh->illen)-IL_HDRSIZE;	/* unsigned: relies on illen >= IL_HDRSIZE */
+		bp = trimblock(bp, IL_IPSIZE+IL_HDRSIZE, dlen);	/* strip the ip and il headers */
+		/*
+		 * Upper levels don't know about multiple-block
+		 * messages so copy all into one (yick).
+		 */
+		bp = concatblock(bp);
+		if(bp == 0)
+			panic("ilpullup");
+		bp = packblock(bp);
+		if(bp == 0)
+			panic("ilpullup2");
+		qpass(s->rq, bp);
+	}
+	qunlock(&ic->outo);
+}
+
+/* Insert bp, sorted by id, into s's out-of-order queue; frees it if outside the window or a duplicate. */
+void
+iloutoforder(Conv *s, Ilhdr *h, Block *bp)
+{
+	Ilcb *ic;
+	uchar *lid;
+	Block *f, **l;
+	ulong id, newid;
+	Ilpriv *ipriv;
+
+	ipriv = s->p->priv;
+	ic = (Ilcb*)s->ptcl;
+	bp->list = nil;
+	id = nhgetl(h->ilid);
+	/* Window checks */
+	if(id <= ic->recvd || id > ic->recvd+ic->window) {
+		netlog(s->p->f, Logil, "il: message outside window %lud <%lud-%lud>: %I %d/%d\n",
+			id, ic->recvd, ic->recvd+ic->window, s->raddr, s->lport, s->rport);
+		freeblist(bp);
+		return;
+	}
+
+	/* Packet is acceptable so sort onto receive queue for pullup */
+	qlock(&ic->outo);
+	if(ic->outoforder == nil)
+		ic->outoforder = bp;
+	else {
+		l = &ic->outoforder;		/* l trails f: the link to rewrite on insertion */
+		for(f = *l; f; f = f->list) {
+			lid = ((Ilhdr*)(f->rp))->ilid;
+			newid = nhgetl(lid);
+			if(id <= newid) {
+				if(id == newid) {
+					ipriv->stats[DupMsg]++;
+					ipriv->stats[DupBytes] += blocklen(bp);
+					qunlock(&ic->outo);
+					freeblist(bp);
+					return;
+				}
+				bp->list = f;	/* insert in front of the first larger id */
+				*l = bp;
+				qunlock(&ic->outo);
+				return;
+			}
+			l = &f->list;
+		}
+		*l = bp;			/* largest id so far: append at the tail */
+	}
+	qunlock(&ic->outo);
+}
+
+/* Send a header-only IL packet of the given type: a reply built from inih if set, else from ipc's state. */
+void
+ilsendctl(Conv *ipc, Ilhdr *inih, int type, ulong id, ulong ack, int ilspec)
+{
+	Ilhdr *ih;
+	Ilcb *ic;
+	Block *bp;
+	int ttl, tos;
+
+if(ipc==nil)	/* checked up front: ipc is dereferenced below whenever inih is nil */
+	panic("ipc is nil caller is %#p", getcallerpc(&ipc));
+if(ipc->p==nil)
+	panic("ipc->p is nil");
+
+	bp = allocb(IL_IPSIZE+IL_HDRSIZE);
+	bp->wp += IL_IPSIZE+IL_HDRSIZE;
+	ih = (Ilhdr *)(bp->rp);
+	ih->vihl = IP_VER4;
+
+	/* Ip fields */
+	ih->proto = IP_ILPROTO;
+	hnputs(ih->illen, IL_HDRSIZE);
+	ih->frag[0] = 0;
+	ih->frag[1] = 0;
+	if(inih) {
+		hnputl(ih->dst, nhgetl(inih->src));	/* mirror the incoming header; id and ack args are unused */
+		hnputl(ih->src, nhgetl(inih->dst));
+		hnputs(ih->ilsrc, nhgets(inih->ildst));
+		hnputs(ih->ildst, nhgets(inih->ilsrc));
+		hnputl(ih->ilid, nhgetl(inih->ilack));
+		hnputl(ih->ilack, nhgetl(inih->ilid));
+		ttl = MAXTTL;
+		tos = DFLTTOS;
+	}
+	else {
+		v6tov4(ih->dst, ipc->raddr);
+		v6tov4(ih->src, ipc->laddr);
+		hnputs(ih->ilsrc, ipc->lport);
+		hnputs(ih->ildst, ipc->rport);
+		hnputl(ih->ilid, id);
+		hnputl(ih->ilack, ack);
+		ic = (Ilcb*)ipc->ptcl;
+		ic->acksent = ack;		/* a control packet also counts as an ack sent */
+		ic->acktime = NOW;
+		ttl = ipc->ttl;
+		tos = ipc->tos;
+	}
+	ih->iltype = type;
+	ih->ilspec = ilspec;
+	ih->ilsum[0] = 0;
+	ih->ilsum[1] = 0;
+
+	if(ilcksum)
+		hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE));
+
+	netlog(ipc->p->f, Logilmsg, "ctl(%s id %d ack %d %d->%d)\n",
+		iltype[ih->iltype], nhgetl(ih->ilid), nhgetl(ih->ilack),
+		nhgets(ih->ilsrc), nhgets(ih->ildst));
+
+	ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
+}
+
+/* Answer inih with an Ilclose built by mirroring its addresses, ports and ids; needs no conversation. */
+void
+ilreject(Fs *f, Ilhdr *inih)
+{
+	Ilhdr *ih;
+	Block *bp;
+
+	bp = allocb(IL_IPSIZE+IL_HDRSIZE);
+	bp->wp += IL_IPSIZE+IL_HDRSIZE;
+	ih = (Ilhdr *)(bp->rp);
+	ih->vihl = IP_VER4;
+
+	/* Ip fields */
+	ih->proto = IP_ILPROTO;
+	hnputs(ih->illen, IL_HDRSIZE);
+	ih->frag[0] = 0;
+	ih->frag[1] = 0;
+	hnputl(ih->dst, nhgetl(inih->src));	/* swap source and destination */
+	hnputl(ih->src, nhgetl(inih->dst));
+	hnputs(ih->ilsrc, nhgets(inih->ildst));
+	hnputs(ih->ildst, nhgets(inih->ilsrc));
+	hnputl(ih->ilid, nhgetl(inih->ilack));	/* swap id and ack as well */
+	hnputl(ih->ilack, nhgetl(inih->ilid));
+	ih->iltype = Ilclose;
+	ih->ilspec = 0;
+	ih->ilsum[0] = 0;
+	ih->ilsum[1] = 0;
+
+	if(ilcksum)
+		hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE));
+
+	ipoput4(f, bp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/* Arm ic->timeout: unscaled delay + unackedbytes/rate + twice the unscaled mdev + AckDelay from NOW. */
+void
+ilsettimeout(Ilcb *ic)
+{
+	ulong delta;
+
+	delta = (ic->delay>>LogAGain)
+		+ ic->unackedbytes/(ic->rate>>LogAGain)
+		+ (ic->mdev>>(LogDGain-1))
+		+ AckDelay;
+	/* never wait longer than MaxTimeout */
+	ic->timeout = NOW + (delta > MaxTimeout ? MaxTimeout : delta);
+}
+
+/* Like ilsettimeout, but stretch the interval by 1.5x per retransmit so far, then count this one. */
+void
+ilbackoff(Ilcb *ic)
+{
+	ulong pt;
+	int i;
+
+	pt = (ic->delay>>LogAGain)
+		+ ic->unackedbytes/(ic->rate>>LogAGain)
+		+ (ic->mdev>>(LogDGain-1))
+		+ AckDelay;
+	for(i = 0; i < ic->rexmit; i++)
+		pt = pt + (pt>>1);		/* multiply by 1.5 */
+	if(pt > MaxTimeout)
+		pt = MaxTimeout;
+	ic->timeout = NOW + pt;
+
+	if(ic->fasttimeout)			/* quick-connect attempts retry every tick instead */
+		ic->timeout = NOW+Iltickms;
+	ic->rexmit++;
+}
+
+// later(t1, t2, x): 1 if t1 is past t2; complain (tagged x) if the two differ by more than Tfuture
+#define Tfuture (1000*60*60)
+int
+later(ulong t1, ulong t2, char *x)
+{
+	int dt;
+
+	dt = t1 - t2;		/* signed difference of the two unsigned times */
+	if(dt > 0) {
+		if(x != nil && dt > Tfuture)
+			print("%s: way future %d\n", x, dt);
+		return 1;
+	}
+	if(dt < -Tfuture) {	/* t1 far behind t2 is also reported as later */
+		if(x != nil)
+			print("%s: way past %d\n", x, -dt);
+		return 1;
+	}
+	return 0;
+}
+
+/* Timer kproc (started by ilstart): every Iltickms, walk il's conversations and run their timers. */
+void
+ilackproc(void *x)
+{
+	Ilcb *ic;
+	Conv **s, *p;
+	Proto *il;
+
+	il = x;
+loop:
+	tsleep(&up->sleep, return0, 0, Iltickms);
+	for(s = il->conv; s && *s; s++) {	/* NOTE(review): no lock is taken in this loop - confirm intended */
+		p = *s;
+		ic = (Ilcb*)p->ptcl;
+
+		switch(ic->state) {
+		case Ilclosed:
+		case Illistening:
+			break;
+		case Ilclosing:
+			if(later(NOW, ic->timeout, "timeout0")) {
+				if(ic->rexmit > MaxRexmit){
+					ilhangup(p, nil);
+					break;
+				}
+				ilsendctl(p, nil, Ilclose, ic->next, ic->recvd, 0);	/* resend the close */
+				ilbackoff(ic);
+			}
+			break;
+
+		case Ilsyncee:
+		case Ilsyncer:
+			if(later(NOW, ic->timeout, "timeout1")) {
+				if(ic->rexmit > MaxRexmit){
+					ilhangup(p, etime);
+					break;
+				}
+				ilsendctl(p, nil, Ilsync, ic->start, ic->recvd, 0);	/* resend the sync */
+				ilbackoff(ic);
+			}
+			break;
+
+		case Ilestablished:
+			if(ic->recvd != ic->acksent)	/* something received that has not been acked yet */
+			if(later(NOW, ic->acktime, "acktime"))
+				ilsendctl(p, nil, Ilack, ic->next, ic->recvd, 0);
+
+			if(later(NOW, ic->querytime, "querytime")){
+				if(later(NOW, ic->lastrecv+DeathTime, "deathtime")){
+					netlog(il->f, Logil, "il: hangup: deathtime\n");
+					ilhangup(p, etime);
+					break;
+				}
+				ilsendctl(p, nil, Ilquery, ic->next, ic->recvd, ilnextqt(ic));
+				ic->querytime = NOW + QueryTime;
+			}
+
+			if(ic->unacked != nil)		/* data outstanding and its rexmit timer has expired */
+			if(later(NOW, ic->timeout, "timeout2")) {
+				if(ic->rexmit > MaxRexmit){
+					netlog(il->f, Logil, "il: hangup: too many rexmits\n");
+					ilhangup(p, etime);
+					break;
+				}
+				ilsendctl(p, nil, Ilquery, ic->next, ic->recvd, ilnextqt(ic));
+				ic->rxquery++;
+				ilbackoff(ic);
+			}
+			break;
+		}
+	}
+	goto loop;
+}
+
+/* Reset ic for a new connection: random start id, empty queues, default estimates and timers. */
+void
+ilcbinit(Ilcb *ic)
+{
+	ic->start = nrand(0x1000000);
+	ic->next = ic->start+1;
+	ic->recvd = 0;
+	ic->window = Defaultwin;
+	ic->unackedbytes = 0;
+	ic->unacked = nil;
+	ic->outoforder = nil;
+	ic->rexmit = 0;
+	ic->rxtot = 0;
+	ic->rxquery = 0;
+	ic->qtx = 1;
+	ic->fasttimeout = 0;	/* NOTE(review): rttack, maxrtt and acksent are not reset here - confirm intended */
+	/* timers */
+	ic->delay = DefRtt<<LogAGain;		/* estimates are stored scaled by their gains */
+	ic->mdev = DefRtt<<LogDGain;
+	ic->rate = DefByteRate<<LogAGain;
+	ic->querytime = NOW + QueryTime;
+	ic->lastrecv = NOW;	/* or we'll timeout right away */
+	ilsettimeout(ic);
+}
+
+/* Start c as a listener (IL_LISTEN) or caller (IL_CONNECT); launches the ack kproc on first use. */
+char*
+ilstart(Conv *c, int type, int fasttimeout)
+{
+	Ilcb *ic;
+	Ilpriv *ipriv;
+	char kpname[KNAMELEN];
+
+	ipriv = c->p->priv;
+	if(ipriv->ackprocstarted == 0){
+		qlock(&ipriv->apl);
+		if(ipriv->ackprocstarted == 0){		/* recheck under the lock */
+			snprint(kpname, sizeof kpname, "#I%dilack", c->p->f->dev);
+			kproc(kpname, ilackproc, c->p);
+			ipriv->ackprocstarted = 1;
+		}
+		qunlock(&ipriv->apl);
+	}
+
+	ic = (Ilcb*)c->ptcl;
+	ic->conv = c;
+
+	if(ic->state != Ilclosed)		/* already started: nothing to do */
+		return nil;
+
+	ilcbinit(ic);
+
+	if(fasttimeout){
+		/* timeout if we can't connect quickly */
+		ic->fasttimeout = 1;
+		ic->timeout = NOW+Iltickms;
+		ic->rexmit = MaxRexmit - 4;	/* leaves only a few retries before ilackproc gives up */
+	}
+
+	switch(type) {
+	default:
+		netlog(c->p->f, Logil, "il: start: type %d\n", type);
+		break;
+	case IL_LISTEN:
+		ic->state = Illistening;
+		iphtadd(&ipriv->ht, c);
+		break;
+	case IL_CONNECT:
+		ic->state = Ilsyncer;
+		iphtadd(&ipriv->ht, c);
+		ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0);
+		break;
+	}
+
+	return nil;
+}
+
+/* Free every packet on ic's unacked and out-of-order lists and reset the unacked bookkeeping. */
+void
+ilfreeq(Ilcb *ic)
+{
+	Block *bp;
+	qlock(&ic->ackq);
+	while((bp = ic->unacked) != nil){
+		ic->unacked = bp->list;
+		freeblist(bp);
+	}
+	ic->unackedtail = nil;		/* would otherwise point at a freed block */
+	ic->unackedbytes = 0;		/* read by ilsettimeout, ilbackoff and ilstate */
+	qunlock(&ic->ackq);
+
+	qlock(&ic->outo);
+	while((bp = ic->outoforder) != nil){
+		ic->outoforder = bp->list;
+		freeblist(bp);
+	}
+	qunlock(&ic->outo);
+}
+
+/* Advice callback: find the conversation matching the header in bp and hang it up if still connecting. */
+void
+iladvise(Proto *il, Block *bp, char *msg)
+{
+	Ilhdr *h;
+	Ilcb *ic;
+	uchar source[IPaddrlen], dest[IPaddrlen];
+	ushort psource;
+	Conv *s, **p;
+
+	h = (Ilhdr*)(bp->rp);
+
+	v4tov6(dest, h->dst);
+	v4tov6(source, h->src);
+	psource = nhgets(h->ilsrc);
+
+	/* Look for a connection, unfortunately the destination port is missing */
+	QLOCK(il);
+	for(p = il->conv; *p; p++) {
+		s = *p;
+		if(s->lport == psource)		/* the header's source is matched against our local side */
+		if(ipcmp(s->laddr, source) == 0)
+		if(ipcmp(s->raddr, dest) == 0){
+			QUNLOCK(il);
+			ic = (Ilcb*)s->ptcl;
+			switch(ic->state){
+			case Ilsyncer:		/* only a connect in progress is torn down */
+				ilhangup(s, msg);
+				break;
+			}
+			freeblist(bp);
+			return;
+		}
+	}
+	QUNLOCK(il);
+	freeblist(bp);
+}
+
+/* Advance ic's query slot (1..Nqt, wrapping), stamp it with next-1 and return the slot number. */
+int
+ilnextqt(Ilcb *ic)
+{
+	int slot;
+
+	qlock(&ic->ackq);
+	slot = ic->qtx + 1;
+	if(slot > Nqt)
+		slot = 1;
+	ic->qtx = slot;
+	ic->qt[slot] = ic->next-1;	/* highest xmitted packet */
+	ic->qt[0] = ic->qt[slot];	/* compatibility with old implementations */
+	qunlock(&ic->ackq);
+	return slot;
+}
+
+/* calculate scale constants that convert fast ticks to ms (more or less) */
+static void
+inittimescale(void)
+{
+	uvlong hz;
+
+	fastticks(&hz);
+	/* default to 1:1 so a reported rate of 0 cannot cause a division by zero */
+	scalediv = 1;
+	scalemul = 1;
+	if(hz > 1000)
+		scalediv = hz/1000;
+	else if(hz > 0)
+		scalemul = 1000/hz;
+}
+
+/* Allocate the il Proto and its private state, fill in the handlers and register it with f. */
+void
+ilinit(Fs *f)
+{
+	Proto *il;
+
+	inittimescale();		/* sets scalemul/scalediv, used by ilrttcalc */
+	il = smalloc(sizeof(Proto));
+	il->priv = smalloc(sizeof(Ilpriv));
+	il->name = "il";
+	il->connect = ilconnect;
+	il->announce = ilannounce;
+	il->state = ilstate;
+	il->create = ilcreate;
+	il->close = ilclose;
+	il->rcv = iliput;
+	il->ctl = nil;
+	il->advise = iladvise;
+	il->stats = ilxstats;
+	il->inuse = ilinuse;
+	il->gc = nil;
+	il->ipproto = IP_ILPROTO;
+	il->nc = scalednconv();
+	il->ptclsize = sizeof(Ilcb);	/* each conversation's ptcl area holds one Ilcb */
+	Fsproto(f, il);
+}
diff --git a/src/9vx/a/ip/inferno.c b/src/9vx/a/ip/inferno.c
@@ -0,0 +1,46 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+#include "ip.h"
+
+/*
+ * some hacks for commonality twixt inferno and plan9
+ */
+
+char*
+commonuser(void)
+{
+	return up->user;	/* the user field of up (presumably the current process - confirm) */
+}
+
+Chan*
+commonfdtochan(int fd, int mode, int a, int b)
+{
+	return fdtochan(fd, mode, a, b);	/* plain pass-through to fdtochan */
+}
+
+char*
+commonerror(void)
+{
+	return up->errstr;	/* the errstr field of up (presumably the current process - confirm) */
+}
+
+char*
+bootp(Ipifc* _)
+{
+	return "unimplemented";	/* stub: ignores its argument and always returns this message */
+}
+
+int
+bootpread(char *buf, ulong off, int n)
+{
+	return 0;	/* stub: ignores its arguments and always returns 0 */
+}
+
+Medium tripmedium =	/* only the first field is given; the rest of the Medium is zero-initialized */
+{
+	"trip",
+};
diff --git a/src/9vx/a/ip/ip.c b/src/9vx/a/ip/ip.c
@@ -0,0 +1,776 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+typedef struct Fragment4 Fragment4;
+typedef struct Fragment6 Fragment6;
+typedef struct Ipfrag Ipfrag;
+
+/*
+ * Version field of the packet at xp->rp, left in the high nibble of the
+ * first header byte so it compares directly with IP_VER4/IP_VER6.
+ */
+#define BLKIPVER(xp) (((Ip4hdr*)((xp)->rp))->vihl&0xF0)
+
+/*
+ * MIB II counters.
+ * Each constant is an index into IP.stats[] and into statnames[];
+ * Nstats is the number of counters.
+ */
+enum
+{
+ Forwarding,
+ DefaultTTL,
+ InReceives,
+ InHdrErrors,
+ InAddrErrors,
+ ForwDatagrams,
+ InUnknownProtos,
+ InDiscards,
+ InDelivers,
+ OutRequests,
+ OutDiscards,
+ OutNoRoutes,
+ ReasmTimeout,
+ ReasmReqds,
+ ReasmOKs,
+ ReasmFails,
+ FragOKs,
+ FragFails,
+ FragCreates,
+
+ Nstats,
+};
+
+/*
+ * An IPv4 reassembly queue: the fragments received so far for one
+ * datagram, identified by (src, dst, id).
+ */
+struct Fragment4
+{
+ Block* blist; /* queued fragments, kept in increasing offset order */
+ Fragment4* next; /* next queue on the active (flisthead4) or free (fragfree4) list */
+ ulong src; /* header source address, as read by nhgetl */
+ ulong dst; /* header destination address, as read by nhgetl */
+ ushort id; /* header identification field */
+ ulong age; /* NOW value after which the queue is timed out */
+};
+
+/*
+ * Reassembly queue with 16-byte addresses; presumably the IPv6
+ * counterpart of Fragment4 (its users are not in this file section --
+ * only initfrag touches it here, to build the free list).
+ */
+struct Fragment6
+{
+ Block* blist;
+ Fragment6* next;
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+ uint id;
+ ulong age;
+};
+
+/*
+ * Per-fragment bookkeeping that ip4reassemble stores at the start of
+ * the block's buffer (see BKFG).
+ */
+struct Ipfrag
+{
+ ushort foff; /* byte offset of this fragment's data within the datagram */
+ ushort flen; /* bytes of data in this fragment (header excluded) */
+};
+
+/* an instance of IP: counters, reassembly state and id generators */
+struct IP
+{
+ ulong stats[Nstats]; /* MIB II counters, indexed by the enum above */
+
+ QLock fraglock4; /* guards flisthead4 and fragfree4 */
+ Fragment4* flisthead4; /* active v4 reassembly queues, newest first */
+ Fragment4* fragfree4; /* unused v4 reassembly queues */
+ Ref id4; /* incremented to produce v4 identification values */
+
+ QLock fraglock6;
+ Fragment6* flisthead6;
+ Fragment6* fragfree6;
+ Ref id6;
+
+ int iprouting; /* true if we route like a gateway */
+};
+
+/*
+ * Printable name of each counter, indexed by the MIB II enum;
+ * ipstats() prints one "name: value" line per entry.
+ */
+static char *statnames[] =
+{
+[Forwarding] "Forwarding",
+[DefaultTTL] "DefaultTTL",
+[InReceives] "InReceives",
+[InHdrErrors] "InHdrErrors",
+[InAddrErrors] "InAddrErrors",
+[ForwDatagrams] "ForwDatagrams",
+[InUnknownProtos] "InUnknownProtos",
+[InDiscards] "InDiscards",
+[InDelivers] "InDelivers",
+[OutRequests] "OutRequests",
+[OutDiscards] "OutDiscards",
+[OutNoRoutes] "OutNoRoutes",
+[ReasmTimeout] "ReasmTimeout",
+[ReasmReqds] "ReasmReqds",
+[ReasmOKs] "ReasmOKs",
+[ReasmFails] "ReasmFails",
+[FragOKs] "FragOKs",
+[FragFails] "FragFails",
+[FragCreates] "FragCreates",
+};
+
+/* the IPv4 header located at the block's read pointer */
+#define BLKIP(xp) ((Ip4hdr*)((xp)->rp))
+/*
+ * This sleazy macro overlays an Ipfrag on the very start of the block's
+ * buffer, so it relies on the space in front of the packet (the media
+ * header room) being larger than sizeof(Ipfrag). ip4reassemble checks
+ * this is true and pads the block when it is not.
+ */
+#define BKFG(xp) ((Ipfrag*)((xp)->base))
+
+ushort ipcsum(uchar*);
+Block* ip4reassemble(IP*, int, Block*, Ip4hdr*);
+void ipfragfree4(IP*, Fragment4*);
+Fragment4* ipfragallo4(IP*);
+
+/*
+ * Allocate the stack-wide IPv6 parameter block, fill in its defaults
+ * and hang it off f->v6p.
+ */
+void
+ip_init_6(Fs *f)
+{
+	v6params *params;
+	Routerparams *rp;
+
+	params = smalloc(sizeof(v6params));
+	rp = &params->rp;
+
+	/* router parameters */
+	rp->mflag = 0;			/* default not managed */
+	rp->oflag = 0;
+	rp->maxraint = 600000;		/* millisecs */
+	rp->minraint = 200000;
+	rp->linkmtu = 0;		/* no mtu sent */
+	rp->reachtime = 0;
+	rp->rxmitra = 0;
+	rp->ttl = MAXTTL;
+	rp->routerlt = 3 * rp->maxraint;
+
+	/* host parameters */
+	params->hp.rxmithost = 1000;	/* v6 RETRANS_TIMER */
+
+	/* no default router selected yet */
+	params->cdrouter = -1;
+
+	f->v6p = params;
+}
+
+/*
+ * Allocate `size' reassembly queues for v4 and for v6 and chain each
+ * array into its free list (ip->fragfree4, ip->fragfree6).
+ * Panics if either allocation fails.
+ */
+void
+initfrag(IP *ip, int size)
+{
+	Fragment4 *q4;
+	Fragment6 *q6;
+	int i;
+
+	q4 = (Fragment4*)malloc(sizeof(Fragment4) * size);
+	ip->fragfree4 = q4;
+	if(q4 == nil)
+		panic("initfrag");
+
+	/* link every entry to its successor, then terminate the chain */
+	for(i = 0; i < size; i++)
+		q4[i].next = &q4[i+1];
+	q4[size-1].next = nil;
+
+	q6 = (Fragment6*)malloc(sizeof(Fragment6) * size);
+	ip->fragfree6 = q6;
+	if(q6 == nil)
+		panic("initfrag");
+
+	for(i = 0; i < size; i++)
+		q6[i].next = &q6[i+1];
+	q6[size-1].next = nil;
+}
+
+/*
+ * Create the IP instance for stack f: allocate it, give it 100
+ * reassembly queues per IP version, and set up the v6 parameters.
+ */
+void
+ip_init(Fs *f)
+{
+	IP *inst;
+
+	inst = smalloc(sizeof(IP));
+	f->ip = inst;
+	initfrag(inst, 100);
+
+	ip_init_6(f);
+}
+
+/*
+ * Turn gateway behaviour on or off for stack f.  The Forwarding
+ * counter is kept in step: 1 when routing is on, 2 when it is off.
+ */
+void
+iprouting(Fs *f, int on)
+{
+	IP *ip;
+
+	ip = f->ip;
+	ip->iprouting = on;
+	ip->stats[Forwarding] = (on != 0) ? 1 : 2;
+}
+
+/*
+ * ipoput4 - send the IPv4 packet in bp, fragmenting it if it does not
+ * fit the outgoing interface.
+ *
+ * f stack instance
+ * bp packet; an Ip4hdr must start at bp->rp
+ * gating non-zero when forwarding a received packet: the existing
+ * version/length byte, tos, id and fragment fields are kept
+ * ttl written into the header unconditionally
+ * tos written into the header unless gating
+ * c passed to v4lookup for the destination route
+ *
+ * Returns -1 when no route to the destination exists and 0 otherwise,
+ * including when the packet is dropped for another reason.
+ * On the normal return paths bp is either handed to the medium's
+ * bwrite or freed here.
+ */
+int
+ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+{
+ Ipifc *ifc;
+ uchar *gate;
+ ulong fragoff;
+ Block *xp, *nb;
+ Ip4hdr *eh, *feh;
+ int lid, len, seglen, chunk, dlen, blklen, offset, medialen;
+ Route *r, *sr;
+ IP *ip;
+ int rv = 0;
+
+ ip = f->ip;
+
+ /* Fill out the ip header */
+ eh = (Ip4hdr*)(bp->rp);
+
+ ip->stats[OutRequests]++;
+
+ /* Number of uchars in data and ip header to write */
+ len = blocklen(bp);
+
+ /* when forwarding, trust the header's length but never more than we hold */
+ if(gating){
+ chunk = nhgets(eh->length);
+ if(chunk > len){
+ ip->stats[OutDiscards]++;
+ netlog(f, Logip, "short gated packet\n");
+ goto free;
+ }
+ if(chunk < len)
+ len = chunk;
+ }
+ if(len >= IP_MAX){
+ ip->stats[OutDiscards]++;
+ netlog(f, Logip, "exceeded ip max size %V\n", eh->dst);
+ goto free;
+ }
+
+ r = v4lookup(f, eh->dst, c);
+ if(r == nil){
+ ip->stats[OutNoRoutes]++;
+ netlog(f, Logip, "no interface %V\n", eh->dst);
+ rv = -1;
+ goto free;
+ }
+
+ /*
+ * pick the next hop: the destination itself for interface, self,
+ * broadcast and multicast routes, otherwise the route's gateway.
+ * broadcast/multicast leave via the interface owning the source
+ * address when that address is one of our unicast addresses.
+ */
+ ifc = r->ifc;
+ if(r->type & (Rifc|Runi))
+ gate = eh->dst;
+ else
+ if(r->type & (Rbcast|Rmulti)) {
+ gate = eh->dst;
+ sr = v4lookup(f, eh->src, nil);
+ if(sr != nil && (sr->type & Runi))
+ ifc = sr->ifc;
+ }
+ else
+ gate = r->v4.gate;
+
+ if(!gating)
+ eh->vihl = IP_VER4|IP_HLEN4;
+ eh->ttl = ttl;
+ if(!gating)
+ eh->tos = tos;
+
+ /* the packet is dropped if the interface read lock is not available */
+ if(!CANRLOCK(ifc))
+ goto free;
+ /* release the lock before passing on any error raised below */
+ if(waserror()){
+ RUNLOCK(ifc);
+ nexterror();
+ }
+ /* interface has no medium bound */
+ if(ifc->m == nil)
+ goto raise;
+
+ /* If we dont need to fragment just send it */
+ medialen = ifc->maxtu - ifc->m->hsize;
+ if(len <= medialen) {
+ if(!gating)
+ hnputs(eh->id, incref(&ip->id4));
+ hnputs(eh->length, len);
+ if(!gating){
+ eh->frag[0] = 0;
+ eh->frag[1] = 0;
+ }
+ /* checksum is computed with the checksum field zeroed */
+ eh->cksum[0] = 0;
+ eh->cksum[1] = 0;
+ hnputs(eh->cksum, ipcsum(&eh->vihl));
+ ifc->m->bwrite(ifc, bp, V4, gate);
+ RUNLOCK(ifc);
+ poperror();
+ return 0;
+ }
+
+if((eh->frag[0] & (IP_DF>>8)) && !gating) print("%V: DF set\n", eh->dst);
+
+ /* too big and Don't Fragment is set: report it and drop */
+ if(eh->frag[0] & (IP_DF>>8)){
+ ip->stats[FragFails]++;
+ ip->stats[OutDiscards]++;
+ icmpcantfrag(f, bp, medialen);
+ netlog(f, Logip, "%V: eh->frag[0] & (IP_DF>>8)\n", eh->dst);
+ goto raise;
+ }
+
+ /* data bytes per fragment, rounded down to a multiple of 8 */
+ seglen = (medialen - IP4HDR) & ~7;
+ if(seglen < 8){
+ ip->stats[FragFails]++;
+ ip->stats[OutDiscards]++;
+ netlog(f, Logip, "%V seglen < 8\n", eh->dst);
+ goto raise;
+ }
+
+ dlen = len - IP4HDR;
+ xp = bp;
+ /* every fragment carries the same id */
+ if(gating)
+ lid = nhgets(eh->id);
+ else
+ lid = incref(&ip->id4);
+
+ /* advance xp/rp past the IP header so that xp->rp is the first data byte */
+ offset = IP4HDR;
+ while(xp != nil && offset && offset >= BLEN(xp)) {
+ offset -= BLEN(xp);
+ xp = xp->next;
+ }
+ xp->rp += offset;
+
+ /* a forwarded packet may itself be a fragment: start from its own frag field */
+ if(gating)
+ fragoff = nhgets(eh->frag)<<3;
+ else
+ fragoff = 0;
+ dlen += fragoff;
+ for(; fragoff < dlen; fragoff += seglen) {
+ nb = allocb(IP4HDR+seglen);
+ feh = (Ip4hdr*)(nb->rp);
+
+ /* each fragment starts with a copy of the original header */
+ memmove(nb->wp, eh, IP4HDR);
+ nb->wp += IP4HDR;
+
+ /* the last fragment is shortened and does not get IP_MF added */
+ if((fragoff + seglen) >= dlen) {
+ seglen = dlen - fragoff;
+ hnputs(feh->frag, fragoff>>3);
+ }
+ else
+ hnputs(feh->frag, (fragoff>>3)|IP_MF);
+
+ hnputs(feh->length, seglen + IP4HDR);
+ hnputs(feh->id, lid);
+
+ /* Copy up the data area */
+ chunk = seglen;
+ while(chunk) {
+ if(!xp) {
+ ip->stats[OutDiscards]++;
+ ip->stats[FragFails]++;
+ freeblist(nb);
+ netlog(f, Logip, "!xp: chunk %d\n", chunk);
+ goto raise;
+ }
+ blklen = chunk;
+ if(BLEN(xp) < chunk)
+ blklen = BLEN(xp);
+ memmove(nb->wp, xp->rp, blklen);
+ nb->wp += blklen;
+ xp->rp += blklen;
+ chunk -= blklen;
+ if(xp->rp == xp->wp)
+ xp = xp->next;
+ }
+
+ feh->cksum[0] = 0;
+ feh->cksum[1] = 0;
+ hnputs(feh->cksum, ipcsum(&feh->vihl));
+ ifc->m->bwrite(ifc, nb, V4, gate);
+ ip->stats[FragCreates]++;
+ }
+ ip->stats[FragOKs]++;
+ /* common exit once the interface lock is held; also reached on success after fragmenting */
+raise:
+ RUNLOCK(ifc);
+ poperror();
+ /* exit for drops before the lock was taken; the original block list is released here */
+free:
+ freeblist(bp);
+ return rv;
+}
+
+/*
+ * ipiput4 - input routine for a packet that arrived on ifc.
+ *
+ * Packets whose version is not 4 are handed to ipiput6.  For v4 the
+ * header is pulled into the first block and checksummed, IP options
+ * are stripped when the packet is addressed to this host, and then
+ * the packet is either
+ *   - forwarded with ipoput4 (not for us and routing enabled), or
+ *   - reassembled if it is a fragment and passed to the receive
+ *     routine of the protocol named in the header.
+ * Packets that cannot be handled are counted and the whole block
+ * list is freed.
+ */
+void
+ipiput4(Fs *f, Ipifc *ifc, Block *bp)
+{
+	int hl;
+	int hop, tos, proto, olen;
+	Ip4hdr *h;
+	Proto *p;
+	ushort frag;
+	int notforme;
+	uchar *dp, v6dst[IPaddrlen];
+	IP *ip;
+	Route *r;
+
+	if(BLKIPVER(bp) != IP_VER4) {
+		ipiput6(f, ifc, bp);
+		return;
+	}
+
+	ip = f->ip;
+	ip->stats[InReceives]++;
+
+	/*
+	 *  Ensure we have all the header info in the first
+	 *  block.  Make life easier for other protocols by
+	 *  collecting up to the first 64 bytes in the first block.
+	 */
+	if(BLEN(bp) < 64) {
+		hl = blocklen(bp);
+		if(hl < IP4HDR)
+			hl = IP4HDR;
+		if(hl > 64)
+			hl = 64;
+		bp = pullupblock(bp, hl);
+		if(bp == nil)
+			return;
+	}
+
+	h = (Ip4hdr*)(bp->rp);
+
+	/* dump anything whose header doesn't checksum (unless flagged Bipck) */
+	if((bp->flag & Bipck) == 0 && ipcsum(&h->vihl)) {
+		ip->stats[InHdrErrors]++;
+		netlog(f, Logip, "ip: checksum error %V\n", h->src);
+		freeblist(bp);
+		return;
+	}
+	v4tov6(v6dst, h->dst);
+	notforme = ipforme(f, v6dst) == 0;
+
+	/* Check header length and version */
+	if((h->vihl&0x0F) != IP_HLEN4) {
+		hl = (h->vihl&0xF)<<2;
+		if(hl < (IP_HLEN4<<2)) {
+			ip->stats[InHdrErrors]++;
+			netlog(f, Logip, "ip: %V bad hivl %ux\n", h->src, h->vihl);
+			freeblist(bp);
+			return;
+		}
+		/*
+		 * If this is not routed strip off the options: slide the
+		 * fixed header up against the data and fix up vihl/length.
+		 */
+		if(notforme == 0) {
+			olen = nhgets(h->length);
+			dp = bp->rp + (hl - (IP_HLEN4<<2));
+			memmove(dp, h, IP_HLEN4<<2);
+			bp->rp = dp;
+			h = (Ip4hdr*)(bp->rp);
+			h->vihl = (IP_VER4|IP_HLEN4);
+			hnputs(h->length, olen-hl+(IP_HLEN4<<2));
+		}
+	}
+
+	/* route */
+	if(notforme) {
+		Conv conv;
+
+		if(!ip->iprouting){
+			/* bp may be a chain: release all of it, like every other drop path */
+			freeblist(bp);
+			return;
+		}
+
+		/* don't forward to source's network */
+		conv.r = nil;
+		r = v4lookup(f, h->dst, &conv);
+		if(r == nil || r->ifc == ifc){
+			ip->stats[OutDiscards]++;
+			freeblist(bp);
+			return;
+		}
+
+		/* don't forward if packet has timed out */
+		hop = h->ttl;
+		if(hop < 1) {
+			ip->stats[InHdrErrors]++;
+			icmpttlexceeded(f, ifc->lifc->local, bp);
+			freeblist(bp);
+			return;
+		}
+
+		/* reassemble if the interface expects it */
+		if(r->ifc == nil) panic("nil route rfc");
+		if(r->ifc->reassemble){
+			frag = nhgets(h->frag);
+			if(frag) {
+				/* tos doubles as the "more fragments" flag for ip4reassemble */
+				h->tos = 0;
+				if(frag & IP_MF)
+					h->tos = 1;
+				bp = ip4reassemble(ip, frag, bp, h);
+				if(bp == nil)
+					return;
+				h = (Ip4hdr*)(bp->rp);
+			}
+		}
+
+		/* forward with the ttl decremented by one */
+		ip->stats[ForwDatagrams]++;
+		tos = h->tos;
+		hop = h->ttl;
+		ipoput4(f, bp, 1, hop - 1, tos, &conv);
+		return;
+	}
+
+	/* for us: collect fragments until the datagram is complete */
+	frag = nhgets(h->frag);
+	if(frag) {
+		h->tos = 0;
+		if(frag & IP_MF)
+			h->tos = 1;
+		bp = ip4reassemble(ip, frag, bp, h);
+		if(bp == nil)
+			return;
+		h = (Ip4hdr*)(bp->rp);
+	}
+
+	/* don't let any frag info go up the stack */
+	h->frag[0] = 0;
+	h->frag[1] = 0;
+
+	/* deliver to the protocol registered for this protocol number */
+	proto = h->proto;
+	p = Fsrcvpcol(f, proto);
+	if(p != nil && p->rcv != nil) {
+		ip->stats[InDelivers]++;
+		(*p->rcv)(p, ifc, bp);
+		return;
+	}
+	ip->stats[InDiscards]++;
+	ip->stats[InUnknownProtos]++;
+	freeblist(bp);
+}
+
+/*
+ * Format every IP counter as a "name: value" line into buf (at most
+ * len bytes) and return the number of bytes produced.
+ * DefaultTTL is refreshed to MAXTTL before printing.
+ */
+int
+ipstats(Fs *f, char *buf, int len)
+{
+	IP *ip;
+	char *out, *end;
+	int i;
+
+	ip = f->ip;
+	ip->stats[DefaultTTL] = MAXTTL;
+
+	out = buf;
+	end = buf + len;
+	i = 0;
+	while(i < Nstats){
+		out = seprint(out, end, "%s: %lud\n", statnames[i], ip->stats[i]);
+		i++;
+	}
+	return out - buf;
+}
+
+/*
+ * ip4reassemble - add the fragment in bp to its reassembly queue.
+ *
+ * ip      IP instance holding the queues
+ * offset  the header's raw fragment field (flags and offset)
+ * bp      the fragment, IP header at bp->rp
+ * ih      that header; the caller sets ih->tos to 1 when IP_MF was
+ *         set and to 0 otherwise
+ *
+ * Returns the complete datagram once all fragments have arrived,
+ * bp itself when the packet turns out not to be a fragment, and nil
+ * while the datagram is still incomplete (bp has then been queued
+ * or, if entirely covered by an earlier fragment, freed).
+ * Queues whose age has passed are discarded while searching.
+ */
+Block*
+ip4reassemble(IP *ip, int offset, Block *bp, Ip4hdr *ih)
+{
+	int fend;
+	ushort id;
+	Fragment4 *f, *fnext;
+	ulong src, dst;
+	Block *bl, **l, *last, *prev;
+	int ovlap, len, fragsize, pktposn;
+
+	src = nhgetl(ih->src);
+	dst = nhgetl(ih->dst);
+	id = nhgets(ih->id);
+
+	/*
+	 *  block lists are too hard, pullupblock into a single block
+	 */
+	if(bp->next){
+		bp = pullupblock(bp, blocklen(bp));
+		ih = (Ip4hdr*)(bp->rp);
+	}
+
+	qlock(&ip->fraglock4);
+
+	/*
+	 *  find a reassembly queue for this fragment
+	 */
+	for(f = ip->flisthead4; f; f = fnext){
+		fnext = f->next;	/* because ipfragfree4 changes the list */
+		if(f->src == src && f->dst == dst && f->id == id)
+			break;
+		if(f->age < NOW){
+			ip->stats[ReasmTimeout]++;
+			ipfragfree4(ip, f);
+		}
+	}
+
+	/*
+	 *  if this isn't a fragmented packet, accept it
+	 *  and get rid of any fragments that might go
+	 *  with it.
+	 */
+	if(!ih->tos && (offset & ~(IP_MF|IP_DF)) == 0) {
+		if(f != nil) {
+			ipfragfree4(ip, f);
+			ip->stats[ReasmFails]++;
+		}
+		qunlock(&ip->fraglock4);
+		return bp;
+	}
+
+	/*
+	 * make sure there is room in front of the packet for the Ipfrag
+	 * that BKFG overlays on bp->base.  padblock hands back a block
+	 * pointer that replaces bp, so the header pointer is re-derived
+	 * from it instead of trusting the old ih.
+	 */
+	if(bp->base+sizeof(Ipfrag) >= bp->rp){
+		bp = padblock(bp, sizeof(Ipfrag));
+		bp->rp += sizeof(Ipfrag);
+		ih = BLKIP(bp);
+	}
+
+	/* the ushort foff drops the flag bits shifted out of the top */
+	BKFG(bp)->foff = offset<<3;
+	BKFG(bp)->flen = nhgets(ih->length)-IP4HDR;
+
+	/* First fragment allocates a reassembly queue */
+	if(f == nil) {
+		f = ipfragallo4(ip);
+		f->id = id;
+		f->src = src;
+		f->dst = dst;
+
+		f->blist = bp;
+
+		qunlock(&ip->fraglock4);
+		ip->stats[ReasmReqds]++;
+		return nil;
+	}
+
+	/*
+	 *  find the new fragment's position in the queue
+	 */
+	prev = nil;
+	l = &f->blist;
+	bl = f->blist;
+	while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+		prev = bl;
+		l = &bl->next;
+		bl = bl->next;
+	}
+
+	/* Check overlap of a previous fragment - trim away as necessary */
+	if(prev) {
+		ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+		if(ovlap > 0) {
+			/* new fragment adds nothing: drop it */
+			if(ovlap >= BKFG(bp)->flen) {
+				freeblist(bp);
+				qunlock(&ip->fraglock4);
+				return nil;
+			}
+			BKFG(prev)->flen -= ovlap;
+		}
+	}
+
+	/* Link onto assembly queue */
+	bp->next = *l;
+	*l = bp;
+
+	/* Check to see if succeeding segments overlap */
+	if(bp->next) {
+		l = &bp->next;
+		fend = BKFG(bp)->foff + BKFG(bp)->flen;
+		/* Take completely covered segments out */
+		while(*l) {
+			ovlap = fend - BKFG(*l)->foff;
+			if(ovlap <= 0)
+				break;
+			/* partly covered: drop its first ovlap data bytes */
+			if(ovlap < BKFG(*l)->flen) {
+				BKFG(*l)->flen -= ovlap;
+				BKFG(*l)->foff += ovlap;
+				/* move up ih hdrs */
+				memmove((*l)->rp + ovlap, (*l)->rp, IP4HDR);
+				(*l)->rp += ovlap;
+				break;
+			}
+			last = (*l)->next;
+			(*l)->next = nil;
+			freeblist(*l);
+			*l = last;
+		}
+	}
+
+	/*
+	 *  look for a complete packet.  if we get to a fragment
+	 *  without IP_MF set, we're done.
+	 */
+	pktposn = 0;
+	for(bl = f->blist; bl; bl = bl->next) {
+		/* a hole before this fragment: not complete yet */
+		if(BKFG(bl)->foff != pktposn)
+			break;
+		if((BLKIP(bl)->frag[0]&(IP_MF>>8)) == 0) {
+			bl = f->blist;
+			len = nhgets(BLKIP(bl)->length);
+			bl->wp = bl->rp + len;
+
+			/* Pullup all the fragment headers and
+			 * return a complete packet
+			 */
+			for(bl = bl->next; bl; bl = bl->next) {
+				fragsize = BKFG(bl)->flen;
+				len += fragsize;
+				bl->rp += IP4HDR;
+				bl->wp = bl->rp + fragsize;
+			}
+
+			/* detach the list before freeing the queue so it survives */
+			bl = f->blist;
+			f->blist = nil;
+			ipfragfree4(ip, f);
+			ih = BLKIP(bl);
+			hnputs(ih->length, len);
+			qunlock(&ip->fraglock4);
+			ip->stats[ReasmOKs]++;
+			return bl;
+		}
+		pktposn += BKFG(bl)->flen;
+	}
+	qunlock(&ip->fraglock4);
+	return nil;
+}
+
+/*
+ * ipfragfree4 - release a reassembly queue: free the fragments it
+ * holds, unlink it from the active list if it is there, and put it
+ * on the free list.  The caller is assumed to hold fraglock4.
+ */
+void
+ipfragfree4(IP *ip, Fragment4 *frag)
+{
+	Fragment4 **link;
+
+	if(frag->blist)
+		freeblist(frag->blist);
+	frag->blist = nil;
+	frag->src = 0;
+	frag->id = 0;
+
+	/* walk the links of the active list until one points at frag */
+	for(link = &ip->flisthead4; *link != nil; link = &(*link)->next){
+		if(*link == frag){
+			*link = frag->next;
+			break;
+		}
+	}
+
+	frag->next = ip->fragfree4;
+	ip->fragfree4 = frag;
+}
+
+/*
+ * ipfragallo4 - take a reassembly queue from the free list, put it at
+ * the head of the active list and give it 30000 ticks of NOW to live.
+ * When the free list is empty the last queue on the active list is
+ * recycled first.  The caller is assumed to hold fraglock4.
+ */
+Fragment4 *
+ipfragallo4(IP *ip)
+{
+	Fragment4 *q, *tail;
+
+	while(ip->fragfree4 == nil){
+		/* free last entry on fraglist */
+		tail = ip->flisthead4;
+		while(tail->next != nil)
+			tail = tail->next;
+		ipfragfree4(ip, tail);
+	}
+
+	q = ip->fragfree4;
+	ip->fragfree4 = q->next;
+
+	q->next = ip->flisthead4;
+	ip->flisthead4 = q;
+	q->age = NOW + 30000;
+
+	return q;
+}
+
+/*
+ * ipcsum - checksum of the IPv4 header starting at addr.
+ * The header length is taken from the low nibble of the first byte
+ * (in 32-bit words).  The header is summed as big-endian 16-bit
+ * words, the carries are folded back in twice, and the complement of
+ * the low 16 bits is returned.
+ */
+ushort
+ipcsum(uchar *addr)
+{
+	ulong sum;
+	uchar *p, *end;
+
+	end = addr + ((addr[0]&0xf)<<2);
+	sum = 0;
+	for(p = addr; p < end; p += 2)
+		sum += p[0]<<8 | p[1];
+
+	sum = (sum >> 16) + (sum & 0xffff);
+	sum = (sum >> 16) + (sum & 0xffff);
+
+	return (sum^0xffff);
+}
diff --git a/src/9vx/a/ip/ip.h b/src/9vx/a/ip/ip.h
@@ -0,0 +1,677 @@
+typedef struct Conv Conv;
+typedef struct Fs Fs;
+typedef union Hwaddr Hwaddr;
+typedef struct IP IP;
+typedef struct IPaux IPaux;
+typedef struct Ipself Ipself;
+typedef struct Ipselftab Ipselftab;
+typedef struct Iplink Iplink;
+typedef struct Iplifc Iplifc;
+typedef struct Ipmulti Ipmulti;
+typedef struct Ipifc Ipifc;
+typedef struct Iphash Iphash;
+typedef struct Ipht Ipht;
+typedef struct Netlog Netlog;
+typedef struct Medium Medium;
+typedef struct Proto Proto;
+typedef struct Arpent Arpent;
+typedef struct Arp Arp;
+typedef struct Route Route;
+
+typedef struct Routerparams Routerparams;
+typedef struct Hostparams Hostparams;
+typedef struct v6router v6router;
+typedef struct v6params v6params;
+
+/* stack-wide sizes, limits and IPv4 header constants */
+enum
+{
+ Addrlen= 64,
+ Maxproto= 20, /* Fs.p[] holds Maxproto+1 entries */
+ Nhash= 64,
+ Maxincall= 5,
+ Nchans= 1024,
+ MAClen= 16, /* longest mac address */
+
+ MAXTTL= 255,
+ DFLTTOS= 0,
+
+ IPaddrlen= 16, /* bytes in the address form used throughout the stack */
+ IPv4addrlen= 4,
+ IPv4off= 12, /* presumably where the v4 bytes sit inside a 16-byte address -- confirm in ipaux.c */
+ IPllen= 4, /* ulongs per address in V6route */
+
+ /* ip versions */
+ V4= 4,
+ V6= 6,
+ IP_VER4= 0x40, /* version as it appears in the high nibble of vihl */
+ IP_VER6= 0x60,
+ IP_HLEN4= 5, /* v4: Header length in words */
+ IP_DF= 0x4000, /* v4: Don't fragment */
+ IP_MF= 0x2000, /* v4: More fragments */
+ IP4HDR= 20, /* sizeof(Ip4hdr) */
+ IP_MAX= 64*1024, /* Max. Internet packet size, v4 & v6 */
+
+ /* 2^Lroot trees in the root table */
+ Lroot= 10,
+
+ Maxpath = 64,
+};
+
+/* conversation states; presumably the values stored in Conv.state -- confirm in devip.c */
+enum
+{
+ Idle= 0,
+ Announcing= 1,
+ Announced= 2,
+ Connecting= 3,
+ Connected= 4,
+};
+
+/*
+ * on the wire packet header
+ * Multi-byte fields are byte arrays accessed with nhgets/hnputs and
+ * nhgetl; the struct covers only the fixed IP4HDR bytes, not options.
+ */
+typedef struct Ip4hdr Ip4hdr;
+struct Ip4hdr
+{
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* ip->identification */
+ uchar frag[2]; /* Fragment information */
+ uchar ttl; /* Time to live */
+ uchar proto; /* Protocol */
+ uchar cksum[2]; /* Header checksum */
+ uchar src[4]; /* IP source */
+ uchar dst[4]; /* IP destination */
+};
+
+/*
+ * one per conversation directory
+ * Holds the addressing, ownership, queues and per-protocol state of a
+ * single conversation of a Proto.
+ */
+struct Conv
+{
+ QLock qlock;
+
+ int x; /* conversation index */
+ Proto* p; /* protocol this conversation belongs to */
+
+ int restricted; /* remote port is restricted */
+ uint ttl; /* max time to live */
+ uint tos; /* type of service */
+ int ignoreadvice; /* don't terminate connection on icmp errors */
+
+ uchar ipversion;
+ uchar laddr[IPaddrlen]; /* local IP address */
+ uchar raddr[IPaddrlen]; /* remote IP address */
+ ushort lport; /* local port number */
+ ushort rport; /* remote port number */
+
+ char *owner; /* protections */
+ int perm;
+ int inuse; /* opens of listen/data/ctl */
+ int length;
+ int state;
+
+ int maxfragsize; /* If set, used for fragmentation */
+
+ /* udp specific */
+ int headers; /* data src/dst headers in udp */
+ int reliable; /* true if reliable udp */
+
+ Conv* incall; /* calls waiting to be listened for */
+ Conv* next;
+
+ Queue* rq; /* queued data waiting to be read */
+ Queue* wq; /* queued data waiting to be written */
+ Queue* eq; /* returned error packets */
+ Queue* sq; /* snooping queue */
+ Ref snoopers; /* number of processes with snoop open */
+
+ QLock car;
+ Rendez cr;
+ char cerr[ERRMAX];
+
+ QLock listenq;
+ Rendez listenr;
+
+ Ipmulti *multi; /* multicast bindings for this interface */
+
+ void* ptcl; /* protocol specific stuff */
+
+ Route *r; /* last route used */
+ ulong rgen; /* routetable generation for *r */
+};
+
+/*
+ * Description of a link medium: its size parameters and the
+ * operations the stack calls on it.  An initializer may leave any
+ * operation nil (tripmedium sets only the name).
+ */
+struct Medium
+{
+ char *name;
+ int hsize; /* medium header size */
+ int mintu; /* default min mtu */
+ int maxtu; /* default max mtu */
+ int maclen; /* mac address length */
+ void (*bind)(Ipifc*, int, char**);
+ void (*unbind)(Ipifc*);
+ void (*bwrite)(Ipifc *ifc, Block *b, int version, uchar *ip); /* send block b (V4 or V6) towards next hop ip */
+
+ /* for arming interfaces to receive multicast */
+ void (*addmulti)(Ipifc *ifc, uchar *a, uchar *ia);
+ void (*remmulti)(Ipifc *ifc, uchar *a, uchar *ia);
+
+ /* process packets written to 'data' */
+ void (*pktin)(Fs *f, Ipifc *ifc, Block *bp);
+
+ /* routes for router boards */
+ void (*addroute)(Ipifc *ifc, int, uchar*, uchar*, uchar*, int);
+ void (*remroute)(Ipifc *ifc, int, uchar*, uchar*);
+ void (*flushroutes)(Ipifc *ifc);
+
+ /* for routing multicast groups */
+ void (*joinmulti)(Ipifc *ifc, uchar *a, uchar *ia);
+ void (*leavemulti)(Ipifc *ifc, uchar *a, uchar *ia);
+
+ /* address resolution */
+ void (*ares)(Fs*, int, uchar*, uchar*, int, int); /* resolve */
+ void (*areg)(Ipifc*, uchar*); /* register */
+
+ /* v6 address generation */
+ void (*pref2addr)(uchar *pref, uchar *ea);
+
+ int unbindonclose; /* if non-zero, unbind on last close */
+};
+
+/*
+ * logical interface associated with a physical one
+ * (one address configuration; chained from Ipifc.lifc through next)
+ */
+struct Iplifc
+{
+ uchar local[IPaddrlen];
+ uchar mask[IPaddrlen];
+ uchar remote[IPaddrlen];
+ uchar net[IPaddrlen];
+ uchar tentative; /* =1 => v6 dup disc on, =0 => confirmed unique */
+ uchar onlink; /* =1 => onlink, =0 offlink. */
+ uchar autoflag; /* v6 autonomous flag */
+ long validlt; /* v6 valid lifetime */
+ long preflt; /* v6 preferred lifetime */
+ long origint; /* time when addr was added */
+ Iplink *link; /* addresses linked to this lifc */
+ Iplifc *next;
+};
+
+/*
+ * binding twixt Ipself and Iplifc
+ * Each link sits on two chains at once: the links of its Ipself
+ * (selflink) and the links of its Iplifc (lifclink).
+ */
+struct Iplink
+{
+ Ipself *self;
+ Iplifc *lifc;
+ Iplink *selflink; /* next link for this local address */
+ Iplink *lifclink; /* next link for this ifc */
+ ulong expire;
+ Iplink *next; /* free list */
+ int ref;
+};
+
+/* rfc 2461, pp.40—43. */
+
+/*
+ * default values, one per stack
+ * (ip_init_6 fills in the stack copy held in v6params.rp; Ipifc and
+ * v6router each embed a copy of their own)
+ */
+struct Routerparams {
+ int mflag; /* flag: managed address configuration */
+ int oflag; /* flag: other stateful configuration */
+ int maxraint; /* max. router adv interval (ms) */
+ int minraint; /* min. router adv interval (ms) */
+ int linkmtu; /* mtu options */
+ int reachtime; /* reachable time */
+ int rxmitra; /* retransmit interval */
+ int ttl; /* cur hop count limit */
+ int routerlt; /* router lifetime */
+};
+
+/* host-side v6 parameters, one per stack (held in v6params.hp) */
+struct Hostparams {
+ int rxmithost; /* v6 RETRANS_TIMER; ip_init_6 sets it to 1000 */
+};
+
+/*
+ * A physical interface: the medium it is bound to, its transfer unit
+ * limits, its logical interfaces (addresses) and its statistics.
+ */
+struct Ipifc
+{
+ RWlock rwlock;
+
+ Conv *conv; /* link to its conversation structure */
+ char dev[64]; /* device we're attached to */
+ Medium *m; /* Media pointer; ipoput4 treats nil as "cannot send" */
+ int maxtu; /* Maximum transfer unit */
+ int mintu; /* Minimum transfer unit */
+ int mbps; /* megabits per second */
+ void *arg; /* medium specific */
+ int reassemble; /* reassemble IP packets before forwarding */
+
+ /* these are used so that we can unbind on the fly */
+ Lock idlock;
+ uchar ifcid; /* incremented each 'bind/unbind/add/remove' */
+ int ref; /* number of proc's using this ipifc */
+ Rendez wait; /* where unbinder waits for ref == 0 */
+ int unbinding;
+
+ uchar mac[MAClen]; /* MAC address */
+
+ Iplifc *lifc; /* logical interfaces on this physical one */
+
+ ulong in, out; /* message statistics */
+ ulong inerr, outerr; /* ... */
+
+ uchar sendra6; /* flag: send router advs on this ifc */
+ uchar recvra6; /* flag: recv router advs on this ifc */
+ Routerparams rp; /* router parameters as in RFC 2461, pp.40—43.
+ used only if node is router */
+};
+
+/*
+ * one per multicast-lifc pair used by a Conv
+ * (chained from Conv.multi through next)
+ */
+struct Ipmulti
+{
+ uchar ma[IPaddrlen]; /* presumably the multicast address -- cf. ipifcaddmulti(c, ma, ia) */
+ uchar ia[IPaddrlen]; /* presumably the interface address it is bound on */
+ Ipmulti *next;
+};
+
+/*
+ * hash table for 2 ip addresses + 2 ports
+ * Nipht is the number of buckets in Ipht.tab; the IPmatch* values are
+ * the kinds of match recorded in Iphash.match.
+ */
+enum
+{
+ Nipht= 521, /* convenient prime */
+
+ IPmatchexact= 0, /* match on 4 tuple */
+ IPmatchany, /* *!* */
+ IPmatchport, /* *!port */
+ IPmatchaddr, /* addr!* */
+ IPmatchpa, /* addr!port */
+};
+/* one entry on a hash chain of an Ipht */
+struct Iphash
+{
+ Iphash *next; /* next entry in the same bucket */
+ Conv *c; /* conversation this entry stands for */
+ int match; /* one of the IPmatch* kinds */
+};
+/* conversation hash table, used through iphtadd/iphtrem/iphtlook */
+struct Ipht
+{
+ Lock lk;
+
+ Iphash *tab[Nipht]; /* bucket heads */
+};
+void iphtadd(Ipht*, Conv*);
+void iphtrem(Ipht*, Conv*);
+Conv* iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp);
+
+/*
+ * one per multiplexed protocol
+ * A protocol fills in the operations it supports (unused ones may be
+ * nil, as ilinit does for ctl and gc) and registers with Fsproto().
+ */
+struct Proto
+{
+ QLock qlock;
+
+ char* name; /* protocol name */
+ int x; /* protocol index */
+ int ipproto; /* ip protocol type */
+
+ char* (*connect)(Conv*, char**, int);
+ char* (*announce)(Conv*, char**, int);
+ char* (*bind)(Conv*, char**, int);
+ int (*state)(Conv*, char*, int);
+ void (*create)(Conv*);
+ void (*close)(Conv*);
+ void (*rcv)(Proto*, Ipifc*, Block*); /* called by ipiput4 with each packet for this protocol */
+ char* (*ctl)(Conv*, char**, int);
+ void (*advise)(Proto*, Block*, char*);
+ int (*stats)(Proto*, char*, int);
+ int (*local)(Conv*, char*, int);
+ int (*remote)(Conv*, char*, int);
+ int (*inuse)(Conv*);
+ int (*gc)(Proto*); /* returns true if any conversations are freed */
+
+ Fs *f; /* file system this proto is part of */
+ Conv **conv; /* array of conversations */
+ int ptclsize; /* size of per protocol ctl block */
+ int nc; /* number of conversations */
+ int ac;
+ Qid qid; /* qid for protocol directory */
+ ushort nextrport;
+
+ void *priv; /* protocol private data (il stores its Ilpriv here) */
+};
+
+
+/*
+ * one per IP protocol stack
+ * Ties together the registered protocols, the IP instance, the
+ * routing forests and the per-stack v6 parameters.
+ */
+struct Fs
+{
+ RWlock rwlock;
+
+ Conv *conv; /* link to its conversation structure */
+ int dev;
+
+ int np;
+ Proto* p[Maxproto+1]; /* list of supported protocols */
+ Proto* t2p[256]; /* vector of all protocols */
+ Proto* ipifc; /* kludge for ipifcremroute & ipifcaddroute */
+ Proto* ipmux; /* kludge for finding an ip multiplexor */
+
+ IP *ip; /* set by ip_init */
+ Ipselftab *self;
+ Arp *arp;
+ v6params *v6p; /* set by ip_init_6 */
+
+ Route *v4root[1<<Lroot]; /* v4 routing forest */
+ Route *v6root[1<<Lroot]; /* v6 routing forest */
+ Route *queue; /* used as temp when reinjecting routes */
+
+ Netlog *alog;
+
+ char ndb[1024]; /* an ndb entry for this interface */
+ int ndbvers;
+ long ndbmtime;
+};
+
+/* one per default router known to host (entries live in v6params.v6rlist) */
+struct v6router {
+ uchar inuse;
+ Ipifc *ifc;
+ int ifcid;
+ uchar routeraddr[IPaddrlen];
+ long ltorigin;
+ Routerparams rp;
+};
+
+/* per-stack IPv6 parameters; allocated and defaulted by ip_init_6 */
+struct v6params
+{
+ Routerparams rp; /* v6 params, one copy per node now */
+ Hostparams hp;
+ v6router v6rlist[3]; /* max 3 default routers, currently */
+ int cdrouter; /* uses only v6rlist[cdrouter] if */
+ /* cdrouter >= 0. (initialised to -1) */
+};
+
+
+int Fsconnected(Conv*, char*);
+Conv* Fsnewcall(Conv*, uchar*, ushort, uchar*, ushort, uchar);
+int Fspcolstats(char*, int);
+int Fsproto(Fs*, Proto*);
+int Fsbuiltinproto(Fs*, uchar);
+Conv* Fsprotoclone(Proto*, char*);
+Proto* Fsrcvpcol(Fs*, uchar);
+Proto* Fsrcvpcolx(Fs*, uchar);
+char* Fsstdconnect(Conv*, char**, int);
+char* Fsstdannounce(Conv*, char**, int);
+char* Fsstdbind(Conv*, char**, int);
+ulong scalednconv(void);
+void closeconv(Conv*);
+/*
+ * logging
+ * One bit per event class; a class is passed as the second argument
+ * of netlog().  Bit 0 is not assigned.
+ */
+enum
+{
+ Logip= 1<<1,
+ Logtcp= 1<<2,
+ Logfs= 1<<3,
+ Logil= 1<<4,
+ Logicmp= 1<<5,
+ Logudp= 1<<6,
+ Logcompress= 1<<7,
+ Logilmsg= 1<<8,
+ Loggre= 1<<9,
+ Logppp= 1<<10,
+ Logtcprxmt= 1<<11,
+ Logigmp= 1<<12,
+ Logudpmsg= 1<<13,
+ Logipmsg= 1<<14,
+ Logrudp= 1<<15,
+ Logrudpmsg= 1<<16,
+ Logesp= 1<<17,
+ Logtcpwin= 1<<18,
+};
+
+void netloginit(Fs*);
+void netlogopen(Fs*);
+void netlogclose(Fs*);
+void netlogctl(Fs*, char*, int);
+long netlogread(Fs*, void*, ulong, long);
+void netlog(Fs*, int, char*, ...);
+void ifcloginit(Fs*);
+long ifclogread(Fs*, Chan *,void*, ulong, long);
+void ifclog(Fs*, uchar *, int);
+void ifclogopen(Fs*, Chan*);
+void ifclogclose(Fs*, Chan*);
+
+/*
+ * iproute.c
+ */
+typedef struct RouteTree RouteTree;
+typedef struct Routewalk Routewalk;
+typedef struct V4route V4route;
+typedef struct V6route V6route;
+
+/* bits of Route.type; ipoput4 tests them to choose the next hop */
+enum
+{
+
+ /* type bits */
+ Rv4= (1<<0), /* this is a version 4 route */
+ Rifc= (1<<1), /* this route is a directly connected interface */
+ Rptpt= (1<<2), /* this route is a pt to pt interface */
+ Runi= (1<<3), /* a unicast self address */
+ Rbcast= (1<<4), /* a broadcast self address */
+ Rmulti= (1<<5), /* a multicast self address */
+ Rproxy= (1<<6), /* this route should be proxied */
+};
+
+/*
+ * State handed to ipwalkroutes(); walk is the per-route callback.
+ * NOTE(review): the meaning of o, h, p and e is not visible here --
+ * see iproute.c.
+ */
+struct Routewalk
+{
+ int o;
+ int h;
+ char* p;
+ char* e;
+ void* state;
+ void (*walk)(Route*, Routewalk*);
+};
+
+/*
+ * Tree linkage and bookkeeping common to all routes.  struct Route
+ * repeats exactly these members, in this order, at its start.
+ */
+struct RouteTree
+{
+ Route* right;
+ Route* left;
+ Route* mid;
+ uchar depth;
+ uchar type; /* Rv4, Rifc, ... bits */
+ uchar ifcid; /* must match ifc->ifcid */
+ Ipifc *ifc;
+ char tag[4];
+ int ref;
+};
+
+/* v4 part of a route: the address range it covers and its gateway */
+struct V4route
+{
+ ulong address;
+ ulong endaddress;
+ uchar gate[IPv4addrlen]; /* next hop used by ipoput4 for gatewayed routes */
+};
+
+/* v6 part of a route; addresses are held as IPllen ulongs */
+struct V6route
+{
+ ulong address[IPllen];
+ ulong endaddress[IPllen];
+ uchar gate[IPaddrlen];
+};
+
+/*
+ * A routing table entry: the RouteTree members written out in place,
+ * followed by the version specific part (type & Rv4 tells which
+ * union member applies).
+ */
+struct Route
+{
+/* RouteTree; */
+ Route* right;
+ Route* left;
+ Route* mid;
+ uchar depth;
+ uchar type;
+ uchar ifcid; /* must match ifc->ifcid */
+ Ipifc *ifc;
+ char tag[4];
+ int ref;
+
+ union {
+ V6route v6;
+ V4route v4;
+ };
+};
+extern void v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
+extern void v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
+extern void v4delroute(Fs *f, uchar *a, uchar *mask, int dolock);
+extern void v6delroute(Fs *f, uchar *a, uchar *mask, int dolock);
+extern Route* v4lookup(Fs *f, uchar *a, Conv *c);
+extern Route* v6lookup(Fs *f, uchar *a, Conv *c);
+extern long routeread(Fs *f, char*, ulong, int);
+extern long routewrite(Fs *f, Chan*, char*, int);
+extern void routetype(int, char*);
+extern void ipwalkroutes(Fs*, Routewalk*);
+extern void convroute(Route*, uchar*, uchar*, uchar*, char*, int*);
+
+/*
+ * devip.c
+ */
+
+/*
+ * Hanging off every ip channel's ->aux is the following structure.
+ * It maintains the state used by devip and iproute.
+ * Instances are created with newipaux().
+ */
+struct IPaux
+{
+ char *owner; /* the user that did the attach */
+ char tag[4];
+};
+
+extern IPaux* newipaux(char*, char*);
+
+/*
+ * arp.c
+ */
+/* one address-resolution cache entry: an IP address and the MAC it maps to */
+struct Arpent
+{
+ uchar ip[IPaddrlen];
+ uchar mac[MAClen];
+ Medium *type; /* media type */
+ Arpent* hash;
+ Block* hold;
+ Block* last;
+ uint ctime; /* time entry was created or refreshed */
+ uint utime; /* time entry was last used */
+ uchar state;
+ Arpent *nextrxt; /* re-transmit chain */
+ uint rtime; /* time for next retransmission */
+ uchar rxtsrem;
+ Ipifc *ifc;
+ uchar ifcid; /* must match ifc->ifcid */
+};
+
+extern void arpinit(Fs*);
+extern int arpread(Arp*, char*, ulong, int);
+extern int arpwrite(Fs*, char*, int);
+extern Arpent* arpget(Arp*, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *h);
+extern void arprelease(Arp*, Arpent *a);
+extern Block* arpresolve(Arp*, Arpent *a, Medium *type, uchar *mac);
+extern void arpenter(Fs*, int version, uchar *ip, uchar *mac, int len, int norefresh);
+
+/*
+ * ipaux.c
+ */
+
+extern int myetheraddr(uchar*, char*);
+extern vlong parseip(uchar*, char*);
+extern vlong parseipmask(uchar*, char*);
+extern char* v4parseip(uchar*, char*);
+extern void maskip(uchar *from, uchar *mask, uchar *to);
+extern int parsemac(uchar *to, char *from, int len);
+extern uchar* defmask(uchar*);
+extern int isv4(uchar*);
+extern void v4tov6(uchar *v6, uchar *v4);
+extern int v6tov4(uchar *v4, uchar *v6);
+extern int eipfmt(Fmt*);
+
+/* copy one IPaddrlen-byte address from y to x */
+#define ipmove(x, y) memmove(x, y, IPaddrlen)
+/*
+ * non-zero when the two IPaddrlen-byte addresses differ, 0 when equal.
+ * The last byte is compared first so memcmp is skipped when it already
+ * differs.  Both arguments are evaluated more than once.
+ */
+#define ipcmp(x, y) ( (x)[IPaddrlen-1] != (y)[IPaddrlen-1] || memcmp(x, y, IPaddrlen) )
+
+extern uchar IPv4bcast[IPaddrlen];
+extern uchar IPv4bcastobs[IPaddrlen];
+extern uchar IPv4allsys[IPaddrlen];
+extern uchar IPv4allrouter[IPaddrlen];
+extern uchar IPnoaddr[IPaddrlen];
+extern uchar v4prefix[IPaddrlen];
+extern uchar IPallbits[IPaddrlen];
+
+/* current time as returned by msec(); used for fragment ageing in ip.c */
+#define NOW msec()
+
+/*
+ * media
+ */
+extern Medium ethermedium;
+extern Medium nullmedium;
+extern Medium pktmedium;
+extern Medium tripmedium;
+
+/*
+ * ipifc.c
+ */
+extern Medium* ipfindmedium(char *name);
+extern void addipmedium(Medium *med);
+extern int ipforme(Fs*, uchar *addr);
+extern int iptentative(Fs*, uchar *addr);
+extern int ipisbm(uchar *);
+extern int ipismulticast(uchar *);
+extern Ipifc* findipifc(Fs*, uchar *remote, int type);
+extern void findlocalip(Fs*, uchar *local, uchar *remote);
+extern int ipv4local(Ipifc *ifc, uchar *addr);
+extern int ipv6local(Ipifc *ifc, uchar *addr);
+extern int ipv6anylocal(Ipifc *ifc, uchar *addr);
+extern Iplifc* iplocalonifc(Ipifc *ifc, uchar *ip);
+extern int ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip);
+extern int ipismulticast(uchar *ip);
+extern int ipisbooting(void);
+extern int ipifccheckin(Ipifc *ifc, Medium *med);
+extern void ipifccheckout(Ipifc *ifc);
+extern int ipifcgrab(Ipifc *ifc);
+extern void ipifcaddroute(Fs*, int, uchar*, uchar*, uchar*, int);
+extern void ipifcremroute(Fs*, int, uchar*, uchar*);
+extern void ipifcremmulti(Conv *c, uchar *ma, uchar *ia);
+extern void ipifcaddmulti(Conv *c, uchar *ma, uchar *ia);
+extern char* ipifcrem(Ipifc *ifc, char **argv, int argc);
+extern char* ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp);
+extern long ipselftabread(Fs*, char *a, ulong offset, int n);
+extern char* ipifcadd6(Ipifc *ifc, char**argv, int argc);
+/*
+ * ip.c
+ */
+extern void iprouting(Fs*, int);
+extern void icmpnoconv(Fs*, Block*);
+extern void icmpcantfrag(Fs*, Block*, int);
+extern void icmpttlexceeded(Fs*, uchar*, Block*);
+extern ushort ipcsum(uchar*);
+extern void ipiput4(Fs*, Ipifc*, Block*);
+extern void ipiput6(Fs*, Ipifc*, Block*);
+extern int ipoput4(Fs*, Block*, int, int, int, Conv*);
+extern int ipoput6(Fs*, Block*, int, int, int, Conv*);
+extern int ipstats(Fs*, char*, int);
+extern ushort ptclbsum(uchar*, int);
+extern ushort ptclcsum(Block*, int, int);
+extern void ip_init(Fs*);
+extern void update_mtucache(uchar*, ulong);
+extern ulong restrict_mtu(uchar*, ulong);
+/*
+ * bootp.c
+ */
+extern char* bootp(Ipifc*);
+extern int bootpread(char*, ulong, int);
+
+/*
+ * resolving inferno/plan9 differences
+ */
+Chan* commonfdtochan(int, int, int, int);
+char* commonuser(void);
+char* commonerror(void);
+
+/*
+ * chandial.c
+ */
+extern Chan* chandial(char*, char*, char*, Chan**);
+
+/*
+ * global to all of the stack
+ */
+extern void (*igmpreportfn)(Ipifc*, uchar*);
diff --git a/src/9vx/a/ip/ipaux.c b/src/9vx/a/ip/ipaux.c
@@ -0,0 +1,368 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+#include "ip.h"
+#include "ipv6.h"
+
+/*
+ * printable names for IP next-header/protocol values, indexed by the
+ * protocol constant. Slots without an initializer below are left nil.
+ */
+char *v6hdrtypes[Maxhdrtype] =
+{
+ [HBH] "HopbyHop",
+ [ICMP] "ICMP",
+ [IGMP] "IGMP",
+ [GGP] "GGP",
+ [IPINIP] "IP",
+ [ST] "ST",
+ [TCP] "TCP",
+ [UDP] "UDP",
+ [ISO_TP4] "ISO_TP4",
+ [RH] "Routinghdr",
+ [FH] "Fraghdr",
+ [IDRP] "IDRP",
+ [RSVP] "RSVP",
+ [AH] "Authhdr",
+ [ESP] "ESP",
+ [ICMPv6] "ICMPv6",
+ [NNH] "Nonexthdr",
+ [ISO_IP] "ISO_IP",
+ [IGRP] "IGRP",
+ [OSPF] "OSPF",
+};
+
+/*
+ * well known IPv6 addresses
+ *
+ * each table is IPaddrlen bytes in network order; the *preflen
+ * variables give the matching prefix length in bytes, i.e. the
+ * number of leading 0xff bytes in the corresponding mask.
+ */
+/* :: */
+uchar v6Unspecified[IPaddrlen] = {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+/* ::1 */
+uchar v6loopback[IPaddrlen] = {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x01
+};
+
+/* fe80:: */
+uchar v6linklocal[IPaddrlen] = {
+ 0xfe, 0x80, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+/* first 8 bytes set */
+uchar v6linklocalmask[IPaddrlen] = {
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+int v6llpreflen = 8; /* link-local prefix length in bytes */
+
+/* ff00:: */
+uchar v6multicast[IPaddrlen] = {
+ 0xff, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+/* first byte set */
+uchar v6multicastmask[IPaddrlen] = {
+ 0xff, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+int v6mcpreflen = 1; /* multicast prefix length */
+
+/* ff01::1 */
+uchar v6allnodesN[IPaddrlen] = {
+ 0xff, 0x01, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x01
+};
+/* ff01::2 */
+uchar v6allroutersN[IPaddrlen] = {
+ 0xff, 0x01, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x02
+};
+/* first 2 bytes set */
+uchar v6allnodesNmask[IPaddrlen] = {
+ 0xff, 0xff, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+int v6aNpreflen = 2; /* all nodes (N) prefix */
+
+/* ff02::1 */
+uchar v6allnodesL[IPaddrlen] = {
+ 0xff, 0x02, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x01
+};
+/* ff02::2 */
+uchar v6allroutersL[IPaddrlen] = {
+ 0xff, 0x02, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x02
+};
+/* first 2 bytes set */
+uchar v6allnodesLmask[IPaddrlen] = {
+ 0xff, 0xff, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+int v6aLpreflen = 2; /* all nodes (L) prefix */
+
+/* ff02::1:ff00:0 -- the low 3 bytes are filled in by ipv62smcast() */
+uchar v6solicitednode[IPaddrlen] = {
+ 0xff, 0x02, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x01,
+ 0xff, 0, 0, 0
+};
+/* first 13 bytes set */
+uchar v6solicitednodemask[IPaddrlen] = {
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0x0, 0x0, 0x0
+};
+int v6snpreflen = 13; /* solicited-node prefix length in bytes */
+
+/*
+ * checksum up to len bytes of the block chain bp, starting offset
+ * bytes into its data. The partial sums come from ptclbsum(); the
+ * value returned is the 16-bit complement of the folded sum.
+ * Returns 0 if the chain ends before offset is reached.
+ */
+ushort
+ptclcsum(Block *bp, int offset, int len)
+{
+ uchar *addr;
+ ulong losum, hisum;
+ ushort csum;
+ int odd, blocklen, x;
+
+ /* Correct to front of data area */
+ while(bp != nil && offset && offset >= BLEN(bp)) {
+ offset -= BLEN(bp);
+ bp = bp->next;
+ }
+ if(bp == nil)
+ return 0;
+
+ addr = bp->rp + offset;
+ blocklen = BLEN(bp) - offset;
+
+ /* single block: one call, with len clamped to what the block holds */
+ if(bp->next == nil) {
+ if(blocklen < len)
+ len = blocklen;
+ return ~ptclbsum(addr, len) & 0xffff;
+ }
+
+ losum = 0;
+ hisum = 0;
+
+ /*
+ * odd is the parity of the number of bytes summed so far.
+ * a piece that starts on an odd byte is accumulated in hisum
+ * and has its two bytes swapped when folded in below.
+ */
+ odd = 0;
+ while(len) {
+ x = blocklen;
+ if(len < x)
+ x = len;
+
+ csum = ptclbsum(addr, x);
+ if(odd)
+ hisum += csum;
+ else
+ losum += csum;
+ odd = (odd+x) & 1;
+ len -= x;
+
+ /* stop early if the chain is shorter than len */
+ bp = bp->next;
+ if(bp == nil)
+ break;
+ blocklen = BLEN(bp);
+ addr = bp->rp;
+ }
+
+ /* merge the byte-swapped odd sums, then fold carries into 16 bits */
+ losum += hisum>>8;
+ losum += (hisum&0xff)<<8;
+ while((csum = losum>>16) != 0)
+ losum = csum + (losum & 0xffff);
+
+ return ~losum & 0xffff;
+}
+
+/* NOTE(review): Isprefix is not referenced in the code visible here -- confirm it is still needed */
+enum
+{
+ Isprefix= 16,
+};
+
+/* top two bits of the first byte at p */
+#define CLASS(p) ((*(uchar*)(p))>>6)
+
+/*
+ * build the solicited-node multicast address for a into smcast:
+ * start from the v6solicitednode template and copy in the last
+ * three bytes of a.
+ */
+void
+ipv62smcast(uchar *smcast, uchar *a)
+{
+	int i;
+
+	assert(IPaddrlen == 16);
+	memmove(smcast, v6solicitednode, IPaddrlen);
+	for(i = 13; i <= 15; i++)
+		smcast[i] = a[i];
+}
+
+
+/*
+ * parse a hex mac address
+ *
+ * to is cleared, then filled one byte per pair of characters taken
+ * from from; a ':' after a pair is skipped. Parsing stops after len
+ * bytes or when fewer than two characters remain. Returns the
+ * number of bytes stored.
+ */
+int
+parsemac(uchar *to, char *from, int len)
+{
+	char pair[3];
+	char *s;
+	int n;
+
+	s = from;
+	memset(to, 0, len);
+	n = 0;
+	while(n < len && s[0] != '\0' && s[1] != '\0'){
+		pair[0] = s[0];
+		pair[1] = s[1];
+		pair[2] = '\0';
+		s += 2;
+
+		to[n++] = strtoul(pair, 0, 16);
+		if(*s == ':')
+			s++;
+	}
+	return n;
+}
+
+/*
+ * hashing tcp, udp, ... connections
+ *
+ * mixes the last byte of each address with both ports and reduces
+ * the result modulo Nhash. Each operand is widened to ulong before
+ * it is shifted: uchar and ushort otherwise promote to signed int,
+ * and shifting a value with its top bit set by 24 (or 16) overflows
+ * int, which is undefined behaviour in C.
+ */
+ulong
+iphash(uchar *sa, ushort sp, uchar *da, ushort dp)
+{
+	ulong h;
+
+	h = (ulong)sa[IPaddrlen-1] << 24;
+	h ^= (ulong)sp << 16;
+	h ^= (ulong)da[IPaddrlen-1] << 8;
+	h ^= dp;
+	return h % Nhash;
+}
+
+/*
+ * enter conversation c in the hash table. The kind of match the
+ * entry can satisfy is derived from which of raddr, laddr and lport
+ * are set:
+ *	raddr set		IPmatchexact
+ *	laddr and lport set	IPmatchpa
+ *	laddr only		IPmatchaddr
+ *	lport only		IPmatchport
+ *	neither			IPmatchany
+ */
+void
+iphtadd(Ipht *ht, Conv *c)
+{
+	ulong slot;
+	Iphash *e;
+	int haveladdr;
+
+	slot = iphash(c->raddr, c->rport, c->laddr, c->lport);
+	e = smalloc(sizeof(*e));
+	if(ipcmp(c->raddr, IPnoaddr) != 0)
+		e->match = IPmatchexact;
+	else {
+		haveladdr = ipcmp(c->laddr, IPnoaddr) != 0;
+		if(c->lport == 0)
+			e->match = haveladdr ? IPmatchaddr : IPmatchany;
+		else
+			e->match = haveladdr ? IPmatchpa : IPmatchport;
+	}
+	e->c = c;
+
+	/* push on the front of the chain */
+	LOCK(ht);
+	e->next = ht->tab[slot];
+	ht->tab[slot] = e;
+	UNLOCK(ht);
+}
+
+/*
+ * remove and free the first hash entry that refers to conversation c,
+ * looking only in the chain selected by c's current addresses and ports.
+ */
+void
+iphtrem(Ipht *ht, Conv *c)
+{
+	ulong slot;
+	Iphash **link, *e;
+
+	slot = iphash(c->raddr, c->rport, c->laddr, c->lport);
+	LOCK(ht);
+	link = &ht->tab[slot];
+	while((e = *link) != nil){
+		if(e->c == c){
+			*link = e->next;
+			free(e);
+			break;
+		}
+		link = &e->next;
+	}
+	UNLOCK(ht);
+}
+
+/* look for a matching conversation with the following precedence
+ * connected && raddr,rport,laddr,lport
+ * announced && laddr,lport
+ * announced && *,lport
+ * announced && laddr,*
+ * announced && *,*
+ *
+ * each pass searches the chain that iphtadd() would have used for an
+ * entry of that kind. Returns nil if no pass finds a conversation.
+ */
+Conv*
+iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp)
+{
+	ulong hv;
+	Iphash *h;
+	Conv *c;
+
+	/* exact 4 pair match (connection) */
+	hv = iphash(sa, sp, da, dp);
+	LOCK(ht);
+	for(h = ht->tab[hv]; h != nil; h = h->next){
+		if(h->match != IPmatchexact)
+			continue;
+		c = h->c;
+		if(sp == c->rport && dp == c->lport
+		&& ipcmp(sa, c->raddr) == 0 && ipcmp(da, c->laddr) == 0)
+			goto out;
+	}
+
+	/* match local address and port */
+	hv = iphash(IPnoaddr, 0, da, dp);
+	for(h = ht->tab[hv]; h != nil; h = h->next){
+		if(h->match != IPmatchpa)
+			continue;
+		c = h->c;
+		if(dp == c->lport && ipcmp(da, c->laddr) == 0)
+			goto out;
+	}
+
+	/* match just port */
+	hv = iphash(IPnoaddr, 0, IPnoaddr, dp);
+	for(h = ht->tab[hv]; h != nil; h = h->next){
+		if(h->match != IPmatchport)
+			continue;
+		c = h->c;
+		if(dp == c->lport)
+			goto out;
+	}
+
+	/* match local address */
+	hv = iphash(IPnoaddr, 0, da, 0);
+	for(h = ht->tab[hv]; h != nil; h = h->next){
+		if(h->match != IPmatchaddr)
+			continue;
+		c = h->c;
+		if(ipcmp(da, c->laddr) == 0)
+			goto out;
+	}
+
+	/* look for something that matches anything */
+	hv = iphash(IPnoaddr, 0, IPnoaddr, 0);
+	for(h = ht->tab[hv]; h != nil; h = h->next){
+		if(h->match != IPmatchany)
+			continue;
+		c = h->c;
+		goto out;
+	}
+
+	/* nothing matched */
+	c = nil;
+out:
+	UNLOCK(ht);
+	return c;
+}
diff --git a/src/9vx/a/ip/ipifc.c b/src/9vx/a/ip/ipifc.c
@@ -0,0 +1,1654 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+
+#define DPRINT if(0)print
+
+enum {
+ Maxmedia = 32,
+ Nself = Maxmedia*5,
+ NHASH = 1<<6,
+ NCACHE = 256,
+ QMAX = 64*1024-1,
+};
+
+Medium *media[Maxmedia] = { 0 };
+
+/*
+ * cache of local addresses (addresses we answer to)
+ *
+ * NOTE(review): hnext is declared as the hash link, but
+ * addselfcache(), remselfcache(), iptentative() and ipforme() in
+ * this file all walk the hash chains through next, and ipselffree()
+ * reuses next for the deferred-free list.
+ */
+struct Ipself
+{
+ uchar a[IPaddrlen];
+ Ipself *hnext; /* next address in the hash table */
+ Iplink *link; /* binding twixt Ipself and Ipifc */
+ ulong expire; /* set by ipselffree() when the entry is put aside */
+ uchar type; /* type of address */
+ int ref;
+ Ipself *next; /* free list */
+};
+
+/*
+ * table of all Ipself entries, hashed with hashipa()
+ */
+struct Ipselftab
+{
+ QLock qlock;
+ int inited;
+ int acceptall; /* true if an interface has the null address */
+ Ipself *hash[NHASH]; /* hash chains */
+};
+
+/*
+ * Multicast addresses are chained onto a Chan so that
+ * we can remove them when the Chan is closed.
+ */
+typedef struct Ipmcast Ipmcast;
+struct Ipmcast
+{
+ Ipmcast *next;
+ uchar ma[IPaddrlen]; /* multicast address */
+ uchar ia[IPaddrlen]; /* interface address */
+};
+
+/* quick hash for ip addresses */
+#define hashipa(a) ( (ulong)(((a)[IPaddrlen-2]<<8) | (a)[IPaddrlen-1])%NHASH )
+
+static char tifc[] = "ifc ";
+
+static void addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type);
+static void remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a);
+static char* ipifcjoinmulti(Ipifc *ifc, char **argv, int argc);
+static char* ipifcleavemulti(Ipifc *ifc, char **argv, int argc);
+static void ipifcregisterproxy(Fs*, Ipifc*, uchar*);
+static char* ipifcremlifc(Ipifc*, Iplifc*);
+
+/*
+ * link in a new medium
+ *
+ * med goes in the first free slot of media[]. The last slot is never
+ * used, so the table always ends in nil, which is what ipfindmedium()
+ * scans for. If the table is full med is silently dropped.
+ */
+void
+addipmedium(Medium *med)
+{
+	Medium **slot, **last;
+
+	last = &media[nelem(media)-1];
+	for(slot = media; slot < last; slot++){
+		if(*slot == nil){
+			*slot = med;
+			return;
+		}
+	}
+}
+
+/*
+ * find the medium with this name
+ *
+ * scans media[] up to its nil terminator; returns nil if no
+ * medium of that name has been added.
+ */
+Medium*
+ipfindmedium(char *name)
+{
+	Medium **mp;
+
+	for(mp = media; *mp != nil; mp++){
+		if(strcmp((*mp)->name, name) == 0)
+			return *mp;
+	}
+	return nil;
+}
+
+/*
+ * attach a device (or pkt driver) to the interface.
+ * called with c locked
+ *
+ * argv[1] names the medium; argv[2], if present, is used as the
+ * device name. Returns nil on success or an error string.
+ */
+static char*
+ipifcbind(Conv *c, char **argv, int argc)
+{
+ Ipifc *ifc;
+ Medium *m;
+
+ if(argc < 2)
+ return Ebadarg;
+
+ ifc = (Ipifc*)c->ptcl;
+
+ /* bind the device to the interface */
+ m = ipfindmedium(argv[1]);
+ if(m == nil)
+ return "unknown interface type";
+
+ WLOCK(ifc);
+ if(ifc->m != nil){
+ WUNLOCK(ifc);
+ return "interface already bound";
+ }
+ /* if anything below raises an error, drop the write lock and re-raise */
+ if(waserror()){
+ WUNLOCK(ifc);
+ nexterror();
+ }
+
+ /* do medium specific binding */
+ (*m->bind)(ifc, argc, argv);
+
+ /* set the bound device name */
+ if(argc > 2)
+ strncpy(ifc->dev, argv[2], sizeof(ifc->dev));
+ else
+ snprint(ifc->dev, sizeof ifc->dev, "%s%d", m->name, c->x);
+ /* strncpy does not terminate on truncation */
+ ifc->dev[sizeof(ifc->dev)-1] = 0;
+
+ /* set up parameters */
+ ifc->m = m;
+ ifc->mintu = ifc->m->mintu;
+ ifc->maxtu = ifc->m->maxtu;
+ /* a medium that does not unbind on close keeps the conversation in use */
+ if(ifc->m->unbindonclose == 0)
+ ifc->conv->inuse++;
+ ifc->rp.mflag = 0; /* default not managed */
+ ifc->rp.oflag = 0;
+ ifc->rp.maxraint = 600000; /* millisecs */
+ ifc->rp.minraint = 200000;
+ ifc->rp.linkmtu = 0; /* no mtu sent */
+ ifc->rp.reachtime = 0;
+ ifc->rp.rxmitra = 0;
+ ifc->rp.ttl = MAXTTL;
+ ifc->rp.routerlt = 3 * ifc->rp.maxraint;
+
+ /* any ancillary structures (like routes) no longer pertain */
+ ifc->ifcid++;
+
+ /*
+ * reopen all the queues closed by a previous unbind
+ * NOTE(review): ipifcunbind() closes rq, wq and sq, but eq
+ * rather than wq is reopened here -- confirm this is intended.
+ */
+ qreopen(c->rq);
+ qreopen(c->eq);
+ qreopen(c->sq);
+
+ WUNLOCK(ifc);
+ poperror();
+
+ return nil;
+}
+
+/*
+ * detach a device from an interface, close the interface
+ * called with ifc->conv closed
+ *
+ * returns nil; a failure to remove a logical interface is raised
+ * with error() instead of being returned.
+ */
+static char*
+ipifcunbind(Ipifc *ifc)
+{
+ char *err;
+
+ /*
+ * the handler is installed before the lock is taken; the only
+ * error() call in this function comes after WLOCK, so the
+ * handler's WUNLOCK pairs with it.
+ */
+ if(waserror()){
+ WUNLOCK(ifc);
+ nexterror();
+ }
+ WLOCK(ifc);
+
+ /* dissociate routes */
+ /* undo the inuse++ done by ipifcbind() for this kind of medium */
+ if(ifc->m != nil && ifc->m->unbindonclose == 0)
+ ifc->conv->inuse--;
+ ifc->ifcid++;
+
+ /* disassociate logical interfaces (before zeroing ifc->arg) */
+ while(ifc->lifc){
+ err = ipifcremlifc(ifc, ifc->lifc);
+ /*
+ * note: err non-zero means lifc not found,
+ * which can't happen in this case.
+ */
+ if(err)
+ error(err);
+ }
+
+ /* disassociate device */
+ if(ifc->m && ifc->m->unbind)
+ (*ifc->m->unbind)(ifc);
+ memset(ifc->dev, 0, sizeof(ifc->dev));
+ ifc->arg = nil;
+ ifc->reassemble = 0;
+
+ /* close queues to stop queuing of packets */
+ qclose(ifc->conv->rq);
+ qclose(ifc->conv->wq);
+ qclose(ifc->conv->sq);
+
+ ifc->m = nil;
+ WUNLOCK(ifc);
+ poperror();
+ return nil;
+}
+
+/* format of the first line of an interface's state: device and router parameters */
+char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag"
+" %d maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt"
+" %d pktin %lud pktout %lud errin %lud errout %lud\n";
+
+/* format of each following line: local address, mask, remote, valid and preferred lifetimes */
+char slineformat[] = " %-40I %-10M %-40I %-12lud %-12lud\n";
+
+/*
+ * format the state of interface c into state, at most n bytes:
+ * one sfixedformat line, then one slineformat line per logical
+ * interface (or an empty line if there are none).
+ * Returns the number of bytes produced.
+ */
+static int
+ipifcstate(Conv *c, char *state, int n)
+{
+ Ipifc *ifc;
+ Iplifc *lifc;
+ int m;
+
+ ifc = (Ipifc*)c->ptcl;
+ /* note: the fixed line is formatted before the read lock is taken */
+ m = snprint(state, n, sfixedformat,
+ ifc->dev, ifc->maxtu, ifc->sendra6, ifc->recvra6,
+ ifc->rp.mflag, ifc->rp.oflag, ifc->rp.maxraint,
+ ifc->rp.minraint, ifc->rp.linkmtu, ifc->rp.reachtime,
+ ifc->rp.rxmitra, ifc->rp.ttl, ifc->rp.routerlt,
+ ifc->in, ifc->out, ifc->inerr, ifc->outerr);
+
+ RLOCK(ifc);
+ /* stop adding lines once the buffer is used up */
+ for(lifc = ifc->lifc; lifc && n > m; lifc = lifc->next)
+ m += snprint(state+m, n - m, slineformat, lifc->local,
+ lifc->mask, lifc->remote, lifc->validlt, lifc->preflt);
+ if(ifc->lifc == nil)
+ m += snprint(state+m, n - m, "\n");
+ RUNLOCK(ifc);
+ return m;
+}
+
+/*
+ * for each logical interface of c, write a line giving its local
+ * address followed by every self-cache address linked to it.
+ * Returns the number of bytes produced.
+ */
+static int
+ipifclocal(Conv *c, char *state, int n)
+{
+ Ipifc *ifc;
+ Iplifc *lifc;
+ Iplink *link;
+ int m;
+
+ ifc = (Ipifc*)c->ptcl;
+ m = 0;
+
+ RLOCK(ifc);
+ for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ m += snprint(state+m, n - m, "%-40.40I ->", lifc->local);
+ /* links of one lifc are chained through lifclink */
+ for(link = lifc->link; link; link = link->lifclink)
+ m += snprint(state+m, n - m, " %-40.40I", link->self->a);
+ m += snprint(state+m, n - m, "\n");
+ }
+ RUNLOCK(ifc);
+ return m;
+}
+
+/*
+ * an interface is in use while a medium is bound to it
+ */
+static int
+ipifcinuse(Conv *c)
+{
+	return ((Ipifc*)c->ptcl)->m != nil;
+}
+
+/*
+ * called when a process writes to an interface's 'data'
+ *
+ * takes one block from the write queue and passes it to the
+ * medium's pktin routine; the block is freed instead if no medium
+ * is bound or the medium has no pktin.
+ */
+static void
+ipifckick(void *x)
+{
+ Conv *c = x;
+ Block *bp;
+ Ipifc *ifc;
+
+ bp = qget(c->wq);
+ if(bp == nil)
+ return;
+
+ ifc = (Ipifc*)c->ptcl;
+ /* drop the packet if the read lock is not available -- presumably CANRLOCK does not block; confirm */
+ if(!CANRLOCK(ifc)){
+ freeb(bp);
+ return;
+ }
+ /* release the read lock if pktin raises an error */
+ if(waserror()){
+ RUNLOCK(ifc);
+ nexterror();
+ }
+ if(ifc->m == nil || ifc->m->pktin == nil)
+ freeb(bp);
+ else
+ (*ifc->m->pktin)(c->p->f, ifc, bp);
+ RUNLOCK(ifc);
+ poperror();
+}
+
+/*
+ * called when a new ipifc structure is created
+ *
+ * opens the conversation's rq, sq and wq queues and resets the
+ * Ipifc to the unbound state. Writes to wq kick ipifckick().
+ */
+static void
+ipifccreate(Conv *c)
+{
+ Ipifc *ifc;
+
+ c->rq = qopen(QMAX, 0, 0, 0);
+ c->sq = qopen(2*QMAX, 0, 0, 0);
+ c->wq = qopen(QMAX, Qkick, ipifckick, c);
+ ifc = (Ipifc*)c->ptcl;
+ ifc->conv = c;
+ ifc->unbinding = 0;
+ ifc->m = nil; /* no medium bound yet */
+ ifc->reassemble = 0;
+}
+
+/*
+ * called after last close of ipifc data or ctl
+ *
+ * unbinds the interface if its medium asks for unbind on close;
+ * otherwise does nothing.
+ * NOTE(review): the original comment said "called with c locked,
+ * we must unlock", but nothing here unlocks c -- confirm against
+ * the caller.
+ */
+static void
+ipifcclose(Conv *c)
+{
+	Ipifc *ifc;
+	Medium *m;
+
+	ifc = (Ipifc*)c->ptcl;
+	m = ifc->m;
+	if(m == nil)
+		return;
+	if(m->unbindonclose)
+		ipifcunbind(ifc);
+}
+
+/*
+ * change an interface's mtu
+ *
+ * argv[1] is the new mtu; it must lie within the bound medium's
+ * [mintu, maxtu]. Returns nil on success, Ebadarg if the interface
+ * is unbound, the argument is missing, or the value is out of range.
+ */
+char*
+ipifcsetmtu(Ipifc *ifc, char **argv, int argc)
+{
+	int mtu;
+
+	if(argc < 2 || ifc->m == nil)
+		return Ebadarg;
+
+	mtu = strtoul(argv[1], 0, 0);
+	if(mtu >= ifc->m->mintu && mtu <= ifc->m->maxtu){
+		ifc->maxtu = mtu;
+		return nil;
+	}
+	return Ebadarg;
+}
+
+/*
+ * add an address to an interface.
+ *
+ * arguments, as parsed below:
+ *	argv[1] local address
+ *	argv[2] mask (default: defmask of the address)
+ *	argv[3] remote address (default: address masked by mask)
+ *	argv[4] mtu (ignored if outside the medium's range)
+ *	argv[5] "proxy" to mark the route Rproxy
+ * tentative is forced to 0 for v4 addresses. If lifcp is non-nil its
+ * onlink, autoflag, validlt, preflt and origint are copied to the
+ * logical interface. Returns nil on success or an error string.
+ */
+char*
+ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp)
+{
+ int i, type, mtu, sendnbrdisc = 0;
+ uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen];
+ uchar bcast[IPaddrlen], net[IPaddrlen];
+ Iplifc *lifc, **l;
+ Fs *f;
+
+ if(ifc->m == nil)
+ return "ipifc not yet bound to device";
+
+ f = ifc->conv->p->f;
+
+ type = Rifc;
+ memset(ip, 0, IPaddrlen);
+ memset(mask, 0, IPaddrlen);
+ memset(rem, 0, IPaddrlen);
+ /* cases fall through from the longest form to the shortest */
+ switch(argc){
+ case 6:
+ if(strcmp(argv[5], "proxy") == 0)
+ type |= Rproxy;
+ /* fall through */
+ case 5:
+ mtu = strtoul(argv[4], 0, 0);
+ if(mtu >= ifc->m->mintu && mtu <= ifc->m->maxtu)
+ ifc->maxtu = mtu;
+ /* fall through */
+ case 4:
+ if (parseip(ip, argv[1]) == -1 || parseip(rem, argv[3]) == -1)
+ return Ebadip;
+ parseipmask(mask, argv[2]);
+ maskip(rem, mask, net);
+ break;
+ case 3:
+ if (parseip(ip, argv[1]) == -1)
+ return Ebadip;
+ parseipmask(mask, argv[2]);
+ maskip(ip, mask, rem);
+ maskip(rem, mask, net);
+ break;
+ case 2:
+ if (parseip(ip, argv[1]) == -1)
+ return Ebadip;
+ memmove(mask, defmask(ip), IPaddrlen);
+ maskip(ip, mask, rem);
+ maskip(rem, mask, net);
+ break;
+ default:
+ return Ebadarg;
+ }
+ if(isv4(ip))
+ tentative = 0;
+ WLOCK(ifc);
+
+ /* ignore if this is already a local address for this ifc */
+ /* (its tentative flag and lifcp-derived fields are still refreshed) */
+ for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
+ if(ipcmp(lifc->local, ip) == 0) {
+ if(lifc->tentative != tentative)
+ lifc->tentative = tentative;
+ if(lifcp) {
+ lifc->onlink = lifcp->onlink;
+ lifc->autoflag = lifcp->autoflag;
+ lifc->validlt = lifcp->validlt;
+ lifc->preflt = lifcp->preflt;
+ lifc->origint = lifcp->origint;
+ }
+ goto out;
+ }
+ }
+
+ /* add the address to the list of logical ifc's for this ifc */
+ lifc = smalloc(sizeof(Iplifc));
+ ipmove(lifc->local, ip);
+ ipmove(lifc->mask, mask);
+ ipmove(lifc->remote, rem);
+ ipmove(lifc->net, net);
+ lifc->tentative = tentative;
+ if(lifcp) {
+ lifc->onlink = lifcp->onlink;
+ lifc->autoflag = lifcp->autoflag;
+ lifc->validlt = lifcp->validlt;
+ lifc->preflt = lifcp->preflt;
+ lifc->origint = lifcp->origint;
+ } else { /* default values */
+ lifc->onlink = lifc->autoflag = 1;
+ lifc->validlt = lifc->preflt = ~0L;
+ lifc->origint = NOW / 1000;
+ }
+ lifc->next = nil;
+
+ /* append to the end of the ifc's list */
+ for(l = &ifc->lifc; *l; l = &(*l)->next)
+ ;
+ *l = lifc;
+
+ /* check for point-to-point interface */
+ if(ipcmp(ip, v6loopback)) /* skip v6 loopback, it's a special address */
+ if(ipcmp(mask, IPallbits) == 0)
+ type |= Rptpt;
+
+ /* add local routes */
+ if(isv4(ip))
+ v4addroute(f, tifc, rem+IPv4off, mask+IPv4off, rem+IPv4off, type);
+ else
+ v6addroute(f, tifc, rem, mask, rem, type);
+
+ addselfcache(f, ifc, lifc, ip, Runi);
+
+ /* a proxied point-to-point address gets no broadcast/multicast entries */
+ if((type & (Rproxy|Rptpt)) == (Rproxy|Rptpt)){
+ ipifcregisterproxy(f, ifc, rem);
+ goto out;
+ }
+
+ if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0) {
+ /* add subnet directed broadcast address to the self cache */
+ for(i = 0; i < IPaddrlen; i++)
+ bcast[i] = (ip[i] & mask[i]) | ~mask[i];
+ addselfcache(f, ifc, lifc, bcast, Rbcast);
+
+ /* add subnet directed network address to the self cache */
+ for(i = 0; i < IPaddrlen; i++)
+ bcast[i] = (ip[i] & mask[i]) & mask[i];
+ addselfcache(f, ifc, lifc, bcast, Rbcast);
+
+ /* add network directed broadcast address to the self cache */
+ /* note: from here on mask holds the default mask, not the caller's */
+ memmove(mask, defmask(ip), IPaddrlen);
+ for(i = 0; i < IPaddrlen; i++)
+ bcast[i] = (ip[i] & mask[i]) | ~mask[i];
+ addselfcache(f, ifc, lifc, bcast, Rbcast);
+
+ /* add network directed network address to the self cache */
+ memmove(mask, defmask(ip), IPaddrlen);
+ for(i = 0; i < IPaddrlen; i++)
+ bcast[i] = (ip[i] & mask[i]) & mask[i];
+ addselfcache(f, ifc, lifc, bcast, Rbcast);
+
+ addselfcache(f, ifc, lifc, IPv4bcast, Rbcast);
+ }
+ else {
+ if(ipcmp(ip, v6loopback) == 0) {
+ /* add node-local mcast address */
+ addselfcache(f, ifc, lifc, v6allnodesN, Rmulti);
+
+ /* add route for all node multicast */
+ v6addroute(f, tifc, v6allnodesN, v6allnodesNmask,
+ v6allnodesN, Rmulti);
+ }
+
+ /* add all nodes multicast address */
+ addselfcache(f, ifc, lifc, v6allnodesL, Rmulti);
+
+ /* add route for all nodes multicast */
+ v6addroute(f, tifc, v6allnodesL, v6allnodesLmask, v6allnodesL,
+ Rmulti);
+
+ /* add solicited-node multicast address */
+ ipv62smcast(bcast, ip);
+ addselfcache(f, ifc, lifc, bcast, Rmulti);
+
+ sendnbrdisc = 1;
+ }
+
+ /* register the address on this network for address resolution */
+ if(isv4(ip) && ifc->m->areg != nil)
+ (*ifc->m->areg)(ifc, ip);
+
+out:
+ WUNLOCK(ifc);
+ /* a new tentative v6 address: send a neighbor solicitation, after dropping the lock */
+ if(tentative && sendnbrdisc)
+ icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+ return nil;
+}
+
+/*
+ * remove a logical interface from an ifc
+ * always called with ifc WLOCK'd
+ *
+ * unlinks lifc from ifc, drops its self-cache entries and routes,
+ * and frees it. Returns nil, or an error string if lifc (which may
+ * be nil) is not on ifc's list.
+ */
+static char*
+ipifcremlifc(Ipifc *ifc, Iplifc *lifc)
+{
+ Iplifc **l;
+ Fs *f;
+
+ f = ifc->conv->p->f;
+
+ /*
+ * find address on this interface and remove from chain.
+ * for pt to pt we actually specify the remote address as the
+ * addresss to remove.
+ */
+ for(l = &ifc->lifc; *l != nil && *l != lifc; l = &(*l)->next)
+ ;
+ if(*l == nil)
+ return "address not on this interface";
+ *l = lifc->next;
+
+ /* disassociate any addresses */
+ /* loop until remselfcache() has dropped every link of this lifc */
+ while(lifc->link)
+ remselfcache(f, ifc, lifc, lifc->link->self->a);
+
+ /* remove the route for this logical interface */
+ if(isv4(lifc->local))
+ v4delroute(f, lifc->remote+IPv4off, lifc->mask+IPv4off, 1);
+ else {
+ v6delroute(f, lifc->remote, lifc->mask, 1);
+ if(ipcmp(lifc->local, v6loopback) == 0)
+ /* remove route for all node multicast */
+ v6delroute(f, v6allnodesN, v6allnodesNmask, 1);
+ else if(memcmp(lifc->local, v6linklocal, v6llpreflen) == 0)
+ /* remove route for all link multicast */
+ v6delroute(f, v6allnodesL, v6allnodesLmask, 1);
+ }
+
+ free(lifc);
+ return nil;
+}
+
+/*
+ * remove an address from an interface.
+ * called with c->car locked
+ *
+ * argv[1] is the local address, argv[2] the mask and argv[3], if
+ * given, the remote address (default: address masked by mask).
+ * All three must match a logical interface exactly.
+ */
+char*
+ipifcrem(Ipifc *ifc, char **argv, int argc)
+{
+ char *rv;
+ uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen];
+ Iplifc *lifc;
+
+ if(argc < 3)
+ return Ebadarg;
+
+ if (parseip(ip, argv[1]) == -1)
+ return Ebadip;
+ parseipmask(mask, argv[2]);
+ if(argc < 4)
+ maskip(ip, mask, rem);
+ else
+ if (parseip(rem, argv[3]) == -1)
+ return Ebadip;
+
+ WLOCK(ifc);
+
+ /*
+ * find address on this interface and remove from chain.
+ * for pt to pt we actually specify the remote address as the
+ * addresss to remove.
+ */
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next) {
+ if (memcmp(ip, lifc->local, IPaddrlen) == 0
+ && memcmp(mask, lifc->mask, IPaddrlen) == 0
+ && memcmp(rem, lifc->remote, IPaddrlen) == 0)
+ break;
+ }
+
+ /* lifc is nil if nothing matched; ipifcremlifc() then reports the error */
+ rv = ipifcremlifc(ifc, lifc);
+ WUNLOCK(ifc);
+ return rv;
+}
+
+/*
+ * distribute routes to active interfaces like the
+ * TRIP linecards
+ *
+ * calls the addroute routine of every bound medium that has one.
+ */
+void
+ipifcaddroute(Fs *f, int vers, uchar *addr, uchar *mask, uchar *gate, int type)
+{
+	Conv **cp, **end;
+	Ipifc *ifc;
+	Medium *m;
+
+	end = &f->ipifc->conv[f->ipifc->nc];
+	for(cp = f->ipifc->conv; cp < end; cp++){
+		if(*cp == nil)
+			continue;
+		ifc = (Ipifc*)(*cp)->ptcl;
+		m = ifc->m;
+		if(m == nil || m->addroute == nil)
+			continue;
+		m->addroute(ifc, vers, addr, mask, gate, type);
+	}
+}
+
+/*
+ * tell every bound medium that has a remroute routine that the
+ * route addr/mask is gone.
+ */
+void
+ipifcremroute(Fs *f, int vers, uchar *addr, uchar *mask)
+{
+	Conv **cp, **end;
+	Ipifc *ifc;
+	Medium *m;
+
+	end = &f->ipifc->conv[f->ipifc->nc];
+	for(cp = f->ipifc->conv; cp < end; cp++){
+		if(*cp == nil)
+			continue;
+		ifc = (Ipifc*)(*cp)->ptcl;
+		m = ifc->m;
+		if(m == nil || m->remroute == nil)
+			continue;
+		m->remroute(ifc, vers, addr, mask);
+	}
+}
+
+/*
+ * associate an address with the interface. This wipes out any previous
+ * addresses. This is a macro that means, remove all the old interfaces
+ * and add a new one.
+ *
+ * argv/argc are passed unchanged to ipifcadd().
+ */
+static char*
+ipifcconnect(Conv* c, char **argv, int argc)
+{
+ char *err;
+ Ipifc *ifc;
+
+ ifc = (Ipifc*)c->ptcl;
+
+ if(ifc->m == nil)
+ return "ipifc not yet bound to device";
+
+ /* the handler is set up before WLOCK; error() below is only reached with the lock held */
+ if(waserror()){
+ WUNLOCK(ifc);
+ nexterror();
+ }
+ WLOCK(ifc);
+ while(ifc->lifc){
+ err = ipifcremlifc(ifc, ifc->lifc);
+ if(err)
+ error(err);
+ }
+ WUNLOCK(ifc);
+ poperror();
+
+ /* ipifcadd() takes the write lock itself */
+ err = ipifcadd(ifc, argv, argc, 0, nil);
+ if(err)
+ return err;
+
+ Fsconnected(c, nil);
+ return nil;
+}
+
+/*
+ * set router-advertisement parameters from name/value pairs in
+ * argv[1..]. Returns Ebadarg for an odd number of arguments, an
+ * unknown name, or maxraint < minraint.
+ *
+ * note: values are stored as they are parsed, so pairs before a bad
+ * name stay applied; on the consistency failure only maxraint and
+ * minraint are restored.
+ */
+char*
+ipifcra6(Ipifc *ifc, char **argv, int argc)
+{
+ int i, argsleft, vmax = ifc->rp.maxraint, vmin = ifc->rp.minraint;
+
+ argsleft = argc - 1;
+ i = 1;
+
+ if(argsleft % 2 != 0)
+ return Ebadarg;
+
+ while (argsleft > 1) {
+ if(strcmp(argv[i], "recvra") == 0)
+ ifc->recvra6 = (atoi(argv[i+1]) != 0);
+ else if(strcmp(argv[i], "sendra") == 0)
+ ifc->sendra6 = (atoi(argv[i+1]) != 0);
+ else if(strcmp(argv[i], "mflag") == 0)
+ ifc->rp.mflag = (atoi(argv[i+1]) != 0);
+ else if(strcmp(argv[i], "oflag") == 0)
+ ifc->rp.oflag = (atoi(argv[i+1]) != 0);
+ else if(strcmp(argv[i], "maxraint") == 0)
+ ifc->rp.maxraint = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "minraint") == 0)
+ ifc->rp.minraint = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "linkmtu") == 0)
+ ifc->rp.linkmtu = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "reachtime") == 0)
+ ifc->rp.reachtime = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "rxmitra") == 0)
+ ifc->rp.rxmitra = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "ttl") == 0)
+ ifc->rp.ttl = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "routerlt") == 0)
+ ifc->rp.routerlt = atoi(argv[i+1]);
+ else
+ return Ebadarg;
+
+ argsleft -= 2;
+ i += 2;
+ }
+
+ /* consistency check */
+ if(ifc->rp.maxraint < ifc->rp.minraint) {
+ ifc->rp.maxraint = vmax;
+ ifc->rp.minraint = vmin;
+ return Ebadarg;
+ }
+ return nil;
+}
+
+/*
+ * non-standard control messages.
+ * called with c->car locked.
+ *
+ * dispatches on argv[0]; returns the handler's result, or
+ * "unsupported ctl" for an unknown request.
+ * assumes argc >= 1 -- TODO confirm the caller guarantees it.
+ */
+static char*
+ipifcctl(Conv* c, char**argv, int argc)
+{
+ Ipifc *ifc;
+ int i;
+
+ ifc = (Ipifc*)c->ptcl;
+ if(strcmp(argv[0], "add") == 0)
+ return ipifcadd(ifc, argv, argc, 0, nil);
+ else if(strcmp(argv[0], "try") == 0)
+ /* same as add, but the address starts out tentative */
+ return ipifcadd(ifc, argv, argc, 1, nil);
+ else if(strcmp(argv[0], "remove") == 0)
+ return ipifcrem(ifc, argv, argc);
+ else if(strcmp(argv[0], "unbind") == 0)
+ return ipifcunbind(ifc);
+ else if(strcmp(argv[0], "joinmulti") == 0)
+ return ipifcjoinmulti(ifc, argv, argc);
+ else if(strcmp(argv[0], "leavemulti") == 0)
+ return ipifcleavemulti(ifc, argv, argc);
+ else if(strcmp(argv[0], "mtu") == 0)
+ return ipifcsetmtu(ifc, argv, argc);
+ else if(strcmp(argv[0], "reassemble") == 0){
+ ifc->reassemble = 1;
+ return nil;
+ }
+ else if(strcmp(argv[0], "iprouting") == 0){
+ /* with no argument, routing is turned on */
+ i = 1;
+ if(argc > 1)
+ i = atoi(argv[1]);
+ iprouting(c->p->f, i);
+ return nil;
+ }
+ else if(strcmp(argv[0], "add6") == 0)
+ return ipifcadd6(ifc, argv, argc);
+ else if(strcmp(argv[0], "ra6") == 0)
+ return ipifcra6(ifc, argv, argc);
+ return "unsupported ctl";
+}
+
+/*
+ * stats for the ipifc protocol are the ip stats of its Fs
+ */
+int
+ipifcstats(Proto *ipifc, char *buf, int len)
+{
+ return ipstats(ipifc->f, buf, len);
+}
+
+/*
+ * allocate the ipifc protocol, fill in its operations, allocate
+ * the self-address table and register the protocol with f.
+ */
+void
+ipifcinit(Fs *f)
+{
+ Proto *ipifc;
+
+ ipifc = smalloc(sizeof(Proto));
+ ipifc->name = "ipifc";
+ ipifc->connect = ipifcconnect;
+ ipifc->announce = nil;
+ ipifc->bind = ipifcbind;
+ ipifc->state = ipifcstate;
+ ipifc->create = ipifccreate;
+ ipifc->close = ipifcclose;
+ ipifc->rcv = nil;
+ ipifc->ctl = ipifcctl;
+ ipifc->advise = nil;
+ ipifc->stats = ipifcstats;
+ ipifc->inuse = ipifcinuse;
+ ipifc->local = ipifclocal;
+ ipifc->ipproto = -1;
+ ipifc->nc = Maxmedia; /* at most Maxmedia interfaces */
+ ipifc->ptclsize = sizeof(Ipifc);
+
+ f->ipifc = ipifc; /* hack for ipifcremroute, findipifc, ... */
+ f->self = smalloc(sizeof(Ipselftab)); /* hack for ipforme */
+
+ Fsproto(f, ipifc);
+}
+
+/*
+ * add to self routing cache
+ * called with c->car locked
+ *
+ * makes sure address a has an Ipself entry and that the entry is
+ * linked to lifc. A new link also adds a host route for a and, for
+ * Rmulti addresses, tells the medium to join the group; an existing
+ * link just gains a reference.
+ */
+static void
+addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type)
+{
+ Ipself *p;
+ Iplink *lp;
+ int h;
+
+ QLOCK(f->self);
+
+ /* see if the address already exists */
+ h = hashipa(a);
+ for(p = f->self->hash[h]; p; p = p->next)
+ if(memcmp(a, p->a, IPaddrlen) == 0)
+ break;
+
+ /* allocate a local address and add to hash chain */
+ if(p == nil){
+ p = smalloc(sizeof(*p));
+ ipmove(p->a, a);
+ p->type = type;
+ p->next = f->self->hash[h];
+ f->self->hash[h] = p;
+
+ /* if the null address, accept all packets */
+ if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
+ f->self->acceptall = 1;
+ }
+
+ /* look for a link for this lifc */
+ for(lp = p->link; lp; lp = lp->selflink)
+ if(lp->lifc == lifc)
+ break;
+
+ /* allocate a lifc-to-local link and link to both */
+ if(lp == nil){
+ lp = smalloc(sizeof(*lp));
+ lp->ref = 1;
+ lp->lifc = lifc;
+ lp->self = p;
+ /* selflink chains the links of one address, lifclink those of one lifc */
+ lp->selflink = p->link;
+ p->link = lp;
+ lp->lifclink = lifc->link;
+ lifc->link = lp;
+
+ /* add to routing table */
+ if(isv4(a))
+ v4addroute(f, tifc, a+IPv4off, IPallbits+IPv4off,
+ a+IPv4off, type);
+ else
+ v6addroute(f, tifc, a, IPallbits, a, type);
+
+ if((type & Rmulti) && ifc->m->addmulti != nil)
+ (*ifc->m->addmulti)(ifc, a, lifc->local);
+ } else
+ lp->ref++;
+
+ QUNLOCK(f->self);
+}
+
+/*
+ * These structures are unlinked from their chains while
+ * other threads may be using them. To avoid excessive locking,
+ * just put them aside for a while before freeing them.
+ * called with f->self locked
+ */
+static Iplink *freeiplink;
+static Ipself *freeipself;
+
+/*
+ * put p aside on the freeiplink list for 5 seconds and free the
+ * entries already on the list whose 5 seconds are up.
+ *
+ * The expiry test used to be `expire > now', which is backwards: it
+ * freed entries that were still inside their grace period and kept
+ * the expired ones forever. The age of an entry is computed with
+ * unsigned arithmetic so the test also survives NOW wrapping around.
+ */
+static void
+iplinkfree(Iplink *p)
+{
+	Iplink **l, *np;
+	ulong now = NOW, age;
+
+	l = &freeiplink;
+	for(np = *l; np; np = *l){
+		/* expire was set to (time parked)+5000 below */
+		age = now - (np->expire - 5000);
+		if(age >= 5000){
+			*l = np->next;
+			free(np);
+			continue;
+		}
+		l = &np->next;
+	}
+	p->expire = now + 5000;	/* give other threads 5 secs to get out */
+	p->next = nil;
+	*l = p;
+}
+
+/*
+ * put p aside on the freeipself list for 5 seconds and free the
+ * entries already on the list whose 5 seconds are up.
+ * called with f->self locked.
+ *
+ * The expiry test used to be `expire > now', which is backwards: it
+ * freed entries that were still inside their grace period and kept
+ * the expired ones forever. The age of an entry is computed with
+ * unsigned arithmetic so the test also survives NOW wrapping around.
+ */
+static void
+ipselffree(Ipself *p)
+{
+	Ipself **l, *np;
+	ulong now = NOW, age;
+
+	l = &freeipself;
+	for(np = *l; np; np = *l){
+		/* expire was set to (time parked)+5000 below */
+		age = now - (np->expire - 5000);
+		if(age >= 5000){
+			*l = np->next;
+			free(np);
+			continue;
+		}
+		l = &np->next;
+	}
+	p->expire = now + 5000;	/* give other threads 5 secs to get out */
+	p->next = nil;
+	*l = p;
+}
+
+/*
+ * Decrement reference for this address on this link.
+ * Unlink from selftab if this is the last ref.
+ * called with c->car locked
+ *
+ * does nothing if a has no selftab entry or the entry is not
+ * linked to lifc.
+ */
+static void
+remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a)
+{
+ Ipself *p, **l;
+ Iplink *link, **l_self, **l_lifc;
+
+ QLOCK(f->self);
+
+ /* find the unique selftab entry */
+ /* l is left pointing at the pointer that refers to p, for unlinking below */
+ l = &f->self->hash[hashipa(a)];
+ for(p = *l; p; p = *l){
+ if(ipcmp(p->a, a) == 0)
+ break;
+ l = &p->next;
+ }
+
+ if(p == nil)
+ goto out;
+
+ /*
+ * walk down links from an ifc looking for one
+ * that matches the selftab entry
+ */
+ l_lifc = &lifc->link;
+ for(link = *l_lifc; link; link = *l_lifc){
+ if(link->self == p)
+ break;
+ l_lifc = &link->lifclink;
+ }
+
+ if(link == nil)
+ goto out;
+
+ /*
+ * walk down the links from the selftab looking for
+ * the one we just found
+ */
+ l_self = &p->link;
+ for(link = *l_self; link; link = *l_self){
+ if(link == *l_lifc)
+ break;
+ l_self = &link->selflink;
+ }
+
+ /* the link is on the lifc chain, so it must be on the self chain too */
+ if(link == nil)
+ panic("remselfcache");
+
+ if(--(link->ref) != 0)
+ goto out;
+
+ if((p->type & Rmulti) && ifc->m->remmulti != nil)
+ (*ifc->m->remmulti)(ifc, a, lifc->local);
+
+ /* ref == 0, remove from both chains and free the link */
+ *l_lifc = link->lifclink;
+ *l_self = link->selflink;
+ iplinkfree(link);
+
+ /* other lifcs still use this address: keep the selftab entry */
+ if(p->link != nil)
+ goto out;
+
+ /* remove from routing table */
+ if(isv4(a))
+ v4delroute(f, a+IPv4off, IPallbits+IPv4off, 1);
+ else
+ v6delroute(f, a, IPallbits, 1);
+
+ /* no more links, remove from hash and free */
+ *l = p->next;
+ ipselffree(p);
+
+ /* if IPnoaddr, forget */
+ if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
+ f->self->acceptall = 0;
+
+out:
+ QUNLOCK(f->self);
+}
+
+/* one line per self address: address, number of links, route type */
+static char *stformat = "%-44.44I %2.2d %4.4s\n";
+/* NOTE(review): Nstformat is not referenced in the code visible here -- confirm it is still needed */
+enum
+{
+ Nstformat= 41,
+};
+
+/*
+ * format the self-address table into cp, at most n bytes, skipping
+ * output that lies before offset. Returns the number of bytes left
+ * in cp.
+ */
+long
+ipselftabread(Fs *f, char *cp, ulong offset, int n)
+{
+ int i, m, nifc, off;
+ Ipself *p;
+ Iplink *link;
+ char state[8];
+
+ m = 0;
+ off = offset;
+ QLOCK(f->self);
+ for(i = 0; i < NHASH && m < n; i++){
+ for(p = f->self->hash[i]; p != nil && m < n; p = p->next){
+ /* count the logical interfaces linked to this address */
+ nifc = 0;
+ for(link = p->link; link; link = link->selflink)
+ nifc++;
+ routetype(p->type, state);
+ m += snprint(cp + m, n - m, stformat, p->a, nifc, state);
+ /*
+ * still before the requested offset: discard what
+ * was just formatted and start again at cp.
+ * NOTE(review): lines are dropped whole, so this looks
+ * like it expects offset to fall on a line boundary -- confirm.
+ */
+ if(off > 0){
+ off -= m;
+ m = 0;
+ }
+ }
+ }
+ QUNLOCK(f->self);
+ return m;
+}
+
+/*
+ * return the tentative flag of the logical interface behind the
+ * first link of addr's self-cache entry, or 0 if addr is not in
+ * the cache. The table is read without taking its lock.
+ */
+int
+iptentative(Fs *f, uchar *addr)
+{
+	Ipself *p;
+
+	for(p = f->self->hash[hashipa(addr)]; p != nil; p = p->next){
+		if(ipcmp(addr, p->a) != 0)
+			continue;
+		return p->link->lifc->tentative;
+	}
+	return 0;
+}
+
+/*
+ * returns the type stored for addr in the self cache:
+ * 0 - no match
+ * Runi
+ * Rbcast
+ * Rmulti
+ * an address that is not in the cache still yields Runi while
+ * acceptall is set. The table is read without taking its lock.
+ */
+int
+ipforme(Fs *f, uchar *addr)
+{
+ Ipself *p;
+
+ p = f->self->hash[hashipa(addr)];
+ for(; p; p = p->next){
+ if(ipcmp(addr, p->a) == 0)
+ return p->type;
+ }
+
+ /* hack to say accept anything */
+ if(f->self->acceptall)
+ return Runi;
+ return 0;
+}
+
+/*
+ * find the ifc on same net as the remote system. If none,
+ * return nil.
+ *
+ * if no net matches and type has Rbcast or Rmulti set, the first
+ * interface that has any address is returned instead.
+ */
+Ipifc*
+findipifc(Fs *f, uchar *remote, int type)
+{
+ Ipifc *ifc, *x;
+ Iplifc *lifc;
+ Conv **cp, **e;
+ uchar gnet[IPaddrlen], xmask[IPaddrlen];
+
+ x = nil;
+ memset(xmask, 0, IPaddrlen);
+
+ /* find most specific match */
+ e = &f->ipifc->conv[f->ipifc->nc];
+ for(cp = f->ipifc->conv; cp < e; cp++){
+ if(*cp == 0)
+ continue;
+ ifc = (Ipifc*)(*cp)->ptcl;
+ for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ maskip(remote, lifc->mask, gnet);
+ if(ipcmp(gnet, lifc->net) == 0){
+ /* keep the match whose mask compares greatest */
+ if(x == nil || ipcmp(lifc->mask, xmask) > 0){
+ x = ifc;
+ ipmove(xmask, lifc->mask);
+ }
+ }
+ }
+ }
+ if(x != nil)
+ return x;
+
+ /* for now for broadcast and multicast, just use first interface */
+ if(type & (Rbcast|Rmulti)){
+ for(cp = f->ipifc->conv; cp < e; cp++){
+ if(*cp == 0)
+ continue;
+ ifc = (Ipifc*)(*cp)->ptcl;
+ if(ifc->lifc != nil)
+ return ifc;
+ }
+ }
+ return nil;
+}
+
+/*
+ * v6 address classes, in increasing order of preference:
+ * findprimaryipv6() relies on globalv6 > linklocalv6 > unspecifiedv6.
+ */
+enum {
+ unknownv6, /* UGH */
+// multicastv6,
+ unspecifiedv6,
+ linklocalv6,
+ globalv6,
+};
+
+/*
+ * classify addr: linklocalv6 for link-local addresses and for
+ * multicast addresses whose scope (low 4 bits of the second byte)
+ * is at most Link_local_scop; globalv6 for everything else.
+ */
+int
+v6addrtype(uchar *addr)
+{
+ if(islinklocal(addr) ||
+ (isv6mcast(addr) && (addr[1] & 0xF) <= Link_local_scop))
+ return linklocalv6;
+ else
+ return globalv6;
+}
+
+/*
+ * true if lifc's preferred lifetime is unlimited (~0L) or has not
+ * run out yet, measured from origint against NOW/1000.
+ * evaluates lifc more than once.
+ */
+#define v6addrcurr(lifc) ((lifc)->preflt == ~0L || \
+ (lifc)->origint + (lifc)->preflt >= NOW/1000)
+
+/*
+ * copy the best current v6 address of any interface into local;
+ * local is left as v6Unspecified if there is none.
+ */
+static void
+findprimaryipv6(Fs *f, uchar *local)
+{
+ int atype, atypel;
+ Conv **cp, **e;
+ Ipifc *ifc;
+ Iplifc *lifc;
+
+ ipmove(local, v6Unspecified);
+ atype = unspecifiedv6;
+
+ /*
+ * find "best" (global > link local > unspecified)
+ * local address; address must be current.
+ */
+ e = &f->ipifc->conv[f->ipifc->nc];
+ for(cp = f->ipifc->conv; cp < e; cp++){
+ if(*cp == 0)
+ continue;
+ ifc = (Ipifc*)(*cp)->ptcl;
+ for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ atypel = v6addrtype(lifc->local);
+ if(atypel > atype && v6addrcurr(lifc)) {
+ ipmove(local, lifc->local);
+ atype = atypel;
+ /* nothing can beat a global address: stop looking */
+ if(atype == globalv6)
+ return;
+ }
+ }
+ }
+}
+
+/*
+ * copy the first local address of the first interface that has one
+ * into local; local is left untouched when no address is configured
+ */
+static void
+findprimaryipv4(Fs *f, uchar *local)
+{
+	Conv **cp, **end;
+	Ipifc *ifc;
+
+	end = &f->ipifc->conv[f->ipifc->nc];
+	for(cp = f->ipifc->conv; cp < end; cp++){
+		if(*cp == 0)
+			continue;
+		ifc = (Ipifc*)(*cp)->ptcl;
+		if(ifc->lifc == nil)
+			continue;
+		ipmove(local, ifc->lifc->local);
+		return;
+	}
+}
+
+/*
+ * find the local address 'closest' to the remote system and copy it
+ * to local.  Nothing is returned; the result is left in local.
+ * f->ipifc is held with QLOCK for the whole search.
+ */
+void
+findlocalip(Fs *f, uchar *local, uchar *remote)
+{
+ int version, atype = unspecifiedv6, atypel = unknownv6;
+ int atyper, deprecated;
+ uchar gate[IPaddrlen], gnet[IPaddrlen];
+ Ipifc *ifc;
+ Iplifc *lifc;
+ Route *r;
+
+ QLOCK(f->ipifc);
+ r = v6lookup(f, remote, nil);
+ /* remote counts as v4 when it begins with v4prefix */
+ version = (memcmp(remote, v4prefix, IPv4off) == 0)? V4: V6;
+
+ if(r != nil){
+ ifc = r->ifc;
+ /* get the route's gateway as a 16-byte address */
+ if(r->type & Rv4)
+ v4tov6(gate, r->v4.gate);
+ else {
+ ipmove(gate, r->v6.gate);
+ ipmove(local, v6Unspecified);
+ }
+
+ switch(version) {
+ case V4:
+ /* find ifc address closest to the gateway to use */
+ for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ maskip(gate, lifc->mask, gnet);
+ if(ipcmp(gnet, lifc->net) == 0){
+ ipmove(local, lifc->local);
+ goto out;
+ }
+ }
+ break;
+ case V6:
+ /* find ifc address with scope matching the destination */
+ atyper = v6addrtype(remote);
+ deprecated = 0;
+ for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ atypel = v6addrtype(lifc->local);
+ /* prefer appropriate scope */
+ if((atypel > atype && atype < atyper) ||
+ (atypel < atype && atype > atyper)){
+ ipmove(local, lifc->local);
+ deprecated = !v6addrcurr(lifc);
+ atype = atypel;
+ } else if(atypel == atype){
+ /* avoid deprecated addresses */
+ if(deprecated && v6addrcurr(lifc)){
+ ipmove(local, lifc->local);
+ atype = atypel;
+ deprecated = 0;
+ }
+ }
+ /* exact scope and still current: done */
+ if(atype == atyper && !deprecated)
+ goto out;
+ }
+ /* accept an address of at least the remote's scope */
+ if(atype >= atyper)
+ goto out;
+ break;
+ default:
+ panic("findlocalip: version %d", version);
+ }
+ }
+
+ /* no route, or nothing suitable on its interface: use the primary address */
+ switch(version){
+ case V4:
+ findprimaryipv4(f, local);
+ break;
+ case V6:
+ findprimaryipv6(f, local);
+ break;
+ default:
+ panic("findlocalip2: version %d", version);
+ }
+
+out:
+ QUNLOCK(f->ipifc);
+}
+
+/*
+ * copy the first v4 address bound to ifc (its IPv4addrlen-byte form)
+ * into addr; returns 1 if one was found, 0 otherwise
+ */
+int
+ipv4local(Ipifc *ifc, uchar *addr)
+{
+	Iplifc *lifc;
+
+	lifc = ifc->lifc;
+	while(lifc != nil && !isv4(lifc->local))
+		lifc = lifc->next;
+	if(lifc == nil)
+		return 0;
+	memmove(addr, lifc->local+IPv4off, IPv4addrlen);
+	return 1;
+}
+
+/*
+ * copy the first non-tentative v6 address bound to ifc into addr;
+ * returns 1 if one was found, 0 otherwise
+ */
+int
+ipv6local(Ipifc *ifc, uchar *addr)
+{
+	Iplifc *lifc;
+
+	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+		if(isv4(lifc->local) || lifc->tentative)
+			continue;
+		ipmove(addr, lifc->local);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * like ipv6local but tentative addresses are accepted too;
+ * returns SRC_UNI when addr was filled in, SRC_UNSPEC otherwise
+ */
+int
+ipv6anylocal(Ipifc *ifc, uchar *addr)
+{
+	Iplifc *lifc;
+
+	lifc = ifc->lifc;
+	while(lifc != nil && isv4(lifc->local))
+		lifc = lifc->next;
+	if(lifc == nil)
+		return SRC_UNSPEC;
+	ipmove(addr, lifc->local);
+	return SRC_UNI;
+}
+
+/*
+ * return the logical interface of ifc whose local address is ip,
+ * or nil if ip is not bound to ifc
+ */
+Iplifc*
+iplocalonifc(Ipifc *ifc, uchar *ip)
+{
+	Iplifc *lifc;
+
+	lifc = ifc->lifc;
+	while(lifc != nil && ipcmp(ip, lifc->local) != 0)
+		lifc = lifc->next;
+	return lifc;
+}
+
+
+/*
+ * Returns 1 if we are proxying for ip on ifc: the route to ip must
+ * carry both Rifc and Rproxy, and ip masked by one of ifc's masks
+ * must equal that logical interface's remote address.
+ */
+int
+ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip)
+{
+	Route *rt;
+	Iplifc *lifc;
+	uchar masked[IPaddrlen];
+
+	/* only directly connected proxy routes qualify */
+	rt = v6lookup(f, ip, nil);
+	if(rt == nil)
+		return 0;
+	if((rt->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
+		return 0;
+
+	/* and it has to be reachable through this interface */
+	for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+		maskip(ip, lifc->mask, masked);
+		if(ipcmp(masked, lifc->remote) == 0)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * returns V4 or V6 if ip is a multicast address of that
+ * version, 0 if it is not multicast
+ */
+int
+ipismulticast(uchar *ip)
+{
+	uchar top;
+
+	if(!isv4(ip))
+		return ip[0] == 0xff ? V6 : 0;
+
+	/* v4 multicast: first octet in 0xe0..0xef */
+	top = ip[IPv4off];
+	if(top >= 0xe0 && top < 0xf0)
+		return V4;
+	return 0;
+}
+/*
+ * like ipismulticast, but the v4 broadcast address IPv4bcast
+ * also counts (as V4)
+ */
+int
+ipisbm(uchar *ip)
+{
+	if(!isv4(ip))
+		return ip[0] == 0xff ? V6 : 0;
+
+	if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
+		return V4;
+	if(ipcmp(ip, IPv4bcast) == 0)
+		return V4;
+	return 0;
+}
+
+
+/*
+ * Record that conversation c has joined multicast address ma through
+ * interface address ia, then add ma to the self cache of every
+ * logical interface whose local address is ia.
+ * Called with c->car locked.
+ */
+void
+ipifcaddmulti(Conv *c, uchar *ma, uchar *ia)
+{
+	Ipifc *ifc;
+	Iplifc *lifc;
+	Conv **cp;
+	Ipmulti *m, **tail;
+	Fs *f;
+
+	f = c->p->f;
+
+	/* nothing to do if the pair is already on c's list */
+	tail = &c->multi;
+	while(*tail != nil){
+		m = *tail;
+		if(ipcmp(ma, m->ma) == 0 && ipcmp(ia, m->ia) == 0)
+			return;
+		tail = &m->next;
+	}
+
+	/* append a new entry */
+	m = smalloc(sizeof(*m));
+	*tail = m;
+	ipmove(m->ma, ma);
+	ipmove(m->ia, ia);
+	m->next = nil;
+
+	/* the conv array is walked up to its first nil slot */
+	for(cp = f->ipifc->conv; *cp != nil; cp++){
+		if((*cp)->inuse == 0)
+			continue;
+		ifc = (Ipifc*)(*cp)->ptcl;
+		if(waserror()){
+			WUNLOCK(ifc);
+			nexterror();
+		}
+		WLOCK(ifc);
+		for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+			if(ipcmp(ia, lifc->local) != 0)
+				continue;
+			addselfcache(f, ifc, lifc, ma, Rmulti);
+		}
+		WUNLOCK(ifc);
+		poperror();
+	}
+}
+
+
+/*
+ * Undo ipifcaddmulti: unlink the (ma, ia) entry from c's list, drop
+ * ma from the self cache of every logical interface whose local
+ * address is ia, and free the entry.  Does nothing if c never joined.
+ * Called with c->car locked.
+ */
+void
+ipifcremmulti(Conv *c, uchar *ma, uchar *ia)
+{
+	Ipmulti *victim, **link;
+	Iplifc *lifc;
+	Conv **cp;
+	Ipifc *ifc;
+	Fs *f;
+
+	f = c->p->f;
+
+	/* locate the entry */
+	link = &c->multi;
+	while(*link != nil){
+		if(ipcmp(ma, (*link)->ma) == 0 && ipcmp(ia, (*link)->ia) == 0)
+			break;
+		link = &(*link)->next;
+	}
+	victim = *link;
+	if(victim == nil)
+		return;		/* we don't have it open */
+	*link = victim->next;
+
+	/* the conv array is walked up to its first nil slot */
+	for(cp = f->ipifc->conv; *cp != nil; cp++){
+		if((*cp)->inuse == 0)
+			continue;
+		ifc = (Ipifc*)(*cp)->ptcl;
+		if(waserror()){
+			WUNLOCK(ifc);
+			nexterror();
+		}
+		WLOCK(ifc);
+		for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+			if(ipcmp(ia, lifc->local) != 0)
+				continue;
+			remselfcache(f, ifc, lifc, ma);
+		}
+		WUNLOCK(ifc);
+		poperror();
+	}
+
+	free(victim);
+}
+
+/*
+ * make lifc's join and leave multicast groups
+ *
+ * ipifcjoinmulti is a stub: it ignores its arguments and
+ * returns nil.
+ */
+static char*
+ipifcjoinmulti(Ipifc *ifc, char **argv, int argc)
+{
+ return nil;
+}
+
+/* stub: ignores its arguments and returns nil */
+static char*
+ipifcleavemulti(Ipifc *ifc, char **argv, int argc)
+{
+ return nil;
+}
+
+/*
+ * Register ip on every other interface that will proxy for us, i.e.
+ * every interface other than ifc having a logical interface whose
+ * remote address equals ip under that interface's mask.
+ * For v6 the solicited-node multicast address is added to the self
+ * cache and ip is entered in the arp table with the interface's mac;
+ * for v4 the medium's areg hook is called.  Media lacking the
+ * relevant hook (addmulti for v6, areg for v4) are skipped.
+ */
+static void
+ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip)
+{
+	Conv **cp, **end;
+	Ipifc *nifc;
+	Iplifc *lifc;
+	Medium *m;
+	int v6;
+	uchar net[IPaddrlen];
+
+	end = &f->ipifc->conv[f->ipifc->nc];
+	v6 = !isv4(ip);
+
+	for(cp = f->ipifc->conv; cp < end; cp++){
+		if(*cp == nil)
+			continue;
+		nifc = (Ipifc*)(*cp)->ptcl;
+		if(nifc == ifc)
+			continue;
+
+		RLOCK(nifc);
+		m = nifc->m;
+		if(m == nil || (v6 ? m->addmulti == nil : m->areg == nil)){
+			RUNLOCK(nifc);
+			continue;
+		}
+		for(lifc = nifc->lifc; lifc; lifc = lifc->next){
+			maskip(ip, lifc->mask, net);
+			if(ipcmp(net, lifc->remote) != 0)
+				continue;
+			if(v6){
+				/* add solicited-node multicast addr */
+				ipv62smcast(net, ip);
+				addselfcache(f, nifc, lifc, net, Rmulti);
+				arpenter(f, V6, ip, nifc->mac, 6, 0);
+				/* the medium's addmulti hook is deliberately not called */
+			} else
+				(*m->areg)(nifc, ip);
+			break;	/* first matching lifc is enough */
+		}
+		RUNLOCK(nifc);
+	}
+}
+
+
+/*
+ * added for new v6 mesg types
+ *
+ * Install gate as the v6 default route, tagged "ra".  Unless force
+ * is set, an existing default route whose tag is not "ra" is left
+ * alone: routes generated by all other means take precedence over
+ * router announcements.
+ */
+static void
+adddefroute6(Fs *f, uchar *gate, int force)
+{
+	Route *cur;
+
+	cur = v6lookup(f, v6Unspecified, nil);
+	if(cur != nil && !force && strcmp(cur->tag, "ra") != 0)
+		return;
+
+	/* replace whatever default route is there */
+	v6delroute(f, v6Unspecified, v6Unspecified, 1);
+	v6addroute(f, "ra", v6Unspecified, v6Unspecified, gate, 0);
+}
+
+/* NOTE(review): Ngates is not referenced in the visible part of this file */
+enum {
+ Ngates = 3,
+};
+
+/*
+ * Handle an "add6" request: argv[1] is a v6 prefix, optionally
+ * followed by prefix length (default 64), onlink flag (default 1),
+ * autoflag (default 1), valid lifetime and preferred lifetime
+ * (both default ~0L).  The medium's pref2addr hook combines the
+ * prefix with ifc->mac, and the resulting address is handed to
+ * ipifcadd as an "add" request together with a template Iplifc
+ * carrying the flags and lifetimes.
+ *
+ * Returns Ebadarg for a bad argument count, an unparsable or
+ * link-local prefix, plen outside 0..64, validlt < preflt, or an
+ * interface whose medium is missing or has no pref2addr hook;
+ * otherwise returns whatever ipifcadd returns.
+ */
+char*
+ipifcadd6(Ipifc *ifc, char**argv, int argc)
+{
+	int plen = 64;
+	long origint = NOW / 1000, preflt = ~0L, validlt = ~0L;
+	char addr[40], preflen[6];
+	char *params[3];
+	uchar autoflag = 1, onlink = 1;
+	uchar prefix[IPaddrlen];
+	Iplifc *lifc;
+
+	switch(argc) {
+	case 7:
+		preflt = atoi(argv[6]);
+		/* fall through */
+	case 6:
+		validlt = atoi(argv[5]);
+		/* fall through */
+	case 5:
+		autoflag = atoi(argv[4]);
+		/* fall through */
+	case 4:
+		onlink = atoi(argv[3]);
+		/* fall through */
+	case 3:
+		plen = atoi(argv[2]);
+		/* fall through */
+	case 2:
+		break;
+	default:
+		return Ebadarg;
+	}
+
+	if (parseip(prefix, argv[1]) != 6 || validlt < preflt || plen < 0 ||
+	    plen > 64 || islinklocal(prefix))
+		return Ebadarg;
+
+	/*
+	 * check the medium before allocating anything, so the error
+	 * return cannot leak lifc and an unbound interface (no medium)
+	 * is rejected instead of dereferenced
+	 */
+	if(ifc->m == nil || ifc->m->pref2addr == nil)
+		return Ebadarg;
+
+	/* NOTE(review): whether ipifcadd keeps or copies lifc is not visible here */
+	lifc = smalloc(sizeof(Iplifc));
+	lifc->onlink = (onlink != 0);
+	lifc->autoflag = (autoflag != 0);
+	lifc->validlt = validlt;
+	lifc->preflt = preflt;
+	lifc->origint = origint;
+
+	/* issue "add" ctl msg for v6 link-local addr and prefix len */
+	ifc->m->pref2addr(prefix, ifc->mac);	/* mac → v6 link-local addr */
+	sprint(addr, "%I", prefix);
+	sprint(preflen, "/%d", plen);
+	params[0] = "add";
+	params[1] = addr;
+	params[2] = preflen;
+
+	return ipifcadd(ifc, params, 3, 0, lifc);
+}
diff --git a/src/9vx/a/ip/ipmux.c b/src/9vx/a/ip/ipmux.c
@@ -0,0 +1,842 @@
+/*
+ * IP packet filter
+ */
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+
+typedef struct Ipmuxrock Ipmuxrock;
+typedef struct Ipmux Ipmux;
+
+/*
+ * IPv4 header layout as seen by the filter, with the first
+ * byte of payload as the last member
+ */
+typedef struct Myip4hdr Myip4hdr;
+struct Myip4hdr
+{
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* ip->identification */
+ uchar frag[2]; /* Fragment information */
+ uchar ttl; /* Time to live */
+ uchar proto; /* Protocol */
+ uchar cksum[2]; /* Header checksum */
+ uchar src[4]; /* IP source */
+ uchar dst[4]; /* IP destination */
+
+ uchar data[1]; /* start of data */
+};
+/* nil pointer, used only to form field offsets such as (ulong)ipoff->dst */
+Myip4hdr *ipoff = 0;
+
+enum
+{
+ /* field types, stored in Ipmux.type; also index ftname[] */
+ Tproto,
+ Tdata,
+ Tiph,
+ Tdst,
+ Tsrc,
+ Tifc,
+
+ /* comparison kinds, stored in Ipmux.ctype */
+ Cother = 0,
+ Cbyte, /* single byte */
+ Cmbyte, /* single byte with mask */
+ Cshort, /* single short */
+ Cmshort, /* single short with mask */
+ Clong, /* single long */
+ Cmlong, /* single long with mask */
+ Cifc, /* single interface address */
+ Cmifc, /* single interface address with mask */
+};
+
+/* field type names, indexed by the Txxxx constants */
+char *ftname[] =
+{
+[Tproto] "proto",
+[Tdata] "data",
+[Tiph] "iph",
+[Tdst] "dst",
+[Tsrc] "src",
+[Tifc] "ifc",
+};
+
+/*
+ * a node in the decision tree; `yes' is followed when the
+ * comparison matches, `no' when it does not
+ */
+struct Ipmux
+{
+ Ipmux *yes;
+ Ipmux *no;
+ uchar type; /* type of field(Txxxx) */
+ uchar ctype; /* type of comparison(Cxxxx) */
+ uchar len; /* length in bytes of item to compare */
+ uchar n; /* number of items val points to */
+ short off; /* offset of comparison */
+ short eoff; /* end offset of comparison */
+ uchar skiphdr; /* should offset start after ipheader */
+ uchar *val;
+ uchar *mask;
+ uchar *e; /* val+n*len*/
+
+ int ref; /* so we can garbage collect */
+ Conv *conv;
+};
+
+/*
+ * someplace to hold per conversation data
+ */
+struct Ipmuxrock
+{
+ Ipmux *chain; /* this conversation's filter chain, set by ipmuxconnect */
+};
+
+/* forward declarations for functions used before their definitions */
+static int ipmuxsprint(Ipmux*, int, char*, int);
+static void ipmuxkick(void *x);
+
+/* return a pointer to the first character of p that is not a blank or tab */
+static char*
+skipwhite(char *p)
+{
+	for(;;){
+		if(*p != ' ' && *p != '\t')
+			return p;
+		p++;
+	}
+}
+
+/*
+ * Split p at the first occurrence of c: the separator is overwritten
+ * with a NUL and a pointer to the text after it (leading white space
+ * skipped) is returned.  Returns nil when c does not occur, or when
+ * nothing but white space follows it (p is still cut in that case).
+ */
+static char*
+follows(char *p, char c)
+{
+	char *rest;
+
+	rest = strchr(p, c);
+	if(rest == nil)
+		return nil;
+	*rest = 0;
+	rest = skipwhite(rest + 1);
+	return *rest != 0 ? rest : nil;
+}
+
+/*
+ * Parse the operand (left hand side) of a filter and return a new
+ * Ipmux node with type, off, len and skiphdr filled in, n and ref
+ * set to 1, and val and mask nil.  Recognized operands are
+ *	dst, src, ifc, proto, data[off] or data[off:end],
+ *	iph[off] or iph[off:end]
+ * Returns nil on anything else.  *pp is read but never advanced.
+ */
+static Ipmux*
+parseop(char **pp)
+{
+	char *p = *pp;
+	int type, off, end, len;
+	Ipmux *f;
+
+	p = skipwhite(p);
+	if(strncmp(p, "dst", 3) == 0){
+		type = Tdst;
+		off = (ulong)(ipoff->dst);
+		len = IPv4addrlen;
+		p += 3;
+	}
+	else if(strncmp(p, "src", 3) == 0){
+		type = Tsrc;
+		off = (ulong)(ipoff->src);
+		len = IPv4addrlen;
+		p += 3;
+	}
+	else if(strncmp(p, "ifc", 3) == 0){
+		type = Tifc;
+		off = -IPv4addrlen;
+		len = IPv4addrlen;
+		p += 3;
+	}
+	else if(strncmp(p, "proto", 5) == 0){
+		type = Tproto;
+		off = (ulong)&(ipoff->proto);
+		len = 1;
+		p += 5;
+	}
+	else if(strncmp(p, "data", 4) == 0 || strncmp(p, "iph", 3) == 0){
+		if(strncmp(p, "data", 4) == 0) {
+			type = Tdata;
+			p += 4;
+		}
+		else {
+			type = Tiph;
+			p += 3;
+		}
+		p = skipwhite(p);
+		if(*p != '[')
+			return nil;
+		p++;
+		off = strtoul(p, &p, 0);
+		if(off < 0 || off > (64-IP4HDR))
+			return nil;
+		p = skipwhite(p);
+		if(*p != ':')
+			end = off;
+		else {
+			p++;
+			p = skipwhite(p);
+			end = strtoul(p, &p, 0);
+			if(end < off)
+				return nil;
+			p = skipwhite(p);
+		}
+		if(*p != ']')
+			return nil;
+		p++;
+		/*
+		 * the length is stored in a uchar (Ipmux.len); reject
+		 * ranges it cannot hold rather than truncating silently
+		 */
+		if(end - off > 254)
+			return nil;
+		len = end - off + 1;
+	}
+	else
+		return nil;
+
+	f = smalloc(sizeof(*f));
+	f->type = type;
+	f->len = len;
+	f->off = off;
+	f->val = nil;
+	f->mask = nil;
+	f->n = 1;
+	f->ref = 1;
+	/* only data[] offsets are counted from the end of the ip header */
+	if(type == Tdata)
+		f->skiphdr = 1;
+	else
+		f->skiphdr = 0;
+
+	return f;
+}
+
+/* value of hex digit x (either case); anything else counts as 0 */
+static int
+htoi(char x)
+{
+	if(x >= '0' && x <= '9')
+		return x - '0';
+	if(x >= 'a' && x <= 'f')
+		return x - 'a' + 10;
+	if(x >= 'A' && x <= 'F')
+		return x - 'A' + 10;
+	return 0;
+}
+
+/* convert the two hex digits at p[0] and p[1] into one byte value */
+static int
+hextoi(char *p)
+{
+	int hi, lo;
+
+	hi = htoi(p[0]);
+	lo = htoi(p[1]);
+	return (hi<<4) | lo;
+}
+
+/*
+ * Convert the hex string p, two digits per byte, into at most len
+ * bytes at v.  Conversion stops at the end of the string; bytes of
+ * v beyond that are not written.  A lone trailing digit is taken
+ * as the high nibble of a final byte.
+ */
+static void
+parseval(uchar *v, char *p, int len)
+{
+	while(*p && len-- > 0){
+		*v++ = hextoi(p);
+		/* odd number of digits: don't step over the terminating NUL */
+		if(p[1] == 0)
+			break;
+		p += 2;
+	}
+}
+
+/*
+ * Parse one filter into a newly allocated Ipmux node.  The accepted
+ * form is
+ *	operand[&mask]=val[|val...]
+ * Because follows() cuts the string at '=' before the mask is looked
+ * for, a mask is only recognized when written before the '='.
+ * p is modified in place.  Returns nil on a syntax error.
+ */
+static Ipmux*
+parsemux(char *p)
+{
+ int n, nomask;
+ Ipmux *f;
+ char *val;
+ char *mask;
+ char *vals[20];
+ uchar *v;
+
+ /* parse operand */
+ f = parseop(&p);
+ if(f == nil)
+ return nil;
+
+ /* find value */
+ val = follows(p, '=');
+ if(val == nil)
+ goto parseerror;
+
+ /* parse mask */
+ mask = follows(p, '&');
+ if(mask != nil){
+ switch(f->type){
+ case Tsrc:
+ case Tdst:
+ case Tifc:
+ f->mask = smalloc(f->len);
+ v4parseip(f->mask, mask);
+ break;
+ case Tdata:
+ case Tiph:
+ f->mask = smalloc(f->len);
+ parseval(f->mask, mask, f->len);
+ break;
+ default:
+ /* a mask is not accepted for the remaining type (proto) */
+ goto parseerror;
+ }
+ nomask = 0;
+ } else {
+ /* no mask given: compare every bit */
+ nomask = 1;
+ f->mask = smalloc(f->len);
+ memset(f->mask, 0xff, f->len);
+ }
+
+ /* parse vals */
+ f->n = getfields(val, vals, sizeof(vals)/sizeof(char*), 1, "|");
+ if(f->n == 0)
+ goto parseerror;
+ f->val = smalloc(f->n*f->len);
+ v = f->val;
+ for(n = 0; n < f->n; n++){
+ switch(f->type){
+ case Tsrc:
+ case Tdst:
+ case Tifc:
+ v4parseip(v, vals[n]);
+ break;
+ case Tproto:
+ case Tdata:
+ case Tiph:
+ parseval(v, vals[n], f->len);
+ break;
+ }
+ v += f->len;
+ }
+
+ f->eoff = f->off + f->len;
+ f->e = f->val + f->n*f->len;
+ /* pick a fast comparison when there is one value of 1, 2 or 4 bytes */
+ f->ctype = Cother;
+ if(f->n == 1){
+ switch(f->len){
+ case 1:
+ f->ctype = nomask ? Cbyte : Cmbyte;
+ break;
+ case 2:
+ f->ctype = nomask ? Cshort : Cmshort;
+ break;
+ case 4:
+ if(f->type == Tifc)
+ f->ctype = nomask ? Cifc : Cmifc;
+ else
+ f->ctype = nomask ? Clong : Cmlong;
+ break;
+ }
+ }
+ return f;
+
+parseerror:
+ /* release whatever was allocated so far */
+ if(f->mask)
+ free(f->mask);
+ if(f->val)
+ free(f->val);
+ free(f);
+ return nil;
+}
+
+/*
+ * Order two ipmuxs by the fields they examine; the values being
+ * matched are not looked at.  Keys, in order: field type (lesser
+ * first), offset (earlier first), length (longer first), then mask.
+ *
+ * returns:	<0 if a is a more specific match
+ *		 0 if a and b are matching on the same fields
+ *		>0 if b is a more specific match
+ */
+static int
+ipmuxcmp(Ipmux *a, Ipmux *b)
+{
+	int d;
+
+	/* lesser types are more important */
+	d = a->type - b->type;
+	if(d != 0)
+		return d;
+
+	/* earlier offsets are more specific */
+	d = (a->off+((int)a->skiphdr)*(ulong)ipoff->data) -
+		(b->off+((int)b->skiphdr)*(ulong)ipoff->data);
+	if(d != 0)
+		return d;
+
+	/* longer matches are more specific */
+	d = b->len - a->len;
+	if(d != 0)
+		return d;
+
+	/*
+	 * same bytes of the record: a node with a mask sorts before
+	 * one without; two masks are ordered by memcmp
+	 */
+	if(a->mask == nil || b->mask == nil){
+		if(a->mask != nil)
+			return -1;
+		if(b->mask != nil)
+			return 1;
+		return 0;
+	}
+	return memcmp(b->mask, a->mask, a->len);
+}
+
+/*
+ * Compare the value lists of two ipmuxs for which ipmuxcmp returned
+ * 0.  Lists of different total size never compare equal; otherwise
+ * the result is that of memcmp over the value bytes.
+ */
+static int
+ipmuxvalcmp(Ipmux *a, Ipmux *b)
+{
+	int asize, bsize;
+
+	asize = a->len*a->n;
+	bsize = b->len*b->n;
+	if(bsize != asize)
+		return bsize - asize;
+	return memcmp(a->val, b->val, asize);
+}
+
+/*
+ * insert f into the chain at *l (linked through ->yes), keeping the
+ * chain in ipmuxcmp order; f goes in front of the first node it
+ * compares less than
+ */
+static void
+ipmuxchain(Ipmux **l, Ipmux *f)
+{
+	while(*l != nil && ipmuxcmp(f, *l) >= 0)
+		l = &(*l)->yes;
+	f->yes = *l;
+	*l = f;
+}
+
+/*
+ * copy a tree.  Every node of the copy owns its own val and mask
+ * buffers, so that original and copy can be freed independently.
+ */
+static Ipmux*
+ipmuxcopy(Ipmux *f)
+{
+	Ipmux *nf;
+
+	if(f == nil)
+		return nil;
+	nf = smalloc(sizeof *nf);
+	*nf = *f;
+	nf->no = ipmuxcopy(f->no);
+	nf->yes = ipmuxcopy(f->yes);
+	nf->val = smalloc(f->n*f->len);
+	nf->e = nf->val + f->len*f->n;
+	memmove(nf->val, f->val, f->n*f->len);
+	/* the struct copy above left nf->mask aliasing f->mask */
+	if(f->mask != nil){
+		nf->mask = smalloc(f->len);
+		memmove(nf->mask, f->mask, f->len);
+	}
+	return nf;
+}
+
+/*
+ * free a single node together with its val and mask buffers;
+ * the yes and no subtrees are not touched
+ */
+static void
+ipmuxfree(Ipmux *f)
+{
+	if(f->mask != nil)
+		free(f->mask);
+	if(f->val != nil)
+		free(f->val);
+	free(f);
+}
+
+/*
+ * free a whole tree: f and everything reachable through yes and no
+ */
+static void
+ipmuxtreefree(Ipmux *f)
+{
+	if(f == nil)
+		return;
+	ipmuxtreefree(f->no);
+	ipmuxtreefree(f->yes);
+	ipmuxfree(f);
+}
+
+/*
+ * merge two trees; returns the root of the merged tree.
+ * Nodes of both a and b are reused in the result, so neither
+ * argument may be used on its own afterwards.
+ */
+static Ipmux*
+ipmuxmerge(Ipmux *a, Ipmux *b)
+{
+ int n;
+ Ipmux *f;
+
+ if(a == nil)
+ return b;
+ if(b == nil)
+ return a;
+ n = ipmuxcmp(a, b);
+ if(n < 0){
+ /* a sorts first: b goes under both outcomes of a, a copy on the no side */
+ f = ipmuxcopy(b);
+ a->yes = ipmuxmerge(a->yes, b);
+ a->no = ipmuxmerge(a->no, f);
+ return a;
+ }
+ if(n > 0){
+ /* b sorts first: the mirror image of the case above */
+ f = ipmuxcopy(a);
+ b->yes = ipmuxmerge(b->yes, a);
+ b->no = ipmuxmerge(b->no, f);
+ return b;
+ }
+ if(ipmuxvalcmp(a, b) == 0){
+ /* same fields and same values: share node a, count the extra user */
+ a->yes = ipmuxmerge(a->yes, b->yes);
+ a->no = ipmuxmerge(a->no, b->no);
+ a->ref++;
+ ipmuxfree(b);
+ return a;
+ }
+ /* same fields, different values: b is tried when a does not match */
+ a->no = ipmuxmerge(a->no, b);
+ return a;
+}
+
+/*
+ * remove a chain from a demux tree. This is like merging except that
+ * we remove instead of insert.
+ *
+ * l points at the link to the (sub)tree, f is the chain to remove.
+ * Returns 0 once all of f has been removed; returns -1 when the
+ * tree ends, or is found to hold a later field than f, before
+ * that.  When both sides are descended the two results are added.
+ */
+static int
+ipmuxremove(Ipmux **l, Ipmux *f)
+{
+ int n, rv;
+ Ipmux *ft;
+
+ if(f == nil)
+ return 0; /* we've removed it all */
+ if(*l == nil)
+ return -1;
+
+ ft = *l;
+ n = ipmuxcmp(ft, f);
+ if(n < 0){
+ /* *l is matching an earlier field, descend both paths */
+ rv = ipmuxremove(&ft->yes, f);
+ rv += ipmuxremove(&ft->no, f);
+ return rv;
+ }
+ if(n > 0){
+ /* f represents an earlier field than *l, this should be impossible */
+ return -1;
+ }
+
+ /* if we get here f and *l are comparing the same fields */
+ if(ipmuxvalcmp(ft, f) != 0){
+ /* different values mean mutually exclusive */
+ return ipmuxremove(&ft->no, f);
+ }
+
+ /* we found a match */
+ if(--(ft->ref) == 0){
+ /*
+ * a dead node implies the whole yes side is also dead.
+ * since our chain is constrained to be on that side,
+ * we're done.
+ */
+ ipmuxtreefree(ft->yes);
+ *l = ft->no;
+ ipmuxfree(ft);
+ return 0;
+ }
+
+ /*
+ * free the rest of the chain. it is constrained to match the
+ * yes side.
+ */
+ return ipmuxremove(&ft->yes, f->yes);
+}
+
+/*
+ * connection request is a semicolon separated list of filters, each
+ * of the form accepted by parsemux, e.g.
+ *	proto=17;data[0:4]=11aa22bb;ifc&255.255.255.0=135.104.9.0
+ *
+ * The parsed chain is kept in the conversation's Ipmuxrock so it can
+ * be removed later; a copy of it is merged into the protocol's
+ * demultiplexing tree.  Returns nil on success, Ebadarg on a bad
+ * argument count or an unparsable filter.
+ *
+ * there's no protection against overlapping specs.
+ */
+static char*
+ipmuxconnect(Conv *c, char **argv, int argc)
+{
+	int i, nfield;
+	char *field[10];
+	Ipmux *mux, *chain, *copy;
+	Ipmuxrock *rock;
+	Fs *f;
+
+	f = c->p->f;
+
+	if(argc != 2)
+		return Ebadarg;
+
+	nfield = getfields(argv[1], field, nelem(field), 1, ";");
+	if(nfield <= 0)
+		return Ebadarg;
+
+	/* parse each filter and thread it into the chain in canonical order */
+	chain = nil;
+	mux = nil;
+	for(i = 0; i < nfield; i++){
+		mux = parsemux(field[i]);
+		if(mux == nil){
+			ipmuxtreefree(chain);
+			return Ebadarg;
+		}
+		ipmuxchain(&chain, mux);
+	}
+	if(chain == nil)
+		return Ebadarg;
+	/* the node parsed last carries the conversation */
+	mux->conv = c;
+
+	/* keep the chain itself; the tree gets a copy */
+	copy = ipmuxcopy(chain);
+	rock = (Ipmuxrock*)(c->ptcl);
+	rock->chain = chain;
+
+	/* add the chain to the protocol demultiplexor tree */
+	WLOCK(f);
+	f->ipmux->priv = ipmuxmerge(f->ipmux->priv, copy);
+	WUNLOCK(f);
+
+	Fsconnected(c, nil);
+	return nil;
+}
+
+/*
+ * format the conversation's filter chain into state (at most n
+ * bytes); returns what ipmuxsprint returns
+ */
+static int
+ipmuxstate(Conv *c, char *state, int n)
+{
+	Ipmuxrock *rock;
+
+	rock = (Ipmuxrock*)(c->ptcl);
+	return ipmuxsprint(rock->chain, 0, state, n);
+}
+
+/*
+ * set up a new conversation: a 64K read queue, a 64K write queue
+ * kicked through ipmuxkick, and an empty filter chain
+ */
+static void
+ipmuxcreate(Conv *c)
+{
+	c->rq = qopen(64*1024, Qmsg, 0, c);
+	c->wq = qopen(64*1024, Qkick, ipmuxkick, c);
+	((Ipmuxrock*)(c->ptcl))->chain = nil;
+}
+
+/* announce is not supported: always returns an error string */
+static char*
+ipmuxannounce(Conv* _, char** __, int ___)
+{
+ return "ipmux does not support announce";
+}
+
+/*
+ * Close a conversation: close its queues, clear its addresses and
+ * ports, take its filter chain out of the protocol's demultiplexing
+ * tree and free the saved chain.
+ */
+static void
+ipmuxclose(Conv *c)
+{
+	Ipmux *i;
+	Ipmuxrock *r;
+	Fs *f = c->p->f;
+
+	r = (Ipmuxrock*)(c->ptcl);
+
+	qclose(c->rq);
+	qclose(c->wq);
+	qclose(c->eq);
+	ipmove(c->laddr, IPnoaddr);
+	ipmove(c->raddr, IPnoaddr);
+	c->lport = 0;
+	c->rport = 0;
+
+	WLOCK(f);
+	i = (Ipmux *)c->p->priv;
+	ipmuxremove(&i, r->chain);
+	/*
+	 * ipmuxremove may unlink and free the root node; store the
+	 * (possibly new) root back so priv never points at freed memory
+	 */
+	c->p->priv = i;
+	WUNLOCK(f);
+	ipmuxtreefree(r->chain);
+	r->chain = nil;
+}
+
+/*
+ * Take one fully formed ip packet off the conversation's write
+ * queue and pass it down the stack: packets whose version nibble is
+ * IP_VER6 go to ipoput6, everything else to ipoput4, using the ttl
+ * (and for v4 the tos) found in the packet's own header.
+ */
+static void
+ipmuxkick(void *x)
+{
+	Conv *c;
+	Block *bp;
+	Myip4hdr *ih4;
+	Ip6hdr *ih6;
+
+	c = x;
+	bp = qget(c->wq);
+	if(bp == nil)
+		return;
+
+	ih4 = (Myip4hdr*)(bp->rp);
+	if((ih4->vihl & 0xF0) == IP_VER6){
+		ih6 = (Ip6hdr*)ih4;
+		ipoput6(c->p->f, bp, 0, ih6->ttl, 0, nil);
+	} else
+		ipoput4(c->p->f, bp, 0, ih4->ttl, ih4->tos, nil);
+}
+
+/*
+ * Receive a packet: run it through the demultiplexing tree and, if
+ * a node carrying a conversation is reached, queue the packet on
+ * that conversation's read queue with IPaddrlen bytes of interface
+ * address in front.  Packets matching no conversation are handed
+ * to the handler registered for their protocol number, or freed.
+ *
+ * Differences from a straight walk of the tree:
+ *  - data[] offsets (skiphdr) are counted from the end of the ip
+ *    header in every comparison, and the length check includes
+ *    that header offset;
+ *  - ifc fields are always compared against the interface's first
+ *    address, also in the multi-value case;
+ *  - an interface without any address never matches an ifc field
+ *    and contributes IPnoaddr as the prepended address.
+ */
+static void
+ipmuxiput(Proto *p, Ipifc *ifc, Block *bp)
+{
+	int len, hl, hoff;
+	Fs *f = p->f;
+	uchar *m, *h, *v, *e, *ve, *hp, *base, *ifcaddr;
+	Conv *c;
+	Ipmux *mux;
+	Myip4hdr *ip;
+	Ip6hdr *ip6;
+
+	ip = (Myip4hdr*)bp->rp;
+	hl = (ip->vihl&0x0F)<<2;
+
+	if(p->priv == nil)
+		goto nomatch;
+
+	h = bp->rp;
+	len = BLEN(bp);
+
+	/* v4 part of the interface's first address, if it has one */
+	ifcaddr = nil;
+	if(ifc->lifc != nil)
+		ifcaddr = ifc->lifc->local + IPv4off;
+
+	/* run the v4 filter */
+	RLOCK(f);
+	c = nil;
+	mux = f->ipmux->priv;
+	while(mux != nil){
+		if(mux->type == Tifc){
+			/* compared against the interface, not packet bytes */
+			if(ifcaddr == nil){
+				mux = mux->no;
+				continue;
+			}
+			base = ifcaddr;
+		} else {
+			hoff = mux->skiphdr ? hl : 0;
+			/* don't look beyond the end of the block */
+			if(mux->eoff + hoff > len){
+				mux = mux->no;
+				continue;
+			}
+			base = h + mux->off + hoff;
+		}
+		hp = base;
+		switch(mux->ctype){
+		case Cbyte:
+			if(*mux->val == *hp)
+				goto yes;
+			break;
+		case Cmbyte:
+			if((*hp & *mux->mask) == *mux->val)
+				goto yes;
+			break;
+		case Cshort:
+			if(*((ushort*)mux->val) == *(ushort*)hp)
+				goto yes;
+			break;
+		case Cmshort:
+			if((*(ushort*)hp & (*((ushort*)mux->mask))) == *((ushort*)mux->val))
+				goto yes;
+			break;
+		case Clong:
+			if(*((ulong*)mux->val) == *(ulong*)hp)
+				goto yes;
+			break;
+		case Cmlong:
+			if((*(ulong*)hp & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
+				goto yes;
+			break;
+		case Cifc:
+			if(ifcaddr != nil && *((ulong*)mux->val) == *(ulong*)ifcaddr)
+				goto yes;
+			break;
+		case Cmifc:
+			if(ifcaddr != nil && (*(ulong*)ifcaddr & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
+				goto yes;
+			break;
+		default:
+			/* several values, or an odd length: try each value bytewise */
+			v = mux->val;
+			for(e = mux->e; v < e; v = ve){
+				m = mux->mask;
+				hp = base;
+				for(ve = v + mux->len; v < ve; v++){
+					if((*hp++ & *m++) != *v)
+						break;
+				}
+				if(v == ve)
+					goto yes;
+			}
+		}
+		mux = mux->no;
+		continue;
+yes:
+		if(mux->conv != nil)
+			c = mux->conv;
+		mux = mux->yes;
+	}
+	RUNLOCK(f);
+
+	if(c != nil){
+		/* tack on interface address */
+		bp = padblock(bp, IPaddrlen);
+		ipmove(bp->rp, ifc->lifc != nil ? ifc->lifc->local : IPnoaddr);
+		bp = concatblock(bp);
+		if(bp != nil)
+			if(qpass(c->rq, bp) < 0)
+				print("Q");
+		return;
+	}
+
+nomatch:
+	/* doesn't match any filter, hand it to the specific protocol handler */
+	ip = (Myip4hdr*)bp->rp;
+	if((ip->vihl & 0xF0) == IP_VER4) {
+		p = f->t2p[ip->proto];
+	} else {
+		ip6 = (Ip6hdr*)bp->rp;
+		p = f->t2p[ip6->proto];
+	}
+	if(p && p->rcv)
+		(*p->rcv)(p, ifc, bp);
+	else
+		freeblist(bp);
+	return;
+}
+
+/*
+ * Format the tree rooted at mux into buf (at most len bytes), one
+ * node per line, indented by one blank per level.  A node prints as
+ *	h[first:last]&mask=val|val|...
+ * with offsets counted from the start of the ip header, followed by
+ * its no subtree and then its yes subtree one level deeper.  A nil
+ * subtree prints as an empty line.  Returns the sum of the snprint
+ * results.
+ */
+static int
+ipmuxsprint(Ipmux *mux, int level, char *buf, int len)
+{
+	int i, k, n;
+	uchar *val;
+
+	n = 0;
+	for(i = 0; i < level; i++)
+		n += snprint(buf+n, len-n, " ");
+	if(mux == nil)
+		return n + snprint(buf+n, len-n, "\n");
+
+	/* byte range examined */
+	n += snprint(buf+n, len-n, "h[%d:%d]&",
+		mux->off+((int)mux->skiphdr)*((int)ipoff->data),
+		mux->off+(((int)mux->skiphdr)*((int)ipoff->data))+mux->len-1);
+
+	/* mask, then every value, in hex */
+	for(i = 0; i < mux->len; i++)
+		n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
+	n += snprint(buf+n, len-n, "=");
+	val = mux->val;
+	for(k = 0; k < mux->n; k++){
+		for(i = 0; i < mux->len; i++){
+			n += snprint(buf+n, len - n, "%2.2ux", *val);
+			val++;
+		}
+		n += snprint(buf+n, len-n, "|");
+	}
+	n += snprint(buf+n, len-n, "\n");
+
+	/* children one level down: no side first */
+	n += ipmuxsprint(mux->no, level+1, buf+n, len-n);
+	n += ipmuxsprint(mux->yes, level+1, buf+n, len-n);
+	return n;
+}
+
+/*
+ * format the protocol's whole demultiplexing tree into buf while
+ * holding the Fs read lock; returns what ipmuxsprint returns
+ */
+static int
+ipmuxstats(Proto *p, char *buf, int len)
+{
+	Fs *f;
+	int used;
+
+	f = p->f;
+	RLOCK(f);
+	used = ipmuxsprint(p->priv, 0, buf, len);
+	RUNLOCK(f);
+	return used;
+}
+
+/*
+ * allocate the ipmux protocol, fill in its entry points and
+ * register it with f
+ */
+void
+ipmuxinit(Fs *f)
+{
+	Proto *pr;
+
+	pr = smalloc(sizeof(Proto));
+
+	/* identity and sizing */
+	pr->name = "ipmux";
+	pr->ipproto = -1;
+	pr->nc = 64;
+	pr->ptclsize = sizeof(Ipmuxrock);
+	pr->priv = nil;		/* empty demultiplexing tree */
+
+	/* entry points */
+	pr->connect = ipmuxconnect;
+	pr->announce = ipmuxannounce;
+	pr->state = ipmuxstate;
+	pr->create = ipmuxcreate;
+	pr->close = ipmuxclose;
+	pr->rcv = ipmuxiput;
+	pr->ctl = nil;
+	pr->advise = nil;
+	pr->stats = ipmuxstats;
+
+	f->ipmux = pr;	/* hack for Fsrcvpcol */
+
+	Fsproto(f, pr);
+}
diff --git a/src/9vx/a/ip/iproute.c b/src/9vx/a/ip/iproute.c
@@ -0,0 +1,854 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+/* forward declarations */
+static void walkadd(Fs*, Route**, Route*);
+static void addnode(Fs*, Route**, Route*);
+static void calcd(Route*);
+
+/* these are used for all instances of IP */
+static Route* v4freelist; /* free v4 nodes, linked through ->mid */
+static Route* v6freelist; /* free v6 nodes, linked through ->mid */
+static RWlock routelock; /* write-locked while a route tree is modified */
+static ulong v4routegeneration, v6routegeneration; /* bumped on every add or delete */
+
+/*
+ * put r on the free list for its address family (selected by Rv4
+ * in r->type); the list is linked through ->mid
+ */
+static void
+freeroute(Route *r)
+{
+	Route **head;
+
+	head = (r->type & Rv4) ? &v4freelist : &v6freelist;
+	r->left = nil;
+	r->right = nil;
+	r->mid = *head;
+	*head = r;
+}
+
+/*
+ * Get a zeroed route node of the given type with ref set to 1,
+ * reusing one from the matching free list when possible.
+ * Panics if memory runs out.
+ */
+static Route*
+allocroute(int type)
+{
+	Route *r, **head;
+	int size;
+
+	/* node size and free list depend on the address family */
+	size = sizeof(RouteTree);
+	if(type & Rv4){
+		size += sizeof(V4route);
+		head = &v4freelist;
+	} else {
+		size += sizeof(V6route);
+		head = &v6freelist;
+	}
+
+	r = *head;
+	if(r == nil){
+		r = malloc(size);
+		if(r == nil)
+			panic("out of routing nodes");
+	} else
+		*head = r->mid;
+
+	memset(r, 0, size);
+	r->type = type;
+	r->ifc = nil;
+	r->ref = 1;
+
+	return r;
+}
+
+/*
+ * push subtree r onto the work queue *q; each queue cell is a route
+ * node whose ->left holds the subtree and whose ->mid links to the
+ * next cell.  A nil r is ignored.
+ */
+static void
+addqueue(Route **q, Route *r)
+{
+	Route *cell;
+
+	if(r != nil){
+		cell = allocroute(r->type);
+		cell->mid = *q;
+		*q = cell;
+		cell->left = r;
+	}
+}
+
+/*
+ * compare 2 v6 addresses held as IPllen ulongs, most significant
+ * first; returns 1, -1 or 0 as a is greater than, less than or
+ * equal to b
+ */
+static int
+lcmp(ulong *a, ulong *b)
+{
+	int i;
+
+	for(i = 0; i < IPllen; i++){
+		if(a[i] != b[i])
+			return a[i] > b[i] ? 1 : -1;
+	}
+	return 0;
+}
+
+/*
+ * compare 2 v4 or v6 ranges
+ *
+ * results of rangecompare(a, b):
+ */
+enum
+{
+ Rpreceeds, /* a ends before b starts */
+ Rfollows, /* a starts after b ends */
+ Requals, /* a and b cover the same range */
+ Rcontains, /* a covers all of b and more */
+ Rcontained, /* none of the above */
+};
+
+/*
+ * Relate the address range of a to that of b and return one of
+ * Rpreceeds, Rfollows, Requals, Rcontains or Rcontained.  The
+ * address family is taken from a->type.
+ */
+static int
+rangecompare(Route *a, Route *b)
+{
+	int lo, hi;
+
+	if(a->type & Rv4){
+		if(a->v4.endaddress < b->v4.address)
+			return Rpreceeds;
+		if(a->v4.address > b->v4.endaddress)
+			return Rfollows;
+		/* overlapping: a sticks out of neither end of b's complement */
+		if(a->v4.address > b->v4.address
+		|| a->v4.endaddress < b->v4.endaddress)
+			return Rcontained;
+		if(a->v4.address == b->v4.address
+		&& a->v4.endaddress == b->v4.endaddress)
+			return Requals;
+		return Rcontains;
+	}
+
+	if(lcmp(a->v6.endaddress, b->v6.address) < 0)
+		return Rpreceeds;
+	if(lcmp(a->v6.address, b->v6.endaddress) > 0)
+		return Rfollows;
+
+	lo = lcmp(a->v6.address, b->v6.address);
+	hi = lcmp(a->v6.endaddress, b->v6.endaddress);
+	if(lo > 0 || hi < 0)
+		return Rcontained;
+	if(lo == 0 && hi == 0)
+		return Requals;
+	return Rcontains;
+}
+
+/*
+ * copy the gateway of new into old; the address family is taken
+ * from new->type
+ */
+static void
+copygate(Route *old, Route *new)
+{
+	if(new->type & Rv4){
+		memmove(old->v4.gate, new->v4.gate, IPv4addrlen);
+		return;
+	}
+	memmove(old->v6.gate, new->v6.gate, IPaddrlen);
+}
+
+/*
+ * Re-insert the subtree p into the tree at root: p is detached from
+ * its left and right children and added with addnode (its mid
+ * subtree stays attached), then each child is re-inserted the same
+ * way.
+ */
+static void
+walkadd(Fs *f, Route **root, Route *p)
+{
+	Route *lsub, *rsub;
+
+	/* remember the children before addnode gets to see p */
+	lsub = p->left;
+	rsub = p->right;
+	p->left = 0;
+	p->right = 0;
+
+	addnode(f, root, p);
+	if(lsub != nil)
+		walkadd(f, root, lsub);
+	if(rsub != nil)
+		walkadd(f, root, rsub);
+}
+
+/*
+ * recompute p->depth as one more than the deepest of its left,
+ * right and mid children (1 for a leaf); a nil p is ignored
+ */
+static void
+calcd(Route *p)
+{
+	int d;
+
+	if(p == nil)
+		return;
+
+	d = 0;
+	if(p->left != nil)
+		d = p->left->depth;
+	if(p->right != nil && p->right->depth > d)
+		d = p->right->depth;
+	if(p->mid != nil && p->mid->depth > d)
+		d = p->mid->depth;
+	p->depth = d+1;
+}
+
+/*
+ * Balance the tree at *cur: when the left and right depths differ
+ * by more than one, rotate the deeper child up into *cur.  Depths
+ * of the nodes involved are recomputed either way.
+ */
+static void
+balancetree(Route **cur)
+{
+	Route *p, *l, *r;
+	int dl, dr;
+
+	p = *cur;
+	l = p->left;
+	r = p->right;
+	dl = (l != nil) ? l->depth : 0;
+	dr = (r != nil) ? r->depth : 0;
+
+	if(dl > dr+1){
+		/* left side too deep: left child becomes the root */
+		p->left = l->right;
+		l->right = p;
+		*cur = l;
+		calcd(p);
+		calcd(l);
+		return;
+	}
+	if(dr > dl+1){
+		/* right side too deep: right child becomes the root */
+		p->right = r->left;
+		r->left = p;
+		*cur = r;
+		calcd(p);
+		calcd(r);
+		return;
+	}
+	calcd(p);
+}
+
+/*
+ * add a new node to the tree
+ *
+ * cur is the link to the current subtree.  Disjoint ranges go left
+ * or right, ranges inside the current node go down its mid link.
+ * Ownership of new passes to the tree, or to the free list when an
+ * equal range is already present.  Nodes displaced by a containing
+ * range are put on f->queue for the caller to re-insert.
+ */
+static void
+addnode(Fs *f, Route **cur, Route *new)
+{
+ Route *p;
+
+ p = *cur;
+ if(p == 0) {
+ /* empty slot: new becomes a leaf here */
+ *cur = new;
+ new->depth = 1;
+ return;
+ }
+
+ switch(rangecompare(new, p)){
+ case Rpreceeds:
+ addnode(f, &p->left, new);
+ break;
+ case Rfollows:
+ addnode(f, &p->right, new);
+ break;
+ case Rcontains:
+ /*
+ * if new node is superset
+ * of tree node,
+ * replace tree node and
+ * queue tree node to be
+ * merged into root.
+ */
+ *cur = new;
+ new->depth = 1;
+ addqueue(&f->queue, p);
+ break;
+ case Requals:
+ /*
+ * supersede the old entry if the old one isn't
+ * a local interface.
+ */
+ if((p->type & Rifc) == 0){
+ p->type = new->type;
+ p->ifcid = -1;
+ copygate(p, new);
+ } else if(new->type & Rifc)
+ p->ref++; /* one more interface route for the same range */
+ freeroute(new);
+ break;
+ case Rcontained:
+ addnode(f, &p->mid, new);
+ break;
+ }
+
+ balancetree(cur);
+}
+
+/* index into the v4root hash array for v4 address a (argument parenthesized so expressions are safe) */
+#define V4H(a) (((a)&0x07ffffff)>>(32-Lroot-5))
+
+/*
+ * Add a v4 route for network a/mask through gate.  One node is
+ * inserted into every v4root hash slot from that of the first
+ * address of the range to that of the last.  tag labels the route;
+ * at most sizeof(p->tag) bytes of it are kept.  The v4 route
+ * generation is bumped and ipifcaddroute is told about the route.
+ */
+void
+v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+{
+	Route *p;
+	ulong sa;
+	ulong m;
+	ulong ea;
+	int h, eh;
+
+	/* first and last address of the range */
+	m = nhgetl(mask);
+	sa = nhgetl(a) & m;
+	ea = sa | ~m;
+
+	eh = V4H(ea);
+	for(h=V4H(sa); h<=eh; h++) {
+		p = allocroute(Rv4 | type);
+		p->v4.address = sa;
+		p->v4.endaddress = ea;
+		memmove(p->v4.gate, gate, sizeof(p->v4.gate));
+		/*
+		 * tag may be a string shorter than p->tag (e.g. "ra");
+		 * strncpy stops reading at its NUL and zero-fills the rest
+		 */
+		strncpy(p->tag, tag, sizeof(p->tag));
+
+		wlock(&routelock);
+		addnode(f, &f->v4root[h], p);
+		/* re-insert whatever addnode displaced */
+		while((p = f->queue) != nil) {
+			f->queue = p->mid;
+			walkadd(f, &f->v4root[h], p->left);
+			freeroute(p);
+		}
+		wunlock(&routelock);
+	}
+	v4routegeneration++;
+
+	ipifcaddroute(f, Rv4, a, mask, gate, type);
+}
+
+/* V6H: index into the v6root hash array, taken from the last ulong of address a */
+#define V6H(a) (((a)[IPllen-1] & 0x07ffffff)>>(32-Lroot-5))
+/* ISDFLT: true for an all-zero address and mask whose tag is not "ra"; only referenced from commented-out code in the visible part of this file */
+#define ISDFLT(a, mask, tag) ((ipcmp((a),v6Unspecified)==0) && (ipcmp((mask),v6Unspecified)==0) && (strcmp((tag), "ra")!=0))
+
+/*
+ * Add a v6 route for network a/mask through gate.  One node is
+ * inserted into every v6root hash slot from that of the first
+ * address of the range to that of the last.  tag labels the route;
+ * at most sizeof(p->tag) bytes of it are kept.  The v6 route
+ * generation is bumped and ipifcaddroute is told about the route.
+ */
+void
+v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+{
+	Route *p;
+	ulong sa[IPllen], ea[IPllen];
+	ulong x, y;
+	int h, eh;
+
+	/*
+	if(ISDFLT(a, mask, tag))
+		f->v6p->cdrouter = -1;
+	*/
+
+	/* first and last address of the range, one ulong at a time */
+	for(h = 0; h < IPllen; h++){
+		x = nhgetl(a+4*h);
+		y = nhgetl(mask+4*h);
+		sa[h] = x & y;
+		ea[h] = x | ~y;
+	}
+
+	eh = V6H(ea);
+	for(h = V6H(sa); h <= eh; h++) {
+		p = allocroute(type);
+		memmove(p->v6.address, sa, IPaddrlen);
+		memmove(p->v6.endaddress, ea, IPaddrlen);
+		memmove(p->v6.gate, gate, IPaddrlen);
+		/*
+		 * tag may be a string shorter than p->tag (e.g. "ra");
+		 * strncpy stops reading at its NUL and zero-fills the rest
+		 */
+		strncpy(p->tag, tag, sizeof(p->tag));
+
+		wlock(&routelock);
+		addnode(f, &f->v6root[h], p);
+		/* re-insert whatever addnode displaced */
+		while((p = f->queue) != nil) {
+			f->queue = p->mid;
+			walkadd(f, &f->v6root[h], p->left);
+			freeroute(p);
+		}
+		wunlock(&routelock);
+	}
+	v6routegeneration++;
+
+	ipifcaddroute(f, 0, a, mask, gate, type);
+}
+
+/*
+ * Find the node whose range equals that of r in the tree at *cur.
+ * Returns the link pointing at that node, or 0 if there is none
+ * (the tree runs out, or a node strictly inside r is met first).
+ */
+Route**
+looknode(Route **cur, Route *r)
+{
+	Route *p;
+
+	while((p = *cur) != 0){
+		switch(rangecompare(r, p)){
+		case Requals:
+			return cur;
+		case Rcontains:
+			return 0;
+		case Rpreceeds:
+			cur = &p->left;
+			break;
+		case Rfollows:
+			cur = &p->right;
+			break;
+		case Rcontained:
+			cur = &p->mid;
+			break;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Drop one reference to the v4 route for a/mask in every hash slot
+ * the range covers.  A node whose count reaches zero is unlinked,
+ * freed, and its three subtrees are re-inserted into the slot.
+ * routelock is taken around each slot only if dolock is set.
+ * Bumps the v4 route generation and calls ipifcremroute.
+ */
+void
+v4delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+{
+	Route **slot, *node, *pending;
+	Route key;
+	int h, last;
+	ulong m;
+
+	/* build a search key covering the range */
+	m = nhgetl(mask);
+	key.v4.address = nhgetl(a) & m;
+	key.v4.endaddress = key.v4.address | ~m;
+	key.type = Rv4;
+
+	last = V4H(key.v4.endaddress);
+	for(h = V4H(key.v4.address); h <= last; h++){
+		if(dolock)
+			wlock(&routelock);
+		slot = looknode(&f->v4root[h], &key);
+		if(slot != nil && --((*slot)->ref) == 0){
+			node = *slot;
+			*slot = 0;
+			addqueue(&f->queue, node->left);
+			addqueue(&f->queue, node->mid);
+			addqueue(&f->queue, node->right);
+			freeroute(node);
+			/* put the orphaned subtrees back */
+			while((pending = f->queue) != nil){
+				f->queue = pending->mid;
+				walkadd(f, &f->v4root[h], pending->left);
+				freeroute(pending);
+			}
+		}
+		if(dolock)
+			wunlock(&routelock);
+	}
+	v4routegeneration++;
+
+	ipifcremroute(f, Rv4, a, mask);
+}
+
+/*
+ * v6 counterpart of v4delroute: drop one reference to the route for
+ * (a, mask) in every v6root bucket its range hashes to, removing nodes
+ * whose count reaches zero.  routelock is taken only if dolock is set.
+ */
+void
+v6delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+{
+	Route key, *victim, *q, **slot, **root;
+	ulong word, m;
+	int i, bucket, last;
+
+	for(i = 0; i < IPllen; i++){
+		word = nhgetl(&a[4*i]);
+		m = nhgetl(&mask[4*i]);
+		key.v6.address[i] = word & m;
+		key.v6.endaddress[i] = word | ~m;
+	}
+	key.type = 0;
+
+	last = V6H(key.v6.endaddress);
+	for(bucket = V6H(key.v6.address); bucket <= last; bucket++){
+		root = &f->v6root[bucket];
+		if(dolock)
+			wlock(&routelock);
+		slot = looknode(root, &key);
+		if(slot != nil && --(*slot)->ref == 0){
+			victim = *slot;
+			*slot = nil;
+			/* queue the orphaned subtrees, then put them back */
+			addqueue(&f->queue, victim->left);
+			addqueue(&f->queue, victim->mid);
+			addqueue(&f->queue, victim->right);
+			freeroute(victim);
+			for(q = f->queue; q != nil; q = f->queue){
+				f->queue = q->mid;
+				walkadd(f, root, q->left);
+				freeroute(q);
+			}
+		}
+		if(dolock)
+			wunlock(&routelock);
+	}
+	v6routegeneration++;
+
+	ipifcremroute(f, 0, a, mask);
+}
+
+/*
+ * Find the most specific v4 route covering the 4-byte address a.
+ * If c is non-nil its cached route is returned while it is still of the
+ * current generation, and the result of a fresh search is cached in it.
+ * Returns nil if no route matches or no interface can be found for it.
+ */
+Route*
+v4lookup(Fs *f, uchar *a, Conv *c)
+{
+	Route *node, *best;
+	ulong dst;
+	uchar gate[IPaddrlen];
+	Ipifc *ifc;
+
+	if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v4routegeneration)
+		return c->r;
+
+	dst = nhgetl(a);
+	best = nil;
+	node = f->v4root[V4H(dst)];
+	while(node != nil){
+		if(dst < node->v4.address)
+			node = node->left;
+		else if(dst > node->v4.endaddress)
+			node = node->right;
+		else{
+			/* covered: remember it and look for a narrower match */
+			best = node;
+			node = node->mid;
+		}
+	}
+
+	/* (re)bind the route to an interface if its binding is missing or stale */
+	if(best != nil && (best->ifc == nil || best->ifcid != best->ifc->ifcid)){
+		if(best->type & Rifc){
+			hnputl(gate+IPv4off, best->v4.address);
+			memmove(gate, v4prefix, IPv4off);
+		}else
+			v4tov6(gate, best->v4.gate);
+		ifc = findipifc(f, gate, best->type);
+		if(ifc == nil)
+			return nil;
+		best->ifc = ifc;
+		best->ifcid = ifc->ifcid;
+	}
+
+	if(c != nil){
+		c->r = best;
+		c->rgen = v4routegeneration;
+	}
+
+	return best;
+}
+
+/*
+ * Find the most specific route covering the 16-byte address a.
+ * Addresses that start with v4prefix are first tried through v4lookup;
+ * if that finds nothing the v6 trees are searched as well.
+ * If c is non-nil its cached route is reused while c->rgen matches
+ * v6routegeneration, and a fresh result is cached in it.
+ * Returns nil if no route matches or no interface can be found for it.
+ */
+Route*
+v6lookup(Fs *f, uchar *a, Conv *c)
+{
+ Route *p, *q;
+ ulong la[IPllen];
+ int h;
+ ulong x, y;
+ uchar gate[IPaddrlen];
+ Ipifc *ifc;
+
+ if(memcmp(a, v4prefix, IPv4off) == 0){
+ q = v4lookup(f, a+IPv4off, c);
+ if(q != nil)
+ return q;
+ }
+
+ if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v6routegeneration)
+ return c->r;
+
+ for(h = 0; h < IPllen; h++)
+ la[h] = nhgetl(a+4*h);
+
+ q = 0;
+ for(p=f->v6root[V6H(la)]; p;){
+ /* word-by-word compare: la below the node's start means go left */
+ for(h = 0; h < IPllen; h++){
+ x = la[h];
+ y = p->v6.address[h];
+ if(x == y)
+ continue;
+ if(x < y){
+ p = p->left;
+ goto next;
+ }
+ break;
+ }
+ /* la above the node's end means go right */
+ for(h = 0; h < IPllen; h++){
+ x = la[h];
+ y = p->v6.endaddress[h];
+ if(x == y)
+ continue;
+ if(x > y){
+ p = p->right;
+ goto next;
+ }
+ break;
+ }
+ /* la lies inside this node's range; keep it and try narrower ones */
+ q = p;
+ p = p->mid;
+next: ;
+ }
+
+ /* (re)bind the route to an interface if its binding is missing or stale */
+ if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
+ if(q->type & Rifc) {
+ for(h = 0; h < IPllen; h++)
+ hnputl(gate+4*h, q->v6.address[h]);
+ ifc = findipifc(f, gate, q->type);
+ } else
+ ifc = findipifc(f, q->v6.gate, q->type);
+ if(ifc == nil)
+ return nil;
+ q->ifc = ifc;
+ q->ifcid = ifc->ifcid;
+ }
+ if(c != nil){
+ c->r = q;
+ c->rgen = v6routegeneration;
+ }
+
+ return q;
+}
+
+/*
+ * Render the route type bits as a 4-character, blank-padded,
+ * NUL-terminated string in p (which must hold at least 5 bytes):
+ * '4' or '6', then 'i' for Rifc, one of 'u'/'b'/'m', and 'p' for Rptpt.
+ */
+void
+routetype(int type, char *p)
+{
+	char *s;
+
+	memset(p, ' ', 4);
+	p[4] = 0;
+
+	s = p;
+	*s++ = (type & Rv4) ? '4' : '6';
+	if(type & Rifc)
+		*s++ = 'i';
+
+	/* unicast wins over broadcast, which wins over multicast */
+	if(type & Runi)
+		*s++ = 'u';
+	else if(type & Rbcast)
+		*s++ = 'b';
+	else if(type & Rmulti)
+		*s++ = 'm';
+
+	if(type & Rptpt)
+		*s = 'p';
+}
+
+/* one line per route, as printed by sprintroute: address, mask, gateway, type, tag, interface */
+static char *rformat = "%-15I %-4M %-15I %4.4s %4.4s %3s\n";
+
+/*
+ * Unpack route r into 16-byte addr, mask and gate buffers, a type
+ * string t (see routetype) and, in *nifc, the conversation index of
+ * the bound interface, or -1 if the route has no interface.
+ */
+void
+convroute(Route *r, uchar *addr, uchar *mask, uchar *gate, char *t, int *nifc)
+{
+	int w;
+
+	if((r->type & Rv4) == 0){
+		for(w = 0; w < IPllen; w++){
+			hnputl(addr + 4*w, r->v6.address[w]);
+			hnputl(mask + 4*w, ~(r->v6.endaddress[w] ^ r->v6.address[w]));
+		}
+		memmove(gate, r->v6.gate, IPaddrlen);
+	}else{
+		/* v4 values are presented with the v4prefix in front */
+		memmove(addr, v4prefix, IPv4off);
+		hnputl(addr+IPv4off, r->v4.address);
+		memset(mask, 0xff, IPv4off);
+		hnputl(mask+IPv4off, ~(r->v4.endaddress ^ r->v4.address));
+		memmove(gate, v4prefix, IPv4off);
+		memmove(gate+IPv4off, r->v4.gate, IPv4addrlen);
+	}
+
+	routetype(r->type, t);
+
+	if(r->ifc == nil)
+		*nifc = -1;
+	else
+		*nifc = r->ifc->conv->x;
+}
+
+/*
+ * this code is not in rr to reduce stack size
+ *
+ * Format route r as one rformat line at rw->p (bounded by rw->e).
+ * While rw->o is negative, -rw->o bytes of output still have to be
+ * skipped; the part of the line that falls inside the skipped region
+ * is discarded by sliding the remainder down to rw->p.
+ */
+static void
+sprintroute(Route *r, Routewalk *rw)
+{
+ int nifc, n;
+ char t[5], *iname, ifbuf[5];
+ uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen];
+ char *p;
+
+ convroute(r, addr, mask, gate, t, &nifc);
+ iname = "-";
+ if(nifc != -1) {
+ iname = ifbuf;
+ snprint(ifbuf, sizeof ifbuf, "%d", nifc);
+ }
+ p = seprint(rw->p, rw->e, rformat, addr, mask, gate, t, r->tag, iname);
+ if(rw->o < 0){
+ n = p - rw->p;
+ /* line straddles the requested offset: keep only its tail */
+ if(n > -rw->o){
+ memmove(rw->p, rw->p-rw->o, n+rw->o);
+ rw->p = p + rw->o;
+ }
+ rw->o += n;
+ } else
+ rw->p = p;
+}
+
+/*
+ * In-order walk (left, node, mid, right) of the tree under r.
+ * rw->walk is applied to each node whose own hash equals rw->h.
+ * Returns 0 as soon as the output buffer is full, 1 otherwise.
+ */
+static int
+rr(Route *r, Routewalk *rw)
+{
+	int bucket;
+
+	if(rw->e <= rw->p)
+		return 0;
+	if(r == nil)
+		return 1;
+
+	if(!rr(r->left, rw))
+		return 0;
+
+	if(r->type & Rv4)
+		bucket = V4H(r->v4.address);
+	else
+		bucket = V6H(r->v6.address);
+	if(bucket == rw->h)
+		rw->walk(r, rw);
+
+	if(!rr(r->mid, rw))
+		return 0;
+	return rr(r->right, rw);
+}
+
+/*
+ * Apply rw->walk to every v4 route and then every v6 route, holding
+ * routelock for reading.  Stops early once rw's buffer is full.
+ */
+void
+ipwalkroutes(Fs *f, Routewalk *rw)
+{
+	rlock(&routelock);
+	if(rw->p < rw->e){
+		rw->h = 0;
+		while(rw->h < nelem(f->v4root) && rr(f->v4root[rw->h], rw) != 0)
+			rw->h++;
+	}
+	if(rw->p < rw->e){
+		rw->h = 0;
+		while(rw->h < nelem(f->v6root) && rr(f->v6root[rw->h], rw) != 0)
+			rw->h++;
+	}
+	runlock(&routelock);
+}
+
+/*
+ * Read up to n bytes of the textual route table into p, starting
+ * offset bytes into the listing.  Returns the number of bytes stored.
+ */
+long
+routeread(Fs *f, char *p, ulong offset, int n)
+{
+	Routewalk rw;
+
+	rw.walk = sprintroute;
+	rw.o = -offset;		/* negative: bytes still to be skipped */
+	rw.p = p;
+	rw.e = p+n;
+
+	ipwalkroutes(f, &rw);
+
+	return rw.p - p;
+}
+
+/*
+ * this code is not in routeflush to reduce stack size
+ *
+ * Delete route r by converting it back to address/mask form and
+ * handing it to the v4 or v6 delete routine.
+ */
+void
+delroute(Fs *f, Route *r, int dolock)
+{
+	uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen];
+	char t[5];
+	int nifc;
+
+	convroute(r, addr, mask, gate, t, &nifc);
+	if((r->type & Rv4) == 0){
+		v6delroute(f, addr, mask, dolock);
+		return;
+	}
+	v4delroute(f, addr+IPv4off, mask+IPv4off, dolock);
+}
+
+/*
+ * recurse until one route is deleted
+ * returns 0 if nothing is deleted, 1 otherwise
+ *
+ * Interface routes (Rifc) are never deleted; a nil tag matches every
+ * other route.  The delete is done without taking routelock.
+ */
+int
+routeflush(Fs *f, Route *r, char *tag)
+{
+	if(r == nil)
+		return 0;
+
+	/* children first: mid, then left, then right */
+	if(routeflush(f, r->mid, tag)
+	|| routeflush(f, r->left, tag)
+	|| routeflush(f, r->right, tag))
+		return 1;
+
+	if(r->type & Rifc)
+		return 0;
+	if(tag != nil && strncmp(tag, r->tag, sizeof(r->tag)) != 0)
+		return 0;
+
+	delroute(f, r, 0);
+	return 1;
+}
+
+/*
+ * Handle a control message written to the route file.  Verbs:
+ *	flush [tag]		delete all non-interface routes (with that tag)
+ *	remove addr mask	delete one route
+ *	add addr mask gate	add a route, tagged from c's IPaux (or "none")
+ *	tag name		replace the tag carried in c->aux
+ * Unknown verbs are ignored.  Returns n; malformed requests raise
+ * Ebadarg or Ebadip through error().
+ */
+long
+routewrite(Fs *f, Chan *c, char *p, int n)
+{
+	int h, changed;
+	char *tag;
+	Cmdbuf *cb;
+	uchar addr[IPaddrlen];
+	uchar mask[IPaddrlen];
+	uchar gate[IPaddrlen];
+	IPaux *a, *na;
+
+	cb = parsecmd(p, n);
+	if(waserror()){
+		free(cb);
+		nexterror();
+	}
+
+	/* an empty write has no verb; don't hand cb->f[0] to strcmp */
+	if(cb->nf < 1)
+		error(Ebadarg);
+
+	if(strcmp(cb->f[0], "flush") == 0){
+		/* the tag is optional; nil flushes regardless of tag */
+		tag = cb->nf > 1 ? cb->f[1] : nil;
+		/* routeflush removes one route per call; repeat until none is left */
+		for(h = 0; h < nelem(f->v4root); h++)
+			for(changed = 1; changed;){
+				wlock(&routelock);
+				changed = routeflush(f, f->v4root[h], tag);
+				wunlock(&routelock);
+			}
+		for(h = 0; h < nelem(f->v6root); h++)
+			for(changed = 1; changed;){
+				wlock(&routelock);
+				changed = routeflush(f, f->v6root[h], tag);
+				wunlock(&routelock);
+			}
+	} else if(strcmp(cb->f[0], "remove") == 0){
+		if(cb->nf < 3)
+			error(Ebadarg);
+		if (parseip(addr, cb->f[1]) == -1)
+			error(Ebadip);
+		parseipmask(mask, cb->f[2]);
+		if(memcmp(addr, v4prefix, IPv4off) == 0)
+			v4delroute(f, addr+IPv4off, mask+IPv4off, 1);
+		else
+			v6delroute(f, addr, mask, 1);
+	} else if(strcmp(cb->f[0], "add") == 0){
+		if(cb->nf < 4)
+			error(Ebadarg);
+		if(parseip(addr, cb->f[1]) == -1 ||
+		    parseip(gate, cb->f[3]) == -1)
+			error(Ebadip);
+		parseipmask(mask, cb->f[2]);
+		tag = "none";
+		if(c != nil){
+			a = c->aux;
+			tag = a->tag;
+		}
+		if(memcmp(addr, v4prefix, IPv4off) == 0)
+			v4addroute(f, tag, addr+IPv4off, mask+IPv4off, gate+IPv4off, 0);
+		else
+			v6addroute(f, tag, addr, mask, gate, 0);
+	} else if(strcmp(cb->f[0], "tag") == 0) {
+		/* "add" tolerates a nil channel, so "tag" must not assume one */
+		if(cb->nf < 2 || c == nil)
+			error(Ebadarg);
+
+		a = c->aux;
+		na = newipaux(a->owner, cb->f[1]);
+		c->aux = na;
+		free(a);
+	}
+
+	poperror();
+	free(cb);
+	return n;
+}
diff --git a/src/9vx/a/ip/ipv6.c b/src/9vx/a/ip/ipv6.c
@@ -0,0 +1,718 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+
+enum
+{
+	IP6FHDR		= 8, 		/* sizeof(Fraghdr6) */
+};
+
+/*
+ * Traffic class: the 8 bits that follow the 4-bit version, i.e. the low
+ * nibble of vcf[0] (upper half) and the high nibble of vcf[1] (lower
+ * half).  This is the inverse of how ipoput6 stores tos into vcf[].
+ */
+#define IPV6CLASS(hdr)	((((hdr)->vcf[0]&0x0F)<<4) | (((hdr)->vcf[1]&0xF0)>>4))
+/* version nibble of the packet at xp->rp, left in the high four bits */
+#define BLKIPVER(xp)	(((Ip6hdr*)((xp)->rp))->vcf[0] & 0xF0)
+/*
+ * This sleazy macro is stolen shamelessly from ip.c, see comment there.
+ * It treats the start of the block's buffer as an Ipfrag record.
+ */
+#define BKFG(xp)	((Ipfrag*)((xp)->base))
+
+typedef struct Fragment4 Fragment4;
+typedef struct Fragment6 Fragment6;
+typedef struct Ipfrag Ipfrag;
+
+Block* ip6reassemble(IP*, int, Block*, Ip6hdr*);
+Fragment6* ipfragallo6(IP*);
+void ipfragfree6(IP*, Fragment6*);
+Block* procopts(Block *bp);
+static Block* procxtns(IP *ip, Block *bp, int doreasm);
+int unfraglen(Block *bp, uchar *nexthdr, int setfh);
+
+/* MIB II counters; each value indexes IP.stats[] and statnames[] */
+enum
+{
+ Forwarding,
+ DefaultTTL,
+ InReceives,
+ InHdrErrors,
+ InAddrErrors,
+ ForwDatagrams,
+ InUnknownProtos,
+ InDiscards,
+ InDelivers,
+ OutRequests,
+ OutDiscards,
+ OutNoRoutes,
+ ReasmTimeout,
+ ReasmReqds,
+ ReasmOKs,
+ ReasmFails,
+ FragOKs,
+ FragFails,
+ FragCreates,
+
+ Nstats, /* number of counters; sizes IP.stats[] */
+};
+
+/* printable name of each MIB II counter, indexed by the enum above */
+static char *statnames[] =
+{
+[Forwarding] "Forwarding",
+[DefaultTTL] "DefaultTTL",
+[InReceives] "InReceives",
+[InHdrErrors] "InHdrErrors",
+[InAddrErrors] "InAddrErrors",
+[ForwDatagrams] "ForwDatagrams",
+[InUnknownProtos] "InUnknownProtos",
+[InDiscards] "InDiscards",
+[InDelivers] "InDelivers",
+[OutRequests] "OutRequests",
+[OutDiscards] "OutDiscards",
+[OutNoRoutes] "OutNoRoutes",
+[ReasmTimeout] "ReasmTimeout",
+[ReasmReqds] "ReasmReqds",
+[ReasmOKs] "ReasmOKs",
+[ReasmFails] "ReasmFails",
+[FragOKs] "FragOKs",
+[FragFails] "FragFails",
+[FragCreates] "FragCreates",
+};
+
+/*
+ * v4 reassembly queue entry; not used by the code in this file.
+ * NOTE(review): presumably mirrors Fragment6 for ip.c -- confirm.
+ */
+struct Fragment4
+{
+ Block* blist;
+ Fragment4* next;
+ ulong src;
+ ulong dst;
+ ushort id;
+ ulong age;
+};
+
+/* one v6 reassembly queue, keyed by (src, dst, id); see ip6reassemble */
+struct Fragment6
+{
+ Block* blist; /* fragments received so far, ordered by offset */
+ Fragment6* next; /* link on IP.flisthead6 or IP.fragfree6 */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+ uint id; /* identification from the fragment header */
+ ulong age; /* value of NOW after which the queue has timed out */
+};
+
+/* per-fragment bookkeeping that ip6reassemble keeps at bp->base (see BKFG) */
+struct Ipfrag
+{
+ ushort foff; /* offset of this fragment's data within the packet */
+ ushort flen; /* length of this fragment's data */
+};
+
+/* an instance of IP */
+struct IP
+{
+ ulong stats[Nstats]; /* MIB II counters, indexed by the enum above */
+
+ QLock fraglock4;
+ Fragment4* flisthead4;
+ Fragment4* fragfree4;
+ Ref id4;
+
+ QLock fraglock6; /* held while the two v6 lists below are changed */
+ Fragment6* flisthead6; /* reassembly queues in use */
+ Fragment6* fragfree6; /* free reassembly queues */
+ Ref id6; /* source of fragment identifications in ipoput6 */
+
+ int iprouting; /* true if we route like a gateway */
+};
+
+/*
+ * Send the v6 packet in bp, which starts with an Ip6hdr, towards its
+ * destination address.
+ *
+ * gating is non-zero for forwarded packets: the payload length already
+ * in the header is honoured and the version/traffic-class bytes are
+ * left alone.  ttl is stored as the hop limit.  tos is the traffic
+ * class, written only when not gating.  c, if non-nil, is passed to
+ * v6lookup so the route can be cached in the conversation.
+ *
+ * A packet that does not fit the interface is fragmented, except that
+ * gated packets are fragmented only if ifc->reassemble is set.
+ *
+ * On every normal return bp has either been handed to the medium or
+ * freed.  Returns -1 when there is no route, 0 in all other cases
+ * (including discards).
+ */
+int
+ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+{
+	int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff;
+	int morefrags, blklen, rv = 0, tentative;
+	uchar *gate, nexthdr;
+	Block *xp, *nb;
+	Fraghdr6 fraghdr;
+	IP *ip;
+	Ip6hdr *eh;
+	Ipifc *ifc;
+	Route *r, *sr;
+
+	ip = f->ip;
+
+	/* Fill out the ip header */
+	eh = (Ip6hdr*)(bp->rp);
+
+	ip->stats[OutRequests]++;
+
+	/* Number of uchars in data and ip header to write */
+	len = blocklen(bp);
+
+	tentative = iptentative(f, eh->src);
+	if(tentative){
+		netlog(f, Logip, "reject tx of packet with tentative src address %I\n",
+			eh->src);
+		goto free;
+	}
+
+	if(gating){
+		chunk = nhgets(eh->ploadlen);
+		if(chunk > len){
+			ip->stats[OutDiscards]++;
+			netlog(f, Logip, "short gated packet\n");
+			goto free;
+		}
+		/* ignore anything beyond what the header declares */
+		if(chunk + IP6HDR < len)
+			len = chunk + IP6HDR;
+	}
+
+	if(len >= IP_MAX){
+		ip->stats[OutDiscards]++;
+		netlog(f, Logip, "exceeded ip max size %I\n", eh->dst);
+		goto free;
+	}
+
+	r = v6lookup(f, eh->dst, c);
+	if(r == nil){
+//		print("no route for %I, src %I free\n", eh->dst, eh->src);
+		ip->stats[OutNoRoutes]++;
+		netlog(f, Logip, "no interface %I\n", eh->dst);
+		rv = -1;
+		goto free;
+	}
+
+	/* choose the next hop: the destination itself unless the route names a gateway */
+	ifc = r->ifc;
+	if(r->type & (Rifc|Runi))
+		gate = eh->dst;
+	else if(r->type & (Rbcast|Rmulti)) {
+		gate = eh->dst;
+		/* send broadcast/multicast out of the source address's interface */
+		sr = v6lookup(f, eh->src, nil);
+		if(sr && (sr->type & Runi))
+			ifc = sr->ifc;
+	}
+	else
+		gate = r->v6.gate;
+
+	if(!gating)
+		eh->vcf[0] = IP_VER6;
+	eh->ttl = ttl;
+	if(!gating) {
+		eh->vcf[0] |= tos >> 4;
+		eh->vcf[1]  = tos << 4;
+	}
+
+	if(!CANRLOCK(ifc))
+		goto free;
+
+	if(waserror()){
+		RUNLOCK(ifc);
+		nexterror();
+	}
+
+	if(ifc->m == nil)
+		goto raise;
+
+	/* If we dont need to fragment just send it */
+	medialen = ifc->maxtu - ifc->m->hsize;
+	if(len <= medialen) {
+		hnputs(eh->ploadlen, len - IP6HDR);
+		ifc->m->bwrite(ifc, bp, V6, gate);
+		RUNLOCK(ifc);
+		poperror();
+		return 0;
+	}
+
+	if(gating && ifc->reassemble <= 0) {
+		/*
+		 * v6 intermediate nodes are not supposed to fragment pkts;
+		 * we fragment if ifc->reassemble is turned on; an exception
+		 * needed for nat.
+		 */
+		ip->stats[OutDiscards]++;
+		icmppkttoobig6(f, ifc, bp);
+		netlog(f, Logip, "%I: gated pkts not fragmented\n", eh->dst);
+		goto raise;
+	}
+
+	/* start v6 fragmentation */
+	uflen = unfraglen(bp, &nexthdr, 1);
+	if(uflen > medialen) {
+		ip->stats[FragFails]++;
+		ip->stats[OutDiscards]++;
+		netlog(f, Logip, "%I: unfragmentable part too big\n", eh->dst);
+		goto raise;
+	}
+
+	flen = len - uflen;
+	seglen = (medialen - (uflen + IP6FHDR)) & ~7;
+	if(seglen < 8) {
+		ip->stats[FragFails]++;
+		ip->stats[OutDiscards]++;
+		netlog(f, Logip, "%I: seglen < 8\n", eh->dst);
+		goto raise;
+	}
+
+	lid = incref(&ip->id6);
+	fraghdr.nexthdr = nexthdr;
+	fraghdr.res = 0;
+	hnputl(fraghdr.id, lid);
+
+	/* step xp past the unfragmentable part to the first byte to be split up */
+	xp = bp;
+	offset = uflen;
+	while (xp && offset && offset >= BLEN(xp)) {
+		offset -= BLEN(xp);
+		xp = xp->next;
+	}
+	/*
+	 * uflen is derived from length bytes inside the packet, so it can
+	 * claim more than the block list holds; don't dereference nil.
+	 */
+	if(xp == nil) {
+		ip->stats[FragFails]++;
+		ip->stats[OutDiscards]++;
+		netlog(f, Logip, "%I: unfragmentable part longer than packet\n", eh->dst);
+		goto raise;
+	}
+	xp->rp += offset;
+
+	fragoff = 0;
+	morefrags = 1;
+
+	for(; fragoff < flen; fragoff += seglen) {
+		nb = allocb(uflen + IP6FHDR + seglen);
+
+		/* the last fragment carries whatever is left and clears the M flag */
+		if(fragoff + seglen >= flen) {
+			seglen = flen - fragoff;
+			morefrags = 0;
+		}
+
+		hnputs(eh->ploadlen, seglen+IP6FHDR);
+		memmove(nb->wp, eh, uflen);
+		nb->wp += uflen;
+
+		hnputs(fraghdr.offsetRM, fragoff); /* last 3 bits must be 0 */
+		fraghdr.offsetRM[1] |= morefrags;
+		memmove(nb->wp, &fraghdr, IP6FHDR);
+		nb->wp += IP6FHDR;
+
+		/* Copy data */
+		chunk = seglen;
+		while (chunk) {
+			if(!xp) {
+				ip->stats[OutDiscards]++;
+				ip->stats[FragFails]++;
+				freeblist(nb);
+				netlog(f, Logip, "!xp: chunk in v6%d\n", chunk);
+				goto raise;
+			}
+			blklen = chunk;
+			if(BLEN(xp) < chunk)
+				blklen = BLEN(xp);
+			memmove(nb->wp, xp->rp, blklen);
+
+			nb->wp += blklen;
+			xp->rp += blklen;
+			chunk -= blklen;
+			if(xp->rp == xp->wp)
+				xp = xp->next;
+		}
+
+		ifc->m->bwrite(ifc, nb, V6, gate);
+		ip->stats[FragCreates]++;
+	}
+	ip->stats[FragOKs]++;
+
+raise:
+	RUNLOCK(ifc);
+	poperror();
+free:
+	freeblist(bp);
+	return rv;
+}
+
+/*
+ * Receive a v6 packet that arrived on ifc.  Packets not addressed to
+ * us are forwarded through ipoput6 when ip->iprouting is set and
+ * otherwise dropped; packets for us have their extension headers
+ * processed (with reassembly) and are handed to the protocol's rcv
+ * routine.  bp is consumed in every case.
+ */
+void
+ipiput6(Fs *f, Ipifc *ifc, Block *bp)
+{
+	int hl, hop, tos, notforme, tentative;
+	uchar proto;
+	uchar v6dst[IPaddrlen];
+	IP *ip;
+	Ip6hdr *h;
+	Proto *p;
+	Route *r, *sr;
+
+	ip = f->ip;
+	ip->stats[InReceives]++;
+
+	/*
+	 *  Ensure we have all the header info in the first
+	 *  block.  Make life easier for other protocols by
+	 *  collecting up to the first 64 bytes in the first block.
+	 */
+	if(BLEN(bp) < 64) {
+		hl = blocklen(bp);
+		if(hl < IP6HDR)
+			hl = IP6HDR;
+		if(hl > 64)
+			hl = 64;
+		bp = pullupblock(bp, hl);
+		if(bp == nil)
+			return;
+	}
+
+	h = (Ip6hdr *)bp->rp;
+
+	memmove(&v6dst[0], &h->dst[0], IPaddrlen);
+	notforme = ipforme(f, v6dst) == 0;
+	tentative = iptentative(f, v6dst);
+
+	/* only ICMPv6 may be received on an address that is still tentative */
+	if(tentative && h->proto != ICMPv6) {
+		print("tentative addr, drop\n");
+		freeblist(bp);
+		return;
+	}
+
+	/* Check header version */
+	if(BLKIPVER(bp) != IP_VER6) {
+		ip->stats[InHdrErrors]++;
+		/* the version is the high nibble: shift by 4 to log the number itself */
+		netlog(f, Logip, "ip: bad version %ux\n", (h->vcf[0]&0xF0)>>4);
+		freeblist(bp);
+		return;
+	}
+
+	/* route */
+	if(notforme) {
+		if(!ip->iprouting){
+			/* free the whole chain, as every other drop path here does */
+			freeblist(bp);
+			return;
+		}
+
+		/* don't forward to link-local destinations */
+		if(islinklocal(h->dst) ||
+		   (isv6mcast(h->dst) && (h->dst[1]&0xF) <= Link_local_scop)){
+			ip->stats[OutDiscards]++;
+			freeblist(bp);
+			return;
+		}
+
+		/* don't forward to source's network */
+		sr = v6lookup(f, h->src, nil);
+		r = v6lookup(f, h->dst, nil);
+
+		if(r == nil || sr == r){
+			ip->stats[OutDiscards]++;
+			freeblist(bp);
+			return;
+		}
+
+		/* don't forward if packet has timed out */
+		hop = h->ttl;
+		if(hop < 1) {
+			ip->stats[InHdrErrors]++;
+			icmpttlexceeded6(f, ifc, bp);
+			freeblist(bp);
+			return;
+		}
+
+		/* process headers & reassemble if the interface expects it */
+		bp = procxtns(ip, bp, r->ifc->reassemble);
+		if(bp == nil)
+			return;
+
+		ip->stats[ForwDatagrams]++;
+		h = (Ip6hdr *)bp->rp;
+		tos = IPV6CLASS(h);
+		hop = h->ttl;
+		ipoput6(f, bp, 1, hop-1, tos, nil);
+		return;
+	}
+
+	/* reassemble & process headers if needed */
+	bp = procxtns(ip, bp, 1);
+	if(bp == nil)
+		return;
+
+	h = (Ip6hdr *) (bp->rp);
+	proto = h->proto;
+	p = Fsrcvpcol(f, proto);
+	if(p && p->rcv) {
+		ip->stats[InDelivers]++;
+		(*p->rcv)(p, ifc, bp);
+		return;
+	}
+
+	ip->stats[InDiscards]++;
+	ip->stats[InUnknownProtos]++;
+	freeblist(bp);
+}
+
+/*
+ * ipfragfree6 - copied from ipfragfree4 - assume hold fraglock6
+ *
+ * Release frag's blocks, clear its key, take it off the active list
+ * if it is there, and push it onto the free list.
+ */
+void
+ipfragfree6(IP *ip, Fragment6 *frag)
+{
+	Fragment6 **link;
+
+	if(frag->blist != nil)
+		freeblist(frag->blist);
+	frag->blist = nil;
+	frag->id = 0;
+	memset(frag->src, 0, IPaddrlen);
+
+	for(link = &ip->flisthead6; *link != nil; link = &(*link)->next)
+		if(*link == frag){
+			*link = frag->next;
+			break;
+		}
+
+	frag->next = ip->fragfree6;
+	ip->fragfree6 = frag;
+}
+
+/*
+ * ipfragallo6 - copied from ipfragalloc4
+ *
+ * Take a reassembly queue from the free list, first recycling the
+ * entry at the tail of the active list if the free list is empty.
+ * The queue is put at the head of the active list with its expiry
+ * set to NOW + 30000.
+ */
+Fragment6*
+ipfragallo6(IP *ip)
+{
+	Fragment6 *f;
+
+	while(ip->fragfree6 == nil){
+		/* free last entry on fraglist */
+		f = ip->flisthead6;
+		while(f->next != nil)
+			f = f->next;
+		ipfragfree6(ip, f);
+	}
+
+	f = ip->fragfree6;
+	ip->fragfree6 = f->next;
+
+	f->next = ip->flisthead6;
+	ip->flisthead6 = f;
+	f->age = NOW + 30000;
+
+	return f;
+}
+
+/*
+ * Process the extension headers of the packet in bp.  If the packet
+ * carries a fragment header and doreasm is non-zero it is passed to
+ * ip6reassemble first; nil is returned while reassembly is incomplete.
+ */
+static Block*
+procxtns(IP *ip, Block *bp, int doreasm)
+{
+	int uflen;
+	uchar nxt;
+	Ip6hdr *hdr;
+
+	hdr = (Ip6hdr *)bp->rp;
+	uflen = unfraglen(bp, &nxt, 0);
+
+	if(doreasm != 0 && nxt == FH){
+		bp = ip6reassemble(ip, uflen, bp, hdr);
+		if(bp == nil)
+			return nil;
+		uflen = unfraglen(bp, &nxt, 0);
+	}
+
+	/* nothing beyond the fixed header and no destination options */
+	if(nxt != DOH && uflen <= IP6HDR)
+		return bp;
+	return procopts(bp);
+}
+
+/*
+ * returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
+ * hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
+ * of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
+ * field of the last header in the "Unfragmentable part" is set to FH.
+ *
+ * Side effect even when setfh == 0: if the header that follows is a
+ * fragment header, the nexthdr field of the last unfragmentable header
+ * is overwritten with the fragment header's own nexthdr byte.
+ *
+ * NOTE(review): the walk reads only through bp->rp and does no bounds
+ * checks, so it relies on all these headers being in the first block.
+ */
+int
+unfraglen(Block *bp, uchar *nexthdr, int setfh)
+{
+ uchar *p, *q;
+ int ufl, hs;
+
+ p = bp->rp;
+ q = p+6; /* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
+ *nexthdr = *q;
+ ufl = IP6HDR;
+ p += ufl;
+
+ /* p walks the extension headers; q trails at the nexthdr byte naming *p */
+ while (*nexthdr == HBH || *nexthdr == RH) {
+ *nexthdr = *p;
+ hs = ((int)*(p+1) + 1) * 8; /* length byte counts 8-byte units beyond the first */
+ ufl += hs;
+ q = p;
+ p += hs;
+ }
+
+ if(*nexthdr == FH)
+ *q = *p;
+ if(setfh)
+ *q = FH;
+ return ufl;
+}
+
+/* placeholder for option processing: currently returns bp unchanged */
+Block*
+procopts(Block *bp)
+{
+ return bp;
+}
+
+/*
+ * Add the fragment in bp to its reassembly queue.  uflen is the length
+ * of the unfragmentable part, so the fragment header sits at
+ * bp->rp + uflen; ih points at the packet's Ip6hdr.
+ *
+ * Returns the complete packet once every fragment has arrived (or at
+ * once if the fragment header shows offset 0 and no more fragments).
+ * Returns nil when the fragment was queued or dropped; bp then belongs
+ * to the queue or has been freed.
+ */
+Block*
+ip6reassemble(IP* ip, int uflen, Block* bp, Ip6hdr* ih)
+{
+	int fend, offset, ovlap, len, fragsize, pktposn;
+	uint id;
+	uchar src[IPaddrlen], dst[IPaddrlen];
+	Block *bl, **l, *last, *prev;
+	Fraghdr6 *fraghdr;
+	Fragment6 *f, *fnext;
+
+	fraghdr = (Fraghdr6 *)(bp->rp + uflen);
+	memmove(src, ih->src, IPaddrlen);
+	memmove(dst, ih->dst, IPaddrlen);
+	id = nhgetl(fraghdr->id);
+	offset = nhgets(fraghdr->offsetRM) & ~7;
+
+	/*
+	 *  block lists are too hard, pullupblock into a single block
+	 */
+	if(bp->next){
+		bp = pullupblock(bp, blocklen(bp));
+		if(bp == nil)
+			return nil;
+		/*
+		 * pullupblock may hand back a different block, so every
+		 * pointer into the old one has to be recomputed.
+		 */
+		ih = (Ip6hdr *)bp->rp;
+		fraghdr = (Fraghdr6 *)(bp->rp + uflen);
+	}
+
+	qlock(&ip->fraglock6);
+
+	/*
+	 *  find a reassembly queue for this fragment
+	 */
+	for(f = ip->flisthead6; f; f = fnext){
+		fnext = f->next;
+		if(ipcmp(f->src, src)==0 && ipcmp(f->dst, dst)==0 && f->id == id)
+			break;
+		/* expire stale queues on the way */
+		if(f->age < NOW){
+			ip->stats[ReasmTimeout]++;
+			ipfragfree6(ip, f);
+		}
+	}
+
+	/*
+	 *  if this isn't a fragmented packet, accept it
+	 *  and get rid of any fragments that might go
+	 *  with it.
+	 */
+	if(nhgets(fraghdr->offsetRM) == 0) {	/* 1st frag is also last */
+		if(f) {
+			ipfragfree6(ip, f);
+			ip->stats[ReasmFails]++;
+		}
+		qunlock(&ip->fraglock6);
+		return bp;
+	}
+
+	/* make room in front of the data for the Ipfrag record kept at bp->base */
+	if(bp->base+sizeof(Ipfrag) >= bp->rp){
+		bp = padblock(bp, sizeof(Ipfrag));
+		bp->rp += sizeof(Ipfrag);
+		/* padblock may have moved the data to a new block */
+		ih = (Ip6hdr *)bp->rp;
+	}
+
+	BKFG(bp)->foff = offset;
+	BKFG(bp)->flen = nhgets(ih->ploadlen) + IP6HDR - uflen - IP6FHDR;
+
+	/* First fragment allocates a reassembly queue */
+	if(f == nil) {
+		f = ipfragallo6(ip);
+		f->id = id;
+		memmove(f->src, src, IPaddrlen);
+		memmove(f->dst, dst, IPaddrlen);
+
+		f->blist = bp;
+
+		qunlock(&ip->fraglock6);
+		ip->stats[ReasmReqds]++;
+		return nil;
+	}
+
+	/*
+	 *  find the new fragment's position in the queue
+	 */
+	prev = nil;
+	l = &f->blist;
+	bl = f->blist;
+	while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+		prev = bl;
+		l = &bl->next;
+		bl = bl->next;
+	}
+
+	/* Check overlap of a previous fragment - trim away as necessary */
+	if(prev) {
+		ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+		if(ovlap > 0) {
+			/* the new fragment adds nothing: drop it */
+			if(ovlap >= BKFG(bp)->flen) {
+				freeblist(bp);
+				qunlock(&ip->fraglock6);
+				return nil;
+			}
+			BKFG(prev)->flen -= ovlap;
+		}
+	}
+
+	/* Link onto assembly queue */
+	bp->next = *l;
+	*l = bp;
+
+	/* Check to see if succeeding segments overlap */
+	if(bp->next) {
+		l = &bp->next;
+		fend = BKFG(bp)->foff + BKFG(bp)->flen;
+
+		/* Take completely covered segments out */
+		while(*l) {
+			ovlap = fend - BKFG(*l)->foff;
+			if(ovlap <= 0)
+				break;
+			if(ovlap < BKFG(*l)->flen) {
+				BKFG(*l)->flen -= ovlap;
+				BKFG(*l)->foff += ovlap;
+				/* move up ih hdrs */
+				memmove((*l)->rp + ovlap, (*l)->rp, uflen);
+				(*l)->rp += ovlap;
+				break;
+			}
+			last = (*l)->next;
+			(*l)->next = nil;
+			freeblist(*l);
+			*l = last;
+		}
+	}
+
+	/*
+	 *  look for a complete packet.  if we get to a fragment
+	 *  with the trailing bit of fraghdr->offsetRM[1] set, we're done.
+	 */
+	pktposn = 0;
+	for(bl = f->blist; bl && BKFG(bl)->foff == pktposn; bl = bl->next) {
+		fraghdr = (Fraghdr6 *)(bl->rp + uflen);
+		if((fraghdr->offsetRM[1] & 1) == 0) {
+			bl = f->blist;
+
+			/* get rid of frag header in first fragment */
+			memmove(bl->rp + IP6FHDR, bl->rp, uflen);
+			bl->rp += IP6FHDR;
+			len = nhgets(((Ip6hdr*)bl->rp)->ploadlen) - IP6FHDR;
+			bl->wp = bl->rp + len + IP6HDR;
+			/*
+			 * Pullup all the fragment headers and
+			 * return a complete packet
+			 */
+			for(bl = bl->next; bl; bl = bl->next) {
+				fragsize = BKFG(bl)->flen;
+				len += fragsize;
+				bl->rp += uflen + IP6FHDR;
+				bl->wp = bl->rp + fragsize;
+			}
+
+			/* detach the chain before the queue is recycled */
+			bl = f->blist;
+			f->blist = nil;
+			ipfragfree6(ip, f);
+			ih = (Ip6hdr*)bl->rp;
+			hnputs(ih->ploadlen, len);
+			qunlock(&ip->fraglock6);
+			ip->stats[ReasmOKs]++;
+			return bl;
+		}
+		pktposn += BKFG(bl)->flen;
+	}
+	qunlock(&ip->fraglock6);
+	return nil;
+}
diff --git a/src/9vx/a/ip/ipv6.h b/src/9vx/a/ip/ipv6.h
@@ -0,0 +1,185 @@
+/*
+ * Internet Protocol Version 6
+ *
+ * rfc2460 defines the protocol, rfc2461 neighbour discovery, and
+ * rfc2462 address autoconfiguration. rfc4443 defines ICMP; was rfc2463.
+ * rfc4291 defines the address architecture (including prefixes), was rfc3513.
+ * rfc4007 defines the scoped address architecture.
+ *
+ * global unicast is anything but unspecified (::), loopback (::1),
+ * multicast (ff00::/8), and link-local unicast (fe80::/10).
+ *
+ * site-local (fec0::/10) is now deprecated, originally by rfc3879.
+ *
+ * Unique Local IPv6 Unicast Addresses are defined by rfc4193.
+ * prefix is fc00::/7, scope is global, routing is limited to roughly a site.
+ */
+/* address tests; addr is a 16-byte v6 address in network byte order */
+#define isv6mcast(addr) ((addr)[0] == 0xff) /* ff00::/8 */
+#define islinklocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0x80) /* fe80::/10 */
+
+/* true if the payload length in header np exceeds 24 bytes */
+#define optexsts(np) (nhgets((np)->ploadlen) > 24)
+/* true if the first 13 bytes of addr match v6solicitednode */
+#define issmcast(addr) (memcmp((addr), v6solicitednode, 13) == 0)
+
+#ifndef MIN
+#define MIN(a, b) ((a) <= (b)? (a): (b))
+#endif
+
+#undef ESP
+
+/*
+ * Values of the v6 next-header field; these are the IANA assigned
+ * Internet protocol numbers.
+ */
+enum {				/* Header Types */
+	HBH		= 0,	/* IPv6 hop-by-hop options header */
+	ICMP		= 1,
+	IGMP		= 2,
+	GGP		= 3,
+	IPINIP		= 4,
+	ST		= 5,
+	TCP		= 6,
+	UDP		= 17,
+	ISO_TP4		= 29,
+	RH		= 43,	/* routing header */
+	FH		= 44,	/* fragment header */
+	IDRP		= 45,
+	RSVP		= 46,
+	ESP		= 50,	/* was 52, which IANA assigns to I-NLSP */
+	AH		= 51,
+	ICMPv6		= 58,
+	NNH		= 59,	/* no next header */
+	DOH		= 60,	/* destination options header */
+	ISO_IP		= 80,
+	IGRP		= 88,
+	OSPF		= 89,
+
+	Maxhdrtype	= 256,
+};
+
+/* assorted v6 constants: multicast scopes, icmpv6 codes, nd option types */
+enum {
+	/* multicast flags and scopes */
+
+//	Well_known_flg	= 0,
+//	Transient_flg	= 1,
+
+//	Interface_local_scop = 1,
+	Link_local_scop	= 2,
+//	Site_local_scop	= 5,
+//	Org_local_scop	= 8,
+	Global_scop	= 14,
+
+	/* various prefix lengths */
+	SOLN_PREF_LEN	= 13,
+
+	/* icmpv6 unreachability codes */
+	Icmp6_no_route		= 0,
+	Icmp6_ad_prohib		= 1,
+	Icmp6_out_src_scope	= 2,
+	Icmp6_adr_unreach	= 3,
+	Icmp6_port_unreach	= 4,
+	Icmp6_gress_src_fail	= 5,
+	Icmp6_rej_route		= 6,
+	Icmp6_unknown		= 7,  /* our own invention for internal use */
+
+	/* various flags & constants */
+	v6MINTU		= 1280,
+	HOP_LIMIT	= 255,
+	/*
+	 * The fixed v6 header is 40 bytes: 8 bytes of vcf, ploadlen,
+	 * proto and ttl followed by two 16-byte addresses.  This was 20,
+	 * which made unfraglen and the payload-length arithmetic in
+	 * ipv6.c start in the middle of the source address.
+	 */
+	IP6HDR		= 40,		/* sizeof(Ip6hdr) = 8 + 2*16 */
+
+	/* option types */
+
+	/* neighbour discovery */
+	SRC_LLADDR	= 1,
+	TARGET_LLADDR	= 2,
+	PREFIX_INFO	= 3,
+	REDIR_HEADER	= 4,
+	MTU_OPTION	= 5,
+	/* new since rfc2461; see iana.org/assignments/icmpv6-parameters */
+	V6nd_home	= 8,
+	V6nd_srcaddrs	= 9,		/* rfc3122 */
+	V6nd_ip		= 17,
+	/* /lib/rfc/drafts/draft-jeong-dnsop-ipv6-dns-discovery-12.txt */
+	V6nd_rdns	= 25,
+	/* plan 9 extensions */
+	V6nd_9fs	= 250,
+	V6nd_9auth	= 251,
+
+	SRC_UNSPEC	= 0,
+	SRC_UNI		= 1,
+	TARG_UNI	= 2,
+	TARG_MULTI	= 3,
+
+	Tunitent	= 1,
+	Tuniproxy	= 2,
+	Tunirany	= 3,
+
+	/* Node constants */
+	MAX_MULTICAST_SOLICIT	= 3,
+	RETRANS_TIMER		= 1000,
+};
+
+typedef struct Ip6hdr Ip6hdr;
+typedef struct Opthdr Opthdr;
+typedef struct Routinghdr Routinghdr;
+typedef struct Fraghdr6 Fraghdr6;
+
+/* the fixed v6 header as it appears on the wire */
+struct Ip6hdr {
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */
+ uchar ploadlen[2]; /* payload length: packet length - 40 */
+ uchar proto; /* next header type */
+ uchar ttl; /* hop limit */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+};
+
+/* first two bytes of an extension header, as walked by unfraglen in ipv6.c */
+struct Opthdr {
+ uchar nexthdr; /* type of the header that follows */
+ uchar len; /* header length in 8-byte units, not counting the first 8 bytes */
+};
+
+/*
+ * Beware routing header type 0 (loose source routing); see
+ * http://www.secdev.org/conf/IPv6_RH_security-csw07.pdf.
+ * Type 1 is unused. Type 2 is for MIPv6 (mobile IPv6) filtering
+ * against type 0 header.
+ */
+struct Routinghdr {
+ uchar nexthdr; /* type of the header that follows */
+ uchar len; /* header length, same convention as Opthdr.len */
+ uchar rtetype; /* routing type, see above */
+ uchar segrem; /* presumably "segments remaining" -- confirm against rfc2460 */
+};
+
+/* the 8-byte fragment header (IP6FHDR in ipv6.c) */
+struct Fraghdr6 {
+ uchar nexthdr; /* type of the header that follows */
+ uchar res;
+ uchar offsetRM[2]; /* Offset, Res, M flag */
+ uchar id[4]; /* identification shared by all fragments of a packet */
+};
+
+extern uchar v6allnodesN[IPaddrlen];
+extern uchar v6allnodesL[IPaddrlen];
+extern uchar v6allroutersN[IPaddrlen];
+extern uchar v6allroutersL[IPaddrlen];
+extern uchar v6allnodesNmask[IPaddrlen];
+extern uchar v6allnodesLmask[IPaddrlen];
+extern uchar v6solicitednode[IPaddrlen];
+extern uchar v6solicitednodemask[IPaddrlen];
+extern uchar v6Unspecified[IPaddrlen];
+extern uchar v6loopback[IPaddrlen];
+extern uchar v6loopbackmask[IPaddrlen];
+extern uchar v6linklocal[IPaddrlen];
+extern uchar v6linklocalmask[IPaddrlen];
+extern uchar v6multicast[IPaddrlen];
+extern uchar v6multicastmask[IPaddrlen];
+
+extern int v6llpreflen;
+extern int v6mcpreflen;
+extern int v6snpreflen;
+extern int v6aNpreflen;
+extern int v6aLpreflen;
+
+extern int ReTransTimer;
+
+void ipv62smcast(uchar *, uchar *);
+void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
+void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
+void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
+void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
+void icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free);
diff --git a/src/9vx/a/ip/loopbackmedium.c b/src/9vx/a/ip/loopbackmedium.c
@@ -0,0 +1,120 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+enum
+{
+ Maxtu= 16*1024, /* loopback maxtu; also the size of each qbread in loopbackread */
+};
+
+/* per-interface state of the loopback medium, kept in ifc->arg */
+typedef struct LB LB;
+struct LB
+{
+ Proc *readp; /* the loopbackread kproc; zero once it has exited */
+ Queue *q; /* packets written by loopbackbwrite, read by loopbackread */
+ Fs *f; /* ip stack the interface belongs to */
+};
+
+static void loopbackread(void *a);
+
+/*
+ * Bind ifc to the loopback medium: allocate its LB state with a
+ * 1MB message queue and start the reader process.  The two unnamed
+ * arguments are ignored.
+ */
+static void
+loopbackbind(Ipifc *ifc, int _, char** __)
+{
+	LB *state;
+
+	state = smalloc(sizeof(LB));
+	state->f = ifc->conv->p->f;
+	state->q = qopen(1024*1024, Qmsg, nil, nil);
+
+	ifc->mbps = 1000;
+	ifc->arg = state;
+
+	kproc("loopbackread", loopbackread, ifc);
+}
+
+/*
+ * Unbind ifc: post an "unbind" note to the reader, wait for it to
+ * clear readp, then free the queue and the LB state.
+ */
+static void
+loopbackunbind(Ipifc *ifc)
+{
+	LB *state;
+
+	state = ifc->arg;
+	if(state->readp != nil)
+		postnote(state->readp, 1, "unbind", 0);
+
+	/* wait for reader to die */
+	for(; state->readp != nil; )
+		tsleep(&up->sleep, return0, 0, 300);
+
+	/* clean up */
+	qfree(state->q);
+	free(state);
+}
+
+/*
+ * Transmit bp by queueing it for the reader process; a failed qpass
+ * is counted as an output error.  The last two arguments are ignored.
+ */
+static void
+loopbackbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
+{
+	Queue *q;
+
+	q = ((LB*)ifc->arg)->q;
+	if(qpass(q, bp) < 0)
+		ifc->outerr++;
+	ifc->out++;
+}
+
+/*
+ * Body of the loopback reader process (a is the Ipifc): take packets
+ * off the LB queue and feed them to ipiput4.  Any error raised out of
+ * the loop clears lb->readp and ends the process.
+ */
+static void
+loopbackread(void *a)
+{
+ Ipifc *ifc;
+ Block *bp;
+ LB *lb;
+
+ ifc = a;
+ lb = ifc->arg;
+ lb->readp = up; /* hide identity under a rock for unbind */
+ if(waserror()){
+ lb->readp = 0;
+ pexit("hangup", 1);
+ }
+ for(;;){
+ bp = qbread(lb->q, Maxtu);
+ if(bp == nil)
+ continue;
+ ifc->in++;
+ /* interface lock not available: drop the packet */
+ if(!CANRLOCK(ifc)){
+ freeb(bp);
+ continue;
+ }
+ /* inner error frame: release the interface lock before passing the error on */
+ if(waserror()){
+ RUNLOCK(ifc);
+ nexterror();
+ }
+ if(ifc->lifc == nil)
+ freeb(bp);
+ else
+ ipiput4(lb->f, ifc, bp);
+ RUNLOCK(ifc);
+ poperror();
+ }
+}
+
+/* the loopback medium: no link header, no MAC address, 16KB maxtu */
+Medium loopbackmedium =
+{
+.hsize= 0,
+.mintu= 0,
+.maxtu= Maxtu,
+.maclen= 0,
+.name= "loopback",
+.bind= loopbackbind,
+.unbind= loopbackunbind,
+.bwrite= loopbackbwrite,
+};
+
+/* register the loopback medium with the ip stack */
+void
+loopbackmediumlink(void)
+{
+ addipmedium(&loopbackmedium);
+}
diff --git a/src/9vx/a/ip/netdevmedium.c b/src/9vx/a/ip/netdevmedium.c
@@ -0,0 +1,153 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+static void netdevbind(Ipifc *ifc, int argc, char **argv);
+static void netdevunbind(Ipifc *ifc);
+static void netdevbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
+static void netdevread(void *a);
+
+/* per-interface state of the netdev medium, kept in ifc->arg */
+typedef struct Netdevrock Netdevrock;
+struct Netdevrock
+{
+ Fs *f; /* file system we belong to */
+ Proc *readp; /* reading process */
+ Chan *mchan; /* Data channel */
+};
+
+/* the netdev medium: packets go to and come from an arbitrary opened file */
+Medium netdevmedium =
+{
+.name= "netdev",
+.hsize= 0,
+.mintu= 0,
+.maxtu= 64000,
+.maclen= 0,
+.bind= netdevbind,
+.unbind= netdevunbind,
+.bwrite= netdevbwrite,
+.unbindonclose= 0,
+};
+
+/*
+ *  called to bind an IP ifc to a generic network device
+ *  called with ifc qlock'd
+ *
+ *  argv[2] names the file to open as the data channel, so at least
+ *  three arguments are required; fewer raises Ebadarg.
+ */
+static void
+netdevbind(Ipifc *ifc, int argc, char **argv)
+{
+	Chan *mchan;
+	Netdevrock *er;
+
+	/* the check used to be argc < 2, which let argv[2] be read unchecked */
+	if(argc < 3)
+		error(Ebadarg);
+
+	mchan = namec(argv[2], Aopen, ORDWR, 0);
+
+	er = smalloc(sizeof(*er));
+	er->mchan = mchan;
+	er->f = ifc->conv->p->f;
+
+	ifc->arg = er;
+
+	kproc("netdevread", netdevread, ifc);
+}
+
+/*
+ *  called with ifc wlock'd
+ *
+ *  Post an "unbind" note to the reader, wait for it to clear readp,
+ *  then close the data channel and free the state.
+ */
+static void
+netdevunbind(Ipifc *ifc)
+{
+	Netdevrock *rock;
+
+	rock = ifc->arg;
+	if(rock->readp != nil)
+		postnote(rock->readp, 1, "unbind", 0);
+
+	/* wait for readers to die */
+	for(; rock->readp != nil; )
+		tsleep(&up->sleep, return0, 0, 300);
+
+	if(rock->mchan != nil)
+		cclose(rock->mchan);
+	free(rock);
+}
+
+/*
+ *  called by ipoput with a single block to write
+ *
+ *  The block list is flattened, padded up to ifc->mintu if shorter,
+ *  and written to the data channel's device.
+ */
+static void
+netdevbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
+{
+	Netdevrock *rock;
+	Chan *dev;
+
+	rock = ifc->arg;
+
+	if(bp->next != nil)
+		bp = concatblock(bp);
+	if(BLEN(bp) < ifc->mintu)
+		bp = adjustblock(bp, ifc->mintu);
+
+	dev = rock->mchan;
+	devtab[dev->type]->bwrite(dev, bp, 0);
+	ifc->out++;
+}
+
+/*
+ * process to read from the device
+ *
+ * a is the Ipifc.  Each block read from the data channel is handed to
+ * ipiput4.  A nil read makes the process unbind the interface and exit;
+ * an error raised out of the loop clears er->readp and exits.
+ */
+static void
+netdevread(void *a)
+{
+ Ipifc *ifc;
+ Block *bp;
+ Netdevrock *er;
+ char *argv[1];
+
+ ifc = a;
+ er = ifc->arg;
+ er->readp = up; /* hide identity under a rock for unbind */
+ if(waserror()){
+ er->readp = nil;
+ pexit("hangup", 1);
+ }
+ for(;;){
+ bp = devtab[er->mchan->type]->bread(er->mchan, ifc->maxtu, 0);
+ if(bp == nil){
+ /*
+ * get here if mchan is a pipe and other side hangs up
+ * clean up this interface & get out
+ZZZ is this a good idea?
+ */
+ poperror();
+ /* readp is cleared first so that netdevunbind will not wait for us */
+ er->readp = nil;
+ argv[0] = "unbind";
+ if(!waserror())
+ ifc->conv->p->ctl(ifc->conv, argv, 1);
+ pexit("hangup", 1);
+ }
+ /* interface lock not available: drop the packet */
+ if(!CANRLOCK(ifc)){
+ freeb(bp);
+ continue;
+ }
+ /* inner error frame: release the interface lock before passing the error on */
+ if(waserror()){
+ RUNLOCK(ifc);
+ nexterror();
+ }
+ ifc->in++;
+ if(ifc->lifc == nil)
+ freeb(bp);
+ else
+ ipiput4(er->f, ifc, bp);
+ RUNLOCK(ifc);
+ poperror();
+ }
+}
+
+/* register the netdev medium with the ip stack */
+void
+netdevmediumlink(void)
+{
+ addipmedium(&netdevmedium);
+}
diff --git a/src/9vx/a/ip/netlog.c b/src/9vx/a/ip/netlog.c
@@ -0,0 +1,261 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+#include "ip/ip.h"
+
+enum {
+ Nlog = 16*1024,
+};
+
+/*
+ * action log
+ *
+ * A ring of Nlog bytes: netlog() appends formatted messages,
+ * discarding the oldest bytes when full, and netlogread() drains it.
+ */
+struct Netlog {
+ Lock lk; /* presumably what LOCK()/UNLOCK() take -- confirm against the macro */
+ int opens; /* raised by netlogopen, lowered by netlogclose */
+ char* buf; /* Nlog bytes, allocated on first open, freed on last close */
+ char *end; /* buf + Nlog */
+ char *rptr; /* oldest unread byte */
+ int len; /* number of unread bytes */
+
+ int logmask; /* mask of things to debug */
+ uchar iponly[IPaddrlen]; /* ip address to print debugging for */
+ int iponlyset; /* non-zero once "only" was given an address other than IPnoaddr */
+
+ QLock qlock; /* presumably what QLOCK()/QUNLOCK() take in netlogread -- confirm */
+ Rendez rendez; /* netlogread sleeps here; netlog wakes it */
+};
+
+typedef struct Netlogflag {
+ char* name;
+ int mask;
+} Netlogflag;
+
+static Netlogflag flags[] =
+{
+ { "ppp", Logppp, },
+ { "ip", Logip, },
+ { "fs", Logfs, },
+ { "tcp", Logtcp, },
+ { "icmp", Logicmp, },
+ { "udp", Logudp, },
+ { "compress", Logcompress, },
+ { "gre", Loggre, },
+ { "tcpwin", Logtcp|Logtcpwin, },
+ { "tcprxmt", Logtcp|Logtcprxmt, },
+ { "udpmsg", Logudp|Logudpmsg, },
+ { "ipmsg", Logip|Logipmsg, },
+ { "esp", Logesp, },
+ { nil, 0, },
+};
+
+char Ebadnetctl[] = "too few arguments for netlog control message";
+
+enum
+{
+ CMset,
+ CMclear,
+ CMonly,
+};
+
+static
+Cmdtab routecmd[] = {
+ CMset, "set", 0,
+ CMclear, "clear", 0,
+ CMonly, "only", 0,
+};
+
+/*
+ * Allocate the Netlog for f with smalloc and store it in f->alog.
+ */
+void
+netloginit(Fs *f)
+{
+	Netlog *l;
+
+	l = smalloc(sizeof(Netlog));
+	f->alog = l;
+}
+
+/*
+ * Count one more opener of the log.  The first opener allocates the
+ * Nlog-byte buffer if needed and starts with an empty ring.
+ * Raises Enomem if the buffer cannot be allocated.
+ */
+void
+netlogopen(Fs *f)
+{
+	LOCK(f->alog);
+	if(waserror()){
+		UNLOCK(f->alog);
+		nexterror();
+	}
+	if(f->alog->opens == 0){
+		if(f->alog->buf == nil)
+			f->alog->buf = malloc(Nlog);
+		/* without a buffer netlog() would write through nil */
+		if(f->alog->buf == nil)
+			error(Enomem);
+		f->alog->rptr = f->alog->buf;
+		f->alog->end = f->alog->buf + Nlog;
+		/* bytes left over from an earlier open session are gone */
+		f->alog->len = 0;
+	}
+	f->alog->opens++;
+	UNLOCK(f->alog);
+	poperror();
+}
+
+/*
+ * Drop one opener of the log; the last one out frees the buffer.
+ */
+void
+netlogclose(Fs *f)
+{
+	LOCK(f->alog);
+	if(waserror()){
+		UNLOCK(f->alog);
+		nexterror();
+	}
+	if(--f->alog->opens == 0){
+		free(f->alog->buf);
+		f->alog->buf = nil;
+	}
+	UNLOCK(f->alog);
+	poperror();
+}
+
+/*
+ * sleep() condition for netlogread: non-zero once the log holds data.
+ */
+static int
+netlogready(void *a)
+{
+	return ((Fs*)a)->alog->len;
+}
+
+long
+netlogread(Fs *f, void *a, ulong _, long n)
+{ /*
+ * Blocking read of the log.  Waits until the ring holds at least
+ * one byte, then copies min(n, len) bytes starting at rptr into a,
+ * wrapping from end back to buf, and returns the number copied.
+ * The third (offset) argument is ignored.  rptr and len are
+ * advanced under LOCK; the copy itself is done after UNLOCK.
+ */
+ int i, d;
+ char *p, *rptr;
+
+ QLOCK(f->alog);
+ if(waserror()){
+ QUNLOCK(f->alog);
+ nexterror();
+ }
+
+ for(;;){
+ LOCK(f->alog);
+ if(f->alog->len){
+ if(n > f->alog->len)
+ n = f->alog->len;
+ d = 0;
+ rptr = f->alog->rptr; /* where this read starts */
+ f->alog->rptr += n;
+ if(f->alog->rptr >= f->alog->end){
+ d = f->alog->rptr - f->alog->end; /* bytes that lie past the wrap */
+ f->alog->rptr = f->alog->buf + d;
+ }
+ f->alog->len -= n;
+ UNLOCK(f->alog);
+
+ i = n-d; /* bytes before the wrap */
+ p = a;
+ memmove(p, rptr, i);
+ memmove(p+i, f->alog->buf, d);
+ break;
+ }
+ else
+ UNLOCK(f->alog);
+
+ sleep(&f->alog->rendez, netlogready, f);
+ }
+
+ QUNLOCK(f->alog);
+ poperror();
+
+ return n;
+}
+
+/*
+ * Handle a control message written to the log file.
+ *	set flag...	turn the named flags on in logmask
+ *	clear flag...	turn the named flags off
+ *	only addr	parse addr into iponly; iponlyset is cleared
+ *			when the result equals IPnoaddr
+ * Messages with fewer than two fields raise Ebadnetctl.  Flag names
+ * not found in flags[] are skipped.
+ */
+void
+netlogctl(Fs *f, char* s, int n)
+{
+	int i, set;
+	Netlogflag *fp;
+	Cmdbuf *cb;
+	Cmdtab *ct;
+
+	cb = parsecmd(s, n);
+	if(waserror()){
+		free(cb);
+		nexterror();
+	}
+
+	if(cb->nf < 2)
+		error(Ebadnetctl);
+
+	ct = lookupcmd(cb, routecmd, nelem(routecmd));
+
+	set = 1;
+
+	switch(ct->index){
+	case CMset:
+		set = 1;
+		break;
+
+	case CMclear:
+		set = 0;
+		break;
+
+	case CMonly:
+		parseip(f->alog->iponly, cb->f[1]);
+		if(ipcmp(f->alog->iponly, IPnoaddr) == 0)
+			f->alog->iponlyset = 0;
+		else
+			f->alog->iponlyset = 1;
+		free(cb);
+		/* every return must pop the frame pushed by waserror above */
+		poperror();
+		return;
+
+	default:
+		cmderror(cb, "unknown ip control message");
+	}
+
+	for(i = 1; i < cb->nf; i++){
+		for(fp = flags; fp->name; fp++)
+			if(strcmp(fp->name, cb->f[i]) == 0)
+				break;
+		if(fp->name == nil)
+			continue;
+		if(set)
+			f->alog->logmask |= fp->mask;
+		else
+			f->alog->logmask &= ~fp->mask;
+	}
+
+	free(cb);
+	poperror();
+}
+
+/*
+ * Append a formatted message to the log if any bit of mask is set in
+ * logmask and the log is open.  The message is formatted into a
+ * 128-byte buffer; when the ring would overflow, the oldest bytes
+ * are dropped to make room.  Wakes any reader sleeping on rendez.
+ */
+void
+netlog(Fs *f, int mask, char *fmt, ...)
+{
+	char buf[128], *t, *fp;
+	int i, n;
+	va_list arg;
+
+	if(!(f->alog->logmask & mask))
+		return;
+
+	/* cheap unlocked test; repeated under the lock below */
+	if(f->alog->opens == 0)
+		return;
+
+	va_start(arg, fmt);
+	n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf;
+	va_end(arg);
+
+	LOCK(f->alog);
+	/*
+	 * netlogclose frees buf under this lock, so the last closer may
+	 * have run since the test above; rptr and end would then point
+	 * into freed memory.
+	 */
+	if(f->alog->opens == 0 || f->alog->buf == nil){
+		UNLOCK(f->alog);
+		return;
+	}
+	i = f->alog->len + n - Nlog;
+	if(i > 0){
+		/* not enough room: discard the i oldest bytes */
+		f->alog->len -= i;
+		f->alog->rptr += i;
+		if(f->alog->rptr >= f->alog->end)
+			f->alog->rptr = f->alog->buf + (f->alog->rptr - f->alog->end);
+	}
+	t = f->alog->rptr + f->alog->len;
+	fp = buf;
+	f->alog->len += n;
+	while(n-- > 0){
+		if(t >= f->alog->end)
+			t = f->alog->buf + (t - f->alog->end);
+		*t++ = *fp++;
+	}
+	UNLOCK(f->alog);
+
+	wakeup(&f->alog->rendez);
+}
diff --git a/src/9vx/a/ip/nullmedium.c b/src/9vx/a/ip/nullmedium.c
@@ -0,0 +1,39 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+/*
+ * The null medium cannot be bound: always raises an error.
+ */
+static void
+nullbind(Ipifc* _, int __, char** ___)
+{
+	error("cannot bind null device");
+}
+
+/*
+ * Nothing to undo for the null medium.
+ */
+static void
+nullunbind(Ipifc* _)
+{
+}
+
+/*
+ * Writing through the null medium always raises an error.
+ */
+static void
+nullbwrite(Ipifc* _, Block* __, int ___, uchar* ____)
+{
+	error("nullbwrite");
+}
+
+Medium nullmedium =
+{
+.name= "null",
+.bind= nullbind,
+.unbind= nullunbind,
+.bwrite= nullbwrite,
+};
+
+/*
+ * Link-time hook: hands nullmedium to addipmedium.
+ */
+void
+nullmediumlink(void)
+{
+	addipmedium(&nullmedium);
+}
diff --git a/src/9vx/a/ip/pktmedium.c b/src/9vx/a/ip/pktmedium.c
@@ -0,0 +1,78 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+
+static void pktbind(Ipifc*, int, char**);
+static void pktunbind(Ipifc*);
+static void pktbwrite(Ipifc*, Block*, int, uchar*);
+static void pktin(Fs*, Ipifc*, Block*);
+
+Medium pktmedium =
+{
+.name= "pkt",
+.hsize= 14,
+.mintu= 40,
+.maxtu= 4*1024,
+.maclen= 6,
+.bind= pktbind,
+.unbind= pktunbind,
+.bwrite= pktbwrite,
+.pktin= pktin,
+};
+
+/*
+ * bind an IP ifc to the pkt medium; there is no device to open,
+ * so nothing is done here
+ * called with ifc wlock'd
+ */
+static void
+pktbind(Ipifc* _, int argc, char **argv)
+{
+}
+
+/*
+ * nothing was set up by pktbind, so nothing to release
+ * called with ifc wlock'd
+ */
+static void
+pktunbind(Ipifc* _)
+{
+}
+
+/*
+ * called by ipoput with a single packet to write
+ *
+ * The packet is flattened and queued on the conversation's rq;
+ * a copy goes to sq first when there are snoopers.
+ */
+static void
+pktbwrite(Ipifc *ifc, Block *bp, int _, uchar* __)
+{
+	Block *copy;
+
+	bp = concatblock(bp);
+	if(ifc->conv->snoopers.ref > 0){
+		copy = copyblock(bp, BLEN(bp));
+		qpass(ifc->conv->sq, copy);
+	}
+	/* enqueue onto the conversation's rq */
+	qpass(ifc->conv->rq, bp);
+}
+
+/*
+ * called with ifc rlocked when someone writes to 'data'
+ *
+ * The block is freed when ifc->lifc is nil; otherwise it is handed
+ * to ipiput4, with a copy queued on sq first if there are snoopers.
+ */
+static void
+pktin(Fs *f, Ipifc *ifc, Block *bp)
+{
+	if(ifc->lifc == nil){
+		freeb(bp);
+		return;
+	}
+	if(ifc->conv->snoopers.ref > 0)
+		qpass(ifc->conv->sq, copyblock(bp, BLEN(bp)));
+	ipiput4(f, ifc, bp);
+}
+
+/*
+ * Link-time hook: hands pktmedium to addipmedium.
+ */
+void
+pktmediumlink(void)
+{
+	addipmedium(&pktmedium);
+}
diff --git a/src/9vx/a/ip/ptclbsum.c b/src/9vx/a/ip/ptclbsum.c
@@ -0,0 +1,72 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+#include "ip.h"
+
+static short endian = 1;
+static uchar* aendian = (uchar*)&endian;
+#define LITTLE *aendian
+
+ushort
+ptclbsum(uchar *addr, int len)
+{ /*
+ * Add up the len bytes at addr as 16-bit quantities and fold the
+ * carries back into 16 bits; the folded sum is returned as is
+ * (it is not complemented here).
+ *
+ * Aligned pairs are loaded as native ushorts into mdsum; single
+ * bytes at the odd ends go to hisum or losum.  Before the final
+ * fold hisum is counted with its two bytes swapped.  Whether mdsum
+ * joins hisum or losum depends on LITTLE and on whether addr was
+ * odd (x).
+ */
+ ulong losum, hisum, mdsum, x;
+ ulong t1, t2;
+
+ losum = 0;
+ hisum = 0;
+ mdsum = 0;
+
+ x = 0;
+ if((ulong)addr & 1) { /* odd start address: take one byte to align */
+ if(len) {
+ hisum += addr[0];
+ len--;
+ addr++;
+ }
+ x = 1; /* remember that byte positions are shifted by one */
+ }
+ while(len >= 16) { /* eight 16-bit loads per pass, loads interleaved with adds */
+ t1 = *(ushort*)(addr+0);
+ t2 = *(ushort*)(addr+2); mdsum += t1;
+ t1 = *(ushort*)(addr+4); mdsum += t2;
+ t2 = *(ushort*)(addr+6); mdsum += t1;
+ t1 = *(ushort*)(addr+8); mdsum += t2;
+ t2 = *(ushort*)(addr+10); mdsum += t1;
+ t1 = *(ushort*)(addr+12); mdsum += t2;
+ t2 = *(ushort*)(addr+14); mdsum += t1;
+ mdsum += t2;
+ len -= 16;
+ addr += 16;
+ }
+ while(len >= 2) { /* remaining whole 16-bit words */
+ mdsum += *(ushort*)addr;
+ len -= 2;
+ addr += 2;
+ }
+ if(x) {
+ if(len) /* trailing single byte */
+ losum += addr[0];
+ if(LITTLE)
+ losum += mdsum;
+ else
+ hisum += mdsum;
+ } else {
+ if(len) /* trailing single byte */
+ hisum += addr[0];
+ if(LITTLE)
+ hisum += mdsum;
+ else
+ losum += mdsum;
+ }
+
+ losum += hisum >> 8; /* add hisum with its bytes exchanged */
+ losum += (hisum & 0xff) << 8;
+ while((hisum = losum>>16)) /* fold carries until the sum fits in 16 bits */
+ losum = hisum + (losum & 0xffff);
+
+ return losum & 0xffff;
+}
diff --git a/src/9vx/a/ip/rudp.c b/src/9vx/a/ip/rudp.c
@@ -0,0 +1,1055 @@
+/*
+ * Reliable User Datagram Protocol, currently only for IPv4.
+ * This protocol is compatible with UDP's packet format.
+ * It could be done over UDP if need be.
+ */
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+#define DEBUG 0
+#define DPRINT if(DEBUG)print
+
+#define SEQDIFF(a,b) ( (a)>=(b)?\
+ (a)-(b):\
+ 0xffffffffUL-((b)-(a)) )
+#define INSEQ(a,start,end) ( (start)<=(end)?\
+ ((a)>(start)&&(a)<=(end)):\
+ ((a)>(start)||(a)<=(end)) )
+#define UNACKED(r) SEQDIFF(r->sndseq, r->ackrcvd)
+#define NEXTSEQ(a) ( (a)+1 == 0 ? 1 : (a)+1 )
+
+enum
+{
+ UDP_PHDRSIZE = 12, /* pseudo header */
+// UDP_HDRSIZE = 20, /* pseudo header + udp header */
+ UDP_RHDRSIZE = 36, /* pseudo header + udp header + rudp header */
+ UDP_IPHDR = 8, /* ip header */
+ IP_UDPPROTO = 254,
+ UDP_USEAD7 = 52, /* size of new ipv6 headers struct */
+
+ Rudprxms = 200,
+ Rudptickms = 50,
+ Rudpmaxxmit = 10,
+ Maxunacked = 100,
+};
+
+#define Hangupgen 0xffffffff /* used only in hangup messages */
+
+typedef struct Udphdr Udphdr;
+struct Udphdr
+{
+ /* ip header */
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+
+ /* pseudo header starts here */
+ uchar Unused;
+ uchar udpproto; /* Protocol */
+ uchar udpplen[2]; /* Header plus data length */
+ uchar udpsrc[4]; /* Ip source */
+ uchar udpdst[4]; /* Ip destination */
+
+ /* udp header */
+ uchar udpsport[2]; /* Source port */
+ uchar udpdport[2]; /* Destination port */
+ uchar udplen[2]; /* data length */
+ uchar udpcksum[2]; /* Checksum */
+};
+
+typedef struct Rudphdr Rudphdr;
+struct Rudphdr
+{
+ /* ip header */
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+
+ /* pseudo header starts here */
+ uchar Unused;
+ uchar udpproto; /* Protocol */
+ uchar udpplen[2]; /* Header plus data length */
+ uchar udpsrc[4]; /* Ip source */
+ uchar udpdst[4]; /* Ip destination */
+
+ /* udp header */
+ uchar udpsport[2]; /* Source port */
+ uchar udpdport[2]; /* Destination port */
+ uchar udplen[2]; /* data length (includes rudp header) */
+ uchar udpcksum[2]; /* Checksum */
+
+ /* rudp header */
+ uchar relseq[4]; /* id of this packet (or 0) */
+ uchar relsgen[4]; /* generation/time stamp */
+ uchar relack[4]; /* packet being acked (or 0) */
+ uchar relagen[4]; /* generation/time stamp */
+};
+
+
+/*
+ * one state structure per destination
+ */
+typedef struct Reliable Reliable;
+struct Reliable
+{
+ Ref;
+
+ Reliable *next;
+
+ uchar addr[IPaddrlen]; /* always V6 when put here */
+ ushort port;
+
+ Block *unacked; /* unacked msg list */
+ Block *unackedtail; /* and its tail */
+
+ int timeout; /* time since first unacked msg sent */
+ int xmits; /* number of times first unacked msg sent */
+
+ ulong sndseq; /* next packet to be sent */
+ ulong sndgen; /* and its generation */
+
+ ulong rcvseq; /* last packet received */
+ ulong rcvgen; /* and its generation */
+
+ ulong acksent; /* last ack sent */
+ ulong ackrcvd; /* last msg for which ack was rcvd */
+
+ /* flow control */
+ QLock lock;
+ Rendez vous;
+ int blocked;
+};
+
+
+
+/* MIB II counters */
+typedef struct Rudpstats Rudpstats;
+struct Rudpstats
+{
+ ulong rudpInDatagrams;
+ ulong rudpNoPorts;
+ ulong rudpInErrors;
+ ulong rudpOutDatagrams;
+};
+
+typedef struct Rudppriv Rudppriv;
+struct Rudppriv
+{
+ Ipht ht;
+
+ /* MIB counters */
+ Rudpstats ustats;
+
+ /* non-MIB stats */
+ ulong csumerr; /* checksum errors */
+ ulong lenerr; /* short packet */
+ ulong rxmits; /* # of retransmissions */
+ ulong orders; /* # of out of order pkts */
+
+ /* keeping track of the ack kproc */
+ int ackprocstarted;
+ QLock apl;
+};
+
+
+static ulong generation = 0;
+static Rendez rend;
+
+/*
+ * protocol specific part of Conv
+ */
+typedef struct Rudpcb Rudpcb;
+struct Rudpcb
+{
+ QLock;
+ uchar headers;
+ uchar randdrop;
+ Reliable *r;
+};
+
+/*
+ * local functions
+ */
+void relsendack(Conv*, Reliable*, int);
+int reliput(Conv*, Block*, uchar*, ushort);
+Reliable *relstate(Rudpcb*, uchar*, ushort, char*);
+void relput(Reliable*);
+void relforget(Conv *, uchar*, int, int);
+void relackproc(void *);
+void relackq(Reliable *, Block*);
+void relhangup(Conv *, Reliable*);
+void relrexmit(Conv *, Reliable*);
+void relput(Reliable*);
+void rudpkick(void *x);
+
+/*
+ * Start the relackproc kproc for this protocol instance the first
+ * time it is needed.  The started flag is tested once without the
+ * lock and again under rpriv->apl before the kproc is created.
+ */
+static void
+rudpstartackproc(Proto *rudp)
+{
+	Rudppriv *rpriv;
+	char kpname[KNAMELEN];
+
+	rpriv = rudp->priv;
+	if(rpriv->ackprocstarted != 0)
+		return;
+
+	qlock(&rpriv->apl);
+	if(rpriv->ackprocstarted == 0){
+		sprint(kpname, "#I%drudpack", rudp->f->dev);
+		kproc(kpname, relackproc, rudp);
+		rpriv->ackprocstarted = 1;
+	}
+	qunlock(&rpriv->apl);
+}
+
+/*
+ * connect ctl request: make sure the ack kproc is running, do the
+ * standard connect, report the outcome with Fsconnected and, on
+ * success only, enter the conversation in the protocol hash table.
+ * Returns nil on success or the error string from Fsstdconnect.
+ */
+static char*
+rudpconnect(Conv *c, char **argv, int argc)
+{
+	char *e;
+	Rudppriv *upriv;
+
+	upriv = c->p->priv;
+	rudpstartackproc(c->p);
+	e = Fsstdconnect(c, argv, argc);
+	Fsconnected(c, e);
+	/* as in rudpannounce, a failed conversation is not hashed */
+	if(e != nil)
+		return e;
+	iphtadd(&upriv->ht, c);
+
+	return nil;
+}
+
+
+/*
+ * Format the conversation state into state[0..n): "Open" or
+ * "Closed", then " addr/unacked" for every Reliable on the
+ * conversation, then a newline.  Returns the summed snprint results.
+ */
+static int
+rudpstate(Conv *c, char *state, int n)
+{
+	Rudpcb *ucb;
+	Reliable *rel;
+	char *status;
+	int len;
+
+	if(c->inuse)
+		status = "Open";
+	else
+		status = "Closed";
+	len = snprint(state, n, "%s", status);
+
+	ucb = (Rudpcb*)c->ptcl;
+	qlock(ucb);
+	rel = ucb->r;
+	while(rel != nil){
+		len += snprint(state+len, n-len, " %I/%ld", rel->addr, UNACKED(rel));
+		rel = rel->next;
+	}
+	len += snprint(state+len, n-len, "\n");
+	qunlock(ucb);
+
+	return len;
+}
+
+/*
+ * announce ctl request: make sure the ack kproc is running and do
+ * the standard announce; on success mark the conversation connected
+ * and hash it.  Returns nil or the error from Fsstdannounce.
+ */
+static char*
+rudpannounce(Conv *c, char** argv, int argc)
+{
+	char *err;
+	Rudppriv *upriv;
+
+	upriv = c->p->priv;
+	rudpstartackproc(c->p);
+
+	err = Fsstdannounce(c, argv, argc);
+	if(err == nil){
+		Fsconnected(c, nil);
+		iphtadd(&upriv->ht, c);
+	}
+	return err;
+}
+
+/*
+ * Create the conversation's queues: a 64K message-mode read queue
+ * and a 64K write queue whose kick routine is rudpkick.
+ */
+static void
+rudpcreate(Conv *c)
+{
+	enum { Qlimit = 64*1024 };
+
+	c->rq = qopen(Qlimit, Qmsg, 0, 0);
+	c->wq = qopen(Qlimit, Qkick, rudpkick, c);
+}
+
+/*
+ * Close a conversation: unhash it, send any acks still owed, close
+ * the queues, clear the addresses, ports and options, then hang up
+ * and release every Reliable on the list.
+ */
+static void
+rudpclose(Conv *c)
+{
+	Rudpcb *ucb;
+	Reliable *rel, *next;
+	Rudppriv *upriv;
+
+	upriv = c->p->priv;
+	iphtrem(&upriv->ht, c);
+
+	/* force out any delayed acks */
+	ucb = (Rudpcb*)c->ptcl;
+	qlock(ucb);
+	rel = ucb->r;
+	while(rel != nil){
+		if(rel->acksent != rel->rcvseq)
+			relsendack(c, rel, 0);
+		rel = rel->next;
+	}
+	qunlock(ucb);
+
+	qclose(c->rq);
+	qclose(c->wq);
+	qclose(c->eq);
+	ipmove(c->laddr, IPnoaddr);
+	ipmove(c->raddr, IPnoaddr);
+	c->lport = 0;
+	c->rport = 0;
+
+	ucb->headers = 0;
+	ucb->randdrop = 0;
+
+	/* next is saved before relput, which may free the entry */
+	qlock(ucb);
+	rel = ucb->r;
+	while(rel != nil){
+		if(rel->acksent != rel->rcvseq)
+			relsendack(c, rel, 0);
+		next = rel->next;
+		relhangup(c, rel);
+		relput(rel);
+		rel = next;
+	}
+	ucb->r = 0;
+
+	qunlock(ucb);
+}
+
+/*
+ * randomly don't send packets
+ *
+ * With randdrop set to p, a packet is freed instead of sent when
+ * nrand(100) < p; with randdrop 0 every packet goes to ipoput4.
+ */
+static void
+doipoput(Conv *c, Fs *f, Block *bp, int x, int ttl, int tos)
+{
+	Rudpcb *ucb;
+
+	ucb = (Rudpcb*)c->ptcl;
+	if(ucb->randdrop == 0 || nrand(100) >= ucb->randdrop){
+		ipoput4(f, bp, x, ttl, tos, nil);
+		return;
+	}
+	freeblist(bp);
+}
+
+/*
+ * sleep() condition used by rudpkick: true once the number of
+ * unacked messages is no more than Maxunacked.
+ */
+int
+flow(void *v)
+{
+	Reliable *rel;
+
+	rel = v;
+	if(UNACKED(rel) > Maxunacked)
+		return 0;
+	return 1;
+}
+
+/*
+ * Kick routine of the write queue: take one message from c->wq,
+ * prepend the ip/udp/rudp headers, stamp it with the next sequence
+ * number for its destination, keep a copy on the unacked list and
+ * send it.  Afterwards the writer sleeps while more than Maxunacked
+ * messages to that destination are outstanding.
+ *
+ * With ucb->headers == 7 the destination and source addresses and
+ * the remote port are taken from the front of the message;
+ * otherwise the conversation's are used.
+ */
+void
+rudpkick(void *x)
+{
+	Conv *c = x;
+	Udphdr *uh;
+	ushort rport;
+	uchar laddr[IPaddrlen], raddr[IPaddrlen];
+	Block *bp;
+	Rudpcb *ucb;
+	Rudphdr *rh;
+	Reliable *r;
+	int dlen, ptcllen;
+	Rudppriv *upriv;
+	Fs *f;
+
+	upriv = c->p->priv;
+	f = c->p->f;
+
+	netlog(c->p->f, Logrudp, "rudp: kick\n");
+	bp = qget(c->wq);
+	if(bp == nil)
+		return;
+
+	ucb = (Rudpcb*)c->ptcl;
+	switch(ucb->headers) {
+	case 7:
+		/* get user specified addresses */
+		bp = pullupblock(bp, UDP_USEAD7);
+		if(bp == nil)
+			return;
+		ipmove(raddr, bp->rp);
+		bp->rp += IPaddrlen;
+		ipmove(laddr, bp->rp);
+		bp->rp += IPaddrlen;
+		/* pick interface closest to dest */
+		if(ipforme(f, laddr) != Runi)
+			findlocalip(f, laddr, raddr);
+		bp->rp += IPaddrlen;		/* Ignore ifc address */
+		rport = nhgets(bp->rp);
+		bp->rp += 2+2;			/* Ignore local port */
+		break;
+	default:
+		ipmove(raddr, c->raddr);
+		ipmove(laddr, c->laddr);
+		rport = c->rport;
+		break;
+	}
+
+	dlen = blocklen(bp);
+
+	/* Make space to fit rudp & ip header */
+	bp = padblock(bp, UDP_IPHDR+UDP_RHDRSIZE);
+	if(bp == nil)
+		return;
+
+	uh = (Udphdr *)(bp->rp);
+	uh->vihl = IP_VER4;
+
+	rh = (Rudphdr*)uh;
+
+	ptcllen = dlen + (UDP_RHDRSIZE-UDP_PHDRSIZE);
+	uh->Unused = 0;
+	uh->udpproto = IP_UDPPROTO;
+	uh->frag[0] = 0;
+	uh->frag[1] = 0;
+	hnputs(uh->udpplen, ptcllen);
+	switch(ucb->headers){
+	case 7:
+		v6tov4(uh->udpdst, raddr);
+		hnputs(uh->udpdport, rport);
+		v6tov4(uh->udpsrc, laddr);
+		break;
+	default:
+		v6tov4(uh->udpdst, c->raddr);
+		hnputs(uh->udpdport, c->rport);
+		if(ipcmp(c->laddr, IPnoaddr) == 0)
+			findlocalip(f, c->laddr, c->raddr);
+		v6tov4(uh->udpsrc, c->laddr);
+		break;
+	}
+	hnputs(uh->udpsport, c->lport);
+	hnputs(uh->udplen, ptcllen);
+	uh->udpcksum[0] = 0;
+	uh->udpcksum[1] = 0;
+
+	qlock(ucb);
+	r = relstate(ucb, raddr, rport, "kick");	/* takes a reference on r */
+	r->sndseq = NEXTSEQ(r->sndseq);
+	hnputl(rh->relseq, r->sndseq);
+	hnputl(rh->relsgen, r->sndgen);
+
+	hnputl(rh->relack, r->rcvseq);	/* ACK last rcvd packet */
+	hnputl(rh->relagen, r->rcvgen);
+
+	if(r->rcvseq != r->acksent)
+		r->acksent = r->rcvseq;
+
+	hnputs(uh->udpcksum, ptclcsum(bp, UDP_IPHDR, dlen+UDP_RHDRSIZE));
+
+	relackq(r, bp);
+	qunlock(ucb);
+
+	upriv->ustats.rudpOutDatagrams++;
+
+	DPRINT("sent: %lud/%lud, %lud/%lud\n",
+		r->sndseq, r->sndgen, r->rcvseq, r->rcvgen);
+
+	doipoput(c, f, bp, 0, c->ttl, c->tos);
+
+	/*
+	 * flow control of sorts
+	 *
+	 * The lock is taken before the handler is installed, so the
+	 * handler only ever releases a lock that is held.  It unlocks
+	 * before relput because relput may free r.
+	 */
+	qlock(&r->lock);
+	if(waserror()) {
+		r->blocked = 0;
+		qunlock(&r->lock);
+		relput(r);
+		nexterror();
+	}
+	if(UNACKED(r) > Maxunacked){
+		r->blocked = 1;
+		sleep(&r->vous, flow, r);
+		r->blocked = 0;
+	}
+	poperror();
+
+	qunlock(&r->lock);
+	relput(r);
+}
+
+/*
+ * Input routine: called with a received packet for this protocol.
+ * Verifies the checksum when one is present, finds the conversation
+ * by address and port, runs the reliability state machine (reliput)
+ * and, if the packet carries in-order data, strips the headers and
+ * queues the data on c->rq.  With ucb->headers == 7 the addresses
+ * and ports are prepended to the data.  The block is consumed on
+ * every path.
+ */
+void
+rudpiput(Proto *rudp, Ipifc *ifc, Block *bp)
+{
+	int len, olen, ottl;
+	Udphdr *uh;
+	Conv *c;
+	Rudpcb *ucb;
+	uchar raddr[IPaddrlen], laddr[IPaddrlen];
+	ushort rport, lport;
+	Rudppriv *upriv;
+	Fs *f;
+	uchar *p;
+
+	upriv = rudp->priv;
+	f = rudp->f;
+
+	upriv->ustats.rudpInDatagrams++;
+
+	uh = (Udphdr*)(bp->rp);
+
+	/* Put back pseudo header for checksum
+	 * (remember old values for icmpnoconv())
+	 */
+	ottl = uh->Unused;
+	uh->Unused = 0;
+	len = nhgets(uh->udplen);
+	olen = nhgets(uh->udpplen);
+	hnputs(uh->udpplen, len);
+
+	v4tov6(raddr, uh->udpsrc);
+	v4tov6(laddr, uh->udpdst);
+	lport = nhgets(uh->udpdport);
+	rport = nhgets(uh->udpsport);
+
+	/* a zero checksum field means the sender did not compute one */
+	if(nhgets(uh->udpcksum)) {
+		if(ptclcsum(bp, UDP_IPHDR, len+UDP_PHDRSIZE)) {
+			upriv->ustats.rudpInErrors++;
+			upriv->csumerr++;
+			netlog(f, Logrudp, "rudp: checksum error %I\n", raddr);
+			DPRINT("rudp: checksum error %I\n", raddr);
+			freeblist(bp);
+			return;
+		}
+	}
+
+	qlock(rudp);
+
+	c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
+	if(c == nil){
+		/* no conversation found */
+		upriv->ustats.rudpNoPorts++;
+		qunlock(rudp);
+		netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
+			laddr, lport);
+		uh->Unused = ottl;
+		hnputs(uh->udpplen, olen);
+		icmpnoconv(f, bp);
+		freeblist(bp);
+		return;
+	}
+	ucb = (Rudpcb*)c->ptcl;
+	qlock(ucb);
+	qunlock(rudp);
+
+	if(reliput(c, bp, raddr, rport) < 0){
+		qunlock(ucb);
+		/* bp may be a chain; free all of it like the other paths do */
+		freeblist(bp);
+		return;
+	}
+
+	/*
+	 * Trim the packet down to data size
+	 */
+
+	len -= (UDP_RHDRSIZE-UDP_PHDRSIZE);
+	bp = trimblock(bp, UDP_IPHDR+UDP_RHDRSIZE, len);
+	if(bp == nil) {
+		netlog(f, Logrudp, "rudp: len err %I.%d -> %I.%d\n",
+			raddr, rport, laddr, lport);
+		DPRINT("rudp: len err %I.%d -> %I.%d\n",
+			raddr, rport, laddr, lport);
+		upriv->lenerr++;
+		/* ucb is still locked here; leaving it so would wedge the conversation */
+		qunlock(ucb);
+		return;
+	}
+
+	netlog(f, Logrudpmsg, "rudp: %I.%d -> %I.%d l %d\n",
+		raddr, rport, laddr, lport, len);
+
+	switch(ucb->headers){
+	case 7:
+		/* pass the src address */
+		bp = padblock(bp, UDP_USEAD7);
+		p = bp->rp;
+		ipmove(p, raddr); p += IPaddrlen;
+		ipmove(p, laddr); p += IPaddrlen;
+		ipmove(p, ifc->lifc->local); p += IPaddrlen;
+		hnputs(p, rport); p += 2;
+		hnputs(p, lport);
+		break;
+	default:
+		/* connection oriented rudp */
+		if(ipcmp(c->raddr, IPnoaddr) == 0){
+			/* save the src address in the conversation */
+			ipmove(c->raddr, raddr);
+			c->rport = rport;
+
+			/* reply with the same ip address (if not broadcast) */
+			if(ipforme(f, laddr) == Runi)
+				ipmove(c->laddr, laddr);
+			else
+				v4tov6(c->laddr, ifc->lifc->local);
+		}
+		break;
+	}
+	if(bp->next)
+		bp = concatblock(bp);
+
+	if(qfull(c->rq)) {
+		netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n", raddr, rport,
+			laddr, lport);
+		freeblist(bp);
+	}
+	else
+		qpass(c->rq, bp);
+
+	qunlock(ucb);
+}
+
+static char *rudpunknown = "unknown rudp ctl request";
+
+/*
+ * Protocol-specific ctl requests:
+ *	headers			use the new (7) header format
+ *	hangup addr port	forget the state kept for addr!port
+ *	randdrop [percent]	drop that share of output (default 10)
+ * Returns nil on success, otherwise an error string.
+ */
+char*
+rudpctl(Conv *c, char **f, int n)
+{
+	Rudpcb *ucb;
+	uchar ip[IPaddrlen];
+	int x;
+
+	ucb = (Rudpcb*)c->ptcl;
+	if(n < 1)
+		return rudpunknown;
+
+	if(strcmp(f[0], "headers") == 0){
+		ucb->headers = 7;	/* new headers format */
+		return nil;
+	}
+
+	if(strcmp(f[0], "hangup") == 0){
+		if(n < 3)
+			return "bad syntax";
+		if(parseip(ip, f[1]) == -1)
+			return Ebadip;
+		x = atoi(f[2]);
+		qlock(ucb);
+		relforget(c, ip, x, 1);
+		qunlock(ucb);
+		return nil;
+	}
+
+	if(strcmp(f[0], "randdrop") == 0){
+		/* default is 10% */
+		x = (n > 1) ? atoi(f[1]) : 10;
+		if(x < 0 || x > 100)
+			return "illegal rudp drop rate";
+		ucb->randdrop = x;
+		return nil;
+	}
+
+	return rudpunknown;
+}
+
+/*
+ * Hang up the conversation that sent the packet in bp, with msg as
+ * the reason.  The packet's source address and port are matched
+ * against each conversation's local side and its destination
+ * against the remote side; the first match has both queues hung up.
+ * The block is freed.
+ */
+void
+rudpadvise(Proto *rudp, Block *bp, char *msg)
+{
+	Udphdr *h;
+	uchar source[IPaddrlen], dest[IPaddrlen];
+	ushort psource, pdest;
+	Conv *s, **p;
+
+	h = (Udphdr*)(bp->rp);
+
+	v4tov6(dest, h->udpdst);
+	v4tov6(source, h->udpsrc);
+	psource = nhgets(h->udpsport);
+	pdest = nhgets(h->udpdport);
+
+	/* Look for a connection */
+	for(p = rudp->conv; (s = *p) != nil; p++){
+		if(s->rport != pdest || s->lport != psource)
+			continue;
+		if(ipcmp(s->raddr, dest) != 0 || ipcmp(s->laddr, source) != 0)
+			continue;
+		qhangup(s->rq, msg);
+		qhangup(s->wq, msg);
+		break;
+	}
+	freeblist(bp);
+}
+
+/*
+ * Print the counters on one line: datagrams in, no-port, input
+ * errors, datagrams out, retransmissions, out-of-order packets.
+ * Returns the snprint result.
+ */
+int
+rudpstats(Proto *rudp, char *buf, int len)
+{
+	Rudppriv *upriv;
+	Rudpstats *mib;
+
+	upriv = rudp->priv;
+	mib = &upriv->ustats;
+	return snprint(buf, len, "%lud %lud %lud %lud %lud %lud\n",
+		mib->rudpInDatagrams,
+		mib->rudpNoPorts,
+		mib->rudpInErrors,
+		mib->rudpOutDatagrams,
+		upriv->rxmits,
+		upriv->orders);
+}
+
+/*
+ * Build the Proto for rudp (16 conversations, IP protocol number
+ * IP_UDPPROTO) and register it with fs via Fsproto.
+ */
+void
+rudpinit(Fs *fs)
+{
+	Proto *p;
+
+	p = smalloc(sizeof(Proto));
+	p->priv = smalloc(sizeof(Rudppriv));
+	p->name = "rudp";
+	p->ipproto = IP_UDPPROTO;
+	p->nc = 16;
+	p->ptclsize = sizeof(Rudpcb);
+
+	/* entry points */
+	p->connect = rudpconnect;
+	p->announce = rudpannounce;
+	p->ctl = rudpctl;
+	p->state = rudpstate;
+	p->create = rudpcreate;
+	p->close = rudpclose;
+	p->rcv = rudpiput;
+	p->advise = rudpadvise;
+	p->stats = rudpstats;
+
+	Fsproto(fs, p);
+}
+
+/*********************************************/
+/* Here starts the reliable helper functions */
+/*********************************************/
+/*
+ * Enqueue a copy of an unacked block for possible retransmissions.
+ * When the list was empty the retransmit timer is restarted.
+ */
+void
+relackq(Reliable *r, Block *bp)
+{
+	Block *copy;
+
+	copy = copyblock(bp, blocklen(bp));
+	copy->list = nil;
+
+	if(r->unacked == nil){
+		/* restart timer */
+		r->timeout = 0;
+		r->xmits = 1;
+		r->unacked = copy;
+	} else
+		r->unackedtail->list = copy;
+	r->unackedtail = copy;
+}
+
+/*
+ * retransmit unacked blocks
+ *
+ * Kproc body; never returns.  Every Rudptickms it walks all
+ * conversations of the protocol and, for each Reliable, advances the
+ * timeout of a pending message (retransmitting once it exceeds
+ * Rudprxms times the transmit count) and sends any ack still owed.
+ */
+void
+relackproc(void *a)
+{
+	Rudpcb *ucb;
+	Proto *rudp;
+	Reliable *rel;
+	Conv **cp, *c;
+
+	rudp = (Proto *)a;
+
+	for(;;){
+		tsleep(&up->sleep, return0, 0, Rudptickms);
+
+		for(cp = rudp->conv; (c = *cp) != nil; cp++){
+			ucb = (Rudpcb*)c->ptcl;
+			qlock(ucb);
+
+			for(rel = ucb->r; rel != nil; rel = rel->next){
+				if(rel->unacked != nil){
+					rel->timeout += Rudptickms;
+					if(rel->timeout > Rudprxms*rel->xmits)
+						relrexmit(c, rel);
+				}
+				if(rel->acksent != rel->rcvseq)
+					relsendack(c, rel, 0);
+			}
+			qunlock(ucb);
+		}
+	}
+}
+
+/*
+ * get the state record for a conversation
+ *
+ * Looks up addr/port on ucb's list and appends a fresh record with a
+ * new send generation when none exists.  The returned record carries
+ * an extra reference which the caller drops with relput.
+ */
+Reliable*
+relstate(Rudpcb *ucb, uchar *addr, ushort port, char *from)
+{
+	Reliable *r, **l;
+
+	for(l = &ucb->r; (r = *l) != nil; l = &r->next)
+		if(memcmp(addr, r->addr, IPaddrlen) == 0 && port == r->port)
+			break;
+
+	if(r != nil){
+		incref(r);
+		return r;
+	}
+
+	/* no state for this addr/port, create some */
+	while(generation == 0)
+		generation = rand();
+
+	DPRINT("from %s new state %lud for %I!%ud\n",
+		from, generation, addr, port);
+
+	r = smalloc(sizeof(Reliable));
+	memmove(r->addr, addr, IPaddrlen);
+	r->port = port;
+	r->unacked = 0;
+	/* Hangupgen is reserved for hangup messages */
+	if(generation == Hangupgen)
+		generation++;
+	r->sndgen = generation++;
+	r->sndseq = 0;
+	r->ackrcvd = 0;
+	r->rcvgen = 0;
+	r->rcvseq = 0;
+	r->acksent = 0;
+	r->xmits = 0;
+	r->timeout = 0;
+	r->ref = 0;
+	incref(r);	/* one reference for being in the list */
+
+	*l = r;		/* l is the tail link after the failed search */
+
+	incref(r);	/* and one for the caller */
+	return r;
+}
+
+/*
+ * Drop one reference to r; the record is freed with the last one.
+ */
+void
+relput(Reliable *r)
+{
+	if(decref(r) != 0)
+		return;
+	free(r);
+}
+
+/*
+ * forget a Reliable state
+ *
+ * Unlinks the record for ip/port from the conversation's list,
+ * resets it with relhangup and drops the list's reference.  When
+ * originator is set a hangup ack is sent to the peer first.
+ */
+void
+relforget(Conv *c, uchar *ip, int port, int originator)
+{
+	Rudpcb *ucb;
+	Reliable *r, **l;
+
+	ucb = (Rudpcb*)c->ptcl;
+
+	for(l = &ucb->r; (r = *l) != nil; l = &r->next){
+		if(ipcmp(ip, r->addr) != 0 || port != r->port)
+			continue;
+
+		*l = r->next;
+		if(originator)
+			relsendack(c, r, 1);
+		relhangup(c, r);
+		relput(r);	/* remove from the list */
+		break;
+	}
+}
+
+/*
+ * process a rcvd reliable packet. return -1 if not to be passed to user process,
+ * 0 otherwise.
+ *
+ * called with ucb locked.
+ *
+ * In order: acks for a generation other than ours are ignored; a
+ * hangup for our generation forgets the peer; a new sender
+ * generation is accepted only with sequence 0 or 1 and resets the
+ * state; acked messages are taken off the unacked list; finally the
+ * packet is accepted only if it carries the next expected sequence
+ * number and the read queue is not full.
+ */
+int
+reliput(Conv *c, Block *bp, uchar *addr, ushort port)
+{
+ Block *nbp;
+ Rudpcb *ucb;
+ Rudppriv *upriv;
+ Udphdr *uh;
+ Reliable *r;
+ Rudphdr *rh;
+ ulong seq, ack, sgen, agen, ackreal;
+ int rv = -1;
+
+ /* get fields */
+ uh = (Udphdr*)(bp->rp);
+ rh = (Rudphdr*)uh;
+ seq = nhgetl(rh->relseq);
+ sgen = nhgetl(rh->relsgen);
+ ack = nhgetl(rh->relack);
+ agen = nhgetl(rh->relagen);
+
+ upriv = c->p->priv;
+ ucb = (Rudpcb*)c->ptcl;
+ r = relstate(ucb, addr, port, "input"); /* reference dropped at out: */
+
+ DPRINT("rcvd %lud/%lud, %lud/%lud, r->sndgen = %lud\n",
+ seq, sgen, ack, agen, r->sndgen);
+
+ /* if acking an incorrect generation, ignore */
+ if(ack && agen != r->sndgen)
+ goto out;
+
+ /* Look for a hangup */
+ if(sgen == Hangupgen) {
+ if(agen == r->sndgen)
+ relforget(c, addr, port, 0);
+ goto out;
+ }
+
+ /* make sure we're not talking to a new remote side */
+ if(r->rcvgen != sgen){
+ if(seq != 0 && seq != 1)
+ goto out;
+
+ /* new connection */
+ if(r->rcvgen != 0){
+ DPRINT("new con r->rcvgen = %lud, sgen = %lud\n", r->rcvgen, sgen);
+ relhangup(c, r);
+ }
+ r->rcvgen = sgen;
+ }
+
+ /* dequeue acked packets */
+ if(ack && agen == r->sndgen){
+ ackreal = 0;
+ while(r->unacked != nil && INSEQ(ack, r->ackrcvd, r->sndseq)){
+ nbp = r->unacked;
+ r->unacked = nbp->list;
+ DPRINT("%lud/%lud acked, r->sndgen = %lud\n",
+ ack, agen, r->sndgen);
+ freeb(nbp);
+ r->ackrcvd = NEXTSEQ(r->ackrcvd);
+ ackreal = 1; /* at least one message was really acked */
+ }
+
+ /* flow control */
+ if(UNACKED(r) < Maxunacked/8 && r->blocked)
+ wakeup(&r->vous);
+
+ /*
+ * retransmit next packet if the acked packet
+ * was transmitted more than once
+ */
+ if(ackreal && r->unacked != nil){
+ r->timeout = 0;
+ if(r->xmits > 1){
+ r->xmits = 1;
+ relrexmit(c, r);
+ }
+ }
+
+ }
+
+ /* no message or input queue full */
+ if(seq == 0 || qfull(c->rq))
+ goto out;
+
+ /* refuse out of order delivery */
+ if(seq != NEXTSEQ(r->rcvseq)){
+ relsendack(c, r, 0); /* tell him we got it already */
+ upriv->orders++;
+ DPRINT("out of sequence %lud not %lud\n", seq, NEXTSEQ(r->rcvseq));
+ goto out;
+ }
+ r->rcvseq = seq;
+
+ rv = 0;
+out:
+ relput(r);
+ return rv;
+}
+
+void
+relsendack(Conv *c, Reliable *r, int hangup)
+{ /*
+ * Send a header-only packet (sequence number 0, no data) to the
+ * peer described by r, acknowledging r->rcvseq/r->rcvgen.  With
+ * hangup set the send generation field carries Hangupgen instead
+ * of r->sndgen.  Nothing is sent if allocb returns nil.
+ */
+ Udphdr *uh;
+ Block *bp;
+ Rudphdr *rh;
+ int ptcllen;
+ Fs *f;
+
+ bp = allocb(UDP_IPHDR + UDP_RHDRSIZE);
+ if(bp == nil)
+ return;
+ bp->wp += UDP_IPHDR + UDP_RHDRSIZE; /* the block is all header */
+ f = c->p->f;
+ uh = (Udphdr *)(bp->rp);
+ uh->vihl = IP_VER4;
+ rh = (Rudphdr*)uh;
+
+ ptcllen = (UDP_RHDRSIZE-UDP_PHDRSIZE);
+ uh->Unused = 0;
+ uh->udpproto = IP_UDPPROTO;
+ uh->frag[0] = 0;
+ uh->frag[1] = 0;
+ hnputs(uh->udpplen, ptcllen);
+
+ v6tov4(uh->udpdst, r->addr);
+ hnputs(uh->udpdport, r->port);
+ hnputs(uh->udpsport, c->lport);
+ if(ipcmp(c->laddr, IPnoaddr) == 0) /* no local address yet: pick one (stored into c->laddr) */
+ findlocalip(f, c->laddr, c->raddr);
+ v6tov4(uh->udpsrc, c->laddr);
+ hnputs(uh->udplen, ptcllen);
+
+ if(hangup)
+ hnputl(rh->relsgen, Hangupgen);
+ else
+ hnputl(rh->relsgen, r->sndgen);
+ hnputl(rh->relseq, 0); /* 0: this packet carries no message */
+ hnputl(rh->relagen, r->rcvgen);
+ hnputl(rh->relack, r->rcvseq);
+
+ if(r->acksent < r->rcvseq)
+ r->acksent = r->rcvseq;
+
+ uh->udpcksum[0] = 0; /* checksum is computed with the field zeroed */
+ uh->udpcksum[1] = 0;
+ hnputs(uh->udpcksum, ptclcsum(bp, UDP_IPHDR, UDP_RHDRSIZE));
+
+ DPRINT("sendack: %lud/%lud, %lud/%lud\n", 0L, r->sndgen, r->rcvseq, r->rcvgen);
+ doipoput(c, f, bp, 0, c->ttl, c->tos);
+}
+
+
+/*
+ * Report "hangup addr!port" on the conversation's eq, discard all
+ * unacked output for r, reset its sequence state with a fresh send
+ * generation and wake anyone sleeping on r->vous.
+ *
+ * called with ucb locked (and c locked if user initiated close)
+ */
+void
+relhangup(Conv *c, Reliable *r)
+{
+	int n;
+	Block *bp;
+	char hup[ERRMAX];
+
+	n = snprint(hup, sizeof(hup), "hangup %I!%d", r->addr, r->port);
+	qproduce(c->eq, hup, n);
+
+	/*
+	 * dump any unacked outgoing messages
+	 */
+	while((bp = r->unacked) != nil){
+		r->unacked = bp->list;
+		bp->list = nil;
+		freeb(bp);
+	}
+
+	/* receive side */
+	r->rcvgen = 0;
+	r->rcvseq = 0;
+	r->acksent = 0;
+
+	/* send side; Hangupgen is never handed out as a generation */
+	if(generation == Hangupgen)
+		generation++;
+	r->sndgen = generation++;
+	r->sndseq = 0;
+	r->ackrcvd = 0;
+	r->xmits = 0;
+	r->timeout = 0;
+
+	wakeup(&r->vous);
+}
+
+/*
+ * Send another copy of the first unacked message, or hang up the
+ * peer once the transmit count has passed Rudpmaxxmit.
+ *
+ * called with ucb locked
+ */
+void
+relrexmit(Conv *c, Reliable *r)
+{
+	Rudppriv *upriv;
+	Block *copy;
+	Fs *f;
+
+	upriv = c->p->priv;
+	f = c->p->f;
+
+	r->timeout = 0;
+	if(r->xmits++ > Rudpmaxxmit){
+		relhangup(c, r);
+		return;
+	}
+
+	upriv->rxmits++;
+	copy = copyblock(r->unacked, blocklen(r->unacked));
+	DPRINT("rxmit r->ackrvcd+1 = %lud\n", r->ackrcvd+1);
+	doipoput(c, f, copy, 0, c->ttl, c->tos);
+}
diff --git a/src/9vx/a/ip/tcp.c b/src/9vx/a/ip/tcp.c
@@ -0,0 +1,3209 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+
+enum
+{
+ QMAX = 64*1024-1, /* default receive window and read queue limit (bytes) */
+ IP_TCPPROTO = 6,
+
+ TCP4_IPLEN = 8, /* offset in Tcp4hdr where checksumming starts */
+ TCP4_PHDRSIZE = 12, /* size of the v4 pseudo header */
+ TCP4_HDRSIZE = 20, /* tcp header without options */
+ TCP4_TCBPHDRSZ = 40, /* bytes of prototype header copied by htontcp4 */
+ TCP4_PKT = TCP4_IPLEN+TCP4_PHDRSIZE,
+
+ TCP6_IPLEN = 0, /* offset in Tcp6hdr where checksumming starts */
+ TCP6_PHDRSIZE = 40, /* size of the v6 pseudo header */
+ TCP6_HDRSIZE = 20, /* tcp header without options */
+ TCP6_TCBPHDRSZ = 60, /* bytes of prototype header copied by htontcp6 */
+ TCP6_PKT = TCP6_IPLEN+TCP6_PHDRSIZE,
+
+ TcptimerOFF = 0, /* Tcptimer.state values */
+ TcptimerON = 1,
+ TcptimerDONE = 2,
+ MAX_TIME = (1<<20), /* Forever */
+ TCP_ACK = 50, /* Timed ack sequence in ms */
+ MAXBACKMS = 9*60*1000, /* longest backoff time (ms) before hangup */
+
+ URG = 0x20, /* Data marked urgent */
+ ACK = 0x10, /* Acknowledge is valid */
+ PSH = 0x08, /* Whole data pipe is pushed */
+ RST = 0x04, /* Reset connection */
+ SYN = 0x02, /* Pkt. is synchronise */
+ FIN = 0x01, /* Start close down */
+
+ EOLOPT = 0, /* tcp option kinds */
+ NOOPOPT = 1,
+ MSSOPT = 2,
+ MSS_LENGTH = 4, /* length in bytes of the MSS option */
+ WSOPT = 3,
+ WS_LENGTH = 3, /* length in bytes of the window scale option */
+ MSL2 = 10,
+ MSPTICK = 50, /* Milliseconds per timer tick */
+ DEF_MSS = 1460, /* Default maximum segment size */
+ DEF_MSS6 = 1280, /* Default maximum segment size (min) for v6 */
+ DEF_RTT = 500, /* Default round trip */
+ DEF_KAT = 120000, /* Default time (ms) between keep alives */
+ TCP_LISTEN = 0, /* Listen connection */
+ TCP_CONNECT = 1, /* Outgoing connection */
+ SYNACK_RXTIMER = 250, /* ms between SYNACK retransmits */
+
+ TCPREXMTTHRESH = 3, /* dupack threshold for rxt */
+
+ FORCE = 1, /* bits kept in Tcpctl.flags */
+ CLONE = 2,
+ RETRAN = 4,
+ ACTIVE = 8,
+ SYNACK = 16,
+
+ LOGAGAIN = 3, /* srtt is initialised as tcp_irtt<<LOGAGAIN */
+ LOGDGAIN = 2,
+
+ Closed = 0, /* Connection states; tcpstates[] lists their names in this order */
+ Listen,
+ Syn_sent,
+ Syn_received,
+ Established,
+ Finwait1,
+ Finwait2,
+ Close_wait,
+ Closing,
+ Last_ack,
+ Time_wait,
+
+ Maxlimbo = 1000, /* maximum procs waiting for response to SYN ACK */
+ NLHT = 256, /* hash table size, must be a power of 2 */
+ LHTMASK = NLHT-1,
+
+ HaveWS = 1<<8, /* or'ed into a window scale value to mark it as present */
+};
+
+/*
+ * Printable names of the connection states, indexed by state number
+ * (tcpstate() prints tcpstates[state]).
+ * Must correspond to the enumeration above.
+ */
+char *tcpstates[] =
+{
+ "Closed", "Listen", "Syn_sent", "Syn_received",
+ "Established", "Finwait1", "Finwait2", "Close_wait",
+ "Closing", "Last_ack", "Time_wait"
+};
+
+typedef struct Tcptimer Tcptimer; /* a countdown timer serviced by tcpackproc */
+struct Tcptimer
+{
+ Tcptimer *next; /* next/prev link the timer into Tcppriv.timers while it is TcptimerON */
+ Tcptimer *prev;
+ Tcptimer *readynext; /* chains timers that expired in the same tcpackproc pass */
+ int state; /* TcptimerOFF, TcptimerON or TcptimerDONE */
+ int start; /* value tcpgo loads into count; tcpgo ignores the timer when 0 */
+ int count; /* decremented once per tcpackproc pass; the timer fires at 0 */
+ void (*func)(void*); /* called as func(arg) when the timer fires; may be nil */
+ void *arg;
+};
+
+/*
+ * v4 and v6 pseudo headers used for
+ * checksumming tcp
+ *
+ * Tcp4hdr is laid over the start of the packet.  htontcp4 starts the
+ * checksum at offset TCP4_IPLEN, i.e. at Unused, so Unused through
+ * tcpdst act as the pseudo header in front of the tcp header proper.
+ */
+typedef struct Tcp4hdr Tcp4hdr;
+struct Tcp4hdr
+{
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+ uchar Unused; /* first byte covered by the checksum */
+ uchar proto;
+ uchar tcplen[2]; /* tcp header plus data length, set by htontcp4 */
+ uchar tcpsrc[4];
+ uchar tcpdst[4];
+ uchar tcpsport[2];
+ uchar tcpdport[2];
+ uchar tcpseq[4];
+ uchar tcpack[4];
+ uchar tcpflag[2]; /* header length in the top bits, flag bits in the low byte */
+ uchar tcpwin[2];
+ uchar tcpcksum[2];
+ uchar tcpurg[2];
+ /* Options segment */
+ uchar tcpopt[1];
+};
+
+typedef struct Tcp6hdr Tcp6hdr; /* v6 packet header overlay; doubles as pseudo header while checksumming */
+struct Tcp6hdr
+{
+ uchar vcf[4]; /* tcp length while htontcp6 checksums; IP_VER6 in vcf[0] afterwards */
+ uchar ploadlen[2]; /* zero while checksumming; header plus data length afterwards */
+ uchar proto; /* zero while checksumming; protocol number afterwards */
+ uchar ttl; /* holds the protocol number while checksumming */
+ uchar tcpsrc[IPaddrlen];
+ uchar tcpdst[IPaddrlen];
+ uchar tcpsport[2];
+ uchar tcpdport[2];
+ uchar tcpseq[4];
+ uchar tcpack[4];
+ uchar tcpflag[2]; /* header length in the top bits, flag bits in the low byte */
+ uchar tcpwin[2];
+ uchar tcpcksum[2];
+ uchar tcpurg[2];
+ /* Options segment */
+ uchar tcpopt[1];
+};
+
+/*
+ * this represents the control info
+ * for a single packet. It is derived from
+ * a packet in ntohtcp{4,6}() and stuck into
+ * a packet in htontcp{4,6}().  All values are
+ * in host byte order.
+ */
+typedef struct Tcp Tcp;
+struct Tcp
+{
+ ushort source; /* source port */
+ ushort dest; /* destination port */
+ ulong seq;
+ ulong ack;
+ uchar flags; /* URG, ACK, PSH, RST, SYN, FIN */
+ ushort ws; /* window scale option (if not zero); ntohtcp{4,6} or in HaveWS */
+ ulong wnd;
+ ushort urg;
+ ushort mss; /* max segment size option (if not zero) */
+ ushort len; /* size of data */
+};
+
+/*
+ * this header is malloc'd to thread together fragments
+ * waiting to be coalesced.  The list hangs off Tcpctl.reseq
+ * and is freed entry by entry in localclose().
+ */
+typedef struct Reseq Reseq;
+struct Reseq
+{
+ Reseq *next;
+ Tcp seg; /* control info of the queued segment */
+ Block *bp; /* its data; released with freeblist */
+ ushort length;
+};
+
+/*
+ * the QLOCK in the Conv locks this structure
+ *
+ * One Tcpctl lives in each conversation's ptcl area; inittcpctl()
+ * zeroes it and fills in the defaults.
+ */
+typedef struct Tcpctl Tcpctl;
+struct Tcpctl
+{
+ uchar state; /* Connection state */
+ uchar type; /* Listening or active connection */
+ uchar code; /* Icmp code */
+ struct {
+ ulong una; /* Unacked data pointer */
+ ulong nxt; /* Next sequence expected */
+ ulong ptr; /* Data pointer */
+ ulong wnd; /* Tcp send window */
+ ulong urg; /* Urgent data pointer */
+ ulong wl2;
+ int scale; /* how much to right shift window in xmitted packets */
+ /* to implement tahoe and reno TCP */
+ ulong dupacks; /* number of duplicate acks rcvd */
+ int recovery; /* loss recovery flag */
+ ulong rxt; /* right window marker for recovery */
+ } snd;
+ struct {
+ ulong nxt; /* Receive pointer to next uchar slot */
+ ulong wnd; /* Receive window incoming */
+ ulong urg; /* Urgent pointer */
+ int blocked; /* set by tcprcvwin when the window has closed to 0 */
+ int una; /* unacked data segs */
+ int scale; /* how much to left shift window in rcved packets */
+ } rcv;
+ ulong iss; /* Initial sequence number */
+ int sawwsopt; /* true if we saw a wsopt on the incoming SYN */
+ ulong cwind; /* Congestion window */
+ int scale; /* desired snd.scale */
+ ushort ssthresh; /* Slow start threshold */
+ int resent; /* Bytes just resent */
+ int irs; /* Initial received sequence */
+ ushort mss; /* Maximum segment size */
+ int rerecv; /* Overlap of data rereceived */
+ ulong window; /* Receive window */
+ uchar backoff; /* Exponential backoff counter */
+ int backedoff; /* ms we've backed off for rexmits */
+ uchar flags; /* State flags (FORCE, CLONE, RETRAN, ACTIVE, SYNACK) */
+ Reseq *reseq; /* Resequencing queue */
+ Tcptimer timer; /* Activity timer */
+ Tcptimer acktimer; /* Acknowledge timer */
+ Tcptimer rtt_timer; /* Round trip timer */
+ Tcptimer katimer; /* keep alive timer */
+ ulong rttseq; /* Round trip sequence */
+ int srtt; /* Shortened round trip */
+ int mdev; /* Mean deviation of round trip */
+ int kacounter; /* count down for keep alive */
+ uint sndsyntime; /* time syn sent */
+ ulong time; /* time Finwait2 or Syn_received was sent */
+ int nochecksum; /* non-zero means don't send checksums */
+ int flgcnt; /* number of flags in the sequence (SYN,FIN) */
+
+ union {
+ Tcp4hdr tcp4hdr;
+ Tcp6hdr tcp6hdr;
+ } protohdr; /* prototype header */
+};
+
+/*
+ * New calls are put in limbo rather than having a conversation structure
+ * allocated. Thus, a SYN attack results in lots of limbo'd calls but not
+ * any real Conv structures mucking things up. Calls in limbo rexmit their
+ * SYN ACK every SYNACK_RXTIMER ms up to 4 times, i.e., they disappear after 1 second.
+ * NOTE(review): limborexmit() waits (rexmits+1)*SYNACK_RXTIMER between
+ * sends and drops a call once rexmits exceeds 5, so the figures above
+ * look out of date -- confirm.
+ *
+ * In particular they aren't on a listener's queue so that they don't figure
+ * in the input queue limit.
+ *
+ * If 1/2 of a T3 was attacking SYN packets, we'd have a permanent queue
+ * of 70000 limbo'd calls. Not great for a linear list but doable. Therefore
+ * there is no hashing of this list.
+ * NOTE(review): Tcppriv.lht is an NLHT-bucket table indexed with
+ * hashipa(), so the calls are in fact hashed; only each bucket is a
+ * linear list.
+ */
+typedef struct Limbo Limbo;
+struct Limbo
+{
+ Limbo *next; /* next call in the same hash bucket */
+
+ uchar laddr[IPaddrlen];
+ uchar raddr[IPaddrlen];
+ ushort lport;
+ ushort rport;
+ ulong irs; /* initial received sequence */
+ ulong iss; /* initial sent sequence */
+ ushort mss; /* mss from the other end */
+ ushort rcvscale; /* how much to scale rcvd windows */
+ ushort sndscale; /* how much to scale sent windows */
+ ulong lastsend; /* last time we sent a synack */
+ uchar version; /* v4 or v6 */
+ uchar rexmits; /* number of retransmissions */
+};
+
+int tcp_irtt = DEF_RTT; /* Initial guess at round trip time (ms: inittcpctl divides it by MSPTICK to get ticks) */
+ushort tcp_mss = DEF_MSS; /* Maximum segment size to be sent */
+
+enum { /* indices into Tcppriv.stats[] and statnames[] */
+ /* MIB stats */
+ MaxConn,
+ ActiveOpens, /* bumped by tcpstart for TCP_CONNECT */
+ PassiveOpens, /* bumped by tcpstart for TCP_LISTEN */
+ EstabResets,
+ CurrEstab, /* kept by tcpsetstate as connections enter and leave Established */
+ InSegs,
+ OutSegs,
+ RetransSegs,
+ RetransTimeouts,
+ InErrs,
+ OutRsts, /* bumped by sndrst */
+
+ /* non-MIB stats */
+ CsumErrs,
+ HlenErrs,
+ LenErrs,
+ OutOfOrder,
+
+ Nstats /* number of counters; sizes Tcppriv.stats[] */
+};
+
+static char *statnames[] = /* printable name of each counter, indexed by the stats enum above */
+{
+[MaxConn] "MaxConn",
+[ActiveOpens] "ActiveOpens",
+[PassiveOpens] "PassiveOpens",
+[EstabResets] "EstabResets",
+[CurrEstab] "CurrEstab",
+[InSegs] "InSegs",
+[OutSegs] "OutSegs",
+[RetransSegs] "RetransSegs",
+[RetransTimeouts] "RetransTimeouts",
+[InErrs] "InErrs",
+[OutRsts] "OutRsts",
+[CsumErrs] "CsumErrs",
+[HlenErrs] "HlenErrs",
+[LenErrs] "LenErrs",
+[OutOfOrder] "OutOfOrder",
+};
+
+typedef struct Tcppriv Tcppriv; /* per-protocol state, reached through Proto.priv */
+struct Tcppriv
+{
+ /* List of active timers */
+ QLock tl; /* held while timers is walked or changed (tcpgo, tcphalt, tcpackproc) */
+ Tcptimer *timers;
+
+ /* hash table for matching conversations */
+ Ipht ht;
+
+ /* calls in limbo waiting for an ACK to our SYN ACK */
+ int nlimbo; /* number of Limbo entries in lht */
+ Limbo *lht[NLHT]; /* buckets chosen with hashipa() */
+
+ /* for keeping track of tcpackproc */
+ QLock apl;
+ int ackprocstarted; /* set once tcpstart has started the tcpackproc kproc */
+
+ ulong stats[Nstats];
+};
+
+/*
+ * Setting tcpporthogdefense to non-zero enables Dong Lin's
+ * solution to hijacked systems staking out ports as a form
+ * of DoS attack.
+ *
+ * To avoid stateless Conv hogs, we pick a sequence number at random. If
+ * that number gets acked by the other end, we shut down the connection.
+ * Look for tcpporthogdefense in the code.
+ */
+int tcpporthogdefense = 0;
+
+int addreseq(Tcpctl*, Tcppriv*, Tcp*, Block*, ushort); /* forward declarations for functions used before their definition */
+void getreseq(Tcpctl*, Tcp*, Block**, ushort*);
+void localclose(Conv*, char*);
+void procsyn(Conv*, Tcp*);
+void tcpiput(Proto*, Ipifc*, Block*);
+void tcpoutput(Conv*);
+int tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
+void tcpstart(Conv*, int);
+void tcptimeout(void*); /* Tcptimer callbacks: timer, acktimer and katimer (see inittcpctl) */
+void tcpsndsyn(Conv*, Tcpctl*);
+void tcprcvwin(Conv*);
+void tcpacktimer(void*);
+void tcpkeepalive(void*);
+void tcpsetkacounter(Tcpctl*);
+void tcprxmit(Conv*);
+void tcpsettimer(Tcpctl*);
+void tcpsynackrtt(Conv*);
+void tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
+
+static void limborexmit(Proto*); /* limbo (half-open call) handling */
+static void limbo(Conv*, uchar*, uchar*, Tcp*, int);
+
+/*
+ * Move conversation s into newstate.  Keeps the CurrEstab counter
+ * in step, closes or hangs up the queues for the states that need
+ * it, and calls Fsconnected with no error when the conversation
+ * leaves Syn_sent for anything other than Closed.
+ * Does nothing when the state is unchanged.
+ */
+void
+tcpsetstate(Conv *s, uchar newstate)
+{
+	Tcppriv *tpriv;
+	Tcpctl *tcb;
+	uchar prev;
+
+	tcb = (Tcpctl*)s->ptcl;
+	prev = tcb->state;
+	if(prev == newstate)
+		return;
+
+	tpriv = s->p->priv;
+	if(prev == Established)
+		tpriv->stats[CurrEstab]--;
+	if(newstate == Established)
+		tpriv->stats[CurrEstab]++;
+
+	if(newstate == Closed){
+		qclose(s->rq);
+		qclose(s->wq);
+		qclose(s->eq);
+	} else if(newstate == Close_wait){
+		/* Remote closes */
+		qhangup(s->rq, nil);
+	}
+
+	tcb->state = newstate;
+
+	if(prev == Syn_sent && newstate != Closed)
+		Fsconnected(s, nil);
+}
+
+/*
+ * Connect request for conversation c.  Returns Econinuse when the
+ * conversation is not Closed, the error string from Fsstdconnect if
+ * that fails, and nil once the active open has been started.
+ */
+static char*
+tcpconnect(Conv *c, char **argv, int argc)
+{
+	char *err;
+
+	if(((Tcpctl*)c->ptcl)->state != Closed)
+		return Econinuse;
+
+	err = Fsstdconnect(c, argv, argc);
+	if(err == nil)
+		tcpstart(c, TCP_CONNECT);
+	return err;
+}
+
+/*
+ * Format a one-line status report for conversation c into state
+ * (at most n bytes) and return what snprint returns.
+ * A missing read or write queue is reported as length 0.
+ */
+static int
+tcpstate(Conv *c, char *state, int n)
+{
+	Tcpctl *tcb;
+	int qin, qout;
+
+	tcb = (Tcpctl*)c->ptcl;
+
+	qin = 0;
+	if(c->rq)
+		qin = qlen(c->rq);
+	qout = 0;
+	if(c->wq)
+		qout = qlen(c->wq);
+
+	/* each window is printed next to the scale applied to it */
+	return snprint(state, n,
+		"%s qin %d qout %d srtt %d mdev %d cwin %lud swin %lud>>%d rwin %lud>>%d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
+		tcpstates[tcb->state],
+		qin, qout,
+		tcb->srtt, tcb->mdev,
+		tcb->cwind,
+		tcb->snd.wnd, tcb->rcv.scale,
+		tcb->rcv.wnd, tcb->snd.scale,
+		tcb->timer.start, tcb->timer.count,
+		tcb->rerecv,
+		tcb->katimer.start, tcb->katimer.count);
+}
+
+/*
+ * A conversation is in use unless its tcp state is Closed.
+ */
+static int
+tcpinuse(Conv *c)
+{
+	return ((Tcpctl*)c->ptcl)->state != Closed;
+}
+
+/*
+ * Announce request for conversation c.  Returns Econinuse when the
+ * conversation is not Closed, the error string from Fsstdannounce
+ * if that fails, and nil once the conversation is listening.
+ */
+static char*
+tcpannounce(Conv *c, char **argv, int argc)
+{
+	char *err;
+
+	if(((Tcpctl*)c->ptcl)->state != Closed)
+		return Econinuse;
+
+	err = Fsstdannounce(c, argv, argc);
+	if(err == nil){
+		tcpstart(c, TCP_LISTEN);
+		Fsconnected(c, nil);
+	}
+	return err;
+}
+
+/*
+ * Close conversation c from the local side.  The queues are hung up
+ * first; after that a conversation with no established peer is torn
+ * down at once, while an open one queues a FIN and moves on to
+ * Finwait1 or Last_ack.  Other states are left alone.
+ *
+ * tcpclose is always called with the q locked
+ */
+static void
+tcpclose(Conv *c)
+{
+	Tcpctl *tcb;
+	int next;
+
+	tcb = (Tcpctl*)c->ptcl;
+
+	qhangup(c->rq, nil);
+	qhangup(c->wq, nil);
+	qhangup(c->eq, nil);
+	qflush(c->rq);
+
+	switch(tcb->state){
+	case Listen:
+		/* reset any incoming calls to this listener */
+		Fsconnected(c, "Hangup");
+		/* fall through */
+	case Closed:
+	case Syn_sent:
+		localclose(c, nil);
+		return;
+	case Syn_received:
+	case Established:
+		next = Finwait1;
+		break;
+	case Close_wait:
+		next = Last_ack;
+		break;
+	default:
+		return;
+	}
+
+	/* the FIN takes up one sequence number */
+	tcb->flgcnt++;
+	tcb->snd.nxt++;
+	tcpsetstate(c, next);
+	tcpoutput(c);
+}
+
+/*
+ * Callback given to qopen for the write queue (see tcpcreate); x is
+ * the Conv.  With the conversation locked, push output if the state
+ * allows sending, otherwise close it locally with "Hangup".
+ */
+void
+tcpkick(void *x)
+{
+	Conv *s;
+	Tcpctl *tcb;
+
+	s = x;
+	tcb = (Tcpctl*)s->ptcl;
+
+	/* the error label is set up first so an error below drops the lock */
+	if(waserror()){
+		QUNLOCK(s);
+		nexterror();
+	}
+	QLOCK(s);
+
+	if(tcb->state == Syn_sent || tcb->state == Syn_received
+	|| tcb->state == Established || tcb->state == Close_wait){
+		/* Push data */
+		tcprcvwin(s);
+		tcpoutput(s);
+	} else {
+		localclose(s, "Hangup");
+	}
+
+	QUNLOCK(s);
+	poperror();
+}
+
+/*
+ * Recompute the receive window: whatever part of tcb->window the
+ * read queue is not already holding.  A window that has closed to
+ * zero is also recorded in rcv.blocked.
+ *
+ * Call with tcb locked
+ */
+void
+tcprcvwin(Conv *s)
+{
+	Tcpctl *tcb;
+	int avail;
+
+	tcb = (Tcpctl*)s->ptcl;
+
+	avail = tcb->window - qlen(s->rq);
+	if(avail <= 0){
+		tcb->rcv.wnd = 0;
+		tcb->rcv.blocked = 1;
+		return;
+	}
+	tcb->rcv.wnd = avail;
+}
+
+/*
+ * Ack timer callback (also handed to qopen for the read queue);
+ * v is the Conv.  Unless the conversation is Closed, force a
+ * segment out with an up to date receive window.
+ */
+void
+tcpacktimer(void *v)
+{
+	Conv *s;
+	Tcpctl *tcb;
+
+	s = v;
+	tcb = (Tcpctl*)s->ptcl;
+
+	/* the error label is set up first so an error below drops the lock */
+	if(waserror()){
+		QUNLOCK(s);
+		nexterror();
+	}
+	QLOCK(s);
+
+	if(tcb->state == Closed){
+		QUNLOCK(s);
+		poperror();
+		return;
+	}
+
+	tcb->flags |= FORCE;
+	tcprcvwin(s);
+	tcpoutput(s);
+
+	QUNLOCK(s);
+	poperror();
+}
+
+static void
+tcpcreate(Conv *c)
+{ /* allocate the read and write queues of conversation c */
+ c->rq = qopen(QMAX, Qcoalesce, tcpacktimer, c); /* read queue: limit QMAX; tcpacktimer and c are passed as callback and argument */
+ c->wq = qopen((3*QMAX)/2, Qkick, tcpkick, c); /* write queue: one and a half times QMAX; tcpkick is its callback */
+}
+
+static void
+timerstate(Tcppriv *priv, Tcptimer *t, int newstate)
+{ /*
+ * Set t->state to newstate and keep the doubly linked list
+ * priv->timers in step: a timer is on the list exactly while its
+ * state is TcptimerON.  The callers in this file (tcpgo, tcphalt,
+ * tcpackproc) hold priv->tl around the call.  Panics if the links
+ * are inconsistent with the recorded state.
+ */
+ if(newstate != TcptimerON){
+ if(t->state == TcptimerON){
+ /* unchain */
+ if(priv->timers == t){
+ priv->timers = t->next;
+ if(t->prev != nil) /* the head of the list must not have a predecessor */
+ panic("timerstate1");
+ }
+ if(t->next)
+ t->next->prev = t->prev;
+ if(t->prev)
+ t->prev->next = t->next;
+ t->next = t->prev = nil;
+ }
+ } else {
+ if(t->state != TcptimerON){
+ /* chain */
+ if(t->prev != nil || t->next != nil) /* an idle timer must be fully unlinked */
+ panic("timerstate2");
+ t->prev = nil;
+ t->next = priv->timers; /* push on the front of the list */
+ if(t->next)
+ t->next->prev = t;
+ priv->timers = t;
+ }
+ }
+ t->state = newstate;
+}
+
+void
+tcpackproc(void *a)
+{ /*
+ * Timer process for one tcp Proto (a); started as a kproc by
+ * tcpstart.  Every MSPTICK ms it decrements each running timer,
+ * collects those that reach zero, calls their functions once the
+ * timer lock has been dropped, and finally lets limborexmit()
+ * service the calls in limbo.  Never returns.
+ */
+ Tcptimer *t, *tp, *timeo;
+ Proto *tcp;
+ Tcppriv *priv;
+ int loop;
+
+ tcp = a;
+ priv = tcp->priv;
+
+ for(;;) {
+ tsleep(&up->sleep, return0, 0, MSPTICK); /* wait one tick */
+
+ qlock(&priv->tl);
+ timeo = nil;
+ loop = 0;
+ for(t = priv->timers; t != nil; t = tp) {
+ if(loop++ > 10000) /* guard against a corrupted (circular) list */
+ panic("tcpackproc1");
+ tp = t->next; /* saved now: timerstate() below clears t->next */
+ if(t->state == TcptimerON) {
+ t->count--;
+ if(t->count == 0) {
+ timerstate(priv, t, TcptimerDONE);
+ t->readynext = timeo; /* push on the list of expired timers */
+ timeo = t;
+ }
+ }
+ }
+ qunlock(&priv->tl);
+
+ loop = 0;
+ for(t = timeo; t != nil; t = t->readynext) {
+ if(loop++ > 10000)
+ panic("tcpackproc2");
+ if(t->state == TcptimerDONE && t->func != nil && !waserror()){ /* skipped if the timer's state changed since it expired */
+ (*t->func)(t->arg);
+ poperror();
+ }
+ }
+
+ limborexmit(tcp);
+ }
+}
+
+/*
+ * (Re)start timer t: reload its count from t->start and put it on
+ * the active list.  A nil timer or one whose start value is 0 is
+ * left untouched.
+ */
+void
+tcpgo(Tcppriv *priv, Tcptimer *t)
+{
+	if(t != nil && t->start != 0){
+		qlock(&priv->tl);
+		t->count = t->start;
+		timerstate(priv, t, TcptimerON);
+		qunlock(&priv->tl);
+	}
+}
+
+/*
+ * Stop timer t, taking it off the active list if it is on it.
+ * A nil timer is ignored.
+ */
+void
+tcphalt(Tcppriv *priv, Tcptimer *t)
+{
+	if(t != nil){
+		qlock(&priv->tl);
+		timerstate(priv, t, TcptimerOFF);
+		qunlock(&priv->tl);
+	}
+}
+
+/*
+ * Exponential backoff multiplier: 2 to the power n.
+ *
+ * Shifting by a negative count, or far enough to reach the sign
+ * bit, is undefined in C, so n is clamped to the range that yields
+ * a positive int: a negative n gives 1 and a too large n gives the
+ * largest power of two an int can hold.
+ */
+int
+backoff(int n)
+{
+	int maxshift;
+
+	maxshift = 8*(int)sizeof(int) - 2;
+	if(n < 0)
+		n = 0;
+	else if(n > maxshift)
+		n = maxshift;
+	return 1 << n;
+}
+
+/*
+ * Tear conversation s down locally: take it out of the lookup
+ * table, stop its timers, discard the resequencing queue, tell
+ * anyone waiting on a connect or a listen, hang up the data queues
+ * with reason and enter Closed.
+ *
+ * called with tcb locked
+ */
+void
+localclose(Conv *s, char *reason)
+{
+	Tcppriv *tpriv;
+	Tcpctl *tcb;
+	Reseq *rp, *next;
+
+	tcb = (Tcpctl*)s->ptcl;
+	tpriv = s->p->priv;
+
+	iphtrem(&tpriv->ht, s);
+
+	tcphalt(tpriv, &tcb->timer);
+	tcphalt(tpriv, &tcb->rtt_timer);
+	tcphalt(tpriv, &tcb->acktimer);
+	tcphalt(tpriv, &tcb->katimer);
+
+	/* Flush reassembly queue; nothing more can arrive */
+	rp = tcb->reseq;
+	while(rp != nil){
+		next = rp->next;
+		freeblist(rp->bp);
+		free(rp);
+		rp = next;
+	}
+	tcb->reseq = nil;
+
+	/* a connect still in progress fails with reason */
+	if(tcb->state == Syn_sent)
+		Fsconnected(s, reason);
+	/* wake anyone sleeping in a listen on this conversation */
+	if(s->state == Announced)
+		wakeup(&s->listenr);
+
+	qhangup(s->rq, reason);
+	qhangup(s->wq, reason);
+
+	tcpsetstate(s, Closed);
+}
+
+/*
+ * mtu (- TCP + IP hdr len) of 1st hop
+ *
+ * Looks up the interface for addr.  Returns the segment size to
+ * use for the given ip version (the version's default when no
+ * interface is found) and stores in *scale the window scale to
+ * offer, picked from the interface speed and marked with HaveWS.
+ * Any version other than V6 is treated as V4.
+ */
+int
+tcpmtu(Proto *tcp, uchar *addr, int version, int *scale)
+{
+	Ipifc *ifc;
+	int mtu, shift;
+
+	ifc = findipifc(tcp->f, addr, 0);
+
+	if(version == V6){
+		mtu = DEF_MSS6;
+		if(ifc != nil)
+			mtu = ifc->maxtu - ifc->m->hsize - (TCP6_PKT + TCP6_HDRSIZE);
+	} else {
+		mtu = DEF_MSS;
+		if(ifc != nil)
+			mtu = ifc->maxtu - ifc->m->hsize - (TCP4_PKT + TCP4_HDRSIZE);
+	}
+
+	/* faster interfaces get a bigger window */
+	shift = 0;
+	if(ifc != nil){
+		if(ifc->mbps > 1000)
+			shift = 4;
+		else if(ifc->mbps > 100)
+			shift = 3;
+		else if(ifc->mbps > 10)
+			shift = 1;
+	}
+	*scale = HaveWS | shift;
+
+	return mtu;
+}
+
+/*
+ * Reset the tcp control block of conversation s to its defaults:
+ * clear it, set up the four timers, build the prototype header
+ * (unless the conversation is only going to listen) and select the
+ * default mss and an unscaled QMAX window.
+ * Panics on an ip version that is neither V4 nor V6.
+ */
+void
+inittcpctl(Conv *s, int mode)
+{
+	Tcpctl *tcb;
+	Tcp4hdr *h4;
+	Tcp6hdr *h6;
+	int mss;
+
+	tcb = (Tcpctl*)s->ptcl;
+	memset(tcb, 0, sizeof(Tcpctl));
+
+	tcb->ssthresh = 65535;
+	tcb->srtt = tcp_irtt<<LOGAGAIN;
+	tcb->mdev = 0;
+
+	/* setup timers; start values are in ticks of MSPTICK ms */
+	tcb->timer.start = tcp_irtt / MSPTICK;
+	tcb->timer.func = tcptimeout;
+	tcb->timer.arg = s;
+
+	tcb->rtt_timer.start = MAX_TIME;
+
+	tcb->acktimer.start = TCP_ACK / MSPTICK;
+	tcb->acktimer.func = tcpacktimer;
+	tcb->acktimer.arg = s;
+
+	tcb->katimer.start = DEF_KAT / MSPTICK;
+	tcb->katimer.func = tcpkeepalive;
+	tcb->katimer.arg = s;
+
+	mss = DEF_MSS;
+
+	/* create a prototype(pseudo) header */
+	if(mode != TCP_LISTEN){
+		if(ipcmp(s->laddr, IPnoaddr) == 0)
+			findlocalip(s->p->f, s->laddr, s->raddr);
+
+		if(s->ipversion == V4){
+			h4 = &tcb->protohdr.tcp4hdr;
+			memset(h4, 0, sizeof(*h4));
+			h4->proto = IP_TCPPROTO;
+			hnputs(h4->tcpsport, s->lport);
+			hnputs(h4->tcpdport, s->rport);
+			v6tov4(h4->tcpsrc, s->laddr);
+			v6tov4(h4->tcpdst, s->raddr);
+		} else if(s->ipversion == V6){
+			h6 = &tcb->protohdr.tcp6hdr;
+			memset(h6, 0, sizeof(*h6));
+			h6->proto = IP_TCPPROTO;
+			hnputs(h6->tcpsport, s->lport);
+			hnputs(h6->tcpdport, s->rport);
+			ipmove(h6->tcpsrc, s->laddr);
+			ipmove(h6->tcpdst, s->raddr);
+			mss = DEF_MSS6;
+		} else {
+			panic("inittcpctl: version %d", s->ipversion);
+		}
+	}
+
+	/* the congestion window opens at one segment */
+	tcb->cwind = mss;
+	tcb->mss = mss;
+
+	/* default is no window scaling */
+	tcb->window = QMAX;
+	tcb->rcv.wnd = QMAX;
+	tcb->rcv.scale = 0;
+	tcb->snd.scale = 0;
+	qsetlimit(s->rq, QMAX);
+}
+
+/*
+ * Start conversation s as a listener (TCP_LISTEN) or as an active
+ * open (TCP_CONNECT): make sure the timer kproc is running, reset
+ * the control block, enter the conversation in the lookup table
+ * and move to Listen, or queue a SYN and move to Syn_sent.
+ * Any other mode only initialises and registers the conversation.
+ *
+ * called with s QLOCKed
+ */
+void
+tcpstart(Conv *s, int mode)
+{
+	Tcpctl *tcb;
+	Tcppriv *tpriv;
+	char kpname[KNAMELEN];
+
+	tpriv = s->p->priv;
+
+	/*
+	 * one tcpackproc per protocol instance; the flag is tested
+	 * again under apl so that only one caller starts it
+	 */
+	if(tpriv->ackprocstarted == 0){
+		qlock(&tpriv->apl);
+		if(tpriv->ackprocstarted == 0){
+			/* bounded, so the name can never overrun kpname */
+			snprint(kpname, sizeof(kpname), "#I%dtcpack", s->p->f->dev);
+			kproc(kpname, tcpackproc, s->p);
+			tpriv->ackprocstarted = 1;
+		}
+		qunlock(&tpriv->apl);
+	}
+
+	tcb = (Tcpctl*)s->ptcl;
+
+	inittcpctl(s, mode);
+
+	iphtadd(&tpriv->ht, s);
+	switch(mode) {
+	case TCP_LISTEN:
+		tpriv->stats[PassiveOpens]++;
+		tcb->flags |= CLONE;
+		tcpsetstate(s, Listen);
+		break;
+
+	case TCP_CONNECT:
+		tpriv->stats[ActiveOpens]++;
+		tcb->flags |= ACTIVE;
+		tcpsndsyn(s, tcb);
+		tcpsetstate(s, Syn_sent);
+		tcpoutput(s);
+		break;
+	}
+}
+
+/*
+ * Render the 16-bit flag word of a tcp header as text: the value of
+ * its top six bits followed by the name of every flag bit that is
+ * set.  The result lives in a static buffer that the next call
+ * overwrites.
+ */
+static char*
+tcpflag(ushort flag)
+{
+	static char buf[128];
+	static struct {
+		int bit;
+		char *name;
+	} flagname[] = {
+		{ URG, " URG" },
+		{ ACK, " ACK" },
+		{ PSH, " PSH" },
+		{ RST, " RST" },
+		{ SYN, " SYN" },
+		{ FIN, " FIN" },
+	};
+	int i;
+
+	sprint(buf, "%d", flag>>10);	/* Head len */
+	for(i = 0; i < (int)(sizeof(flagname)/sizeof(flagname[0])); i++)
+		if(flag & flagname[i].bit)
+			strcat(buf, flagname[i].name);
+
+	return buf;
+}
+
+Block *
+htontcp6(Tcp *tcph, Block *data, Tcp6hdr *ph, Tcpctl *tcb)
+{ /*
+ * Put an ip6 + tcp header for segment tcph in front of data, or
+ * in a freshly allocated block when data is nil, and return the
+ * resulting block.  ph is the prototype header that supplies the
+ * addresses and ports.  tcb may be nil: the window is then sent
+ * unscaled and a checksum is always computed.  Returns nil when
+ * padblock or allocb returns nil.
+ */
+ int dlen;
+ Tcp6hdr *h;
+ ushort csum;
+ ushort hdrlen, optpad = 0;
+ uchar *opt;
+
+ hdrlen = TCP6_HDRSIZE;
+ if(tcph->flags & SYN){ /* options are only sent on SYN segments */
+ if(tcph->mss)
+ hdrlen += MSS_LENGTH;
+ if(tcph->ws)
+ hdrlen += WS_LENGTH;
+ optpad = hdrlen & 3; /* round the header up to a multiple of 4 bytes */
+ if(optpad)
+ optpad = 4 - optpad;
+ hdrlen += optpad;
+ }
+
+ if(data) {
+ dlen = blocklen(data);
+ data = padblock(data, hdrlen + TCP6_PKT);
+ if(data == nil)
+ return nil;
+ }
+ else {
+ dlen = 0;
+ data = allocb(hdrlen + TCP6_PKT + 64); /* the 64 pad is to meet mintu's */
+ if(data == nil)
+ return nil;
+ data->wp += hdrlen + TCP6_PKT;
+ }
+
+ /* copy in pseudo ip header plus port numbers */
+ h = (Tcp6hdr *)(data->rp);
+ memmove(h, ph, TCP6_TCBPHDRSZ);
+
+ /* compose pseudo tcp header, do cksum calculation */
+ hnputl(h->vcf, hdrlen + dlen); /* the first 4 bytes hold the tcp length for now */
+ h->ploadlen[0] = h->ploadlen[1] = h->proto = 0;
+ h->ttl = ph->proto; /* the protocol number sits in the ttl byte for now */
+
+ /* copy in variable bits */
+ hnputl(h->tcpseq, tcph->seq);
+ hnputl(h->tcpack, tcph->ack);
+ hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags); /* header length in 4-byte words goes in the top 4 bits */
+ hnputs(h->tcpwin, tcph->wnd>>(tcb != nil ? tcb->snd.scale : 0));
+ hnputs(h->tcpurg, tcph->urg);
+
+ if(tcph->flags & SYN){
+ opt = h->tcpopt;
+ if(tcph->mss != 0){
+ *opt++ = MSSOPT;
+ *opt++ = MSS_LENGTH;
+ hnputs(opt, tcph->mss);
+ opt += 2;
+ }
+ if(tcph->ws != 0){
+ *opt++ = WSOPT;
+ *opt++ = WS_LENGTH;
+ *opt++ = tcph->ws; /* only the low byte is stored, which drops the HaveWS marker */
+ }
+ while(optpad-- > 0)
+ *opt++ = NOOPOPT;
+ }
+
+ if(tcb != nil && tcb->nochecksum){
+ h->tcpcksum[0] = h->tcpcksum[1] = 0;
+ } else {
+ csum = ptclcsum(data, TCP6_IPLEN, hdrlen+dlen+TCP6_PHDRSIZE);
+ hnputs(h->tcpcksum, csum);
+ }
+
+ /* move from pseudo header back to normal ip header */
+ memset(h->vcf, 0, 4);
+ h->vcf[0] = IP_VER6;
+ hnputs(h->ploadlen, hdrlen+dlen);
+ h->proto = ph->proto;
+
+ return data;
+}
+
+Block *
+htontcp4(Tcp *tcph, Block *data, Tcp4hdr *ph, Tcpctl *tcb)
+{ /*
+ * Put an ip4 + tcp header for segment tcph in front of data, or
+ * in a freshly allocated block when data is nil, and return the
+ * resulting block.  ph is the prototype header that supplies the
+ * addresses and ports.  tcb may be nil: the window is then sent
+ * unscaled and a checksum is always computed.  Returns nil when
+ * padblock or allocb returns nil.
+ */
+ int dlen;
+ Tcp4hdr *h;
+ ushort csum;
+ ushort hdrlen, optpad = 0;
+ uchar *opt;
+
+ hdrlen = TCP4_HDRSIZE;
+ if(tcph->flags & SYN){ /* options are only sent on SYN segments */
+ if(tcph->mss)
+ hdrlen += MSS_LENGTH;
+ if(tcph->ws)
+ hdrlen += WS_LENGTH;
+ optpad = hdrlen & 3; /* round the header up to a multiple of 4 bytes */
+ if(optpad)
+ optpad = 4 - optpad;
+ hdrlen += optpad;
+ }
+
+ if(data) {
+ dlen = blocklen(data);
+ data = padblock(data, hdrlen + TCP4_PKT);
+ if(data == nil)
+ return nil;
+ }
+ else {
+ dlen = 0;
+ data = allocb(hdrlen + TCP4_PKT + 64); /* the 64 pad is to meet mintu's */
+ if(data == nil)
+ return nil;
+ data->wp += hdrlen + TCP4_PKT;
+ }
+
+ /* copy in pseudo ip header plus port numbers */
+ h = (Tcp4hdr *)(data->rp);
+ memmove(h, ph, TCP4_TCBPHDRSZ);
+
+ /* copy in variable bits */
+ hnputs(h->tcplen, hdrlen + dlen);
+ hnputl(h->tcpseq, tcph->seq);
+ hnputl(h->tcpack, tcph->ack);
+ hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags); /* header length in 4-byte words goes in the top 4 bits */
+ hnputs(h->tcpwin, tcph->wnd>>(tcb != nil ? tcb->snd.scale : 0));
+ hnputs(h->tcpurg, tcph->urg);
+
+ if(tcph->flags & SYN){
+ opt = h->tcpopt;
+ if(tcph->mss != 0){
+ *opt++ = MSSOPT;
+ *opt++ = MSS_LENGTH;
+ hnputs(opt, tcph->mss);
+ opt += 2;
+ }
+ if(tcph->ws != 0){
+ *opt++ = WSOPT;
+ *opt++ = WS_LENGTH;
+ *opt++ = tcph->ws; /* only the low byte is stored, which drops the HaveWS marker */
+ }
+ while(optpad-- > 0)
+ *opt++ = NOOPOPT;
+ }
+
+ if(tcb != nil && tcb->nochecksum){
+ h->tcpcksum[0] = h->tcpcksum[1] = 0;
+ } else {
+ csum = ptclcsum(data, TCP4_IPLEN, hdrlen+dlen+TCP4_PHDRSIZE); /* covers the pseudo header, tcp header and data */
+ hnputs(h->tcpcksum, csum);
+ }
+
+ return data;
+}
+
+/*
+ * Decode the tcp header of the ip6 packet in *bpp into tcph (host
+ * byte order), including the MSS and window scale options.
+ *
+ * Returns the tcp header length in bytes, or -1 if the packet is too
+ * short or the header length field is smaller than TCP6_HDRSIZE.
+ * On -1 the block list has been released.  *bpp is updated because
+ * pullupblock may hand back a different block.
+ */
+int
+ntohtcp6(Tcp *tcph, Block **bpp)
+{
+	Tcp6hdr *h;
+	uchar *optr;
+	ushort hdrlen;
+	ushort optlen;
+	int n;
+
+	*bpp = pullupblock(*bpp, TCP6_PKT+TCP6_HDRSIZE);
+	if(*bpp == nil)
+		return -1;
+
+	h = (Tcp6hdr *)((*bpp)->rp);
+	tcph->source = nhgets(h->tcpsport);
+	tcph->dest = nhgets(h->tcpdport);
+	tcph->seq = nhgetl(h->tcpseq);
+	tcph->ack = nhgetl(h->tcpack);
+	/* the top 4 bits count 4-byte words; this yields bytes */
+	hdrlen = (h->tcpflag[0]>>2) & ~3;
+	if(hdrlen < TCP6_HDRSIZE) {
+		freeblist(*bpp);
+		return -1;
+	}
+
+	tcph->flags = h->tcpflag[1];
+	tcph->wnd = nhgets(h->tcpwin);
+	tcph->urg = nhgets(h->tcpurg);
+	tcph->mss = 0;
+	tcph->ws = 0;
+	tcph->len = nhgets(h->ploadlen) - hdrlen;
+
+	*bpp = pullupblock(*bpp, hdrlen+TCP6_PKT);
+	if(*bpp == nil)
+		return -1;
+
+	/* the second pullup may have moved the data: refresh h */
+	h = (Tcp6hdr *)((*bpp)->rp);
+
+	optr = h->tcpopt;
+	n = hdrlen - TCP6_HDRSIZE;
+	while(n > 0 && *optr != EOLOPT) {
+		if(*optr == NOOPOPT) {
+			n--;
+			optr++;
+			continue;
+		}
+		/* every other option carries a length byte */
+		if(n < 2)
+			break;
+		optlen = optr[1];
+		if(optlen < 2 || optlen > n)
+			break;
+		switch(*optr) {
+		case MSSOPT:
+			if(optlen == MSS_LENGTH)
+				tcph->mss = nhgets(optr+2);
+			break;
+		case WSOPT:
+			if(optlen == WS_LENGTH && *(optr+2) <= 14)
+				tcph->ws = HaveWS | *(optr+2);
+			break;
+		}
+		n -= optlen;
+		optr += optlen;
+	}
+	return hdrlen;
+}
+
+/*
+ * Decode the tcp header of the ip4 packet in *bpp into tcph (host
+ * byte order), including the MSS and window scale options.
+ *
+ * Returns the tcp header length in bytes, or -1 if the packet is too
+ * short or the header length field is smaller than TCP4_HDRSIZE.
+ * On -1 the block list has been released.  *bpp is updated because
+ * pullupblock may hand back a different block.
+ */
+int
+ntohtcp4(Tcp *tcph, Block **bpp)
+{
+	Tcp4hdr *h;
+	uchar *optr;
+	ushort hdrlen;
+	ushort optlen;
+	int n;
+
+	*bpp = pullupblock(*bpp, TCP4_PKT+TCP4_HDRSIZE);
+	if(*bpp == nil)
+		return -1;
+
+	h = (Tcp4hdr *)((*bpp)->rp);
+	tcph->source = nhgets(h->tcpsport);
+	tcph->dest = nhgets(h->tcpdport);
+	tcph->seq = nhgetl(h->tcpseq);
+	tcph->ack = nhgetl(h->tcpack);
+
+	/* the top 4 bits count 4-byte words; this yields bytes */
+	hdrlen = (h->tcpflag[0]>>2) & ~3;
+	if(hdrlen < TCP4_HDRSIZE) {
+		freeblist(*bpp);
+		return -1;
+	}
+
+	tcph->flags = h->tcpflag[1];
+	tcph->wnd = nhgets(h->tcpwin);
+	tcph->urg = nhgets(h->tcpurg);
+	tcph->mss = 0;
+	tcph->ws = 0;
+	tcph->len = nhgets(h->length) - (hdrlen + TCP4_PKT);
+
+	*bpp = pullupblock(*bpp, hdrlen+TCP4_PKT);
+	if(*bpp == nil)
+		return -1;
+
+	/* the second pullup may have moved the data: refresh h */
+	h = (Tcp4hdr *)((*bpp)->rp);
+
+	optr = h->tcpopt;
+	n = hdrlen - TCP4_HDRSIZE;
+	while(n > 0 && *optr != EOLOPT) {
+		if(*optr == NOOPOPT) {
+			n--;
+			optr++;
+			continue;
+		}
+		/* every other option carries a length byte */
+		if(n < 2)
+			break;
+		optlen = optr[1];
+		if(optlen < 2 || optlen > n)
+			break;
+		switch(*optr) {
+		case MSSOPT:
+			if(optlen == MSS_LENGTH)
+				tcph->mss = nhgets(optr+2);
+			break;
+		case WSOPT:
+			if(optlen == WS_LENGTH && *(optr+2) <= 14)
+				tcph->ws = HaveWS | *(optr+2);
+			break;
+		}
+		n -= optlen;
+		optr += optlen;
+	}
+	return hdrlen;
+}
+
+/*
+ * For outgoing calls, generate an initial sequence
+ * number and put a SYN on the send queue.
+ * Every send sequence variable starts out at the new number.
+ */
+void
+tcpsndsyn(Conv *s, Tcpctl *tcb)
+{
+	tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16);
+
+	tcb->snd.una = tcb->iss;
+	tcb->snd.wl2 = tcb->iss;
+	tcb->rttseq = tcb->iss;
+	tcb->snd.nxt = tcb->rttseq;
+	tcb->snd.ptr = tcb->rttseq;
+
+	/* the SYN counts as one flag in the sequence space */
+	tcb->flgcnt++;
+	tcb->flags |= FORCE;
+	tcb->sndsyntime = NOW;
+
+	/* set desired mss and scale */
+	tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale);
+}
+
+void
+sndrst(Proto *tcp, uchar *source, uchar *dest, ushort length, Tcp *seg, uchar version, char *reason)
+{ /*
+ * Answer segment seg with a RST.  source and dest are the
+ * addresses of the offending segment, so the reset goes from dest
+ * back to source with the ports swapped as well.  length is added
+ * to the acknowledgement when the segment carried no ACK.  reason
+ * is only logged.  Nothing is sent in reply to a segment that is
+ * itself a RST.  seg is overwritten with the reply's values.
+ * Panics on a version that is neither V4 nor V6.
+ */
+ Block *hbp;
+ uchar rflags;
+ Tcppriv *tpriv;
+ Tcp4hdr ph4;
+ Tcp6hdr ph6;
+
+ netlog(tcp->f, Logtcp, "sndrst: %s\n", reason);
+
+ tpriv = tcp->priv;
+
+ if(seg->flags & RST)
+ return;
+
+ /* make pseudo header */
+ switch(version) {
+ case V4:
+ memset(&ph4, 0, sizeof(ph4));
+ ph4.vihl = IP_VER4;
+ v6tov4(ph4.tcpsrc, dest);
+ v6tov4(ph4.tcpdst, source);
+ ph4.proto = IP_TCPPROTO;
+ hnputs(ph4.tcplen, TCP4_HDRSIZE);
+ hnputs(ph4.tcpsport, seg->dest);
+ hnputs(ph4.tcpdport, seg->source);
+ break;
+ case V6:
+ memset(&ph6, 0, sizeof(ph6));
+ ph6.vcf[0] = IP_VER6;
+ ipmove(ph6.tcpsrc, dest);
+ ipmove(ph6.tcpdst, source);
+ ph6.proto = IP_TCPPROTO;
+ hnputs(ph6.ploadlen, TCP6_HDRSIZE);
+ hnputs(ph6.tcpsport, seg->dest);
+ hnputs(ph6.tcpdport, seg->source);
+ break;
+ default:
+ panic("sndrst: version %d", version);
+ }
+
+ tpriv->stats[OutRsts]++;
+ rflags = RST;
+
+ /* convince the other end that this reset is in band */
+ if(seg->flags & ACK) {
+ seg->seq = seg->ack; /* take the sequence number the peer said it expects */
+ seg->ack = 0;
+ }
+ else {
+ rflags |= ACK;
+ seg->ack = seg->seq; /* acknowledge everything the segment occupied: SYN, data and FIN */
+ seg->seq = 0;
+ if(seg->flags & SYN)
+ seg->ack++;
+ seg->ack += length;
+ if(seg->flags & FIN)
+ seg->ack++;
+ }
+ seg->flags = rflags;
+ seg->wnd = 0;
+ seg->urg = 0;
+ seg->mss = 0;
+ seg->ws = 0;
+ switch(version) {
+ case V4:
+ hbp = htontcp4(seg, nil, &ph4, nil);
+ if(hbp == nil)
+ return;
+ ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
+ break;
+ case V6:
+ hbp = htontcp6(seg, nil, &ph6, nil);
+ if(hbp == nil)
+ return;
+ ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
+ break;
+ default:
+ panic("sndrst2: version %d", version);
+ }
+}
+
+/*
+ * send a reset to the remote side and close the conversation
+ * called with s QLOCKed
+ *
+ * The reset is only attempted when a remote address is set, and
+ * an error raised while sending it does not stop the local close.
+ * Returns nil, or the result of commonerror() when the close
+ * itself raises an error.
+ */
+char*
+tcphangup(Conv *s)
+{
+	Tcp seg;
+	Tcpctl *tcb;
+	Block *hbp;
+
+	tcb = (Tcpctl*)s->ptcl;
+	if(waserror())
+		return commonerror();
+	if(ipcmp(s->raddr, IPnoaddr) != 0) {
+		if(!waserror()){
+			/* start from a fully defined segment */
+			memset(&seg, 0, sizeof(seg));
+			seg.flags = RST | ACK;
+			seg.ack = tcb->rcv.nxt;
+			tcb->rcv.una = 0;
+			seg.seq = tcb->snd.ptr;
+			seg.wnd = 0;
+			seg.urg = 0;
+			seg.mss = 0;
+			seg.ws = 0;
+			switch(s->ipversion) {
+			case V4:
+				tcb->protohdr.tcp4hdr.vihl = IP_VER4;
+				hbp = htontcp4(&seg, nil, &tcb->protohdr.tcp4hdr, tcb);
+				/* like sndrst: nothing to send without a header block */
+				if(hbp != nil)
+					ipoput4(s->p->f, hbp, 0, s->ttl, s->tos, s);
+				break;
+			case V6:
+				tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
+				hbp = htontcp6(&seg, nil, &tcb->protohdr.tcp6hdr, tcb);
+				if(hbp != nil)
+					ipoput6(s->p->f, hbp, 0, s->ttl, s->tos, s);
+				break;
+			default:
+				panic("tcphangup: version %d", s->ipversion);
+			}
+			poperror();
+		}
+	}
+	localclose(s, nil);
+	poperror();
+	return nil;
+}
+
+/*
+ * (re)send a SYN ACK
+ *
+ * Builds the reply for the call in limbo lp from scratch (no Conv
+ * exists yet), offering a window scale only if the caller offered
+ * one, and records the send time in lp->lastsend.
+ * Returns 0 on success and -1 if no header block could be built.
+ * Panics on a version that is neither V4 nor V6.
+ */
+int
+sndsynack(Proto *tcp, Limbo *lp)
+{
+	Block *hbp;
+	Tcp4hdr ph4;
+	Tcp6hdr ph6;
+	Tcp seg;
+	int scale;
+
+	/* make pseudo header */
+	switch(lp->version) {
+	case V4:
+		memset(&ph4, 0, sizeof(ph4));
+		ph4.vihl = IP_VER4;
+		v6tov4(ph4.tcpsrc, lp->laddr);
+		v6tov4(ph4.tcpdst, lp->raddr);
+		ph4.proto = IP_TCPPROTO;
+		hnputs(ph4.tcplen, TCP4_HDRSIZE);
+		hnputs(ph4.tcpsport, lp->lport);
+		hnputs(ph4.tcpdport, lp->rport);
+		break;
+	case V6:
+		memset(&ph6, 0, sizeof(ph6));
+		ph6.vcf[0] = IP_VER6;
+		ipmove(ph6.tcpsrc, lp->laddr);
+		ipmove(ph6.tcpdst, lp->raddr);
+		ph6.proto = IP_TCPPROTO;
+		hnputs(ph6.ploadlen, TCP6_HDRSIZE);
+		hnputs(ph6.tcpsport, lp->lport);
+		hnputs(ph6.tcpdport, lp->rport);
+		break;
+	default:
+		panic("sndsynack: version %d", lp->version);
+	}
+
+	seg.seq = lp->iss;
+	seg.ack = lp->irs+1;
+	seg.flags = SYN|ACK;
+	seg.urg = 0;
+	seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale);
+	seg.wnd = QMAX;
+
+	/* if the other side set scale, we should too */
+	if(lp->rcvscale){
+		seg.ws = scale;
+		lp->sndscale = scale;
+	} else {
+		seg.ws = 0;
+		lp->sndscale = 0;
+	}
+
+	switch(lp->version) {
+	case V4:
+		hbp = htontcp4(&seg, nil, &ph4, nil);
+		if(hbp == nil)
+			return -1;
+		ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
+		break;
+	case V6:
+		hbp = htontcp6(&seg, nil, &ph6, nil);
+		if(hbp == nil)
+			return -1;
+		ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
+		break;
+	default:
+		panic("sndsynack2: version %d", lp->version);
+	}
+	lp->lastsend = NOW;
+	return 0;
+}
+
+/* limbo hash bucket for remote address a (IPaddrlen bytes) and remote port p */
+#define hashipa(a, p) ( ( (a)[IPaddrlen-2] + (a)[IPaddrlen-1] + (p) )&LHTMASK )
+
+/*
+ * put a call into limbo and respond with a SYN ACK
+ *
+ * A SYN that repeats a call already in limbo only refreshes the
+ * recorded initial sequence number.  When the table is full, the
+ * entry at the head of the bucket is recycled for the new call.
+ * If the SYN ACK cannot be sent the call is dropped again.
+ *
+ * called with proto locked
+ */
+static void
+limbo(Conv *s, uchar *source, uchar *dest, Tcp *seg, int version)
+{
+	Limbo *lp, **l;
+	Tcppriv *tpriv;
+	int h;
+
+	tpriv = s->p->priv;
+	h = hashipa(source, seg->source);
+
+	for(l = &tpriv->lht[h]; *l != nil; l = &lp->next){
+		lp = *l;
+		if(lp->lport != seg->dest || lp->rport != seg->source || lp->version != version)
+			continue;
+		if(ipcmp(lp->raddr, source) != 0)
+			continue;
+		if(ipcmp(lp->laddr, dest) != 0)
+			continue;
+
+		/* each new SYN restarts the retransmits */
+		lp->irs = seg->seq;
+		break;
+	}
+	lp = *l;
+	if(lp == nil){
+		/* not found: l now addresses the nil link that ends the bucket */
+		if(tpriv->nlimbo >= Maxlimbo && tpriv->lht[h]){
+			lp = tpriv->lht[h];
+			tpriv->lht[h] = lp->next;
+			lp->next = nil;
+			/*
+			 * if lp was the only entry, l addresses lp's own
+			 * link; re-aim it at the (now empty) bucket head
+			 * so lp is not linked to itself
+			 */
+			if(l == &lp->next)
+				l = &tpriv->lht[h];
+		} else {
+			lp = malloc(sizeof(*lp));
+			if(lp == nil)
+				return;
+			lp->next = nil;
+			tpriv->nlimbo++;
+		}
+		*l = lp;
+		lp->version = version;
+		ipmove(lp->laddr, dest);
+		ipmove(lp->raddr, source);
+		lp->lport = seg->dest;
+		lp->rport = seg->source;
+		lp->mss = seg->mss;
+		lp->rcvscale = seg->ws;
+		lp->irs = seg->seq;
+		lp->iss = (nrand(1<<16)<<16)|nrand(1<<16);
+		/* a recycled entry must not inherit the old call's count */
+		lp->rexmits = 0;
+	}
+
+	if(sndsynack(s->p, lp) < 0){
+		*l = lp->next;
+		tpriv->nlimbo--;
+		free(lp);
+	}
+}
+
+/*
+ * resend SYN ACK's: a call comes due again (rexmits+1)*SYNACK_RXTIMER
+ * ms after its last SYN ACK was sent.
+ *
+ * Called from tcpackproc once per tick.  Does nothing if the proto
+ * lock cannot be taken without waiting.  Every entry is visited at
+ * most once per call: entries that stay in the table are stepped
+ * over, entries that are dropped are unlinked in place.
+ */
+static void
+limborexmit(Proto *tcp)
+{
+	Tcppriv *tpriv;
+	Limbo **l, *lp;
+	int h;
+	int seen;
+	int drop;
+	ulong now;
+
+	tpriv = tcp->priv;
+
+	if(!CANQLOCK(tcp))
+		return;
+	seen = 0;	/* entries visited that are still in the table */
+	now = NOW;
+	for(h = 0; h < NLHT && seen < tpriv->nlimbo; h++){
+		for(l = &tpriv->lht[h]; (lp = *l) != nil && seen < tpriv->nlimbo; ){
+			drop = 0;
+			if(now - lp->lastsend >= (lp->rexmits+1)*SYNACK_RXTIMER){
+				if(++(lp->rexmits) > 5){
+					/* came due too often: time it out */
+					drop = 1;
+				} else if(tpriv->nlimbo > 100){
+					/* if we're being attacked, don't bother resending SYN ACK's */
+				} else if(sndsynack(tcp, lp) < 0){
+					drop = 1;
+				}
+			}
+
+			if(drop){
+				/* unlink; *l now names the following entry */
+				tpriv->nlimbo--;
+				*l = lp->next;
+				free(lp);
+				continue;
+			}
+
+			seen++;
+			l = &lp->next;
+		}
+	}
+	QUNLOCK(tcp);
+}
+
+/*
+ * lookup call in limbo. if found, throw it out.
+ *
+ * The call is matched on ports, ip version and both addresses; it
+ * is only discarded when the RST carries the sequence number that
+ * directly follows the caller's SYN.
+ *
+ * called with proto locked
+ */
+static void
+limborst(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version)
+{
+	Tcppriv *tpriv;
+	Limbo *lp, **l;
+	int h;
+
+	tpriv = s->p->priv;
+
+	/* find a call in limbo */
+	h = hashipa(src, segp->source);
+	for(l = &tpriv->lht[h]; (lp = *l) != nil; l = &lp->next){
+		if(lp->lport == segp->dest && lp->rport == segp->source && lp->version == version
+		&& ipcmp(lp->laddr, dst) == 0
+		&& ipcmp(lp->raddr, src) == 0){
+			/* RST can only follow the SYN */
+			if(segp->seq == lp->irs+1){
+				tpriv->nlimbo--;
+				*l = lp->next;
+				free(lp);
+			}
+			break;
+		}
+	}
+}
+
+/*
+ * come here when we finally get an ACK to our SYN-ACK.
+ * lookup call in limbo.  if found, create a new conversation
+ *
+ * Returns the new conversation, already in state Established and
+ * entered in the conversation hash table, or nil when the segment
+ * is not a plain ACK, no matching call is in limbo, the sequence
+ * numbers do not match, or no conversation could be allocated.
+ * The limbo entry is always freed once it has been taken off its
+ * chain.
+ *
+ * called with proto locked
+ */
+static Conv*
+tcpincoming(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version)
+{
+	Conv *new;
+	Tcpctl *tcb;
+	Tcppriv *tpriv;
+	Tcp4hdr *h4;
+	Tcp6hdr *h6;
+	Limbo *lp, **l;
+	int h;
+
+	/* unless it's just an ack, it can't be someone coming out of limbo */
+	if((segp->flags & SYN) || (segp->flags & ACK) == 0)
+		return nil;
+
+	tpriv = s->p->priv;
+
+	/* find a call in limbo */
+	h = hashipa(src, segp->source);
+	for(l = &tpriv->lht[h]; (lp = *l) != nil; l = &lp->next){
+		netlog(s->p->f, Logtcp, "tcpincoming s %I,%ux/%I,%ux d %I,%ux/%I,%ux v %d/%d\n",
+			src, segp->source, lp->raddr, lp->rport,
+			dst, segp->dest, lp->laddr, lp->lport,
+			version, lp->version
+		);
+
+		if(lp->lport != segp->dest || lp->rport != segp->source || lp->version != version)
+			continue;
+		if(ipcmp(lp->laddr, dst) != 0)
+			continue;
+		if(ipcmp(lp->raddr, src) != 0)
+			continue;
+
+		/* we're assuming no data with the initial SYN */
+		if(segp->seq != lp->irs+1 || segp->ack != lp->iss+1){
+			netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux\n",
+				segp->seq, lp->irs+1, segp->ack, lp->iss+1);
+			lp = nil;
+		} else {
+			/* take the call out of limbo; lp is ours from here on */
+			tpriv->nlimbo--;
+			*l = lp->next;
+		}
+		break;
+	}
+	if(lp == nil)
+		return nil;
+
+	new = Fsnewcall(s, src, segp->source, dst, segp->dest, version);
+	if(new == nil){
+		/* lp is already unlinked, so it must be released here */
+		free(lp);
+		return nil;
+	}
+
+	/* start from a copy of the listener's control block */
+	memmove(new->ptcl, s->ptcl, sizeof(Tcpctl));
+	tcb = (Tcpctl*)new->ptcl;
+	tcb->flags &= ~CLONE;
+	tcb->timer.arg = new;
+	tcb->timer.state = TcptimerOFF;
+	tcb->acktimer.arg = new;
+	tcb->acktimer.state = TcptimerOFF;
+	tcb->katimer.arg = new;
+	tcb->katimer.state = TcptimerOFF;
+	tcb->rtt_timer.arg = new;
+	tcb->rtt_timer.state = TcptimerOFF;
+
+	tcb->irs = lp->irs;
+	tcb->rcv.nxt = tcb->irs+1;
+	tcb->rcv.urg = tcb->rcv.nxt;
+
+	tcb->iss = lp->iss;
+	tcb->rttseq = tcb->iss;
+	tcb->snd.wl2 = tcb->iss;
+	tcb->snd.una = tcb->iss+1;
+	tcb->snd.ptr = tcb->iss+1;
+	tcb->snd.nxt = tcb->iss+1;
+	tcb->flgcnt = 0;
+	tcb->flags |= SYNACK;
+
+	/* our sending max segment size cannot be bigger than what he asked for */
+	if(lp->mss != 0 && lp->mss < tcb->mss)
+		tcb->mss = lp->mss;
+
+	/* window scaling */
+	tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale);
+
+	/* the congestion window always starts out as a single segment */
+	tcb->snd.wnd = segp->wnd;
+	tcb->cwind = tcb->mss;
+
+	/* set initial round trip time */
+	tcb->sndsyntime = lp->lastsend+lp->rexmits*SYNACK_RXTIMER;
+	tcpsynackrtt(new);
+
+	free(lp);
+
+	/* set up proto header */
+	switch(version){
+	case V4:
+		h4 = &tcb->protohdr.tcp4hdr;
+		memset(h4, 0, sizeof(*h4));
+		h4->proto = IP_TCPPROTO;
+		hnputs(h4->tcpsport, new->lport);
+		hnputs(h4->tcpdport, new->rport);
+		v6tov4(h4->tcpsrc, dst);
+		v6tov4(h4->tcpdst, src);
+		break;
+	case V6:
+		h6 = &tcb->protohdr.tcp6hdr;
+		memset(h6, 0, sizeof(*h6));
+		h6->proto = IP_TCPPROTO;
+		hnputs(h6->tcpsport, new->lport);
+		hnputs(h6->tcpdport, new->rport);
+		ipmove(h6->tcpsrc, dst);
+		ipmove(h6->tcpdst, src);
+		break;
+	default:
+		panic("tcpincoming: version %d", new->ipversion);
+	}
+
+	tcpsetstate(new, Established);
+
+	iphtadd(&tpriv->ht, new);
+
+	return new;
+}
+
+/*
+ * return 1 if x lies in the inclusive range [low, high], where the
+ * range may wrap around the top of the sequence space (low > high).
+ */
+int
+seq_within(ulong x, ulong low, ulong high)
+{
+	/* wrapped range: x is inside if it is on either side of the wrap */
+	if(low > high)
+		return x >= low || x <= high;
+
+	return low <= x && x <= high;
+}
+
+/* sequence number comparison x < y, judged by the sign of the difference */
+int
+seq_lt(ulong x, ulong y)
+{
+	int diff;
+
+	diff = (int)(x-y);
+	return diff < 0;
+}
+
+/* sequence number comparison x <= y, judged by the sign of the difference */
+int
+seq_le(ulong x, ulong y)
+{
+	int diff;
+
+	diff = (int)(x-y);
+	return diff <= 0;
+}
+
+/* sequence number comparison x > y, judged by the sign of the difference */
+int
+seq_gt(ulong x, ulong y)
+{
+	int diff;
+
+	diff = (int)(x-y);
+	return diff > 0;
+}
+
+/* sequence number comparison x >= y, judged by the sign of the difference */
+int
+seq_ge(ulong x, ulong y)
+{
+	int diff;
+
+	diff = (int)(x-y);
+	return diff >= 0;
+}
+
+/*
+ * use the time between the first SYN and its ack as the
+ * initial round trip time: seed srtt and mdev from the time
+ * elapsed since sndsyntime, then stop the round trip timer.
+ */
+void
+tcpsynackrtt(Conv *s)
+{
+	Tcpctl *ctl;
+	int elapsed;
+
+	ctl = (Tcpctl*)s->ptcl;
+
+	elapsed = NOW - ctl->sndsyntime;
+	ctl->srtt = elapsed<<LOGAGAIN;
+	ctl->mdev = elapsed<<LOGDGAIN;
+
+	/* halt round trip timer */
+	tcphalt(s->p->priv, &ctl->rtt_timer);
+}
+
+/*
+ * process the acknowledgement carried by an incoming segment:
+ * count duplicate acks (starting a fast retransmit at
+ * TCPREXMTTHRESH), track the peer's window, grow the congestion
+ * window, sample the round trip time, discard acked bytes from
+ * the write queue and restart or halt the retransmit timer.
+ */
+void
+update(Conv *s, Tcp *seg)
+{
+ int rtt, delta;
+ Tcpctl *tcb;
+ ulong acked;
+ ulong expand;
+ Tcppriv *tpriv;
+
+ tpriv = s->p->priv;
+ tcb = (Tcpctl*)s->ptcl;
+
+ /* the ack is beyond anything we have sent: set FORCE and ignore it */
+ if(seq_gt(seg->ack, tcb->snd.nxt)) {
+ tcb->flags |= FORCE;
+ return;
+ }
+
+ /* added by Dong Lin for fast retransmission */
+ if(seg->ack == tcb->snd.una
+ && tcb->snd.una != tcb->snd.nxt
+ && seg->len == 0
+ && seg->wnd == tcb->snd.wnd) {
+
+ /* this is a pure ack w/o window update */
+ netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %d advwin %d\n",
+ tcb->snd.dupacks, seg->ack, tcb->snd.wnd, seg->wnd);
+
+ if(++tcb->snd.dupacks == TCPREXMTTHRESH) {
+ /*
+ * tahoe tcp rxt the packet, half ssthresh,
+ * and set cwnd to one packet
+ */
+ tcb->snd.recovery = 1;
+ tcb->snd.rxt = tcb->snd.nxt;
+ netlog(s->p->f, Logtcprxmt, "fast rxt %lud, nxt %lud\n", tcb->snd.una, tcb->snd.nxt);
+ tcprxmit(s);
+ } else {
+ /* do reno tcp here. */
+ }
+ }
+
+ /*
+ * update window
+ */
+ if(seq_gt(seg->ack, tcb->snd.wl2)
+ || (tcb->snd.wl2 == seg->ack && seg->wnd > tcb->snd.wnd)){
+ tcb->snd.wnd = seg->wnd;
+ tcb->snd.wl2 = seg->ack;
+ }
+
+ /* nothing new is acknowledged by this segment */
+ if(!seq_gt(seg->ack, tcb->snd.una)){
+ /*
+ * don't let us hangup if sending into a closed window and
+ * we're still getting acks
+ */
+ if((tcb->flags&RETRAN) && tcb->snd.wnd == 0){
+ tcb->backedoff = MAXBACKMS/4;
+ }
+ return;
+ }
+
+ /*
+ * any positive ack turns off fast rxt,
+ * (should we do new-reno on partial acks?)
+ */
+ if(!tcb->snd.recovery || seq_ge(seg->ack, tcb->snd.rxt)) {
+ tcb->snd.dupacks = 0;
+ tcb->snd.recovery = 0;
+ } else
+ netlog(s->p->f, Logtcp, "rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind);
+
+ /* Compute the new send window size */
+ acked = seg->ack - tcb->snd.una;
+
+ /* avoid slow start and timers for SYN acks */
+ if((tcb->flags & SYNACK) == 0) {
+ tcb->flags |= SYNACK;
+ /* one acked sequence number belongs to the SYN, not to queued data */
+ acked--;
+ tcb->flgcnt--;
+ goto done;
+ }
+
+ /* slow start as long as we're not recovering from lost packets */
+ if(tcb->cwind < tcb->snd.wnd && !tcb->snd.recovery) {
+ if(tcb->cwind < tcb->ssthresh) {
+ expand = tcb->mss;
+ if(acked < expand)
+ expand = acked;
+ }
+ else
+ expand = ((int)tcb->mss * tcb->mss) / tcb->cwind;
+
+ /* clamp on arithmetic wrap and at the peer's window */
+ if(tcb->cwind + expand < tcb->cwind)
+ expand = tcb->snd.wnd - tcb->cwind;
+ if(tcb->cwind + expand > tcb->snd.wnd)
+ expand = tcb->snd.wnd - tcb->cwind;
+ tcb->cwind += expand;
+ }
+
+ /* Adjust the timers according to the round trip time */
+ if(tcb->rtt_timer.state == TcptimerON && seq_ge(seg->ack, tcb->rttseq)) {
+ tcphalt(tpriv, &tcb->rtt_timer);
+ /* only take a sample when the data was not retransmitted */
+ if((tcb->flags&RETRAN) == 0) {
+ tcb->backoff = 0;
+ tcb->backedoff = 0;
+ rtt = tcb->rtt_timer.start - tcb->rtt_timer.count;
+ if(rtt == 0)
+ rtt = 1; /* otherwise all close systems will rexmit in 0 time */
+ rtt *= MSPTICK;
+ if(tcb->srtt == 0) {
+ tcb->srtt = rtt << LOGAGAIN;
+ tcb->mdev = rtt << LOGDGAIN;
+ } else {
+ delta = rtt - (tcb->srtt>>LOGAGAIN);
+ tcb->srtt += delta;
+ if(tcb->srtt <= 0)
+ tcb->srtt = 1;
+
+ delta = abs(delta) - (tcb->mdev>>LOGDGAIN);
+ tcb->mdev += delta;
+ if(tcb->mdev <= 0)
+ tcb->mdev = 1;
+ }
+ tcpsettimer(tcb);
+ }
+ }
+
+done:
+ /* fewer bytes discarded than acked: the remainder acknowledges a flag */
+ if(qdiscard(s->wq, acked) < acked)
+ tcb->flgcnt--;
+
+ tcb->snd.una = seg->ack;
+ if(seq_gt(seg->ack, tcb->snd.urg))
+ tcb->snd.urg = seg->ack;
+
+ /* keep the retransmit timer running only while data is outstanding */
+ if(tcb->snd.una != tcb->snd.nxt)
+ tcpgo(tpriv, &tcb->timer);
+ else
+ tcphalt(tpriv, &tcb->timer);
+
+ if(seq_lt(tcb->snd.ptr, tcb->snd.una))
+ tcb->snd.ptr = tcb->snd.una;
+
+ tcb->flags &= ~RETRAN;
+ tcb->backoff = 0;
+ tcb->backedoff = 0;
+}
+
+/*
+ * receive one TCP segment from IP: verify the checksum, convert
+ * the header to host form, trim the block to the claimed length,
+ * find the conversation (or, for a listener, the call in limbo)
+ * and run the input state machine on it.
+ */
+void
+tcpiput(Proto *tcp, Ipifc* _, Block *bp)
+{
+ Tcp seg;
+ Tcp4hdr *h4;
+ Tcp6hdr *h6;
+ int hdrlen;
+ Tcpctl *tcb;
+ ushort length, csum;
+ uchar source[IPaddrlen], dest[IPaddrlen];
+ Conv *s;
+ Fs *f;
+ Tcppriv *tpriv;
+ uchar version;
+
+ f = tcp->f;
+ tpriv = tcp->priv;
+
+ tpriv->stats[InSegs]++;
+
+ /* the same bytes are viewed as either header until the version is known */
+ h4 = (Tcp4hdr*)(bp->rp);
+ h6 = (Tcp6hdr*)(bp->rp);
+
+ if((h4->vihl&0xF0)==IP_VER4) {
+ version = V4;
+ length = nhgets(h4->length);
+ v4tov6(dest, h4->tcpdst);
+ v4tov6(source, h4->tcpsrc);
+
+ /* header fields are rewritten before the checksum is computed */
+ h4->Unused = 0;
+ hnputs(h4->tcplen, length-TCP4_PKT);
+ if(!(bp->flag & Btcpck) && (h4->tcpcksum[0] || h4->tcpcksum[1]) &&
+ ptclcsum(bp, TCP4_IPLEN, length-TCP4_IPLEN)) {
+ tpriv->stats[CsumErrs]++;
+ tpriv->stats[InErrs]++;
+ netlog(f, Logtcp, "bad tcp proto cksum\n");
+ freeblist(bp);
+ return;
+ }
+
+ hdrlen = ntohtcp4(&seg, &bp);
+ if(hdrlen < 0){
+ tpriv->stats[HlenErrs]++;
+ tpriv->stats[InErrs]++;
+ netlog(f, Logtcp, "bad tcp hdr len\n");
+ return;
+ }
+
+ /* trim the packet to the size claimed by the datagram */
+ length -= hdrlen+TCP4_PKT;
+ bp = trimblock(bp, hdrlen+TCP4_PKT, length);
+ if(bp == nil){
+ tpriv->stats[LenErrs]++;
+ tpriv->stats[InErrs]++;
+ netlog(f, Logtcp, "tcp len < 0 after trim\n");
+ return;
+ }
+ }
+ else {
+ /* saved so the header can be restored after the checksum */
+ int ttl = h6->ttl;
+ int proto = h6->proto;
+
+ version = V6;
+ length = nhgets(h6->ploadlen);
+ ipmove(dest, h6->tcpdst);
+ ipmove(source, h6->tcpsrc);
+
+ /* header fields are rewritten before the checksum is computed */
+ h6->ploadlen[0] = h6->ploadlen[1] = h6->proto = 0;
+ h6->ttl = proto;
+ hnputl(h6->vcf, length);
+ if((h6->tcpcksum[0] || h6->tcpcksum[1]) &&
+ (csum = ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) != 0) {
+ tpriv->stats[CsumErrs]++;
+ tpriv->stats[InErrs]++;
+ netlog(f, Logtcp,
+ "bad tcpv6 proto cksum: got %#ux, computed %#ux\n",
+ h6->tcpcksum[0]<<8 | h6->tcpcksum[1], csum);
+ freeblist(bp);
+ return;
+ }
+ h6->ttl = ttl;
+ h6->proto = proto;
+ hnputs(h6->ploadlen, length);
+
+ hdrlen = ntohtcp6(&seg, &bp);
+ if(hdrlen < 0){
+ tpriv->stats[HlenErrs]++;
+ tpriv->stats[InErrs]++;
+ netlog(f, Logtcp, "bad tcpv6 hdr len\n");
+ return;
+ }
+
+ /* trim the packet to the size claimed by the datagram */
+ length -= hdrlen;
+ bp = trimblock(bp, hdrlen+TCP6_PKT, length);
+ if(bp == nil){
+ tpriv->stats[LenErrs]++;
+ tpriv->stats[InErrs]++;
+ netlog(f, Logtcp, "tcpv6 len < 0 after trim\n");
+ return;
+ }
+ }
+
+ /* lock protocol while searching for a conversation */
+ QLOCK(tcp);
+
+ /* Look for a matching conversation */
+ s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest);
+ if(s == nil){
+ netlog(f, Logtcp, "iphtlook failed\n");
+reset:
+ QUNLOCK(tcp);
+ sndrst(tcp, source, dest, length, &seg, version, "no conversation");
+ freeblist(bp);
+ return;
+ }
+
+ /* if it's a listener, look for the right flags and get a new conv */
+ tcb = (Tcpctl*)s->ptcl;
+ if(tcb->state == Listen){
+ if(seg.flags & RST){
+ limborst(s, &seg, source, dest, version);
+ QUNLOCK(tcp);
+ freeblist(bp);
+ return;
+ }
+
+ /* if this is a new SYN, put the call into limbo */
+ if((seg.flags & SYN) && (seg.flags & ACK) == 0){
+ limbo(s, source, dest, &seg, version);
+ QUNLOCK(tcp);
+ freeblist(bp);
+ return;
+ }
+
+ /*
+ * if there's a matching call in limbo, tcpincoming will
+ * return it in state Syn_received
+ */
+ /* NOTE(review): tcpincoming in this file sets Established, not Syn_received */
+ s = tcpincoming(s, &seg, source, dest, version);
+ if(s == nil)
+ goto reset;
+ }
+
+ /* The rest of the input state machine is run with the control block
+ * locked and implements the state machine directly out of the RFC.
+ * Out-of-band data is ignored - it was always a bad idea.
+ */
+ tcb = (Tcpctl*)s->ptcl;
+ if(waserror()){
+ QUNLOCK(s);
+ nexterror();
+ }
+ /* take the conversation lock before letting go of the proto lock */
+ QLOCK(s);
+ QUNLOCK(tcp);
+
+ /* fix up window */
+ seg.wnd <<= tcb->rcv.scale;
+
+ /* every input packet in puts off the keep alive time out */
+ tcpsetkacounter(tcb);
+
+ switch(tcb->state) {
+ case Closed:
+ sndrst(tcp, source, dest, length, &seg, version, "sending to Closed");
+ goto raise;
+ case Syn_sent:
+ if(seg.flags & ACK) {
+ if(!seq_within(seg.ack, tcb->iss+1, tcb->snd.nxt)) {
+ sndrst(tcp, source, dest, length, &seg, version,
+ "bad seq in Syn_sent");
+ goto raise;
+ }
+ }
+ if(seg.flags & RST) {
+ if(seg.flags & ACK)
+ localclose(s, Econrefused);
+ goto raise;
+ }
+
+ if(seg.flags & SYN) {
+ procsyn(s, &seg);
+ if(seg.flags & ACK){
+ update(s, &seg);
+ tcpsynackrtt(s);
+ tcpsetstate(s, Established);
+ tcpsetscale(s, tcb, seg.ws, tcb->scale);
+ }
+ else {
+ tcb->time = NOW;
+ tcpsetstate(s, Syn_received); /* DLP - shouldn't this be a reset? */
+ }
+
+ if(length != 0 || (seg.flags & FIN))
+ break;
+
+ freeblist(bp);
+ goto output;
+ }
+ else
+ freeblist(bp);
+
+ QUNLOCK(s);
+ poperror();
+ return;
+ case Syn_received:
+ /* doesn't matter if it's the correct ack, we're just trying to set timing */
+ if(seg.flags & ACK)
+ tcpsynackrtt(s);
+ break;
+ }
+
+ /*
+ * One DOS attack is to open connections to us and then forget about them,
+ * thereby tying up a conv at no long term cost to the attacker.
+ * This is an attempt to defeat these stateless DOS attacks. See
+ * corresponding code in tcpsendka().
+ */
+ if(tcb->state != Syn_received && (seg.flags & RST) == 0){
+ if(tcpporthogdefense
+ && seq_within(seg.ack, tcb->snd.una-(1<<31), tcb->snd.una-(1<<29))){
+ print("stateless hog %I.%d->%I.%d f %ux %lux - %lux - %lux\n",
+ source, seg.source, dest, seg.dest, seg.flags,
+ tcb->snd.una-(1<<31), seg.ack, tcb->snd.una-(1<<29));
+ localclose(s, "stateless hog");
+ }
+ }
+
+ /* Cut the data to fit the receive window */
+ if(tcptrim(tcb, &seg, &bp, &length) == -1) {
+ netlog(f, Logtcp, "tcp len < 0, %lud %d\n", seg.seq, length);
+ update(s, &seg);
+ if(qlen(s->wq)+tcb->flgcnt == 0 && tcb->state == Closing) {
+ tcphalt(tpriv, &tcb->rtt_timer);
+ tcphalt(tpriv, &tcb->acktimer);
+ tcphalt(tpriv, &tcb->katimer);
+ tcpsetstate(s, Time_wait);
+ tcb->timer.start = MSL2*(1000 / MSPTICK);
+ tcpgo(tpriv, &tcb->timer);
+ }
+ if(!(seg.flags & RST)) {
+ tcb->flags |= FORCE;
+ goto output;
+ }
+ QUNLOCK(s);
+ poperror();
+ return;
+ }
+
+ /* Cannot accept so answer with a rst */
+ if(length && tcb->state == Closed) {
+ sndrst(tcp, source, dest, length, &seg, version, "sending to Closed");
+ goto raise;
+ }
+
+ /* The segment is beyond the current receive pointer so
+ * queue the data in the resequence queue
+ */
+ if(seg.seq != tcb->rcv.nxt)
+ if(length != 0 || (seg.flags & (SYN|FIN))) {
+ update(s, &seg);
+ if(addreseq(tcb, tpriv, &seg, bp, length) < 0)
+ print("reseq %I.%d -> %I.%d\n", s->raddr, s->rport, s->laddr, s->lport);
+ tcb->flags |= FORCE;
+ goto output;
+ }
+
+ /*
+ * keep looping till we've processed this packet plus any
+ * adjacent packets in the resequence queue
+ */
+ for(;;) {
+ if(seg.flags & RST) {
+ if(tcb->state == Established) {
+ tpriv->stats[EstabResets]++;
+ if(tcb->rcv.nxt != seg.seq)
+ print("out of order RST rcvd: %I.%d -> %I.%d, rcv.nxt %lux seq %lux\n", s->raddr, s->rport, s->laddr, s->lport, tcb->rcv.nxt, seg.seq);
+ }
+ localclose(s, Econrefused);
+ goto raise;
+ }
+
+ if((seg.flags&ACK) == 0)
+ goto raise;
+
+ switch(tcb->state) {
+ case Syn_received:
+ if(!seq_within(seg.ack, tcb->snd.una+1, tcb->snd.nxt)){
+ sndrst(tcp, source, dest, length, &seg, version,
+ "bad seq in Syn_received");
+ goto raise;
+ }
+ update(s, &seg);
+ tcpsetstate(s, Established);
+ /* no break: continues into the Established case */
+ case Established:
+ case Close_wait:
+ update(s, &seg);
+ break;
+ case Finwait1:
+ update(s, &seg);
+ if(qlen(s->wq)+tcb->flgcnt == 0){
+ tcphalt(tpriv, &tcb->rtt_timer);
+ tcphalt(tpriv, &tcb->acktimer);
+ tcpsetkacounter(tcb);
+ tcb->time = NOW;
+ tcpsetstate(s, Finwait2);
+ tcb->katimer.start = MSL2 * (1000 / MSPTICK);
+ tcpgo(tpriv, &tcb->katimer);
+ }
+ break;
+ case Finwait2:
+ update(s, &seg);
+ break;
+ case Closing:
+ update(s, &seg);
+ if(qlen(s->wq)+tcb->flgcnt == 0) {
+ tcphalt(tpriv, &tcb->rtt_timer);
+ tcphalt(tpriv, &tcb->acktimer);
+ tcphalt(tpriv, &tcb->katimer);
+ tcpsetstate(s, Time_wait);
+ tcb->timer.start = MSL2*(1000 / MSPTICK);
+ tcpgo(tpriv, &tcb->timer);
+ }
+ break;
+ case Last_ack:
+ update(s, &seg);
+ if(qlen(s->wq)+tcb->flgcnt == 0) {
+ localclose(s, nil);
+ goto raise;
+ }
+ /* no break: continues into the Time_wait case */
+ case Time_wait:
+ tcb->flags |= FORCE;
+ if(tcb->timer.state != TcptimerON)
+ tcpgo(tpriv, &tcb->timer);
+ }
+
+ if((seg.flags&URG) && seg.urg) {
+ if(seq_gt(seg.urg + seg.seq, tcb->rcv.urg)) {
+ tcb->rcv.urg = seg.urg + seg.seq;
+ pullblock(&bp, seg.urg);
+ }
+ }
+ else
+ if(seq_gt(tcb->rcv.nxt, tcb->rcv.urg))
+ tcb->rcv.urg = tcb->rcv.nxt;
+
+ if(length == 0) {
+ if(bp != nil)
+ freeblist(bp);
+ }
+ else {
+ switch(tcb->state){
+ default:
+ /* Ignore segment text */
+ if(bp != nil)
+ freeblist(bp);
+ break;
+
+ case Syn_received:
+ case Established:
+ case Finwait1:
+ /* If we still have some data place on
+ * receive queue
+ */
+ if(bp) {
+ bp = packblock(bp);
+ if(bp == nil)
+ panic("tcp packblock");
+ qpassnolim(s->rq, bp);
+ bp = nil;
+
+ /*
+ * Force an ack every 2 data messages. This is
+ * a hack for rob to make his home system run
+ * faster.
+ *
+ * this also keeps the standard TCP congestion
+ * control working since it needs an ack every
+ * 2 max segs worth. This is not quite that,
+ * but under a real stream is equivalent since
+ * every packet has a max seg in it.
+ */
+ if(++(tcb->rcv.una) >= 2)
+ tcb->flags |= FORCE;
+ }
+ tcb->rcv.nxt += length;
+
+ /*
+ * update our rcv window
+ */
+ tcprcvwin(s);
+
+ /*
+ * turn on the acktimer if there's something
+ * to ack
+ */
+ if(tcb->acktimer.state != TcptimerON)
+ tcpgo(tpriv, &tcb->acktimer);
+
+ break;
+ case Finwait2:
+ /* no process to read the data, send a reset */
+ if(bp != nil)
+ freeblist(bp);
+ sndrst(tcp, source, dest, length, &seg, version,
+ "send to Finwait2");
+ QUNLOCK(s);
+ poperror();
+ return;
+ }
+ }
+
+ if(seg.flags & FIN) {
+ tcb->flags |= FORCE;
+
+ switch(tcb->state) {
+ case Syn_received:
+ case Established:
+ tcb->rcv.nxt++;
+ tcpsetstate(s, Close_wait);
+ break;
+ case Finwait1:
+ tcb->rcv.nxt++;
+ if(qlen(s->wq)+tcb->flgcnt == 0) {
+ tcphalt(tpriv, &tcb->rtt_timer);
+ tcphalt(tpriv, &tcb->acktimer);
+ tcphalt(tpriv, &tcb->katimer);
+ tcpsetstate(s, Time_wait);
+ tcb->timer.start = MSL2*(1000/MSPTICK);
+ tcpgo(tpriv, &tcb->timer);
+ }
+ else
+ tcpsetstate(s, Closing);
+ break;
+ case Finwait2:
+ tcb->rcv.nxt++;
+ tcphalt(tpriv, &tcb->rtt_timer);
+ tcphalt(tpriv, &tcb->acktimer);
+ tcphalt(tpriv, &tcb->katimer);
+ tcpsetstate(s, Time_wait);
+ tcb->timer.start = MSL2 * (1000/MSPTICK);
+ tcpgo(tpriv, &tcb->timer);
+ break;
+ case Close_wait:
+ case Closing:
+ case Last_ack:
+ break;
+ case Time_wait:
+ tcpgo(tpriv, &tcb->timer);
+ break;
+ }
+ }
+
+ /*
+ * get next adjacent segment from the resequence queue.
+ * dump/trim any overlapping segments
+ */
+ for(;;) {
+ if(tcb->reseq == nil)
+ goto output;
+
+ if(seq_ge(tcb->rcv.nxt, tcb->reseq->seg.seq) == 0)
+ goto output;
+
+ getreseq(tcb, &seg, &bp, &length);
+
+ if(tcptrim(tcb, &seg, &bp, &length) == 0)
+ break;
+ }
+ }
+output:
+ /* normal exit: send whatever is now due, then unlock */
+ tcpoutput(s);
+ QUNLOCK(s);
+ poperror();
+ return;
+raise:
+ /* exit without output: unlock, drop the block and kick the conversation */
+ QUNLOCK(s);
+ poperror();
+ freeblist(bp);
+ tcpkick(s);
+}
+
+/*
+ * always enters and exits with the s locked. We drop
+ * the lock to ipoput the packet so some care has to be
+ * taken by callers.
+ *
+ * sends as many segments as the congestion and offered windows
+ * allow, up to 100 per call, yielding the lock and the processor
+ * on every fourth pass (msgs%4 == 1).
+ */
+void
+tcpoutput(Conv *s)
+{
+ Tcp seg;
+ int msgs;
+ Tcpctl *tcb;
+ Block *hbp, *bp;
+ int sndcnt, n;
+ ulong ssize, dsize, usable, sent;
+ Fs *f;
+ Tcppriv *tpriv;
+ uchar version;
+
+ f = s->p->f;
+ tpriv = s->p->priv;
+ version = s->ipversion;
+
+ for(msgs = 0; msgs < 100; msgs++) {
+ tcb = (Tcpctl*)s->ptcl;
+
+ /* nothing is ever sent in these states */
+ switch(tcb->state) {
+ case Listen:
+ case Closed:
+ case Finwait2:
+ return;
+ }
+
+ /* force an ack when a window has opened up */
+ if(tcb->rcv.blocked && tcb->rcv.wnd > 0){
+ tcb->rcv.blocked = 0;
+ tcb->flags |= FORCE;
+ }
+
+ /* sndcnt: queued bytes plus flags; sent: amount already sent but unacked */
+ sndcnt = qlen(s->wq)+tcb->flgcnt;
+ sent = tcb->snd.ptr - tcb->snd.una;
+
+ /* Don't send anything else until our SYN has been acked */
+ if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
+ break;
+
+ /* Compute usable segment based on offered window and limit
+ * window probes to one
+ */
+ if(tcb->snd.wnd == 0){
+ if(sent != 0) {
+ if((tcb->flags&FORCE) == 0)
+ break;
+// tcb->snd.ptr = tcb->snd.una;
+ }
+ usable = 1;
+ }
+ else {
+ usable = tcb->cwind;
+ if(tcb->snd.wnd < usable)
+ usable = tcb->snd.wnd;
+ usable -= sent;
+ }
+ /* ssize: sequence space this segment will consume; dsize: data bytes */
+ ssize = sndcnt-sent;
+ if(ssize && usable < 2)
+ netlog(s->p->f, Logtcp, "throttled snd.wnd %lud cwind %lud\n",
+ tcb->snd.wnd, tcb->cwind);
+ if(usable < ssize)
+ ssize = usable;
+ if(tcb->mss < ssize)
+ ssize = tcb->mss;
+ dsize = ssize;
+ seg.urg = 0;
+
+ /* nothing to send and no forced ack pending */
+ if(ssize == 0)
+ if((tcb->flags&FORCE) == 0)
+ break;
+
+ tcb->flags &= ~FORCE;
+ tcprcvwin(s);
+
+ /* By default we will generate an ack */
+ tcphalt(tpriv, &tcb->acktimer);
+ tcb->rcv.una = 0;
+ seg.source = s->lport;
+ seg.dest = s->rport;
+ seg.flags = ACK;
+ seg.mss = 0;
+ seg.ws = 0;
+ switch(tcb->state){
+ case Syn_sent:
+ seg.flags = 0;
+ if(tcb->snd.ptr == tcb->iss){
+ seg.flags |= SYN;
+ /* the SYN takes one sequence number, so one less data byte */
+ dsize--;
+ seg.mss = tcb->mss;
+ seg.ws = tcb->scale;
+ }
+ break;
+ case Syn_received:
+ /*
+ * don't send any data with a SYN/ACK packet
+ * because Linux rejects the packet in its
+ * attempt to solve the SYN attack problem
+ */
+ if(tcb->snd.ptr == tcb->iss){
+ seg.flags |= SYN;
+ dsize = 0;
+ ssize = 1;
+ seg.mss = tcb->mss;
+ seg.ws = tcb->scale;
+ }
+ break;
+ }
+ seg.seq = tcb->snd.ptr;
+ seg.ack = tcb->rcv.nxt;
+ seg.wnd = tcb->rcv.wnd;
+
+ /* Pull out data to send */
+ bp = nil;
+ if(dsize != 0) {
+ bp = qcopy(s->wq, dsize, sent);
+ /* the queue held less than dsize: the difference is sent as a FIN */
+ if(BLEN(bp) != dsize) {
+ seg.flags |= FIN;
+ dsize--;
+ }
+ }
+
+ if(sent+dsize == sndcnt)
+ seg.flags |= PSH;
+
+ /* keep track of balance of resent data */
+ if(seq_lt(tcb->snd.ptr, tcb->snd.nxt)) {
+ n = tcb->snd.nxt - tcb->snd.ptr;
+ if(ssize < n)
+ n = ssize;
+ tcb->resent += n;
+ netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr %lux nxt %lux\n",
+ s->raddr, s->rport, s->laddr, s->lport, tcb->snd.ptr, tcb->snd.nxt);
+ tpriv->stats[RetransSegs]++;
+ }
+
+ tcb->snd.ptr += ssize;
+
+ /* Pull up the send pointer so we can accept acks
+ * for this window
+ */
+ if(seq_gt(tcb->snd.ptr,tcb->snd.nxt))
+ tcb->snd.nxt = tcb->snd.ptr;
+
+ /* Build header, link data and compute cksum */
+ switch(version){
+ case V4:
+ tcb->protohdr.tcp4hdr.vihl = IP_VER4;
+ hbp = htontcp4(&seg, bp, &tcb->protohdr.tcp4hdr, tcb);
+ if(hbp == nil) {
+ freeblist(bp);
+ return;
+ }
+ break;
+ case V6:
+ tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
+ hbp = htontcp6(&seg, bp, &tcb->protohdr.tcp6hdr, tcb);
+ if(hbp == nil) {
+ freeblist(bp);
+ return;
+ }
+ break;
+ default:
+ hbp = nil; /* to suppress a warning */
+ panic("tcpoutput: version %d", version);
+ }
+
+ /* Start the transmission timers if there is new data and we
+ * expect acknowledges
+ */
+ if(ssize != 0){
+ if(tcb->timer.state != TcptimerON)
+ tcpgo(tpriv, &tcb->timer);
+
+ /* If round trip timer isn't running, start it.
+ * measure the longest packet only in case the
+ * transmission time dominates RTT
+ */
+ if(tcb->rtt_timer.state != TcptimerON)
+ if(ssize == tcb->mss) {
+ tcpgo(tpriv, &tcb->rtt_timer);
+ tcb->rttseq = tcb->snd.ptr;
+ }
+ }
+
+ tpriv->stats[OutSegs]++;
+
+ /* put off the next keep alive */
+ tcpgo(tpriv, &tcb->katimer);
+
+ switch(version){
+ case V4:
+ if(ipoput4(f, hbp, 0, s->ttl, s->tos, s) < 0){
+ /* a negative return means no route */
+ localclose(s, "no route");
+ }
+ break;
+ case V6:
+ if(ipoput6(f, hbp, 0, s->ttl, s->tos, s) < 0){
+ /* a negative return means no route */
+ localclose(s, "no route");
+ }
+ break;
+ default:
+ panic("tcpoutput2: version %d", version);
+ }
+ /* periodically release the lock and yield the processor */
+ if((uint)(msgs%4) == 1){
+ QUNLOCK(s);
+ sched();
+ QLOCK(s);
+ }
+ }
+}
+
+/*
+ * the BSD convention (hack?) for keep alives. resend last uchar acked.
+ *
+ * builds an ACK|PSH segment whose sequence number lies before
+ * snd.una (far before it when tcpporthogdefense is set) and sends
+ * it with one byte of data, or with FIN and no data in Finwait2.
+ */
+void
+tcpsendka(Conv *s)
+{
+	Tcp ka;
+	Tcpctl *tcb;
+	Block *pkt, *data;
+
+	tcb = (Tcpctl*)s->ptcl;
+
+	/* fixed header fields */
+	ka.source = s->lport;
+	ka.dest = s->rport;
+	ka.flags = ACK|PSH;
+	ka.urg = 0;
+	ka.mss = 0;
+	ka.ws = 0;
+
+	/* sequence number */
+	if(tcpporthogdefense)
+		ka.seq = tcb->snd.una-(1<<30)-nrand(1<<20);
+	else
+		ka.seq = tcb->snd.una-1;
+
+	/* acknowledgement and window */
+	ka.ack = tcb->rcv.nxt;
+	tcb->rcv.una = 0;
+	ka.wnd = tcb->rcv.wnd;
+
+	/* payload: a single byte, except in Finwait2 where a FIN is sent instead */
+	data = nil;
+	if(tcb->state == Finwait2)
+		ka.flags |= FIN;
+	else {
+		data = allocb(1);
+		data->wp++;
+	}
+
+	/* Build header, link data and compute cksum, then send */
+	if(!isv4(s->raddr)){
+		tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
+		pkt = htontcp6(&ka, data, &tcb->protohdr.tcp6hdr, tcb);
+		if(pkt == nil){
+			freeblist(data);
+			return;
+		}
+		ipoput6(s->p->f, pkt, 0, s->ttl, s->tos, s);
+		return;
+	}
+
+	tcb->protohdr.tcp4hdr.vihl = IP_VER4;
+	pkt = htontcp4(&ka, data, &tcb->protohdr.tcp4hdr, tcb);
+	if(pkt == nil){
+		freeblist(data);
+		return;
+	}
+	ipoput4(s->p->f, pkt, 0, s->ttl, s->tos, s);
+}
+
+/*
+ * set connection to time out after 12 minutes:
+ * kacounter is the number of keepalive timer periods in 12
+ * minutes, but never less than 3.
+ */
+void
+tcpsetkacounter(Tcpctl *tcb)
+{
+	tcb->kacounter = (12 * 60 * 1000) / (tcb->katimer.start*MSPTICK);
+	if(tcb->kacounter >= 3)
+		return;
+
+	/* enforce the minimum */
+	tcb->kacounter = 3;
+}
+
+/*
+ * keepalive timer routine; v is the conversation.
+ * if the keepalive counter has run out, close the connection
+ * with Etimedout; otherwise send a keepalive and restart the
+ * timer.  a Closed conversation is left alone.
+ */
+void
+tcpkeepalive(void *v)
+{
+	Conv *conv;
+	Tcpctl *ctl;
+
+	conv = v;
+	ctl = (Tcpctl*)conv->ptcl;
+
+	/* make sure the conversation is unlocked if anything below raises */
+	if(waserror()){
+		QUNLOCK(conv);
+		nexterror();
+	}
+	QLOCK(conv);
+
+	if(ctl->state != Closed){
+		if(--(ctl->kacounter) > 0){
+			tcpsendka(conv);
+			tcpgo(conv->p->priv, &ctl->katimer);
+		} else
+			localclose(conv, Etimedout);
+	}
+
+	QUNLOCK(conv);
+	poperror();
+}
+
+/*
+ * start keepalive timer.
+ *
+ * f[1], when present (n > 1), is the keepalive period in
+ * milliseconds; it is applied only if it is at least MSPTICK.
+ * returns nil on success or an error string if the connection
+ * is not Established.
+ */
+char*
+tcpstartka(Conv *s, char **f, int n)
+{
+	Tcpctl *ctl;
+	int ms;
+
+	ctl = (Tcpctl*)s->ptcl;
+	if(ctl->state != Established)
+		return "connection must be in Establised state";
+
+	/* optional period, converted from milliseconds to timer ticks */
+	if(n > 1 && (ms = atoi(f[1])) >= MSPTICK)
+		ctl->katimer.start = ms/MSPTICK;
+
+	tcpsetkacounter(ctl);
+	tcpgo(s->p->priv, &ctl->katimer);
+
+	return nil;
+}
+
+/*
+ * turn checksums on/off
+ *
+ * f[1] is parsed with atoi: a non-zero value clears nochecksum,
+ * zero sets it.  n is the number of fields in f; an error string
+ * is returned when the value field is missing, nil otherwise.
+ */
+char*
+tcpsetchecksum(Conv *s, char **f, int n)
+{
+	Tcpctl *tcb;
+
+	/* f[1] is only valid when at least two fields were supplied */
+	if(n < 2)
+		return "checksum needs an argument";
+
+	tcb = (Tcpctl*)s->ptcl;
+	tcb->nochecksum = !atoi(f[1]);
+
+	return nil;
+}
+
+/*
+ * retransmit starting from the oldest unacknowledged byte:
+ * mark the connection RETRAN|FORCE, rewind snd.ptr to snd.una,
+ * collapse ssthresh and the congestion window to one mss and
+ * run tcpoutput.
+ */
+void
+tcprxmit(Conv *s)
+{
+	Tcpctl *ctl;
+
+	ctl = (Tcpctl*)s->ptcl;
+
+	/*
+	 * We should be halving the slow start threshhold (down to one
+	 * mss) but leaving it at mss seems to work well enough
+	 */
+	ctl->ssthresh = ctl->mss;
+
+	/*
+	 * pull window down to a single packet
+	 */
+	ctl->cwind = ctl->mss;
+
+	/* resend from the first unacked byte */
+	ctl->snd.ptr = ctl->snd.una;
+	ctl->flags |= RETRAN|FORCE;
+
+	tcpoutput(s);
+}
+
+/*
+ * retransmit timer routine; arg is the conversation.
+ * in Time_wait the conversation is closed, in Closed nothing
+ * happens.  in any other state the backoff is increased and the
+ * data is retransmitted, unless the accumulated backoff time has
+ * reached MAXBACKMS (half that in Syn_sent), in which case the
+ * connection is closed with Etimedout.
+ */
+void
+tcptimeout(void *arg)
+{
+	Conv *conv;
+	Tcpctl *ctl;
+	Tcppriv *priv;
+	int limit;
+
+	conv = (Conv*)arg;
+	priv = conv->p->priv;
+	ctl = (Tcpctl*)conv->ptcl;
+
+	if(waserror()){
+		QUNLOCK(conv);
+		nexterror();
+	}
+	QLOCK(conv);
+
+	if(ctl->state == Time_wait)
+		localclose(conv, nil);
+	else if(ctl->state != Closed){
+		ctl->backoff++;
+		limit = MAXBACKMS;
+		if(ctl->state == Syn_sent)
+			limit = MAXBACKMS/2;
+		ctl->backedoff += ctl->timer.start * MSPTICK;
+		if(ctl->backedoff >= limit)
+			localclose(conv, Etimedout);
+		else {
+			netlog(conv->p->f, Logtcprxmt, "timeout rexmit 0x%lux %d/%d\n", ctl->snd.una, ctl->timer.start, NOW);
+			tcpsettimer(ctl);
+			tcprxmit(conv);
+			priv->stats[RetransTimeouts]++;
+			ctl->snd.dupacks = 0;
+		}
+	}
+
+	QUNLOCK(conv);
+	poperror();
+}
+
+/*
+ * return 1 if seq falls inside the receive window, that is in
+ * [rcv.nxt, rcv.nxt+rcv.wnd-1].
+ */
+int
+inwindow(Tcpctl *tcb, int seq)
+{
+	ulong first, last;
+
+	first = tcb->rcv.nxt;
+	last = tcb->rcv.nxt+tcb->rcv.wnd-1;
+	return seq_within(seq, first, last);
+}
+
+/*
+ * set up state for a received SYN (or SYN ACK) packet:
+ * record the peer's initial sequence number, limit our mss to
+ * the one it offered, take its window and start with a
+ * congestion window of one segment.  FORCE is set as well.
+ */
+void
+procsyn(Conv *s, Tcp *seg)
+{
+	Tcpctl *ctl;
+
+	ctl = (Tcpctl*)s->ptcl;
+
+	/* receive side starts just past the peer's SYN */
+	ctl->irs = seg->seq;
+	ctl->rcv.nxt = seg->seq + 1;
+	ctl->rcv.urg = ctl->rcv.nxt;
+
+	/* our sending max segment size cannot be bigger than what he asked for */
+	if(seg->mss != 0 && seg->mss < ctl->mss)
+		ctl->mss = seg->mss;
+
+	/* the congestion window always starts out as a single segment */
+	ctl->cwind = ctl->mss;
+	ctl->snd.wnd = seg->wnd;
+
+	ctl->flags |= FORCE;
+}
+
+/*
+ * insert an out of order segment into the resequence queue,
+ * which is kept sorted by starting sequence number.  bp is always
+ * consumed.  returns -1 if the lengths queued ahead of the new
+ * segment add up to more than QMAX<<rcv.scale, in which case the
+ * whole queue is freed; returns 0 otherwise (including when the
+ * queue element cannot be allocated).
+ */
+int
+addreseq(Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
+{
+ Reseq *rp, *rp1;
+ int i, rqlen, qmax;
+
+ rp = malloc(sizeof(Reseq));
+ if(rp == nil){
+ freeblist(bp); /* bp always consumed by add_reseq */
+ return 0;
+ }
+
+ rp->seg = *seg;
+ rp->bp = bp;
+ rp->length = length;
+
+ /* Place on reassembly list sorting by starting seq number */
+ rp1 = tcb->reseq;
+ /* empty queue, or the new segment goes in front of the head */
+ if(rp1 == nil || seq_lt(seg->seq, rp1->seg.seq)) {
+ rp->next = rp1;
+ tcb->reseq = rp;
+ if(rp->next != nil)
+ tpriv->stats[OutOfOrder]++;
+ return 0;
+ }
+
+ /* find the insertion point, adding up the lengths walked over */
+ rqlen = 0;
+ for(i = 0;; i++) {
+ rqlen += rp1->length;
+ if(rp1->next == nil || seq_lt(seg->seq, rp1->next->seg.seq)) {
+ rp->next = rp1->next;
+ rp1->next = rp;
+ if(rp->next != nil)
+ tpriv->stats[OutOfOrder]++;
+ break;
+ }
+ rp1 = rp1->next;
+ }
+ qmax = QMAX<<tcb->rcv.scale;
+ if(rqlen > qmax){
+ print("resequence queue > window: %d > %d\n", rqlen, qmax);
+ /* print a bounded number of queue entries */
+ i = 0;
+ for(rp1 = tcb->reseq; rp1 != nil; rp1 = rp1->next){
+ print("%#lux %#lux %#ux\n", rp1->seg.seq,
+ rp1->seg.ack, rp1->seg.flags);
+ if(i++ > 10){
+ print("...\n");
+ break;
+ }
+ }
+
+ /*
+ * delete entire reassembly queue; wait for retransmit.
+ * - should we be smarter and only delete the tail?
+ */
+ for(rp = tcb->reseq; rp != nil; rp = rp1){
+ rp1 = rp->next;
+ freeblist(rp->bp);
+ free(rp);
+ }
+ tcb->reseq = nil;
+
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * take the first segment off the resequence queue and hand its
+ * header, data and length back through seg, bp and length.
+ * nothing is touched when the queue is empty.
+ */
+void
+getreseq(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
+{
+	Reseq *head;
+
+	head = tcb->reseq;
+	if(head != nil){
+		/* unlink, copy out, release the queue element */
+		tcb->reseq = head->next;
+		*length = head->length;
+		*bp = head->bp;
+		*seg = head->seg;
+		free(head);
+	}
+}
+
+/*
+ * trim a segment to the receive window.  returns -1, after
+ * freeing the block list, if no part of the segment is
+ * acceptable.  otherwise returns 0 with data before rcv.nxt and
+ * data beyond the window removed, and the segment header and
+ * length adjusted to match.
+ */
+int
+tcptrim(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
+{
+ ushort len;
+ uchar accept;
+ int dupcnt, excess;
+
+ accept = 0;
+ /* len is the sequence space used: data plus one each for SYN and FIN */
+ len = *length;
+ if(seg->flags & SYN)
+ len++;
+ if(seg->flags & FIN)
+ len++;
+
+ if(tcb->rcv.wnd == 0) {
+ /* closed window: only an empty segment at rcv.nxt is acceptable */
+ if(len == 0 && seg->seq == tcb->rcv.nxt)
+ return 0;
+ }
+ else {
+ /* Some part of the segment should be in the window */
+ if(inwindow(tcb,seg->seq))
+ accept++;
+ else
+ if(len != 0) {
+ if(inwindow(tcb, seg->seq+len-1) ||
+ seq_within(tcb->rcv.nxt, seg->seq,seg->seq+len-1))
+ accept++;
+ }
+ }
+ if(!accept) {
+ freeblist(*bp);
+ return -1;
+ }
+ /* dupcnt: sequence space at the front that was already received */
+ dupcnt = tcb->rcv.nxt - seg->seq;
+ if(dupcnt > 0){
+ tcb->rerecv += dupcnt;
+ if(seg->flags & SYN){
+ seg->flags &= ~SYN;
+ seg->seq++;
+
+ if(seg->urg > 1)
+ seg->urg--;
+ else
+ seg->flags &= ~URG;
+ dupcnt--;
+ }
+ if(dupcnt > 0){
+ pullblock(bp, (ushort)dupcnt);
+ seg->seq += dupcnt;
+ *length -= dupcnt;
+
+ if(seg->urg > dupcnt)
+ seg->urg -= dupcnt;
+ else {
+ seg->flags &= ~URG;
+ seg->urg = 0;
+ }
+ }
+ }
+ /* excess: data that extends past the end of the receive window */
+ excess = seg->seq + *length - (tcb->rcv.nxt + tcb->rcv.wnd);
+ if(excess > 0) {
+ tcb->rerecv += excess;
+ *length -= excess;
+ *bp = trimblock(*bp, 0, *length);
+ if(*bp == nil)
+ panic("presotto is a boofhead");
+ /* the tail was cut off, so the FIN was not received either */
+ seg->flags &= ~FIN;
+ }
+ return 0;
+}
+
+/*
+ * advice about a packet we sent earlier; bp holds that packet's
+ * headers and msg the reason.  the conversation that sent it is
+ * looked up (our address and port are the packet's source) and,
+ * if it is still in Syn_sent, closed with msg.  bp is always freed.
+ */
+void
+tcpadvise(Proto *tcp, Block *bp, char *msg)
+{
+	Tcp4hdr *h4;
+	Tcp6hdr *h6;
+	Tcpctl *ctl;
+	uchar laddr[IPaddrlen];
+	uchar raddr[IPaddrlen];
+	ushort lport, rport;
+	Conv *c, **cp;
+
+	h4 = (Tcp4hdr*)(bp->rp);
+	h6 = (Tcp6hdr*)(bp->rp);
+
+	/* the packet's source is our side, its destination the remote side */
+	if((h4->vihl&0xF0)==IP_VER4) {
+		v4tov6(raddr, h4->tcpdst);
+		v4tov6(laddr, h4->tcpsrc);
+		lport = nhgets(h4->tcpsport);
+		rport = nhgets(h4->tcpdport);
+	}
+	else {
+		ipmove(raddr, h6->tcpdst);
+		ipmove(laddr, h6->tcpsrc);
+		lport = nhgets(h6->tcpsport);
+		rport = nhgets(h6->tcpdport);
+	}
+
+	/* Look for a connection */
+	QLOCK(tcp);
+	for(cp = tcp->conv; (c = *cp) != nil; cp++) {
+		ctl = (Tcpctl*)c->ptcl;
+		if(c->rport != rport
+		|| c->lport != lport
+		|| ctl->state == Closed
+		|| ipcmp(c->raddr, raddr) != 0
+		|| ipcmp(c->laddr, laddr) != 0)
+			continue;
+
+		/* found it: swap the proto lock for the conversation lock */
+		QLOCK(c);
+		QUNLOCK(tcp);
+		if(ctl->state == Syn_sent)
+			localclose(c, msg);
+		QUNLOCK(c);
+		freeblist(bp);
+		return;
+	}
+	QUNLOCK(tcp);
+	freeblist(bp);
+}
+
+/*
+ * set the tcpporthogdefense flag from the string "on" or "off".
+ * returns nil on success, an error string for any other value.
+ */
+static char*
+tcpporthogdefensectl(char *val)
+{
+	if(strcmp(val, "on") == 0){
+		tcpporthogdefense = 1;
+		return nil;
+	}
+	if(strcmp(val, "off") == 0){
+		tcpporthogdefense = 0;
+		return nil;
+	}
+	return "unknown value for tcpporthogdefense";
+}
+
+/*
+ * handle a control request for conversation c; f[0..n-1] are the
+ * fields of the request.  returns nil on success or an error string.
+ *
+ * "checksum" and "tcpporthogdefense" take a value in f[1], so they
+ * are rejected when fewer than two fields are present.
+ *
+ * called with c QLOCKed
+ */
+char*
+tcpctl(Conv* c, char** f, int n)
+{
+	if(n == 1 && strcmp(f[0], "hangup") == 0)
+		return tcphangup(c);
+	if(n >= 1 && strcmp(f[0], "keepalive") == 0)
+		return tcpstartka(c, f, n);
+	if(n >= 1 && strcmp(f[0], "checksum") == 0){
+		if(n < 2)
+			return "checksum needs an argument";
+		return tcpsetchecksum(c, f, n);
+	}
+	if(n >= 1 && strcmp(f[0], "tcpporthogdefense") == 0){
+		if(n < 2)
+			return "tcpporthogdefense needs an argument";
+		return tcpporthogdefensectl(f[1]);
+	}
+	return "unknown control request";
+}
+
+/*
+ * format every statistic as a "name: value" line into buf,
+ * which holds len bytes.  returns the number of bytes written.
+ */
+int
+tcpstats(Proto *tcp, char *buf, int len)
+{
+	Tcppriv *priv;
+	char *out, *end;
+	int stat;
+
+	priv = tcp->priv;
+	end = buf+len;
+	out = buf;
+	stat = 0;
+	while(stat < Nstats){
+		out = seprint(out, end, "%s: %lud\n", statnames[stat], priv->stats[stat]);
+		stat++;
+	}
+	return out - buf;
+}
+
+/*
+ * garbage collect any stale conversations:
+ * - SYN received but no SYN-ACK after 5 seconds (could be the SYN attack)
+ * - Finwait2 after 5 minutes
+ *
+ * this is called whenever we run out of channels. Both checks are
+ * of questionable validity so we try to use them only when we're
+ * up against the wall.
+ *
+ * conversations whose lock cannot be taken immediately are
+ * skipped.  returns the number of conversations closed.
+ */
+int
+tcpgc(Proto *tcp)
+{
+	Conv *conv, **slot, **end;
+	Tcpctl *ctl;
+	int closed;
+
+	closed = 0;
+	end = &tcp->conv[tcp->nc];
+	for(slot = tcp->conv; slot < end && (conv = *slot) != nil; slot++){
+		if(!CANQLOCK(conv))
+			continue;
+		ctl = (Tcpctl*)conv->ptcl;
+		if((ctl->state == Syn_received && NOW - ctl->time > 5000)
+		|| (ctl->state == Finwait2 && NOW - ctl->time > 5*60*1000)){
+			localclose(conv, "timed out");
+			closed++;
+		}
+		QUNLOCK(conv);
+	}
+	return closed;
+}
+
+/*
+ * set the retransmit timer period, in ticks, from the current
+ * backoff and the round trip estimate (srtt and mdev), bounded
+ * twixt 1/2 and 64 seconds.
+ */
+void
+tcpsettimer(Tcpctl *tcb)
+{
+	int ticks;
+
+	/* round trip dependency */
+	ticks = backoff(tcb->backoff) *
+		(tcb->mdev + (tcb->srtt>>LOGAGAIN) + MSPTICK) / MSPTICK;
+
+	/* upper bound: 64 seconds */
+	if(ticks > (64000/MSPTICK))
+		ticks = 64000/MSPTICK;
+	/* lower bound: half a second */
+	if(ticks < 500/MSPTICK)
+		ticks = 500/MSPTICK;
+
+	tcb->timer.start = ticks;
+}
+
+/*
+ * allocate the tcp Proto and its private state, fill in the
+ * protocol's entry points and sizes, and register it with the
+ * stack instance fs.
+ */
+void
+tcpinit(Fs *fs)
+{
+	Proto *tcp;
+	Tcppriv *tpriv;
+
+	tcp = smalloc(sizeof(Proto));
+	tpriv = smalloc(sizeof(Tcppriv));
+	tcp->priv = tpriv;
+
+	/* identity and sizes */
+	tcp->name = "tcp";
+	tcp->ipproto = IP_TCPPROTO;
+	tcp->nc = scalednconv();
+	tcp->ptclsize = sizeof(Tcpctl);
+	tpriv->stats[MaxConn] = tcp->nc;
+
+	/* conversation management */
+	tcp->create = tcpcreate;
+	tcp->connect = tcpconnect;
+	tcp->announce = tcpannounce;
+	tcp->close = tcpclose;
+	tcp->ctl = tcpctl;
+	tcp->state = tcpstate;
+	tcp->inuse = tcpinuse;
+	tcp->gc = tcpgc;
+
+	/* input, advice and statistics */
+	tcp->rcv = tcpiput;
+	tcp->advise = tcpadvise;
+	tcp->stats = tcpstats;
+
+	Fsproto(fs, tcp);
+}
+
+/*
+ * set the window scale factors of a connection and size its
+ * receive queue to match.  a zero rcvscale turns scaling off in
+ * both directions, leaving the window at QMAX.
+ */
+void
+tcpsetscale(Conv *s, Tcpctl *tcb, ushort rcvscale, ushort sndscale)
+{
+	/* no scaling at all unless a receive scale was given */
+	if(rcvscale == 0)
+		sndscale = 0;
+
+	tcb->rcv.scale = rcvscale & 0xff;
+	tcb->snd.scale = sndscale & 0xff;
+	tcb->window = QMAX<<tcb->snd.scale;
+	qsetlimit(s->rq, tcb->window);
+}
diff --git a/src/9vx/a/ip/tripmedium.c b/src/9vx/a/ip/tripmedium.c
@@ -0,0 +1,398 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+#include "trip.h"
+
+static void tripread(void *a);
+static void tripbind(Ipifc *ifc, int argc, char **argv);
+static void tripunbind(Ipifc *ifc);
+static void tripbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
+static void tripaddmulti(Ipifc *ifc, uchar*, uchar*);
+static void tripremmulti(Ipifc *ifc, uchar*, uchar*);
+static void tripaddroute(Ipifc *ifc, int, uchar*, uchar*, uchar*, int);
+static void tripremroute(Ipifc *ifc, int, uchar*, uchar*);
+static void tripares(Fs*, int, uchar*, uchar*, int, int);
+
+/*
+ * Medium descriptor for the "trip" medium: transfer unit limits,
+ * MAC length and the callbacks implemented in this file.
+ * Registered by tripmediumlink().
+ */
+Medium tripmedium =
+{
+.name= "trip",
+.mintu= 20,
+.maxtu= 64*1024,
+.maclen= LCIMACSIZE,
+.bind= tripbind,
+.unbind= tripunbind,
+.bwrite= tripbwrite,
+.addmulti= tripaddmulti,
+.remmulti= tripremmulti,
+.addroute= tripaddroute,
+.remroute= tripremroute,
+.ares= tripares,
+};
+
+/*
+ * Per-interface state of a bound trip medium; allocated in
+ * tripbind and kept in ifc->arg.
+ */
+typedef struct Tripinfo Tripinfo;
+struct Tripinfo
+{
+ Fs* fs; /* my instance of the IP stack */
+ Ipifc* ifc; /* IP interface */
+ Card* dev; /* value returned by tripsetifc(); destination of i2osend() */
+ Proc* readp; /* reading process */
+ Chan* mchan; /* Data channel */
+};
+
+/*
+ * called to bind an IP ifc to a trip device
+ * called with ifc qlock'd
+ *
+ * argv[2] names the device file to open, so at least three
+ * arguments are required.  The file must belong to the device
+ * whose devtab character is 'T'; anything else is rejected with
+ * Enoport.  On success the Tripinfo is stored in ifc->arg and a
+ * "tripread" kproc is started for the interface.
+ * Failures are reported with error().
+ */
+static void
+tripbind(Ipifc *ifc, int argc, char **argv)
+{
+	int fd;
+	Chan *mchan;
+	Tripinfo *er;
+
+	/* argv[2] is read below: checking argc < 2 allowed an out-of-range index */
+	if(argc < 3)
+		error(Ebadarg);
+
+	fd = kopen(argv[2], ORDWR);
+	if(fd < 0)
+		error("trip open failed");
+
+	/* keep the channel, drop the file descriptor */
+	mchan = fdtochan(up->env->fgrp, fd, ORDWR, 0, 1);
+	kclose(fd);
+
+	if(devtab[mchan->type]->dc != 'T') {
+		cclose(mchan);
+		error(Enoport);
+	}
+
+	er = smalloc(sizeof(*er));
+	er->mchan = mchan;
+	er->ifc = ifc;
+	er->dev = tripsetifc(mchan, ifc);
+	er->fs = ifc->conv->p->f;
+
+	ifc->arg = er;
+
+	kproc("tripread", tripread, ifc);
+}
+
+/*
+ * called with ifc qlock'd
+ *
+ * Waits (tsleep with a time argument of 300), closes the data
+ * channel if there is one and frees the Tripinfo kept in ifc->arg.
+ *
+ * NOTE(review): the postnote to the reader is commented out, so
+ * nothing visible here stops the tripread kproc, which goes on
+ * using ifc->arg and er->mchan after they are released -- confirm
+ * this cannot be reached while the reader is alive.
+ */
+static void
+tripunbind(Ipifc *ifc)
+{
+ Tripinfo *er = ifc->arg;
+/*
+ if(er->readp)
+ postnote(er->readp, 1, "unbind", 0);
+*/
+ /* presumably gives the reader time to notice the unbind -- TODO confirm */
+ tsleep(&up->sleep, return0, 0, 300);
+
+ if(er->mchan != nil)
+ cclose(er->mchan);
+
+ free(er);
+}
+
+/*
+ * called by ipoput with a single block to write
+ *
+ * The block is passed unchanged to the bwrite entry of the device
+ * behind er->mchan.  If that raises an error the failure is only
+ * printed and ifc->out is not incremented.
+ * NOTE(review): who frees bp on the error path is not visible
+ * here -- confirm against the device's bwrite.
+ */
+static void
+tripbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip)
+{
+ Tripinfo *er = ifc->arg;
+
+ /*
+ * Packet is rerouted at linecard
+ * so the gateway is ignored
+ */
+ USED(ip);
+ USED(version);
+
+ /* error() raised inside bwrite lands here */
+ if(waserror()) {
+ print("tripwrite failed\n");
+ return;
+ }
+
+ devtab[er->mchan->type]->bwrite(er->mchan, bp, 0);
+ poperror();
+ ifc->out++; /* counted only when bwrite returned normally */
+}
+
+/*
+ * process to read from the trip interface
+ *
+ * Started by tripbind with the Ipifc as argument.  Each block
+ * read from the data channel is counted in ifc->in and handed to
+ * ipiput4.
+ */
+static void
+tripread(void *a)
+{
+ Ipifc *ifc;
+ Block *bp;
+ Tripinfo *er;
+
+ ifc = a;
+ er = ifc->arg;
+ er->readp = up; /* hide identity under a rock for unbind */
+
+ for(;;) {
+ /* read at most ifc->maxtu bytes per call */
+ bp = devtab[er->mchan->type]->bread(er->mchan, ifc->maxtu, 0);
+ ifc->in++;
+ ipiput4(er->fs, ifc, bp);
+ }
+
+ /* the loop above has no exit, so this is not reached by normal control flow */
+ pexit("hangup", 1);
+}
+
+/*
+ * Send a route-add message for addr/mask/gate to the card behind
+ * ifc.  v selects the address family (Rv4 set means 4-byte
+ * addresses); t is copied into the message as the route type.
+ */
+static void
+tripaddroute(Ipifc *ifc, int v, uchar *addr, uchar *mask, uchar *gate, int t)
+{
+	Tripinfo *ti;
+	MTroute msg;
+	int n;
+
+	ti = ifc->arg;
+
+	/*
+	 * Nothing is sent unless the card is routing.
+	 *
+	 * Multicast addresses are handled on the linecard by
+	 * the multicast port driver, so the route load is dumped.
+	 * loaded by addmulti/remmulti for SBC routes
+	 * joinmulti/leavemulti for inter LC
+	 */
+	if(!ti->dev->routing || ipismulticast(addr))
+		return;
+
+	msg.type = T_ROUTEADMIN;
+	msg.rtype = t;
+	if((v & Rv4) == 0){
+		msg.op = RTADD6;
+		n = IPaddrlen;
+	}else{
+		msg.op = RTADD4;
+		n = IPv4addrlen;
+	}
+
+	/* only the first n bytes of each address field are filled in */
+	memmove(msg.addr, addr, n);
+	memmove(msg.mask, mask, n);
+	memmove(msg.gate, gate, n);
+
+	i2osend(ti->dev, &msg, sizeof(msg));
+}
+
+/*
+ * Send a route-delete message for addr/mask to the card behind
+ * ifc.  v selects the address family as in tripaddroute.  The
+ * gate and rtype fields of the message are not set.
+ */
+static void
+tripremroute(Ipifc *ifc, int v, uchar *addr, uchar *mask)
+{
+	Tripinfo *ti;
+	MTroute msg;
+	int n;
+
+	ti = ifc->arg;
+
+	/* skipped when the card is not routing, and for multicast addresses */
+	if(!ti->dev->routing || ipismulticast(addr))
+		return;
+
+	msg.type = T_ROUTEADMIN;
+	if((v & Rv4) == 0){
+		msg.op = RTDEL6;
+		n = IPaddrlen;
+	}else{
+		msg.op = RTDEL4;
+		n = IPv4addrlen;
+	}
+	memmove(msg.addr, addr, n);
+	memmove(msg.mask, mask, n);
+
+	i2osend(ti->dev, &msg, sizeof(msg));
+}
+
+/*
+ * Route walk callback used by tripsync: unpack one route with
+ * convroute and pass it to tripaddroute for the interface kept in
+ * rw->state.  v4 routes are passed as the 4-byte tail of each
+ * address (offset IPv4off).
+ */
+static void
+tripxmitroute(Route *r, Routewalk *rw)
+{
+	uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen];
+	char tag[5];
+	int nifc;
+
+	/* tag and nifc are filled in by convroute but not used here */
+	convroute(r, addr, mask, gate, tag, &nifc);
+
+	if(r->type & Rv4)
+		tripaddroute(rw->state, Rv4, addr+IPv4off, mask+IPv4off, gate+IPv4off, r->type);
+	else
+		tripaddroute(rw->state, 0, addr, mask, gate, r->type);
+}
+
+/*
+ * Describe every other IP interface of the stack to the card
+ * behind dest: one message per local address of each interface.
+ */
+static void
+sendifcinfo(Ipifc *dest)
+{
+	Conv **cp, **end;
+	Iplifc *lifc;
+	Ipifc *other;
+	MTifctl msg;
+	Tripinfo *mine, *theirs;
+	Proto *ipifc;
+
+	mine = dest->arg;
+	ipifc = mine->fs->ipifc;
+	end = &ipifc->conv[ipifc->nc];
+
+	/* Install interfaces */
+	for(cp = ipifc->conv; cp < end; cp++){
+		if(*cp == nil)
+			continue;
+
+		/* the card does not need to be told about itself */
+		other = (Ipifc*)(*cp)->ptcl;
+		if(other == dest)
+			continue;
+
+		msg.type = T_CTLIFADMIN;
+		msg.maxtu = other->maxtu;
+		msg.mintu = other->mintu;
+
+		/* only interfaces on another trip card carry a port; all others get 0 */
+		if(other->m == &tripmedium){
+			theirs = other->arg;
+			msg.port = theirs->dev->bar[0].bar;
+		}else
+			msg.port = 0;
+
+		for(lifc = other->lifc; lifc != nil; lifc = lifc->next){
+			if(!isv4(lifc->local)){
+				msg.op = IFADD6;
+				memmove(msg.addr, lifc->local, sizeof(msg.addr));
+				memmove(msg.mask, lifc->mask, sizeof(msg.mask));
+			}else{
+				/* v4: send only the 4-byte tail of address and mask */
+				msg.op = IFADD4;
+				memmove(msg.addr, lifc->local+IPv4off, IPv4addrlen);
+				memmove(msg.mask, lifc->mask+IPv4off, IPv4addrlen);
+			}
+
+			i2osend(mine->dev, &msg, sizeof(msg));
+		}
+	}
+}
+
+/*
+ * Bring the card behind ifc up to date: first every route of the
+ * stack, then the addresses of the other interfaces.
+ */
+void
+tripsync(Ipifc *ifc)
+{
+	Routewalk walk;
+
+	if(ifc == nil){
+		print("tripsync: interface not bound\n");
+		return;
+	}
+
+	/* Mirror the route table into the linecard, one tripxmitroute call per route */
+	walk.walk = tripxmitroute;
+	walk.state = ifc;
+	walk.o = 0;
+	walk.n = (1<<22);
+	ipwalkroutes(ifc->conv->p->f, &walk);
+
+	/*
+	 * Tell the linecard about interfaces that already
+	 * exist elsewhere
+	 */
+	sendifcinfo(ifc);
+}
+
+/*
+ * Tell a line card the SBC is interested in listening
+ * to a multicast address.  addr is the multicast address and ifca
+ * the interface address; both are copied into the message.
+ * Nothing is sent unless the card is routing.
+ */
+static void
+tripaddmulti(Ipifc *ifc, uchar *addr, uchar *ifca)
+{
+	Tripinfo *ti;
+	MTmultiears msg;
+
+	ti = ifc->arg;
+	if(!ti->dev->routing)
+		return;
+
+	msg.type = T_MULTIEAR;
+	msg.op = ADDMULTI;
+	memmove(msg.addr, addr, sizeof(msg.addr));
+	memmove(msg.ifca, ifca, sizeof(msg.ifca));
+
+	i2osend(ti->dev, &msg, sizeof(msg));
+}
+
+/*
+ * Tell a line card the SBC is no longer interested in listening
+ * to a multicast address.  Same message as tripaddmulti with the
+ * REMMULTI operation; nothing is sent unless the card is routing.
+ */
+static void
+tripremmulti(Ipifc *ifc, uchar *addr, uchar *ifca)
+{
+	Tripinfo *ti;
+	MTmultiears msg;
+
+	ti = ifc->arg;
+	if(!ti->dev->routing)
+		return;
+
+	msg.type = T_MULTIEAR;
+	msg.op = REMMULTI;
+	memmove(msg.addr, addr, sizeof(msg.addr));
+	memmove(msg.ifca, ifca, sizeof(msg.ifca));
+
+	i2osend(ti->dev, &msg, sizeof(msg));
+}
+
+/*
+ * Pass an address resolution result (ip -> mac, l bytes of mac)
+ * to the card that routes ip.
+ *
+ * vers says whether ip is a 4-byte (V4) or IPaddrlen-byte
+ * address.  The message always carries the IPaddrlen-byte form.
+ * Nothing is sent when there is no route for ip or the card is
+ * not routing.  The last parameter is unused.
+ */
+static void
+tripares(Fs *fs, int vers, uchar *ip, uchar *mac, int l, int)
+{
+	Route *r;
+	Ipifc *ifc;
+	MTaresenter ta;
+	Tripinfo *tinfo;
+	uchar v6ip[IPaddrlen];
+
+	/*
+	 * Look the route up in the caller's format, then switch to
+	 * the long form exactly once.  Converting again further down
+	 * (as this function used to) would call v4tov6 with v6ip as
+	 * both source and destination, reading the first four bytes
+	 * of the already converted address as if they were the IPv4
+	 * address and so destroying it.
+	 */
+	if(vers == V4) {
+		r = v4lookup(fs, ip);
+		v4tov6(v6ip, ip);
+		ip = v6ip;
+	}
+	else
+		r = v6lookup(fs, ip);
+
+	if(r == nil) {
+		print("tripares: no route for entry\n");
+		return;
+	}
+
+	ifc = r->ifc;
+
+	tinfo = ifc->arg;
+	if(!tinfo->dev->routing)
+		return;
+
+	ta.type = T_ARESENTER;
+	ta.maclen = l;
+	memmove(ta.addr, ip, IPaddrlen);
+	memmove(ta.amac, mac, l);
+
+	i2osend(tinfo->dev, &ta, sizeof(ta));
+}
+
+/* register the trip medium descriptor with the IP stack via addipmedium */
+void
+tripmediumlink(void)
+{
+ addipmedium(&tripmedium);
+}
diff --git a/src/9vx/a/ip/udp.c b/src/9vx/a/ip/udp.c
@@ -0,0 +1,619 @@
+#include "u.h"
+#include "lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+
+
+#define DPRINT if(0)print
+
+/* sizes and offsets within Udp4hdr/Udp6hdr, plus protocol constants */
+enum
+{
+ UDP_UDPHDR_SZ = 8, /* the four 2-byte udp header fields */
+
+ UDP4_PHDR_OFF = 8, /* offset of Udp4hdr.Unused, where the v4 pseudo header starts */
+ UDP4_PHDR_SZ = 12, /* Unused through udpdst in Udp4hdr */
+ UDP4_IPHDR_SZ = 20, /* ip part of Udp4hdr */
+ UDP6_IPHDR_SZ = 40, /* ip part of Udp6hdr */
+ UDP6_PHDR_SZ = 40, /* the v6 pseudo header reuses the whole ip header */
+ UDP6_PHDR_OFF = 0,
+
+ IP_UDPPROTO = 17, /* protocol number stored in the ip header */
+ UDP_USEAD7 = 52, /* user header of the "headers" format: three addresses and two ports */
+
+ /* the three constants below are not referenced in this file */
+ Udprxms = 200,
+ Udptickms = 100,
+ Udpmaxxmit = 10,
+};
+
+/*
+ * IPv4 header followed by the UDP header, as laid out in a block.
+ * The fields from Unused through udpdst double as the UDP pseudo
+ * header while a checksum is computed: udpiput saves the byte at
+ * Unused as the ttl and zeroes it for that purpose.
+ */
+typedef struct Udp4hdr Udp4hdr;
+struct Udp4hdr
+{
+ /* ip header */
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+ uchar Unused;
+ uchar udpproto; /* Protocol */
+ uchar udpplen[2]; /* Header plus data length */
+ uchar udpsrc[IPv4addrlen]; /* Ip source */
+ uchar udpdst[IPv4addrlen]; /* Ip destination */
+
+ /* udp header */
+ uchar udpsport[2]; /* Source port */
+ uchar udpdport[2]; /* Destination port */
+ uchar udplen[2]; /* data length */
+ uchar udpcksum[2]; /* Checksum */
+};
+
+/*
+ * IPv6 header followed by the UDP header.  For checksumming,
+ * udpkick and udpiput temporarily rewrite the first 8 bytes
+ * (viclfl holds the udp length, hoplimit the protocol number)
+ * so the ip header can serve as pseudo header.
+ */
+typedef struct Udp6hdr Udp6hdr;
+struct Udp6hdr {
+ uchar viclfl[4]; /* first 4 header bytes; byte 0 is set to IP_VER6 on output */
+ uchar len[2]; /* set to the udp header plus data length on output */
+ uchar nextheader; /* IP_UDPPROTO */
+ uchar hoplimit;
+ uchar udpsrc[IPaddrlen]; /* source address */
+ uchar udpdst[IPaddrlen]; /* destination address */
+
+ /* udp header */
+ uchar udpsport[2]; /* Source port */
+ uchar udpdport[2]; /* Destination port */
+ uchar udplen[2]; /* data length */
+ uchar udpcksum[2]; /* Checksum */
+};
+
+/* MIB II counters */
+typedef struct Udpstats Udpstats;
+struct Udpstats
+{
+ ulong udpInDatagrams; /* bumped for every block entering udpiput */
+ ulong udpNoPorts; /* no conversation matched the datagram */
+ ulong udpInErrors; /* bumped on checksum failure in udpiput */
+ ulong udpOutDatagrams; /* bumped at the end of udpkick */
+};
+
+/* per-protocol private data, kept in Proto.priv by udpinit */
+typedef struct Udppriv Udppriv;
+struct Udppriv
+{
+ Ipht ht; /* conversations added by connect/announce/new call; searched with iphtlook */
+
+ /* MIB counters */
+ Udpstats ustats;
+
+ /* non-MIB stats */
+ ulong csumerr; /* checksum errors (never incremented in this file) */
+ ulong lenerr; /* short packet */
+};
+
+void (*etherprofiler)(char *name, int qlen);
+void udpkick(void *x, Block *bp);
+
+/*
+ * protocol specific part of Conv
+ */
+typedef struct Udpcb Udpcb;
+struct Udpcb
+{
+ QLock qlock; /* not used in this file */
+ uchar headers; /* 0, or 7 once the "headers" ctl message was written (see udpctl) */
+};
+
+/*
+ * connect entry point: do the standard connect, report the
+ * outcome with Fsconnected and, on success, enter the
+ * conversation in the hash table.
+ * Returns nil on success or the error string.
+ */
+static char*
+udpconnect(Conv *c, char **argv, int argc)
+{
+	Udppriv *priv;
+	char *err;
+
+	priv = c->p->priv;
+	err = Fsstdconnect(c, argv, argc);
+
+	/* called whether or not the connect worked */
+	Fsconnected(c, err);
+	if(err == nil)
+		iphtadd(&priv->ht, c);
+	return err;
+}
+
+
+/*
+ * Format the status line of a conversation into state (at most
+ * n bytes): Open/Closed and the lengths of both queues.
+ * Returns snprint's result.
+ */
+static int
+udpstate(Conv *c, char *state, int n)
+{
+	char *status;
+	int nin, nout;
+
+	status = c->inuse ? "Open" : "Closed";
+
+	/* a missing queue is reported as empty */
+	nin = c->rq ? qlen(c->rq) : 0;
+	nout = c->wq ? qlen(c->wq) : 0;
+
+	return snprint(state, n, "%s qin %d qout %d\n", status, nin, nout);
+}
+
+/*
+ * announce entry point: do the standard announce and, if it
+ * worked, mark the conversation connected and enter it in the
+ * hash table.  Returns nil on success or the error string.
+ */
+static char*
+udpannounce(Conv *c, char** argv, int argc)
+{
+	Udppriv *priv;
+	char *err;
+
+	priv = c->p->priv;
+	err = Fsstdannounce(c, argv, argc);
+	if(err == nil){
+		Fsconnected(c, nil);
+		iphtadd(&priv->ht, c);
+	}
+	return err;
+}
+
+/*
+ * create entry point: give a new conversation its queues.
+ * Reads come from a 128k message queue; writes bypass queueing
+ * and go straight to udpkick with the conversation as argument.
+ */
+static void
+udpcreate(Conv *c)
+{
+ c->rq = qopen(128*1024, Qmsg, 0, 0);
+ c->wq = qbypass(udpkick, c);
+}
+
+/*
+ * close entry point: take the conversation out of the hash
+ * table, close its queues and reset addresses, ports and the
+ * headers mode.
+ */
+static void
+udpclose(Conv *c)
+{
+	Udppriv *priv;
+
+	priv = c->p->priv;
+	iphtrem(&priv->ht, c);
+
+	c->state = 0;
+	qclose(c->rq);
+	qclose(c->wq);
+	qclose(c->eq);
+
+	/* forget both endpoints */
+	c->lport = 0;
+	c->rport = 0;
+	ipmove(c->laddr, IPnoaddr);
+	ipmove(c->raddr, IPnoaddr);
+
+	/* back to plain mode without user headers */
+	((Udpcb*)c->ptcl)->headers = 0;
+}
+
+/*
+ * Output routine: installed by udpcreate as the bypass function
+ * of the write queue, with the Conv as x.  Prepends IP and UDP
+ * headers to bp, computes the UDP checksum over the pseudo header
+ * and hands the block to ipoput4 or ipoput6.
+ *
+ * In "headers" mode (ucb->headers == 7) the first UDP_USEAD7
+ * bytes of bp supply the remote address, local address and remote
+ * port; otherwise they come from the conversation.
+ */
+void
+udpkick(void *x, Block *bp)
+{
+ Conv *c = x;
+ Udp4hdr *uh4;
+ Udp6hdr *uh6;
+ ushort rport;
+ uchar laddr[IPaddrlen], raddr[IPaddrlen];
+ Udpcb *ucb;
+ int dlen, ptcllen;
+ Udppriv *upriv;
+ Fs *f;
+ int version;
+ Conv *rc;
+
+ upriv = c->p->priv;
+ f = c->p->f;
+
+ netlog(c->p->f, Logudp, "udp: kick\n");
+ if(bp == nil)
+ return;
+
+ ucb = (Udpcb*)c->ptcl;
+ switch(ucb->headers) {
+ case 7:
+ /* get user specified addresses */
+ /* layout: raddr, laddr, ifc addr, rport, lport */
+ bp = pullupblock(bp, UDP_USEAD7);
+ if(bp == nil)
+ return;
+ ipmove(raddr, bp->rp);
+ bp->rp += IPaddrlen;
+ ipmove(laddr, bp->rp);
+ bp->rp += IPaddrlen;
+ /* pick interface closest to dest */
+ if(ipforme(f, laddr) != Runi)
+ findlocalip(f, laddr, raddr);
+ bp->rp += IPaddrlen; /* Ignore ifc address */
+ rport = nhgets(bp->rp);
+ bp->rp += 2+2; /* Ignore local port */
+ break;
+ default:
+ /* raddr and laddr stay unset; they are only read when headers != 0 */
+ rport = 0;
+ break;
+ }
+
+ /* choose the ip version from the addresses that will be used */
+ if(ucb->headers) {
+ if(memcmp(laddr, v4prefix, IPv4off) == 0
+ || ipcmp(laddr, IPnoaddr) == 0)
+ version = 4;
+ else
+ version = 6;
+ } else {
+ if( (memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
+ memcmp(c->laddr, v4prefix, IPv4off) == 0)
+ || ipcmp(c->raddr, IPnoaddr) == 0)
+ version = 4;
+ else
+ version = 6;
+ }
+
+ /* payload length, measured before any header is prepended */
+ dlen = blocklen(bp);
+
+ /* fill in pseudo header and compute checksum */
+ switch(version){
+ case V4:
+ bp = padblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ);
+ if(bp == nil)
+ return;
+
+ uh4 = (Udp4hdr *)(bp->rp);
+ ptcllen = dlen + UDP_UDPHDR_SZ;
+ uh4->Unused = 0;
+ uh4->udpproto = IP_UDPPROTO;
+ uh4->frag[0] = 0;
+ uh4->frag[1] = 0;
+ hnputs(uh4->udpplen, ptcllen);
+ if(ucb->headers) {
+ v6tov4(uh4->udpdst, raddr);
+ hnputs(uh4->udpdport, rport);
+ v6tov4(uh4->udpsrc, laddr);
+ rc = nil;
+ } else {
+ v6tov4(uh4->udpdst, c->raddr);
+ hnputs(uh4->udpdport, c->rport);
+ /* no local address yet: pick one for this destination */
+ if(ipcmp(c->laddr, IPnoaddr) == 0)
+ findlocalip(f, c->laddr, c->raddr);
+ v6tov4(uh4->udpsrc, c->laddr);
+ rc = c;
+ }
+ hnputs(uh4->udpsport, c->lport);
+ hnputs(uh4->udplen, ptcllen);
+ /* the checksum field must be zero while the sum is computed */
+ uh4->udpcksum[0] = 0;
+ uh4->udpcksum[1] = 0;
+ hnputs(uh4->udpcksum,
+ ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ));
+ uh4->vihl = IP_VER4;
+ ipoput4(f, bp, 0, c->ttl, c->tos, rc);
+ break;
+
+ case V6:
+ bp = padblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ);
+ if(bp == nil)
+ return;
+
+ /*
+ * using the v6 ip header to create pseudo header
+ * first then reset it to the normal ip header
+ */
+ uh6 = (Udp6hdr *)(bp->rp);
+ memset(uh6, 0, 8);
+ ptcllen = dlen + UDP_UDPHDR_SZ;
+ /* pseudo header: length in viclfl, protocol number in hoplimit */
+ hnputl(uh6->viclfl, ptcllen);
+ uh6->hoplimit = IP_UDPPROTO;
+ if(ucb->headers) {
+ ipmove(uh6->udpdst, raddr);
+ hnputs(uh6->udpdport, rport);
+ ipmove(uh6->udpsrc, laddr);
+ rc = nil;
+ } else {
+ ipmove(uh6->udpdst, c->raddr);
+ hnputs(uh6->udpdport, c->rport);
+ if(ipcmp(c->laddr, IPnoaddr) == 0)
+ findlocalip(f, c->laddr, c->raddr);
+ ipmove(uh6->udpsrc, c->laddr);
+ rc = c;
+ }
+ hnputs(uh6->udpsport, c->lport);
+ hnputs(uh6->udplen, ptcllen);
+ uh6->udpcksum[0] = 0;
+ uh6->udpcksum[1] = 0;
+ hnputs(uh6->udpcksum,
+ ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ));
+ /* checksum done: turn the first 8 bytes back into a real ip header */
+ memset(uh6, 0, 8);
+ uh6->viclfl[0] = IP_VER6;
+ hnputs(uh6->len, ptcllen);
+ uh6->nextheader = IP_UDPPROTO;
+ ipoput6(f, bp, 0, c->ttl, c->tos, rc);
+ break;
+
+ default:
+ panic("udpkick: version %d", version);
+ }
+ upriv->ustats.udpOutDatagrams++;
+}
+
+/*
+ * Input routine (Proto.rcv): called with a block bp holding an IP
+ * packet whose payload is UDP, received on ifc.
+ *
+ * Verifies the checksum, finds the conversation for the
+ * address/port pair, creates a new conversation for an announced
+ * one that is not in "headers" mode, strips the headers and
+ * queues the data on the conversation's read queue.  In "headers"
+ * mode a UDP_USEAD7 byte header with both addresses, the
+ * interface address and both ports is put in front of the data.
+ * bp is consumed on every path.
+ */
+void
+udpiput(Proto *udp, Ipifc *ifc, Block *bp)
+{
+	int len;
+	Udp4hdr *uh4;
+	Udp6hdr *uh6;
+	Conv *c;
+	Udpcb *ucb;
+	uchar raddr[IPaddrlen], laddr[IPaddrlen];
+	ushort rport, lport;
+	Udppriv *upriv;
+	Fs *f;
+	int version;
+	int ottl, oviclfl, olen;
+	uchar *p;
+
+	upriv = udp->priv;
+	f = udp->f;
+	upriv->ustats.udpInDatagrams++;
+
+	uh4 = (Udp4hdr*)(bp->rp);
+	version = ((uh4->vihl&0xF0)==IP_VER6) ? 6 : 4;
+
+	/* Put back pseudo header for checksum
+	 * (remember old values for icmpnoconv()) */
+	switch(version) {
+	case V4:
+		ottl = uh4->Unused;
+		uh4->Unused = 0;
+		len = nhgets(uh4->udplen);
+		olen = nhgets(uh4->udpplen);
+		hnputs(uh4->udpplen, len);
+
+		v4tov6(raddr, uh4->udpsrc);
+		v4tov6(laddr, uh4->udpdst);
+		lport = nhgets(uh4->udpdport);
+		rport = nhgets(uh4->udpsport);
+
+		/* a zero checksum field is accepted without verification */
+		if(nhgets(uh4->udpcksum)) {
+			if(ptclcsum(bp, UDP4_PHDR_OFF, len+UDP4_PHDR_SZ)) {
+				upriv->ustats.udpInErrors++;
+				netlog(f, Logudp, "udp: checksum error %I\n", raddr);
+				DPRINT("udp: checksum error %I\n", raddr);
+				freeblist(bp);
+				return;
+			}
+		}
+		/* restore the header fields borrowed for the pseudo header */
+		uh4->Unused = ottl;
+		hnputs(uh4->udpplen, olen);
+		break;
+	case V6:
+		uh6 = (Udp6hdr*)(bp->rp);
+		len = nhgets(uh6->udplen);
+		oviclfl = nhgetl(uh6->viclfl);
+		olen = nhgets(uh6->len);
+		ottl = uh6->hoplimit;
+		ipmove(raddr, uh6->udpsrc);
+		ipmove(laddr, uh6->udpdst);
+		lport = nhgets(uh6->udpdport);
+		rport = nhgets(uh6->udpsport);
+		/* pseudo header: length in viclfl, protocol number in hoplimit */
+		memset(uh6, 0, 8);
+		hnputl(uh6->viclfl, len);
+		uh6->hoplimit = IP_UDPPROTO;
+		if(ptclcsum(bp, UDP6_PHDR_OFF, len+UDP6_PHDR_SZ)) {
+			upriv->ustats.udpInErrors++;
+			netlog(f, Logudp, "udp: checksum error %I\n", raddr);
+			DPRINT("udp: checksum error %I\n", raddr);
+			freeblist(bp);
+			return;
+		}
+		hnputl(uh6->viclfl, oviclfl);
+		hnputs(uh6->len, olen);
+		uh6->nextheader = IP_UDPPROTO;
+		uh6->hoplimit = ottl;
+		break;
+	default:
+		panic("udpiput: version %d", version);
+		return;	/* to avoid a warning */
+	}
+
+	QLOCK(udp);
+
+	c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
+	if(c == nil){
+		/* no conversation found */
+		upriv->ustats.udpNoPorts++;
+		QUNLOCK(udp);
+		netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
+			laddr, lport);
+
+		switch(version){
+		case V4:
+			icmpnoconv(f, bp);
+			break;
+		case V6:
+			icmphostunr(f, ifc, bp, Icmp6_port_unreach, 0);
+			break;
+		default:
+			panic("udpiput2: version %d", version);
+		}
+
+		freeblist(bp);
+		return;
+	}
+	ucb = (Udpcb*)c->ptcl;
+
+	if(c->state == Announced){
+		if(ucb->headers == 0){
+			/* create a new conversation */
+			/*
+			 * If the destination was not a unicast address of
+			 * ours, use the interface's first local address.
+			 * ifc->lifc->local is a full IPaddrlen-byte address
+			 * (it is ipmove'd as such for the headers format
+			 * below), so it is copied whole for both versions;
+			 * v4tov6 here would read only its first four bytes.
+			 * NOTE(review): assumes ifc->lifc is not nil -- confirm.
+			 */
+			if(ipforme(f, laddr) != Runi)
+				ipmove(laddr, ifc->lifc->local);
+			c = Fsnewcall(c, raddr, rport, laddr, lport, version);
+			if(c == nil){
+				QUNLOCK(udp);
+				freeblist(bp);
+				return;
+			}
+			iphtadd(&upriv->ht, c);
+			ucb = (Udpcb*)c->ptcl;
+		}
+	}
+
+	/* take the conversation lock before letting go of the protocol lock */
+	QLOCK(c);
+	QUNLOCK(udp);
+
+	/*
+	 * Trim the packet down to data size
+	 */
+	len -= UDP_UDPHDR_SZ;
+	switch(version){
+	case V4:
+		bp = trimblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ, len);
+		break;
+	case V6:
+		bp = trimblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ, len);
+		break;
+	default:
+		bp = nil;
+		panic("udpiput4: version %d", version);
+	}
+	if(bp == nil){
+		QUNLOCK(c);
+		netlog(f, Logudp, "udp: len err %I.%d -> %I.%d\n", raddr, rport,
+			laddr, lport);
+		upriv->lenerr++;
+		return;
+	}
+
+	netlog(f, Logudpmsg, "udp: %I.%d -> %I.%d l %d\n", raddr, rport,
+		laddr, lport, len);
+
+	switch(ucb->headers){
+	case 7:
+		/* pass the src address */
+		bp = padblock(bp, UDP_USEAD7);
+		p = bp->rp;
+		ipmove(p, raddr); p += IPaddrlen;
+		ipmove(p, laddr); p += IPaddrlen;
+		ipmove(p, ifc->lifc->local); p += IPaddrlen;
+		hnputs(p, rport); p += 2;
+		hnputs(p, lport);
+		break;
+	}
+
+	/* the read queue gets one contiguous block per datagram */
+	if(bp->next)
+		bp = concatblock(bp);
+
+	/* drop the datagram when the reader is not keeping up */
+	if(qfull(c->rq)){
+		QUNLOCK(c);
+		netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n", raddr, rport,
+			laddr, lport);
+		freeblist(bp);
+		return;
+	}
+
+	qpass(c->rq, bp);
+	QUNLOCK(c);
+
+}
+
+/*
+ * ctl entry point.  The only request understood is the single
+ * word "headers", which switches the conversation to the format
+ * with user-visible address headers.
+ * Returns nil on success, otherwise an error string.
+ */
+char*
+udpctl(Conv *c, char **f, int n)
+{
+	Udpcb *ucb;
+
+	if(n == 1 && strcmp(f[0], "headers") == 0){
+		ucb = (Udpcb*)c->ptcl;
+		ucb->headers = 7;	/* new headers format */
+		return nil;
+	}
+	return "unknown control request";
+}
+
+/*
+ * advise entry point: bp holds the headers of a packet we sent
+ * and msg says what went wrong with it.  The conversation that
+ * sent the packet, if one is found and it does not ignore
+ * advice, has both queues hung up with msg.  bp is always freed.
+ */
+void
+udpadvise(Proto *udp, Block *bp, char *msg)
+{
+	Udp4hdr *h4;
+	Udp6hdr *h6;
+	uchar source[IPaddrlen], dest[IPaddrlen];
+	ushort psource, pdest;
+	Conv *s, **p;
+	int version;
+
+	h4 = (Udp4hdr*)(bp->rp);
+	version = ((h4->vihl&0xF0)==IP_VER6) ? 6 : 4;
+
+	/* pull addresses and ports out of the returned headers */
+	switch(version) {
+	case V4:
+		v4tov6(dest, h4->udpdst);
+		v4tov6(source, h4->udpsrc);
+		psource = nhgets(h4->udpsport);
+		pdest = nhgets(h4->udpdport);
+		break;
+	case V6:
+		h6 = (Udp6hdr*)(bp->rp);
+		ipmove(dest, h6->udpdst);
+		ipmove(source, h6->udpsrc);
+		psource = nhgets(h6->udpsport);
+		pdest = nhgets(h6->udpdport);
+		break;
+	default:
+		panic("udpadvise: version %d", version);
+		return;	/* to avoid a warning */
+	}
+
+	/* Look for a connection */
+	QLOCK(udp);
+	for(p = udp->conv; (s = *p) != nil; p++){
+		/* the packet's source is our local side, its destination our remote side */
+		if(s->rport != pdest || s->lport != psource)
+			continue;
+		if(ipcmp(s->raddr, dest) != 0 || ipcmp(s->laddr, source) != 0)
+			continue;
+
+		/* a match that ignores advice ends the search */
+		if(s->ignoreadvice)
+			break;
+
+		QLOCK(s);
+		QUNLOCK(udp);
+		qhangup(s->rq, msg);
+		qhangup(s->wq, msg);
+		QUNLOCK(s);
+		freeblist(bp);
+		return;
+	}
+	QUNLOCK(udp);
+	freeblist(bp);
+}
+
+/*
+ * stats entry point: format the four MIB counters into buf (at
+ * most len bytes).  Returns snprint's result.
+ */
+int
+udpstats(Proto *udp, char *buf, int len)
+{
+	Udppriv *priv;
+	Udpstats *st;
+
+	priv = udp->priv;
+	st = &priv->ustats;
+	return snprint(buf, len, "InDatagrams: %lud\nNoPorts: %lud\nInErrors: %lud\nOutDatagrams: %lud\n",
+		st->udpInDatagrams,
+		st->udpNoPorts,
+		st->udpInErrors,
+		st->udpOutDatagrams);
+}
+
+/*
+ * Build the Proto descriptor for udp and register it with the
+ * IP stack instance fs through Fsproto.
+ */
+void
+udpinit(Fs *fs)
+{
+	Proto *p;
+
+	p = smalloc(sizeof(Proto));
+	p->priv = smalloc(sizeof(Udppriv));
+
+	/* identity and sizing */
+	p->name = "udp";
+	p->ipproto = IP_UDPPROTO;
+	p->nc = Nchans;
+	p->ptclsize = sizeof(Udpcb);
+
+	/* protocol entry points */
+	p->create = udpcreate;
+	p->connect = udpconnect;
+	p->announce = udpannounce;
+	p->close = udpclose;
+	p->ctl = udpctl;
+	p->rcv = udpiput;
+	p->advise = udpadvise;
+	p->state = udpstate;
+	p->stats = udpstats;
+
+	Fsproto(fs, p);
+}