--- /sys/src/9/pc/ethervirtio.c +++ /sys/src/9/pc/ethervirtio.c @@ -0,0 +1,688 @@ +/* + * virtio ethernet driver implementing the legacy interface: + * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html + */ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "../port/error.h" +#include "../port/netif.h" +#include "etherif.h" + +typedef struct Vring Vring; +typedef struct Vdesc Vdesc; +typedef struct Vused Vused; +typedef struct Vheader Vheader; +typedef struct Vqueue Vqueue; +typedef struct Ctlr Ctlr; + +enum { + /* §2.1 Device Status Field */ + Sacknowledge = 1, + Sdriver = 2, + Sdriverok = 4, + Sfeatureok = 8, + Sfailed = 128, + + /* §4.1.4.8 Legacy Interfaces: A Note on PCI Device Layout */ + Qdevfeat = 0, + Qdrvfeat = 4, + Qaddr = 8, + Qsize = 12, + Qselect = 14, + Qnotify = 16, + Qstatus = 18, + Qisr = 19, + Qmac = 20, + Qnetstatus = 26, + + /* flags in Qnetstatus */ + Nlinkup = (1<<0), + Nannounce = (1<<1), + + /* feature bits */ + Fmac = (1<<5), + Fstatus = (1<<16), + Fctrlvq = (1<<17), + Fctrlrx = (1<<18), + + /* vring used flags */ + Unonotify = 1, + /* vring avail flags */ + Rnointerrupt = 1, + + /* descriptor flags */ + Dnext = 1, + Dwrite = 2, + Dindirect = 4, + + /* struct sizes */ + VringSize = 4, + VdescSize = 16, + VusedSize = 8, + VheaderSize = 10, + + /* §4.1.5.1.4.1 says pages are 4096 bytes + * for the purposes of the driver. + */ + VBY2PG = 4096, +#define VPGROUND(s) ROUND(s, VBY2PG) + + Vrxq = 0, + Vtxq = 1, + Vctlq = 2, + + /* class/cmd for Vctlq */ + CtrlRx = 0x00, + CmdPromisc = 0x00, + CmdAllmulti = 0x01, + CtrlMac = 0x01, + CmdMacTableSet = 0x00, + CtrlVlan= 0x02, + CmdVlanAdd = 0x00, + CmdVlanDel = 0x01, +}; + +struct Vring +{ + u16int flags; + u16int idx; +}; + +struct Vdesc +{ + u64int addr; + u32int len; + u16int flags; + u16int next; +}; + +struct Vused +{ + u32int id; + u32int len; +}; + +struct Vheader +{ + u8int flags; + u8int segtype; + u16int hlen; + u16int seglen; + u16int csumstart; + u16int csumend; +}; + +/* §2.4 Virtqueues */ +struct Vqueue +{ + Rendez; + + uint qsize; + uint qmask; + + Vdesc *desc; + + Vring *avail; + u16int *availent; + u16int *availevent; + + Vring *used; + Vused *usedent; + u16int *usedevent; + u16int lastused; + + uint nintr; + uint nnote; +}; + +struct Ctlr { + Lock; + + QLock ctllock; + + int attached; + + int port; + Pcidev *pcidev; + Ctlr *next; + int active; + int id; + int typ; + ulong feat; + int nqueue; + + /* virtioether has 3 queues: rx, tx and ctl */ + Vqueue queue[3]; +}; + +static Ctlr *ctlrhead; + +static int +vhasroom(void *v) +{ + Vqueue *q = v; + return q->lastused != q->used->idx; +} + +static void +vqnotify(Ctlr *ctlr, int x) +{ + Vqueue *q; + + coherence(); + q = &ctlr->queue[x]; + if(q->used->flags & Unonotify) + return; + q->nnote++; + outs(ctlr->port+Qnotify, x); +} + +static void +txproc(void *v) +{ + Vheader *header; + Block **blocks; + Ether *edev; + Ctlr *ctlr; + Vqueue *q; + Vused *u; + Block *b; + int i, j; + + edev = v; + ctlr = edev->ctlr; + q = &ctlr->queue[Vtxq]; + + header = smalloc(VheaderSize); + blocks = smalloc(sizeof(Block*) * (q->qsize/2)); + + for(i = 0; i < q->qsize/2; i++){ + j = i << 1; + q->desc[j].addr = PADDR(header); + q->desc[j].len = VheaderSize; + q->desc[j].next = j | 1; + q->desc[j].flags = Dnext; + + q->availent[i] = q->availent[i + q->qsize/2] = j; + + j |= 1; + q->desc[j].next = 0; + q->desc[j].flags = 0; + } + + q->avail->flags &= ~Rnointerrupt; + + while(waserror()) + ; + + while((b = qbread(edev->oq, 1000000)) != nil){ + for(;;){ + /* retire completed packets */ + while((i = q->lastused) != q->used->idx){ + u = &q->usedent[i & q->qmask]; + i = (u->id & q->qmask) >> 1; + if(blocks[i] == nil) + break; + freeb(blocks[i]); + blocks[i] = nil; + q->lastused++; + } + + /* have free slot? */ + i = q->avail->idx & (q->qmask >> 1); + if(blocks[i] == nil) + break; + + /* ring full, wait and retry */ + if(!vhasroom(q)) + sleep(q, vhasroom, q); + } + + /* slot is free, fill in descriptor */ + blocks[i] = b; + j = (i << 1) | 1; + q->desc[j].addr = PADDR(b->rp); + q->desc[j].len = BLEN(b); + coherence(); + q->avail->idx++; + vqnotify(ctlr, Vtxq); + } + + pexit("ether out queue closed", 1); +} + +static void +rxproc(void *v) +{ + Vheader *header; + Block **blocks; + Ether *edev; + Ctlr *ctlr; + Vqueue *q; + Vused *u; + Block *b; + int i, j; + + edev = v; + ctlr = edev->ctlr; + q = &ctlr->queue[Vrxq]; + + header = smalloc(VheaderSize); + blocks = smalloc(sizeof(Block*) * (q->qsize/2)); + + for(i = 0; i < q->qsize/2; i++){ + j = i << 1; + q->desc[j].addr = PADDR(header); + q->desc[j].len = VheaderSize; + q->desc[j].next = j | 1; + q->desc[j].flags = Dwrite|Dnext; + + q->availent[i] = q->availent[i + q->qsize/2] = j; + + j |= 1; + q->desc[j].next = 0; + q->desc[j].flags = Dwrite; + } + + q->avail->flags &= ~Rnointerrupt; + + while(waserror()) + ; + + for(;;){ + /* replenish receive ring */ + do { + i = q->avail->idx & (q->qmask >> 1); + if(blocks[i] != nil) + break; + if((b = iallocb(ETHERMAXTU)) == nil) + break; + blocks[i] = b; + j = (i << 1) | 1; + q->desc[j].addr = PADDR(b->rp); + q->desc[j].len = BALLOC(b); + coherence(); + q->avail->idx++; + } while(q->avail->idx != q->used->idx); + vqnotify(ctlr, Vrxq); + + /* wait for any packets to complete */ + if(!vhasroom(q)) + sleep(q, vhasroom, q); + + /* retire completed packets */ + while((i = q->lastused) != q->used->idx) { + u = &q->usedent[i & q->qmask]; + i = (u->id & q->qmask) >> 1; + if((b = blocks[i]) == nil) + break; + + blocks[i] = nil; + + b->wp = b->rp + u->len - VheaderSize; + etheriq(edev, b, 1); + q->lastused++; + } + } +} + +static int +vctlcmd(Ether *edev, uchar class, uchar cmd, uchar *data, int ndata) +{ + uchar hdr[2], ack[1]; + Ctlr *ctlr; + Vqueue *q; + Vdesc *d; + int i; + + ctlr = edev->ctlr; + q = &ctlr->queue[Vctlq]; + if(q->qsize < 3) + return -1; + + qlock(&ctlr->ctllock); + while(waserror()) + ; + + ack[0] = 0x55; + hdr[0] = class; + hdr[1] = cmd; + + d = &q->desc[0]; + d->addr = PADDR(hdr); + d->len = sizeof(hdr); + d->next = 1; + d->flags = Dnext; + d++; + d->addr = PADDR(data); + d->len = ndata; + d->next = 2; + d->flags = Dnext; + d++; + d->addr = PADDR(ack); + d->len = sizeof(ack); + d->next = 0; + d->flags = Dwrite; + + i = q->avail->idx & q->qmask; + q->availent[i] = 0; + coherence(); + + q->avail->flags &= ~Rnointerrupt; + q->avail->idx++; + vqnotify(ctlr, Vctlq); + while(!vhasroom(q)) + sleep(q, vhasroom, q); + q->lastused = q->used->idx; + q->avail->flags |= Rnointerrupt; + + qunlock(&ctlr->ctllock); + poperror(); + + if(ack[0] != 0) + print("#l%d: vctlcmd: %ux.%ux -> %ux\n", edev->ctlrno, class, cmd, ack[0]); + + return ack[0]; +} + +static void +interrupt(Ureg*, void* arg) +{ + Ether *edev; + Ctlr *ctlr; + Vqueue *q; + int i; + + edev = arg; + ctlr = edev->ctlr; + if(inb(ctlr->port+Qisr) & 1){ + for(i = 0; i < ctlr->nqueue; i++){ + q = &ctlr->queue[i]; + if(vhasroom(q)){ + q->nintr++; + wakeup(q); + } + } + } +} + +static void +attach(Ether* edev) +{ + char name[KNAMELEN]; + Ctlr* ctlr; + + ctlr = edev->ctlr; + lock(ctlr); + if(ctlr->attached){ + unlock(ctlr); + return; + } + ctlr->attached = 1; + unlock(ctlr); + + /* ready to go */ + outb(ctlr->port+Qstatus, inb(ctlr->port+Qstatus) | Sdriverok); + + /* start kprocs */ + snprint(name, sizeof name, "#l%drx", edev->ctlrno); + kproc(name, rxproc, edev); + snprint(name, sizeof name, "#l%dtx", edev->ctlrno); + kproc(name, txproc, edev); +} + +static long +ifstat(Ether *edev, void *a, long n, ulong offset) +{ + int i, l; + char *p; + Ctlr *ctlr; + Vqueue *q; + + ctlr = edev->ctlr; + + p = smalloc(READSTR); + + l = snprint(p, READSTR, "devfeat %4.4luX\n", ctlr->feat); + l += snprint(p+l, READSTR-l, "drvfeat %4.4luX\n", inl(ctlr->port+Qdrvfeat)); + l += snprint(p+l, READSTR-l, "devstatus %uX\n", inb(ctlr->port+Qstatus)); + if(ctlr->feat & Fstatus) + l += snprint(p+l, READSTR-l, "netstatus %uX\n", inb(ctlr->port+Qnetstatus)); + + for(i = 0; i < ctlr->nqueue; i++){ + q = &ctlr->queue[i]; + l += snprint(p+l, READSTR-l, + "vq%d %#p size %d avail->idx %d used->idx %d lastused %hud nintr %ud nnote %ud\n", + i, q, q->qsize, q->avail->idx, q->used->idx, q->lastused, q->nintr, q->nnote); + } + + n = readstr(offset, a, n, p); + free(p); + + return n; +} + +static void +shutdown(Ether* edev) +{ + Ctlr *ctlr = edev->ctlr; + outb(ctlr->port+Qstatus, 0); + pciclrbme(ctlr->pcidev); +} + +static void +promiscuous(void *arg, int on) +{ + Ether *edev = arg; + uchar b[1]; + + b[0] = on != 0; + vctlcmd(edev, CtrlRx, CmdPromisc, b, sizeof(b)); +} + +static void +multicast(void *arg, uchar*, int) +{ + Ether *edev = arg; + uchar b[1]; + + b[0] = edev->nmaddr > 0; + vctlcmd(edev, CtrlRx, CmdAllmulti, b, sizeof(b)); +} + +/* §2.4.2 Legacy Interfaces: A Note on Virtqueue Layout */ +static ulong +queuesize(ulong size) +{ + return VPGROUND(VdescSize*size + sizeof(u16int)*(3+size)) + + VPGROUND(sizeof(u16int)*3 + VusedSize*size); +} + +static int +initqueue(Vqueue *q, int size) +{ + uchar *p; + + /* §2.4: Queue Size value is always a power of 2 and <= 32768 */ + assert(!(size & (size - 1)) && size <= 32768); + + p = mallocalign(queuesize(size), VBY2PG, 0, 0); + if(p == nil){ + print("ethervirtio: no memory for Vqueue\n"); + free(p); + return -1; + } + + q->desc = (void*)p; + p += VdescSize*size; + q->avail = (void*)p; + p += VringSize; + q->availent = (void*)p; + p += sizeof(u16int)*size; + q->availevent = (void*)p; + p += sizeof(u16int); + + p = (uchar*)VPGROUND((uintptr)p); + q->used = (void*)p; + p += VringSize; + q->usedent = (void*)p; + p += VusedSize*size; + q->usedevent = (void*)p; + + q->qsize = size; + q->qmask = q->qsize - 1; + + q->lastused = q->avail->idx = q->used->idx = 0; + + q->avail->flags |= Rnointerrupt; + + return 0; +} + +static Ctlr* +pciprobe(int typ) +{ + Ctlr *c, *h, *t; + Pcidev *p; + int n, i; + + h = t = nil; + + /* §4.1.2 PCI Device Discovery */ + for(p = nil; p = pcimatch(p, 0x1AF4, 0);){ + /* the two possible DIDs for virtio-net */ + if(p->did != 0x1000 && p->did != 0x1041) + continue; + /* + * non-transitional devices will have a revision > 0, + * these are handled by ethervirtio10 driver. + */ + if(p->rid != 0) + continue; + /* first membar needs to be I/O */ + if((p->mem[0].bar & 1) == 0) + continue; + /* non-transitional device will have typ+0x40 */ + if(pcicfgr16(p, 0x2E) != typ) + continue; + if((c = mallocz(sizeof(Ctlr), 1)) == nil){ + print("ethervirtio: no memory for Ctlr\n"); + break; + } + c->port = p->mem[0].bar & ~3; + if(ioalloc(c->port, p->mem[0].size, 0, "ethervirtio") < 0){ + print("ethervirtio: port %ux in use\n", c->port); + free(c); + continue; + } + + c->typ = typ; + c->pcidev = p; + c->id = (p->did<<16)|p->vid; + + /* §3.1.2 Legacy Device Initialization */ + outb(c->port+Qstatus, 0); + while(inb(c->port+Qstatus) != 0) + delay(1); + outb(c->port+Qstatus, Sacknowledge|Sdriver); + + /* negotiate feature bits */ + c->feat = inl(c->port+Qdevfeat); + outl(c->port+Qdrvfeat, c->feat & (Fmac|Fstatus|Fctrlvq|Fctrlrx)); + + /* §4.1.5.1.4 Virtqueue Configuration */ + for(i=0; iqueue); i++){ + outs(c->port+Qselect, i); + n = ins(c->port+Qsize); + if(n == 0 || (n & (n-1)) != 0){ + if(i < 2) + print("ethervirtio: queue %d has invalid size %d\n", i, n); + break; + } + if(initqueue(&c->queue[i], n) < 0) + break; + coherence(); + outl(c->port+Qaddr, PADDR(c->queue[i].desc)/VBY2PG); + } + if(i < 2){ + print("ethervirtio: no queues\n"); + free(c); + continue; + } + c->nqueue = i; + + if(h == nil) + h = c; + else + t->next = c; + t = c; + } + + return h; +} + + +static int +reset(Ether* edev) +{ + static uchar zeros[Eaddrlen]; + Ctlr *ctlr; + int i; + + if(ctlrhead == nil) + ctlrhead = pciprobe(1); + + for(ctlr = ctlrhead; ctlr != nil; ctlr = ctlr->next){ + if(ctlr->active) + continue; + if(edev->port == 0 || edev->port == ctlr->port){ + ctlr->active = 1; + break; + } + } + + if(ctlr == nil) + return -1; + + edev->ctlr = ctlr; + edev->port = ctlr->port; + edev->irq = ctlr->pcidev->intl; + edev->tbdf = ctlr->pcidev->tbdf; + edev->mbps = 1000; + edev->link = 1; + + if((ctlr->feat & Fmac) != 0 && memcmp(edev->ea, zeros, Eaddrlen) == 0){ + for(i = 0; i < Eaddrlen; i++) + edev->ea[i] = inb(ctlr->port+Qmac+i); + } else { + for(i = 0; i < Eaddrlen; i++) + outb(ctlr->port+Qmac+i, edev->ea[i]); + } + + edev->arg = edev; + + edev->attach = attach; + edev->shutdown = shutdown; + edev->ifstat = ifstat; + + if((ctlr->feat & (Fctrlvq|Fctrlrx)) == (Fctrlvq|Fctrlrx)){ + edev->multicast = multicast; + edev->promiscuous = promiscuous; + } + + pcisetbme(ctlr->pcidev); + intrenable(edev->irq, interrupt, edev, edev->tbdf, edev->name); + + return 0; +} + +void +ethervirtiolink(void) +{ + addethercard("virtio", reset); +} +