/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * The header arguments below were lost in extraction; this list is
 * reconstructed to match the FreeBSD pci_virtio_net.c of this vintage.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/select.h>
#include <sys/uio.h>
#include <sys/ioctl.h>
#include <machine/atomic.h>	/* mb() */
#include <net/ethernet.h>
#ifndef NETMAP_WITH_LIBS
#define NETMAP_WITH_LIBS
#endif
#include <net/netmap_user.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <assert.h>
#include <md5.h>
#include <pthread.h>
#include <pthread_np.h>

#include "bhyverun.h"
#include "pci_emul.h"
#include "mevent.h"
#include "virtio.h"
#include "net_utils.h"
#include "net_backends.h"

#define VTNET_RINGSZ	1024

#define VTNET_MAXSEGS	256

/* Our capabilities: we don't support VIRTIO_NET_F_MRG_RXBUF at the moment. */
#define VTNET_S_HOSTCAPS \
	( VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | \
	  VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC)
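/*
 * For reference (from the virtio specification): VIRTIO_NET_F_MAC means
 * the device supplies a MAC address in config space, VIRTIO_NET_F_STATUS
 * means the "status" config field carries valid link state,
 * VIRTIO_F_NOTIFY_ON_EMPTY asks the device to interrupt when the avail
 * ring drains even if interrupts are suppressed, and
 * VIRTIO_RING_F_INDIRECT_DESC lets the guest post indirect descriptor
 * tables.
 */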
/*
 * PCI config-space "registers"
 */
struct virtio_net_config {
	uint8_t  mac[6];
	uint16_t status;
	uint16_t max_virtqueue_pairs;
} __packed;

/*
 * Queue definitions.
 */
#define VTNET_RXQ	0
#define VTNET_TXQ	1
#define VTNET_CTLQ	2	/* NB: not yet supported */

#define VTNET_MAXQ	3

/*
 * Debug printf
 */
static int pci_vtnet_debug;
#define DPRINTF(params) if (pci_vtnet_debug) printf params
#define WPRINTF(params) printf params

/*
 * Per-device softc
 */
struct pci_vtnet_softc {
	struct virtio_softc vsc_vs;
	struct vqueue_info vsc_queues[VTNET_MAXQ - 1];
	pthread_mutex_t vsc_mtx;

	struct net_backend *vsc_be;
	int		vsc_rx_ready;
	volatile int	resetting;	/* set and checked outside lock */

	uint64_t	vsc_features;	/* negotiated features */

	struct virtio_net_config vsc_config;

	pthread_mutex_t	rx_mtx;
	int		rx_vhdrlen;
	int		rx_merge;	/* merged rx bufs in use */

	pthread_t	tx_tid;
	pthread_mutex_t	tx_mtx;
	pthread_cond_t	tx_cond;
	int		tx_in_progress;
};

static void pci_vtnet_reset(void *);
/* static void pci_vtnet_notify(void *, struct vqueue_info *); */
static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
static void pci_vtnet_neg_features(void *, uint64_t);

static struct virtio_consts vtnet_vi_consts = {
	"vtnet",		/* our name */
	VTNET_MAXQ - 1,		/* we currently support 2 virtqueues */
	sizeof(struct virtio_net_config), /* config reg size */
	pci_vtnet_reset,	/* reset */
	NULL,			/* device-wide qnotify -- not used */
	pci_vtnet_cfgread,	/* read PCI config */
	pci_vtnet_cfgwrite,	/* write PCI config */
	pci_vtnet_neg_features,	/* apply negotiated features */
	VTNET_S_HOSTCAPS,	/* our capabilities */
};

/*
 * If the transmit thread is active then stall until it is done.
 */
static void
pci_vtnet_txwait(struct pci_vtnet_softc *sc)
{

	pthread_mutex_lock(&sc->tx_mtx);
	while (sc->tx_in_progress) {
		pthread_mutex_unlock(&sc->tx_mtx);
		usleep(10000);
		pthread_mutex_lock(&sc->tx_mtx);
	}
	pthread_mutex_unlock(&sc->tx_mtx);
}

/*
 * If the receive thread is active then stall until it is done.
 * It is enough to lock and unlock the RX mutex.
 */
static void
pci_vtnet_rxwait(struct pci_vtnet_softc *sc)
{

	pthread_mutex_lock(&sc->rx_mtx);
	pthread_mutex_unlock(&sc->rx_mtx);
}

static void
pci_vtnet_reset(void *vsc)
{
	struct pci_vtnet_softc *sc = vsc;

	DPRINTF(("vtnet: device reset requested !\n"));

	sc->resetting = 1;

	/*
	 * Wait for the transmit and receive threads to finish their
	 * processing.
	 */
	pci_vtnet_txwait(sc);
	pci_vtnet_rxwait(sc);

	sc->vsc_rx_ready = 0;
	sc->rx_merge = 1;
	sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr);

	/* now reset rings, MSI-X vectors, and negotiated capabilities */
	vi_reset_dev(&sc->vsc_vs);
	sc->resetting = 0;
}
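/*
 * A note on the reset protocol above: "resetting" is deliberately not
 * protected by a mutex.  The reset path sets it and then drains both
 * data paths: pci_vtnet_txwait() polls until the tx thread parks with
 * tx_in_progress == 0, while pci_vtnet_rxwait() simply passes through
 * rx_mtx, which cannot be acquired while pci_vtnet_rx() is running.
 * Only once both paths are quiescent is it safe to call vi_reset_dev().
 */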
/*
 * MP note: the dummybuf is only used to discard frames, so there is
 * no need for it to be per-vtnet or locked.  We only make it large
 * enough for a TSO-sized segment.
 */
static void
pci_vtnet_rx_discard(struct pci_vtnet_softc *sc, struct iovec *iov)
{
	static uint8_t dummybuf[65536 + 64];

	iov[0].iov_base = dummybuf;
	iov[0].iov_len = sizeof(dummybuf);
	netbe_recv(sc->vsc_be, iov, 1);
}

/*
 * Called when there is read activity on the backend file descriptor.
 * Each buffer posted by the guest is assumed to be able to contain
 * an entire ethernet frame + rx header.
 */
static void
pci_vtnet_rx(struct pci_vtnet_softc *sc)
{
	struct iovec iov[VTNET_MAXSEGS + 1];
	struct vqueue_info *vq;
	int len, n;
	uint16_t idx;

	/*
	 * This will be called when the rx ring hasn't yet
	 * been set up or the guest is resetting the device.
	 */
	if (!sc->vsc_rx_ready || sc->resetting) {
		/*
		 * Drop the packet and try later.
		 */
		pci_vtnet_rx_discard(sc, iov);
		return;
	}

	/*
	 * Check for available rx buffers.
	 */
	vq = &sc->vsc_queues[VTNET_RXQ];
	if (!vq_has_descs(vq)) {
		/*
		 * Drop the packet and try later.  Interrupt on
		 * empty, if that's negotiated.
		 */
		pci_vtnet_rx_discard(sc, iov);
		vq_endchains(vq, 1);
		return;
	}

	do {
		/*
		 * Get descriptor chain.
		 */
		n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
		assert(n >= 1 && n <= VTNET_MAXSEGS);

		len = netbe_recv(sc->vsc_be, iov, n);

		if (len == 0) {
			/*
			 * No more packets, but still some avail ring
			 * entries.  Interrupt if needed/appropriate.
			 */
			vq_retchain(vq);
			vq_endchains(vq, 0);
			return;
		}

		/*
		 * Release this chain and handle more chains.
		 */
		vq_relchain(vq, idx, len + sc->rx_vhdrlen);
	} while (vq_has_descs(vq));

	/* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
	vq_endchains(vq, 1);
}

static void
pci_vtnet_rx_callback(int fd, enum ev_type type, void *param)
{
	struct pci_vtnet_softc *sc = param;

	pthread_mutex_lock(&sc->rx_mtx);
	pci_vtnet_rx(sc);
	pthread_mutex_unlock(&sc->rx_mtx);
}

static void
pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
{
	struct pci_vtnet_softc *sc = vsc;

	/*
	 * A qnotify means that the rx process can now begin.
	 */
	if (sc->vsc_rx_ready == 0) {
		sc->vsc_rx_ready = 1;
		vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
	}
}

static void
pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
{
	struct iovec iov[VTNET_MAXSEGS + 1];
	int i, n;
	int len;
	uint16_t idx;

	/*
	 * Obtain chain of descriptors.  The first descriptor also
	 * contains the virtio-net header.
	 */
	n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
	assert(n >= 1 && n <= VTNET_MAXSEGS);

	len = 0;
	for (i = 0; i < n; i++) {
		len += iov[i].iov_len;
	}
	netbe_send(sc->vsc_be, iov, n, len, 0 /* more */);

	/* chain is processed, release it and set len */
	vq_relchain(vq, idx, len);
}

static void
pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq)
{
	struct pci_vtnet_softc *sc = vsc;

	/*
	 * Any ring entries to process?
	 */
	if (!vq_has_descs(vq))
		return;

	/* Signal the tx thread for processing. */
	pthread_mutex_lock(&sc->tx_mtx);
	vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
	if (sc->tx_in_progress == 0)
		pthread_cond_signal(&sc->tx_cond);
	pthread_mutex_unlock(&sc->tx_mtx);
}
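/*
 * Notify suppression in the tx path works as follows: while the tx
 * thread is actively draining the ring it keeps VRING_USED_F_NO_NOTIFY
 * set, so the guest skips the (expensive) notification exit.  Before
 * going back to sleep the thread clears the flag, issues a memory
 * barrier, and re-checks the avail ring once more; this closes the race
 * where the guest posts a descriptor just as the flag is being cleared.
 */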
/*
 * Thread which will handle processing of TX desc
 */
static void *
pci_vtnet_tx_thread(void *param)
{
	struct pci_vtnet_softc *sc = param;
	struct vqueue_info *vq;
	int error;

	vq = &sc->vsc_queues[VTNET_TXQ];

	/*
	 * Let us wait till the tx queue pointers get initialised &
	 * first tx signaled.
	 */
	pthread_mutex_lock(&sc->tx_mtx);
	error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
	assert(error == 0);

	for (;;) {
		/* note - tx mutex is locked here */
		while (sc->resetting || !vq_has_descs(vq)) {
			vq->vq_used->vu_flags &= ~VRING_USED_F_NO_NOTIFY;
			mb();
			if (!sc->resetting && vq_has_descs(vq))
				break;

			sc->tx_in_progress = 0;
			error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
			assert(error == 0);
		}
		vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
		sc->tx_in_progress = 1;
		pthread_mutex_unlock(&sc->tx_mtx);

		do {
			/*
			 * Run through entries, placing them into
			 * iovecs and sending when an end-of-packet
			 * is found.
			 */
			pci_vtnet_proctx(sc, vq);
		} while (vq_has_descs(vq));

		/*
		 * Generate an interrupt if needed.
		 */
		vq_endchains(vq, 1);

		pthread_mutex_lock(&sc->tx_mtx);
	}
}

#ifdef notyet
static void
pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq)
{

	DPRINTF(("vtnet: control qnotify!\n\r"));
}
#endif

static int
pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	char tname[MAXCOMLEN + 1];
	struct pci_vtnet_softc *sc;
	char *devname;
	char *vtopts;
	int mac_provided;
	struct virtio_consts *vc;

	/*
	 * The softc also contains a copy of vtnet_vi_consts,
	 * because the capabilities change depending on
	 * the backend.
	 */
	sc = calloc(1, sizeof(struct pci_vtnet_softc) +
	    sizeof(struct virtio_consts));
	vc = (struct virtio_consts *)(sc + 1);
	memcpy(vc, &vtnet_vi_consts, sizeof(*vc));

	pthread_mutex_init(&sc->vsc_mtx, NULL);

	vi_softc_linkup(&sc->vsc_vs, vc, sc, pi, sc->vsc_queues);
	sc->vsc_vs.vs_mtx = &sc->vsc_mtx;

	sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq;
	sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq;
#ifdef notyet
	sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq;
#endif

	/*
	 * Attempt to open the backend device and read the MAC address
	 * if specified.
	 */
	mac_provided = 0;
	if (opts != NULL) {
		int err;

		devname = vtopts = strdup(opts);
		(void) strsep(&vtopts, ",");

		if (vtopts != NULL) {
			err = net_parsemac(vtopts, sc->vsc_config.mac);
			if (err != 0) {
				free(devname);
				return (err);
			}
			mac_provided = 1;
		}

		sc->vsc_be = netbe_init(devname, pci_vtnet_rx_callback, sc);
		if (!sc->vsc_be) {
			WPRINTF(("net backend initialization failed\n"));
		} else {
			vc->vc_hv_caps |= netbe_get_cap(sc->vsc_be);
		}
		free(devname);
	}

	if (!mac_provided) {
		net_genmac(pi, sc->vsc_config.mac);
	}

	/* initialize config space */
	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);

	/* Link is up if we managed to open backend device. */
	sc->vsc_config.status = (opts == NULL || sc->vsc_be);

	/* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */
	if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix()))
		return (1);

	/* use BAR 0 to map config regs in IO space */
	vi_set_io_bar(&sc->vsc_vs, 0);

	sc->resetting = 0;

	sc->rx_merge = 1;
	sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr);
	pthread_mutex_init(&sc->rx_mtx, NULL);

	/*
	 * Initialize tx semaphore & spawn TX processing thread.
	 * As of now, only one thread for TX desc processing is
	 * spawned.
	 */
	sc->tx_in_progress = 0;
	pthread_mutex_init(&sc->tx_mtx, NULL);
	pthread_cond_init(&sc->tx_cond, NULL);
	pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc);
	snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot,
	    pi->pi_func);
	pthread_set_name_np(sc->tx_tid, tname);

	return (0);
}
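/*
 * Guest accesses to the device-specific config window are routed to the
 * two handlers below by the virtio framework through vtnet_vi_consts.
 * Only the first six bytes (the MAC address) are writable; all other
 * fields are read-only from the guest's point of view.
 */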
static int
pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value)
{
	struct pci_vtnet_softc *sc = vsc;
	void *ptr;

	if (offset < 6) {
		assert(offset + size <= 6);
		/*
		 * The driver is allowed to change the MAC address.
		 */
		ptr = &sc->vsc_config.mac[offset];
		memcpy(ptr, &value, size);
	} else {
		/* silently ignore other writes */
		DPRINTF(("vtnet: write to readonly reg %d\n\r", offset));
	}

	return (0);
}

static int
pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval)
{
	struct pci_vtnet_softc *sc = vsc;
	void *ptr;

	ptr = (uint8_t *)&sc->vsc_config + offset;
	memcpy(retval, ptr, size);

	return (0);
}

static void
pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features)
{
	struct pci_vtnet_softc *sc = vsc;

	sc->vsc_features = negotiated_features;

	if (!(negotiated_features & VIRTIO_NET_F_MRG_RXBUF)) {
		sc->rx_merge = 0;
		/* non-merge rx header is 2 bytes shorter */
		sc->rx_vhdrlen -= 2;
	}

	/* Tell the backend to enable some capabilities it has advertised. */
	netbe_set_cap(sc->vsc_be, negotiated_features, sc->rx_vhdrlen);
}

struct pci_devemu pci_de_vnet = {
	.pe_emu =	"virtio-net",
	.pe_init =	pci_vtnet_init,
	.pe_barwrite =	vi_pci_write,
	.pe_barread =	vi_pci_read
};
PCI_EMUL_SET(pci_de_vnet);
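/*
 * Usage sketch (not part of this file): the device is instantiated from
 * the bhyve command line via a PCI slot option, e.g.
 *
 *	bhyve -s 2:0,virtio-net,tap0 ... vmname
 *
 * Here "tap0" is the backend name handed to netbe_init(), and an
 * optional second comma-separated option is passed to net_parsemac()
 * as the guest MAC address (see pci_vtnet_init() above).
 */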