[Minios-devel] [UNIKRAFT PATCH] plat/kvm: Introduce virtio network driver
Initial implementation of a virtio network driver, based on the Unikraft
Net API and the virtio base driver/rings. The driver supports the basic
Net API operations: start/stop, RX/TX of packets, and an RX interrupt
callback. It has been tested with lwIP, both as a polling-mode driver
and with interrupts enabled. The implementation was ported from Solo5
and adapted to the Unikraft APIs.

Signed-off-by: Razvan Cojocaru <razvan.cojocaru93@xxxxxxxxx>
---
 plat/drivers/virtio/virtio_net.c | 546 +++++++++++++++++++++++++++++++++++++++
 plat/kvm/Config.uk               |  10 +-
 plat/kvm/Makefile.uk             |  11 +-
 3 files changed, 560 insertions(+), 7 deletions(-)
 create mode 100644 plat/drivers/virtio/virtio_net.c
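Reviewer note (below the fold, not part of the commit message): the
sketch that follows shows how a network stack might drive this driver
through the uk_netdev hooks registered in virtio_net_add_dev(). It is
illustrative only; uk_netdev_get() and the rx_cb signature are
assumptions based on how the Net API is used in this patch, and error
handling is trimmed.

/* Minimal consumer sketch -- not part of this patch. */
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <uk/netdev.h>

static char rx_payload[1526]; /* one packet buffer, >= PKT_BUFFER_LEN */

/* Signature assumed from the driver's call site: n->rx_cb(n, 0). */
static void rx_handler(struct uk_netdev *n, uint16_t queue_id)
{
	struct uk_netdev_mbuf mbuf;

	mbuf.payload = rx_payload;
	/* rx_pkt() returns the packet length, or 0 if the ring is empty;
	 * drain everything that is currently pending.
	 */
	while (n->rx_pkt(n, queue_id, &mbuf) > 0) {
		/* hand mbuf.payload / mbuf.len to the network stack here */
	}
}

static int netdev_bring_up(void)
{
	struct uk_netdev *n;
	int rc;

	n = uk_netdev_get(0); /* assumed accessor for the netdev with id 0 */
	if (!n)
		return -ENODEV;

	rc = n->dev_ops->dev_configure(n, NULL);
	if (rc)
		return rc;
	rc = n->dev_ops->rx_queue_setup(n, 0, NULL);
	if (rc)
		return rc;
	rc = n->dev_ops->tx_queue_setup(n, 0, NULL);
	if (rc)
		return rc;

	/* Setting rx_cb before dev_start() makes virtio_net_start() spawn
	 * the RX thread; leaving it NULL keeps the device in polling mode.
	 */
	n->rx_cb = rx_handler;
	rc = n->dev_ops->dev_start(n);
	if (rc)
		return rc;

	/* Interrupts stay masked until explicitly enabled. */
	return n->dev_ops->rx_enable_intr(n, 0);
}

The same sequence with rx_cb left NULL exercises the polling path,
matching the two lwIP test modes mentioned in the commit message.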
diff --git a/plat/drivers/virtio/virtio_net.c b/plat/drivers/virtio/virtio_net.c
new file mode 100644
index 0000000..d416752
--- /dev/null
+++ b/plat/drivers/virtio/virtio_net.c
@@ -0,0 +1,546 @@
+/* SPDX-License-Identifier: ISC */
+/*
+ * Authors: Dan Williams
+ *          Martin Lucina
+ *          Ricardo Koller
+ *          Razvan Cojocaru <razvan.cojocaru93@xxxxxxxxx>
+ *
+ * Copyright (c) 2015-2017 IBM
+ * Copyright (c) 2016-2017 Docker, Inc.
+ * Copyright (c) 2018, NEC Europe Ltd., NEC Corporation
+ *
+ * Permission to use, copy, modify, and/or distribute this software
+ * for any purpose with or without fee is hereby granted, provided
+ * that the above copyright notice and this permission notice appear
+ * in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
+ * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+ * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Taken and adapted from solo5 virtio_net.c */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <uk/plat/lcpu.h>
+#include <pci/pci_bus.h>
+#include <kvm/irq.h>
+#include <cpu.h>
+#include <pci/virtio/virtio_ring.h>
+#include <pci/virtio/virtio_pci.h>
+#include <uk/wait.h>
+#include <uk/netdev.h>
+#include <uk/print.h>
+#include <uk/assert.h>
+#include <uk/essentials.h>
+#if CONFIG_HAVE_SCHED
+#include <uk/thread.h>
+#include <uk/wait.h>
+#endif
+
+#define VENDOR_QUMRANET_VIRTIO 0x1af4
+#define PCI_CONF_SUBSYS_NET 1
+
+/* The feature bitmap for virtio net (bit masks) */
+#define VIRTIO_NET_F_CSUM (1 << 0) /* Host handles pkts w/ partial csum */
+#define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* Guest handles partial csum */
+#define VIRTIO_NET_F_MAC (1 << 5) /* Host has given MAC address. */
+
+#define PKT_BUFFER_LEN 1526
+
+static struct uk_alloc *a;
+
+#define VIRTQ_RECV 0
+#define VIRTQ_XMIT 1
+
+/* This header comes first in the scatter-gather list.
+ * If VIRTIO_F_ANY_LAYOUT is not negotiated, it must
+ * be the first element of the scatter-gather list. If you don't
+ * specify GSO or CSUM features, you can simply ignore the header.
+ */
+struct __packed virtio_net_hdr {
+#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */
+#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
+	uint8_t flags;
+#define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */
+#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */
+#define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */
+#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */
+#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */
+	uint8_t gso_type;
+	uint16_t hdr_len; /* Ethernet + IP + tcp/udp hdrs */
+	uint16_t gso_size; /* Bytes to append to hdr_len per frame */
+	uint16_t csum_start; /* Position to start checksumming from */
+	uint16_t csum_offset; /* Offset after that to place checksum */
+};
+
+struct virtio_net_device {
+	struct pci_device *dev;
+	struct uk_netdev netdev;
+	uint16_t pci_base; /* base in PCI config space */
+	struct virtq recvq;
+	uint8_t recv_intr_enabled;
+	struct virtq xmitq;
+	struct uk_thread *thread;
+	struct uk_waitq wq;
+};
+
+static int virtio_net_irq_handle(void *arg)
+{
+	struct virtio_net_device *d = (struct virtio_net_device *) arg;
+	uint8_t isr_status;
+
+	if (unlikely(d->netdev.data->state != UK_NETDEV_RUNNING))
+		return 0;
+
+	isr_status = inb(d->pci_base + VIRTIO_PCI_ISR);
+	if (isr_status & VIRTIO_PCI_ISR_HAS_INTR) {
+		uk_waitq_wake_up(&d->wq);
+		return 1;
+	}
+	return 0;
+}
+
+static void recv_setup(struct virtio_net_device *d)
+{
+	uint16_t mask = (uint16_t)(d->recvq.num - 1);
+
+	do {
+		struct io_buffer
+			*buf; /* header and data in a single descriptor */
+		int rc;
+
+		buf = &d->recvq.bufs[d->recvq.next_avail & mask];
+		memset(buf->data, 0, PKT_BUFFER_LEN);
+		buf->len = PKT_BUFFER_LEN;
+		buf->extra_flags = VIRTQ_DESC_F_WRITE;
+		/* Keep the call outside UK_ASSERT(); asserts may be
+		 * compiled out, which would drop the ring update.
+		 */
+		rc = virtq_add_descriptor_chain(&d->recvq,
+						d->recvq.next_avail & mask, 1);
+		UK_ASSERT(rc == 0);
+	} while ((d->recvq.next_avail & mask) != 0);
+
+	outw(d->pci_base + VIRTIO_PCI_QUEUE_NOTIFY, VIRTQ_RECV);
+}
+
+static int virtio_netdev_xmit(struct uk_netdev *n,
+			      uint16_t queue_id __unused,
+			      struct uk_netdev_mbuf *mbuf)
+{
+	struct virtio_net_device *d;
+	struct io_buffer *head_buf, *data_buf;
+	uint16_t mask;
+	uint16_t head;
+	int r;
+
+	UK_ASSERT(n != NULL);
+	d = __containerof(n, struct virtio_net_device, netdev);
+	mask = (uint16_t)(d->xmitq.num - 1);
+
+	if (unlikely(mbuf->len > PKT_BUFFER_LEN))
+		return -EINVAL;
+
+	/* Consume used descriptors from all the previous tx'es. */
+	for (; d->xmitq.last_used != d->xmitq.used->idx; d->xmitq.last_used++)
+		d->xmitq.num_avail += 2; /* 2 descriptors per chain */
+
+	/* next_avail is incremented by virtq_add_descriptor_chain below. */
+	head = d->xmitq.next_avail & mask;
+	head_buf = &d->xmitq.bufs[head];
+	data_buf = &d->xmitq.bufs[(head + 1) & mask];
+
+	/* The header buf */
+	memset(head_buf->data, 0, sizeof(struct virtio_net_hdr));
+	head_buf->len = sizeof(struct virtio_net_hdr);
+	head_buf->extra_flags = 0;
+
+	/* The data buf */
+	memcpy(data_buf->data, mbuf->payload, mbuf->len);
+	data_buf->len = mbuf->len;
+	data_buf->extra_flags = 0;
+
+	r = virtq_add_descriptor_chain(&d->xmitq, head, 2);
+
+	outw(d->pci_base + VIRTIO_PCI_QUEUE_NOTIFY, VIRTQ_XMIT);
+
+	return r;
+}
+
+/* Get the data from the next_avail (top-most) receive buffer/descriptor in
+ * the available ring.
+ */
+static uint8_t *virtio_net_recv_ring_get(struct virtio_net_device *d,
+					 uint16_t *size)
+{
+	uint16_t mask;
+	struct virtq_used_elem *e;
+	struct io_buffer *buf;
+	uint8_t *pkt;
+
+	mask = (uint16_t)(d->recvq.num - 1);
+
+	d->recvq.avail->flags |= VIRTQ_AVAIL_F_NO_INTERRUPT;
+
+	/* The device increments used->idx whenever it uses a packet (i.e. it
+	 * placed a packet on our receive queue); if it is ahead of last_used,
+	 * we have a pending packet.
+	 */
+	if (d->recvq.last_used == d->recvq.used->idx)
+		return NULL;
+
+	e = &(d->recvq.used->ring[d->recvq.last_used & mask]);
+
+	if (e->len == 0) {
+		if (d->recv_intr_enabled)
+			d->recvq.avail->flags &= ~VIRTQ_AVAIL_F_NO_INTERRUPT;
+		*size = 0;
+		return NULL;
+	}
+
+	buf = (struct io_buffer *)d->recvq.desc[e->id].addr;
+	buf->len = e->len;
+
+	/* Remove the virtio_net_hdr */
+	*size = buf->len - sizeof(struct virtio_net_hdr);
+	pkt = buf->data + sizeof(struct virtio_net_hdr);
+
+	UK_ASSERT(*size <= PKT_BUFFER_LEN);
+
+	return pkt;
+}
+
+/* Return the next_avail (top-most) receive buffer/descriptor to the available
+ * ring.
+ */
+static void virtio_net_recv_ring_release(struct virtio_net_device *d)
+{
+	uint16_t mask;
+	int rc;
+
+	/* Consume the recently used descriptor. */
+	d->recvq.last_used++;
+	d->recvq.num_avail++;
+
+	mask = (uint16_t)(d->recvq.num - 1);
+	d->recvq.bufs[d->recvq.next_avail & mask].len = PKT_BUFFER_LEN;
+	d->recvq.bufs[d->recvq.next_avail & mask].extra_flags =
+		VIRTQ_DESC_F_WRITE;
+
+	/* This sets the returned descriptor to be ready for incoming
+	 * packets, and advances the next_avail index. The call stays
+	 * outside UK_ASSERT() so it survives builds without asserts.
+	 */
+	rc = virtq_add_descriptor_chain(&d->recvq,
+					d->recvq.next_avail & mask, 1);
+	UK_ASSERT(rc == 0);
+	outw(d->pci_base + VIRTIO_PCI_QUEUE_NOTIFY, VIRTQ_RECV);
+
+	if (d->recv_intr_enabled)
+		d->recvq.avail->flags &= ~VIRTQ_AVAIL_F_NO_INTERRUPT;
+}
+
+static int virtio_netdev_recv(struct uk_netdev *n,
+			      uint16_t queue_id __unused,
+			      struct uk_netdev_mbuf *mbuf)
+{
+	struct virtio_net_device *d;
+	uint8_t *pkt;
+	uint16_t pktlen = 0;
+
+	UK_ASSERT(n != NULL);
+	d = __containerof(n, struct virtio_net_device, netdev);
+
+	pkt = virtio_net_recv_ring_get(d, &pktlen);
+
+	if (pkt) {
+		/* Note: this copy means the receive path is not zero-copy. */
+		memcpy(mbuf->payload, pkt, pktlen);
+		mbuf->len = pktlen;
+		virtio_net_recv_ring_release(d);
+	}
+
+	return pktlen;
+}
+
+static int virtio_netdev_rx_queue_setup(struct uk_netdev *n,
+	uint16_t queue_id __unused,
+	const struct uk_netdev_rxqueue_conf *conf __unused)
+{
+	struct virtio_net_device *d;
+	int err;
+
+	UK_ASSERT(n != NULL);
+	d = __containerof(n, struct virtio_net_device, netdev);
+
+	/*
+	 * Perform device-specific setup, including discovery of virtqueues
+	 * for the device, optional per-bus setup, reading and possibly
+	 * writing the device's virtio configuration space, and population
+	 * of virtqueues.
+	 */
+	err = virtq_rings_init(&d->recvq, d->pci_base, VIRTQ_RECV, a);
+	if (err)
+		goto err_out;
+
+	d->recvq.bufs = uk_calloc(a, d->recvq.num, sizeof(struct io_buffer));
+	if (!d->recvq.bufs) {
+		err = -ENOMEM;
+		goto err_freeq;
+	}
+
+	recv_setup(d);
+
+	return 0;
+
+err_freeq:
+	virtq_rings_fini(&d->recvq, d->pci_base, VIRTQ_RECV, a);
+err_out:
+	return err;
+}
+
+static int virtio_netdev_tx_queue_setup(struct uk_netdev *n,
+	uint16_t queue_id __unused,
+	const struct uk_netdev_txqueue_conf *conf __unused)
+{
+	struct virtio_net_device *d;
+	int err;
+
+	UK_ASSERT(n != NULL);
+	d = __containerof(n, struct virtio_net_device, netdev);
+
+	err = virtq_rings_init(&d->xmitq, d->pci_base, VIRTQ_XMIT, a);
+	if (err)
+		goto err_out;
+
+	d->xmitq.bufs = uk_calloc(a, d->xmitq.num, sizeof(struct io_buffer));
+	if (!d->xmitq.bufs) {
+		err = -ENOMEM;
+		goto err_freeq;
+	}
+
+	return 0;
+
+err_freeq:
+	virtq_rings_fini(&d->xmitq, d->pci_base, VIRTQ_XMIT, a);
+err_out:
+	return err;
+}
+
+static int virtio_netdev_configure(struct uk_netdev *n,
+				   __unused const struct uk_netdev_conf *conf)
+{
+	struct virtio_net_device *d;
+	uint32_t host_features, guest_features;
+	struct uk_hwaddr mac;
+
+	UK_ASSERT(n != NULL);
+	d = __containerof(n, struct virtio_net_device, netdev);
+
+	d->pci_base = d->dev->base;
+
+	/*
+	 * Set the ACKNOWLEDGE status bit: the guest OS has noticed the
+	 * device.
+	 * Set the DRIVER status bit: the guest OS knows how to drive the
+	 * device.
+	 */
+	outb(d->pci_base + VIRTIO_PCI_STATUS, VIRTIO_PCI_STATUS_ACK);
+	outb(d->pci_base + VIRTIO_PCI_STATUS, VIRTIO_PCI_STATUS_DRIVER);
+
+	/*
+	 * Read device feature bits, and write the subset of feature bits
+	 * understood by the OS and driver to the device. During this step the
+	 * driver MAY read (but MUST NOT write) the device-specific
+	 * configuration fields to check that it can support the device before
+	 * accepting it.
+	 */
+	host_features = inl(d->pci_base + VIRTIO_PCI_HOST_FEATURES);
+	UK_ASSERT(host_features & VIRTIO_NET_F_MAC);
+
+	/* Only negotiate that the MAC was set for now. */
+	guest_features = VIRTIO_NET_F_MAC;
+	outl(d->pci_base + VIRTIO_PCI_GUEST_FEATURES, guest_features);
+
+	for (int i = 0; i < UK_HWADDR_LEN; i++)
+		mac.addr_bytes[i] =
+			inb(d->pci_base + VIRTIO_PCI_CONFIG_OFF + i);
+	memcpy(&n->data->mac_addr, &mac, sizeof(struct uk_hwaddr));
+
+	ukplat_irq_register(d->dev->irq, virtio_net_irq_handle, d);
+
+	/*
+	 * Set the DRIVER_OK status bit. At this point the device is "live".
+	 */
+	outb(d->pci_base + VIRTIO_PCI_STATUS, VIRTIO_PCI_STATUS_DRIVER_OK);
+
+	d->netdev.data->state = UK_NETDEV_CONFIGURED;
+
+	uk_printd(DLVL_INFO,
+		  "PCI:%02x:%02x: Configured (features=0x%x, irq=%lu)\n",
+		  d->dev->addr.bus, d->dev->addr.devid, host_features,
+		  d->dev->irq);
+
+	return 0;
+}
+
+int virtio_net_enable_rx_intr(struct uk_netdev *n,
+			      uint16_t rx_queue_id __unused)
+{
+	struct virtio_net_device *d;
+
+	UK_ASSERT(n != NULL);
+	d = __containerof(n, struct virtio_net_device, netdev);
+	d->recv_intr_enabled = 1;
+	d->recvq.avail->flags &= ~VIRTQ_AVAIL_F_NO_INTERRUPT;
+	return 0;
+}
+
+int virtio_net_disable_rx_intr(struct uk_netdev *n,
+			       uint16_t rx_queue_id __unused)
+{
+	struct virtio_net_device *d;
+
+	UK_ASSERT(n != NULL);
+	d = __containerof(n, struct virtio_net_device, netdev);
+	d->recv_intr_enabled = 0;
+	d->recvq.avail->flags |= VIRTQ_AVAIL_F_NO_INTERRUPT;
+	return 0;
+}
+
+static void virtio_net_thread(void *arg)
+{
+	struct virtio_net_device *d = arg;
+	struct uk_netdev *n;
+
+	UK_ASSERT(d != NULL);
+	n = &d->netdev;
+
+	while (n->data->state == UK_NETDEV_RUNNING) {
+		uk_waitq_wait_event(&d->wq,
+				    d->recvq.last_used != d->recvq.used->idx);
+		n->rx_cb(n, 0);
+	}
+}
+
+int virtio_net_start(struct uk_netdev *n)
+{
+	struct virtio_net_device *d;
+	char buf[UK_NETDEV_NAME_MAX_LEN];
+
+	UK_ASSERT(n != NULL);
+	d = __containerof(n, struct virtio_net_device, netdev);
+
+	sprintf(buf, "virtio-net%d", n->data->id);
+	uk_netdev_name_set(n, buf, (uint16_t)strlen(buf));
+	uk_printd(DLVL_INFO, "%s started\n", buf);
+
+	d->netdev.data->state = UK_NETDEV_RUNNING;
+
+	/* Start the thread that handles packet RX callbacks */
+	if (n->rx_cb != NULL) {
+		uk_waitq_init(&d->wq);
+		d->thread = uk_thread_create(buf, virtio_net_thread, d);
+		if (d->thread == NULL) {
+			uk_printd(DLVL_ERR, "Error creating %s thread\n", buf);
+			return -ENOMEM;
+		}
+	}
+
+	/*
+	 * By default, interrupts are disabled; it is up to the user or the
+	 * network stack to enable them explicitly with a call to
+	 * enable_tx|rx_intr().
+	 */
+	d->recv_intr_enabled = 0;
+	d->recvq.avail->flags |= VIRTQ_AVAIL_F_NO_INTERRUPT;
+	d->xmitq.avail->flags |= VIRTQ_AVAIL_F_NO_INTERRUPT;
+
+	return 0;
+}
+
+void virtio_net_stop(struct uk_netdev *n)
+{
+	struct virtio_net_device *d;
+
+	UK_ASSERT(n != NULL);
+	d = __containerof(n, struct virtio_net_device, netdev);
+
+	d->netdev.data->state = UK_NETDEV_CONFIGURED;
+}
+
+void virtio_net_close(struct uk_netdev *n)
+{
+	struct virtio_net_device *d;
+
+	UK_ASSERT(n != NULL);
+	d = __containerof(n, struct virtio_net_device, netdev);
+
+	d->netdev.data->state = UK_NETDEV_UNCONFIGURED;
+}
+
+static const struct uk_netdev_ops virtio_netdev_ops = {
+	.dev_configure = virtio_netdev_configure,
+	.rx_queue_setup = virtio_netdev_rx_queue_setup,
+	.tx_queue_setup = virtio_netdev_tx_queue_setup,
+	.dev_start = virtio_net_start,
+	.dev_stop = virtio_net_stop,
+	.dev_close = virtio_net_close,
+	.rx_enable_intr = virtio_net_enable_rx_intr,
+	.rx_disable_intr = virtio_net_disable_rx_intr,
+};
+
+static int virtio_net_add_dev(struct pci_device *dev)
+{
+	struct virtio_net_device *d;
+	int err;
+
+	UK_ASSERT(dev != NULL);
+
+	d = uk_malloc(a, sizeof(*d));
+	if (!d) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	d->dev = dev;
+
+	/* register netdev */
+	d->netdev.rx_pkt = virtio_netdev_recv;
+	d->netdev.tx_pkt = virtio_netdev_xmit;
+	d->netdev.dev_ops = &virtio_netdev_ops;
+
+	d->netdev.data = uk_malloc(a, sizeof(struct uk_netdev_data));
+	d->netdev.data->state = UK_NETDEV_UNCONFIGURED;
+	d->netdev.data->mtu = PKT_BUFFER_LEN;
+
+	uk_netdev_register(&d->netdev);
+
+	return 0;
+
+err_out:
+	return err;
+}
+
+static int virtio_net_drv_init(struct uk_alloc *drv_allocator)
+{
+	/* driver initialization */
+	if (!drv_allocator)
+		return -EINVAL;
+
+	a = drv_allocator;
+	return 0;
+}
+
+static const struct pci_device_id pci_id_map[] = {
+	{PCI_CLASS_ANY_ID, VENDOR_QUMRANET_VIRTIO, PCI_ANY_ID, PCI_ANY_ID,
+	 PCI_CONF_SUBSYS_NET},
+	{PCI_ANY_DEVICE_ID},
+};
+
+static struct pci_driver virtio_net_drv = {
+	.device_ids = pci_id_map,
+	.init = virtio_net_drv_init,
+	.add_dev = virtio_net_add_dev
+};
+
+PCI_REGISTER_DRIVER(&virtio_net_drv);
diff --git a/plat/kvm/Config.uk b/plat/kvm/Config.uk
index 118954d..4526925 100644
--- a/plat/kvm/Config.uk
+++ b/plat/kvm/Config.uk
@@ -19,10 +19,14 @@ config KVM_PCI
 	  PCI bus driver for probing and operating PCI devices
 
 if (KVM_PCI)
-config KVM_PCI_VIRTIO
-	bool "Virtio Ring"
+menu "Virtio"
+config KVM_PCI_VIRTIONET
+	bool "Virtio Networking"
 	default n
+	select LIBUKNETDEV
 	help
-	  Virtual queues to traverse host and guest transition
+	  Paravirtualized networking driver
+
+endmenu
 endif
 endif
diff --git a/plat/kvm/Makefile.uk b/plat/kvm/Makefile.uk
index e379c83..d761588 100644
--- a/plat/kvm/Makefile.uk
+++ b/plat/kvm/Makefile.uk
@@ -8,7 +8,7 @@ $(eval $(call addplat_s,kvm,$(CONFIG_PLAT_KVM)))
 ##
 $(eval $(call addplatlib,kvm,libkvmplat))
 $(eval $(call addplatlib_s,kvm,libkvmpci,$(CONFIG_KVM_PCI)))
-$(eval $(call addplatlib_s,kvm,libkvmpcivirtio,$(CONFIG_KVM_PCI_VIRTIO)))
+$(eval $(call addplatlib_s,kvm,libkvmpcivirtionet,$(CONFIG_KVM_PCI_VIRTIONET)))
 
 ##
 ## Platform library definitions
@@ -52,6 +52,9 @@ LIBKVMPCI_SRCS-y += $(UK_PLAT_COMMON_BASE)/pci_bus.c|common
 ##
 ## Virtio library definitions
 ##
-LIBKVMPCIVIRTIO_ASINCLUDES-y += -I$(UK_PLAT_COMMON_BASE)/include
-LIBKVMPCIVIRTIO_CINCLUDES-y += -I$(UK_PLAT_COMMON_BASE)/include
-LIBKVMPCIVIRTIO_SRCS-y += $(UK_PLAT_DRIVERS_BASE)/virtio/virtio_ring.c
+LIBKVMPCIVIRTIONET_ASINCLUDES-y += -I$(LIBKVMPLAT_BASE)/include
+LIBKVMPCIVIRTIONET_CINCLUDES-y += -I$(LIBKVMPLAT_BASE)/include
+LIBKVMPCIVIRTIONET_ASINCLUDES-y += -I$(UK_PLAT_COMMON_BASE)/include
+LIBKVMPCIVIRTIONET_CINCLUDES-y += -I$(UK_PLAT_COMMON_BASE)/include
+LIBKVMPCIVIRTIONET_SRCS-y += $(UK_PLAT_DRIVERS_BASE)/virtio/virtio_ring.c
+LIBKVMPCIVIRTIONET_SRCS-y += $(UK_PLAT_DRIVERS_BASE)/virtio/virtio_net.c
-- 
2.7.4

_______________________________________________
Minios-devel mailing list
Minios-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/minios-devel