diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 8d2501e..db500cc 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -196,4 +196,8 @@ config XEN_ACPI_PROCESSOR
 	  called xen_acpi_processor If you do not know what to choose, select
 	  M here. If the CPUFREQ drivers are built in, select Y here.
 
+config XEN_V4V
+	tristate "Xen V4V driver"
+	default m
+
 endmenu
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index fc34886..a3d3014 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_XEN_DOM0)			+= pci.o acpi.o
 obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= xen-pciback/
 obj-$(CONFIG_XEN_PRIVCMD)		+= xen-privcmd.o
 obj-$(CONFIG_XEN_ACPI_PROCESSOR)	+= xen-acpi-processor.o
+obj-$(CONFIG_XEN_V4V)			+= v4v.o
 xen-evtchn-y				:= evtchn.o
 xen-gntdev-y				:= gntdev.o
 xen-gntalloc-y				:= gntalloc.o
diff --git a/drivers/xen/v4v.c b/drivers/xen/v4v.c
new file mode 100644
index 0000000..141be66
--- /dev/null
+++ b/drivers/xen/v4v.c
@@ -0,0 +1,2639 @@
+/******************************************************************************
+ * drivers/xen/v4v/v4v.c
+ *
+ * V4V interdomain communication driver.
+ *
+ * Copyright (c) 2012 Jean Guyader
+ * Copyright (c) 2009 Ross Philipson
+ * Copyright (c) 2009 James McKenzie
+ * Copyright (c) 2009 Citrix Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "v4v_utils.h" + +#define DEFAULT_RING_SIZE \ + (V4V_ROUNDUP((((PAGE_SIZE)*32) - sizeof(v4v_ring_t)-V4V_ROUNDUP(1)))) + +/* The type of a ring*/ +typedef enum { + V4V_RTYPE_IDLE = 0, + V4V_RTYPE_DGRAM, + V4V_RTYPE_LISTENER, + V4V_RTYPE_CONNECTOR, +} v4v_rtype; + +/* the state of a v4V_private*/ +typedef enum { + V4V_STATE_IDLE = 0, + V4V_STATE_BOUND, + V4V_STATE_LISTENING, + V4V_STATE_ACCEPTED, + V4V_STATE_CONNECTING, + V4V_STATE_CONNECTED, + V4V_STATE_DISCONNECTED +} v4v_state; + +typedef enum { + V4V_PTYPE_DGRAM = 1, + V4V_PTYPE_STREAM, +} v4v_ptype; + +static rwlock_t list_lock; +static struct list_head ring_list; + +struct v4v_private; + +/* + * Ring pointer itself is protected by the refcnt the lists its in by list_lock. + * + * It's permittable to decrement the refcnt whilst holding the read lock, and then + * clean up refcnt=0 rings later. + * + * If a ring has refcnt!=0 we expect ->ring to be non NULL, and for the ring to + * be registered with Xen. + */ + +struct ring { + struct list_head node; + atomic_t refcnt; + + spinlock_t lock; /* Protects the data in the v4v_ring_t also privates and sponsor */ + + struct list_head privates; /* Protected by lock */ + struct v4v_private *sponsor; /* Protected by lock */ + + v4v_rtype type; + + /* Ring */ + v4v_ring_t *ring; + v4v_pfn_t *pfn_list; + size_t pfn_list_npages; + int order; +}; + +struct v4v_private { + struct list_head node; + v4v_state state; + v4v_ptype ptype; + uint32_t desired_ring_size; + struct ring *r; + wait_queue_head_t readq; + wait_queue_head_t writeq; + v4v_addr_t peer; + uint32_t conid; + spinlock_t pending_recv_lock; /* Protects pending messages, and pending_error */ + struct list_head pending_recv_list; /* For LISTENER contains only ... 
*/ + atomic_t pending_recv_count; + int pending_error; + int full; + int send_blocked; + int rx; +}; + +struct pending_recv { + struct list_head node; + v4v_addr_t from; + size_t data_len, data_ptr; + struct v4v_stream_header sh; + uint8_t data[0]; +} V4V_PACKED; + +static spinlock_t interrupt_lock; +static spinlock_t pending_xmit_lock; +static struct list_head pending_xmit_list; +static atomic_t pending_xmit_count; + +enum v4v_pending_xmit_type { + V4V_PENDING_XMIT_INLINE = 1, /* Send the inline xmit */ + V4V_PENDING_XMIT_WAITQ_MATCH_SPONSOR, /* Wake up writeq of sponsor of the ringid from */ + V4V_PENDING_XMIT_WAITQ_MATCH_PRIVATES, /* Wake up writeq of a private of ringid from with conid */ +}; + +struct pending_xmit { + struct list_head node; + enum v4v_pending_xmit_type type; + uint32_t conid; + struct v4v_ring_id from; + v4v_addr_t to; + size_t len; + uint32_t protocol; + uint8_t data[0]; +}; + +#define MAX_PENDING_RECVS 16 + +/* Hypercalls */ + +static inline int __must_check +HYPERVISOR_v4v_op(int cmd, void *arg1, void *arg2, + uint32_t arg3, uint32_t arg4) +{ + return _hypercall5(int, v4v_op, cmd, arg1, arg2, arg3, arg4); +} + +static int v4v_info(v4v_info_t *info) +{ + (void)(*(volatile int*)info); + return HYPERVISOR_v4v_op (V4VOP_info, info, NULL, 0, 0); +} + +static int H_v4v_register_ring(v4v_ring_t * r, v4v_pfn_t * l, size_t npages) +{ + (void)(*(volatile int *)r); + return HYPERVISOR_v4v_op(V4VOP_register_ring, r, l, npages, 0); +} + +static int H_v4v_unregister_ring(v4v_ring_t * r) +{ + (void)(*(volatile int *)r); + return HYPERVISOR_v4v_op(V4VOP_unregister_ring, r, NULL, 0, 0); +} + +static int +H_v4v_send(v4v_addr_t * s, v4v_addr_t * d, const void *buf, uint32_t len, + uint32_t protocol) +{ + v4v_send_addr_t addr; + addr.src = *s; + addr.dst = *d; + return HYPERVISOR_v4v_op(V4VOP_send, &addr, (void *)buf, len, protocol); +} + +static int +H_v4v_sendv(v4v_addr_t * s, v4v_addr_t * d, const v4v_iov_t * iovs, + uint32_t niov, uint32_t protocol) +{ + v4v_send_addr_t addr; + addr.src = *s; + addr.dst = *d; + return HYPERVISOR_v4v_op(V4VOP_sendv, &addr, (void *)iovs, niov, + protocol); +} + +static int H_v4v_notify(v4v_ring_data_t * rd) +{ + return HYPERVISOR_v4v_op(V4VOP_notify, rd, NULL, 0, 0); +} + +static int H_v4v_viptables_add(v4v_viptables_rule_t * rule, int position) +{ + return HYPERVISOR_v4v_op(V4VOP_viptables_add, rule, NULL, + position, 0); +} + +static int H_v4v_viptables_del(v4v_viptables_rule_t * rule, int position) +{ + return HYPERVISOR_v4v_op(V4VOP_viptables_del, rule, NULL, + position, 0); +} + +static int H_v4v_viptables_list(struct v4v_viptables_list *list) +{ + return HYPERVISOR_v4v_op(V4VOP_viptables_list, list, NULL, 0, 0); +} + +/* Port/Ring uniqueness */ + +/* Need to hold write lock for all of these */ + +static int v4v_id_in_use(struct v4v_ring_id *id) +{ + struct ring *r; + + list_for_each_entry(r, &ring_list, node) { + if ((r->ring->id.addr.port == id->addr.port) + && (r->ring->id.partner == id->partner)) + return 1; + } + + return 0; +} + +static int v4v_port_in_use(uint32_t port, uint32_t * max) +{ + uint32_t ret = 0; + struct ring *r; + + list_for_each_entry(r, &ring_list, node) { + if (r->ring->id.addr.port == port) + ret++; + if (max && (r->ring->id.addr.port > *max)) + *max = r->ring->id.addr.port; + } + + return ret; +} + +static uint32_t v4v_random_port(void) +{ + uint32_t port; + + port = random32(); + port |= 0x80000000U; + if (port > 0xf0000000U) { + port -= 0x10000000; + } + + return port; +} + +/* Caller needs to hold lock */ +static 
uint32_t v4v_find_spare_port_number(void) +{ + uint32_t port, max = 0x80000000U; + + port = v4v_random_port(); + if (!v4v_port_in_use(port, &max)) { + return port; + } else { + port = max + 1; + } + + return port; +} + +/* Ring Goo */ + +static int register_ring(struct ring *r) +{ + return H_v4v_register_ring((void *)r->ring, + r->pfn_list, + r->pfn_list_npages); +} + +static int unregister_ring(struct ring *r) +{ + return H_v4v_unregister_ring((void *)r->ring); +} + +static void refresh_pfn_list(struct ring *r) +{ + uint8_t *b = (void *)r->ring; + int i; + + for (i = 0; i < r->pfn_list_npages; ++i) { + r->pfn_list[i] = pfn_to_mfn(vmalloc_to_pfn(b)); + b += PAGE_SIZE; + } +} + +static void allocate_pfn_list(struct ring *r) +{ + int n = (r->ring->len + PAGE_SIZE - 1) >> PAGE_SHIFT; + int len = sizeof(v4v_pfn_t) * n; + + r->pfn_list = kmalloc(len, GFP_KERNEL); + if (!r->pfn_list) + return; + r->pfn_list_npages = n; + + refresh_pfn_list(r); +} + +static int allocate_ring(struct ring *r, int ring_len) +{ + int len = ring_len + sizeof(v4v_ring_t); + int ret = 0; + + if (ring_len != V4V_ROUNDUP(ring_len)) { + ret = -EINVAL; + goto fail; + } + + r->ring = NULL; + r->pfn_list = NULL; + r->order = 0; + + r->order = get_order(len); + + r->ring = vmalloc(len); + + if (!r->ring) { + ret = -ENOMEM; + goto fail; + } + + memset((void *)r->ring, 0, len); + + r->ring->magic = V4V_RING_MAGIC; + r->ring->len = ring_len; + r->ring->rx_ptr = r->ring->tx_ptr = 0; + + memset((void *)r->ring->ring, 0x5a, ring_len); + + allocate_pfn_list(r); + if (!r->pfn_list) { + + ret = -ENOMEM; + goto fail; + } + + return 0; + fail: + if (r->ring) + vfree(r->ring); + if (r->pfn_list) + kfree(r->pfn_list); + + r->ring = NULL; + r->pfn_list = NULL; + + return ret; +} + +/* Caller must hold lock */ +static void recover_ring(struct ring *r) +{ + /* It's all gone horribly wrong */ + r->ring->rx_ptr = r->ring->tx_ptr; + /* Xen updates tx_ptr atomically to always be pointing somewhere sensible */ +} + +/* Caller must hold no locks, ring is allocated with a refcnt of 1 */ +static int new_ring(struct v4v_private *sponsor, struct v4v_ring_id *pid) +{ + struct v4v_ring_id id = *pid; + struct ring *r; + int ret; + unsigned long flags; + + if (id.addr.domain != V4V_DOMID_NONE) + return -EINVAL; + + r = kmalloc(sizeof(struct ring), GFP_KERNEL); + memset(r, 0, sizeof(struct ring)); + + ret = allocate_ring(r, sponsor->desired_ring_size); + if (ret) { + kfree(r); + return ret; + } + + INIT_LIST_HEAD(&r->privates); + spin_lock_init(&r->lock); + atomic_set(&r->refcnt, 1); + + write_lock_irqsave(&list_lock, flags); + if (sponsor->state != V4V_STATE_IDLE) { + ret = -EINVAL; + goto fail; + } + + if (!id.addr.port) { + id.addr.port = v4v_find_spare_port_number(); + } else if (v4v_id_in_use(&id)) { + ret = -EADDRINUSE; + goto fail; + } + + r->ring->id = id; + r->sponsor = sponsor; + sponsor->r = r; + sponsor->state = V4V_STATE_BOUND; + + ret = register_ring(r); + if (ret) + goto fail; + + list_add(&r->node, &ring_list); + write_unlock_irqrestore(&list_lock, flags); + return 0; + + fail: + write_unlock_irqrestore(&list_lock, flags); + + vfree(r->ring); + kfree(r->pfn_list); + kfree(r); + + sponsor->r = NULL; + sponsor->state = V4V_STATE_IDLE; + + return ret; +} + +/* Cleans up old rings */ +static void delete_ring(struct ring *r) +{ + int ret; + + list_del(&r->node); + + if ((ret = unregister_ring(r))) { + printk(KERN_ERR + "unregister_ring hypercall failed: %d. 
Leaking ring.\n", + ret); + } else { + vfree(r->ring); + } + + kfree(r->pfn_list); + kfree(r); +} + +/* Returns !0 if you sucessfully got a reference to the ring */ +static int get_ring(struct ring *r) +{ + return atomic_add_unless(&r->refcnt, 1, 0); +} + +/* Must be called with DEBUG_WRITELOCK; v4v_write_lock */ +static void put_ring(struct ring *r) +{ + if (!r) + return; + + if (atomic_dec_and_test(&r->refcnt)) { + delete_ring(r); + } +} + +/* Caller must hold ring_lock */ +static struct ring *find_ring_by_id(struct v4v_ring_id *id) +{ + struct ring *r; + + list_for_each_entry(r, &ring_list, node) { + if (!memcmp + ((void *)&r->ring->id, id, sizeof(struct v4v_ring_id))) + return r; + } + return NULL; +} + +/* Caller must hold ring_lock */ +struct ring *find_ring_by_id_type(struct v4v_ring_id *id, v4v_rtype t) +{ + struct ring *r; + + list_for_each_entry(r, &ring_list, node) { + if (r->type != t) + continue; + if (!memcmp + ((void *)&r->ring->id, id, sizeof(struct v4v_ring_id))) + return r; + } + + return NULL; +} + +/* Pending xmits */ + +/* Caller must hold pending_xmit_lock */ + +static void +xmit_queue_wakeup_private(struct v4v_ring_id *from, + uint32_t conid, v4v_addr_t * to, int len, int delete) +{ + struct pending_xmit *p; + + list_for_each_entry(p, &pending_xmit_list, node) { + if (p->type != V4V_PENDING_XMIT_WAITQ_MATCH_PRIVATES) + continue; + if (p->conid != conid) + continue; + + if ((!memcmp(from, &p->from, sizeof(struct v4v_ring_id))) + && (!memcmp(to, &p->to, sizeof(v4v_addr_t)))) { + if (delete) { + atomic_dec(&pending_xmit_count); + list_del(&p->node); + } else { + p->len = len; + } + return; + } + } + + if (delete) + return; + + p = kmalloc(sizeof(struct pending_xmit), GFP_ATOMIC); + if (!p) { + printk(KERN_ERR + "Out of memory trying to queue an xmit sponsor wakeup\n"); + return; + } + p->type = V4V_PENDING_XMIT_WAITQ_MATCH_PRIVATES; + p->conid = conid; + p->from = *from; + p->to = *to; + p->len = len; + + atomic_inc(&pending_xmit_count); + list_add_tail(&p->node, &pending_xmit_list); +} + +/* Caller must hold pending_xmit_lock */ +static void +xmit_queue_wakeup_sponsor(struct v4v_ring_id *from, v4v_addr_t * to, + int len, int delete) +{ + struct pending_xmit *p; + + list_for_each_entry(p, &pending_xmit_list, node) { + if (p->type != V4V_PENDING_XMIT_WAITQ_MATCH_SPONSOR) + continue; + if ((!memcmp(from, &p->from, sizeof(struct v4v_ring_id))) + && (!memcmp(to, &p->to, sizeof(v4v_addr_t)))) { + if (delete) { + atomic_dec(&pending_xmit_count); + list_del(&p->node); + } else { + p->len = len; + } + return; + } + } + + if (delete) + return; + + p = kmalloc(sizeof(struct pending_xmit), GFP_ATOMIC); + if (!p) { + printk(KERN_ERR + "Out of memory trying to queue an xmit sponsor wakeup\n"); + return; + } + p->type = V4V_PENDING_XMIT_WAITQ_MATCH_SPONSOR; + p->from = *from; + p->to = *to; + p->len = len; + atomic_inc(&pending_xmit_count); + list_add_tail(&p->node, &pending_xmit_list); +} + +static int +xmit_queue_inline(struct v4v_ring_id *from, v4v_addr_t * to, + void *buf, size_t len, uint32_t protocol) +{ + ssize_t ret; + unsigned long flags; + struct pending_xmit *p; + + spin_lock_irqsave(&pending_xmit_lock, flags); + + ret = H_v4v_send(&from->addr, to, buf, len, protocol); + if (ret != -EAGAIN) { + spin_unlock_irqrestore(&pending_xmit_lock, flags); + return ret; + } + + p = kmalloc(sizeof(struct pending_xmit) + len, GFP_ATOMIC); + if (!p) { + spin_unlock_irqrestore(&pending_xmit_lock, flags); + printk(KERN_ERR + "Out of memory trying to queue an xmit of %zu bytes\n", + len); + 
+ return -ENOMEM; + } + + p->type = V4V_PENDING_XMIT_INLINE; + p->from = *from; + p->to = *to; + p->len = len; + p->protocol = protocol; + + if (len) + memcpy(p->data, buf, len); + + list_add_tail(&p->node, &pending_xmit_list); + atomic_inc(&pending_xmit_count); + spin_unlock_irqrestore(&pending_xmit_lock, flags); + + return len; +} + +static void +xmit_queue_rst_to(struct v4v_ring_id *from, uint32_t conid, v4v_addr_t * to) +{ + struct v4v_stream_header sh; + + if (!to) + return; + + sh.conid = conid; + sh.flags = V4V_SHF_RST; + xmit_queue_inline(from, to, &sh, sizeof(sh), V4V_PROTO_STREAM); +} + +/* RX */ + +static int +copy_into_pending_recv(struct ring *r, int len, struct v4v_private *p) +{ + struct pending_recv *pending; + int k; + + /* Too much queued? Let the ring take the strain */ + if (atomic_read(&p->pending_recv_count) > MAX_PENDING_RECVS) { + spin_lock(&p->pending_recv_lock); + p->full = 1; + spin_unlock(&p->pending_recv_lock); + + return -1; + } + + pending = + kmalloc(sizeof(struct pending_recv) - + sizeof(struct v4v_stream_header) + len, GFP_ATOMIC); + + if (!pending) + return -1; + + pending->data_ptr = 0; + pending->data_len = len - sizeof(struct v4v_stream_header); + + k = v4v_copy_out(r->ring, &pending->from, NULL, &pending->sh, len, 1); + + spin_lock(&p->pending_recv_lock); + list_add_tail(&pending->node, &p->pending_recv_list); + atomic_inc(&p->pending_recv_count); + p->full = 0; + spin_unlock(&p->pending_recv_lock); + + return 0; +} + +/* Notify */ + +/* Caller must hold list_lock */ +static void +wakeup_privates(struct v4v_ring_id *id, v4v_addr_t * peer, uint32_t conid) +{ + struct ring *r = find_ring_by_id_type(id, V4V_RTYPE_LISTENER); + struct v4v_private *p; + + if (!r) + return; + + list_for_each_entry(p, &r->privates, node) { + if ((p->conid == conid) + && !memcmp(peer, &p->peer, sizeof(v4v_addr_t))) { + p->send_blocked = 0; + wake_up_interruptible_all(&p->writeq); + return; + } + } +} + +/* Caller must hold list_lock */ +static void wakeup_sponsor(struct v4v_ring_id *id) +{ + struct ring *r = find_ring_by_id(id); + + if (!r) + return; + + if (!r->sponsor) + return; + + r->sponsor->send_blocked = 0; + wake_up_interruptible_all(&r->sponsor->writeq); +} + +static void v4v_null_notify(void) +{ + H_v4v_notify(NULL); +} + +/* Caller must hold list_lock */ +static void v4v_notify(void) +{ + unsigned long flags; + int ret; + int nent; + struct pending_xmit *p, *n; + v4v_ring_data_t *d; + int i = 0; + + spin_lock_irqsave(&pending_xmit_lock, flags); + + nent = atomic_read(&pending_xmit_count); + d = kmalloc(sizeof(v4v_ring_data_t) + + nent * sizeof(v4v_ring_data_ent_t), GFP_ATOMIC); + if (!d) { + spin_unlock_irqrestore(&pending_xmit_lock, flags); + return; + } + memset(d, 0, sizeof(v4v_ring_data_t)); + + d->magic = V4V_RING_DATA_MAGIC; + + list_for_each_entry(p, &pending_xmit_list, node) { + if (i != nent) { + d->data[i].ring = p->to; + d->data[i].space_required = p->len; + i++; + } + } + d->nent = i; + + if (H_v4v_notify(d)) { + kfree(d); + spin_unlock_irqrestore(&pending_xmit_lock, flags); + //MOAN; + return; + } + + i = 0; + list_for_each_entry_safe(p, n, &pending_xmit_list, node) { + int processed = 1; + + if (i == nent) + continue; + + if (d->data[i].flags & V4V_RING_DATA_F_EXISTS) { + switch (p->type) { + case V4V_PENDING_XMIT_INLINE: + if (! 
+ (d->data[i].flags & + V4V_RING_DATA_F_SUFFICIENT)) { + processed = 0; + break; + } + ret = + H_v4v_send(&p->from.addr, &p->to, p->data, + p->len, p->protocol); + if (ret == -EAGAIN) + processed = 0; + break; + case V4V_PENDING_XMIT_WAITQ_MATCH_SPONSOR: + if (d-> + data[i].flags & V4V_RING_DATA_F_SUFFICIENT) + { + wakeup_sponsor(&p->from); + } else { + processed = 0; + } + break; + case V4V_PENDING_XMIT_WAITQ_MATCH_PRIVATES: + if (d-> + data[i].flags & V4V_RING_DATA_F_SUFFICIENT) + { + wakeup_privates(&p->from, &p->to, + p->conid); + } else { + processed = 0; + } + break; + } + } + if (processed) { + list_del(&p->node); /* No one to talk to */ + atomic_dec(&pending_xmit_count); + kfree(p); + } + i++; + } + + spin_unlock_irqrestore(&pending_xmit_lock, flags); + kfree(d); +} + +/* VIPtables */ +static void +v4v_viptables_add(struct v4v_private *p, struct v4v_viptables_rule *rule, + int position) +{ + H_v4v_viptables_add(rule, position); +} + +static void +v4v_viptables_del(struct v4v_private *p, struct v4v_viptables_rule *rule, + int position) +{ + H_v4v_viptables_del(rule, position); +} + +static int v4v_viptables_list(struct v4v_private *p, struct v4v_viptables_list *list) +{ + return H_v4v_viptables_list(list); +} + +/* State Machines */ +static int +connector_state_machine(struct v4v_private *p, struct v4v_stream_header *sh) +{ + if (sh->flags & V4V_SHF_ACK) { + switch (p->state) { + case V4V_STATE_CONNECTING: + p->state = V4V_STATE_CONNECTED; + + spin_lock(&p->pending_recv_lock); + p->pending_error = 0; + spin_unlock(&p->pending_recv_lock); + + wake_up_interruptible_all(&p->writeq); + return 0; + case V4V_STATE_CONNECTED: + case V4V_STATE_DISCONNECTED: + p->state = V4V_STATE_DISCONNECTED; + + wake_up_interruptible_all(&p->readq); + wake_up_interruptible_all(&p->writeq); + return 1; /* Send RST */ + default: + break; + } + } + + if (sh->flags & V4V_SHF_RST) { + switch (p->state) { + case V4V_STATE_CONNECTING: + spin_lock(&p->pending_recv_lock); + p->pending_error = -ECONNREFUSED; + spin_unlock(&p->pending_recv_lock); + case V4V_STATE_CONNECTED: + p->state = V4V_STATE_DISCONNECTED; + wake_up_interruptible_all(&p->readq); + wake_up_interruptible_all(&p->writeq); + return 0; + default: + break; + } + } + + return 0; +} + +static void +acceptor_state_machine(struct v4v_private *p, struct v4v_stream_header *sh) +{ + if ((sh->flags & V4V_SHF_RST) + && ((p->state == V4V_STATE_ACCEPTED))) { + p->state = V4V_STATE_DISCONNECTED; + wake_up_interruptible_all(&p->readq); + wake_up_interruptible_all(&p->writeq); + } +} + +/* Interrupt handler */ + +static int connector_interrupt(struct ring *r) +{ + ssize_t msg_len; + uint32_t protocol; + struct v4v_stream_header sh; + v4v_addr_t from; + int ret = 0; + + if (!r->sponsor) { + //MOAN; + return -1; + } + + msg_len = v4v_copy_out(r->ring, &from, &protocol, &sh, sizeof(sh), 0); /* Peek the header */ + if (msg_len == -1) { + recover_ring(r); + return ret; + } + + if ((protocol != V4V_PROTO_STREAM) || (msg_len < sizeof(sh))) { + /* Wrong protocol bin it */ + v4v_copy_out(r->ring, NULL, NULL, NULL, 0, 1); + return ret; + } + + if (sh.flags & V4V_SHF_SYN) { /* This is a connector no-one should send SYN, send RST back */ + msg_len = + v4v_copy_out(r->ring, &from, &protocol, &sh, sizeof(sh), 1); + if (msg_len == sizeof(sh)) + xmit_queue_rst_to(&r->ring->id, sh.conid, &from); + return ret; + } + + /* Right connexion? 
*/ + if (sh.conid != r->sponsor->conid) { + msg_len = + v4v_copy_out(r->ring, &from, &protocol, &sh, sizeof(sh), 1); + xmit_queue_rst_to(&r->ring->id, sh.conid, &from); + return ret; + } + + /* Any messages to eat? */ + if (sh.flags & (V4V_SHF_ACK | V4V_SHF_RST)) { + msg_len = + v4v_copy_out(r->ring, &from, &protocol, &sh, sizeof(sh), 1); + if (msg_len == sizeof(sh)) { + if (connector_state_machine(r->sponsor, &sh)) + xmit_queue_rst_to(&r->ring->id, sh.conid, + &from); + } + return ret; + } + //FIXME set a flag to say wake up the userland process next time, and do that rather than copy + ret = copy_into_pending_recv(r, msg_len, r->sponsor); + wake_up_interruptible_all(&r->sponsor->readq); + + return ret; +} + +static int +acceptor_interrupt(struct v4v_private *p, struct ring *r, + struct v4v_stream_header *sh, ssize_t msg_len) +{ + v4v_addr_t from; + int ret = 0; + + if (sh->flags & (V4V_SHF_SYN | V4V_SHF_ACK)) { /* This is an acceptor no-one should send SYN or ACK, send RST back */ + msg_len = + v4v_copy_out(r->ring, &from, NULL, sh, sizeof(*sh), 1); + if (msg_len == sizeof(*sh)) + xmit_queue_rst_to(&r->ring->id, sh->conid, &from); + return ret; + } + + /* Is it all over */ + if (sh->flags & V4V_SHF_RST) { + /* Consume the RST */ + msg_len = + v4v_copy_out(r->ring, &from, NULL, sh, sizeof(*sh), 1); + if (msg_len == sizeof(*sh)) + acceptor_state_machine(p, sh); + return ret; + } + + /* Copy the message out */ + ret = copy_into_pending_recv(r, msg_len, p); + wake_up_interruptible_all(&p->readq); + + return ret; +} + +static int listener_interrupt(struct ring *r) +{ + int ret = 0; + ssize_t msg_len; + uint32_t protocol; + struct v4v_stream_header sh; + struct v4v_private *p; + v4v_addr_t from; + + msg_len = v4v_copy_out(r->ring, &from, &protocol, &sh, sizeof(sh), 0); /* Peek the header */ + if (msg_len == -1) { + recover_ring(r); + return ret; + } + + if ((protocol != V4V_PROTO_STREAM) || (msg_len < sizeof(sh))) { + /* Wrong protocol bin it */ + v4v_copy_out(r->ring, NULL, NULL, NULL, 0, 1); + return ret; + } + + list_for_each_entry(p, &r->privates, node) { + if ((p->conid == sh.conid) + && (!memcmp(&p->peer, &from, sizeof(v4v_addr_t)))) { + ret = acceptor_interrupt(p, r, &sh, msg_len); + return ret; + } + } + + /* Consume it */ + if (r->sponsor && (sh.flags & V4V_SHF_RST)) { + /* + * If we previously received a SYN which has not been pulled by + * v4v_accept() from the pending queue yet, the RST will be dropped here + * and the connection will never be closed. + * Hence we must make sure to evict the SYN header from the pending queue + * before it gets picked up by v4v_accept(). 
+ */ + struct pending_recv *pending, *t; + + spin_lock(&r->sponsor->pending_recv_lock); + list_for_each_entry_safe(pending, t, + &r->sponsor->pending_recv_list, node) { + if (pending->sh.flags & V4V_SHF_SYN + && pending->sh.conid == sh.conid) { + list_del(&pending->node); + atomic_dec(&r->sponsor->pending_recv_count); + kfree(pending); + break; + } + } + spin_unlock(&r->sponsor->pending_recv_lock); + + /* Rst to a listener, should be picked up above for the connexion, drop it */ + v4v_copy_out(r->ring, NULL, NULL, NULL, sizeof(sh), 1); + return ret; + } + + if (sh.flags & V4V_SHF_SYN) { + /* Syn to new connexion */ + if ((!r->sponsor) || (msg_len != sizeof(sh))) { + v4v_copy_out(r->ring, NULL, NULL, NULL, + sizeof(sh), 1); + return ret; + } + ret = copy_into_pending_recv(r, msg_len, r->sponsor); + wake_up_interruptible_all(&r->sponsor->readq); + return ret; + } + + v4v_copy_out(r->ring, NULL, NULL, NULL, sizeof(sh), 1); + /* Data for unknown destination, RST them */ + xmit_queue_rst_to(&r->ring->id, sh.conid, &from); + + return ret; +} + +static void v4v_interrupt_rx(void) +{ + struct ring *r; + + read_lock(&list_lock); + + /* Wake up anyone pending */ + list_for_each_entry(r, &ring_list, node) { + if (r->ring->tx_ptr == r->ring->rx_ptr) + continue; + + switch (r->type) { + case V4V_RTYPE_IDLE: + v4v_copy_out(r->ring, NULL, NULL, NULL, 1, 1); + break; + case V4V_RTYPE_DGRAM: /* For datagrams we just wake up the reader */ + if (r->sponsor) + wake_up_interruptible_all(&r->sponsor->readq); + break; + case V4V_RTYPE_CONNECTOR: + spin_lock(&r->lock); + while ((r->ring->tx_ptr != r->ring->rx_ptr) + && !connector_interrupt(r)) ; + spin_unlock(&r->lock); + break; + case V4V_RTYPE_LISTENER: + spin_lock(&r->lock); + while ((r->ring->tx_ptr != r->ring->rx_ptr) + && !listener_interrupt(r)) ; + spin_unlock(&r->lock); + break; + default: /* enum warning */ + break; + } + } + read_unlock(&list_lock); +} + +static irqreturn_t v4v_interrupt(int irq, void *dev_id) +{ + unsigned long flags; + + spin_lock_irqsave(&interrupt_lock, flags); + v4v_interrupt_rx(); + v4v_notify(); + spin_unlock_irqrestore(&interrupt_lock, flags); + + return IRQ_HANDLED; +} + +static void v4v_fake_irq(void) +{ + unsigned long flags; + + spin_lock_irqsave(&interrupt_lock, flags); + v4v_interrupt_rx(); + v4v_null_notify(); + spin_unlock_irqrestore(&interrupt_lock, flags); +} + +/* Filesystem gunge */ + +#define V4VFS_MAGIC 0x56345644 /* "V4VD" */ + +static struct vfsmount *v4v_mnt = NULL; +static const struct file_operations v4v_fops_stream; + +static struct dentry *v4vfs_mount_pseudo(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data) +{ + return mount_pseudo(fs_type, "v4v:", NULL, NULL, V4VFS_MAGIC); +} + +static struct file_system_type v4v_fs = { + /* No owner field so module can be unloaded */ + .name = "v4vfs", + .mount = v4vfs_mount_pseudo, + .kill_sb = kill_litter_super +}; + +static int setup_fs(void) +{ + int ret; + + ret = register_filesystem(&v4v_fs); + if (ret) { + printk(KERN_ERR + "v4v: couldn't register tedious filesystem thingy\n"); + return ret; + } + + v4v_mnt = kern_mount(&v4v_fs); + if (IS_ERR(v4v_mnt)) { + unregister_filesystem(&v4v_fs); + ret = PTR_ERR(v4v_mnt); + printk(KERN_ERR + "v4v: couldn't mount tedious filesystem thingy\n"); + return ret; + } + + return 0; +} + +static void unsetup_fs(void) +{ + mntput(v4v_mnt); + unregister_filesystem(&v4v_fs); +} + +/* Methods */ + +static int stream_connected(struct v4v_private *p) +{ + switch (p->state) { + case V4V_STATE_ACCEPTED: + 
case V4V_STATE_CONNECTED: + return 1; + default: + return 0; + } +} + +static size_t +v4v_try_send_sponsor(struct v4v_private *p, + v4v_addr_t * dest, + const void *buf, size_t len, uint32_t protocol) +{ + size_t ret; + unsigned long flags; + + ret = H_v4v_send(&p->r->ring->id.addr, dest, buf, len, protocol); + spin_lock_irqsave(&pending_xmit_lock, flags); + if (ret == -EAGAIN) { + /* Add pending xmit */ + xmit_queue_wakeup_sponsor(&p->r->ring->id, dest, len, 0); + p->send_blocked++; + + } else { + /* Remove pending xmit */ + xmit_queue_wakeup_sponsor(&p->r->ring->id, dest, len, 1); + p->send_blocked = 0; + } + + spin_unlock_irqrestore(&pending_xmit_lock, flags); + + return ret; +} + +static size_t +v4v_try_sendv_sponsor(struct v4v_private *p, + v4v_addr_t * dest, + const v4v_iov_t * iovs, size_t niov, size_t len, + uint32_t protocol) +{ + size_t ret; + unsigned long flags; + + ret = H_v4v_sendv(&p->r->ring->id.addr, dest, iovs, niov, protocol); + + spin_lock_irqsave(&pending_xmit_lock, flags); + if (ret == -EAGAIN) { + /* Add pending xmit */ + xmit_queue_wakeup_sponsor(&p->r->ring->id, dest, len, 0); + p->send_blocked++; + + } else { + /* Remove pending xmit */ + xmit_queue_wakeup_sponsor(&p->r->ring->id, dest, len, 1); + p->send_blocked = 0; + } + spin_unlock_irqrestore(&pending_xmit_lock, flags); + + return ret; +} + +/* + * Try to send from one of the ring's privates (not its sponsor), + * and queue an writeq wakeup if we fail + */ +static size_t +v4v_try_sendv_privates(struct v4v_private *p, + v4v_addr_t * dest, + const v4v_iov_t * iovs, size_t niov, size_t len, + uint32_t protocol) +{ + size_t ret; + unsigned long flags; + + ret = H_v4v_sendv(&p->r->ring->id.addr, dest, iovs, niov, protocol); + + spin_lock_irqsave(&pending_xmit_lock, flags); + if (ret == -EAGAIN) { + /* Add pending xmit */ + xmit_queue_wakeup_private(&p->r->ring->id, p->conid, dest, len, + 0); + p->send_blocked++; + } else { + /* Remove pending xmit */ + xmit_queue_wakeup_private(&p->r->ring->id, p->conid, dest, len, + 1); + p->send_blocked = 0; + } + spin_unlock_irqrestore(&pending_xmit_lock, flags); + + return ret; +} + +static ssize_t +v4v_sendto_from_sponsor(struct v4v_private *p, + const void *buf, size_t len, + int nonblock, v4v_addr_t * dest, uint32_t protocol) +{ + size_t ret = 0, ts_ret; + + switch (p->state) { + case V4V_STATE_CONNECTING: + ret = -ENOTCONN; + break; + case V4V_STATE_DISCONNECTED: + ret = -EPIPE; + break; + case V4V_STATE_BOUND: + case V4V_STATE_CONNECTED: + break; + default: + ret = -EINVAL; + } + + if (len > (p->r->ring->len - sizeof(struct v4v_ring_message_header))) + return -EMSGSIZE; + + if (ret) + return ret; + + if (nonblock) { + return H_v4v_send(&p->r->ring->id.addr, dest, buf, len, + protocol);; + } + /* + * I happen to know that wait_event_interruptible will never + * evaluate the 2nd argument once it has returned true but + * I shouldn't. + * + * The EAGAIN will cause xen to send an interrupt which will + * via the pending_xmit_list and writeq wake us up. 
+ */ + ret = wait_event_interruptible(p->writeq, + ((ts_ret = + v4v_try_send_sponsor + (p, dest, + buf, len, protocol)) != -EAGAIN)); + if (ret) + ret = ts_ret; + + return ret; +} + +static ssize_t +v4v_stream_sendvto_from_sponsor(struct v4v_private *p, + const v4v_iov_t * iovs, size_t niov, + size_t len, int nonblock, + v4v_addr_t * dest, uint32_t protocol) +{ + size_t ret = 0, ts_ret; + + switch (p->state) { + case V4V_STATE_CONNECTING: + return -ENOTCONN; + case V4V_STATE_DISCONNECTED: + return -EPIPE; + case V4V_STATE_BOUND: + case V4V_STATE_CONNECTED: + break; + default: + return -EINVAL; + } + + if (len > (p->r->ring->len - sizeof(struct v4v_ring_message_header))) + return -EMSGSIZE; + + if (ret) + return ret; + + if (nonblock) { + return H_v4v_sendv(&p->r->ring->id.addr, dest, iovs, niov, + protocol); + } + /* + * I happen to know that wait_event_interruptible will never + * evaluate the 2nd argument once it has returned true but + * I shouldn't. + * + * The EAGAIN will cause xen to send an interrupt which will + * via the pending_xmit_list and writeq wake us up. + */ + ret = wait_event_interruptible(p->writeq, + ((ts_ret = + v4v_try_sendv_sponsor + (p, dest, + iovs, niov, len, + protocol)) != -EAGAIN) + || !stream_connected(p)); + if (ret == 0) + ret = ts_ret; + + return ret; +} +static ssize_t +v4v_stream_sendvto_from_private(struct v4v_private *p, + const v4v_iov_t * iovs, size_t niov, + size_t len, int nonblock, + v4v_addr_t * dest, uint32_t protocol) +{ + size_t ret = 0, ts_ret; + + switch (p->state) { + case V4V_STATE_DISCONNECTED: + return -EPIPE; + case V4V_STATE_ACCEPTED: + break; + default: + return -EINVAL; + } + + if (len > (p->r->ring->len - sizeof(struct v4v_ring_message_header))) + return -EMSGSIZE; + + if (ret) + return ret; + + if (nonblock) { + return H_v4v_sendv(&p->r->ring->id.addr, dest, iovs, niov, + protocol); + } + /* + * I happen to know that wait_event_interruptible will never + * evaluate the 2nd argument once it has returned true but + * I shouldn't. + * + * The EAGAIN will cause xen to send an interrupt which will + * via the pending_xmit_list and writeq wake us up. 
+ */ + ret = wait_event_interruptible(p->writeq, + ((ts_ret = + v4v_try_sendv_privates + (p, dest, + iovs, niov, len, + protocol)) != -EAGAIN) + || !stream_connected(p)); + if (ret == 0) + ret = ts_ret; + + return ret; +} + +static int v4v_get_sock_name(struct v4v_private *p, struct v4v_ring_id *id) +{ + int rc = 0; + + read_lock(&list_lock); + if ((p->r) && (p->r->ring)) { + *id = p->r->ring->id; + } else { + rc = -EINVAL; + } + read_unlock(&list_lock); + + return rc; +} + +static int v4v_get_peer_name(struct v4v_private *p, v4v_addr_t * id) +{ + int rc = 0; + read_lock(&list_lock); + + switch (p->state) { + case V4V_STATE_CONNECTING: + case V4V_STATE_CONNECTED: + case V4V_STATE_ACCEPTED: + *id = p->peer; + break; + default: + rc = -ENOTCONN; + } + + read_unlock(&list_lock); + return rc; +} + +static int v4v_set_ring_size(struct v4v_private *p, uint32_t ring_size) +{ + + if (ring_size < + (sizeof(struct v4v_ring_message_header) + V4V_ROUNDUP(1))) + return -EINVAL; + if (ring_size != V4V_ROUNDUP(ring_size)) + return -EINVAL; + + read_lock(&list_lock); + if (p->state != V4V_STATE_IDLE) { + read_unlock(&list_lock); + return -EINVAL; + } + + p->desired_ring_size = ring_size; + read_unlock(&list_lock); + + return 0; +} + +static ssize_t +v4v_recvfrom_dgram(struct v4v_private *p, void *buf, size_t len, + int nonblock, int peek, v4v_addr_t * src) +{ + ssize_t ret; + uint32_t protocol; + v4v_addr_t lsrc; + + if (!src) + src = &lsrc; + +retry: + if (!nonblock) { + ret = wait_event_interruptible(p->readq, + (p->r->ring->rx_ptr != + p->r->ring->tx_ptr)); + if (ret) + return ret; + } + + read_lock(&list_lock); + + /* + * For datagrams, we know the interrrupt handler will never use + * the ring, leave irqs on + */ + spin_lock(&p->r->lock); + if (p->r->ring->rx_ptr == p->r->ring->tx_ptr) { + spin_unlock(&p->r->lock); + if (nonblock) { + ret = -EAGAIN; + goto unlock; + } + read_unlock(&list_lock); + goto retry; + } + ret = v4v_copy_out(p->r->ring, src, &protocol, buf, len, !peek); + if (ret < 0) { + recover_ring(p->r); + spin_unlock(&p->r->lock); + read_unlock(&list_lock); + goto retry; + } + spin_unlock(&p->r->lock); + + if (!peek) + v4v_null_notify(); + + if (protocol != V4V_PROTO_DGRAM) { + /* If peeking consume the rubbish */ + if (peek) + v4v_copy_out(p->r->ring, NULL, NULL, NULL, 1, 1); + read_unlock(&list_lock); + goto retry; + } + + if ((p->state == V4V_STATE_CONNECTED) && + memcmp(src, &p->peer, sizeof(v4v_addr_t))) { + /* Wrong source - bin it */ + if (peek) + v4v_copy_out(p->r->ring, NULL, NULL, NULL, 1, 1); + read_unlock(&list_lock); + goto retry; + } + +unlock: + read_unlock(&list_lock); + + return ret; +} + +static ssize_t +v4v_recv_stream(struct v4v_private *p, void *_buf, int len, int recv_flags, + int nonblock) +{ + size_t count = 0; + int ret = 0; + unsigned long flags; + int schedule_irq = 0; + uint8_t *buf = (void *)_buf; + + read_lock(&list_lock); + + switch (p->state) { + case V4V_STATE_DISCONNECTED: + ret = -EPIPE; + goto unlock; + case V4V_STATE_CONNECTING: + ret = -ENOTCONN; + goto unlock; + case V4V_STATE_CONNECTED: + case V4V_STATE_ACCEPTED: + break; + default: + ret = -EINVAL; + goto unlock; + } + + do { + if (!nonblock) { + ret = wait_event_interruptible(p->readq, + (!list_empty(&p->pending_recv_list) + || !stream_connected(p))); + + if (ret) + break; + } + + spin_lock_irqsave(&p->pending_recv_lock, flags); + + while (!list_empty(&p->pending_recv_list) && len) { + size_t to_copy; + struct pending_recv *pending; + int unlink = 0; + + pending = 
list_first_entry(&p->pending_recv_list, + struct pending_recv, node); + + if ((pending->data_len - pending->data_ptr) > len) { + to_copy = len; + } else { + unlink = 1; + to_copy = pending->data_len - pending->data_ptr; + } + + if (!access_ok(VERIFY_WRITE, buf, to_copy)) { + printk(KERN_ERR + "V4V - ERROR: buf invalid _buf=%p buf=%p len=%d to_copy=%zu count=%zu\n", + _buf, buf, len, to_copy, count); + spin_unlock_irqrestore(&p->pending_recv_lock, flags); + read_unlock(&list_lock); + return -EFAULT; + } + + if (copy_to_user(buf, pending->data + pending->data_ptr, to_copy)) + { + spin_unlock_irqrestore(&p->pending_recv_lock, flags); + read_unlock(&list_lock); + return -EFAULT; + } + + if (unlink) { + list_del(&pending->node); + kfree(pending); + atomic_dec(&p->pending_recv_count); + if (p->full) + schedule_irq = 1; + } else + pending->data_ptr += to_copy; + + buf += to_copy; + count += to_copy; + len -= to_copy; + } + + spin_unlock_irqrestore(&p->pending_recv_lock, flags); + + if (p->state == V4V_STATE_DISCONNECTED) { + ret = -EPIPE; + break; + } + + if (nonblock) + ret = -EAGAIN; + + } while ((recv_flags & MSG_WAITALL) && len); + +unlock: + read_unlock(&list_lock); + + if (schedule_irq) + v4v_fake_irq(); + + return count ? count : ret; +} + +static ssize_t +v4v_send_stream(struct v4v_private *p, const void *_buf, int len, int nonblock) +{ + int write_lump; + const uint8_t *buf = _buf; + size_t count = 0; + ssize_t ret; + int to_send; + + write_lump = DEFAULT_RING_SIZE >> 2; + + switch (p->state) { + case V4V_STATE_DISCONNECTED: + return -EPIPE; + case V4V_STATE_CONNECTING: + return -ENOTCONN; + case V4V_STATE_CONNECTED: + case V4V_STATE_ACCEPTED: + break; + default: + return -EINVAL; + } + + while (len) { + struct v4v_stream_header sh; + v4v_iov_t iovs[2]; + + to_send = len > write_lump ? write_lump : len; + sh.flags = 0; + sh.conid = p->conid; + + iovs[0].iov_base = (uintptr_t)&sh; + iovs[0].iov_len = sizeof (sh); + + iovs[1].iov_base = (uintptr_t)buf; + iovs[1].iov_len = to_send; + + if (p->state == V4V_STATE_CONNECTED) + ret = v4v_stream_sendvto_from_sponsor( + p, iovs, 2, + to_send + sizeof(struct v4v_stream_header), + nonblock, &p->peer, V4V_PROTO_STREAM); + else + ret = v4v_stream_sendvto_from_private( + p, iovs, 2, + to_send + sizeof(struct v4v_stream_header), + nonblock, &p->peer, V4V_PROTO_STREAM); + + if (ret < 0) { + return count ? 
count : ret; + } + + len -= to_send; + buf += to_send; + count += to_send; + + if (nonblock) + return count; + } + + return count; +} + +static int v4v_bind(struct v4v_private *p, struct v4v_ring_id *ring_id) +{ + int ret = 0; + + if (ring_id->addr.domain != V4V_DOMID_NONE) { + return -EINVAL; + } + + switch (p->ptype) { + case V4V_PTYPE_DGRAM: + ret = new_ring(p, ring_id); + if (!ret) + p->r->type = V4V_RTYPE_DGRAM; + break; + case V4V_PTYPE_STREAM: + ret = new_ring(p, ring_id); + break; + } + + return ret; +} + +static int v4v_listen(struct v4v_private *p) +{ + if (p->ptype != V4V_PTYPE_STREAM) + return -EINVAL; + + if (p->state != V4V_STATE_BOUND) { + return -EINVAL; + } + + p->r->type = V4V_RTYPE_LISTENER; + p->state = V4V_STATE_LISTENING; + + return 0; +} + +static int v4v_connect(struct v4v_private *p, v4v_addr_t * peer, int nonblock) +{ + struct v4v_stream_header sh; + int ret = -EINVAL; + + if (p->ptype == V4V_PTYPE_DGRAM) { + switch (p->state) { + case V4V_STATE_BOUND: + case V4V_STATE_CONNECTED: + if (peer) { + p->state = V4V_STATE_CONNECTED; + memcpy(&p->peer, peer, sizeof(v4v_addr_t)); + } else { + p->state = V4V_STATE_BOUND; + } + return 0; + default: + return -EINVAL; + } + } + if (p->ptype != V4V_PTYPE_STREAM) { + return -EINVAL; + } + + /* Irritiatingly we need to be restartable */ + switch (p->state) { + case V4V_STATE_BOUND: + p->r->type = V4V_RTYPE_CONNECTOR; + p->state = V4V_STATE_CONNECTING; + p->conid = random32(); + p->peer = *peer; + + sh.flags = V4V_SHF_SYN; + sh.conid = p->conid; + + ret = + xmit_queue_inline(&p->r->ring->id, &p->peer, &sh, + sizeof(sh), V4V_PROTO_STREAM); + if (ret == sizeof(sh)) + ret = 0; + + if (ret && (ret != -EAGAIN)) { + p->state = V4V_STATE_BOUND; + p->r->type = V4V_RTYPE_DGRAM; + return ret; + } + + break; + case V4V_STATE_CONNECTED: + if (memcmp(peer, &p->peer, sizeof(v4v_addr_t))) { + return -EINVAL; + } else { + return 0; + } + case V4V_STATE_CONNECTING: + if (memcmp(peer, &p->peer, sizeof(v4v_addr_t))) { + return -EINVAL; + } + break; + default: + return -EINVAL; + } + + if (nonblock) { + return -EINPROGRESS; + } + + while (p->state != V4V_STATE_CONNECTED) { + ret = + wait_event_interruptible(p->writeq, + (p->state != + V4V_STATE_CONNECTING)); + if (ret) + return ret; + + if (p->state == V4V_STATE_DISCONNECTED) { + p->state = V4V_STATE_BOUND; + p->r->type = V4V_RTYPE_DGRAM; + ret = -ECONNREFUSED; + break; + } + } + + return ret; +} + +static int allocate_fd_with_private(void *private) +{ + int fd; + struct file *f; + struct qstr name = {.name = "" }; + struct path path; + struct inode *ind; + + fd = get_unused_fd(); + if (fd < 0) + return fd; + + path.dentry = d_alloc_pseudo(v4v_mnt->mnt_sb, &name); + if (unlikely(!path.dentry)) { + put_unused_fd(fd); + return -ENOMEM; + } + ind = new_inode(v4v_mnt->mnt_sb); + ind->i_ino = get_next_ino(); + ind->i_fop = v4v_mnt->mnt_root->d_inode->i_fop; + ind->i_state = v4v_mnt->mnt_root->d_inode->i_state; + ind->i_mode = v4v_mnt->mnt_root->d_inode->i_mode; + ind->i_uid = current_fsuid(); + ind->i_gid = current_fsgid(); + d_instantiate(path.dentry, ind); + + path.mnt = mntget(v4v_mnt); + + f = alloc_file(&path, FMODE_READ | FMODE_WRITE, &v4v_fops_stream); + if (!f) { + /* Put back fd ? 
*/ + return -ENFILE; + } + + f->private_data = private; + fd_install(fd, f); + + return fd; +} + +static int +v4v_accept(struct v4v_private *p, struct v4v_addr *peer, int nonblock) +{ + int fd; + int ret = 0; + struct v4v_private *a = NULL; + struct pending_recv *r = NULL; + unsigned long flags; + struct v4v_stream_header sh; + + if (p->ptype != V4V_PTYPE_STREAM) + return -ENOTTY; + + if (p->state != V4V_STATE_LISTENING) { + return -EINVAL; + } + + /* FIXME: leak! */ + for (;;) { + ret = + wait_event_interruptible(p->readq, + (!list_empty + (&p->pending_recv_list)) + || nonblock); + if (ret) + return ret; + + /* Write lock implicitly has pending_recv_lock */ + write_lock_irqsave(&list_lock, flags); + + if (!list_empty(&p->pending_recv_list)) { + r = list_first_entry(&p->pending_recv_list, + struct pending_recv, node); + + list_del(&r->node); + atomic_dec(&p->pending_recv_count); + + if ((!r->data_len) && (r->sh.flags & V4V_SHF_SYN)) + break; + + kfree(r); + } + + write_unlock_irqrestore(&list_lock, flags); + if (nonblock) + return -EAGAIN; + } + write_unlock_irqrestore(&list_lock, flags); + + a = kmalloc(sizeof(struct v4v_private), GFP_KERNEL); + if (!a) { + ret = -ENOMEM; + goto release; + } + + memset(a, 0, sizeof(struct v4v_private)); + a->state = V4V_STATE_ACCEPTED; + a->ptype = V4V_PTYPE_STREAM; + a->r = p->r; + if (!get_ring(a->r)) { + a->r = NULL; + ret = -EINVAL; + goto release; + } + + init_waitqueue_head(&a->readq); + init_waitqueue_head(&a->writeq); + spin_lock_init(&a->pending_recv_lock); + INIT_LIST_HEAD(&a->pending_recv_list); + atomic_set(&a->pending_recv_count, 0); + + a->send_blocked = 0; + a->peer = r->from; + a->conid = r->sh.conid; + + if (peer) + *peer = r->from; + + fd = allocate_fd_with_private(a); + if (fd < 0) { + ret = fd; + goto release; + } + + write_lock_irqsave(&list_lock, flags); + list_add(&a->node, &a->r->privates); + write_unlock_irqrestore(&list_lock, flags); + + /* Ship the ACK */ + sh.conid = a->conid; + sh.flags = V4V_SHF_ACK; + + xmit_queue_inline(&a->r->ring->id, &a->peer, &sh, + sizeof(sh), V4V_PROTO_STREAM); + kfree(r); + + return fd; + + release: + kfree(r); + if (a) { + write_lock_irqsave(&list_lock, flags); + if (a->r) + put_ring(a->r); + write_unlock_irqrestore(&list_lock, flags); + kfree(a); + } + return ret; +} + +ssize_t +v4v_sendto(struct v4v_private * p, const void *buf, size_t len, int flags, + v4v_addr_t * addr, int nonblock) +{ + ssize_t rc; + + if (!access_ok(VERIFY_READ, buf, len)) + return -EFAULT; + if (!access_ok(VERIFY_READ, addr, len)) + return -EFAULT; + + if (flags & MSG_DONTWAIT) + nonblock++; + + switch (p->ptype) { + case V4V_PTYPE_DGRAM: + switch (p->state) { + case V4V_STATE_BOUND: + if (!addr) + return -ENOTCONN; + rc = v4v_sendto_from_sponsor(p, buf, len, nonblock, + addr, V4V_PROTO_DGRAM); + break; + + case V4V_STATE_CONNECTED: + if (addr) + return -EISCONN; + + rc = v4v_sendto_from_sponsor(p, buf, len, nonblock, + &p->peer, V4V_PROTO_DGRAM); + break; + + default: + return -EINVAL; + } + break; + case V4V_PTYPE_STREAM: + if (addr) + return -EISCONN; + switch (p->state) { + case V4V_STATE_CONNECTING: + case V4V_STATE_BOUND: + return -ENOTCONN; + case V4V_STATE_CONNECTED: + case V4V_STATE_ACCEPTED: + rc = v4v_send_stream(p, buf, len, nonblock); + break; + case V4V_STATE_DISCONNECTED: + + rc = -EPIPE; + break; + default: + + return -EINVAL; + } + break; + default: + + return -ENOTTY; + } + + if ((rc == -EPIPE) && !(flags & MSG_NOSIGNAL)) + send_sig(SIGPIPE, current, 0); + + return rc; +} + +ssize_t +v4v_recvfrom(struct 
v4v_private * p, void *buf, size_t len, int flags, + v4v_addr_t * addr, int nonblock) +{ + int peek = 0; + ssize_t rc = 0; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + if ((addr) && (!access_ok(VERIFY_WRITE, addr, sizeof(v4v_addr_t)))) + return -EFAULT; + + if (flags & MSG_DONTWAIT) + nonblock++; + if (flags & MSG_PEEK) + peek++; + + switch (p->ptype) { + case V4V_PTYPE_DGRAM: + rc = v4v_recvfrom_dgram(p, buf, len, nonblock, peek, addr); + break; + case V4V_PTYPE_STREAM: + if (peek) + return -EINVAL; + + switch (p->state) { + case V4V_STATE_BOUND: + return -ENOTCONN; + case V4V_STATE_CONNECTED: + case V4V_STATE_ACCEPTED: + if (addr) + *addr = p->peer; + rc = v4v_recv_stream(p, buf, len, flags, nonblock); + break; + case V4V_STATE_DISCONNECTED: + rc = 0; + break; + default: + rc = -EINVAL; + } + } + + if ((rc > (ssize_t) len) && !(flags & MSG_TRUNC)) + rc = len; + + return rc; +} + +/* fops */ + +static int v4v_open_dgram(struct inode *inode, struct file *f) +{ + struct v4v_private *p; + + p = kmalloc(sizeof(struct v4v_private), GFP_KERNEL); + if (!p) + return -ENOMEM; + + memset(p, 0, sizeof(struct v4v_private)); + p->state = V4V_STATE_IDLE; + p->desired_ring_size = DEFAULT_RING_SIZE; + p->r = NULL; + p->ptype = V4V_PTYPE_DGRAM; + p->send_blocked = 0; + + init_waitqueue_head(&p->readq); + init_waitqueue_head(&p->writeq); + + spin_lock_init(&p->pending_recv_lock); + INIT_LIST_HEAD(&p->pending_recv_list); + atomic_set(&p->pending_recv_count, 0); + + f->private_data = p; + return 0; +} + +static int v4v_open_stream(struct inode *inode, struct file *f) +{ + struct v4v_private *p; + + p = kmalloc(sizeof(struct v4v_private), GFP_KERNEL); + if (!p) + return -ENOMEM; + + memset(p, 0, sizeof(struct v4v_private)); + p->state = V4V_STATE_IDLE; + p->desired_ring_size = DEFAULT_RING_SIZE; + p->r = NULL; + p->ptype = V4V_PTYPE_STREAM; + p->send_blocked = 0; + + init_waitqueue_head(&p->readq); + init_waitqueue_head(&p->writeq); + + spin_lock_init(&p->pending_recv_lock); + INIT_LIST_HEAD(&p->pending_recv_list); + atomic_set(&p->pending_recv_count, 0); + + f->private_data = p; + return 0; +} + +static int v4v_release(struct inode *inode, struct file *f) +{ + struct v4v_private *p = (struct v4v_private *)f->private_data; + unsigned long flags; + struct pending_recv *pending; + + if (p->ptype == V4V_PTYPE_STREAM) { + switch (p->state) { + case V4V_STATE_CONNECTED: + case V4V_STATE_CONNECTING: + case V4V_STATE_ACCEPTED: + xmit_queue_rst_to(&p->r->ring->id, p->conid, &p->peer); + break; + default: + break; + } + } + + write_lock_irqsave(&list_lock, flags); + if (!p->r) { + write_unlock_irqrestore(&list_lock, flags); + goto release; + } + + if (p != p->r->sponsor) { + put_ring(p->r); + list_del(&p->node); + write_unlock_irqrestore(&list_lock, flags); + goto release; + } + + p->r->sponsor = NULL; + put_ring(p->r); + write_unlock_irqrestore(&list_lock, flags); + + while (!list_empty(&p->pending_recv_list)) { + pending = + list_first_entry(&p->pending_recv_list, + struct pending_recv, node); + + list_del(&pending->node); + kfree(pending); + atomic_dec(&p->pending_recv_count); + } + + release: + kfree(p); + + return 0; +} + +static ssize_t +v4v_write(struct file *f, const char __user * buf, size_t count, loff_t * ppos) +{ + struct v4v_private *p = f->private_data; + int nonblock = f->f_flags & O_NONBLOCK; + + return v4v_sendto(p, buf, count, 0, NULL, nonblock); +} + +static ssize_t +v4v_read(struct file *f, char __user * buf, size_t count, loff_t * ppos) +{ + struct v4v_private *p = 
f->private_data; + int nonblock = f->f_flags & O_NONBLOCK; + + return v4v_recvfrom(p, (void *)buf, count, 0, NULL, nonblock); +} + +static long v4v_ioctl(struct file *f, unsigned int cmd, unsigned long arg) +{ + int rc = -ENOTTY; + + int nonblock = f->f_flags & O_NONBLOCK; + struct v4v_private *p = f->private_data; + + if (_IOC_TYPE(cmd) != V4V_TYPE) + return rc; + + switch (cmd) { + case V4VIOCSETRINGSIZE: + if (!access_ok(VERIFY_READ, arg, sizeof(uint32_t))) + return -EFAULT; + rc = v4v_set_ring_size(p, *(uint32_t *) arg); + break; + case V4VIOCBIND: + if (!access_ok(VERIFY_READ, arg, sizeof(struct v4v_ring_id))) + return -EFAULT; + rc = v4v_bind(p, (struct v4v_ring_id *)arg); + break; + case V4VIOCGETSOCKNAME: + if (!access_ok(VERIFY_WRITE, arg, sizeof(struct v4v_ring_id))) + return -EFAULT; + rc = v4v_get_sock_name(p, (struct v4v_ring_id *)arg); + break; + case V4VIOCGETPEERNAME: + if (!access_ok(VERIFY_WRITE, arg, sizeof(v4v_addr_t))) + return -EFAULT; + rc = v4v_get_peer_name(p, (v4v_addr_t *) arg); + break; + case V4VIOCCONNECT: + if (!access_ok(VERIFY_READ, arg, sizeof(v4v_addr_t))) + return -EFAULT; + /* Bind if not done */ + if (p->state == V4V_STATE_IDLE) { + struct v4v_ring_id id; + memset(&id, 0, sizeof(id)); + id.partner = V4V_DOMID_NONE; + id.addr.domain = V4V_DOMID_NONE; + id.addr.port = 0; + rc = v4v_bind(p, &id); + if (rc) + break; + } + rc = v4v_connect(p, (v4v_addr_t *) arg, nonblock); + break; + case V4VIOCGETCONNECTERR: + { + unsigned long flags; + if (!access_ok(VERIFY_WRITE, arg, sizeof(int))) + return -EFAULT; + + spin_lock_irqsave(&p->pending_recv_lock, flags); + *(int *)arg = p->pending_error; + p->pending_error = 0; + spin_unlock_irqrestore(&p->pending_recv_lock, flags); + rc = 0; + } + break; + case V4VIOCLISTEN: + rc = v4v_listen(p); + break; + case V4VIOCACCEPT: + if (!access_ok(VERIFY_WRITE, arg, sizeof(v4v_addr_t))) + return -EFAULT; + rc = v4v_accept(p, (v4v_addr_t *) arg, nonblock); + break; + case V4VIOCSEND: + if (!access_ok(VERIFY_READ, arg, sizeof(struct v4v_dev))) + return -EFAULT; + { + struct v4v_dev a = *(struct v4v_dev *)arg; + + rc = v4v_sendto(p, a.buf, a.len, a.flags, a.addr, + nonblock); + } + break; + case V4VIOCRECV: + if (!access_ok(VERIFY_READ, arg, sizeof(struct v4v_dev))) + return -EFAULT; + { + struct v4v_dev a = *(struct v4v_dev *)arg; + rc = v4v_recvfrom(p, a.buf, a.len, a.flags, a.addr, + nonblock); + } + break; + case V4VIOCVIPTABLESADD: + if (!access_ok + (VERIFY_READ, arg, sizeof(struct v4v_viptables_rule_pos))) + return -EFAULT; + { + struct v4v_viptables_rule_pos *rule = + (struct v4v_viptables_rule_pos *)arg; + v4v_viptables_add(p, rule->rule, rule->position); + rc = 0; + } + break; + case V4VIOCVIPTABLESDEL: + if (!access_ok + (VERIFY_READ, arg, sizeof(struct v4v_viptables_rule_pos))) + return -EFAULT; + { + struct v4v_viptables_rule_pos *rule = + (struct v4v_viptables_rule_pos *)arg; + v4v_viptables_del(p, rule->rule, rule->position); + rc = 0; + } + break; + case V4VIOCVIPTABLESLIST: + if (!access_ok + (VERIFY_READ, arg, sizeof(struct v4v_viptables_list))) + return -EFAULT; + { + struct v4v_viptables_list *list = + (struct v4v_viptables_list *)arg; + rc = v4v_viptables_list(p, list); + } + break; + default: + printk(KERN_ERR "v4v: unkown ioctl, cmd:0x%x nr:%d size:0x%x\n", + cmd, _IOC_NR(cmd), _IOC_SIZE(cmd)); + } + + return rc; +} + +static unsigned int v4v_poll(struct file *f, poll_table * pt) +{ + unsigned int mask = 0; + struct v4v_private *p = f->private_data; + + read_lock(&list_lock); + + switch (p->ptype) { + case 
V4V_PTYPE_DGRAM: + switch (p->state) { + case V4V_STATE_CONNECTED: + case V4V_STATE_BOUND: + poll_wait(f, &p->readq, pt); + mask |= POLLOUT | POLLWRNORM; + if (p->r->ring->tx_ptr != p->r->ring->rx_ptr) + mask |= POLLIN | POLLRDNORM; + break; + default: + break; + } + break; + case V4V_PTYPE_STREAM: + switch (p->state) { + case V4V_STATE_BOUND: + break; + case V4V_STATE_LISTENING: + poll_wait(f, &p->readq, pt); + if (!list_empty(&p->pending_recv_list)) + mask |= POLLIN | POLLRDNORM; + break; + case V4V_STATE_ACCEPTED: + case V4V_STATE_CONNECTED: + poll_wait(f, &p->readq, pt); + poll_wait(f, &p->writeq, pt); + if (!p->send_blocked) + mask |= POLLOUT | POLLWRNORM; + if (!list_empty(&p->pending_recv_list)) + mask |= POLLIN | POLLRDNORM; + break; + case V4V_STATE_CONNECTING: + poll_wait(f, &p->writeq, pt); + break; + case V4V_STATE_DISCONNECTED: + mask |= POLLOUT | POLLWRNORM; + mask |= POLLIN | POLLRDNORM; + break; + case V4V_STATE_IDLE: + break; + } + break; + } + + read_unlock(&list_lock); + return mask; +} + +static const struct file_operations v4v_fops_stream = { + .owner = THIS_MODULE, + .write = v4v_write, + .read = v4v_read, + .unlocked_ioctl = v4v_ioctl, + .open = v4v_open_stream, + .release = v4v_release, + .poll = v4v_poll, +}; + +static const struct file_operations v4v_fops_dgram = { + .owner = THIS_MODULE, + .write = v4v_write, + .read = v4v_read, + .unlocked_ioctl = v4v_ioctl, + .open = v4v_open_dgram, + .release = v4v_release, + .poll = v4v_poll, +}; + +/* Xen VIRQ */ +static int v4v_irq = -1; + +static void unbind_virq(void) +{ + unbind_from_irqhandler (v4v_irq, NULL); + v4v_irq = -1; +} + +static int bind_evtchn(void) +{ + v4v_info_t info; + int result; + + v4v_info(&info); + if (info.ring_magic != V4V_RING_MAGIC) + return 1; + + result = + bind_interdomain_evtchn_to_irqhandler( + 0, info.evtchn, + v4v_interrupt, IRQF_SAMPLE_RANDOM, "v4v", NULL); + + if (result < 0) { + unbind_virq(); + return result; + } + + v4v_irq = result; + + return 0; +} + +/* V4V Device */ + +static struct miscdevice v4v_miscdev_dgram = { + .minor = MISC_DYNAMIC_MINOR, + .name = "v4v_dgram", + .fops = &v4v_fops_dgram, +}; + +static struct miscdevice v4v_miscdev_stream = { + .minor = MISC_DYNAMIC_MINOR, + .name = "v4v_stream", + .fops = &v4v_fops_stream, +}; + +static int v4v_suspend(struct platform_device *dev, pm_message_t state) +{ + unbind_virq(); + return 0; +} + +static int v4v_resume(struct platform_device *dev) +{ + struct ring *r; + + read_lock(&list_lock); + list_for_each_entry(r, &ring_list, node) { + refresh_pfn_list(r); + if (register_ring(r)) { + printk(KERN_ERR + "Failed to re-register a v4v ring on resume, port=0x%08x\n", + r->ring->id.addr.port); + } + } + read_unlock(&list_lock); + + if (bind_evtchn()) { + printk(KERN_ERR "v4v_resume: failed to bind v4v evtchn\n"); + return -ENODEV; + } + + return 0; +} + +static void v4v_shutdown(struct platform_device *dev) +{ +} + +static int __devinit v4v_probe(struct platform_device *dev) +{ + int err = 0; + int ret; + + ret = setup_fs(); + if (ret) + return ret; + + INIT_LIST_HEAD(&ring_list); + rwlock_init(&list_lock); + INIT_LIST_HEAD(&pending_xmit_list); + spin_lock_init(&pending_xmit_lock); + spin_lock_init(&interrupt_lock); + atomic_set(&pending_xmit_count, 0); + + if (bind_evtchn()) { + printk(KERN_ERR "failed to bind v4v evtchn\n"); + unsetup_fs(); + return -ENODEV; + } + + err = misc_register(&v4v_miscdev_dgram); + if (err != 0) { + printk(KERN_ERR "Could not register /dev/v4v_dgram\n"); + unsetup_fs(); + return err; + } + + err = 
misc_register(&v4v_miscdev_stream); + if (err != 0) { + printk(KERN_ERR "Could not register /dev/v4v_stream\n"); + unsetup_fs(); + return err; + } + + printk(KERN_INFO "Xen V4V device installed.\n"); + return 0; +} + +/* Platform Gunge */ + +static int __devexit v4v_remove(struct platform_device *dev) +{ + unbind_virq(); + misc_deregister(&v4v_miscdev_dgram); + misc_deregister(&v4v_miscdev_stream); + unsetup_fs(); + return 0; +} + +static struct platform_driver v4v_driver = { + .driver = { + .name = "v4v", + .owner = THIS_MODULE, + }, + .probe = v4v_probe, + .remove = __devexit_p(v4v_remove), + .shutdown = v4v_shutdown, + .suspend = v4v_suspend, + .resume = v4v_resume, +}; + +static struct platform_device *v4v_platform_device; + +static int __init v4v_init(void) +{ + int error; + + if (!xen_domain()) + { + printk(KERN_ERR "v4v only works under Xen\n"); + return -ENODEV; + } + + error = platform_driver_register(&v4v_driver); + if (error) + return error; + + v4v_platform_device = platform_device_alloc("v4v", -1); + if (!v4v_platform_device) { + platform_driver_unregister(&v4v_driver); + return -ENOMEM; + } + + error = platform_device_add(v4v_platform_device); + if (error) { + platform_device_put(v4v_platform_device); + platform_driver_unregister(&v4v_driver); + return error; + } + + return 0; +} + +static void __exit v4v_cleanup(void) +{ + platform_device_unregister(v4v_platform_device); + platform_driver_unregister(&v4v_driver); +} + +module_init(v4v_init); +module_exit(v4v_cleanup); +MODULE_LICENSE("GPL"); diff --git a/drivers/xen/v4v_utils.h b/drivers/xen/v4v_utils.h new file mode 100644 index 0000000..91c00b6 --- /dev/null +++ b/drivers/xen/v4v_utils.h @@ -0,0 +1,278 @@ +/****************************************************************************** + * V4V + * + * Version 2 of v2v (Virtual-to-Virtual) + * + * Copyright (c) 2010, Citrix Systems + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __V4V_UTILS_H__
+# define __V4V_UTILS_H__
+
+/* Compiler specific hacks */
+#if defined(__GNUC__)
+# define V4V_UNUSED __attribute__ ((unused))
+# ifndef __STRICT_ANSI__
+#  define V4V_INLINE inline
+# else
+#  define V4V_INLINE
+# endif
+#else /* !__GNUC__ */
+# define V4V_UNUSED
+# define V4V_INLINE
+#endif
+
+
+/*
+ * Utility functions
+ */
+static V4V_INLINE uint32_t
+v4v_ring_bytes_to_read (volatile struct v4v_ring *r)
+{
+    int32_t ret;
+    ret = r->tx_ptr - r->rx_ptr;
+    if (ret >= 0)
+        return ret;
+    return (uint32_t) (r->len + ret);
+}
+
+
+/*
+ * Copy at most t bytes of the next message in the ring into the buffer
+ * at _buf, setting from and protocol if they are not NULL; returns
+ * the actual length of the message, or -1 if there is nothing to read.
+ */
+V4V_UNUSED static V4V_INLINE ssize_t
+v4v_copy_out (struct v4v_ring *r, struct v4v_addr *from, uint32_t * protocol,
+              void *_buf, size_t t, int consume)
+{
+    volatile struct v4v_ring_message_header *mh;
+    /* unnecessary cast from void * required by MSVC compiler */
+    uint8_t *buf = (uint8_t *) _buf;
+    uint32_t btr = v4v_ring_bytes_to_read (r);
+    uint32_t rxp = r->rx_ptr;
+    uint32_t bte;
+    uint32_t len;
+    ssize_t ret;
+
+
+    if (btr < sizeof (*mh))
+        return -1;
+
+    /*
+     * Because the message_header is 128 bits long and the ring is 128 bit
+     * aligned, we're guaranteed never to wrap.
+     */
+    mh = (volatile struct v4v_ring_message_header *) &r->ring[r->rx_ptr];
+
+    len = mh->len;
+
+    if (btr < len)
+    {
+        return -1;
+    }
+
+#if defined(__GNUC__)
+    if (from)
+        *from = mh->source;
+#else
+    /* MSVC can't do the above */
+    if (from)
+        memcpy((void *) from, (void *) &(mh->source), sizeof(struct v4v_addr));
+#endif
+
+    if (protocol)
+        *protocol = mh->protocol;
+
+    rxp += sizeof (*mh);
+    if (rxp == r->len)
+        rxp = 0;
+    len -= sizeof (*mh);
+    ret = len;
+
+    bte = r->len - rxp;
+
+    if (bte < len)
+    {
+        if (t < bte)
+        {
+            if (buf)
+            {
+                memcpy (buf, (void *) &r->ring[rxp], t);
+                buf += t;
+            }
+
+            rxp = 0;
+            len -= bte;
+            t = 0;
+        }
+        else
+        {
+            if (buf)
+            {
+                memcpy (buf, (void *) &r->ring[rxp], bte);
+                buf += bte;
+            }
+            rxp = 0;
+            len -= bte;
+            t -= bte;
+        }
+    }
+
+    if (buf && t)
+        memcpy (buf, (void *) &r->ring[rxp], (t < len) ? t : len);
+
+
+    rxp += V4V_ROUNDUP (len);
+    if (rxp == r->len)
+        rxp = 0;
+
+    mb ();
+
+    if (consume)
+        r->rx_ptr = rxp;
+
+    return ret;
+}
+
+static V4V_INLINE void
+v4v_memcpy_skip (void *_dst, const void *_src, size_t len, size_t *skip)
+{
+    const uint8_t *src = (const uint8_t *) _src;
+    uint8_t *dst = (uint8_t *) _dst;
+
+    if (!*skip)
+    {
+        memcpy (dst, src, len);
+        return;
+    }
+
+    if (*skip >= len)
+    {
+        *skip -= len;
+        return;
+    }
+
+    src += *skip;
+    dst += *skip;
+    len -= *skip;
+    *skip = 0;
+
+    memcpy (dst, src, len);
+}
+
+/*
+ * Copy at most t bytes of the next message in the ring into the buffer
+ * at _buf, skipping the first skip bytes, setting from and protocol if
+ * they are not NULL; returns the actual length of the message, or -1 if
+ * there is nothing to read.
+ */
+static ssize_t
+v4v_copy_out_offset(struct v4v_ring *r, struct v4v_addr *from,
+                    uint32_t * protocol, void *_buf, size_t t, int consume,
+                    size_t skip) V4V_UNUSED;
+
+V4V_INLINE static ssize_t
+v4v_copy_out_offset(struct v4v_ring *r, struct v4v_addr *from,
+                    uint32_t * protocol, void *_buf, size_t t, int consume,
+                    size_t skip)
+{
+    volatile struct v4v_ring_message_header *mh;
+    /* unnecessary cast from void * required by MSVC compiler */
+    uint8_t *buf = (uint8_t *) _buf;
+    uint32_t btr = v4v_ring_bytes_to_read (r);
+    uint32_t rxp = r->rx_ptr;
+    uint32_t bte;
+    uint32_t len;
+    ssize_t ret;
+
+    buf -= skip;
+
+    if (btr < sizeof (*mh))
+        return -1;
+
+    /*
+     * Because the message_header is 128 bits long and the ring is 128 bit
+     * aligned, we're guaranteed never to wrap.
+     */
+    mh = (volatile struct v4v_ring_message_header *)&r->ring[r->rx_ptr];
+
+    len = mh->len;
+    if (btr < len)
+        return -1;
+
+#if defined(__GNUC__)
+    if (from)
+        *from = mh->source;
+#else
+    /* MSVC can't do the above */
+    if (from)
+        memcpy((void *)from, (void *)&(mh->source), sizeof(struct v4v_addr));
+#endif
+
+    if (protocol)
+        *protocol = mh->protocol;
+
+    rxp += sizeof (*mh);
+    if (rxp == r->len)
+        rxp = 0;
+    len -= sizeof (*mh);
+    ret = len;
+
+    bte = r->len - rxp;
+
+    if (bte < len)
+    {
+        if (t < bte)
+        {
+            if (buf)
+            {
+                v4v_memcpy_skip (buf, (void *) &r->ring[rxp], t, &skip);
+                buf += t;
+            }
+
+            rxp = 0;
+            len -= bte;
+            t = 0;
+        }
+        else
+        {
+            if (buf)
+            {
+                v4v_memcpy_skip (buf, (void *) &r->ring[rxp], bte,
+                                 &skip);
+                buf += bte;
+            }
+            rxp = 0;
+            len -= bte;
+            t -= bte;
+        }
+    }
+
+    if (buf && t)
+        v4v_memcpy_skip (buf, (void *) &r->ring[rxp], (t < len) ? t : len,
+                         &skip);
+
+
+    rxp += V4V_ROUNDUP (len);
+    if (rxp == r->len)
+        rxp = 0;
+
+    mb ();
+
+    if (consume)
+        r->rx_ptr = rxp;
+
+    return ret;
+}
+
+#endif /* !__V4V_UTILS_H__ */
diff --git a/include/xen/interface/v4v.h b/include/xen/interface/v4v.h
new file mode 100644
index 0000000..36ff95c
--- /dev/null
+++ b/include/xen/interface/v4v.h
@@ -0,0 +1,299 @@
+/******************************************************************************
+ * V4V
+ *
+ * Version 2 of v2v (Virtual-to-Virtual)
+ *
+ * Copyright (c) 2010, Citrix Systems
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __XEN_PUBLIC_V4V_H__ +#define __XEN_PUBLIC_V4V_H__ + +/* + * Structure definitions + */ + +#define V4V_RING_MAGIC 0xA822F72BB0B9D8CC +#define V4V_RING_DATA_MAGIC 0x45FE852220B801E4 + +#define V4V_PROTO_DGRAM 0x3c2c1db8 +#define V4V_PROTO_STREAM 0x70f6a8e5 + +#define V4V_DOMID_INVALID (0x7FFFU) +#define V4V_DOMID_NONE V4V_DOMID_INVALID +#define V4V_DOMID_ANY V4V_DOMID_INVALID +#define V4V_PORT_NONE 0 + +typedef struct v4v_iov +{ + uint64_t iov_base; + uint64_t iov_len; +} v4v_iov_t; + +typedef struct v4v_addr +{ + uint32_t port; + domid_t domain; + uint16_t pad; +} v4v_addr_t; + +typedef struct v4v_ring_id +{ + v4v_addr_t addr; + domid_t partner; + uint16_t pad; +} v4v_ring_id_t; + +typedef uint64_t v4v_pfn_t; + +typedef struct +{ + v4v_addr_t src; + v4v_addr_t dst; +} v4v_send_addr_t; + +/* + * v4v_ring + * id: + * xen only looks at this during register/unregister + * and will fill in id.addr.domain + * + * rx_ptr: rx pointer, modified by domain + * tx_ptr: tx pointer, modified by xen + * + */ +struct v4v_ring +{ + uint64_t magic; + v4v_ring_id_t id; + uint32_t len; + uint32_t rx_ptr; + uint32_t tx_ptr; + uint8_t reserved[32]; + uint8_t ring[0]; +}; +typedef struct v4v_ring v4v_ring_t; + +#define V4V_RING_DATA_F_EMPTY (1U << 0) /* Ring is empty */ +#define V4V_RING_DATA_F_EXISTS (1U << 1) /* Ring exists */ +#define V4V_RING_DATA_F_PENDING (1U << 2) /* Pending interrupt exists - do not + * rely on this field - for + * profiling only */ +#define V4V_RING_DATA_F_SUFFICIENT (1U << 3) /* Sufficient space to queue + * space_required bytes exists */ + +#if defined(__GNUC__) +# define V4V_RING_DATA_ENT_FULLRING +# define V4V_RING_DATA_ENT_FULL +#else +# define V4V_RING_DATA_ENT_FULLRING fullring +# define V4V_RING_DATA_ENT_FULL full +#endif +typedef struct v4v_ring_data_ent +{ + v4v_addr_t ring; + uint16_t flags; + uint16_t pad; + uint32_t space_required; + uint32_t max_message_size; +} v4v_ring_data_ent_t; + +typedef struct v4v_ring_data +{ + uint64_t magic; + uint32_t nent; + uint32_t pad; + uint64_t reserved[4]; + v4v_ring_data_ent_t data[0]; +} v4v_ring_data_t; + +struct v4v_info +{ + uint64_t ring_magic; + uint64_t data_magic; + evtchn_port_t evtchn; +}; +typedef struct v4v_info v4v_info_t; + +#define V4V_ROUNDUP(a) (((a) +0xf ) & ~0xf) +/* + * Messages on the ring are padded to 128 bits + * Len here refers to the exact length of the data not including the + * 128 bit header. 
The message uses
+ * ((len +0xf) & ~0xf) + sizeof(v4v_ring_message_header) bytes
+ */
+
+#define V4V_SHF_SYN (1 << 0)
+#define V4V_SHF_ACK (1 << 1)
+#define V4V_SHF_RST (1 << 2)
+
+#define V4V_SHF_PING (1 << 8)
+#define V4V_SHF_PONG (1 << 9)
+
+struct v4v_stream_header
+{
+    uint32_t flags;
+    uint32_t conid;
+};
+
+struct v4v_ring_message_header
+{
+    uint32_t len;
+    uint32_t pad0;
+    v4v_addr_t source;
+    uint32_t protocol;
+    uint32_t pad1;
+    uint8_t data[0];
+};
+
+typedef struct v4v_viptables_rule
+{
+    v4v_addr_t src;
+    v4v_addr_t dst;
+    uint32_t accept;
+    uint32_t pad;
+} v4v_viptables_rule_t;
+
+typedef struct v4v_viptables_list
+{
+    uint32_t start_rule;
+    uint32_t nb_rules;
+    struct v4v_viptables_rule rules[0];
+} v4v_viptables_list_t;
+
+/*
+ * HYPERCALLS
+ */
+
+#define V4VOP_register_ring 1
+/*
+ * Register a ring with Xen.  If a ring with the same v4v_ring_id already
+ * exists, this ring takes its place; registration does not change tx_ptr
+ * unless it is invalid.
+ *
+ * v4v_hypercall(V4VOP_register_ring,
+ *               v4v_ring, XEN_GUEST_HANDLE(v4v_pfn),
+ *               npage, 0)
+ */
+
+#define V4VOP_unregister_ring 2
+/*
+ * Unregister a ring.
+ *
+ * v4v_hypercall(V4VOP_unregister_ring, v4v_ring, NULL, 0, 0)
+ */
+
+#define V4VOP_send 3
+/*
+ * Send len bytes of buf to dst, giving src as the source address (Xen
+ * ignores src->domain and puts the sending domain in the actual message).
+ * Xen first looks for a ring with id.addr==dst and id.partner==sending_domain;
+ * if that fails it looks for id.addr==dst and id.partner==DOMID_ANY.
+ * protocol is the 32 bit protocol number carried in the message, typically
+ * V4V_PROTO_DGRAM or V4V_PROTO_STREAM.  If insufficient space exists in the
+ * destination ring, the call returns -EAGAIN and Xen raises the V4V
+ * interrupt when sufficient space becomes available.
+ *
+ * v4v_hypercall(V4VOP_send,
+ *               v4v_send_addr_t addr,
+ *               void* buf,
+ *               uint32_t len,
+ *               uint32_t protocol)
+ */
+
+#define V4VOP_notify 4
+/*
+ * Ask Xen for information about other rings in the system.
+ *
+ * ent->ring is the v4v_addr_t of the ring you want information on;
+ * the same matching rules are used as for V4VOP_send.
+ *
+ * ent->space_required: if this field is non-zero, Xen checks that there
+ * is space in the destination ring for this many bytes of payload.  If
+ * there is, it sets V4V_RING_DATA_F_SUFFICIENT and CANCELS any pending
+ * interrupt for that ent->ring; if insufficient space is available it
+ * schedules an interrupt and the flag is not set.
+ *
+ * The flags are set by Xen in the notify reply:
+ * V4V_RING_DATA_F_EMPTY       ring is empty
+ * V4V_RING_DATA_F_PENDING     interrupt is pending - don't rely on this
+ * V4V_RING_DATA_F_SUFFICIENT  sufficient space for space_required is there
+ * V4V_RING_DATA_F_EXISTS      ring exists
+ *
+ * v4v_hypercall(V4VOP_notify,
+ *               XEN_GUEST_HANDLE(v4v_ring_data_ent) ent,
+ *               NULL, nent, 0)
+ */
+
+#define V4VOP_sendv 5
+/*
+ * Identical to V4VOP_send except that, rather than buf and len, it takes
+ * an array of v4v_iov and the length of that array.
+ *
+ * v4v_hypercall(V4VOP_sendv,
+ *               v4v_send_addr_t addr,
+ *               v4v_iov iov,
+ *               uint32_t niov,
+ *               uint32_t protocol)
+ */
+
+#define V4VOP_viptables_add 6
+/*
+ * Insert a filtering rule after a given position.
+ *
+ * v4v_hypercall(V4VOP_viptables_add,
+ *               v4v_viptables_rule_t rule,
+ *               NULL,
+ *               uint32_t position, 0)
+ */
+
+#define V4VOP_viptables_del 7
+/*
+ * Delete the filtering rule at a given position, or the rule
+ * that matches "rule".
+ *
+ * v4v_hypercall(V4VOP_viptables_del,
+ *               v4v_viptables_rule_t rule,
+ *               NULL,
+ *               uint32_t position, 0)
+ */
+
+#define V4VOP_viptables_list 8
+/*
+ * List the currently installed filtering rules.
+ *
+ * v4v_hypercall(V4VOP_viptables_list,
+ *               v4v_viptables_list_t list,
+ *               NULL, 0, 0)
+ */
+
+#define V4VOP_info 9
+/*
+ * Return the V4V magic numbers and the event channel port used for V4V
+ * interrupts (see struct v4v_info).
+ *
+ * v4v_hypercall(V4VOP_info,
+ *               XEN_GUEST_HANDLE(v4v_info_t) info,
+ *               NULL, 0, 0)
+ */
+
+#endif /* __XEN_PUBLIC_V4V_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index a890804..395f6cd 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -59,6 +59,7 @@
 #define __HYPERVISOR_physdev_op 33
 #define __HYPERVISOR_hvm_op 34
 #define __HYPERVISOR_tmem_op 38
+#define __HYPERVISOR_v4v_op 39
 /* Architecture-specific hypercall definitions. */
 #define __HYPERVISOR_arch_0 48
diff --git a/include/xen/v4vdev.h b/include/xen/v4vdev.h
new file mode 100644
index 0000000..a30b608
--- /dev/null
+++ b/include/xen/v4vdev.h
@@ -0,0 +1,34 @@
+#ifndef __V4V_DGRAM_H__
+#define __V4V_DGRAM_H__
+
+struct v4v_dev
+{
+    void *buf;
+    size_t len;
+    int flags;
+    v4v_addr_t *addr;
+};
+
+struct v4v_viptables_rule_pos
+{
+    struct v4v_viptables_rule* rule;
+    int position;
+};
+
+#define V4V_TYPE 'W'
+
+#define V4VIOCSETRINGSIZE _IOW (V4V_TYPE, 1, uint32_t)
+#define V4VIOCBIND _IOW (V4V_TYPE, 2, v4v_ring_id_t)
+#define V4VIOCGETSOCKNAME _IOW (V4V_TYPE, 3, v4v_ring_id_t)
+#define V4VIOCGETPEERNAME _IOW (V4V_TYPE, 4, v4v_addr_t)
+#define V4VIOCCONNECT _IOW (V4V_TYPE, 5, v4v_addr_t)
+#define V4VIOCGETCONNECTERR _IOW (V4V_TYPE, 6, int)
+#define V4VIOCLISTEN _IOW (V4V_TYPE, 7, uint32_t) /* unused arg */
+#define V4VIOCACCEPT _IOW (V4V_TYPE, 8, v4v_addr_t)
+#define V4VIOCSEND _IOW (V4V_TYPE, 9, struct v4v_dev)
+#define V4VIOCRECV _IOW (V4V_TYPE, 10, struct v4v_dev)
+#define V4VIOCVIPTABLESADD _IOW (V4V_TYPE, 11, struct v4v_viptables_rule_pos)
+#define V4VIOCVIPTABLESDEL _IOW (V4V_TYPE, 12, struct v4v_viptables_rule_pos)
+#define V4VIOCVIPTABLESLIST _IOW (V4V_TYPE, 13, struct v4v_viptables_list)
+
+#endif
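
Usage note (illustrative only, not part of the patch): the ioctls in include/xen/v4vdev.h are the userspace-facing half of the driver. The sketch below shows how a minimal datagram client might drive /dev/v4v_dgram under a number of assumptions: the port number and destination domid are made up, the headers are assumed to be exported to userspace, and the behaviour that V4VIOCRECV returns the byte count and fills *addr with the sender's address is inferred from the structure layout rather than documented above.

/* Illustrative sketch of a userspace V4V datagram client; see assumptions above. */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

#include <xen/interface/v4v.h>  /* v4v_addr_t, v4v_ring_id_t (assumed exported) */
#include <xen/v4vdev.h>         /* V4VIOC* ioctls (assumed exported) */

int main(void)
{
    int fd = open("/dev/v4v_dgram", O_RDWR);
    if (fd < 0)
        return 1;

    /* Bind a local ring: hypothetical port 4000, accept any partner domain. */
    v4v_ring_id_t id;
    memset(&id, 0, sizeof(id));
    id.addr.port = 4000;
    id.addr.domain = V4V_DOMID_NONE;   /* local domid is filled in on registration */
    id.partner = V4V_DOMID_ANY;
    if (ioctl(fd, V4VIOCBIND, &id) < 0)
        goto out;

    /* Send a datagram to the same port in dom0 (hypothetical peer). */
    char msg[] = "hello";
    v4v_addr_t dst = { .port = 4000, .domain = 0 };
    struct v4v_dev sd = { .buf = msg, .len = sizeof(msg), .flags = 0, .addr = &dst };
    if (ioctl(fd, V4VIOCSEND, &sd) < 0)
        goto out;

    /* Receive a reply; assumed: return value is the byte count and *addr
     * is filled with the sender's address. */
    char reply[256];
    v4v_addr_t src;
    struct v4v_dev rd = { .buf = reply, .len = sizeof(reply), .flags = 0, .addr = &src };
    int n = ioctl(fd, V4VIOCRECV, &rd);
    if (n >= 0)
        printf("got %d bytes from dom%u port %u\n", n,
               (unsigned)src.domain, (unsigned)src.port);
out:
    close(fd);
    return 0;
}

A stream client would presumably use /dev/v4v_stream with V4VIOCCONNECT, V4VIOCLISTEN and V4VIOCACCEPT in the same spirit, mirroring the connector/listener states handled in v4v_poll() above.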