|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] [Xen-devel] [PATCH 1/3] net: introduce new socket support: xensock
These sockets will be used by the xen-sock frontend/backend
drivers. The drivers allow applications to connect via xensock
sockets (dom0/domD sockets can be used by the server
application and domU sockets can be used by the client
application).
Signed-off-by: Oleksandr Dmytryshyn <oleksandr.dmytryshyn@xxxxxxxxxxxxxxx>
---
drivers/net/Kconfig | 4 +
drivers/net/Makefile | 2 +
drivers/net/xensock/Makefile | 5 +
drivers/net/xensock/xensock-dev.c | 269 +++++++++++++
drivers/net/xensock/xensock-proto.c | 767 ++++++++++++++++++++++++++++++++++++
include/linux/socket.h | 4 +-
include/net/af_xensock.h | 46 +++
include/net/xensock.h | 130 ++++++
net/core/sock.c | 9 +-
9 files changed, 1232 insertions(+), 4 deletions(-)
create mode 100644 drivers/net/xensock/Makefile
create mode 100644 drivers/net/xensock/xensock-dev.c
create mode 100644 drivers/net/xensock/xensock-proto.c
create mode 100644 include/net/af_xensock.h
create mode 100644 include/net/xensock.h
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 89402c3..420981a 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -348,6 +348,10 @@ config XEN_NETDEV_BACKEND
compile this driver as a module, chose M here: the module
will be called xen-netback.
+config XEN_SOCKDEV_PROTO
+ bool
+ default n
+
config VMXNET3
tristate "VMware VMXNET3 ethernet driver"
depends on PCI && INET
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 3fef8a8..43bf910 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -61,6 +61,8 @@ obj-$(CONFIG_VMXNET3) += vmxnet3/
obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
obj-$(CONFIG_XEN_NETDEV_BACKEND) += xen-netback/
+obj-$(CONFIG_XEN_SOCKDEV_PROTO) += xensock/
+
obj-$(CONFIG_USB_CATC) += usb/
obj-$(CONFIG_USB_KAWETH) += usb/
obj-$(CONFIG_USB_PEGASUS) += usb/
diff --git a/drivers/net/xensock/Makefile b/drivers/net/xensock/Makefile
new file mode 100644
index 0000000..d70db09
--- /dev/null
+++ b/drivers/net/xensock/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the xensock driver
+#
+
+obj-$(CONFIG_XEN_SOCKDEV_PROTO) := xensock-proto.o xensock-dev.o
diff --git a/drivers/net/xensock/xensock-dev.c b/drivers/net/xensock/xensock-dev.c
new file mode 100644
index 0000000..6da8f34
--- /dev/null
+++ b/drivers/net/xensock/xensock-dev.c
@@ -0,0 +1,269 @@
+/*
+ * Xen socket dev driver.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+
+#include <net/af_xensock.h>
+
+/* List of registered xensock devices; accessed under xensock_list_lock. */
+LIST_HEAD(xensock_list);
+/* Reader/writer lock taken (with BHs disabled) around xensock_list accesses. */
+DEFINE_RWLOCK(xensock_list_lock);
+/* Tasks in xensock_dev_wait() sleep here until a device may be available. */
+DECLARE_WAIT_QUEUE_HEAD(xensock_accept_wait);
+
+/* Take the per-device spinlock that guards the device <-> socket link. */
+static inline void xensock_lock_dev(struct xen_sock_dev *dev)
+{
+	spinlock_t *dev_lock = &dev->lock;
+
+	spin_lock(dev_lock);
+}
+
+/* Drop the per-device spinlock taken by xensock_lock_dev(). */
+static inline void xensock_unlock_dev(struct xen_sock_dev *dev)
+{
+	spinlock_t *dev_lock = &dev->lock;
+
+	spin_unlock(dev_lock);
+}
+
+/*
+ * alloc_xen_sock_dev - allocate a zeroed, SOCKDEV_ALIGN-aligned xensock device
+ * @sizeof_priv: size of the private area placed behind the device structure
+ * @name: device name; must fit into dev->name including the terminator
+ *
+ * Returns the new device with the carrier off and the tx queue stopped, or
+ * NULL when the allocation fails. Release it with free_xen_sock_dev().
+ */
+struct xen_sock_dev *alloc_xen_sock_dev(int sizeof_priv, const char *name)
+{
+	struct xen_sock_dev *dev;
+	struct xen_sock_dev *raw;
+	int total;
+
+	BUG_ON(strlen(name) >= sizeof(dev->name));
+
+	total = sizeof(struct xen_sock_dev);
+
+	/* Private data, if any, starts at the next SOCKDEV_ALIGN boundary */
+	if (sizeof_priv) {
+		total = ALIGN(sizeof(struct xen_sock_dev), SOCKDEV_ALIGN);
+		total += sizeof_priv;
+	}
+
+	/* Slack so the returned pointer can be rounded up to SOCKDEV_ALIGN */
+	total += SOCKDEV_ALIGN - 1;
+
+	raw = kzalloc(total, GFP_KERNEL);
+	if (raw == NULL) {
+		pr_err("alloc_xen_sock_dev: Unable to allocate device\n");
+		return NULL;
+	}
+
+	dev = PTR_ALIGN(raw, SOCKDEV_ALIGN);
+	/* Remember the shift so free_xen_sock_dev() can recover 'raw' */
+	dev->padded = (char *)dev - (char *)raw;
+
+	spin_lock_init(&dev->lock);
+	strcpy(dev->name, name);
+	sockif_stop_queue(dev);
+	sockif_carrier_off(dev);
+
+	return dev;
+}
+EXPORT_SYMBOL(alloc_xen_sock_dev);
+
+/*
+ * free_xen_sock_dev - release a device from alloc_xen_sock_dev()
+ * @dev: device to free; NULL is accepted and ignored
+ *
+ * The pointer handed to kfree() is @dev moved back by dev->padded bytes.
+ */
+void free_xen_sock_dev(struct xen_sock_dev *dev)
+{
+	if (dev)
+		kfree((char *)dev - dev->padded);
+}
+EXPORT_SYMBOL(free_xen_sock_dev);
+
+
+/*
+ * xensock_register_dev - add @dev to the global xensock device list
+ * @dev: device to register
+ *
+ * After the device is on the list, a waiter sleeping in xensock_dev_wait()
+ * is woken so it can pick the device up. Always returns 0.
+ */
+int xensock_register_dev(struct xen_sock_dev *dev)
+{
+	struct list_head *node = &dev->list;
+
+	write_lock_bh(&xensock_list_lock);
+	list_add_tail(node, &xensock_list);
+	write_unlock_bh(&xensock_list_lock);
+
+	wake_up_interruptible(&xensock_accept_wait);
+	return 0;
+}
+EXPORT_SYMBOL(xensock_register_dev);
+
+static void __xensock_dev_unlink_sk(struct xen_sock_dev *dev);
+
+/*
+ * xensock_unregister_dev - detach @dev from its socket and from the list
+ * @dev: device to unregister
+ *
+ * Both steps happen with the list write lock and the device lock held.
+ */
+void xensock_unregister_dev(struct xen_sock_dev *dev)
+{
+	write_lock_bh(&xensock_list_lock);
+
+	spin_lock(&dev->lock);
+	__xensock_dev_unlink_sk(dev);
+	list_del(&dev->list);
+	spin_unlock(&dev->lock);
+
+	write_unlock_bh(&xensock_list_lock);
+}
+EXPORT_SYMBOL(xensock_unregister_dev);
+
+/*
+ * Bind @sk and @dev to each other. A reference on @sk is taken for the
+ * device; __xensock_dev_unlink_sk() drops it again. Always returns 0.
+ */
+static int __xensock_dev_link_sk(struct xen_sock_dev *dev, struct sock *sk)
+{
+	struct xen_sock *xsk = xen_sk(sk);
+
+	sock_hold(sk);
+	xsk->dev = dev;
+	dev->sk = sk;
+
+	return 0;
+}
+
+/*
+ * Break the link between @dev and the socket attached to it, if any.
+ *
+ * The socket is marked unconnected, gets ENOTCONN as pending error, its
+ * state-change callback is run and the reference taken by
+ * __xensock_dev_link_sk() is dropped. Does nothing when no socket is linked.
+ */
+static void __xensock_dev_unlink_sk(struct xen_sock_dev *dev)
+{
+	struct sock *sk = dev->sk;
+	struct socket *sock;
+
+	if (!sk)
+		return;
+
+	/*
+	 * sk_socket may be NULL: the accept path links a sock that was set
+	 * up with sock_init_data(NULL, sk) before sock_graft() gives it a
+	 * struct socket, so an unlink can race in before the graft.
+	 */
+	sock = sk->sk_socket;
+	if (sock)
+		sock->state = SS_UNCONNECTED;
+
+	xen_sk(sk)->dev = NULL;
+	dev->sk = NULL;
+	sk->sk_err = ENOTCONN;
+	sk->sk_state_change(sk);
+	sock_put(sk);
+}
+
+/*
+ * xensock_dev_unlink_sk - detach the socket linked to @dev
+ * @dev: device to free up
+ *
+ * Once the link is gone, a waiter in xensock_dev_wait() is woken because
+ * the device can be handed to another socket.
+ */
+void xensock_dev_unlink_sk(struct xen_sock_dev *dev)
+{
+	write_lock_bh(&xensock_list_lock);
+
+	spin_lock(&dev->lock);
+	__xensock_dev_unlink_sk(dev);
+	spin_unlock(&dev->lock);
+
+	write_unlock_bh(&xensock_list_lock);
+
+	wake_up_interruptible(&xensock_accept_wait);
+}
+
+/* Detach the linked socket, if any, from every registered device. */
+void xensock_unlink_all_dev_sk(void)
+{
+	struct xen_sock_dev *cur;
+
+	write_lock_bh(&xensock_list_lock);
+
+	list_for_each_entry(cur, &xensock_list, list) {
+		spin_lock(&cur->lock);
+		__xensock_dev_unlink_sk(cur);
+		spin_unlock(&cur->lock);
+	}
+
+	write_unlock_bh(&xensock_list_lock);
+}
+
+/*
+ * xensock_dev_wait - sleep until a free xensock device can be linked to @nsk
+ * @sk: socket whose lock is dropped around the sleep; its sk_rcvtimeo and
+ *      sk_shutdown control the wait
+ * @nsk: socket that gets linked to the first device with no socket attached
+ *
+ * Returns 0 after linking @nsk to a device, -ERESTARTSYS when a signal is
+ * pending, or -EAGAIN when the timeout has run out.
+ * NOTE(review): the RCV_SHUTDOWN break leaves rc at 0 although no device
+ * was linked - confirm callers can cope with that.
+ */
+int xensock_dev_wait(struct sock *sk, struct sock *nsk)
+{
+ int rc = 0;
+ long timeout = sk->sk_rcvtimeo;
+ struct xen_sock_dev *ldev;
+ bool dev_found;
+
+ DECLARE_WAITQUEUE(wait, current);
+
+ add_wait_queue_exclusive(&xensock_accept_wait, &wait);
+ for (;;) {
+ __set_current_state(TASK_INTERRUPTIBLE);
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ break;
+ rc = -ERESTARTSYS;
+ if (signal_pending(current))
+ break;
+ rc = -EAGAIN;
+ if (!timeout)
+ break;
+ rc = 0;
+ dev_found = false;
+ /* look for a registered device that has no socket attached yet */
+ read_lock_bh(&xensock_list_lock);
+ list_for_each_entry(ldev, &xensock_list, list) {
+ xensock_lock_dev(ldev);
+ if (ldev->sk == NULL) {
+ __xensock_dev_link_sk(ldev, nsk);
+ xensock_unlock_dev(ldev);
+ dev_found = true;
+ break;
+ }
+ xensock_unlock_dev(ldev);
+ }
+ read_unlock_bh(&xensock_list_lock);
+ if (dev_found)
+ break;
+
+ /* nothing free: drop the socket lock while sleeping */
+ release_sock(sk);
+ timeout = schedule_timeout(timeout);
+ lock_sock(sk);
+ }
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&xensock_accept_wait, &wait);
+
+ return rc;
+}
+
+/* xensock_dev_send - transmit a xensock frame
+ * @skb: pointer to socket buffer with xensock frame in data section
+ * @dev: xensock device the frame is handed to
+ *
+ * Returns 0 when dev->start_xmit() accepted the frame. Otherwise the skb
+ * is freed here and an error is returned: -EMSGSIZE for a frame larger
+ * than XENSOCK_MTU, -EIO when the device has no start_xmit handler,
+ * -ENOBUFS while the tx queue is stopped, or the start_xmit() error.
+ */
+int xensock_dev_send(struct sk_buff *skb, struct xen_sock_dev *dev)
+{
+	int rc;
+
+	if (unlikely(skb->len > XENSOCK_MTU))
+		rc = -EMSGSIZE;
+	else if (unlikely(!dev->start_xmit))
+		rc = -EIO;
+	else if (sockif_queue_stopped(dev))
+		rc = -ENOBUFS;
+	else
+		rc = dev->start_xmit(skb, dev);
+
+	/* every failure path consumes the skb */
+	if (rc)
+		kfree_skb(skb);
+
+	return rc;
+}
+
+/*
+ * xensock_dev_queue_rx_skb - queue a received frame on the linked socket
+ * @skb: received frame
+ * @dev: device the frame arrived on
+ *
+ * Returns 0 after appending @skb to the receive queue of the socket linked
+ * to @dev, or -ENETDOWN when no socket is linked. In the error case @skb is
+ * left untouched and still belongs to the caller.
+ */
+int xensock_dev_queue_rx_skb(struct sk_buff *skb, struct xen_sock_dev *dev)
+{
+	int rc = -ENETDOWN;
+	struct sock *sk;
+
+	xensock_lock_dev(dev);
+	/*
+	 * dev->sk must be sampled under the device lock: the unlink paths
+	 * clear it and drop the socket reference while holding that lock.
+	 */
+	sk = dev->sk;
+	if (sk == NULL)
+		goto out;
+
+	rc = 0;
+	skb_queue_tail(&sk->sk_receive_queue, skb);
+	if (!sock_flag(sk, SOCK_DEAD))
+		sk->sk_data_ready(sk, skb->len);
+out:
+	xensock_unlock_dev(dev);
+	return rc;
+}
diff --git a/drivers/net/xensock/xensock-proto.c b/drivers/net/xensock/xensock-proto.c
new file mode 100644
index 0000000..d05e5d5
--- /dev/null
+++ b/drivers/net/xensock/xensock-proto.c
@@ -0,0 +1,767 @@
+/*
+ * Xen socket protocol driver.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/if_arp.h>
+#include <linux/highmem.h>
+
+#include <net/tcp_states.h>
+#include <net/af_xensock.h>
+
+/* Flag bit in the first payload byte of a frame: the peer closed the link. */
+#define XENSOCK_DISCONNECT BIT(0)
+
+/* Per-skb receive state kept in skb->cb, see xensock_proto_cb(). */
+struct xensock_skb_cb {
+ /* non-zero once the leading flags byte has been read and stripped */
+ unsigned int flags_checked;
+};
+
+/* Locate the protocol's private state inside skb->cb.
+ * The first 'int' of skb->cb is used by the xensock frontend and backend
+ * drivers, so the state consulted by xensock_prot_recvmsg() is placed
+ * directly behind it.
+ */
+static inline struct xensock_skb_cb *xensock_proto_cb(struct sk_buff *skb)
+{
+	int *cb_words = (int *)skb->cb;
+
+	BUILD_BUG_ON(sizeof(skb->cb) <= (sizeof(int) +
+					 sizeof(struct xensock_skb_cb)));
+
+	/* step over the driver-owned 'int' */
+	return (struct xensock_skb_cb *)(cb_words + 1);
+}
+
+/* struct proto .init hook: nothing to set up, always reports success. */
+static int xensock_prot_init(struct sock *sk)
+{
+ return 0;
+}
+
+/* Protocol descriptor; obj_size makes sk_alloc() hand out struct xen_sock. */
+static struct proto xensock_proto __read_mostly = {
+ .name = "XENSOCK",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct xen_sock),
+ .init = xensock_prot_init,
+};
+
+/*
+ * xen_allocate_sock - allocate and initialise an unattached xensock sock
+ * @net: network namespace the sock belongs to
+ *
+ * The sock is initialised without a struct socket and without a device.
+ * Returns NULL when sk_alloc() fails.
+ */
+struct sock *xen_allocate_sock(struct net *net)
+{
+	struct sock *sk = sk_alloc(net, PF_XENSOCK, GFP_KERNEL, &xensock_proto);
+
+	if (sk) {
+		sock_init_data(NULL, sk);
+		xen_sk(sk)->dev = NULL;
+	}
+
+	return sk;
+}
+
+/*
+ * Create the sock for an accepted connection, inheriting type, priority,
+ * protocol, buffer sizes, backlog handler and flags from the listening
+ * sock @osk. Returns NULL if @osk is not SOCK_RAW or the allocation fails.
+ */
+static struct sock *xensock_make_new(struct sock *osk)
+{
+	struct sock *nsk;
+
+	if (osk->sk_type != SOCK_RAW)
+		return NULL;
+
+	nsk = xen_allocate_sock(sock_net(osk));
+	if (!nsk)
+		return NULL;
+
+	nsk->sk_type = osk->sk_type;
+	nsk->sk_priority = osk->sk_priority;
+	nsk->sk_protocol = osk->sk_protocol;
+	nsk->sk_rcvbuf = osk->sk_rcvbuf;
+	nsk->sk_sndbuf = osk->sk_sndbuf;
+	nsk->sk_state = TCP_ESTABLISHED;
+	nsk->sk_backlog_rcv = osk->sk_backlog_rcv;
+	sock_copy_flags(nsk, osk);
+
+	return nsk;
+}
+
+/*
+ * connect() handler: wait for a free xensock device and link this socket
+ * to it. @uaddr, @addr_len and @flags are not looked at. Returns 0 when
+ * the socket is (or already was) connected, otherwise the error from
+ * xensock_dev_wait().
+ */
+static int xensock_prot_connect(struct socket *sock, struct sockaddr *uaddr,
+				int addr_len, int flags)
+{
+	struct sock *sk = sock->sk;
+	int rc;
+
+	if (sock->state == SS_CONNECTED)
+		return 0;
+
+	lock_sock(sk);
+	rc = xensock_dev_wait(sk, sk);
+	if (rc == 0)
+		sock->state = SS_CONNECTED;
+	release_sock(sk);
+
+	return rc;
+}
+
+/*
+ * accept() handler: create a new sock, wait until a free xensock device
+ * can be linked to it and graft it onto @newsock.
+ *
+ * Returns 0 on success, -EINVAL without a sock or when not listening,
+ * -EOPNOTSUPP for non-SOCK_RAW sockets, -ENOMEM when the new sock cannot
+ * be created, or the error from xensock_dev_wait().
+ */
+static int xensock_prot_accept(struct socket *sock, struct socket *newsock,
+			       int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sock *nsk;
+	int rc;
+
+	if (!sk)
+		return -EINVAL;
+
+	if (sk->sk_type != SOCK_RAW)
+		return -EOPNOTSUPP;
+
+	lock_sock(sk);
+
+	if (sk->sk_state != TCP_LISTEN) {
+		rc = -EINVAL;
+		goto unlock;
+	}
+
+	nsk = xensock_make_new(sk);
+	if (!nsk) {
+		rc = -ENOMEM;
+		goto unlock;
+	}
+
+	rc = xensock_dev_wait(sk, nsk);
+	if (rc) {
+		/* no device was linked: drop the sock we just created */
+		sock_orphan(nsk);
+		sock_put(nsk);
+		goto unlock;
+	}
+
+	/* Now attach up the new socket */
+	sock_graft(nsk, newsock);
+	sk->sk_ack_backlog--;
+	newsock->state = SS_CONNECTED;
+unlock:
+	release_sock(sk);
+	return rc;
+}
+
+/*
+ * listen() handler: move the socket to TCP_LISTEN and record @backlog.
+ *
+ * Returns 0 on success and -EOPNOTSUPP when the socket is linked to a
+ * device (listening is not allowed on such sockets) or is already
+ * listening.
+ */
+static int xensock_prot_listen(struct socket *sock, int backlog)
+{
+	struct sock *sk = sock->sk;
+	int rc = -EOPNOTSUPP;
+
+	lock_sock(sk);
+
+	if (!xen_sk(sk)->dev && sk->sk_state != TCP_LISTEN) {
+		sk->sk_max_ack_backlog = backlog;
+		sk->sk_state = TCP_LISTEN;
+		rc = 0;
+	}
+
+	release_sock(sk);
+
+	return rc;
+}
+
+/*
+ * release() handler: tell the peer about the disconnect, detach from the
+ * device and tear the sock down.
+ *
+ * When the socket is linked to a device, a one-byte frame carrying
+ * XENSOCK_DISCONNECT is sent on a best-effort basis before the link is
+ * broken. Pending receive buffers are dropped. Always returns 0.
+ */
+static int xensock_prot_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct xen_sock_dev *dev;
+	struct sk_buff *skb;
+	unsigned char xflag;
+	int err;
+
+	if (!sk)
+		return 0;
+
+	lock_sock(sk);
+	dev = xen_sk(sk)->dev;
+
+	if (dev) {
+		/* the allocation may sleep, so do it without the sock lock */
+		release_sock(sk);
+		skb = sock_alloc_send_skb(sk, 1, 0, &err);
+		lock_sock(sk);
+		if (skb) {
+			xflag = XENSOCK_DISCONNECT;
+			memcpy(skb_put(skb, 1), &xflag, 1);
+			skb->dev = NULL;
+			skb->sk = sk;
+
+			/*
+			 * Best effort. xensock_dev_send() frees the skb
+			 * itself on every failure path, so it must not be
+			 * freed again here.
+			 */
+			xensock_dev_send(skb, dev);
+		}
+
+		xensock_dev_unlink_sk(dev);
+	}
+
+	/* Flush the recv buffs */
+	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL)
+		__kfree_skb(skb);
+
+	sock_orphan(sk);
+	sock->sk = NULL;
+
+	release_sock(sk);
+	sock_put(sk);
+
+	return 0;
+}
+
+/*
+ * ioctl() handler: only SIOCGSTAMP is handled; every other command
+ * returns -ENOIOCTLCMD.
+ */
+int xensock_prot_ioctl(struct socket *sock, unsigned int cmd,
+		       unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+
+	switch (cmd) {
+	case SIOCGSTAMP:
+		return sock_get_timestamp(sk, (struct timeval __user *)arg);
+
+	default:
+		return -ENOIOCTLCMD;
+	}
+}
+
+/*
+ * sendmsg() handler: split the user data into frames of at most
+ * XENSOCK_MTU bytes and hand them to the linked device.
+ *
+ * Every frame starts with one xensock flags byte (0 for data), followed by
+ * up to XENSOCK_MTU - 1 bytes of payload. Up to PAGE_SIZE bytes go into
+ * the linear part of the skb, the remainder into paged data.
+ *
+ * Returns the number of payload bytes sent, or a negative error:
+ * -EOPNOTSUPP for MSG_OOB, the pending socket error, -ENOTCONN when the
+ * socket has no device or is not connected, or the error from allocating,
+ * filling or transmitting a frame. While the socket is in
+ * SS_DISCONNECTING the data is discarded and @size is returned.
+ */
+static int xensock_prot_sendmsg(struct kiocb *iocb, struct socket *sock,
+				struct msghdr *msg, size_t size)
+{
+	struct sock *sk = sock->sk;
+	struct xen_sock_dev *dev;
+	struct sk_buff *skb;
+	int err;
+	unsigned char xflag;
+	size_t sent = 0;
+	unsigned int header_len, data_len;
+	unsigned int chunk;
+
+	if (msg->msg_flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
+	lock_sock(sk);
+
+	err = sock_error(sk);
+	if (err)
+		goto out;
+
+	err = -ENOTCONN;
+
+	dev = xen_sk(sk)->dev;
+	if (!dev)
+		goto out;
+
+	/* Another side has closed connection */
+	if (sock->state == SS_DISCONNECTING) {
+		err = size;
+		goto out;
+	}
+
+	if (sock->state != SS_CONNECTED)
+		goto out;
+
+	if (size == 0) {
+		err = 0;
+		goto out;
+	}
+
+	do {
+		/*
+		 * Frame length = payload + 1 flags byte, capped at the MTU.
+		 * Compare before adding so that a huge size_t cannot wrap
+		 * when it is narrowed to unsigned int.
+		 */
+		if (size >= XENSOCK_MTU)
+			chunk = XENSOCK_MTU;
+		else
+			chunk = size + 1;
+
+		if (chunk <= PAGE_SIZE) {
+			header_len = chunk;
+			data_len = 0;
+		} else {
+			header_len = PAGE_SIZE;
+			data_len = chunk - PAGE_SIZE;
+		}
+
+		/* the allocation may sleep, so do it without the sock lock */
+		release_sock(sk);
+		skb = sock_alloc_send_pskb(sk, header_len, data_len,
+					   msg->msg_flags & MSG_DONTWAIT,
+					   &err);
+		lock_sock(sk);
+		if (!skb)
+			goto out;
+
+		xflag = 0;
+		memcpy(skb_tail_pointer(skb), &xflag, 1);
+
+		skb->data_len = data_len;
+		skb->len = chunk;
+
+		err = skb_copy_datagram_from_iovec(skb, 1, msg->msg_iov,
+						   sent, chunk - 1);
+		if (err < 0)
+			goto free_skb;
+
+		/* move pointers in the skb */
+		skb->tail += header_len;
+
+		err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
+		if (err < 0)
+			goto free_skb;
+
+		skb->dev = NULL;
+		skb->sk = sk;
+
+		err = xensock_dev_send(skb, dev);
+
+		/*
+		 * xensock_dev_send() already freed the skb on failure;
+		 * going through free_skb would free it a second time.
+		 */
+		if (err)
+			goto out;
+
+		/* First byte of the SKB data is the xensock flags */
+		chunk--;
+
+		sent += chunk;
+		size -= chunk;
+	} while (size);
+
+	err = sent;
+out:
+	release_sock(sk);
+	return err;
+
+free_skb:
+	kfree_skb(skb);
+	goto out;
+}
+
+/*
+ * xen_sock_data_wait - wait for receive data on @sk
+ * @sk: socket to wait on; its lock is released around the sleep
+ * @timeo: maximum time to wait, as passed to schedule_timeout()
+ *
+ * Stops waiting when the receive queue is non-empty, sk_err is set,
+ * RCV_SHUTDOWN is flagged, a signal is pending or @timeo reaches zero.
+ * Returns the remaining timeout.
+ */
+static long xen_sock_data_wait(struct sock *sk, long timeo)
+{
+ DECLARE_WAITQUEUE(wait, current);
+
+ add_wait_queue(sk_sleep(sk), &wait);
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ if (!skb_queue_empty(&sk->sk_receive_queue))
+ break;
+
+ if (sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN))
+ break;
+
+ if (signal_pending(current) || !timeo)
+ break;
+
+ /* SOCK_ASYNC_WAITDATA is set only for the duration of the sleep */
+ set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ release_sock(sk);
+ timeo = schedule_timeout(timeo);
+ lock_sock(sk);
+ clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ }
+
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(sk_sleep(sk), &wait);
+ return timeo;
+}
+
+/*
+ * Read the leading xensock flags byte of @skb and strip it from the data.
+ *
+ * The byte is taken from the linear area when that is non-empty, else from
+ * the first non-empty skb on the frag list, else from the first non-empty
+ * page fragment. Returns 0 when the skb holds no data at all.
+ */
+static unsigned char get_skb_xflags(struct sk_buff *skb)
+{
+	struct sk_buff *list_skb;
+	unsigned char flags;
+	int idx;
+	int frag_cnt;
+
+	/* flag byte in the linear area */
+	if (skb_headlen(skb) >= 1) {
+		flags = skb->data[0];
+		__skb_pull(skb, 1);
+		return flags;
+	}
+
+	/* flag byte in an skb chained on the frag list */
+	skb_walk_frags(skb, list_skb) {
+		if (!list_skb->len)
+			continue;
+
+		flags = list_skb->data[0];
+		__skb_pull(list_skb, 1);
+		skb->len -= 1;
+		skb->data_len -= 1;
+		return flags;
+	}
+
+	/* flag byte in a page fragment */
+	frag_cnt = skb_shinfo(skb)->nr_frags;
+	for (idx = 0; idx < frag_cnt; idx++) {
+		skb_frag_t *pfrag = &skb_shinfo(skb)->frags[idx];
+		struct page *page;
+		u8 *vaddr;
+
+		if (!skb_frag_size(pfrag))
+			continue;
+
+		page = skb_frag_page(pfrag);
+		vaddr = kmap(page);
+		flags = vaddr[pfrag->page_offset];
+		kunmap(page);
+
+		skb->len -= 1;
+		skb->data_len -= 1;
+		skb_frag_size_sub(pfrag, 1);
+		pfrag->page_offset += 1;
+		return flags;
+	}
+
+	return 0;
+}
+
+/*
+ * recvmsg() handler: copy queued frame payload to user space.
+ *
+ * The first byte of every frame is a xensock flags byte. It is read and
+ * stripped once per skb (tracked via xensock_proto_cb()). A frame flagged
+ * XENSOCK_DISCONNECT moves the socket to SS_DISCONNECTING, unlinks it from
+ * the device, drops all queued data and makes the call return 0.
+ *
+ * A partially consumed skb is put back at the head of the receive queue.
+ * With MSG_PEEK the skb is always put back.
+ *
+ * Returns the number of bytes copied, or a negative error: -EOPNOTSUPP for
+ * MSG_OOB, -ENOTCONN without a linked device, -EFAULT for an empty frame
+ * or a failed copy, -EAGAIN when nothing arrived in time, the pending
+ * socket error, or the signal error from sock_intr_errno().
+ */
+static int xensock_prot_recvmsg(struct kiocb *iocb, struct socket *sock,
+				struct msghdr *msg, size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	int err = 0;
+	size_t target, copied = 0;
+	long timeo;
+	int i;
+	struct sk_buff *skb;
+	int nr_frags;
+	unsigned char xflags;
+	struct xensock_skb_cb *scb;
+	struct xen_sock_dev *dev;
+
+	if (flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
+	msg->msg_namelen = 0;
+
+	lock_sock(sk);
+
+	/* sample the device link only once the sock lock is held */
+	dev = xen_sk(sk)->dev;
+	if (!dev) {
+		copied = -ENOTCONN;
+		goto out;
+	}
+
+	if (sock->state == SS_DISCONNECTING)
+		goto out_disconnecting_state;
+
+	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
+	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+
+	do {
+		int chunk;
+
+		skb = skb_dequeue(&sk->sk_receive_queue);
+		if (!skb) {
+			if (copied >= target)
+				break;
+
+			err = sock_error(sk);
+			if (err)
+				break;
+			if (sk->sk_shutdown & RCV_SHUTDOWN)
+				break;
+
+			err = -EAGAIN;
+			if (!timeo)
+				break;
+
+			timeo = xen_sock_data_wait(sk, timeo);
+
+			if (signal_pending(current)) {
+				err = sock_intr_errno(timeo);
+				goto out;
+			}
+			continue;
+		}
+
+		scb = xensock_proto_cb(skb);
+
+		if (!scb->flags_checked) {
+			/* First byte of the SKB data is the xensock flags */
+			if (skb->len < 1) {
+				/* malformed frame: drop it, do not leak it */
+				kfree_skb(skb);
+				copied = -EFAULT;
+				break;
+			}
+
+			scb->flags_checked = 1;
+
+			xflags = get_skb_xflags(skb);
+			if (xflags & XENSOCK_DISCONNECT) {
+				/* the skb is off the queue, so free it here */
+				kfree_skb(skb);
+				sock->state = SS_DISCONNECTING;
+				xensock_dev_unlink_sk(dev);
+				goto out_disconnecting_state;
+			}
+		}
+
+		chunk = min_t(unsigned int, skb->len, size);
+		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, chunk)) {
+			skb_queue_head(&sk->sk_receive_queue, skb);
+			if (!copied)
+				copied = -EFAULT;
+			break;
+		}
+		copied += chunk;
+		size -= chunk;
+
+		sock_recv_ts_and_drops(msg, sk, skb);
+
+		if (!(flags & MSG_PEEK)) {
+			int skb_len = skb_headlen(skb);
+
+			/* consume 'chunk' bytes: linear area first ... */
+			if (chunk <= skb_len) {
+				__skb_pull(skb, chunk);
+			} else {
+				struct sk_buff *frag1;
+
+				__skb_pull(skb, skb_len);
+				chunk -= skb_len;
+
+				/* ... then skbs on the frag list ... */
+				skb_walk_frags(skb, frag1) {
+					if (chunk <= frag1->len) {
+						/* Pulling partial data */
+						skb->len -= chunk;
+						skb->data_len -= chunk;
+						__skb_pull(frag1, chunk);
+						break;
+					} else if (frag1->len) {
+						/* Pulling all frag data */
+						chunk -= frag1->len;
+						skb->len -= frag1->len;
+						skb->data_len -= frag1->len;
+						__skb_pull(frag1, frag1->len);
+					}
+				}
+
+				/* ... then page fragments */
+				nr_frags = skb_shinfo(skb)->nr_frags;
+				for (i = 0; i < nr_frags; i++) {
+					skb_frag_t *frag2;
+					unsigned int frag_size;
+
+					frag2 = &skb_shinfo(skb)->frags[i];
+					frag_size = skb_frag_size(frag2);
+
+					if (chunk <= frag_size) {
+						/* Pulling partial data */
+						skb->len -= chunk;
+						skb->data_len -= chunk;
+						skb_frag_size_sub(frag2, chunk);
+						frag2->page_offset += chunk;
+						break;
+					} else if (frag_size) {
+						/* Pulling all frag data */
+						chunk -= frag_size;
+						skb->len -= frag_size;
+						skb->data_len -= frag_size;
+						skb_frag_size_set(frag2, 0);
+					}
+				}
+			}
+
+			if (skb->len) {
+				skb_queue_head(&sk->sk_receive_queue, skb);
+				break;
+			}
+			kfree_skb(skb);
+
+		} else {
+			/* put message back and return */
+			skb_queue_head(&sk->sk_receive_queue, skb);
+			break;
+		}
+	} while (size);
+out:
+	release_sock(sk);
+	return copied ? : err;
+
+out_disconnecting_state:
+	/* Flush the recv buffs */
+	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL)
+		__kfree_skb(skb);
+
+	err = 0;
+	copied = 0;
+	goto out;
+}
+
+/*
+ * Socket operations installed by xensock_server_create(): listen() and
+ * accept() are implemented, connect() is not available.
+ */
+static const struct proto_ops xen_sock_server_ops = {
+ .family = PF_XENSOCK,
+ .release = xensock_prot_release,
+ .bind = sock_no_bind,
+ .connect = sock_no_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = xensock_prot_accept,
+ .getname = sock_no_getname,
+ .poll = sock_no_poll,
+ .ioctl = xensock_prot_ioctl,
+ .listen = xensock_prot_listen,
+ .shutdown = sock_no_shutdown,
+ .setsockopt = sock_no_setsockopt,
+ .getsockopt = sock_no_getsockopt,
+ .sendmsg = xensock_prot_sendmsg,
+ .recvmsg = xensock_prot_recvmsg,
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+};
+
+/*
+ * Socket operations installed by xensock_client_create(): connect() is
+ * implemented, listen() and accept() are not available.
+ */
+static const struct proto_ops xen_sock_client_ops = {
+ .family = PF_XENSOCK,
+ .release = xensock_prot_release,
+ .bind = sock_no_bind,
+ .connect = xensock_prot_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = sock_no_getname,
+ .poll = sock_no_poll,
+ .ioctl = xensock_prot_ioctl,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .setsockopt = sock_no_setsockopt,
+ .getsockopt = sock_no_getsockopt,
+ .sendmsg = xensock_prot_sendmsg,
+ .recvmsg = xensock_prot_recvmsg,
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+};
+
+/* sk_destruct hook installed by xensock_create(); intentionally empty. */
+static void xensock_destruct(struct sock *sk)
+{
+}
+
+/*
+ * Common socket creation for the server and client flavours.
+ * @net: network namespace of the new socket
+ * @sock: socket being created
+ * @proto: requested protocol; only 0 is accepted
+ * @kern: not used
+ * @xen_proto_ops: operations table to install on @sock
+ *
+ * Returns 0 on success, -ESOCKTNOSUPPORT for anything but SOCK_RAW,
+ * -EPROTONOSUPPORT for a non-zero protocol, -ENOMEM when no sock can be
+ * allocated, or the error from the protocol's init hook.
+ */
+static int xensock_create(struct net *net, struct socket *sock, int proto,
+			  int kern, const struct proto_ops *xen_proto_ops)
+{
+	struct sock *sk;
+	int rc;
+
+	if (sock->type != SOCK_RAW)
+		return -ESOCKTNOSUPPORT;
+	if (proto)
+		return -EPROTONOSUPPORT;
+
+	sk = xen_allocate_sock(net);
+	if (sk == NULL)
+		return -ENOMEM;
+
+	sock->ops = xen_proto_ops;
+	sock->state = SS_UNCONNECTED;
+	sock_init_data(sock, sk);
+
+	sk->sk_protocol = proto;
+	sk->sk_destruct = xensock_destruct;
+
+	xen_sk(sk)->dev = NULL;
+
+	if (!sk->sk_prot->init)
+		return 0;
+
+	rc = sk->sk_prot->init(sk);
+	if (rc) {
+		/* release sk on errors */
+		sock_orphan(sk);
+		sock_put(sk);
+	}
+
+	return rc;
+}
+
+/* .create hook of the server family: xensock_create() with the server ops. */
+static int xensock_server_create(struct net *net, struct socket *sock,
+ int proto, int kern)
+{
+ return xensock_create(net, sock, proto, kern, &xen_sock_server_ops);
+}
+
+/* .create hook of the client family: xensock_create() with the client ops. */
+static int xensock_client_create(struct net *net, struct socket *sock,
+ int proto, int kern)
+{
+ return xensock_create(net, sock, proto, kern, &xen_sock_client_ops);
+}
+
+/* PF_XENSOCK family registered by xensock_proto_init(true). */
+static const struct net_proto_family xensock_server_family_ops = {
+ .family = PF_XENSOCK,
+ .create = xensock_server_create,
+ .owner = THIS_MODULE,
+};
+
+/* PF_XENSOCK family registered by xensock_proto_init(false). */
+static const struct net_proto_family xensock_client_family_ops = {
+ .family = PF_XENSOCK,
+ .create = xensock_client_create,
+ .owner = THIS_MODULE,
+};
+
+/*
+ * Register the xensock protocol and the PF_XENSOCK socket family.
+ * @is_server_ops: true registers the server family, false the client one
+ *
+ * Returns 0 on success or the error from proto_register() or
+ * sock_register(); the protocol is unregistered again when the family
+ * registration fails.
+ */
+static int xensock_proto_init(bool is_server_ops)
+{
+	const struct net_proto_family *family;
+	int rc;
+
+	rc = proto_register(&xensock_proto, 0);
+	if (rc) {
+		pr_err("proto_register failed: %d\n", rc);
+		return rc;
+	}
+
+	family = is_server_ops ? &xensock_server_family_ops :
+				 &xensock_client_family_ops;
+
+	rc = sock_register(family);
+	if (rc) {
+		pr_err("sock_register failed: %d\n", rc);
+		proto_unregister(&xensock_proto);
+	}
+
+	return rc;
+}
+
+/* Register the protocol with the server-side socket family. */
+int xensock_proto_server_init(void)
+{
+ return xensock_proto_init(true);
+}
+EXPORT_SYMBOL(xensock_proto_server_init);
+
+/* Register the protocol with the client-side socket family. */
+int xensock_proto_client_init(void)
+{
+ return xensock_proto_init(false);
+}
+EXPORT_SYMBOL(xensock_proto_client_init);
+
+/*
+ * Undo xensock_proto_{server,client}_init(): detach all sockets from their
+ * devices, then unregister the socket family and the protocol.
+ */
+void xensock_proto_cleanup(void)
+{
+ xensock_unlink_all_dev_sk();
+ sock_unregister(PF_XENSOCK);
+ proto_unregister(&xensock_proto);
+}
+EXPORT_SYMBOL(xensock_proto_cleanup);
+
+MODULE_DESCRIPTION("xensock protocol");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_NETPROTO(AF_XENSOCK);
diff --git a/include/linux/socket.h b/include/linux/socket.h
index ec538fc..79ffa55 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -180,7 +180,8 @@ struct ucred {
#define AF_ALG 38 /* Algorithm sockets */
#define AF_NFC 39 /* NFC sockets */
#define AF_VSOCK 40 /* vSockets */
-#define AF_MAX 41 /* For now.. */
+#define AF_XENSOCK 41 /* xensock sockets */
+#define AF_MAX 42 /* For now.. */
/* Protocol families, same as address families. */
#define PF_UNSPEC AF_UNSPEC
@@ -225,6 +226,7 @@ struct ucred {
#define PF_ALG AF_ALG
#define PF_NFC AF_NFC
#define PF_VSOCK AF_VSOCK
+#define PF_XENSOCK AF_XENSOCK
#define PF_MAX AF_MAX
/* Maximum queue length specifiable by listen. */
diff --git a/include/net/af_xensock.h b/include/net/af_xensock.h
new file mode 100644
index 0000000..48df5ce
--- /dev/null
+++ b/include/net/af_xensock.h
@@ -0,0 +1,46 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __AF_XENSOCK_H__
+#define __AF_XENSOCK_H__
+
+#include <net/xensock.h>
+
+/* xensock socket: a struct sock extended by the device it is linked to. */
+struct xen_sock {
+ /* must stay the first member, xen_sk() casts struct sock pointers */
+ struct sock sk;
+ /* linked device, NULL while the socket is not connected to one */
+ struct xen_sock_dev *dev;
+};
+
+/* Convert a struct sock pointer to the struct xen_sock that embeds it. */
+static inline struct xen_sock *xen_sk(const struct sock *sk)
+{
+ return (struct xen_sock *)sk;
+}
+
+int xensock_proto_server_init(void);
+int xensock_proto_client_init(void);
+void xensock_proto_cleanup(void);
+
+#endif /* __AF_XENSOCK_H__ */
diff --git a/include/net/xensock.h b/include/net/xensock.h
new file mode 100644
index 0000000..2e5949b
--- /dev/null
+++ b/include/net/xensock.h
@@ -0,0 +1,130 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __XENSOCK_H__
+#define __XENSOCK_H__
+
+#include <linux/bitops.h>
+#include <linux/if.h>
+#include <net/sock.h>
+
+#define XENSOCK_MTU 65535
+
+#define SOCKDEV_ALIGN 32
+
+#define __SOCK_STATE_NOCARRIER 0
+#define __SOCK_STATE_QUEUESTOPPED 1
+
+/* A xensock transport device that can be linked to at most one socket. */
+struct xen_sock_dev {
+ char name[IFNAMSIZ];
+ struct sock *sk; /* linked socket, NULL when the device is free */
+ spinlock_t lock; /* sock_dev operations lock */
+ unsigned long state; /* __SOCK_STATE_* bits */
+ unsigned short padded; /* Padding added by alloc_xen_sock_dev() */
+
+ /* transmit hook called by xensock_dev_send(); non-zero return = error */
+ int (*start_xmit)(struct sk_buff *skb, struct xen_sock_dev *dev);
+ unsigned long tx_queue_len;
+ struct list_head list; /* entry in the global xensock device list */
+};
+
+/*
+ * xensock_dev_priv - access sock device private data
+ * @dev: xen_sock device
+ *
+ * The private area starts at the first SOCKDEV_ALIGN boundary behind the
+ * device structure, matching the layout set up by alloc_xen_sock_dev().
+ */
+static inline void *xensock_dev_priv(const struct xen_sock_dev *dev)
+{
+	size_t priv_offset = ALIGN(sizeof(struct xen_sock_dev), SOCKDEV_ALIGN);
+
+	return (char *)dev + priv_offset;
+}
+
+/*
+ * sockif_carrier_ok - test if carrier present
+ * @dev: xensock device
+ *
+ * Returns true while __SOCK_STATE_NOCARRIER is not set in dev->state.
+ */
+static inline bool sockif_carrier_ok(const struct xen_sock_dev *dev)
+{
+ return !test_bit(__SOCK_STATE_NOCARRIER, &dev->state);
+}
+
+/*
+ * sockif_carrier_on - set carrier
+ * @dev: xensock device
+ *
+ * Clears __SOCK_STATE_NOCARRIER in dev->state.
+ */
+static inline void sockif_carrier_on(struct xen_sock_dev *dev)
+{
+ clear_bit(__SOCK_STATE_NOCARRIER, &dev->state);
+}
+
+/*
+ * sockif_carrier_off - clear carrier
+ * @dev: xensock device
+ *
+ * Sets __SOCK_STATE_NOCARRIER in dev->state.
+ */
+static inline void sockif_carrier_off(struct xen_sock_dev *dev)
+{
+ set_bit(__SOCK_STATE_NOCARRIER, &dev->state);
+}
+
+/*
+ * sockif_queue_stopped - test if tx queue is stopped
+ * @dev: xensock device
+ *
+ * Returns true while __SOCK_STATE_QUEUESTOPPED is set in dev->state.
+ */
+static inline bool sockif_queue_stopped(const struct xen_sock_dev *dev)
+{
+ return test_bit(__SOCK_STATE_QUEUESTOPPED, &dev->state);
+}
+
+/*
+ * sockif_wake_queue - wake tx queue
+ * @dev: xensock device
+ *
+ * Clears __SOCK_STATE_QUEUESTOPPED in dev->state.
+ */
+static inline void sockif_wake_queue(struct xen_sock_dev *dev)
+{
+ clear_bit(__SOCK_STATE_QUEUESTOPPED, &dev->state);
+}
+
+/*
+ * sockif_stop_queue - stop tx queue
+ * @dev: xensock device
+ *
+ * Sets __SOCK_STATE_QUEUESTOPPED in dev->state; xensock_dev_send() then
+ * rejects frames with -ENOBUFS.
+ */
+static inline void sockif_stop_queue(struct xen_sock_dev *dev)
+{
+ set_bit(__SOCK_STATE_QUEUESTOPPED, &dev->state);
+}
+
+struct xen_sock_dev *alloc_xen_sock_dev(int sizeof_priv, const char *name);
+void free_xen_sock_dev(struct xen_sock_dev *dev);
+int xensock_register_dev(struct xen_sock_dev *dev);
+void xensock_unregister_dev(struct xen_sock_dev *dev);
+void xensock_dev_unlink_sk(struct xen_sock_dev *dev);
+void xensock_unlink_all_dev_sk(void);
+int xensock_dev_wait(struct sock *sk, struct sock *nsk);
+int xensock_dev_send(struct sk_buff *skb, struct xen_sock_dev *dev);
+int xensock_dev_queue_rx_skb(struct sk_buff *skb, struct xen_sock_dev *dev);
+
+#endif /* __XENSOCK_H__ */
diff --git a/net/core/sock.c b/net/core/sock.c
index 026e01f..a57f264 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -262,7 +262,8 @@ static const char *const af_family_key_strings[AF_MAX+1] = {
"sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
"sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
"sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" ,
- "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_MAX"
+ "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_XENSOCK" ,
+ "sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
@@ -278,7 +279,8 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
"slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
"slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
- "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX"
+ "slock-AF_NFC" , "slock-AF_VSOCK" , "slock-AF_XENSOCK" ,
+ "slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
@@ -294,7 +296,8 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
"clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
"clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
- "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX"
+ "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_XENSOCK" ,
+ "clock-AF_MAX"
};
/*
--
1.8.2.rc2
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |