[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 11/18] xen/pvcalls: implement accept command



Implement the accept command by calling inet_accept. To avoid blocking
in the kernel, call inet_accept(O_NONBLOCK) from a workqueue, which gets
scheduled on sk_data_ready (for a passive socket, it means that there
are connections to accept).

Use the reqcopy field to store the request. Accept the new socket from
the delayed work function, create a new sock_mapping for it, map
the indexes page and data ring, and reply to the other end. Choose an
ioworker for the socket randomly.

Only support one outstanding blocking accept request for every socket at
any time.

Add a field to sock_mapping to remember the passive socket from which an
active socket was created.

Signed-off-by: Stefano Stabellini <stefano@xxxxxxxxxxx>
CC: boris.ostrovsky@xxxxxxxxxx
CC: jgross@xxxxxxxx
---
 drivers/xen/pvcalls-back.c | 156 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 156 insertions(+)

diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
index a762877..d8e0a60 100644
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -67,6 +67,7 @@ struct sock_mapping {
        struct list_head list;
        struct list_head queue;
        struct pvcalls_back_priv *priv;
+       struct sockpass_mapping *sockpass;
        struct socket *sock;
        int data_worker;
        uint64_t id;
@@ -263,10 +264,128 @@ static int pvcalls_back_release(struct xenbus_device 
*dev,
 
 static void __pvcalls_back_accept(struct work_struct *work)
 {
+       struct sockpass_mapping *mappass = container_of(
+               work, struct sockpass_mapping, register_work);
+       struct sock_mapping *map;
+       struct pvcalls_ioworker *iow;
+       struct pvcalls_back_priv *priv;
+       struct xen_pvcalls_response *rsp;
+       struct xen_pvcalls_request *req;
+       void *page = NULL;
+       int notify;
+       int ret = -EINVAL;
+       unsigned long flags;
+
+       priv = mappass->priv;
+       /* We only need to check the value of "cmd" atomically on read. */
+       spin_lock_irqsave(&mappass->copy_lock, flags);
+       req = &mappass->reqcopy;
+       if (req->cmd != PVCALLS_ACCEPT) {
+               spin_unlock_irqrestore(&mappass->copy_lock, flags);
+               return;
+       }
+       spin_unlock_irqrestore(&mappass->copy_lock, flags);
+
+       map = kzalloc(sizeof(*map), GFP_KERNEL);
+       if (map == NULL) {
+               ret = -ENOMEM;
+               goto out_error;
+       }
+
+       map->sock = sock_alloc();
+       if (!map->sock)
+               goto out_error;
+
+       INIT_LIST_HEAD(&map->queue);
+       map->data_worker = get_random_int() % pvcalls_back_global.nr_ioworkers;
+       map->ref = req->u.accept.ref;
+
+       map->priv = priv;
+       map->sockpass = mappass;
+       map->sock->type = mappass->sock->type;
+       map->sock->ops = mappass->sock->ops;
+       map->id = req->u.accept.id_new;
+
+       ret = xenbus_map_ring_valloc(priv->dev, &req->u.accept.ref, 1, &page);
+       if (ret < 0)
+               goto out_error;
+       map->ring = page;
+       map->ring_order = map->ring->ring_order;
+       /* first read the order, then map the data ring */
+       virt_rmb();
+       if (map->ring_order > MAX_RING_ORDER) {
+               ret = -EFAULT;
+               goto out_error;
+       }
+       ret = xenbus_map_ring_valloc(priv->dev, map->ring->ref,
+                                    (1 << map->ring_order), &page);
+       if (ret < 0)
+               goto out_error;
+       map->bytes = page;
+
+       ret = bind_interdomain_evtchn_to_irqhandler(priv->dev->otherend_id,
+                                                   req->u.accept.evtchn,
+                                                   pvcalls_back_conn_event,
+                                                   0,
+                                                   "pvcalls-backend",
+                                                   map);
+       if (ret < 0)
+               goto out_error;
+       map->irq = ret;
+
+       map->data.in = map->bytes;
+       map->data.out = map->bytes + XEN_FLEX_RING_SIZE(map->ring_order);
+
+       down_write(&priv->pvcallss_lock);
+       list_add_tail(&map->list, &priv->socket_mappings);
+       up_write(&priv->pvcallss_lock);
+
+       ret = inet_accept(mappass->sock, map->sock, O_NONBLOCK, true);
+       if (ret == -EAGAIN)
+               goto out_error;
+
+       lock_sock(map->sock->sk);
+       map->saved_data_ready = map->sock->sk->sk_data_ready;
+       map->sock->sk->sk_user_data = map;
+       map->sock->sk->sk_data_ready = pvcalls_sk_data_ready;
+       map->sock->sk->sk_state_change = pvcalls_sk_state_change;
+       release_sock(map->sock->sk);
+
+       iow = &pvcalls_back_global.ioworkers[map->data_worker];
+       spin_lock_irqsave(&iow->lock, flags);
+       atomic_inc(&map->read);
+       if (list_empty(&map->queue))
+               list_add_tail(&map->queue, &iow->wqs);
+       spin_unlock_irqrestore( &iow->lock, flags);
+       atomic_inc(&iow->io);
+       queue_work_on(map->data_worker, pvcalls_back_global.wq, 
&iow->register_work);
+
+out_error:
+       if (ret < 0)
+               pvcalls_back_release_active(priv->dev, priv, map);
+
+       rsp = RING_GET_RESPONSE(&priv->ring, priv->ring.rsp_prod_pvt++);
+       rsp->req_id = req->req_id;
+       rsp->cmd = req->cmd;
+       rsp->u.accept.id = req->u.accept.id;
+       rsp->ret = ret;
+       RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&priv->ring, notify);
+       if (notify)
+               notify_remote_via_irq(priv->irq);
+
+       spin_lock_irqsave(&mappass->copy_lock, flags);
+       mappass->reqcopy.cmd = 0;
+       spin_unlock_irqrestore(&mappass->copy_lock, flags);
 }
 
 static void pvcalls_pass_sk_data_ready(struct sock *sock)
 {
+       struct sockpass_mapping *mappass = sock->sk_user_data;
+
+       if (mappass == NULL)
+               return;
+
+       queue_work(mappass->wq, &mappass->register_work);
 }
 
 static int pvcalls_back_bind(struct xenbus_device *dev,
@@ -372,7 +491,44 @@ static int pvcalls_back_listen(struct xenbus_device *dev,
 static int pvcalls_back_accept(struct xenbus_device *dev,
                               struct xen_pvcalls_request *req)
 {
+       struct pvcalls_back_priv *priv;
+       struct sockpass_mapping *mappass;
+       int ret = -EINVAL;
+       struct xen_pvcalls_response *rsp;
+       unsigned long flags;
+
+       if (dev == NULL)
+               return 0;
+       priv = dev_get_drvdata(&dev->dev);
+
+       mappass = radix_tree_lookup(&priv->socketpass_mappings,
+               req->u.accept.id);
+       if (mappass == NULL)
+               goto out_error;
+
+       /* 
+        * Limitation of the current implementation: only support one
+        * concurrent accept or poll call on one socket.
+        */
+       spin_lock_irqsave(&mappass->copy_lock, flags);
+       if (mappass->reqcopy.cmd != 0) {
+               spin_unlock_irqrestore(&mappass->copy_lock, flags);
+               ret = -EINTR;
+               goto out_error;
+       }
+
+       mappass->reqcopy = *req;
+       spin_unlock_irqrestore(&mappass->copy_lock, flags);
+       queue_work(mappass->wq, &mappass->register_work);
        return 0;
+
+out_error:
+       rsp = RING_GET_RESPONSE(&priv->ring, priv->ring.rsp_prod_pvt++);
+       rsp->req_id = req->req_id;
+       rsp->cmd = req->cmd;
+       rsp->u.accept.id = req->u.accept.id;
+       rsp->ret = ret;
+       return 1;
 }
 
 static int pvcalls_back_poll(struct xenbus_device *dev,
-- 
1.9.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.