
[Xen-devel] [PATCH RFC] xen-netback: coalesce frags before copying



This patch coalesces txps when constructing grant copy structures. It enables
netback to deal with the situation in which the frontend's MAX_SKB_FRAGS is
larger than the backend's MAX_SKB_FRAGS.

It also defines max_skb_frags, an estimation of the maximum number of frags a
guest can send; anything bigger than that is considered malicious. It is
currently set to 20, which should be enough to accommodate Linux (16 to 19)
and possibly Windows (19?).
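
As a rough illustration of the coalescing idea, here is a minimal standalone
sketch (not the netback code itself; apart from PAGE_SIZE, the names MAX_TXP,
MAX_FRAGS, struct txp and coalesce() are simplified stand-ins): frontend tx
requests are packed into backend pages until each page is full, splitting any
request that straddles a page boundary, so a frontend may use more ring slots
than the backend has skb frags.

    #include <stdio.h>

    #define PAGE_SIZE 4096
    #define MAX_TXP   20    /* frontend slots a guest may use, cf. max_skb_frags */
    #define MAX_FRAGS 16    /* backend MAX_SKB_FRAGS */

    struct txp {            /* simplified stand-in for xen_netif_tx_request */
            unsigned int offset;
            unsigned int size;
    };

    /*
     * Pack tx requests into destination pages: each backend frag (one page)
     * absorbs as many tx requests as fit, splitting a request that straddles
     * a page boundary.  Returns the number of backend frags (pages) used.
     */
    static int coalesce(struct txp *txp, int nr_txp)
    {
            int pages = 0, j = 0;

            while (j < nr_txp && pages < MAX_FRAGS) {
                    unsigned int dst_offset = 0;

                    while (dst_offset < PAGE_SIZE && j < nr_txp) {
                            unsigned int len = txp[j].size;

                            if (dst_offset + len > PAGE_SIZE) {
                                    /* Only part of this request fits here. */
                                    len = PAGE_SIZE - dst_offset;
                                    txp[j].offset += len;
                                    txp[j].size -= len;
                            } else {
                                    j++;            /* request fully merged */
                            }
                            dst_offset += len;      /* one grant copy of 'len' bytes */
                    }
                    pages++;
            }
            return pages;
    }

    int main(void)
    {
            struct txp txp[MAX_TXP];
            int i;

            /* 20 slots of 1500 bytes coalesce into ceil(30000/4096) = 8 pages. */
            for (i = 0; i < MAX_TXP; i++) {
                    txp[i].offset = 0;
                    txp[i].size = 1500;
            }
            printf("%d slots coalesced into %d backend frags\n",
                   MAX_TXP, coalesce(txp, MAX_TXP));
            return 0;
    }

In the patch itself the inner loop additionally emits one grant copy operation
per merged chunk and records the chain length and starting pending-ring index
in pending_tx_info, so that a tx response can later be generated for every
original request.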

Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
---
 drivers/net/xen-netback/netback.c |  164 +++++++++++++++++++++++++++++--------
 1 file changed, 128 insertions(+), 36 deletions(-)

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 6e8e51a..e27f81a 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -47,9 +47,20 @@
 #include <asm/xen/hypercall.h>
 #include <asm/xen/page.h>
 
+/*
+ * This is an estimation of the maximum possible frags a SKB might
+ * have, anything larger than this is considered malicious. Typically
+ * Linux has 16 to 19, Windows has 19.
+ */
+#define NETBK_MAX_SKB_FRAGS_DEFAULT 20
+static unsigned int max_skb_frags = NETBK_MAX_SKB_FRAGS_DEFAULT;
+module_param(max_skb_frags, uint, 0444);
+
 struct pending_tx_info {
-       struct xen_netif_tx_request req;
+       struct xen_netif_tx_request req; /* the first txp in this chain */
        struct xenvif *vif;
+       unsigned int nr_tx_req; /* number of tx requests in this chain, at least 1 */
+       unsigned int start_idx; /* starting index of this chain in the pending ring */
 };
 typedef unsigned int pending_ring_idx_t;
 
@@ -251,7 +262,7 @@ static int max_required_rx_slots(struct xenvif *vif)
        int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);
 
        if (vif->can_sg || vif->gso || vif->gso_prefix)
-               max += MAX_SKB_FRAGS + 1; /* extra_info + frags */
+               max += max_skb_frags + 1; /* extra_info + frags */
 
        return max;
 }
@@ -657,7 +668,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
                __skb_queue_tail(&rxq, skb);
 
                /* Filled the batch queue? */
-               if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
+               if (count + max_skb_frags >= XEN_NETIF_RX_RING_SIZE)
                        break;
        }
 
@@ -920,7 +931,7 @@ static int netbk_count_requests(struct xenvif *vif,
                        return -ENODATA;
                }
 
-               if (unlikely(frags >= MAX_SKB_FRAGS)) {
+               if (unlikely(frags >= max_skb_frags)) {
                        netdev_err(vif->dev, "Too many frags\n");
                        netbk_fatal_tx_err(vif);
                        return -E2BIG;
@@ -968,42 +979,98 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
        struct skb_shared_info *shinfo = skb_shinfo(skb);
        skb_frag_t *frags = shinfo->frags;
        u16 pending_idx = *((u16 *)skb->data);
-       int i, start;
+       u16 head_idx = 0;
+       int i, j, start;
+       struct page *page;
+       pending_ring_idx_t index;
+       uint16_t dst_offset;
+       int total_frags_merged;
+       unsigned int nr_frags = shinfo->nr_frags; /* This can be as large as max_skb_frags */
+       struct pending_tx_info *first = NULL;
+       int nr_txp;
 
        /* Skip first skb fragment if it is on same page as header fragment. */
        start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
 
-       for (i = start; i < shinfo->nr_frags; i++, txp++) {
-               struct page *page;
-               pending_ring_idx_t index;
+       /* Coalesce */
+       total_frags_merged = 0;
+       for (i = j = start; i < MAX_SKB_FRAGS && j < nr_frags; i++) {
                struct pending_tx_info *pending_tx_info =
                        netbk->pending_tx_info;
 
-               index = pending_index(netbk->pending_cons++);
-               pending_idx = netbk->pending_ring[index];
-               page = xen_netbk_alloc_page(netbk, pending_idx);
+               page = alloc_page(GFP_KERNEL|__GFP_COLD);
                if (!page)
                        goto err;
 
-               gop->source.u.ref = txp->gref;
-               gop->source.domid = vif->domid;
-               gop->source.offset = txp->offset;
-
-               gop->dest.u.gmfn = virt_to_mfn(page_address(page));
-               gop->dest.domid = DOMID_SELF;
-               gop->dest.offset = txp->offset;
-
-               gop->len = txp->size;
-               gop->flags = GNTCOPY_source_gref;
+               nr_txp = 0;
+               dst_offset = 0;
+               first = NULL;
+               while (dst_offset < PAGE_SIZE && j < nr_frags) {
+                       gop->flags = GNTCOPY_source_gref;
+
+                       gop->source.u.ref = txp->gref;
+                       gop->source.domid = vif->domid;
+                       gop->source.offset = txp->offset;
+
+                       gop->dest.domid = DOMID_SELF;
+
+                       gop->dest.offset = dst_offset;
+                       gop->dest.u.gmfn = virt_to_mfn(page_address(page));
+
+                       if (dst_offset + txp->size > PAGE_SIZE) {
+                               /* This page can only merge a portion of txp */
+                               gop->len = PAGE_SIZE - dst_offset;
+                               txp->offset += gop->len;
+                               txp->size -= gop->len;
+                               dst_offset += gop->len; /* == PAGE_SIZE, will quit loop */
+                       } else {
+                               /* This txp can be merged in the page */
+                               gop->len = txp->size;
+                               dst_offset += gop->len;
+
+                               index = pending_index(netbk->pending_cons++);
+
+                               pending_idx = netbk->pending_ring[index];
+
+                               memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
+                               xenvif_get(vif);
+
+                               pending_tx_info[pending_idx].vif = vif;
+
+                               /*
+                                * Poison these fields; the corresponding
+                                * fields of the head txp will be set to
+                                * correct values after the loop.
+                                */
+                               pending_tx_info[pending_idx].nr_tx_req = (u16)(~(u16)0);
+                               netbk->mmap_pages[pending_idx] = (void *)(~0UL);
+                               pending_tx_info[pending_idx].start_idx = -1;
+
+                               if (unlikely(!first)) {
+                                       first = &pending_tx_info[pending_idx];
+                                       first->start_idx = index;
+                                       head_idx = pending_idx;
+                               }
+
+                               txp++;
+                               nr_txp++;
+                               j++;
+                       }
 
-               gop++;
+                       gop++;
+               }
 
-               memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
-               xenvif_get(vif);
-               pending_tx_info[pending_idx].vif = vif;
-               frag_set_pending_idx(&frags[i], pending_idx);
+               first->req.offset = 0;
+               first->req.size = dst_offset;
+               first->nr_tx_req = nr_txp;
+               total_frags_merged += nr_txp - 1;
+               set_page_ext(page, netbk, head_idx);
+               netbk->mmap_pages[head_idx] = page;
+               frag_set_pending_idx(&frags[i], head_idx);
        }
 
+       shinfo->nr_frags -= total_frags_merged;
+
        return gop;
 err:
        /* Unwind, freeing all pages and sending error responses. */
@@ -1025,6 +1092,7 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
        struct gnttab_copy *gop = *gopp;
        u16 pending_idx = *((u16 *)skb->data);
        struct skb_shared_info *shinfo = skb_shinfo(skb);
+       struct pending_tx_info *tx_info;
        int nr_frags = shinfo->nr_frags;
        int i, err, start;
 
@@ -1037,12 +1105,14 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
        start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
 
        for (i = start; i < nr_frags; i++) {
-               int j, newerr;
+               int j, newerr = 0, n;
 
                pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
+               tx_info = &netbk->pending_tx_info[pending_idx];
 
                /* Check error status: if okay then remember grant handle. */
-               newerr = (++gop)->status;
+               for (n = 0; n < tx_info->nr_tx_req; n++)
+                       newerr |= (++gop)->status;
                if (likely(!newerr)) {
                        /* Had a previous error? Invalidate this fragment. */
                        if (unlikely(err))
@@ -1267,11 +1337,11 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
        struct sk_buff *skb;
        int ret;
 
-       while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+       while (((nr_pending_reqs(netbk) + max_skb_frags) < MAX_PENDING_REQS) &&
                !list_empty(&netbk->net_schedule_list)) {
                struct xenvif *vif;
                struct xen_netif_tx_request txreq;
-               struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
+               struct xen_netif_tx_request txfrags[max_skb_frags];
                struct page *page;
                struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
                u16 pending_idx;
@@ -1359,7 +1429,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
                pending_idx = netbk->pending_ring[index];
 
                data_len = (txreq.size > PKT_PROT_LEN &&
-                           ret < MAX_SKB_FRAGS) ?
+                           ret < max_skb_frags) ?
                        PKT_PROT_LEN : txreq.size;
 
                skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
@@ -1409,11 +1479,14 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
                memcpy(&netbk->pending_tx_info[pending_idx].req,
                       &txreq, sizeof(txreq));
                netbk->pending_tx_info[pending_idx].vif = vif;
+               netbk->pending_tx_info[pending_idx].start_idx = index;
+               netbk->pending_tx_info[pending_idx].nr_tx_req = 1;
                *((u16 *)skb->data) = pending_idx;
 
                __skb_put(skb, data_len);
 
                skb_shinfo(skb)->nr_frags = ret;
+               /* The first skb fragment is on the same page as the header fragment. */
                if (data_len < txreq.size) {
                        skb_shinfo(skb)->nr_frags++;
                        frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
@@ -1540,6 +1613,11 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
        struct xenvif *vif;
        struct pending_tx_info *pending_tx_info;
        pending_ring_idx_t index;
+       unsigned int nr = 0;
+       unsigned int i = 0;
+       unsigned int start_idx = 0;
+
+       BUG_ON(netbk->mmap_pages[pending_idx] == (void *)(~0UL));
 
        /* Already complete? */
        if (netbk->mmap_pages[pending_idx] == NULL)
@@ -1548,13 +1626,27 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
        pending_tx_info = &netbk->pending_tx_info[pending_idx];
 
        vif = pending_tx_info->vif;
+       nr = pending_tx_info->nr_tx_req;
+       start_idx = pending_tx_info->start_idx;
 
-       make_tx_response(vif, &pending_tx_info->req, status);
+       BUG_ON(nr == (u16)(~(u16)0));
 
-       index = pending_index(netbk->pending_prod++);
-       netbk->pending_ring[index] = pending_idx;
+       BUG_ON(netbk->pending_ring[pending_index(start_idx)] != pending_idx);
 
-       xenvif_put(vif);
+       for (i = 0; i < nr; i++) {
+               struct xen_netif_tx_request *txp;
+               unsigned int idx = pending_index(start_idx + i);
+               unsigned int info_idx = netbk->pending_ring[idx];
+
+               pending_tx_info = &netbk->pending_tx_info[info_idx];
+               txp = &pending_tx_info->req;
+               make_tx_response(vif, &pending_tx_info->req, status);
+
+               index = pending_index(netbk->pending_prod++);
+               netbk->pending_ring[index] = netbk->pending_ring[info_idx];
+
+               xenvif_put(vif);
+       }
 
        netbk->mmap_pages[pending_idx]->mapping = 0;
        put_page(netbk->mmap_pages[pending_idx]);
@@ -1613,7 +1705,7 @@ static inline int rx_work_todo(struct xen_netbk *netbk)
 static inline int tx_work_todo(struct xen_netbk *netbk)
 {
 
-       if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+       if (((nr_pending_reqs(netbk) + max_skb_frags) < MAX_PENDING_REQS) &&
                        !list_empty(&netbk->net_schedule_list))
                return 1;
 
-- 
1.7.10.4

