Re: [Xen-devel] [PATCH RFC] xen-netback: coalesce frags before copying
On Thu, 2013-03-14 at 18:26 +0000, Wei Liu wrote:
> This patch coalesces txps when constructing grant copy structures. It
> enables netback to deal with the situation where the frontend's
> MAX_SKB_FRAGS is larger than the backend's MAX_SKB_FRAGS.
>
> It also defines max_skb_frags, an estimate of the maximum number of
> frags a guest can send; anything bigger than that is considered
> malicious. It is currently set to 20, which should be enough to
> accommodate Linux (16 to 19) and possibly Windows (19?).
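For illustration (numbers invented): a frontend built with MAX_SKB_FRAGS = 19
can use 19 slots for a single packet, which a backend built with a smaller
MAX_SKB_FRAGS previously had to reject; with coalescing, the backend can copy
those 19 txps into fewer, fully packed PAGE_SIZE frags instead.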
>
> Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
> ---
> drivers/net/xen-netback/netback.c | 164 +++++++++++++++++++++++++++++--------
> 1 file changed, 128 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
> index 6e8e51a..e27f81a 100644
> --- a/drivers/net/xen-netback/netback.c
> +++ b/drivers/net/xen-netback/netback.c
> @@ -47,9 +47,20 @@
> #include <asm/xen/hypercall.h>
> #include <asm/xen/page.h>
>
> +/*
> + * This is an estimate of the maximum number of frags an SKB might
> + * have; anything larger than this is considered malicious. Typically
> + * Linux has 16 to 19, Windows has 19.
> + */
> +#define NETBK_MAX_SKB_FRAGS_DEFAULT 20
> +static unsigned int max_skb_frags = NETBK_MAX_SKB_FRAGS_DEFAULT;
> +module_param(max_skb_frags, uint, 0444);

Perhaps something more dynamic would be better than this static (although
configurable) limit?

You could at least make this 0644 so root can up the limit dynamically?
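(With 0644 the limit would then be adjustable at runtime through sysfs,
presumably via the usual module parameter path, i.e. something like
/sys/module/xen_netback/parameters/max_skb_frags.)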
> +
> struct pending_tx_info {
> - struct xen_netif_tx_request req;
> + struct xen_netif_tx_request req; /* holds the first txp of the chain */
> struct xenvif *vif;
> + unsigned int nr_tx_req; /* number of tx requests in this chain, at least 1 */
> + unsigned int start_idx; /* starting index in the pending ring */
> };
> typedef unsigned int pending_ring_idx_t;
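To make the new bookkeeping concrete, here is a hypothetical chain of three
coalesced txps (all indices invented for illustration):

  pending_ring[start_idx .. start_idx+2] = { 7, 12, 3 }

  pending_tx_info[7]   head:    req/vif valid, nr_tx_req = 3, start_idx
                                recorded, mmap_pages[7] = destination page
  pending_tx_info[12]  chained: req/vif valid, nr_tx_req/start_idx/
                                mmap_pages[12] poisoned
  pending_tx_info[3]   chained: likewise poisoned

Only the head owns the destination page; xen_netbk_idx_release() walks the
pending ring from start_idx to respond to all three txps.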
>
> @@ -251,7 +262,7 @@ static int max_required_rx_slots(struct xenvif *vif)
> int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);
>
> if (vif->can_sg || vif->gso || vif->gso_prefix)
> - max += MAX_SKB_FRAGS + 1; /* extra_info + frags */
> + max += max_skb_frags + 1; /* extra_info + frags */
>
> return max;
> }
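(For example, with a 1500-byte MTU, 4k pages and the default limit of 20,
this comes to 1 + 20 + 1 = 22 slots.)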
> @@ -657,7 +668,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
> __skb_queue_tail(&rxq, skb);
>
> /* Filled the batch queue? */
> - if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE)
> + if (count + max_skb_frags >= XEN_NETIF_RX_RING_SIZE)
> break;
> }
>
> @@ -920,7 +931,7 @@ static int netbk_count_requests(struct xenvif *vif,
> return -ENODATA;
> }
>
> - if (unlikely(frags >= MAX_SKB_FRAGS)) {
> + if (unlikely(frags >= max_skb_frags)) {
> netdev_err(vif->dev, "Too many frags\n");
> netbk_fatal_tx_err(vif);
> return -E2BIG;
> @@ -968,42 +979,98 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
> struct skb_shared_info *shinfo = skb_shinfo(skb);
> skb_frag_t *frags = shinfo->frags;
> u16 pending_idx = *((u16 *)skb->data);
> - int i, start;
> + u16 head_idx = 0;
> + int i, j, start;
> + struct page *page;
> + pending_ring_idx_t index;
> + uint16_t dst_offset;
> + int total_frags_merged;
> + unsigned int nr_frags = shinfo->nr_frags; /* This can be as large as max_skb_frags */
> + struct pending_tx_info *first = NULL;
> + int nr_txp;
>
> /* Skip first skb fragment if it is on same page as header fragment. */
> start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
>
> - for (i = start; i < shinfo->nr_frags; i++, txp++) {
> - struct page *page;
> - pending_ring_idx_t index;
> + /* Coalesce */
> + total_frags_merged = 0;
> + for (i = j = start; i < MAX_SKB_FRAGS && j < nr_frags; i++) {
> struct pending_tx_info *pending_tx_info =
> netbk->pending_tx_info;
>
> - index = pending_index(netbk->pending_cons++);
> - pending_idx = netbk->pending_ring[index];
> - page = xen_netbk_alloc_page(netbk, pending_idx);
> + page = alloc_page(GFP_KERNEL|__GFP_COLD);
> if (!page)
> goto err;
>
> - gop->source.u.ref = txp->gref;
> - gop->source.domid = vif->domid;
> - gop->source.offset = txp->offset;
> -
> - gop->dest.u.gmfn = virt_to_mfn(page_address(page));
> - gop->dest.domid = DOMID_SELF;
> - gop->dest.offset = txp->offset;
> -
> - gop->len = txp->size;
> - gop->flags = GNTCOPY_source_gref;
> + nr_txp = 0;
> + dst_offset = 0;
> + first = NULL;
> + while (dst_offset < PAGE_SIZE && j < nr_frags) {
> + gop->flags = GNTCOPY_source_gref;
> +
> + gop->source.u.ref = txp->gref;
> + gop->source.domid = vif->domid;
> + gop->source.offset = txp->offset;
> +
> + gop->dest.domid = DOMID_SELF;
> +
> + gop->dest.offset = dst_offset;
> + gop->dest.u.gmfn = virt_to_mfn(page_address(page));
> +
> + if (dst_offset + txp->size > PAGE_SIZE) {
> + /* This page can only merge a portion of txp */
> + gop->len = PAGE_SIZE - dst_offset;
> + txp->offset += gop->len;
> + txp->size -= gop->len;
> + dst_offset += gop->len; /* == PAGE_SIZE, will quit loop */
> + } else {
> + /* This txp can be merged in the page */
> + gop->len = txp->size;
> + dst_offset += gop->len;
> +
> + index = pending_index(netbk->pending_cons++);
> +
> + pending_idx = netbk->pending_ring[index];
> +
> + memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
> + xenvif_get(vif);
> +
> + pending_tx_info[pending_idx].vif = vif;
> +
> + /*
> + * Poison these fields; the corresponding
> + * fields for the head txp will be set to
> + * correct values after the loop.
> + */
> + pending_tx_info[pending_idx].nr_tx_req = (u16)(~(u16)0);
> + netbk->mmap_pages[pending_idx] = (void *)(~0UL);
> + pending_tx_info[pending_idx].start_idx = -1;
> +
> + if (unlikely(!first)) {
> + first = &pending_tx_info[pending_idx];
> + first->start_idx = index;
> + head_idx = pending_idx;
> + }
> +
> + txp++;
> + nr_txp++;
> + j++;
> + }
>
> - gop++;
> + gop++;
> + }
>
> - memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
> - xenvif_get(vif);
> - pending_tx_info[pending_idx].vif = vif;
> - frag_set_pending_idx(&frags[i], pending_idx);
> + first->req.offset = 0;
> + first->req.size = dst_offset;
> + first->nr_tx_req = nr_txp;
> + total_frags_merged += nr_txp - 1;
> + set_page_ext(page, netbk, head_idx);
> + netbk->mmap_pages[head_idx] = page;
> + frag_set_pending_idx(&frags[i], head_idx);
> }
>
> + shinfo->nr_frags -= total_frags_merged;
> +
> return gop;
> err:
> /* Unwind, freeing all pages and sending error responses. */
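As a sanity check on the coalescing loop above, here is a small standalone
simulation (my own sketch, not part of the patch; frags_after_coalesce and
the txp sizes are invented) of how many PAGE_SIZE destination frags a run of
source txps packs into, including the "can only merge a portion of txp"
split across a page boundary:

#include <stdio.h>

#define SIM_PAGE_SIZE 4096u

static unsigned int frags_after_coalesce(const unsigned int *sizes, int n)
{
	unsigned int frags = 0, used = SIM_PAGE_SIZE; /* force a fresh page */
	int i;

	for (i = 0; i < n; i++) {
		unsigned int left = sizes[i];

		while (left) {
			unsigned int room, chunk;

			if (used == SIM_PAGE_SIZE) {
				frags++;	/* open a new destination page */
				used = 0;
			}
			room = SIM_PAGE_SIZE - used;
			/* a txp larger than the remaining room is split,
			 * mirroring the partial-merge branch above */
			chunk = left < room ? left : room;
			used += chunk;
			left -= chunk;
		}
	}
	return frags;
}

int main(void)
{
	unsigned int sizes[19];
	int i;

	for (i = 0; i < 19; i++)
		sizes[i] = 1500;	/* 19 MTU-sized txps from the frontend */

	/* 28500 bytes pack into DIV_ROUND_UP(28500, 4096) = 7 frags. */
	printf("%u frags\n", frags_after_coalesce(sizes, 19));
	return 0;
}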
> @@ -1025,6 +1092,7 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
> struct gnttab_copy *gop = *gopp;
> u16 pending_idx = *((u16 *)skb->data);
> struct skb_shared_info *shinfo = skb_shinfo(skb);
> + struct pending_tx_info *tx_info;
> int nr_frags = shinfo->nr_frags;
> int i, err, start;
>
> @@ -1037,12 +1105,14 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
> start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
>
> for (i = start; i < nr_frags; i++) {
> - int j, newerr;
> + int j, newerr = 0, n;
>
> pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
> + tx_info = &netbk->pending_tx_info[pending_idx];
>
> /* Check error status: if okay then remember grant handle. */
> - newerr = (++gop)->status;
> + for (n = 0; n < tx_info->nr_tx_req; n++)
> + newerr |= (++gop)->status;
> if (likely(!newerr)) {
> /* Had a previous error? Invalidate this fragment. */
> if (unlikely(err))
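One observation on the new |= accumulation: GNTST_okay is 0 and the grant
status codes for failures are negative, so OR-ing over the chain is non-zero
iff at least one copy failed, but the combined value is no longer a single
meaningful status code. A minimal sketch of the idea (mine, not from the
patch):

#include <stdint.h>

/* Accumulate the per-copy status over a chain of nr grant copies. */
static int chain_status(const int16_t *status, unsigned int nr)
{
	int err = 0;
	unsigned int n;

	for (n = 0; n < nr; n++)
		err |= status[n];
	return err;	/* 0 iff every copy reported GNTST_okay */
}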
> @@ -1267,11 +1337,11 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
> struct sk_buff *skb;
> int ret;
>
> - while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
> + while (((nr_pending_reqs(netbk) + max_skb_frags) < MAX_PENDING_REQS) &&
> !list_empty(&netbk->net_schedule_list)) {
> struct xenvif *vif;
> struct xen_netif_tx_request txreq;
> - struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS];
> + struct xen_netif_tx_request txfrags[max_skb_frags];
> struct page *page;
> struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
> u16 pending_idx;
> @@ -1359,7 +1429,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
> pending_idx = netbk->pending_ring[index];
>
> data_len = (txreq.size > PKT_PROT_LEN &&
> - ret < MAX_SKB_FRAGS) ?
> + ret < max_skb_frags) ?
> PKT_PROT_LEN : txreq.size;
>
> skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
> @@ -1409,11 +1479,14 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
> memcpy(&netbk->pending_tx_info[pending_idx].req,
> &txreq, sizeof(txreq));
> netbk->pending_tx_info[pending_idx].vif = vif;
> + netbk->pending_tx_info[pending_idx].start_idx = index;
> + netbk->pending_tx_info[pending_idx].nr_tx_req = 1;
> *((u16 *)skb->data) = pending_idx;
>
> __skb_put(skb, data_len);
>
> skb_shinfo(skb)->nr_frags = ret;
> + /* First skb fragment is on the same page as the header fragment. */
> if (data_len < txreq.size) {
> skb_shinfo(skb)->nr_frags++;
> frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
> @@ -1540,6 +1613,11 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
> struct xenvif *vif;
> struct pending_tx_info *pending_tx_info;
> pending_ring_idx_t index;
> + unsigned int nr = 0;
> + unsigned int i = 0;
> + unsigned int start_idx = 0;
> +
> + BUG_ON(netbk->mmap_pages[pending_idx] == (void *)(~0UL));
>
> /* Already complete? */
> if (netbk->mmap_pages[pending_idx] == NULL)
> @@ -1548,13 +1626,27 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
> pending_tx_info = &netbk->pending_tx_info[pending_idx];
>
> vif = pending_tx_info->vif;
> + nr = pending_tx_info->nr_tx_req;
> + start_idx = pending_tx_info->start_idx;
>
> - make_tx_response(vif, &pending_tx_info->req, status);
> + BUG_ON(nr == (u16)(~(u16)0));
>
> - index = pending_index(netbk->pending_prod++);
> - netbk->pending_ring[index] = pending_idx;
> + BUG_ON(netbk->pending_ring[pending_index(start_idx)] != pending_idx);
>
> - xenvif_put(vif);
> + for (i = 0; i < nr; i++) {
> + struct xen_netif_tx_request *txp;
> + unsigned int idx = pending_index(start_idx + i);
> + unsigned int info_idx = netbk->pending_ring[idx];
> +
> + pending_tx_info = &netbk->pending_tx_info[info_idx];
> + txp = &pending_tx_info->req;
> + make_tx_response(vif, &pending_tx_info->req, status);
> +
> + index = pending_index(netbk->pending_prod++);
> + netbk->pending_ring[index] = netbk->pending_ring[info_idx];
> +
> + xenvif_put(vif);
> + }
>
> netbk->mmap_pages[pending_idx]->mapping = 0;
> put_page(netbk->mmap_pages[pending_idx]);
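(So a chain of nr coalesced txps produces nr tx responses and recycles nr
pending-ring slots, while the destination page is owned, and freed here,
only by the head entry.)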
> @@ -1613,7 +1705,7 @@ static inline int rx_work_todo(struct xen_netbk *netbk)
> static inline int tx_work_todo(struct xen_netbk *netbk)
> {
>
> - if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
> + if (((nr_pending_reqs(netbk) + max_skb_frags) < MAX_PENDING_REQS) &&
> !list_empty(&netbk->net_schedule_list))
> return 1;
>