[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH 2/3] xen-netback: switch to per-cpu scratch space
On 2013-5-27 19:29, Wei Liu wrote: There are maximum nr_onlie_cpus netback threads running. nr_onlie_cpus --> nr_online_cpus We can make use of per-cpu scratch space to reduce the size of buffer space when we move to 1:1 model. In the unlikely event when per-cpu scratch space is not available, processing routines will refuse to run on that CPU. Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx> --- drivers/net/xen-netback/netback.c | 247 ++++++++++++++++++++++++++++++------- 1 file changed, 204 insertions(+), 43 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 54853be..0f69eda 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -37,6 +37,7 @@ #include <linux/kthread.h> #include <linux/if_vlan.h> #include <linux/udp.h> +#include <linux/cpu.h>#include <net/tcp.h> @@ -95,6 +96,24 @@ struct netbk_rx_meta { #define MAX_BUFFER_OFFSET PAGE_SIZE +/* Coalescing tx requests before copying makes number of grant+ * copy ops greater or equal to number of slots required. In + * worst case a tx request consumes 2 gnttab_copy. So the size + * of tx_copy_ops array should be 2*MAX_PENDING_REQS. + */ +#define TX_COPY_OPS_SIZE (2*MAX_PENDING_REQS) +DEFINE_PER_CPU(struct gnttab_copy *, tx_copy_ops); + +/* Given MAX_BUFFER_OFFSET of 4096 the worst case is that each + * head/fragment page uses 2 copy operations because it + * straddles two buffers in the frontend. So the size of following + * arrays should be 2*XEN_NETIF_RX_RING_SIZE. + */ +#define GRANT_COPY_OP_SIZE (2*XEN_NETIF_RX_RING_SIZE) +#define META_SIZE (2*XEN_NETIF_RX_RING_SIZE) +DEFINE_PER_CPU(struct gnttab_copy *, grant_copy_op); +DEFINE_PER_CPU(struct netbk_rx_meta *, meta); + struct xen_netbk { wait_queue_head_t wq; struct task_struct *task; @@ -116,21 +135,7 @@ struct xen_netbk { atomic_t netfront_count;struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];- /* Coalescing tx requests before copying makes number of grant - * copy ops greater or equal to number of slots required. In - * worst case a tx request consumes 2 gnttab_copy. - */ - struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS]; - u16 pending_ring[MAX_PENDING_REQS]; - - /* - * Given MAX_BUFFER_OFFSET of 4096 the worst case is that each - * head/fragment page uses 2 copy operations because it - * straddles two buffers in the frontend. - */ - struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE]; - struct netbk_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE]; };static struct xen_netbk *xen_netbk;@@ -608,12 +613,31 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk) int count; unsigned long offset; struct skb_cb_overlay *sco; + struct gnttab_copy *gco = get_cpu_var(grant_copy_op); + struct netbk_rx_meta *m = get_cpu_var(meta); Change m to a friendly name? + static int unusable_count;struct netrx_pending_operations npo = {- .copy = netbk->grant_copy_op, - .meta = netbk->meta, + .copy = gco, + .meta = m, };+ if (gco == NULL || m == NULL) {+ put_cpu_var(grant_copy_op); + put_cpu_var(meta); + if (unusable_count == 1000) { It is better to use a macro to replace this number here. BTW, can you explain why using 1000 here? + printk(KERN_ALERT + "xen-netback: " + "CPU %d scratch space is not available," + " not doing any TX work for netback/%d\n", + smp_processor_id(), + (int)(netbk - xen_netbk)); unusable_count is not a value based on netbk here. I assume you use unusable_count to judge whether scratch space is available for specific netbk, if so, then unusable_count needs to be counter for specific netbk, not for all netbk. + unusable_count = 0; + } else + unusable_count++; + return; + } + skb_queue_head_init(&rxq);count = 0;@@ -635,27 +659,30 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk) break; }- BUG_ON(npo.meta_prod > ARRAY_SIZE(netbk->meta));+ BUG_ON(npo.meta_prod > META_SIZE);- if (!npo.copy_prod)+ if (!npo.copy_prod) { + put_cpu_var(grant_copy_op); + put_cpu_var(meta); return; + }- BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));- gnttab_batch_copy(netbk->grant_copy_op, npo.copy_prod); + BUG_ON(npo.copy_prod > GRANT_COPY_OP_SIZE); + gnttab_batch_copy(gco, npo.copy_prod);while ((skb = __skb_dequeue(&rxq)) != NULL) {sco = (struct skb_cb_overlay *)skb->cb;vif = netdev_priv(skb->dev); - if (netbk->meta[npo.meta_cons].gso_size && vif->gso_prefix) {+ if (m[npo.meta_cons].gso_size && vif->gso_prefix) { resp = RING_GET_RESPONSE(&vif->rx, vif->rx.rsp_prod_pvt++);resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data; - resp->offset = netbk->meta[npo.meta_cons].gso_size;- resp->id = netbk->meta[npo.meta_cons].id; + resp->offset = m[npo.meta_cons].gso_size; + resp->id = m[npo.meta_cons].id; resp->status = sco->meta_slots_used;npo.meta_cons++;@@ -680,12 +707,12 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk) flags |= XEN_NETRXF_data_validated;offset = 0;- resp = make_rx_response(vif, netbk->meta[npo.meta_cons].id, + resp = make_rx_response(vif, m[npo.meta_cons].id, status, offset, - netbk->meta[npo.meta_cons].size, + m[npo.meta_cons].size, flags);- if (netbk->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {+ if (m[npo.meta_cons].gso_size && !vif->gso_prefix) { struct xen_netif_extra_info *gso = (struct xen_netif_extra_info *) RING_GET_RESPONSE(&vif->rx, @@ -693,7 +720,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)resp->flags |= XEN_NETRXF_extra_info; - gso->u.gso.size = netbk->meta[npo.meta_cons].gso_size;+ gso->u.gso.size = m[npo.meta_cons].gso_size; gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; gso->u.gso.pad = 0; gso->u.gso.features = 0; @@ -703,7 +730,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk) }netbk_add_frag_responses(vif, status,- netbk->meta + npo.meta_cons + 1, + m + npo.meta_cons + 1, sco->meta_slots_used);RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);@@ -726,6 +753,9 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk) if (!skb_queue_empty(&netbk->rx_queue) && !timer_pending(&netbk->net_timer)) xen_netbk_kick_thread(netbk); + + put_cpu_var(grant_copy_op); + put_cpu_var(meta); }void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb)@@ -1351,9 +1381,10 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size) return false; }-static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)+static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk, + struct gnttab_copy *tco) { - struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop; + struct gnttab_copy *gop = tco, *request_gop; struct sk_buff *skb; int ret;@@ -1531,16 +1562,17 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)vif->tx.req_cons = idx; xen_netbk_check_rx_xenvif(vif);- if ((gop-netbk->tx_copy_ops) >= ARRAY_SIZE(netbk->tx_copy_ops))+ if ((gop-tco) >= TX_COPY_OPS_SIZE) break; }- return gop - netbk->tx_copy_ops;+ return gop - tco; }-static void xen_netbk_tx_submit(struct xen_netbk *netbk)+static void xen_netbk_tx_submit(struct xen_netbk *netbk, + struct gnttab_copy *tco) { - struct gnttab_copy *gop = netbk->tx_copy_ops; + struct gnttab_copy *gop = tco; struct sk_buff *skb;while ((skb = __skb_dequeue(&netbk->tx_queue)) != NULL) {@@ -1615,15 +1647,37 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk) static void xen_netbk_tx_action(struct xen_netbk *netbk) { unsigned nr_gops; + struct gnttab_copy *tco; + static int unusable_count; + + tco = get_cpu_var(tx_copy_ops); + + if (tco == NULL) { + put_cpu_var(tx_copy_ops); + if (unusable_count == 1000) { Same as above + printk(KERN_ALERT + "xen-netback: " + "CPU %d scratch space is not available," + " not doing any RX work for netback/%d\n", + smp_processor_id(), + (int)(netbk - xen_netbk)); + } else + unusable_count++; + return; + }- nr_gops = xen_netbk_tx_build_gops(netbk);+ nr_gops = xen_netbk_tx_build_gops(netbk, tco);- if (nr_gops == 0)+ if (nr_gops == 0) { + put_cpu_var(tx_copy_ops); return; + } + + gnttab_batch_copy(tco, nr_gops);- gnttab_batch_copy(netbk->tx_copy_ops, nr_gops);+ xen_netbk_tx_submit(netbk, tco);- xen_netbk_tx_submit(netbk);+ put_cpu_var(tx_copy_ops); }static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,@@ -1760,6 +1814,93 @@ static int xen_netbk_kthread(void *data) return 0; }+static int __create_percpu_scratch_space(unsigned int cpu)+{ + if (per_cpu(tx_copy_ops, cpu) || + per_cpu(grant_copy_op, cpu) || + per_cpu(meta, cpu)) + return 0; + + per_cpu(tx_copy_ops, cpu) = + vzalloc_node(sizeof(struct gnttab_copy) * TX_COPY_OPS_SIZE, + cpu_to_node(cpu)); + + per_cpu(grant_copy_op, cpu) = + vzalloc_node(sizeof(struct gnttab_copy) * GRANT_COPY_OP_SIZE, + cpu_to_node(cpu)); + + per_cpu(meta, cpu) = + vzalloc_node(sizeof(struct netbk_rx_meta) * META_SIZE, + cpu_to_node(cpu)); + + if (!per_cpu(tx_copy_ops, cpu) || + !per_cpu(grant_copy_op, cpu) || + !per_cpu(meta, cpu)) + return -ENOMEM; + + return 0; +} + +static void __free_percpu_scratch_space(unsigned int cpu) +{ + void *tmp; + + tmp = per_cpu(tx_copy_ops, cpu); It is better to verify whether tmp is available before freeing it, for example: if (tmp) + per_cpu(tx_copy_ops, cpu) = NULL; + vfree(tmp); + + tmp = per_cpu(grant_copy_op, cpu); same + per_cpu(grant_copy_op, cpu) = NULL; + vfree(tmp); + + tmp = per_cpu(meta, cpu); same + per_cpu(meta, cpu) = NULL; + vfree(tmp); +} + +static int __netback_percpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + int rc = NOTIFY_DONE; + + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + printk(KERN_INFO "xen-netback: CPU %d online, creating scratch space\n", + cpu); + rc = __create_percpu_scratch_space(cpu); + if (rc) { + printk(KERN_ALERT "xen-netback: failed to create scratch space for CPU %d\n", + cpu); + /* There is really nothing more we can do. Free any + * partially allocated scratch space. When processing + * routines get to run they will just print warning + * message and stop processing. + */ + __free_percpu_scratch_space(cpu); + rc = NOTIFY_BAD; + } else + rc = NOTIFY_OK; + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + printk(KERN_INFO "xen-netback: CPU %d offline, destroying scratch space\n", + cpu); + __free_percpu_scratch_space(cpu); + rc = NOTIFY_OK; + break; + default: + break; + } + + return rc; +} + +static struct notifier_block netback_notifier_block = { + .notifier_call = __netback_percpu_callback, +}; Moving this to the top of this file? + void xen_netbk_unmap_frontend_rings(struct xenvif *vif) { if (vif->tx.sring) @@ -1810,6 +1951,7 @@ static int __init netback_init(void) int i; int rc = 0; int group; + int cpu;if (!xen_domain())return -ENODEV; @@ -1821,10 +1963,21 @@ static int __init netback_init(void) fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX; }+ for_each_online_cpu(cpu) {+ rc = __create_percpu_scratch_space(cpu); + if (rc) { + rc = -ENOMEM; + goto failed_init; + } + } + register_hotcpu_notifier(&netback_notifier_block); + xen_netbk_group_nr = num_online_cpus(); xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr); - if (!xen_netbk) - return -ENOMEM; + if (!xen_netbk) { + goto failed_init; + rc = -ENOMEM; rc = -ENOMEM is never called. Thanks Annie _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |