[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH 2/2] xen-netback: switch to NAPI + kthread 1:1 model



On Wed, 2013-05-29 at 12:43 +0100, Wei Liu wrote:
> This patch implements 1:1 model netback. NAPI and kthread are utilized
> to do the weight-lifting job:
> 
>  - NAPI is used for guest side TX (host side RX)
>  - kthread is used for guest side RX (host side TX)
> 
> Xenvif and xen_netbk are made into one structure to reduce code size.
> 
> This model provides better scheduling fairness among vifs. It is also
> prerequisite for implementing multiqueue for Xen netback.

Any interesting numbers from this change?

> Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
> ---
>  drivers/net/xen-netback/common.h    |  105 +++--
>  drivers/net/xen-netback/interface.c |  122 +++---
>  drivers/net/xen-netback/netback.c   |  742 
> ++++++++++++-----------------------
>  3 files changed, 389 insertions(+), 580 deletions(-)
> 
> diff --git a/drivers/net/xen-netback/common.h 
> b/drivers/net/xen-netback/common.h
> index 8a4d77e..2e270cf 100644
> --- a/drivers/net/xen-netback/common.h
> +++ b/drivers/net/xen-netback/common.h
> @@ -45,15 +45,43 @@
>  #include <xen/grant_table.h>
>  #include <xen/xenbus.h>
>  
> -struct xen_netbk;
> +typedef unsigned int pending_ring_idx_t;
> +#define INVALID_PENDING_RING_IDX (~0U)
> +
> +struct pending_tx_info {
> +     struct xen_netif_tx_request req; /* coalesced tx request */
> +     pending_ring_idx_t head; /* head != INVALID_PENDING_RING_IDX
> +                               * if it is head of one or more tx
> +                               * reqs

So this sounds like it is only valid for the pending tx corresponding to
the first slot in a frame, right?

It does not contain a link from any arbitrary slot to the slot
containing the head of that frame, right?

So for the head it is effectively that entry's index into the pending_tx
array?

> +                               */
> +};
> +
> +#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
> +#define XEN_NETIF_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
> +
> +struct xenvif_rx_meta {
> +     int id;
> +     int size;
> +     int gso_size;
> +};
> +
> +/* Discriminate from any valid pending_idx value. */
> +#define INVALID_PENDING_IDX 0xFFFF
> +
> +#define MAX_BUFFER_OFFSET PAGE_SIZE
> +
> +#define MAX_PENDING_REQS 256
>  
>  struct xenvif {
>       /* Unique identifier for this interface. */
>       domid_t          domid;
>       unsigned int     handle;
>  
> -     /* Reference to netback processing backend. */
> -     struct xen_netbk *netbk;
> +     /* Use NAPI for guest TX */
> +     struct napi_struct napi;
> +     /* Use kthread for guest RX */
> +     struct task_struct *task;
> +     wait_queue_head_t wq;

This isn't new with this patch but I wonder if we shouldn't group tx and
rx related things into two separate regions of the struct, rather than
interleaving them like this. Especially once we start to separate RX and
TX processing onto different processes the current layout is going to
have pretty horrible cache line bouncing properties.

>       u8               fe_dev_addr[6];
>  
> @@ -64,9 +92,6 @@ struct xenvif {
>       char tx_irq_name[IFNAMSIZ+4]; /* DEVNAME-tx */
>       char rx_irq_name[IFNAMSIZ+4]; /* DEVNAME-rx */
>  
> -     /* List of frontends to notify after a batch of frames sent. */
> -     struct list_head notify_list;
> -
>       /* The shared rings and indexes. */
>       struct xen_netif_tx_back_ring tx;
>       struct xen_netif_rx_back_ring rx;
> @@ -96,12 +121,33 @@ struct xenvif {
>       /* Statistics */
>       unsigned long rx_gso_checksum_fixup;
>  
> +     struct sk_buff_head rx_queue;
> +     struct sk_buff_head tx_queue;
> +
> +     struct page *mmap_pages[MAX_PENDING_REQS];
> +
> +     pending_ring_idx_t pending_prod;
> +     pending_ring_idx_t pending_cons;
> +     u16 pending_ring[MAX_PENDING_REQS];
> +
> +     struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
> +
> +     /* Coalescing tx requests before copying makes number of grant
> +      * copy ops greater or equal to number of slots required. In
> +      * worst case a tx request consumes 2 gnttab_copy.
> +      */
> +     struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];
> +
> +     /*
> +      * Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
> +      * head/fragment page uses 2 copy operations because it
> +      * straddles two buffers in the frontend.
> +      */
> +     struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];
> +     struct xenvif_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];
> +
>       /* Miscellaneous private stuff. */
> -     struct list_head schedule_list;
> -     atomic_t         refcnt;
>       struct net_device *dev;
> -
> -     wait_queue_head_t waiting_to_free;
>  };
>  
>  static inline struct xenbus_device *xenvif_to_xenbus_device(struct xenvif 
> *vif)
> @@ -109,9 +155,6 @@ static inline struct xenbus_device 
> *xenvif_to_xenbus_device(struct xenvif *vif)
>       return to_xenbus_device(vif->dev->dev.parent);
>  }
>  
> -#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
> -#define XEN_NETIF_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
> -
>  struct xenvif *xenvif_alloc(struct device *parent,
>                           domid_t domid,
>                           unsigned int handle);
> @@ -121,39 +164,26 @@ int xenvif_connect(struct xenvif *vif, unsigned long 
> tx_ring_ref,
>                  unsigned int rx_evtchn);
>  void xenvif_disconnect(struct xenvif *vif);
>  
> -void xenvif_get(struct xenvif *vif);
> -void xenvif_put(struct xenvif *vif);
> -
>  int xenvif_xenbus_init(void);
>  void xenvif_xenbus_fini(void);
>  
>  int xenvif_schedulable(struct xenvif *vif);
>  
> -int xen_netbk_rx_ring_full(struct xenvif *vif);
> +int xenvif_rx_ring_full(struct xenvif *vif);

From a review PoV it would have been good to do the bulk rename either
before or after the functional changes. I just skipped over anything
which looked to be just renaming at first glance. Hopefully I didn't
miss anything critical!

> +static int xenvif_poll(struct napi_struct *napi, int budget)
> +{
> +     struct xenvif *vif = container_of(napi, struct xenvif, napi);
> +     int work_done;
> +
> +     work_done = xenvif_tx_action(vif, budget);
> +
> +     if (work_done < budget) {
> +             int more_to_do = 0;
> +             unsigned long flags;
> +
> +             local_irq_save(flags);

What does this protect against? Maybe it's a NAPI requirement, not a Xen
netif one?

> +
> +             RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
> +             if (!more_to_do || work_done < 0)

"work_done < 0" is an error condition from xenvif_tx_action? But
following the chain down through xenvif_tx_action to xenvif_tx_submit it
doesn't look like such a thing is possible?

Perhaps just a BUG_ON(work_done < 0) right after the call to
xenvif_tx_action()?


> @@ -272,11 +275,13 @@ struct xenvif *xenvif_alloc(struct device *parent, 
> domid_t domid,
>       struct net_device *dev;
>       struct xenvif *vif;
>       char name[IFNAMSIZ] = {};
> +     int i;
>  
>       snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
>       dev = alloc_netdev(sizeof(struct xenvif), name, ether_setup);
>       if (dev == NULL) {
> -             pr_warn("Could not allocate netdev\n");
> +             printk(KERN_WARNING "xen-netback: Could not allocate netdev for 
> vif%d.%d\n",
> +                    domid, handle);

pr_warn can take a fmt string and do the right thing, can't it? Or was
there a different reason for this change?

Rather than repeating vif%d.%d why not reuse the result of the previous
snprintf -- especially since the fmt codes differ (although probably not
in a way which affects the result).

> @@ -377,7 +389,16 @@ int xenvif_connect(struct xenvif *vif, unsigned long 
> tx_ring_ref,
>               disable_irq(vif->rx_irq);
>       }
>  
> -     xenvif_get(vif);
> +     init_waitqueue_head(&vif->wq);
> +     vif->task = kthread_create(xenvif_kthread,
> +                                (void *)vif,
> +                                "vif%d.%d", vif->domid, vif->handle);

%u.%u, or better find a way to reuse the name allocated in xenvif_alloc,
which I suppose you can get via vif->dev->name?

> +     if (IS_ERR(vif->task)) {
> +             printk(KERN_WARNING "xen-netback: Could not allocate kthread 
> for vif%d.%d\n",
> +                    vif->domid, vif->handle);

pr_warn if possible please. and %u vs %d again.

> @@ -434,9 +338,9 @@ static void netbk_gop_frag_copy(struct xenvif *vif, 
> struct sk_buff *skb,
>  
>               copy_gop = npo->copy + npo->copy_prod++;
>               copy_gop->flags = GNTCOPY_dest_gref;
> +
>               copy_gop->source.domid = DOMID_SELF;
>               copy_gop->source.u.gmfn = virt_to_mfn(page_address(page));
> -

The blank lines in this function seem pretty arbitrary both before and
after this change ;-)

>               copy_gop->source.offset = offset;
>               copy_gop->dest.domid = vif->domid;
>  
> @@ -1469,19 +1279,20 @@ static unsigned xen_netbk_tx_build_gops(struct 
> xen_netbk *netbk)
>                       struct xen_netif_extra_info *gso;
>                       gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
>  
> -                     if (netbk_set_skb_gso(vif, skb, gso)) {
> -                             /* Failure in netbk_set_skb_gso is fatal. */
> +                     if (xenvif_set_skb_gso(vif, skb, gso)) {
> +                             /* Failure in xenvif_set_skb_gso is fatal. */
>                               kfree_skb(skb);
> -                             continue;
> +                             /* XXX ???? break or continue ?*/

I'd been wondering that a bunch in the previous hunks too ;-)

break seems fine here.
>  
> +int xenvif_kthread(void *data)
> +{
> +     struct xenvif *vif = data;
> +
> +     while (!kthread_should_stop()) {
> +             wait_event_interruptible(vif->wq,
> +                                      rx_work_todo(vif) ||
> +                                      kthread_should_stop());
> +             cond_resched();
> +
> +             if (kthread_should_stop())
> +                     break;

Should this come before the cond resched? Seems odd to potentially wait
a bit before just exiting.

Ian.


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.