[XenPPC] [linux-ppc-2.6] [LINUX][XEN] backport net drivers
# HG changeset patch
# User Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
# Node ID 11ee20d418ea813709da4c86dbc4ae28efb17f36
# Parent  fe1a31c06cbe548d3eb59b4e0fc16fc102a16344
[LINUX][XEN] backport net drivers

This is simply a copy of the network files from the Sparse tree that
have not made it to our tree yet.

Signed-off-by: Jimi Xenidis <jimix@xxxxxxxxxxxxxx>
---
 drivers/xen/netback/common.h    |   25 
 drivers/xen/netback/interface.c |   85 +-
 drivers/xen/netback/loopback.c  |    8 
 drivers/xen/netback/netback.c   |  834 ++++++++++++++++++++------
 drivers/xen/netback/xenbus.c    |   87 ++
 drivers/xen/netfront/netfront.c | 1242 ++++++++++++++++++++++++++++------------
 6 files changed, 1696 insertions(+), 585 deletions(-)

diff -r fe1a31c06cbe -r 11ee20d418ea drivers/xen/netback/common.h
--- a/drivers/xen/netback/common.h	Sun Oct 08 12:23:50 2006 -0400
+++ b/drivers/xen/netback/common.h	Sun Oct 08 12:28:37 2006 -0400
@@ -64,9 +64,9 @@ typedef struct netif_st {
 
 	/* Physical parameters of the comms window. */
 	grant_handle_t   tx_shmem_handle;
-	grant_ref_t      tx_shmem_ref; 
+	grant_ref_t      tx_shmem_ref;
 	grant_handle_t   rx_shmem_handle;
-	grant_ref_t      rx_shmem_ref; 
+	grant_ref_t      rx_shmem_ref;
 	unsigned int     evtchn;
 	unsigned int     irq;
 
@@ -75,6 +75,13 @@ typedef struct netif_st {
 	netif_rx_back_ring_t rx;
 	struct vm_struct *tx_comms_area;
 	struct vm_struct *rx_comms_area;
+
+	/* Set of features that can be turned on in dev->features. */
+	int features;
+
+	/* Internal feature information. */
+	int can_queue:1;	/* can queue packets for receiver? */
+	int copying_receiver:1;	/* copy packets to receiver? */
 
 	/* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
 	RING_IDX rx_req_cons_peek;
@@ -86,8 +93,6 @@ typedef struct netif_st {
 	struct timer_list credit_timeout;
 
 	/* Miscellaneous private stuff. */
-	enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
-	int active;
 	struct list_head list;  /* scheduling list */
 	atomic_t         refcnt;
 	struct net_device *dev;
@@ -121,4 +126,16 @@ struct net_device_stats *netif_be_get_st
 struct net_device_stats *netif_be_get_stats(struct net_device *dev);
 irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
 
+static inline int netbk_can_queue(struct net_device *dev)
+{
+	netif_t *netif = netdev_priv(dev);
+	return netif->can_queue;
+}
+
+static inline int netbk_can_sg(struct net_device *dev)
+{
+	netif_t *netif = netdev_priv(dev);
+	return netif->features & NETIF_F_SG;
+}
+
 #endif /* __NETIF__BACKEND__COMMON_H__ */
diff -r fe1a31c06cbe -r 11ee20d418ea drivers/xen/netback/interface.c
--- a/drivers/xen/netback/interface.c	Sun Oct 08 12:23:50 2006 -0400
+++ b/drivers/xen/netback/interface.c	Sun Oct 08 12:28:37 2006 -0400
@@ -36,46 +36,75 @@
 
 static void __netif_up(netif_t *netif)
 {
-	struct net_device *dev = netif->dev;
-	netif_tx_lock_bh(dev);
-	netif->active = 1;
-	netif_tx_unlock_bh(dev);
 	enable_irq(netif->irq);
 	netif_schedule_work(netif);
 }
 
 static void __netif_down(netif_t *netif)
 {
-	struct net_device *dev = netif->dev;
 	disable_irq(netif->irq);
-	netif_tx_lock_bh(dev);
-	netif->active = 0;
-	netif_tx_unlock_bh(dev);
 	netif_deschedule_work(netif);
 }
 
 static int net_open(struct net_device *dev)
 {
 	netif_t *netif = netdev_priv(dev);
-	if (netif->status == CONNECTED)
+	if (netif_carrier_ok(dev))
 		__netif_up(netif);
-	netif_start_queue(dev);
 	return 0;
 }
 
 static int net_close(struct net_device *dev)
 {
 	netif_t *netif = netdev_priv(dev);
-	netif_stop_queue(dev);
-	if (netif->status == CONNECTED)
+	if (netif_carrier_ok(dev))
 		__netif_down(netif);
 	return 0;
+}
+
+static int netbk_change_mtu(struct net_device *dev, int mtu)
+{
+	int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
+
+	if (mtu > max)
+		return -EINVAL;
+	dev->mtu = mtu;
+	return 0;
+}
+
+static int netbk_set_sg(struct net_device *dev, u32 data)
+{
+	if (data) {
+		netif_t *netif = netdev_priv(dev);
+
+		if (!(netif->features & NETIF_F_SG))
+			return -ENOSYS;
+	}
+
+	return ethtool_op_set_sg(dev, data);
+}
+
+static int netbk_set_tso(struct net_device *dev, u32 data)
+{
+	if (data) {
+		netif_t *netif = netdev_priv(dev);
+
+		if (!(netif->features & NETIF_F_TSO))
+			return -ENOSYS;
+	}
+
+	return ethtool_op_set_tso(dev, data);
 }
 
 static struct ethtool_ops network_ethtool_ops =
 {
 	.get_tx_csum = ethtool_op_get_tx_csum,
 	.set_tx_csum = ethtool_op_set_tx_csum,
+	.get_sg = ethtool_op_get_sg,
+	.set_sg = netbk_set_sg,
+	.get_tso = ethtool_op_get_tso,
+	.set_tso = netbk_set_tso,
+	.get_link = ethtool_op_get_link,
 };
 
 netif_t *netif_alloc(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN])
@@ -92,11 +121,12 @@ netif_t *netif_alloc(domid_t domid, unsi
 		return ERR_PTR(-ENOMEM);
 	}
 
+	netif_carrier_off(dev);
+
 	netif = netdev_priv(dev);
 	memset(netif, 0, sizeof(*netif));
 	netif->domid  = domid;
 	netif->handle = handle;
-	netif->status = DISCONNECTED;
 	atomic_set(&netif->refcnt, 1);
 	init_waitqueue_head(&netif->waiting_to_free);
 	netif->dev = dev;
@@ -109,12 +139,16 @@ netif_t *netif_alloc(domid_t domid, unsi
 	dev->get_stats       = netif_be_get_stats;
 	dev->open            = net_open;
 	dev->stop            = net_close;
+	dev->change_mtu      = netbk_change_mtu;
 	dev->features        = NETIF_F_IP_CSUM;
 
 	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
 
-	/* Disable queuing. */
-	dev->tx_queue_len = 0;
+	/*
+	 * Reduce default TX queuelen so that each guest interface only
+	 * allows it to eat around 6.4MB of host memory.
+ */ + dev->tx_queue_len = 100; for (i = 0; i < ETH_ALEN; i++) if (be_mac[i] != 0) @@ -255,11 +289,9 @@ int netif_map(netif_t *netif, unsigned l netif->rx_req_cons_peek = 0; netif_get(netif); - wmb(); /* Other CPUs see new state before interface is started. */ rtnl_lock(); - netif->status = CONNECTED; - wmb(); + netif_carrier_on(netif->dev); if (netif_running(netif->dev)) __netif_up(netif); rtnl_unlock(); @@ -295,20 +327,13 @@ static void netif_free(netif_t *netif) void netif_disconnect(netif_t *netif) { - switch (netif->status) { - case CONNECTED: + if (netif_carrier_ok(netif->dev)) { rtnl_lock(); - netif->status = DISCONNECTING; - wmb(); + netif_carrier_off(netif->dev); if (netif_running(netif->dev)) __netif_down(netif); rtnl_unlock(); netif_put(netif); - /* fall through */ - case DISCONNECTED: - netif_free(netif); - break; - default: - BUG(); - } -} + } + netif_free(netif); +} diff -r fe1a31c06cbe -r 11ee20d418ea drivers/xen/netback/loopback.c --- a/drivers/xen/netback/loopback.c Sun Oct 08 12:23:50 2006 -0400 +++ b/drivers/xen/netback/loopback.c Sun Oct 08 12:28:37 2006 -0400 @@ -125,6 +125,11 @@ static struct ethtool_ops network_ethtoo { .get_tx_csum = ethtool_op_get_tx_csum, .set_tx_csum = ethtool_op_set_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = ethtool_op_set_sg, + .get_tso = ethtool_op_get_tso, + .set_tso = ethtool_op_set_tso, + .get_link = ethtool_op_get_link, }; /* @@ -152,6 +157,7 @@ static void loopback_construct(struct ne dev->features = (NETIF_F_HIGHDMA | NETIF_F_LLTX | + NETIF_F_TSO | NETIF_F_SG | NETIF_F_IP_CSUM); @@ -212,7 +218,7 @@ static int __init make_loopback(int i) return err; } -static void __init clean_loopback(int i) +static void __exit clean_loopback(int i) { struct net_device *dev1, *dev2; char dev_name[IFNAMSIZ]; diff -r fe1a31c06cbe -r 11ee20d418ea drivers/xen/netback/netback.c --- a/drivers/xen/netback/netback.c Sun Oct 08 12:23:50 2006 -0400 +++ b/drivers/xen/netback/netback.c Sun Oct 08 12:28:37 2006 -0400 @@ -40,17 +40,23 @@ /*#define NETBE_DEBUG_INTERRUPT*/ +struct netbk_rx_meta { + skb_frag_t frag; + int id; + int copy:1; +}; + static void netif_idx_release(u16 pending_idx); static void netif_page_release(struct page *page); static void make_tx_response(netif_t *netif, - u16 id, + netif_tx_request_t *txp, s8 st); -static int make_rx_response(netif_t *netif, - u16 id, - s8 st, - u16 offset, - u16 size, - u16 flags); +static netif_rx_response_t *make_rx_response(netif_t *netif, + u16 id, + s8 st, + u16 offset, + u16 size, + u16 flags); static void net_tx_action(unsigned long unused); static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0); @@ -63,13 +69,11 @@ static struct timer_list net_timer; #define MAX_PENDING_REQS 256 static struct sk_buff_head rx_queue; -static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1]; -static mmu_update_t rx_mmu[NET_RX_RING_SIZE]; -static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE]; -static unsigned char rx_notify[NR_IRQS]; static unsigned long mmap_vstart; #define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE)) + +static void *rx_mmap_area; #define PKT_PROT_LEN 64 @@ -97,27 +101,30 @@ static spinlock_t net_schedule_list_lock static spinlock_t net_schedule_list_lock; #define MAX_MFN_ALLOC 64 -static unsigned long mfn_list[MAX_MFN_ALLOC]; +static xen_pfn_t mfn_list[MAX_MFN_ALLOC]; static unsigned int alloc_index = 0; -static DEFINE_SPINLOCK(mfn_lock); - -static unsigned long alloc_mfn(void) -{ - unsigned long mfn = 0, flags; + +static inline unsigned long alloc_mfn(void) +{ + return mfn_list[--alloc_index]; 
+} + +static int check_mfn(int nr) +{ struct xen_memory_reservation reservation = { - .nr_extents = MAX_MFN_ALLOC, .extent_order = 0, .domid = DOMID_SELF }; - set_xen_guest_handle(reservation.extent_start, (xen_pfn_t *)mfn_list); - spin_lock_irqsave(&mfn_lock, flags); - if ( unlikely(alloc_index == 0) ) - alloc_index = HYPERVISOR_memory_op( - XENMEM_increase_reservation, &reservation); - if ( alloc_index != 0 ) - mfn = mfn_list[--alloc_index]; - spin_unlock_irqrestore(&mfn_lock, flags); - return mfn; + + if (likely(alloc_index >= nr)) + return 0; + + set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index); + reservation.nr_extents = MAX_MFN_ALLOC - alloc_index; + alloc_index += HYPERVISOR_memory_op(XENMEM_increase_reservation, + &reservation); + + return alloc_index >= nr ? 0 : -ENOMEM; } static inline void maybe_schedule_tx_action(void) @@ -139,6 +146,123 @@ static inline int is_xen_skb(struct sk_b return (cp == skbuff_cachep); } +/* + * We can flip without copying the packet unless: + * 1. The data is not allocated from our special cache; or + * 2. The main data area is shared; or + * 3. One or more fragments are shared; or + * 4. There are chained fragments. + */ +static inline int is_flippable_skb(struct sk_buff *skb) +{ + int frag; + + if (!is_xen_skb(skb) || skb_cloned(skb)) + return 0; + + for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) { + if (page_count(skb_shinfo(skb)->frags[frag].page) > 1) + return 0; + } + + if (skb_shinfo(skb)->frag_list != NULL) + return 0; + + return 1; +} + +static struct sk_buff *netbk_copy_skb(struct sk_buff *skb) +{ + struct skb_shared_info *ninfo; + struct sk_buff *nskb; + unsigned long offset; + int ret; + int len; + int headlen; + + BUG_ON(skb_shinfo(skb)->frag_list != NULL); + + nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC); + if (unlikely(!nskb)) + goto err; + + skb_reserve(nskb, 16); + headlen = nskb->end - nskb->data; + if (headlen > skb_headlen(skb)) + headlen = skb_headlen(skb); + ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen); + BUG_ON(ret); + + ninfo = skb_shinfo(nskb); + ninfo->gso_size = skb_shinfo(skb)->gso_size; + ninfo->gso_type = skb_shinfo(skb)->gso_type; + + offset = headlen; + len = skb->len - headlen; + + nskb->len = skb->len; + nskb->data_len = len; + nskb->truesize += len; + + while (len) { + struct page *page; + int copy; + int zero; + + if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) { + dump_stack(); + goto err_free; + } + + copy = len >= PAGE_SIZE ? PAGE_SIZE : len; + zero = len >= PAGE_SIZE ? 
0 : __GFP_ZERO; + + page = alloc_page(GFP_ATOMIC | zero); + if (unlikely(!page)) + goto err_free; + + ret = skb_copy_bits(skb, offset, page_address(page), copy); + BUG_ON(ret); + + ninfo->frags[ninfo->nr_frags].page = page; + ninfo->frags[ninfo->nr_frags].page_offset = 0; + ninfo->frags[ninfo->nr_frags].size = copy; + ninfo->nr_frags++; + + offset += copy; + len -= copy; + } + + offset = nskb->data - skb->data; + + nskb->h.raw = skb->h.raw + offset; + nskb->nh.raw = skb->nh.raw + offset; + nskb->mac.raw = skb->mac.raw + offset; + + return nskb; + + err_free: + kfree_skb(nskb); + err: + return NULL; +} + +static inline int netbk_max_required_rx_slots(netif_t *netif) +{ + if (netif->features & (NETIF_F_SG|NETIF_F_TSO)) + return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */ + return 1; /* all in one */ +} + +static inline int netbk_queue_full(netif_t *netif) +{ + RING_IDX peek = netif->rx_req_cons_peek; + RING_IDX needed = netbk_max_required_rx_slots(netif); + + return ((netif->rx.sring->req_prod - peek) < needed) || + ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed); +} + int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) { netif_t *netif = netdev_priv(dev); @@ -146,30 +270,24 @@ int netif_be_start_xmit(struct sk_buff * BUG_ON(skb->dev != dev); /* Drop the packet if the target domain has no receive buffers. */ - if (!netif->active || - (netif->rx_req_cons_peek == netif->rx.sring->req_prod) || - ((netif->rx_req_cons_peek - netif->rx.rsp_prod_pvt) == - NET_RX_RING_SIZE)) + if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev))) goto drop; - /* - * We do not copy the packet unless: - * 1. The data is shared; or - * 2. The data is not allocated from our special cache. - * NB. We also couldn't cope with fragmented packets, but we won't get - * any because we not advertise the NETIF_F_SG feature. - */ - if (skb_shared(skb) || skb_cloned(skb) || !is_xen_skb(skb)) { - int hlen = skb->data - skb->head; - int ret; - struct sk_buff *nskb = dev_alloc_skb(hlen + skb->len); + if (unlikely(netbk_queue_full(netif))) { + /* Not a BUG_ON() -- misbehaving netfront can trigger this. */ + if (netbk_can_queue(dev)) + DPRINTK("Queue full but not stopped!\n"); + goto drop; + } + + /* Copy the packet here if it's destined for a flipping + interface but isn't flippable (e.g. extra references to + data) + */ + if (!netif->copying_receiver && !is_flippable_skb(skb)) { + struct sk_buff *nskb = netbk_copy_skb(skb); if ( unlikely(nskb == NULL) ) goto drop; - skb_reserve(nskb, hlen); - __skb_put(nskb, skb->len); - ret = skb_copy_bits(skb, -hlen, nskb->data - hlen, - skb->len + hlen); - BUG_ON(ret); /* Copy only the header fields we use in this driver. 
*/ nskb->dev = skb->dev; nskb->ip_summed = skb->ip_summed; @@ -178,8 +296,17 @@ int netif_be_start_xmit(struct sk_buff * skb = nskb; } - netif->rx_req_cons_peek++; + netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 + + !!skb_shinfo(skb)->gso_size; netif_get(netif); + + if (netbk_can_queue(dev) && netbk_queue_full(netif)) { + netif->rx.sring->req_event = netif->rx_req_cons_peek + + netbk_max_required_rx_slots(netif); + mb(); /* request notification /then/ check & stop the queue */ + if (netbk_queue_full(netif)) + netif_stop_queue(dev); + } skb_queue_tail(&rx_queue, skb); tasklet_schedule(&net_rx_tasklet); @@ -203,7 +330,7 @@ static void xen_network_done_notify(void /* * Add following to poll() function in NAPI driver (Tigon3 is example): * if ( xen_network_done() ) - * tg3_enable_ints(tp); + * tg3_enable_ints(tp); */ int xen_network_done(void) { @@ -211,148 +338,371 @@ int xen_network_done(void) } #endif -static void net_rx_action(unsigned long unused) -{ - netif_t *netif = NULL; - s8 status; - u16 size, id, irq, flags; +struct netrx_pending_operations { + unsigned trans_prod, trans_cons; + unsigned mmu_prod, mmu_cons; + unsigned mcl_prod, mcl_cons; + unsigned copy_prod, copy_cons; + unsigned meta_prod, meta_cons; + mmu_update_t *mmu; + gnttab_transfer_t *trans; + gnttab_copy_t *copy; multicall_entry_t *mcl; + struct netbk_rx_meta *meta; +}; + +/* Set up the grant operations for this fragment. If it's a flipping + interface, we also set up the unmap request from here. */ +static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta, + int i, struct netrx_pending_operations *npo, + struct page *page, unsigned long size, + unsigned long offset) +{ mmu_update_t *mmu; gnttab_transfer_t *gop; - unsigned long vdata, old_mfn, new_mfn; + gnttab_copy_t *copy_gop; + multicall_entry_t *mcl; + netif_rx_request_t *req; + unsigned long old_mfn, new_mfn; + + old_mfn = virt_to_mfn(page_address(page)); + + req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i); + if (netif->copying_receiver) { + /* The fragment needs to be copied rather than + flipped. */ + meta->copy = 1; + copy_gop = npo->copy + npo->copy_prod++; + copy_gop->source.domid = DOMID_SELF; + copy_gop->source.offset = offset; + copy_gop->source.u.gmfn = old_mfn; + copy_gop->dest.domid = netif->domid; + copy_gop->dest.offset = 0; + copy_gop->dest.u.ref = req->gref; + copy_gop->len = size; + copy_gop->flags = GNTCOPY_dest_gref; + } else { + meta->copy = 0; + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + new_mfn = alloc_mfn(); + + /* + * Set the new P2M table entry before + * reassigning the old data page. Heed the + * comment in pgtable-2level.h:pte_page(). 
:-) + */ + set_phys_to_machine(page_to_pfn(page), new_mfn); + + mcl = npo->mcl + npo->mcl_prod++; + MULTI_update_va_mapping(mcl, + (unsigned long)page_address(page), + pfn_pte_ma(new_mfn, PAGE_KERNEL), + 0); + + mmu = npo->mmu + npo->mmu_prod++; + mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) | + MMU_MACHPHYS_UPDATE; + mmu->val = page_to_pfn(page); + } + + gop = npo->trans + npo->trans_prod++; + gop->mfn = old_mfn; + gop->domid = netif->domid; + gop->ref = req->gref; + } + return req->id; +} + +static void netbk_gop_skb(struct sk_buff *skb, + struct netrx_pending_operations *npo) +{ + netif_t *netif = netdev_priv(skb->dev); + int nr_frags = skb_shinfo(skb)->nr_frags; + int i; + int extra; + struct netbk_rx_meta *head_meta, *meta; + + head_meta = npo->meta + npo->meta_prod++; + head_meta->frag.page_offset = skb_shinfo(skb)->gso_type; + head_meta->frag.size = skb_shinfo(skb)->gso_size; + extra = !!head_meta->frag.size + 1; + + for (i = 0; i < nr_frags; i++) { + meta = npo->meta + npo->meta_prod++; + meta->frag = skb_shinfo(skb)->frags[i]; + meta->id = netbk_gop_frag(netif, meta, i + extra, npo, + meta->frag.page, + meta->frag.size, + meta->frag.page_offset); + } + + /* + * This must occur at the end to ensure that we don't trash + * skb_shinfo until we're done. + */ + head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo, + virt_to_page(skb->data), + skb_headlen(skb), + offset_in_page(skb->data)); + + netif->rx.req_cons += nr_frags + extra; +} + +static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta) +{ + int i; + + for (i = 0; i < nr_frags; i++) + put_page(meta[i].frag.page); +} + +/* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was + used to set up the operations on the top of + netrx_pending_operations, which have since been done. Check that + they didn't give any errors and advance over them. */ +static int netbk_check_gop(int nr_frags, domid_t domid, + struct netrx_pending_operations *npo) +{ + multicall_entry_t *mcl; + gnttab_transfer_t *gop; + gnttab_copy_t *copy_op; + int status = NETIF_RSP_OKAY; + int i; + + for (i = 0; i <= nr_frags; i++) { + if (npo->meta[npo->meta_cons + i].copy) { + copy_op = npo->copy + npo->copy_cons++; + if (copy_op->status != GNTST_okay) { + DPRINTK("Bad status %d from copy to DOM%d.\n", + gop->status, domid); + status = NETIF_RSP_ERROR; + } + } else { + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + mcl = npo->mcl + npo->mcl_cons++; + /* The update_va_mapping() must not fail. */ + BUG_ON(mcl->result != 0); + } + + gop = npo->trans + npo->trans_cons++; + /* Check the reassignment error code. */ + if (gop->status != 0) { + DPRINTK("Bad status %d from grant transfer to DOM%u\n", + gop->status, domid); + /* + * Page no longer belongs to us unless + * GNTST_bad_page, but that should be + * a fatal error anyway. + */ + BUG_ON(gop->status == GNTST_bad_page); + status = NETIF_RSP_ERROR; + } + } + } + + return status; +} + +static void netbk_add_frag_responses(netif_t *netif, int status, + struct netbk_rx_meta *meta, int nr_frags) +{ + int i; + unsigned long offset; + + for (i = 0; i < nr_frags; i++) { + int id = meta[i].id; + int flags = (i == nr_frags - 1) ? 
0 : NETRXF_more_data; + + if (meta[i].copy) + offset = 0; + else + offset = meta[i].frag.page_offset; + make_rx_response(netif, id, status, offset, + meta[i].frag.size, flags); + } +} + +static void net_rx_action(unsigned long unused) +{ + netif_t *netif = NULL; + s8 status; + u16 id, irq, flags; + netif_rx_response_t *resp; + multicall_entry_t *mcl; struct sk_buff_head rxq; struct sk_buff *skb; - u16 notify_list[NET_RX_RING_SIZE]; int notify_nr = 0; int ret; + int nr_frags; + int count; + unsigned long offset; + + /* + * Putting hundreds of bytes on the stack is considered rude. + * Static works because a tasklet can only be on one CPU at any time. + */ + static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3]; + static mmu_update_t rx_mmu[NET_RX_RING_SIZE]; + static gnttab_transfer_t grant_trans_op[NET_RX_RING_SIZE]; + static gnttab_copy_t grant_copy_op[NET_RX_RING_SIZE]; + static unsigned char rx_notify[NR_IRQS]; + static u16 notify_list[NET_RX_RING_SIZE]; + static struct netbk_rx_meta meta[NET_RX_RING_SIZE]; + + struct netrx_pending_operations npo = { + mmu: rx_mmu, + trans: grant_trans_op, + copy: grant_copy_op, + mcl: rx_mcl, + meta: meta}; skb_queue_head_init(&rxq); - mcl = rx_mcl; - mmu = rx_mmu; - gop = grant_rx_op; + count = 0; while ((skb = skb_dequeue(&rx_queue)) != NULL) { - netif = netdev_priv(skb->dev); - vdata = (unsigned long)skb->data; - old_mfn = virt_to_mfn(vdata); - - if (!xen_feature(XENFEAT_auto_translated_physmap)) { + nr_frags = skb_shinfo(skb)->nr_frags; + *(int *)skb->cb = nr_frags; + + if (!xen_feature(XENFEAT_auto_translated_physmap) && + check_mfn(nr_frags + 1)) { /* Memory squeeze? Back off for an arbitrary while. */ - if ((new_mfn = alloc_mfn()) == 0) { - if ( net_ratelimit() ) - WPRINTK("Memory squeeze in netback " - "driver.\n"); - mod_timer(&net_timer, jiffies + HZ); - skb_queue_head(&rx_queue, skb); - break; - } - /* - * Set the new P2M table entry before reassigning - * the old data page. Heed the comment in - * pgtable-2level.h:pte_page(). :-) - */ - set_phys_to_machine( - __pa(skb->data) >> PAGE_SHIFT, - new_mfn); - - MULTI_update_va_mapping(mcl, vdata, - pfn_pte_ma(new_mfn, - PAGE_KERNEL), 0); - mcl++; - - mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) | - MMU_MACHPHYS_UPDATE; - mmu->val = __pa(vdata) >> PAGE_SHIFT; - mmu++; - } - - gop->mfn = old_mfn; - gop->domid = netif->domid; - gop->ref = RING_GET_REQUEST( - &netif->rx, netif->rx.req_cons)->gref; - netif->rx.req_cons++; - gop++; + if ( net_ratelimit() ) + WPRINTK("Memory squeeze in netback " + "driver.\n"); + mod_timer(&net_timer, jiffies + HZ); + skb_queue_head(&rx_queue, skb); + break; + } + + netbk_gop_skb(skb, &npo); + + count += nr_frags + 1; __skb_queue_tail(&rxq, skb); /* Filled the batch queue? 
*/ - if ((gop - grant_rx_op) == ARRAY_SIZE(grant_rx_op)) + if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE) break; } - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - if (mcl == rx_mcl) - return; - + if (npo.mcl_prod && + !xen_feature(XENFEAT_auto_translated_physmap)) { + mcl = npo.mcl + npo.mcl_prod++; + + BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping); mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; - if (mmu - rx_mmu) { - mcl->op = __HYPERVISOR_mmu_update; - mcl->args[0] = (unsigned long)rx_mmu; - mcl->args[1] = mmu - rx_mmu; - mcl->args[2] = 0; - mcl->args[3] = DOMID_SELF; - mcl++; - } - - ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl); - BUG_ON(ret != 0); - } - - ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, - gop - grant_rx_op); + mcl->op = __HYPERVISOR_mmu_update; + mcl->args[0] = (unsigned long)rx_mmu; + mcl->args[1] = npo.mmu_prod; + mcl->args[2] = 0; + mcl->args[3] = DOMID_SELF; + } + + if (npo.trans_prod) { + mcl = npo.mcl + npo.mcl_prod++; + mcl->op = __HYPERVISOR_grant_table_op; + mcl->args[0] = GNTTABOP_transfer; + mcl->args[1] = (unsigned long)grant_trans_op; + mcl->args[2] = npo.trans_prod; + } + + if (npo.copy_prod) { + mcl = npo.mcl + npo.mcl_prod++; + mcl->op = __HYPERVISOR_grant_table_op; + mcl->args[0] = GNTTABOP_copy; + mcl->args[1] = (unsigned long)grant_copy_op; + mcl->args[2] = npo.copy_prod; + } + + /* Nothing to do? */ + if (!npo.mcl_prod) + return; + + BUG_ON(npo.copy_prod > NET_RX_RING_SIZE); + BUG_ON(npo.mmu_prod > NET_RX_RING_SIZE); + BUG_ON(npo.trans_prod > NET_RX_RING_SIZE); + BUG_ON(npo.mcl_prod > NET_RX_RING_SIZE+3); + BUG_ON(npo.meta_prod > NET_RX_RING_SIZE); + + ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod); BUG_ON(ret != 0); - mcl = rx_mcl; - gop = grant_rx_op; while ((skb = __skb_dequeue(&rxq)) != NULL) { - netif = netdev_priv(skb->dev); - size = skb->tail - skb->data; - - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->frag_list = NULL; - - netif->stats.tx_bytes += size; + nr_frags = *(int *)skb->cb; + + netif = netdev_priv(skb->dev); + /* We can't rely on skb_release_data to release the + pages used by fragments for us, since it tries to + touch the pages in the fraglist. If we're in + flipping mode, that doesn't work. In copying mode, + we still have access to all of the pages, and so + it's safe to let release_data deal with it. */ + /* (Freeing the fragments is safe since we copy + non-linear skbs destined for flipping interfaces) */ + if (!netif->copying_receiver) { + atomic_set(&(skb_shinfo(skb)->dataref), 1); + skb_shinfo(skb)->frag_list = NULL; + skb_shinfo(skb)->nr_frags = 0; + netbk_free_pages(nr_frags, meta + npo.meta_cons + 1); + } + + netif->stats.tx_bytes += skb->len; netif->stats.tx_packets++; - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - /* The update_va_mapping() must not fail. */ - BUG_ON(mcl->result != 0); - mcl++; - } - - /* Check the reassignment error code. */ - status = NETIF_RSP_OKAY; - if (gop->status != 0) { - DPRINTK("Bad status %d from grant transfer to DOM%u\n", - gop->status, netif->domid); - /* - * Page no longer belongs to us unless GNTST_bad_page, - * but that should be a fatal error anyway. - */ - BUG_ON(gop->status == GNTST_bad_page); - status = NETIF_RSP_ERROR; - } - irq = netif->irq; - id = RING_GET_REQUEST(&netif->rx, netif->rx.rsp_prod_pvt)->id; - flags = 0; + status = netbk_check_gop(nr_frags, netif->domid, &npo); + + id = meta[npo.meta_cons].id; + flags = nr_frags ? 
NETRXF_more_data : 0; + if (skb->ip_summed == CHECKSUM_HW) /* local packet? */ flags |= NETRXF_csum_blank | NETRXF_data_validated; else if (skb->proto_data_valid) /* remote but checksummed? */ flags |= NETRXF_data_validated; - if (make_rx_response(netif, id, status, - (unsigned long)skb->data & ~PAGE_MASK, - size, flags) && - (rx_notify[irq] == 0)) { + + if (meta[npo.meta_cons].copy) + offset = 0; + else + offset = offset_in_page(skb->data); + resp = make_rx_response(netif, id, status, offset, + skb_headlen(skb), flags); + + if (meta[npo.meta_cons].frag.size) { + struct netif_extra_info *gso = + (struct netif_extra_info *) + RING_GET_RESPONSE(&netif->rx, + netif->rx.rsp_prod_pvt++); + + resp->flags |= NETRXF_extra_info; + + gso->u.gso.size = meta[npo.meta_cons].frag.size; + gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; + gso->u.gso.pad = 0; + gso->u.gso.features = 0; + + gso->type = XEN_NETIF_EXTRA_TYPE_GSO; + gso->flags = 0; + } + + netbk_add_frag_responses(netif, status, + meta + npo.meta_cons + 1, + nr_frags); + + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret); + irq = netif->irq; + if (ret && !rx_notify[irq]) { rx_notify[irq] = 1; notify_list[notify_nr++] = irq; } + if (netif_queue_stopped(netif->dev) && + !netbk_queue_full(netif)) + netif_wake_queue(netif->dev); + netif_put(netif); dev_kfree_skb(skb); - gop++; + npo.meta_cons += nr_frags + 1; } while (notify_nr != 0) { @@ -403,7 +753,9 @@ static void add_to_net_schedule_list_tai return; spin_lock_irq(&net_schedule_list_lock); - if (!__on_net_schedule_list(netif) && netif->active) { + if (!__on_net_schedule_list(netif) && + likely(netif_running(netif->dev) && + netif_carrier_ok(netif->dev))) { list_add_tail(&netif->list, &net_schedule_list); netif_get(netif); } @@ -481,7 +833,7 @@ inline static void net_tx_action_dealloc netif = pending_tx_info[pending_idx].netif; - make_tx_response(netif, pending_tx_info[pending_idx].req.id, + make_tx_response(netif, &pending_tx_info[pending_idx].req, NETIF_RSP_OKAY); pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; @@ -490,14 +842,16 @@ inline static void net_tx_action_dealloc } } -static void netbk_tx_err(netif_t *netif, RING_IDX end) +static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end) { RING_IDX cons = netif->tx.req_cons; do { - netif_tx_request_t *txp = RING_GET_REQUEST(&netif->tx, cons); - make_tx_response(netif, txp->id, NETIF_RSP_ERROR); - } while (++cons < end); + make_tx_response(netif, txp, NETIF_RSP_ERROR); + if (cons >= end) + break; + txp = RING_GET_REQUEST(&netif->tx, cons++); + } while (1); netif->tx.req_cons = cons; netif_schedule_work(netif); netif_put(netif); @@ -508,7 +862,7 @@ static int netbk_count_requests(netif_t { netif_tx_request_t *first = txp; RING_IDX cons = netif->tx.req_cons; - int frags = 1; + int frags = 0; while (txp->flags & NETTXF_more_data) { if (frags >= work_to_do) { @@ -543,7 +897,7 @@ static gnttab_map_grant_ref_t *netbk_get skb_frag_t *frags = shinfo->frags; netif_tx_request_t *txp; unsigned long pending_idx = *((u16 *)skb->data); - RING_IDX cons = netif->tx.req_cons + 1; + RING_IDX cons = netif->tx.req_cons; int i, start; /* Skip first skb fragment if it is on same page as header fragment. 
*/ @@ -581,7 +935,7 @@ static int netbk_tx_check_mop(struct sk_ err = mop->status; if (unlikely(err)) { txp = &pending_tx_info[pending_idx].req; - make_tx_response(netif, txp->id, NETIF_RSP_ERROR); + make_tx_response(netif, txp, NETIF_RSP_ERROR); pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; netif_put(netif); } else { @@ -614,7 +968,7 @@ static int netbk_tx_check_mop(struct sk_ /* Error on this fragment: respond to client with an error. */ txp = &pending_tx_info[pending_idx].req; - make_tx_response(netif, txp->id, NETIF_RSP_ERROR); + make_tx_response(netif, txp, NETIF_RSP_ERROR); pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; netif_put(netif); @@ -661,6 +1015,57 @@ static void netbk_fill_frags(struct sk_b } } +int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras, + int work_to_do) +{ + struct netif_extra_info *extra; + RING_IDX cons = netif->tx.req_cons; + + do { + if (unlikely(work_to_do-- <= 0)) { + DPRINTK("Missing extra info\n"); + return -EBADR; + } + + extra = (struct netif_extra_info *) + RING_GET_REQUEST(&netif->tx, cons); + if (unlikely(!extra->type || + extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { + netif->tx.req_cons = ++cons; + DPRINTK("Invalid extra type: %d\n", extra->type); + return -EINVAL; + } + + memcpy(&extras[extra->type - 1], extra, sizeof(*extra)); + netif->tx.req_cons = ++cons; + } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); + + return work_to_do; +} + +static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso) +{ + if (!gso->u.gso.size) { + DPRINTK("GSO size must not be zero.\n"); + return -EINVAL; + } + + /* Currently only TCPv4 S.O. is supported. */ + if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) { + DPRINTK("Bad GSO type %d.\n", gso->u.gso.type); + return -EINVAL; + } + + skb_shinfo(skb)->gso_size = gso->u.gso.size; + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + + /* Header must be checked, and gso_segs computed. */ + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb)->gso_segs = 0; + + return 0; +} + /* Called after netfront has transmitted */ static void net_tx_action(unsigned long unused) { @@ -668,6 +1073,7 @@ static void net_tx_action(unsigned long struct sk_buff *skb; netif_t *netif; netif_tx_request_t txreq; + struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; u16 pending_idx; RING_IDX i; gnttab_map_grant_ref_t *mop; @@ -726,23 +1132,37 @@ static void net_tx_action(unsigned long } netif->remaining_credit -= txreq.size; + work_to_do--; + netif->tx.req_cons = ++i; + + memset(extras, 0, sizeof(extras)); + if (txreq.flags & NETTXF_extra_info) { + work_to_do = netbk_get_extras(netif, extras, + work_to_do); + i = netif->tx.req_cons; + if (unlikely(work_to_do < 0)) { + netbk_tx_err(netif, &txreq, i); + continue; + } + } + ret = netbk_count_requests(netif, &txreq, work_to_do); if (unlikely(ret < 0)) { - netbk_tx_err(netif, i - ret); + netbk_tx_err(netif, &txreq, i - ret); continue; } i += ret; - if (unlikely(ret > MAX_SKB_FRAGS + 1)) { + if (unlikely(ret > MAX_SKB_FRAGS)) { DPRINTK("Too many frags\n"); - netbk_tx_err(netif, i); + netbk_tx_err(netif, &txreq, i); continue; } if (unlikely(txreq.size < ETH_HLEN)) { DPRINTK("Bad packet size: %d\n", txreq.size); - netbk_tx_err(netif, i); - continue; + netbk_tx_err(netif, &txreq, i); + continue; } /* No crossing a page as the payload mustn't fragment. 
*/ @@ -750,25 +1170,36 @@ static void net_tx_action(unsigned long DPRINTK("txreq.offset: %x, size: %u, end: %lu\n", txreq.offset, txreq.size, (txreq.offset &~PAGE_MASK) + txreq.size); - netbk_tx_err(netif, i); + netbk_tx_err(netif, &txreq, i); continue; } pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; data_len = (txreq.size > PKT_PROT_LEN && - ret < MAX_SKB_FRAGS + 1) ? + ret < MAX_SKB_FRAGS) ? PKT_PROT_LEN : txreq.size; skb = alloc_skb(data_len+16, GFP_ATOMIC); if (unlikely(skb == NULL)) { DPRINTK("Can't allocate a skb in start_xmit.\n"); - netbk_tx_err(netif, i); + netbk_tx_err(netif, &txreq, i); break; } /* Packets passed to netif_rx() must have some headroom. */ skb_reserve(skb, 16); + + if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { + struct netif_extra_info *gso; + gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; + + if (netbk_set_skb_gso(skb, gso)) { + kfree_skb(skb); + netbk_tx_err(netif, &txreq, i); + continue; + } + } gnttab_set_map_op(mop, MMAP_VADDR(pending_idx), GNTMAP_host_map | GNTMAP_readonly, @@ -782,11 +1213,14 @@ static void net_tx_action(unsigned long __skb_put(skb, data_len); - skb_shinfo(skb)->nr_frags = ret - 1; + skb_shinfo(skb)->nr_frags = ret; if (data_len < txreq.size) { skb_shinfo(skb)->nr_frags++; skb_shinfo(skb)->frags[0].page = (void *)(unsigned long)pending_idx; + } else { + /* Discriminate from any valid pending_idx value. */ + skb_shinfo(skb)->frags[0].page = (void *)~0UL; } __skb_queue_tail(&tx_queue, skb); @@ -884,21 +1318,32 @@ static void netif_page_release(struct pa u16 pending_idx = page - virt_to_page(mmap_vstart); /* Ready for next use. */ - init_page_count(page); + set_page_count(page, 1); netif_idx_release(pending_idx); } +static void netif_rx_page_release(struct page *page) +{ + /* Ready for next use. 
*/ + set_page_count(page, 1); +} + irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs) { netif_t *netif = dev_id; + add_to_net_schedule_list_tail(netif); maybe_schedule_tx_action(); + + if (netif_queue_stopped(netif->dev) && !netbk_queue_full(netif)) + netif_wake_queue(netif->dev); + return IRQ_HANDLED; } static void make_tx_response(netif_t *netif, - u16 id, + netif_tx_request_t *txp, s8 st) { RING_IDX i = netif->tx.rsp_prod_pvt; @@ -906,8 +1351,11 @@ static void make_tx_response(netif_t *ne int notify; resp = RING_GET_RESPONSE(&netif->tx, i); - resp->id = id; + resp->id = txp->id; resp->status = st; + + if (txp->flags & NETTXF_extra_info) + RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL; netif->tx.rsp_prod_pvt = ++i; RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify); @@ -924,16 +1372,15 @@ static void make_tx_response(netif_t *ne #endif } -static int make_rx_response(netif_t *netif, - u16 id, - s8 st, - u16 offset, - u16 size, - u16 flags) +static netif_rx_response_t *make_rx_response(netif_t *netif, + u16 id, + s8 st, + u16 offset, + u16 size, + u16 flags) { RING_IDX i = netif->rx.rsp_prod_pvt; netif_rx_response_t *resp; - int notify; resp = RING_GET_RESPONSE(&netif->rx, i); resp->offset = offset; @@ -944,9 +1391,8 @@ static int make_rx_response(netif_t *net resp->status = (s16)st; netif->rx.rsp_prod_pvt = ++i; - RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify); - - return notify; + + return resp; } #ifdef NETBE_DEBUG_INTERRUPT @@ -1002,13 +1448,25 @@ static int __init netback_init(void) net_timer.function = net_alarm; page = balloon_alloc_empty_page_range(MAX_PENDING_REQS); - BUG_ON(page == NULL); + if (page == NULL) + return -ENOMEM; + mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); for (i = 0; i < MAX_PENDING_REQS; i++) { page = virt_to_page(MMAP_VADDR(i)); - init_page_count(page); + set_page_count(page, 1); SetPageForeign(page, netif_page_release); + } + + page = balloon_alloc_empty_page_range(NET_RX_RING_SIZE); + BUG_ON(page == NULL); + rx_mmap_area = pfn_to_kaddr(page_to_pfn(page)); + + for (i = 0; i < NET_RX_RING_SIZE; i++) { + page = virt_to_page(rx_mmap_area + (i * PAGE_SIZE)); + set_page_count(page, 1); + SetPageForeign(page, netif_rx_page_release); } pending_cons = 0; diff -r fe1a31c06cbe -r 11ee20d418ea drivers/xen/netback/xenbus.c --- a/drivers/xen/netback/xenbus.c Sun Oct 08 12:23:50 2006 -0400 +++ b/drivers/xen/netback/xenbus.c Sun Oct 08 12:28:37 2006 -0400 @@ -101,6 +101,19 @@ static int netback_probe(struct xenbus_d goto abort_transaction; } + err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", + "%d", 1); + if (err) { + message = "writing feature-gso-tcpv4"; + goto abort_transaction; + } + + err = xenbus_printf(xbt, dev->nodename, "feature-rx-copy", "%d", 1); + if (err) { + message = "writing feature-copying"; + goto abort_transaction; + } + err = xenbus_transaction_end(xbt, 0); } while (err == -EAGAIN); @@ -215,16 +228,31 @@ static void frontend_changed(struct xenb { struct backend_info *be = dev->dev.driver_data; - DPRINTK(""); + DPRINTK("%s", xenbus_strstate(frontend_state)); be->frontend_state = frontend_state; switch (frontend_state) { case XenbusStateInitialising: + if (dev->state == XenbusStateClosed) { + printk("%s: %s: prepare for reconnect\n", + __FUNCTION__, dev->nodename); + if (be->netif) { + netif_disconnect(be->netif); + be->netif = NULL; + } + xenbus_switch_state(dev, XenbusStateInitWait); + } + break; + case XenbusStateInitialised: break; case XenbusStateConnected: + if (!be->netif) 
{ + /* reconnect: setup be->netif */ + backend_changed(&be->backend_watch, NULL, 0); + } maybe_connect(be); break; @@ -233,13 +261,18 @@ static void frontend_changed(struct xenb break; case XenbusStateClosed: + xenbus_switch_state(dev, XenbusStateClosed); +#ifdef JX + if (xenbus_dev_is_online(dev)) + break; +#endif + /* fall through if not online */ + case XenbusStateUnknown: if (be->netif != NULL) kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); device_unregister(&dev->dev); break; - case XenbusStateUnknown: - case XenbusStateInitWait: default: xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", frontend_state); @@ -342,8 +375,9 @@ static int connect_rings(struct backend_ { struct xenbus_device *dev = be->dev; unsigned long tx_ring_ref, rx_ring_ref; - unsigned int evtchn; + unsigned int evtchn, rx_copy; int err; + int val; DPRINTK(""); @@ -356,6 +390,51 @@ static int connect_rings(struct backend_ "reading %s/ring-ref and event-channel", dev->otherend); return err; + } + + err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u", + &rx_copy); + if (err == -ENOENT) { + err = 0; + rx_copy = 0; + } + if (err < 0) { + xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy", + dev->otherend); + return err; + } + be->netif->copying_receiver = !!rx_copy; + + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-notify", "%d", + &val) < 0) + val = 0; + if (val) + be->netif->can_queue = 1; + else + /* Must be non-zero for pfifo_fast to work. */ + be->netif->dev->tx_queue_len = 1; + + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0) + val = 0; + if (val) { + be->netif->features |= NETIF_F_SG; + be->netif->dev->features |= NETIF_F_SG; + } + + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4", "%d", + &val) < 0) + val = 0; + if (val) { + be->netif->features |= NETIF_F_TSO; + be->netif->dev->features |= NETIF_F_TSO; + } + + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload", + "%d", &val) < 0) + val = 0; + if (val) { + be->netif->features &= ~NETIF_F_IP_CSUM; + be->netif->dev->features &= ~NETIF_F_IP_CSUM; } /* Map the shared frame, irq etc. */ diff -r fe1a31c06cbe -r 11ee20d418ea drivers/xen/netfront/netfront.c --- a/drivers/xen/netfront/netfront.c Sun Oct 08 12:23:50 2006 -0400 +++ b/drivers/xen/netfront/netfront.c Sun Oct 08 12:28:37 2006 -0400 @@ -46,11 +46,11 @@ #include <linux/ethtool.h> #include <linux/in.h> #include <linux/if_ether.h> +#include <linux/io.h> #include <net/sock.h> #include <net/pkt_sched.h> #include <net/arp.h> #include <net/route.h> -#include <asm/io.h> #include <asm/uaccess.h> #include <xen/evtchn.h> #include <xen/xenbus.h> @@ -58,21 +58,31 @@ #include <xen/interface/memory.h> #include <xen/balloon.h> #include <asm/page.h> +#include <asm/maddr.h> #include <asm/uaccess.h> #include <xen/interface/grant_table.h> #include <xen/gnttab.h> +#define RX_COPY_THRESHOLD 256 + +/* If we don't have GSO, fake things up so that we never try to use it. */ +#ifndef NETIF_F_GSO +#define netif_needs_gso(dev, skb) 0 +#define dev_disable_gso_features(dev) ((void)0) +#else +#define HAVE_GSO 1 +static inline void dev_disable_gso_features(struct net_device *dev) +{ + /* Turn off all GSO bits except ROBUST. 
*/ + dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1; + dev->features |= NETIF_F_GSO_ROBUST; +} +#endif + #define GRANT_INVALID_REF 0 #define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE) #define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE) - -static inline void init_skb_shinfo(struct sk_buff *skb) -{ - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->frag_list = NULL; -} struct netfront_info { struct list_head list; @@ -88,6 +98,7 @@ struct netfront_info { unsigned int handle; unsigned int evtchn, irq; + unsigned int copying_receiver; /* Receive-ring batched refills. */ #define RX_MIN_TARGET 8 @@ -99,30 +110,35 @@ struct netfront_info { struct timer_list rx_refill_timer; /* - * {tx,rx}_skbs store outstanding skbuffs. The first entry in each - * array is an index into a chain of free entries. + * {tx,rx}_skbs store outstanding skbuffs. The first entry in tx_skbs + * is an index into a chain of free entries. */ struct sk_buff *tx_skbs[NET_TX_RING_SIZE+1]; - struct sk_buff *rx_skbs[NET_RX_RING_SIZE+1]; + struct sk_buff *rx_skbs[NET_RX_RING_SIZE]; #define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) grant_ref_t gref_tx_head; grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1]; grant_ref_t gref_rx_head; - grant_ref_t grant_rx_ref[NET_TX_RING_SIZE + 1]; + grant_ref_t grant_rx_ref[NET_TX_RING_SIZE]; struct xenbus_device *xbdev; int tx_ring_ref; int rx_ring_ref; u8 mac[ETH_ALEN]; - unsigned long rx_pfn_array[NET_RX_RING_SIZE]; + xen_pfn_t rx_pfn_array[NET_RX_RING_SIZE]; struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1]; struct mmu_update rx_mmu[NET_RX_RING_SIZE]; }; +struct netfront_rx_info { + struct netif_rx_response rx; + struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; +}; + /* - * Access macros for acquiring freeing slots in {tx,rx}_skbs[]. + * Access macros for acquiring freeing slots in tx_skbs[]. */ static inline void add_id_to_freelist(struct sk_buff **list, unsigned short id) @@ -136,6 +152,29 @@ static inline unsigned short get_id_from unsigned int id = (unsigned int)(unsigned long)list[0]; list[0] = list[id]; return id; +} + +static inline int xennet_rxidx(RING_IDX idx) +{ + return idx & (NET_RX_RING_SIZE - 1); +} + +static inline struct sk_buff *xennet_get_rx_skb(struct netfront_info *np, + RING_IDX ri) +{ + int i = xennet_rxidx(ri); + struct sk_buff *skb = np->rx_skbs[i]; + np->rx_skbs[i] = NULL; + return skb; +} + +static inline grant_ref_t xennet_get_rx_ref(struct netfront_info *np, + RING_IDX ri) +{ + int i = xennet_rxidx(ri); + grant_ref_t ref = np->grant_rx_ref[i]; + np->grant_rx_ref[i] = GRANT_INVALID_REF; + return ref; } #define DPRINTK(fmt, args...) 
\ @@ -148,12 +187,13 @@ static inline unsigned short get_id_from static int talk_to_backend(struct xenbus_device *, struct netfront_info *); static int setup_device(struct xenbus_device *, struct netfront_info *); -static struct net_device *create_netdev(int, struct xenbus_device *); +static struct net_device *create_netdev(int, int, struct xenbus_device *); static void netfront_closing(struct xenbus_device *); static void end_access(int, void *); static void netif_disconnect_backend(struct netfront_info *); +static int open_netdev(struct netfront_info *); static void close_netdev(struct netfront_info *); static void netif_free(struct netfront_info *); @@ -190,6 +230,7 @@ static int __devinit netfront_probe(stru struct net_device *netdev; struct netfront_info *info; unsigned int handle; + unsigned feature_rx_copy; err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%u", &handle); if (err != 1) { @@ -197,7 +238,24 @@ static int __devinit netfront_probe(stru return err; } - netdev = create_netdev(handle, dev); +#ifdef CONFIG_PPC_XEN + err = xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-copy", "%u", + &feature_rx_copy); + BUG_ON(err != 1); + if (err != 1) { + xenbus_dev_fatal(dev, err, "reading feature-rx-copy"); + return err; + } + BUG_ON(!feature_rx_copy); + if (!feature_rx_copy) { + xenbus_dev_fatal(dev, 0, "need a copy-capable backend"); + return -EINVAL; + } +#else + feature_rx_copy = 0; +#endif + + netdev = create_netdev(handle, feature_rx_copy, dev); if (IS_ERR(netdev)) { err = PTR_ERR(netdev); xenbus_dev_fatal(dev, err, "creating netdev"); @@ -208,15 +266,22 @@ static int __devinit netfront_probe(stru dev->dev.driver_data = info; err = talk_to_backend(dev, info); - if (err) { - xennet_sysfs_delif(info->netdev); - unregister_netdev(netdev); - free_netdev(netdev); - dev->dev.driver_data = NULL; - return err; - } + if (err) + goto fail_backend; + + err = open_netdev(info); + if (err) + goto fail_open; return 0; + + fail_open: + xennet_sysfs_delif(info->netdev); + unregister_netdev(netdev); + fail_backend: + free_netdev(netdev); + dev->dev.driver_data = NULL; + return err; } @@ -303,6 +368,33 @@ again: goto abort_transaction; } + err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u", + info->copying_receiver); + if (err) { + message = "writing request-rx-copy"; + goto abort_transaction; + } + + err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1); + if (err) { + message = "writing feature-rx-notify"; + goto abort_transaction; + } + + err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1); + if (err) { + message = "writing feature-sg"; + goto abort_transaction; + } + +#ifdef HAVE_GSO + err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1); + if (err) { + message = "writing feature-gso-tcpv4"; + goto abort_transaction; + } +#endif + err = xenbus_transaction_end(xbt, 0); if (err) { if (err == -EAGAIN) @@ -374,7 +466,8 @@ static int setup_device(struct xenbus_de memcpy(netdev->dev_addr, info->mac, ETH_ALEN); err = bind_evtchn_to_irqhandler(info->evtchn, netif_int, - SA_SAMPLE_RANDOM, netdev->name, netdev); + SA_SAMPLE_RANDOM, netdev->name, + netdev); if (err < 0) goto fail; info->irq = err; @@ -395,7 +488,7 @@ static void backend_changed(struct xenbu struct netfront_info *np = dev->dev.driver_data; struct net_device *netdev = np->netdev; - DPRINTK("\n"); + DPRINTK("%s\n", xenbus_strstate(backend_state)); switch (backend_state) { case XenbusStateInitialising: @@ -453,8 +546,14 @@ static int network_open(struct net_devic memset(&np->stats, 
0, sizeof(np->stats)); - network_alloc_rx_buffers(dev); - np->rx.sring->rsp_event = np->rx.rsp_cons + 1; + spin_lock(&np->rx_lock); + if (netif_carrier_ok(dev)) { + network_alloc_rx_buffers(dev); + np->rx.sring->rsp_event = np->rx.rsp_cons + 1; + if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) + netif_rx_schedule(dev); + } + spin_unlock(&np->rx_lock); netif_start_queue(dev); @@ -463,7 +562,7 @@ static int network_open(struct net_devic static inline int netfront_tx_slot_available(struct netfront_info *np) { - return RING_FREE_REQUESTS(&np->tx) >= MAX_SKB_FRAGS + 1; + return RING_FREE_REQUESTS(&np->tx) >= MAX_SKB_FRAGS + 2; } static inline void network_maybe_wake_tx(struct net_device *dev) @@ -483,15 +582,20 @@ static void network_tx_buf_gc(struct net struct netfront_info *np = netdev_priv(dev); struct sk_buff *skb; - if (unlikely(!netif_carrier_ok(dev))) - return; + BUG_ON(!netif_carrier_ok(dev)); do { prod = np->tx.sring->rsp_prod; rmb(); /* Ensure we see responses up to 'rp'. */ for (cons = np->tx.rsp_cons; cons != prod; cons++) { - id = RING_GET_RESPONSE(&np->tx, cons)->id; + struct netif_tx_response *txrsp; + + txrsp = RING_GET_RESPONSE(&np->tx, cons); + if (txrsp->status == NETIF_RSP_NULL) + continue; + + id = txrsp->id; skb = np->tx_skbs[id]; if (unlikely(gnttab_query_foreign_access( np->grant_tx_ref[id]) != 0)) { @@ -540,10 +644,15 @@ static void network_alloc_rx_buffers(str unsigned short id; struct netfront_info *np = netdev_priv(dev); struct sk_buff *skb; - int i, batch_target; + struct page *page; + int i, batch_target, notify; RING_IDX req_prod = np->rx.req_prod_pvt; struct xen_memory_reservation reservation; grant_ref_t ref; + unsigned long pfn; + void *vaddr; + int nr_flips; + netif_rx_request_t *req; if (unlikely(!netif_carrier_ok(dev))) return; @@ -557,28 +666,41 @@ static void network_alloc_rx_buffers(str batch_target = np->rx_target - (req_prod - np->rx.rsp_cons); for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) { /* - * Subtract dev_alloc_skb headroom (16 bytes) and shared info - * tailroom then round down to SKB_DATA_ALIGN boundary. + * Allocate an skb and a page. Do not use __dev_alloc_skb as + * that will allocate page-sized buffers which is not + * necessary here. + * 16 bytes added as necessary headroom for netif_receive_skb. */ - skb = __dev_alloc_skb( - ((PAGE_SIZE - sizeof(struct skb_shared_info)) & - (-SKB_DATA_ALIGN(1))) - 16, - GFP_ATOMIC|__GFP_NOWARN); - if (skb == NULL) { + skb = alloc_skb(RX_COPY_THRESHOLD + 16, + GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(!skb)) + goto no_skb; + + page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); + if (!page) { + kfree_skb(skb); +no_skb: /* Any skbuffs queued for refill? Force them out. */ if (i != 0) goto refill; /* Could not allocate any skbuffs. Try again later. */ mod_timer(&np->rx_refill_timer, jiffies + (HZ/10)); - return; + break; } + + skb_reserve(skb, 16); /* mimic dev_alloc_skb() */ + skb_shinfo(skb)->frags[0].page = page; + skb_shinfo(skb)->nr_frags = 1; __skb_queue_tail(&np->rx_batch, skb); } /* Is the batch large enough to be worthwhile? */ - if (i < (np->rx_target/2)) + if (i < (np->rx_target/2)) { + if (req_prod > np->rx.sring->req_prod) + goto push; return; + } /* Adjust our fill target if we risked running out of buffers. 
*/ if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) && @@ -586,70 +708,93 @@ static void network_alloc_rx_buffers(str np->rx_target = np->rx_max_target; refill: - for (i = 0; ; i++) { + for (nr_flips = i = 0; ; i++) { if ((skb = __skb_dequeue(&np->rx_batch)) == NULL) break; skb->dev = dev; - id = get_id_from_freelist(np->rx_skbs); - + id = xennet_rxidx(req_prod + i); + + BUG_ON(np->rx_skbs[id]); np->rx_skbs[id] = skb; - RING_GET_REQUEST(&np->rx, req_prod + i)->id = id; ref = gnttab_claim_grant_reference(&np->gref_rx_head); BUG_ON((signed short)ref < 0); np->grant_rx_ref[id] = ref; - gnttab_grant_foreign_transfer_ref(ref, - np->xbdev->otherend_id, - __pa(skb->head)>>PAGE_SHIFT); - RING_GET_REQUEST(&np->rx, req_prod + i)->gref = ref; - np->rx_pfn_array[i] = virt_to_mfn(skb->head); + + pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page); + vaddr = page_address(skb_shinfo(skb)->frags[0].page); + + req = RING_GET_REQUEST(&np->rx, req_prod + i); + if (!np->copying_receiver) { + gnttab_grant_foreign_transfer_ref(ref, + np->xbdev->otherend_id, + pfn); + np->rx_pfn_array[nr_flips] = pfn_to_mfn(pfn); + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Remove this page before passing + * back to Xen. */ + set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + MULTI_update_va_mapping(np->rx_mcl+i, + (unsigned long)vaddr, + __pte(0), 0); + } + nr_flips++; + } else { + gnttab_grant_foreign_access_ref(ref, + np->xbdev->otherend_id, + pfn, + 0); + } + + req->id = id; + req->gref = ref; + } + + if ( nr_flips != 0 ) { + /* Tell the ballon driver what is going on. */ + balloon_update_driver_allowance(i); + + set_xen_guest_handle(reservation.extent_start, + np->rx_pfn_array); + reservation.nr_extents = nr_flips; + reservation.extent_order = 0; + reservation.address_bits = 0; + reservation.domid = DOMID_SELF; if (!xen_feature(XENFEAT_auto_translated_physmap)) { - /* Remove this page before passing back to Xen. */ - set_phys_to_machine(__pa(skb->head) >> PAGE_SHIFT, - INVALID_P2M_ENTRY); - MULTI_update_va_mapping(np->rx_mcl+i, - (unsigned long)skb->head, - __pte(0), 0); + /* After all PTEs have been zapped, flush the TLB. */ + np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = + UVMF_TLB_FLUSH|UVMF_ALL; + + /* Give away a batch of pages. */ + np->rx_mcl[i].op = __HYPERVISOR_memory_op; + np->rx_mcl[i].args[0] = XENMEM_decrease_reservation; + np->rx_mcl[i].args[1] = (unsigned long)&reservation; + + /* Zap PTEs and give away pages in one big + * multicall. */ + (void)HYPERVISOR_multicall(np->rx_mcl, i+1); + + /* Check return status of HYPERVISOR_memory_op(). */ + if (unlikely(np->rx_mcl[i].result != i)) + panic("Unable to reduce memory reservation\n"); + } else { + if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &reservation) != i) + panic("Unable to reduce memory reservation\n"); } - } - - /* Tell the ballon driver what is going on. */ - balloon_update_driver_allowance(i); - - set_xen_guest_handle(reservation.extent_start, - (xen_pfn_t *)np->rx_pfn_array); - reservation.nr_extents = i; - reservation.extent_order = 0; - reservation.address_bits = 0; - reservation.domid = DOMID_SELF; - - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - /* After all PTEs have been zapped, flush the TLB. */ - np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = - UVMF_TLB_FLUSH|UVMF_ALL; - - /* Give away a batch of pages. 
*/ - np->rx_mcl[i].op = __HYPERVISOR_memory_op; - np->rx_mcl[i].args[0] = XENMEM_decrease_reservation; - np->rx_mcl[i].args[1] = (unsigned long)&reservation; - - /* Zap PTEs and give away pages in one big multicall. */ - (void)HYPERVISOR_multicall(np->rx_mcl, i+1); - - /* Check return status of HYPERVISOR_memory_op(). */ - if (unlikely(np->rx_mcl[i].result != i)) - panic("Unable to reduce memory reservation\n"); - } else - if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, - &reservation) != i) - panic("Unable to reduce memory reservation\n"); + } else { + wmb(); + } /* Above is a suitable barrier to ensure backend will see requests. */ np->rx.req_prod_pvt = req_prod + i; - RING_PUSH_REQUESTS(&np->rx); + push: + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify); + if (notify) + notify_remote_via_irq(np->irq); } static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev, @@ -720,6 +865,7 @@ static int network_start_xmit(struct sk_ unsigned short id; struct netfront_info *np = netdev_priv(dev); struct netif_tx_request *tx; + struct netif_extra_info *extra; char *data = skb->data; RING_IDX i; grant_ref_t ref; @@ -740,7 +886,8 @@ static int network_start_xmit(struct sk_ spin_lock_irq(&np->tx_lock); if (unlikely(!netif_carrier_ok(dev) || - (frags > 1 && !xennet_can_sg(dev)))) { + (frags > 1 && !xennet_can_sg(dev)) || + netif_needs_gso(dev, skb))) { spin_unlock_irq(&np->tx_lock); goto drop; } @@ -763,10 +910,35 @@ static int network_start_xmit(struct sk_ tx->size = len; tx->flags = 0; + extra = NULL; + if (skb->ip_summed == CHECKSUM_HW) /* local packet? */ tx->flags |= NETTXF_csum_blank | NETTXF_data_validated; +#ifdef CONFIG_XEN if (skb->proto_data_valid) /* remote but checksummed? */ tx->flags |= NETTXF_data_validated; +#endif + +#ifdef HAVE_GSO + if (skb_shinfo(skb)->gso_size) { + struct netif_extra_info *gso = (struct netif_extra_info *) + RING_GET_REQUEST(&np->tx, ++i); + + if (extra) + extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE; + else + tx->flags |= NETTXF_extra_info; + + gso->u.gso.size = skb_shinfo(skb)->gso_size; + gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; + gso->u.gso.pad = 0; + gso->u.gso.features = 0; + + gso->type = XEN_NETIF_EXTRA_TYPE_GSO; + gso->flags = 0; + extra = gso; + } +#endif np->tx.req_prod_pvt = i + 1; @@ -802,191 +974,411 @@ static irqreturn_t netif_int(int irq, vo unsigned long flags; spin_lock_irqsave(&np->tx_lock, flags); - network_tx_buf_gc(dev); + + if (likely(netif_carrier_ok(dev))) { + network_tx_buf_gc(dev); + /* Under tx_lock: protects access to rx shared-ring indexes. */ + if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) + netif_rx_schedule(dev); + } + spin_unlock_irqrestore(&np->tx_lock, flags); - if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx) && - likely(netif_running(dev))) - netif_rx_schedule(dev); - return IRQ_HANDLED; } - -static int netif_poll(struct net_device *dev, int *pbudget) -{ - struct netfront_info *np = netdev_priv(dev); - struct sk_buff *skb, *nskb; - struct netif_rx_response *rx; - RING_IDX i, rp; - struct mmu_update *mmu = np->rx_mmu; - struct multicall_entry *mcl = np->rx_mcl; - int work_done, budget, more_to_do = 1; - struct sk_buff_head rxq; - unsigned long flags; - unsigned long mfn; - grant_ref_t ref; - - spin_lock(&np->rx_lock); - - if (unlikely(!netif_carrier_ok(dev))) { - spin_unlock(&np->rx_lock); - return 0; - } - - skb_queue_head_init(&rxq); - - if ((budget = *pbudget) > dev->quota) - budget = dev->quota; - rp = np->rx.sring->rsp_prod; - rmb(); /* Ensure we see queued responses up to 'rp'. 
*/ - - for (i = np->rx.rsp_cons, work_done = 0; - (i != rp) && (work_done < budget); - i++, work_done++) { - rx = RING_GET_RESPONSE(&np->rx, i); +static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb, + grant_ref_t ref) +{ + int new = xennet_rxidx(np->rx.req_prod_pvt); + + BUG_ON(np->rx_skbs[new]); + np->rx_skbs[new] = skb; + np->grant_rx_ref[new] = ref; + RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new; + RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref; + np->rx.req_prod_pvt++; +} + +int xennet_get_extras(struct netfront_info *np, + struct netif_extra_info *extras, RING_IDX rp) + +{ + struct netif_extra_info *extra; + RING_IDX cons = np->rx.rsp_cons; + int err = 0; + + do { + struct sk_buff *skb; + grant_ref_t ref; + + if (unlikely(cons + 1 == rp)) { + if (net_ratelimit()) + WPRINTK("Missing extra info\n"); + err = -EBADR; + break; + } + + extra = (struct netif_extra_info *) + RING_GET_RESPONSE(&np->rx, ++cons); + + if (unlikely(!extra->type || + extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { + if (net_ratelimit()) + WPRINTK("Invalid extra type: %d\n", + extra->type); + err = -EINVAL; + } else { + memcpy(&extras[extra->type - 1], extra, + sizeof(*extra)); + } + + skb = xennet_get_rx_skb(np, cons); + ref = xennet_get_rx_ref(np, cons); + xennet_move_rx_slot(np, skb, ref); + } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); + + np->rx.rsp_cons = cons; + return err; +} + +static int xennet_get_responses(struct netfront_info *np, + struct netfront_rx_info *rinfo, RING_IDX rp, + struct sk_buff_head *list, + int *pages_flipped_p) +{ + int pages_flipped = *pages_flipped_p; + struct mmu_update *mmu; + struct multicall_entry *mcl; + struct netif_rx_response *rx = &rinfo->rx; + struct netif_extra_info *extras = rinfo->extras; + RING_IDX cons = np->rx.rsp_cons; + struct sk_buff *skb = xennet_get_rx_skb(np, cons); + grant_ref_t ref = xennet_get_rx_ref(np, cons); + int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD); + int frags = 1; + int err = 0; + unsigned long ret; + + if (rx->flags & NETRXF_extra_info) { + err = xennet_get_extras(np, extras, rp); + cons = np->rx.rsp_cons; + } + + for (;;) { + unsigned long mfn; + + if (unlikely(rx->status < 0 || + rx->offset + rx->status > PAGE_SIZE)) { + if (net_ratelimit()) + WPRINTK("rx->offset: %x, size: %u\n", + rx->offset, rx->status); + err = -EINVAL; + goto next; + } /* * This definitely indicates a bug, either in this driver or in * the backend driver. In future this should flag the bad * situation to the system controller to reboot the backed. */ - if ((ref = np->grant_rx_ref[rx->id]) == GRANT_INVALID_REF) { + if (ref == GRANT_INVALID_REF) { WPRINTK("Bad rx response id %d.\n", rx->id); + err = -EINVAL; + goto next; + } + + if (!np->copying_receiver) { + /* Memory pressure, insufficient buffer + * headroom, ... */ + if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) { + if (net_ratelimit()) + WPRINTK("Unfulfilled rx req " + "(id=%d, st=%d).\n", + rx->id, rx->status); + xennet_move_rx_slot(np, skb, ref); + err = -ENOMEM; + goto next; + } + + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Remap the page. 
*/ + struct page *page = + skb_shinfo(skb)->frags[0].page; + unsigned long pfn = page_to_pfn(page); + void *vaddr = page_address(page); + + mcl = np->rx_mcl + pages_flipped; + mmu = np->rx_mmu + pages_flipped; + + MULTI_update_va_mapping(mcl, + (unsigned long)vaddr, + pfn_pte_ma(mfn, + PAGE_KERNEL), + 0); + mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT) + | MMU_MACHPHYS_UPDATE; + mmu->val = pfn; + + set_phys_to_machine(pfn, mfn); + } + pages_flipped++; + } else { + ret = gnttab_end_foreign_access_ref(ref, 0); + BUG_ON(!ret); + } + + gnttab_release_grant_reference(&np->gref_rx_head, ref); + + __skb_queue_tail(list, skb); + +next: + if (!(rx->flags & NETRXF_more_data)) + break; + + if (cons + frags == rp) { + if (net_ratelimit()) + WPRINTK("Need more frags\n"); + err = -ENOENT; + break; + } + + rx = RING_GET_RESPONSE(&np->rx, cons + frags); + skb = xennet_get_rx_skb(np, cons + frags); + ref = xennet_get_rx_ref(np, cons + frags); + frags++; + } + + if (unlikely(frags > max)) { + if (net_ratelimit()) + WPRINTK("Too many frags\n"); + err = -E2BIG; + } + + *pages_flipped_p = pages_flipped; + + return err; +} + +static RING_IDX xennet_fill_frags(struct netfront_info *np, + struct sk_buff *skb, + struct sk_buff_head *list) +{ + struct skb_shared_info *shinfo = skb_shinfo(skb); + int nr_frags = shinfo->nr_frags; + RING_IDX cons = np->rx.rsp_cons; + skb_frag_t *frag = shinfo->frags + nr_frags; + struct sk_buff *nskb; + + while ((nskb = __skb_dequeue(list))) { + struct netif_rx_response *rx = + RING_GET_RESPONSE(&np->rx, ++cons); + + frag->page = skb_shinfo(nskb)->frags[0].page; + frag->page_offset = rx->offset; + frag->size = rx->status; + + skb->data_len += rx->status; + + skb_shinfo(nskb)->nr_frags = 0; + kfree_skb(nskb); + + frag++; + nr_frags++; + } + + shinfo->nr_frags = nr_frags; + return cons; +} + +static int xennet_set_skb_gso(struct sk_buff *skb, + struct netif_extra_info *gso) +{ + if (!gso->u.gso.size) { + if (net_ratelimit()) + WPRINTK("GSO size must not be zero.\n"); + return -EINVAL; + } + + /* Currently only TCPv4 S.O. is supported. */ + if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) { + if (net_ratelimit()) + WPRINTK("Bad GSO type %d.\n", gso->u.gso.type); + return -EINVAL; + } + +#ifdef HAVE_GSO + skb_shinfo(skb)->gso_size = gso->u.gso.size; + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + + /* Header must be checked, and gso_segs computed. */ + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb)->gso_segs = 0; + + return 0; +#else + if (net_ratelimit()) + WPRINTK("GSO unsupported by this kernel.\n"); + return -EINVAL; +#endif +} + +static int netif_poll(struct net_device *dev, int *pbudget) +{ + struct netfront_info *np = netdev_priv(dev); + struct sk_buff *skb; + struct netfront_rx_info rinfo; + struct netif_rx_response *rx = &rinfo.rx; + struct netif_extra_info *extras = rinfo.extras; + RING_IDX i, rp; + struct multicall_entry *mcl; + int work_done, budget, more_to_do = 1; + struct sk_buff_head rxq; + struct sk_buff_head errq; + struct sk_buff_head tmpq; + unsigned long flags; + unsigned int len; + int pages_flipped = 0; + int err; + + spin_lock(&np->rx_lock); + + if (unlikely(!netif_carrier_ok(dev))) { + spin_unlock(&np->rx_lock); + return 0; + } + + skb_queue_head_init(&rxq); + skb_queue_head_init(&errq); + skb_queue_head_init(&tmpq); + + if ((budget = *pbudget) > dev->quota) + budget = dev->quota; + rp = np->rx.sring->rsp_prod; + rmb(); /* Ensure we see queued responses up to 'rp'. 
*/ + + for (i = np->rx.rsp_cons, work_done = 0; + (i != rp) && (work_done < budget); + np->rx.rsp_cons = ++i, work_done++) { + memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx)); + memset(extras, 0, sizeof(extras)); + + err = xennet_get_responses(np, &rinfo, rp, &tmpq, + &pages_flipped); + + if (unlikely(err)) { +err: + i = np->rx.rsp_cons + skb_queue_len(&tmpq) - 1; work_done--; + while ((skb = __skb_dequeue(&tmpq))) + __skb_queue_tail(&errq, skb); + np->stats.rx_errors++; continue; } - /* Memory pressure, insufficient buffer headroom, ... */ - if ((mfn = gnttab_end_foreign_transfer_ref(ref)) == 0) { - if (net_ratelimit()) - WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n", - rx->id, rx->status); - RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = - rx->id; - RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = - ref; - np->rx.req_prod_pvt++; - RING_PUSH_REQUESTS(&np->rx); - work_done--; - continue; + skb = __skb_dequeue(&tmpq); + + if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { + struct netif_extra_info *gso; + gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; + + if (unlikely(xennet_set_skb_gso(skb, gso))) { + __skb_queue_head(&tmpq, skb); + goto err; + } } - gnttab_release_grant_reference(&np->gref_rx_head, ref); - np->grant_rx_ref[rx->id] = GRANT_INVALID_REF; - - skb = np->rx_skbs[rx->id]; - add_id_to_freelist(np->rx_skbs, rx->id); - - /* NB. We handle skb overflow later. */ - skb->data = skb->head + rx->offset; - skb->len = rx->status; - skb->tail = skb->data + skb->len; + skb->nh.raw = (void *)skb_shinfo(skb)->frags[0].page; + skb->h.raw = skb->nh.raw + rx->offset; + + len = rx->status; + if (len > RX_COPY_THRESHOLD) + len = RX_COPY_THRESHOLD; + skb_put(skb, len); + + if (rx->status > len) { + skb_shinfo(skb)->frags[0].page_offset = + rx->offset + len; + skb_shinfo(skb)->frags[0].size = rx->status - len; + skb->data_len = rx->status - len; + } else { + skb_shinfo(skb)->frags[0].page = NULL; + skb_shinfo(skb)->nr_frags = 0; + } + + i = xennet_fill_frags(np, skb, &tmpq); + + /* + * Truesize must approximates the size of true data plus + * any supervisor overheads. Adding hypervisor overheads + * has been shown to significantly reduce achievable + * bandwidth with the default receive buffer size. It is + * therefore not wise to account for it here. + * + * After alloc_skb(RX_COPY_THRESHOLD), truesize is set to + * RX_COPY_THRESHOLD + the supervisor overheads. Here, we + * add the size of the data pulled in xennet_fill_frags(). + * + * We also adjust for any unused space in the main data + * area by subtracting (RX_COPY_THRESHOLD - len). This is + * especially important with drivers which split incoming + * packets into header and data, using only 66 bytes of + * the main data area (see the e1000 driver for example.) + * On such systems, without this last adjustement, our + * achievable receive throughout using the standard receive + * buffer size was cut by 25%(!!!). + */ + skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len); + skb->len += skb->data_len; /* * Old backends do not assert data_validated but we * can infer it from csum_blank so test both flags. 
*/ - if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank)) { + if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank)) skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->proto_data_valid = 1; - } else { + else skb->ip_summed = CHECKSUM_NONE; - skb->proto_data_valid = 0; +#ifdef CONFIG_XEN + skb->proto_data_valid = (skb->ip_summed != CHECKSUM_NONE); + skb->proto_csum_blank = !!(rx->flags & NETRXF_csum_blank); +#endif + np->stats.rx_packets++; + np->stats.rx_bytes += skb->len; + + __skb_queue_tail(&rxq, skb); + } + + if (pages_flipped) { + /* Some pages are no longer absent... */ + balloon_update_driver_allowance(-pages_flipped); + + /* Do all the remapping work and M2P updates. */ + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + mcl = np->rx_mcl + pages_flipped; + mcl->op = __HYPERVISOR_mmu_update; + mcl->args[0] = (unsigned long)np->rx_mmu; + mcl->args[1] = pages_flipped; + mcl->args[2] = 0; + mcl->args[3] = DOMID_SELF; + (void)HYPERVISOR_multicall(np->rx_mcl, + pages_flipped + 1); } - skb->proto_csum_blank = !!(rx->flags & NETRXF_csum_blank); - - np->stats.rx_packets++; - np->stats.rx_bytes += rx->status; - - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - /* Remap the page. */ - MULTI_update_va_mapping(mcl, (unsigned long)skb->head, - pfn_pte_ma(mfn, PAGE_KERNEL), - 0); - mcl++; - mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT) - | MMU_MACHPHYS_UPDATE; - mmu->val = __pa(skb->head) >> PAGE_SHIFT; - mmu++; - - set_phys_to_machine(__pa(skb->head) >> PAGE_SHIFT, - mfn); - } - - __skb_queue_tail(&rxq, skb); - } - - /* Some pages are no longer absent... */ - balloon_update_driver_allowance(-work_done); - - /* Do all the remapping work, and M2P updates, in one big hypercall. */ - if (likely((mcl - np->rx_mcl) != 0)) { - mcl->op = __HYPERVISOR_mmu_update; - mcl->args[0] = (unsigned long)np->rx_mmu; - mcl->args[1] = mmu - np->rx_mmu; - mcl->args[2] = 0; - mcl->args[3] = DOMID_SELF; - mcl++; - (void)HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl); - } + } + + while ((skb = __skb_dequeue(&errq))) + kfree_skb(skb); while ((skb = __skb_dequeue(&rxq)) != NULL) { - if (skb->len > (dev->mtu + ETH_HLEN + 4)) { - if (net_ratelimit()) - printk(KERN_INFO "Received packet too big for " - "MTU (%d > %d)\n", - skb->len - ETH_HLEN - 4, dev->mtu); - skb->len = 0; - skb->tail = skb->data; - init_skb_shinfo(skb); - dev_kfree_skb(skb); - continue; - } - - /* - * Enough room in skbuff for the data we were passed? Also, - * Linux expects at least 16 bytes headroom in each rx buffer. - */ - if (unlikely(skb->tail > skb->end) || - unlikely((skb->data - skb->head) < 16)) { - if (net_ratelimit()) { - if (skb->tail > skb->end) - printk(KERN_INFO "Received packet " - "is %zd bytes beyond tail.\n", - skb->tail - skb->end); - else - printk(KERN_INFO "Received packet " - "is %zd bytes before head.\n", - 16 - (skb->data - skb->head)); - } - - nskb = __dev_alloc_skb(skb->len + 2, - GFP_ATOMIC|__GFP_NOWARN); - if (nskb != NULL) { - skb_reserve(nskb, 2); - skb_put(nskb, skb->len); - memcpy(nskb->data, skb->data, skb->len); - /* Copy any other fields we already set up. */ - nskb->dev = skb->dev; - nskb->ip_summed = skb->ip_summed; - nskb->proto_data_valid = skb->proto_data_valid; - nskb->proto_csum_blank = skb->proto_csum_blank; - } - - /* Reinitialise and then destroy the old skbuff. */ - skb->len = 0; - skb->tail = skb->data; - init_skb_shinfo(skb); - dev_kfree_skb(skb); - - /* Switch old for new, if we copied the buffer. 
*/ - if ((skb = nskb) == NULL) - continue; - } - - /* Set the shinfo area, which is hidden behind the data. */ - init_skb_shinfo(skb); + struct page *page = (struct page *)skb->nh.raw; + void *vaddr = page_address(page); + + memcpy(skb->data, vaddr + (skb->h.raw - skb->nh.raw), + skb_headlen(skb)); + + if (page != skb_shinfo(skb)->frags[0].page) + __free_page(page); + /* Ethernet work: Delayed to here as it peeks the header. */ skb->protocol = eth_type_trans(skb, dev); @@ -994,8 +1386,6 @@ static int netif_poll(struct net_device netif_receive_skb(skb); dev->last_rx = jiffies; } - - np->rx.rsp_cons = i; /* If we get a callback with very few responses, reduce fill target. */ /* NB. Note exponential increase, linear decrease. */ @@ -1024,74 +1414,12 @@ static int netif_poll(struct net_device return more_to_do; } - -static int network_close(struct net_device *dev) -{ - struct netfront_info *np = netdev_priv(dev); - netif_stop_queue(np->netdev); - return 0; -} - - -static struct net_device_stats *network_get_stats(struct net_device *dev) -{ - struct netfront_info *np = netdev_priv(dev); - return &np->stats; -} - -static int xennet_change_mtu(struct net_device *dev, int mtu) -{ - int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN; - - if (mtu > max) - return -EINVAL; - dev->mtu = mtu; - return 0; -} - -static int xennet_set_sg(struct net_device *dev, u32 data) -{ - if (data) { - struct netfront_info *np = netdev_priv(dev); - int val; - - if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg", - "%d", &val) < 0) - val = 0; - if (!val) - return -ENOSYS; - } else if (dev->mtu > ETH_DATA_LEN) - dev->mtu = ETH_DATA_LEN; - - return ethtool_op_set_sg(dev, data); -} - -static void xennet_set_features(struct net_device *dev) -{ - xennet_set_sg(dev, 1); -} - -static void network_connect(struct net_device *dev) -{ - struct netfront_info *np = netdev_priv(dev); - int i, requeue_idx; +static void netif_release_tx_bufs(struct netfront_info *np) +{ struct sk_buff *skb; - - xennet_set_features(dev); - - spin_lock_irq(&np->tx_lock); - spin_lock(&np->rx_lock); - - /* - * Recovery procedure: - * NB. Freelist index entries are always going to be less than - * PAGE_OFFSET, whereas pointers to skbs will always be equal or - * greater than PAGE_OFFSET: we use this property to distinguish - * them. - */ - - /* Step 1: Discard all pending TX packet fragments. 
*/ - for (requeue_idx = 0, i = 1; i <= NET_TX_RING_SIZE; i++) { + int i; + + for (i = 1; i <= NET_TX_RING_SIZE; i++) { if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET) continue; @@ -1104,22 +1432,218 @@ static void network_connect(struct net_d add_id_to_freelist(np->tx_skbs, i); dev_kfree_skb_irq(skb); } +} + +static void netif_release_rx_bufs(struct netfront_info *np) +{ + struct mmu_update *mmu = np->rx_mmu; + struct multicall_entry *mcl = np->rx_mcl; + struct sk_buff_head free_list; + struct sk_buff *skb; + unsigned long mfn; + int xfer = 0, noxfer = 0, unused = 0; + int id, ref; + + if (np->copying_receiver) { + printk("%s: fix me for copying receiver.\n", __FUNCTION__); + return; + } + + skb_queue_head_init(&free_list); + + spin_lock(&np->rx_lock); + + for (id = 0; id < NET_RX_RING_SIZE; id++) { + if ((ref = np->grant_rx_ref[id]) == GRANT_INVALID_REF) { + unused++; + continue; + } + + skb = np->rx_skbs[id]; + mfn = gnttab_end_foreign_transfer_ref(ref); + gnttab_release_grant_reference(&np->gref_rx_head, ref); + np->grant_rx_ref[id] = GRANT_INVALID_REF; + add_id_to_freelist(np->rx_skbs, id); + + if (0 == mfn) { + struct page *page = skb_shinfo(skb)->frags[0].page; + balloon_release_driver_page(page); + skb_shinfo(skb)->nr_frags = 0; + dev_kfree_skb(skb); + noxfer++; + continue; + } + + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Remap the page. */ + struct page *page = skb_shinfo(skb)->frags[0].page; + unsigned long pfn = page_to_pfn(page); + void *vaddr = page_address(page); + + MULTI_update_va_mapping(mcl, (unsigned long)vaddr, + pfn_pte_ma(mfn, PAGE_KERNEL), + 0); + mcl++; + mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT) + | MMU_MACHPHYS_UPDATE; + mmu->val = pfn; + mmu++; + + set_phys_to_machine(pfn, mfn); + } + __skb_queue_tail(&free_list, skb); + xfer++; + } + + printk("%s: %d xfer, %d noxfer, %d unused\n", + __FUNCTION__, xfer, noxfer, unused); + + if (xfer) { + /* Some pages are no longer absent... */ + balloon_update_driver_allowance(-xfer); + + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Do all the remapping work and M2P updates. */ + mcl->op = __HYPERVISOR_mmu_update; + mcl->args[0] = (unsigned long)np->rx_mmu; + mcl->args[1] = mmu - np->rx_mmu; + mcl->args[2] = 0; + mcl->args[3] = DOMID_SELF; + mcl++; + HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl); + } + } + + while ((skb = __skb_dequeue(&free_list)) != NULL) + dev_kfree_skb(skb); + + spin_unlock(&np->rx_lock); +} + +static int network_close(struct net_device *dev) +{ + struct netfront_info *np = netdev_priv(dev); + netif_stop_queue(np->netdev); + return 0; +} + + +static struct net_device_stats *network_get_stats(struct net_device *dev) +{ + struct netfront_info *np = netdev_priv(dev); + return &np->stats; +} + +static int xennet_change_mtu(struct net_device *dev, int mtu) +{ + int max = xennet_can_sg(dev) ? 
65535 - ETH_HLEN : ETH_DATA_LEN; + + if (mtu > max) + return -EINVAL; + dev->mtu = mtu; + return 0; +} + +static int xennet_set_sg(struct net_device *dev, u32 data) +{ + if (data) { + struct netfront_info *np = netdev_priv(dev); + int val; + + if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg", + "%d", &val) < 0) + val = 0; + if (!val) + return -ENOSYS; + } else if (dev->mtu > ETH_DATA_LEN) + dev->mtu = ETH_DATA_LEN; + + return ethtool_op_set_sg(dev, data); +} + +static int xennet_set_tso(struct net_device *dev, u32 data) +{ +#ifdef HAVE_GSO + if (data) { + struct netfront_info *np = netdev_priv(dev); + int val; + + if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, + "feature-gso-tcpv4", "%d", &val) < 0) + val = 0; + if (!val) + return -ENOSYS; + } + + return ethtool_op_set_tso(dev, data); +#else + return -ENOSYS; +#endif +} + +static void xennet_set_features(struct net_device *dev) +{ + dev_disable_gso_features(dev); + xennet_set_sg(dev, 0); + + /* We need checksum offload to enable scatter/gather and TSO. */ + if (!(dev->features & NETIF_F_IP_CSUM)) + return; + + if (!xennet_set_sg(dev, 1)) + xennet_set_tso(dev, 1); +} + +static void network_connect(struct net_device *dev) +{ + struct netfront_info *np = netdev_priv(dev); + int i, requeue_idx; + struct sk_buff *skb; + grant_ref_t ref; + netif_rx_request_t *req; + + xennet_set_features(dev); + + spin_lock_irq(&np->tx_lock); + spin_lock(&np->rx_lock); + + /* + * Recovery procedure: + * NB. Freelist index entries are always going to be less than + * PAGE_OFFSET, whereas pointers to skbs will always be equal or + * greater than PAGE_OFFSET: we use this property to distinguish + * them. + */ + + /* Step 1: Discard all pending TX packet fragments. */ + netif_release_tx_bufs(np); /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */ - for (requeue_idx = 0, i = 1; i <= NET_RX_RING_SIZE; i++) { - if ((unsigned long)np->rx_skbs[i] < PAGE_OFFSET) + for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { + if (!np->rx_skbs[i]) continue; - gnttab_grant_foreign_transfer_ref( - np->grant_rx_ref[i], np->xbdev->otherend_id, - __pa(np->rx_skbs[i]->data) >> PAGE_SHIFT); - RING_GET_REQUEST(&np->rx, requeue_idx)->gref = - np->grant_rx_ref[i]; - RING_GET_REQUEST(&np->rx, requeue_idx)->id = i; + + skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i); + ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i); + req = RING_GET_REQUEST(&np->rx, requeue_idx); + + if (!np->copying_receiver) { + gnttab_grant_foreign_transfer_ref( + ref, np->xbdev->otherend_id, + page_to_pfn(skb_shinfo(skb)->frags->page)); + } else { + gnttab_grant_foreign_access_ref( + ref, np->xbdev->otherend_id, + page_to_pfn(skb_shinfo(skb)->frags->page), + 0); + } + req->gref = ref; + req->id = requeue_idx; + requeue_idx++; } np->rx.req_prod_pvt = requeue_idx; - RING_PUSH_REQUESTS(&np->rx); /* * Step 3: All public and private state should now be sane. 
Get @@ -1139,6 +1663,8 @@ static void netif_uninit(struct net_devi static void netif_uninit(struct net_device *dev) { struct netfront_info *np = netdev_priv(dev); + netif_release_tx_bufs(np); + netif_release_rx_bufs(np); gnttab_free_grant_references(np->gref_tx_head); gnttab_free_grant_references(np->gref_rx_head); } @@ -1149,6 +1675,9 @@ static struct ethtool_ops network_ethtoo .set_tx_csum = ethtool_op_set_tx_csum, .get_sg = ethtool_op_get_sg, .set_sg = xennet_set_sg, + .get_tso = ethtool_op_get_tso, + .set_tso = xennet_set_tso, + .get_link = ethtool_op_get_link, }; #ifdef CONFIG_SYSFS @@ -1294,13 +1823,8 @@ static void network_set_multicast_list(s { } -/** Create a network device. - * @param handle device handle - * @param val return parameter for created device - * @return 0 on success, error code otherwise - */ -static struct net_device * __devinit create_netdev(int handle, - struct xenbus_device *dev) +static struct net_device * __devinit +create_netdev(int handle, int copying_receiver, struct xenbus_device *dev) { int i, err = 0; struct net_device *netdev = NULL; @@ -1313,9 +1837,10 @@ static struct net_device * __devinit cre return ERR_PTR(-ENOMEM); } - np = netdev_priv(netdev); - np->handle = handle; - np->xbdev = dev; + np = netdev_priv(netdev); + np->handle = handle; + np->xbdev = dev; + np->copying_receiver = copying_receiver; netif_carrier_off(netdev); @@ -1337,8 +1862,8 @@ static struct net_device * __devinit cre np->grant_tx_ref[i] = GRANT_INVALID_REF; } - for (i = 0; i <= NET_RX_RING_SIZE; i++) { - np->rx_skbs[i] = (void *)((unsigned long) i+1); + for (i = 0; i < NET_RX_RING_SIZE; i++) { + np->rx_skbs[i] = NULL; np->grant_rx_ref[i] = GRANT_INVALID_REF; } @@ -1372,27 +1897,9 @@ static struct net_device * __devinit cre SET_MODULE_OWNER(netdev); SET_NETDEV_DEV(netdev, &dev->dev); - err = register_netdev(netdev); - if (err) { - printk(KERN_WARNING "%s> register_netdev err=%d\n", - __FUNCTION__, err); - goto exit_free_rx; - } - - err = xennet_sysfs_addif(netdev); - if (err) { - /* This can be non-fatal: it only means no tuning parameters */ - printk(KERN_WARNING "%s> add sysfs failed err=%d\n", - __FUNCTION__, err); - } - np->netdev = netdev; - return netdev; - - exit_free_rx: - gnttab_free_grant_references(np->gref_rx_head); exit_free_tx: gnttab_free_grant_references(np->gref_tx_head); exit: @@ -1431,10 +1938,9 @@ static void netfront_closing(struct xenb { struct netfront_info *info = dev->dev.driver_data; - DPRINTK("netfront_closing: %s removed\n", dev->nodename); + DPRINTK("%s\n", dev->nodename); close_netdev(info); - xenbus_switch_state(dev, XenbusStateClosed); } @@ -1451,6 +1957,26 @@ static int __devexit netfront_remove(str return 0; } + +static int open_netdev(struct netfront_info *info) +{ + int err; + + err = register_netdev(info->netdev); + if (err) { + printk(KERN_WARNING "%s: register_netdev err=%d\n", + __FUNCTION__, err); + return err; + } + + err = xennet_sysfs_addif(info->netdev); + if (err) { + /* This can be non-fatal: it only means no tuning parameters */ + printk(KERN_WARNING "%s: add sysfs failed err=%d\n", + __FUNCTION__, err); + } + return 0; +} static void close_netdev(struct netfront_info *info) { @@ -1529,7 +2055,7 @@ static int __init netif_init(void) if (!is_running_on_xen()) return -ENODEV; - if (xen_start_info->flags & SIF_INITDOMAIN) + if (is_initial_xendomain()) return 0; IPRINTK("Initialising virtual ethernet driver.\n"); _______________________________________________ Xen-ppc-devel mailing list Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx 
http://lists.xensource.com/xen-ppc-devel
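
A few standalone sketches of the mechanisms this backport relies on follow; they are illustrations under stated assumptions, not code from the patch itself.

The refill loop in network_alloc_rx_buffers() no longer pulls ids from a freelist: it derives the slot directly from the request index (id = xennet_rxidx(req_prod + i)) and BUG()s if the slot is still occupied. xennet_rxidx() is defined earlier in the patch; the sketch below assumes it is the usual power-of-two mask (idx & (NET_RX_RING_SIZE - 1)), and the ring size is likewise an assumed value, so skb and grant-ref bookkeeping stay in lock-step with the ring as the free-running index wraps.

    #include <assert.h>
    #include <stdio.h>

    #define NET_RX_RING_SIZE 256            /* assumed power-of-two ring size */

    typedef unsigned int RING_IDX;

    /* Assumed definition: map a free-running ring index onto a bounded slot. */
    static unsigned int xennet_rxidx(RING_IDX idx)
    {
            return idx & (NET_RX_RING_SIZE - 1);
    }

    int main(void)
    {
            void *rx_skbs[NET_RX_RING_SIZE] = { NULL };
            RING_IDX req_prod = 0;
            int round;

            /* Fill and drain the ring a few times; slots always come back empty. */
            for (round = 0; round < 3; round++) {
                    int i;

                    for (i = 0; i < NET_RX_RING_SIZE; i++) {
                            unsigned int id = xennet_rxidx(req_prod + i);

                            assert(rx_skbs[id] == NULL);  /* the BUG_ON() in the patch */
                            rx_skbs[id] = &rx_skbs[id];   /* stand-in for an skb */
                    }
                    req_prod += NET_RX_RING_SIZE;

                    for (i = 0; i < NET_RX_RING_SIZE; i++)
                            rx_skbs[i] = NULL;            /* responses consumed */
            }
            printf("req_prod wrapped to %u with consistent slot reuse\n", req_prod);
            return 0;
    }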
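
network_alloc_rx_buffers() also switches from a bare RING_PUSH_REQUESTS() to RING_PUSH_REQUESTS_AND_CHECK_NOTIFY() followed by notify_remote_via_irq(), so the frontend only raises an event when the backend has armed one inside the newly published range. The single-threaded model below mirrors the unsigned wrap-around comparison of the public ring.h macro; the req_event field is the assumption here, and the memory barriers of the real macro are omitted because nothing runs concurrently in the demo.

    #include <stdio.h>

    typedef unsigned int RING_IDX;

    /* Minimal stand-in for the shared ring indices used by netfront/netback. */
    struct demo_ring {
            RING_IDX req_prod;   /* last request index published to the backend */
            RING_IDX req_event;  /* backend wants an event once req_prod reaches this */
    };

    /*
     * Publish requests up to 'new_prod' and report whether the peer must be
     * notified: only if it armed req_event inside the newly published range.
     */
    static int push_and_check_notify(struct demo_ring *r, RING_IDX new_prod)
    {
            RING_IDX old = r->req_prod;

            r->req_prod = new_prod;
            return (RING_IDX)(new_prod - r->req_event) < (RING_IDX)(new_prod - old);
    }

    int main(void)
    {
            struct demo_ring r = { .req_prod = 10, .req_event = 12 };

            /* Backend armed an event at 12; pushing up to 16 crosses it: notify. */
            printf("push to 16 -> notify=%d\n", push_and_check_notify(&r, 16));

            /* Backend has not re-armed below 40; a further push stays silent. */
            r.req_event = 40;
            printf("push to 20 -> notify=%d\n", push_and_check_notify(&r, 20));
            return 0;
    }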
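
On the transmit side, network_start_xmit() now lets a packet with a non-zero gso_size consume a second TX ring slot: the first slot keeps the ordinary netif_tx_request with NETTXF_extra_info set, and the following slot is reinterpreted as a netif_extra_info carrying the GSO type and segment size for the backend to resegment. The structures and the flag value below are trimmed-down stand-ins for the ones in Xen's public netif.h, not the real layout; only the filling order follows the patch.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Simplified stand-ins for the ring request layouts (not the real netif.h). */
    #define NETTXF_extra_info        (1 << 3)   /* assumed flag value */
    #define XEN_NETIF_EXTRA_TYPE_GSO 1
    #define XEN_NETIF_GSO_TYPE_TCPV4 1

    struct demo_tx_request {
            uint32_t gref;
            uint16_t offset, flags, id, size;
    };

    struct demo_extra_info {
            uint8_t  type, flags;
            uint16_t gso_size;
            uint8_t  gso_type;
    };

    /* A TX slot holds either a request or an extra-info record. */
    union demo_tx_slot {
            struct demo_tx_request req;
            struct demo_extra_info extra;
    };

    int main(void)
    {
            union demo_tx_slot ring[2];
            unsigned int gso_size = 1448;   /* e.g. TCPv4 MSS of the large packet */

            memset(ring, 0, sizeof(ring));

            /* Slot 0: the normal request, flagged so the backend reads slot 1. */
            ring[0].req.id = 7;
            ring[0].req.size = 65535;
            ring[0].req.flags = NETTXF_extra_info;

            /* Slot 1: the GSO metadata the backend needs to resegment. */
            ring[1].extra.type = XEN_NETIF_EXTRA_TYPE_GSO;
            ring[1].extra.gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
            ring[1].extra.gso_size = (uint16_t)gso_size;

            printf("slot0: id=%u size=%u flags=%#x\n", (unsigned)ring[0].req.id,
                   (unsigned)ring[0].req.size, (unsigned)ring[0].req.flags);
            printf("slot1: gso type=%u size=%u\n", (unsigned)ring[1].extra.gso_type,
                   (unsigned)ring[1].extra.gso_size);
            return 0;
    }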
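
The long comment in netif_poll() about truesize boils down to one adjustment: after the packet head is copied into the linear area, truesize grows by the fragment bytes actually attached and shrinks by whatever part of the copy buffer went unused, i.e. skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len). The arithmetic check below uses RX_COPY_THRESHOLD = 256 as an assumed value (the constant is defined elsewhere in netfront, not in this hunk).

    #include <stdio.h>

    #define RX_COPY_THRESHOLD 256   /* assumed linear-area copy size */

    /* Return the truesize delta applied by netif_poll() for one packet. */
    static int truesize_delta(int status /* bytes reported by the backend */)
    {
            int len = status > RX_COPY_THRESHOLD ? RX_COPY_THRESHOLD : status;
            int data_len = status - len;        /* bytes left in the fragment */

            return data_len - (RX_COPY_THRESHOLD - len);
    }

    int main(void)
    {
            /* Full-size frame: 1244 fragment bytes are added, nothing is unused. */
            printf("1500-byte packet: truesize += %d\n", truesize_delta(1500));

            /* Tiny frame (e.g. 66 bytes of headers): truesize is trimmed by the
             * 190 bytes of the copy area that carry no data. */
            printf("  66-byte packet: truesize += %d\n", truesize_delta(66));
            return 0;
    }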
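
In the page-flipping path the accounting comes in pairs: the refill loop hands pages to Xen (balloon_update_driver_allowance(i) plus XENMEM_decrease_reservation) and netif_poll() later credits them back with balloon_update_driver_allowance(-pages_flipped); the copying receiver skips both. The toy ledger below only checks that the allowance nets out to zero across one flip round-trip, with a plain counter standing in for the balloon driver.

    #include <assert.h>
    #include <stdio.h>

    /* Pages the balloon driver believes the netfront has lent to the backend. */
    static long driver_allowance;

    static void balloon_update(long delta)   /* stand-in for the real call */
    {
            driver_allowance += delta;
    }

    int main(void)
    {
            int batch = 64;         /* pages granted for transfer in one refill */

            balloon_update(batch);  /* refill: pages leave this domain */
            printf("after refill:  allowance=%ld\n", driver_allowance);

            balloon_update(-batch); /* poll: backend flipped them all back */
            printf("after receive: allowance=%ld\n", driver_allowance);

            assert(driver_allowance == 0);
            return 0;
    }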
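
Finally, xennet_set_features() now starts from a conservative state (GSO disabled, SG off) and only re-enables scatter/gather and then TSO when the device does IP checksum offload and the backend advertises "feature-sg" / "feature-gso-tcpv4" in xenstore. The sketch below reproduces just that decision order against a fake key list standing in for xenbus_scanf(); the key names come from the patch, everything else is scaffolding.

    #include <stdio.h>
    #include <string.h>

    /* Fake backend store standing in for xenstore (scaffolding only). */
    static const char *backend_features[] = { "feature-sg", "feature-gso-tcpv4" };

    static int backend_advertises(const char *key)
    {
            size_t i;

            for (i = 0; i < sizeof(backend_features)/sizeof(backend_features[0]); i++)
                    if (!strcmp(backend_features[i], key))
                            return 1;
            return 0;
    }

    struct demo_dev {
            int ip_csum;   /* device does IP checksum offload */
            int sg;        /* scatter/gather enabled */
            int tso;       /* TCP segmentation offload enabled */
    };

    /* Mirrors the order of checks in xennet_set_features(). */
    static void set_features(struct demo_dev *dev)
    {
            dev->sg = dev->tso = 0;                 /* start conservative */

            if (!dev->ip_csum)                      /* SG and TSO need csum offload */
                    return;
            if (!backend_advertises("feature-sg"))
                    return;
            dev->sg = 1;
            if (backend_advertises("feature-gso-tcpv4"))
                    dev->tso = 1;
    }

    int main(void)
    {
            struct demo_dev dev = { .ip_csum = 1 };

            set_features(&dev);
            printf("sg=%d tso=%d\n", dev.sg, dev.tso);
            return 0;
    }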