[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH net-next] xen-netfront: Add support for IPv6 offloads



This patch adds support for IPv6 checksum offload and GSO when those
fetaures are available in the backend.

Signed-off-by: Paul Durrant <paul.durrant@xxxxxxxxxx>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
Cc: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx>
Cc: David Vrabel <david.vrabel@xxxxxxxxxx>
---
 drivers/net/xen-netfront.c |  263 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 231 insertions(+), 32 deletions(-)

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index dd1011e..27212d8 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -616,7 +616,9 @@ static int xennet_start_xmit(struct sk_buff *skb, struct 
net_device *dev)
                tx->flags |= XEN_NETTXF_extra_info;
 
                gso->u.gso.size = skb_shinfo(skb)->gso_size;
-               gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+               gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ?
+                                 XEN_NETIF_GSO_TYPE_TCPV6 :
+                                 XEN_NETIF_GSO_TYPE_TCPV4;
                gso->u.gso.pad = 0;
                gso->u.gso.features = 0;
 
@@ -808,15 +810,18 @@ static int xennet_set_skb_gso(struct sk_buff *skb,
                return -EINVAL;
        }
 
-       /* Currently only TCPv4 S.O. is supported. */
-       if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
+       if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4 &&
+           gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV6) {
                if (net_ratelimit())
                        pr_warn("Bad GSO type %d\n", gso->u.gso.type);
                return -EINVAL;
        }
 
        skb_shinfo(skb)->gso_size = gso->u.gso.size;
-       skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+       skb_shinfo(skb)->gso_type =
+               (gso->u.gso.type == XEN_NETIF_GSO_TYPE_TCPV4) ?
+               SKB_GSO_TCPV4 :
+               SKB_GSO_TCPV6;
 
        /* Header must be checked, and gso_segs computed. */
        skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
@@ -856,62 +861,72 @@ static RING_IDX xennet_fill_frags(struct netfront_info 
*np,
        return cons;
 }
 
-static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
+static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len)
 {
-       struct iphdr *iph;
-       int err = -EPROTO;
-       int recalculate_partial_csum = 0;
-
-       /*
-        * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
-        * peers can fail to set NETRXF_csum_blank when sending a GSO
-        * frame. In this case force the SKB to CHECKSUM_PARTIAL and
-        * recalculate the partial checksum.
-        */
-       if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
-               struct netfront_info *np = netdev_priv(dev);
-               np->rx_gso_checksum_fixup++;
-               skb->ip_summed = CHECKSUM_PARTIAL;
-               recalculate_partial_csum = 1;
+       if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) {
+               /* If we need to pullup then pullup to the max, so we
+                * won't need to do it again.
+                */
+               int target = min_t(int, skb->len, MAX_TCP_HEADER);
+               __pskb_pull_tail(skb, target - skb_headlen(skb));
        }
+}
 
-       /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
-       if (skb->ip_summed != CHECKSUM_PARTIAL)
-               return 0;
+static int checksum_setup_ip(struct sk_buff *skb, int recalculate_partial_csum)
+{
+       struct iphdr *iph = (void *)skb->data;
+       unsigned int header_size;
+       unsigned int off;
+       int err = -EPROTO;
 
-       if (skb->protocol != htons(ETH_P_IP))
-               goto out;
+       off = sizeof(struct iphdr);
 
-       iph = (void *)skb->data;
+       header_size = skb->network_header + off + MAX_IPOPTLEN;
+       maybe_pull_tail(skb, header_size);
+
+       off = iph->ihl * 4;
 
        switch (iph->protocol) {
        case IPPROTO_TCP:
-               if (!skb_partial_csum_set(skb, 4 * iph->ihl,
+               if (!skb_partial_csum_set(skb, off,
                                          offsetof(struct tcphdr, check)))
                        goto out;
 
                if (recalculate_partial_csum) {
                        struct tcphdr *tcph = tcp_hdr(skb);
+
+                       header_size = skb->network_header +
+                               off +
+                               sizeof(struct tcphdr);
+                       maybe_pull_tail(skb, header_size);
+
                        tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-                                                        skb->len - iph->ihl*4,
+                                                        skb->len - off,
                                                         IPPROTO_TCP, 0);
                }
                break;
        case IPPROTO_UDP:
-               if (!skb_partial_csum_set(skb, 4 * iph->ihl,
+               if (!skb_partial_csum_set(skb, off,
                                          offsetof(struct udphdr, check)))
                        goto out;
 
                if (recalculate_partial_csum) {
                        struct udphdr *udph = udp_hdr(skb);
+
+                       header_size = skb->network_header +
+                               off +
+                               sizeof(struct udphdr);
+                       maybe_pull_tail(skb, header_size);
+
                        udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-                                                        skb->len - iph->ihl*4,
+                                                        skb->len - off,
                                                         IPPROTO_UDP, 0);
                }
                break;
        default:
                if (net_ratelimit())
-                       pr_err("Attempting to checksum a non-TCP/UDP packet, 
dropping a protocol %d packet\n",
+                       pr_err("Attempting to checksum a non-TCP/UDP packet, "
+                              "dropping a protocol %d packet\n",
                               iph->protocol);
                goto out;
        }
@@ -922,6 +937,158 @@ out:
        return err;
 }
 
+static int checksum_setup_ipv6(struct sk_buff *skb, int 
recalculate_partial_csum)
+{
+       int err = -EPROTO;
+       struct ipv6hdr *ipv6h = (void *)skb->data;
+       u8 nexthdr;
+       unsigned int header_size;
+       unsigned int off;
+       bool fragment;
+       bool done;
+
+       done = false;
+
+       off = sizeof(struct ipv6hdr);
+
+       header_size = skb->network_header + off;
+       maybe_pull_tail(skb, header_size);
+
+       nexthdr = ipv6h->nexthdr;
+
+       while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) &&
+              !done) {
+               switch (nexthdr) {
+               case IPPROTO_DSTOPTS:
+               case IPPROTO_HOPOPTS:
+               case IPPROTO_ROUTING: {
+                       struct ipv6_opt_hdr *hp = (void *)(skb->data + off);
+
+                       header_size = skb->network_header +
+                               off +
+                               sizeof(struct ipv6_opt_hdr);
+                       maybe_pull_tail(skb, header_size);
+
+                       nexthdr = hp->nexthdr;
+                       off += ipv6_optlen(hp);
+                       break;
+               }
+               case IPPROTO_AH: {
+                       struct ip_auth_hdr *hp = (void *)(skb->data + off);
+
+                       header_size = skb->network_header +
+                               off +
+                               sizeof(struct ip_auth_hdr);
+                       maybe_pull_tail(skb, header_size);
+
+                       nexthdr = hp->nexthdr;
+                       off += (hp->hdrlen+2)<<2;
+                       break;
+               }
+               case IPPROTO_FRAGMENT:
+                       fragment = true;
+                       /* fall through */
+               default:
+                       done = true;
+                       break;
+               }
+       }
+
+       if (!done) {
+               if (net_ratelimit())
+                       pr_err("Failed to parse packet header\n");
+               goto out;
+       }
+
+       if (fragment) {
+               if (net_ratelimit())
+                       pr_err("Packet is a fragment!\n");
+               goto out;
+       }
+
+       switch (nexthdr) {
+       case IPPROTO_TCP:
+               if (!skb_partial_csum_set(skb, off,
+                                         offsetof(struct tcphdr, check)))
+                       goto out;
+
+               if (recalculate_partial_csum) {
+                       struct tcphdr *tcph = tcp_hdr(skb);
+
+                       header_size = skb->network_header +
+                               off +
+                               sizeof(struct tcphdr);
+                       maybe_pull_tail(skb, header_size);
+
+                       tcph->check = ~csum_ipv6_magic(&ipv6h->saddr,
+                                                      &ipv6h->daddr,
+                                                      skb->len - off,
+                                                      IPPROTO_TCP, 0);
+               }
+               break;
+       case IPPROTO_UDP:
+               if (!skb_partial_csum_set(skb, off,
+                                         offsetof(struct udphdr, check)))
+                       goto out;
+
+               if (recalculate_partial_csum) {
+                       struct udphdr *udph = udp_hdr(skb);
+
+                       header_size = skb->network_header +
+                               off +
+                               sizeof(struct udphdr);
+                       maybe_pull_tail(skb, header_size);
+
+                       udph->check = ~csum_ipv6_magic(&ipv6h->saddr,
+                                                      &ipv6h->daddr,
+                                                      skb->len - off,
+                                                      IPPROTO_UDP, 0);
+               }
+               break;
+       default:
+               if (net_ratelimit())
+                       pr_err("Attempting to checksum a non-TCP/UDP packet, "
+                              "dropping a protocol %d packet\n",
+                              nexthdr);
+               goto out;
+       }
+
+       err = 0;
+
+out:
+       return err;
+}
+
+static int checksum_setup(struct net_device *dev, struct sk_buff *skb)
+{
+       int err = -EPROTO;
+       int recalculate_partial_csum = 0;
+
+       /*
+        * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
+        * peers can fail to set NETRXF_csum_blank when sending a GSO
+        * frame. In this case force the SKB to CHECKSUM_PARTIAL and
+        * recalculate the partial checksum.
+        */
+       if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
+               struct netfront_info *np = netdev_priv(dev);
+               np->rx_gso_checksum_fixup++;
+               skb->ip_summed = CHECKSUM_PARTIAL;
+               recalculate_partial_csum = 1;
+       }
+
+       /* A non-CHECKSUM_PARTIAL SKB does not require setup. */
+       if (skb->ip_summed != CHECKSUM_PARTIAL)
+               return 0;
+
+       if (skb->protocol == htons(ETH_P_IP))
+               err = checksum_setup_ip(skb, recalculate_partial_csum);
+       else if (skb->protocol == htons(ETH_P_IPV6))
+               err = checksum_setup_ipv6(skb, recalculate_partial_csum);
+
+       return err;
+}
+
 static int handle_incoming_queue(struct net_device *dev,
                                 struct sk_buff_head *rxq)
 {
@@ -1232,6 +1399,15 @@ static netdev_features_t xennet_fix_features(struct 
net_device *dev,
                        features &= ~NETIF_F_SG;
        }
 
+       if (features & NETIF_F_IPV6_CSUM) {
+               if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+                                "feature-ipv6-csum-offload", "%d", &val) < 0)
+                       val = 0;
+
+               if (!val)
+                       features &= ~NETIF_F_IPV6_CSUM;
+       }
+
        if (features & NETIF_F_TSO) {
                if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
                                 "feature-gso-tcpv4", "%d", &val) < 0)
@@ -1241,6 +1417,15 @@ static netdev_features_t xennet_fix_features(struct 
net_device *dev,
                        features &= ~NETIF_F_TSO;
        }
 
+       if (features & NETIF_F_TSO6) {
+               if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+                                "feature-gso-tcpv6", "%d", &val) < 0)
+                       val = 0;
+
+               if (!val)
+                       features &= ~NETIF_F_TSO6;
+       }
+
        return features;
 }
 
@@ -1373,7 +1558,9 @@ static struct net_device *xennet_create_dev(struct 
xenbus_device *dev)
        netif_napi_add(netdev, &np->napi, xennet_poll, 64);
        netdev->features        = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
                                  NETIF_F_GSO_ROBUST;
-       netdev->hw_features     = NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO;
+       netdev->hw_features     = NETIF_F_SG | 
+                                 NETIF_F_IPV6_CSUM |
+                                 NETIF_F_TSO | NETIF_F_TSO6;
 
        /*
          * Assume that all hw features are available for now. This set
@@ -1751,6 +1938,18 @@ again:
                goto abort_transaction;
        }
 
+       err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6", "%d", 1);
+       if (err) {
+               message = "writing feature-gso-tcpv6";
+               goto abort_transaction;
+       }
+
+       err = xenbus_printf(xbt, dev->nodename, "feature-ipv6-csum-offload", 
"%d", 1);
+       if (err) {
+               message = "writing feature-gso-tcpv6";
+               goto abort_transaction;
+       }
+
        err = xenbus_transaction_end(xbt, 0);
        if (err) {
                if (err == -EAGAIN)
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.