[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [Patch] vnet-module ( for 3.1 && unstable )
Hi Robert, here are some questions: 1) If we just pull the Ethernet header and the IP header, does that move past the end of the head room? I haven't observed that before. 2) You don't need move_addr_to_kernel/move_addr_to_user; you just need to change fs to KERNEL_DS (i.e. set_fs) and then change it back. 3) Why not use "struct sock" directly to manipulate the socket instead of using an int file descriptor? On 7/16/07, Robert Valentan <R.Valentan@xxxxxxxxxxxxx> wrote: Hello! The attached patch makes the vnet-module work on i386 and x86_64. (I had no other systems for testing.) (My patch from 6.6.2007 is included.) - x86_64: the x86-64 kernel has __ARCH_WANT_SYS_SOCKETCALL defined, but has no socketcall. The patch replaces the _syscall definitions with working ones. - Moves the "skb_pull" function from the kernel to skb_util.h, because pulling into the data will raise a "BUG_ON" in the kernel. The kernel function does not allow pulling into the payload! - It is not always possible to modify a received sk_buff in order to send it to the network again. (vnet_forward.c and etherip.c) The code now works, I think with little or no time penalty. Summary: Solved: With this patch the vnet-module works on i386 and x86_64, including the peering option (except for the items listed under "Open" below). Open: - Only vnets with security option "none" work. - The module option "vnet_encaps" is ignored; "udp" is always used. 
Signed-off-by: Robert Valentan <R.Valentan@xxxxxxxxxxxxx> -- regards Robert Valentan diff -r aee991c7723a tools/vnet/vnet-module/esp.c --- a/tools/vnet/vnet-module/esp.c Wed May 9 15:34:47 2007 +++ b/tools/vnet/vnet-module/esp.c Sun Jul 15 17:03:47 2007 @@ -341,12 +341,13 @@ dprintf("> ETH header pull...\n"); memmove(skb->data, skb->mac.raw, ETH_HLEN); skb->mac.raw = skb->data; - __skb_pull(skb, ETH_HLEN); + skb_pull_vn(skb, ETH_HLEN); } dprintf("> IP header pull...\n"); memmove(skb->data, skb->nh.raw, ip_n); skb->nh.raw = skb->data; - __skb_pull(skb, ip_n); + skb_pull_vn(skb, ip_n); + skb->h.raw = skb->data; esph = (void*)skb->data; // Add spi and sequence number. esph->spi = sa->ident.spi; @@ -457,7 +458,7 @@ // Move skb->data back to ethernet header. // Do in 2 moves to ensure offsets are +ve, // since args to skb_pull/skb_push are unsigned. - __skb_pull(skb, head_n); + skb_pull_vn(skb, head_n); __skb_push(skb, skb->data - skb->mac.raw); // After this esph is invalid. esph = NULL; @@ -763,7 +764,7 @@ dprintf(">\n"); #ifdef DEBUG dprintf("> recv skb=\n"); - skb_print_bits(skb, 0, skb->len); + skb_print_bits("", skb, 0, skb->len); #endif ip_n = (skb->nh.iph->ihl << 2); if(skb->data == skb->mac.raw){ @@ -773,7 +774,7 @@ err = -EINVAL; goto exit; } - skb_pull(skb, eth_n + ip_n); + skb_pull_vn(skb, eth_n + ip_n); } addr = skb->nh.iph->daddr; err = esp_skb_header(skb, &esph); diff -r aee991c7723a tools/vnet/vnet-module/etherip.c --- a/tools/vnet/vnet-module/etherip.c Wed May 9 15:34:47 2007 +++ b/tools/vnet/vnet-module/etherip.c Sun Jul 15 17:03:47 2007 @@ -270,6 +270,7 @@ u32 saddr, daddr; char vnetbuf[VNET_ID_BUF]; struct ethhdr *eth; + struct sk_buff *newskb; dprintf(">\n"); saddr = skb->nh.iph->saddr; @@ -293,7 +294,7 @@ err = -EINVAL; goto exit; } - skb_pull(skb, pull_n); + skb_pull_vn(skb, pull_n); } // Assume skb->data points at etherip header. 
etheriph = (void*)skb->data; @@ -318,7 +319,18 @@ goto exit; } // Point at the headers in the contained ethernet frame. - skb->mac.raw = skb_pull(skb, etherip_n); + skb->mac.raw = skb_pull_vn(skb, etherip_n); + + newskb = alloc_skb(skb->len, GFP_ATOMIC); + if (!newskb) { + wprintf("> alloc new sk_buff failed \n"); + goto exit; + } + newskb->mac.raw = skb_put(newskb, skb->len); + skb_copy_bits(skb, 0, newskb->data, skb->len); + kfree_skb(skb); + skb = newskb; + eth = eth_hdr(skb); // Simulate the logic from eth_type_trans() @@ -340,27 +352,12 @@ // Assuming a standard Ethernet frame. // Should check for protocol? Support ETH_P_8021Q too. - skb->nh.raw = skb_pull(skb, ETH_HLEN); - -#ifdef __KERNEL__ - // Fix IP options, checksum, skb dst, netfilter state. - memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); - if (skb->ip_summed == CHECKSUM_HW){ - skb->ip_summed = CHECKSUM_NONE; - } - dst_release(skb->dst); - skb->dst = NULL; - nf_reset(skb); -#ifdef CONFIG_BRIDGE_NETFILTER - if(skb->nf_bridge){ - // Stop the eth header being clobbered by nf_bridge_maybe_copy_header(). 
- _nf_bridge_save_header(skb); - } -#endif -#endif // __KERNEL__ - - dprintf("> Unpacked srcaddr=" IPFMT " vnet=%s srcmac=" MACFMT " dstmac=" MACFMT "\n", + skb->nh.raw = skb_pull_vn(skb, ETH_HLEN); + skb->h.raw = newskb->nh.raw + sizeof(struct iphdr); + + dprintf("> Unpacked srcaddr=" IPFMT " dstaddr=" IPFMT " vnet=%s srcmac=" MACFMT " dstmac=" MACFMT "\n", NIPQUAD(skb->nh.iph->saddr), + NIPQUAD(skb->nh.iph->daddr), VnetId_ntoa(&vnet, vnetbuf), MAC6TUPLE(eth->h_source), MAC6TUPLE(eth->h_dest)); diff -r aee991c7723a tools/vnet/vnet-module/skb_util.h --- a/tools/vnet/vnet-module/skb_util.h Wed May 9 15:34:47 2007 +++ b/tools/vnet/vnet-module/skb_util.h Sun Jul 15 17:03:47 2007 @@ -66,6 +66,21 @@ } #endif + +/* + * It's a copy from {kernel}/include/linux/skbuff.h func '__skb_pull' and 'skb_pull' + * to aviodthe BUG_ON when pulling into the data (getting forwarded ip-frames) + */ +static inline unsigned char *__skb_pull_vn(struct sk_buff *skb, unsigned int len) +{ + skb->len -= len; + //BUG_ON(skb->len < skb->data_len); + return skb->data += len; +} +static inline unsigned char *skb_pull_vn(struct sk_buff *skb, unsigned int len) +{ + return unlikely(len > skb->len) ? NULL : __skb_pull_vn(skb, len); +} #ifdef __KERNEL__ diff -r aee991c7723a tools/vnet/vnet-module/varp.c --- a/tools/vnet/vnet-module/varp.c Wed May 9 15:34:47 2007 +++ b/tools/vnet/vnet-module/varp.c Sun Jul 15 17:03:47 2007 @@ -1365,7 +1365,7 @@ goto exit; } } - varph = (void*)skb_pull(skb, sizeof(struct udphdr)); + varph = (void*)skb_pull_vn(skb, sizeof(struct udphdr)); if(skb->len < sizeof(struct VnetMsgHdr)){ wprintf("> Varp msg too short: %d < %d\n", skb->len, sizeof(struct VnetMsgHdr)); goto exit; @@ -1378,11 +1378,11 @@ } break; case VUDP_ID: // Etherip-in-udp packet. - skb_pull(skb, sizeof(struct VnetMsgHdr)); + skb_pull_vn(skb, sizeof(struct VnetMsgHdr)); err = etherip_protocol_recv(skb); goto exit; case VFWD_ID: // Forwarded. 
- skb_pull(skb, sizeof(struct VnetMsgHdr)); + skb_pull_vn(skb, sizeof(struct VnetMsgHdr)); err = vnet_forward_recv(skb); goto exit; default: diff -r aee991c7723a tools/vnet/vnet-module/varp_socket.c --- a/tools/vnet/vnet-module/varp_socket.c Wed May 9 15:34:47 2007 +++ b/tools/vnet/vnet-module/varp_socket.c Sun Jul 15 17:03:47 2007 @@ -79,8 +79,8 @@ * Architectures using socketcall() define __ARCH_WANT_SYS_SOCKETCALL. */ -#ifdef __ARCH_WANT_SYS_SOCKETCALL - +#if defined(__ARCH_WANT_SYS_SOCKETCALLxx) && defined(__NR_socketcall) + /* Define the socketcall() syscall. * Multiplexes all the socket-related calls. * @@ -190,59 +190,260 @@ /* No socketcall - define the individual syscalls. */ -static inline _syscall3(int, socket, - int, family, - int, type, - int, protocol); - -static inline _syscall3(int, bind, - int, fd, - struct sockaddr *, umyaddr, - int, addrlen); - -static inline _syscall3(int, connect, - int, fd, - struct sockaddr *, uservaddr, - int, addrlen); - -static inline _syscall6(int, sendto, - int, fd, - void *, buff, - size_t, len, - unsigned, flags, - struct sockaddr *, addr, - int, addr_len); - -static inline _syscall6(int, recvfrom, - int, fd, - void *, ubuf, - size_t, size, - unsigned, flags, - struct sockaddr *, addr, - int *, addr_len); - -static inline _syscall5(int, setsockopt, - int, fd, - int, level, - int, optname, - void *, optval, - int, optlen); - -static inline _syscall5(int, getsockopt, - int, fd, - int, level, - int, optname, - void *, optval, - int *, optlen); - -static inline _syscall2(int, shutdown, - int, fd, - int, how); - -static inline _syscall3(int, getsockname, - int, fd, - struct sockaddr *, usockaddr, - int *, usockaddr_len); +/* the following code is copied from linux-kernel/net/socket.c + * As replacement of the __NR_socketcall, which exists not in x86_64 and + * same other systems. 
+ * An alternate will be an export of the copied-functions in net/socket.c + */ +#define MAX_SOCK_ADDR 128 + +int socket(int family, int type, int protocol){ + + int retval; + struct socket *sock; + retval = sock_create(family, type, protocol, &sock); + if (retval < 0) + goto out; + + retval = sock_map_fd(sock); + if (retval < 0) + goto out_release; + +out: + /* It may be already another descriptor 8) Not kernel problem. */ + return retval; + +out_release: + sock_release(sock); + return retval; + +} + + +int bind(int fd, struct sockaddr *umyaddr, int addrlen){ + + struct socket *sock; + char address[MAX_SOCK_ADDR]; + int err; + + if((sock = sockfd_lookup(fd,&err))!=NULL) + { + if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0) { + err = security_socket_bind(sock, (struct sockaddr *)address, addrlen); + if (err) { + sockfd_put(sock); + return err; + } + err = sock->ops->bind(sock, (struct sockaddr *)address, addrlen); + } + sockfd_put(sock); + } + return err; +} + + +int connect(int fd, struct sockaddr *uservaddr, int addrlen){ + + struct socket *sock; + char address[MAX_SOCK_ADDR]; + int err; + + sock = sockfd_lookup(fd, &err); + if (!sock) + goto out; + err = move_addr_to_kernel(uservaddr, addrlen, address); + if (err < 0) + goto out_put; + + err = security_socket_connect(sock, (struct sockaddr *)address, addrlen); + if (err) + goto out_put; + + err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen, + sock->file->f_flags); +out_put: + sockfd_put(sock); +out: + return err; +} + +int sendto(int fd, void * buff, size_t len, + unsigned flags, struct sockaddr *addr, + int addr_len){ + + struct socket *sock; + char address[MAX_SOCK_ADDR]; + int err; + struct msghdr msg; + struct iovec iov; + + sock = sockfd_lookup(fd, &err); + if (!sock) + goto out; + iov.iov_base=buff; + iov.iov_len=len; + msg.msg_name=NULL; + msg.msg_iov=&iov; + msg.msg_iovlen=1; + msg.msg_control=NULL; + msg.msg_controllen=0; + msg.msg_namelen=0; + if(addr) + { + err = 
move_addr_to_kernel(addr, addr_len, address); + if (err < 0) + goto out_put; + msg.msg_name=address; + msg.msg_namelen=addr_len; + } + if (sock->file->f_flags & O_NONBLOCK) + flags |= MSG_DONTWAIT; + msg.msg_flags = flags; + err = sock_sendmsg(sock, &msg, len); + +out_put: + sockfd_put(sock); +out: + return err; +} + + +int recvfrom(int fd, void * ubuf, size_t size, + unsigned flags, struct sockaddr *addr, + int *addr_len){ + + struct socket *sock; + struct iovec iov; + struct msghdr msg; + char address[MAX_SOCK_ADDR]; + int err,err2; + + sock = sockfd_lookup(fd, &err); + if (!sock) + goto out; + + msg.msg_control=NULL; + msg.msg_controllen=0; + msg.msg_iovlen=1; + msg.msg_iov=&iov; + iov.iov_len=size; + iov.iov_base=ubuf; + msg.msg_name=address; + msg.msg_namelen=MAX_SOCK_ADDR; + if (sock->file->f_flags & O_NONBLOCK) + flags |= MSG_DONTWAIT; + err=sock_recvmsg(sock, &msg, size, flags); + + if(err >= 0 && addr != NULL) + { + err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len); + if(err2<0) + err=err2; + } + sockfd_put(sock); +out: + return err; +} + +int setsockopt(int fd, int level, int optname, void *optval, int optlen){ + + int err; + struct socket *sock; + + if (optlen < 0) + return -EINVAL; + + if ((sock = sockfd_lookup(fd, &err))!=NULL) + { + err = security_socket_setsockopt(sock,level,optname); + if (err) { + sockfd_put(sock); + return err; + } + + if (level == SOL_SOCKET) + err=sock_setsockopt(sock,level,optname,optval,optlen); + else + err=sock->ops->setsockopt(sock, level, optname, optval, optlen); + sockfd_put(sock); + } + return err; +} + + +/* not possible, because sock_getsockopt is not exported ... 
+int getsockopt(int fd, int level, int optname, void *optval, int *optlen){ + + int err; + struct socket *sock; + + if ((sock = sockfd_lookup(fd, &err))!=NULL) + { + err = security_socket_getsockopt(sock, level, optname); + if (err) { + sockfd_put(sock); + return err; + } + + if (level == SOL_SOCKET) + err=sock_getsockopt(sock,level,optname,optval,optlen); + else + err=sock->ops->getsockopt(sock, level, optname, optval, optlen); + sockfd_put(sock); + } + return err; +} +*/ + + +int shutdown(int fd, int how){ + + int err; + struct socket *sock; + + if ((sock = sockfd_lookup(fd, &err))!=NULL) + { + err = security_socket_shutdown(sock, how); + if (err) { + sockfd_put(sock); + return err; + } + + err=sock->ops->shutdown(sock, how); + sockfd_put(sock); + } + return err; +} + +int getsockname(int fd, struct sockaddr *usockaddr, int *usockaddr_len){ + + struct socket *sock; + char address[MAX_SOCK_ADDR]; + int len, err; + + sock = sockfd_lookup(fd, &err); + if (!sock) + goto out; + + err = security_socket_getsockname(sock); + if (err) + goto out_put; + + err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0); + if (err) + goto out_put; + err = move_addr_to_user(address, len, usockaddr, usockaddr_len); + +out_put: + sockfd_put(sock); +out: + return err; +} + +/** + * End of copy from net/socket.c + */ + #endif /* __ARCH_WANT_SYS_SOCKETCALL */ diff -r aee991c7723a tools/vnet/vnet-module/vnet_forward.c --- a/tools/vnet/vnet-module/vnet_forward.c Wed May 9 15:34:47 2007 +++ b/tools/vnet/vnet-module/vnet_forward.c Sun Jul 15 17:03:47 2007 @@ -186,7 +186,7 @@ printk("\nWrapped packet:\n"); print_iphdr(__FUNCTION__, skb); print_udphdr(__FUNCTION__, skb); - skb_print_bits(__FUNCTION__, skb, 0, 0 * skb->len); + skb_print_bits(__FUNCTION__, skb, 0, skb->len); #endif err = _skb_xmit(skb, saddr); @@ -304,7 +304,7 @@ peer->rx_packets++; skb->mac.raw = NULL; skb->nh.raw = skb->data; - skb->h.raw = (void*)(skb->nh.iph + 1); + skb->h.raw = skb->data + sizeof(struct iphdr); 
if(!skb->nh.iph->saddr){ skb->nh.iph->saddr = addr.u.ip4.s_addr; } @@ -328,12 +328,17 @@ // Handle (a copy of) it ourselves, because // if it is looped-back by xmit it will be ignored. - //recvskb = skb_clone(skb, GFP_ATOMIC); - recvskb = pskb_copy(skb, GFP_ATOMIC); + recvskb = alloc_skb(skb->len, GFP_ATOMIC); if(recvskb){ + recvskb->protocol = htons(ETH_P_IP); + + recvskb->nh.raw = skb_put(recvskb, skb->len); + recvskb->h.raw = recvskb->data + sizeof(struct iphdr); + skb_copy_bits(skb, 0, recvskb->data, skb->len); + // Data points at the unwrapped iphdr, but varp_handle_message() // expects it to point at the udphdr, so pull. - skb_pull(recvskb, sizeof(struct iphdr)); + skb_pull_vn(recvskb, sizeof(struct iphdr)); if(varp_handle_message(recvskb) <= 0){ kfree_skb(recvskb); } _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel -- best regards, hanzhu _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxxxxxxxx http://lists.xensource.com/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our |