From 140c55d4b59581680dc8963612bdc79d19f7bef6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Aug 2014 11:49:29 +0200 Subject: [PATCH 1/9] net-timestamp: sock_tx_timestamp() fix sock_tx_timestamp() should not ignore initial *tx_flags value, as TCP stack can store SKBTX_SHARED_FRAG in it. Also first argument (struct sock *) can be const. Signed-off-by: Eric Dumazet Fixes: 4ed2d765dfac ("net-timestamp: TCP timestamping") Cc: Willem de Bruijn Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/sock.h | 6 ++++-- net/socket.c | 20 +++++++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 52fe0bc5598a..38805fa02e48 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2199,9 +2199,11 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, /** * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped * @sk: socket sending this packet - * @tx_flags: filled with instructions for time stamping + * @tx_flags: completed with instructions for time stamping + * + * Note : callers should take care of initial *tx_flags value (usually 0) */ -void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); +void sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags); /** * sk_eat_skb - Release a skb if it is no longer needed diff --git a/net/socket.c b/net/socket.c index ae89569a2db5..95ee7d8682e7 100644 --- a/net/socket.c +++ b/net/socket.c @@ -610,20 +610,26 @@ void sock_release(struct socket *sock) } EXPORT_SYMBOL(sock_release); -void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) +void sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags) { - *tx_flags = 0; + u8 flags = *tx_flags; + if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_HARDWARE) - *tx_flags |= SKBTX_HW_TSTAMP; + flags |= SKBTX_HW_TSTAMP; + if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE) - *tx_flags |= SKBTX_SW_TSTAMP; + flags |= SKBTX_SW_TSTAMP; + if (sk->sk_tsflags 
& SOF_TIMESTAMPING_TX_SCHED) - *tx_flags |= SKBTX_SCHED_TSTAMP; + flags |= SKBTX_SCHED_TSTAMP; + if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK) - *tx_flags |= SKBTX_ACK_TSTAMP; + flags |= SKBTX_ACK_TSTAMP; if (sock_flag(sk, SOCK_WIFI_STATUS)) - *tx_flags |= SKBTX_WIFI_STATUS; + flags |= SKBTX_WIFI_STATUS; + + *tx_flags = flags; } EXPORT_SYMBOL(sock_tx_timestamp); From 8e1e605902d07c0ce1c3331a8d0a12a6c9e5d7fb Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Wed, 6 Aug 2014 17:10:59 +0530 Subject: [PATCH 2/9] cxgb4: Fix for SR-IOV VF initialization Commit 35b1de5 ("rdma/cxgb4: Fixes cxgb4 probe failure in VM when PF is exposed through PCI Passthrough") introduced a regression, where VF failed to initialize for Physical function 0 to Physical Function 3. In the above commit, we removed the code which used to enable sriov for PF0 to PF3. Now adding it back to get sriov working. V2: Removed SRIOV loop for PF[0..3] to instantiate the VF's as per David Miller's comment Signed-off-by: Hariprasad Shenai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 4247356c16ff..1a162d21d8ac 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -6527,11 +6527,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* We control everything through one PF */ func = SOURCEPF_GET(readl(adapter->regs + PL_WHOAMI)); - if ((pdev->device == 0xa000 && func != 0) || - func != ent->driver_data) { + if (func != ent->driver_data) { pci_save_state(pdev); /* to restore SR-IOV later */ - err = 0; - goto out_unmap_bar0; + goto sriov; } adapter->pdev = pdev; @@ -6697,6 +6695,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (is_offload(adapter)) attach_ulds(adapter); +sriov: #ifdef CONFIG_PCI_IOV if (func < ARRAY_SIZE(num_vf) && num_vf[func] > 0) if (pci_enable_sriov(pdev, num_vf[func]) == 0) From be136ed30a4345e42ad846c1b9d925932dab303b Mon Sep 17 00:00:00 2001 From: KY Srinivasan Date: Wed, 6 Aug 2014 11:11:00 -0700 Subject: [PATCH 3/9] hyperv: Adjust the size of sendbuf region to support ws2008r2 WS2008R2 is a supported platform and it turns out that the maximum sendbuf size that ws2008R2 can support is only 15MB. Make the necessary adjustment. Signed-off-by: K. Y. Srinivasan Signed-off-by: David S. 
Miller --- drivers/net/hyperv/hyperv_net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 02a3ee282eee..d5e07def6a59 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -585,7 +585,7 @@ struct nvsp_message { #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */ -#define NETVSC_SEND_BUFFER_SIZE (1024 * 1024 * 16) /* 16MB */ +#define NETVSC_SEND_BUFFER_SIZE (1024 * 1024 * 15) /* 15MB */ #define NETVSC_INVALID_INDEX -1 From f066e2b091a50f0b76ade87250065d65996b93dd Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 6 Aug 2014 15:09:44 -0400 Subject: [PATCH 4/9] net-timestamp: cumulative tcp timestamping fixes A set of small fixes pointed out just after the merge: - make tcp_tx_timestamp static - make tcp_gso_tstamp static - use before() to compare TCP seqno, instead of cast to u64 - add tstamp to tx_flags in GSO, instead of overwrite tx_flags - record skb_shinfo(skb)->tskey for all timestamps, also HW. - optimization in tcp_tx_timestamp: call sock_tx_timestamp only if a tstamp option is set. Signed-off-by: Willem de Bruijn Fixes: 4ed2d765dfac ("net-timestamp: TCP timestamping") Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 12 +++++++----- net/ipv4/tcp_offload.c | 8 ++++---- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 744af67a5989..181b70ebd964 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -426,13 +426,15 @@ void tcp_init_sock(struct sock *sk) } EXPORT_SYMBOL(tcp_init_sock); -void tcp_tx_timestamp(struct sock *sk, struct sk_buff *skb) +static void tcp_tx_timestamp(struct sock *sk, struct sk_buff *skb) { - struct skb_shared_info *shinfo = skb_shinfo(skb); + if (sk->sk_tsflags) { + struct skb_shared_info *shinfo = skb_shinfo(skb); - sock_tx_timestamp(sk, &shinfo->tx_flags); - if (shinfo->tx_flags & SKBTX_ANY_SW_TSTAMP) - shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1; + sock_tx_timestamp(sk, &shinfo->tx_flags); + if (shinfo->tx_flags & SKBTX_ANY_TSTAMP) + shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1; + } } /* diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index f597119fc4e7..bc1b83cb8309 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -14,12 +14,12 @@ #include #include -void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, unsigned int seq, - unsigned int mss) +static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, + unsigned int seq, unsigned int mss) { while (skb) { - if (ts_seq < (__u64) seq + mss) { - skb_shinfo(skb)->tx_flags = SKBTX_SW_TSTAMP; + if (before(ts_seq, seq + mss)) { + skb_shinfo(skb)->tx_flags |= SKBTX_SW_TSTAMP; skb_shinfo(skb)->tskey = ts_seq; return; } From 8b429468a6b4746e88abbf5649c9e592d7b3e355 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 5 Aug 2014 21:42:41 -0700 Subject: [PATCH 5/9] vmxnet3: fix decimal printf format specifiers prefixed with 0x The prefix suggests the number should be printed in hex, so use the %x specifier to do that. Found by using regex suggested by Joe Perches. Signed-off-by: Hans Wennborg Signed-off-by: David S. 
Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index b76f7dcde0db..d0db371c30a7 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -766,7 +766,7 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx, gdesc->dword[3] = 0; netdev_dbg(adapter->netdev, - "txd[%u]: 0x%llu %u %u\n", + "txd[%u]: 0x%llx %u %u\n", tq->tx_ring.next2fill, le64_to_cpu(gdesc->txd.addr), le32_to_cpu(gdesc->dword[2]), gdesc->dword[3]); vmxnet3_cmd_ring_adv_next2fill(&tq->tx_ring); From 753a2ad54ef45e3417a9d49537c2b42b04a2e1be Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Thu, 7 Aug 2014 00:17:09 +0200 Subject: [PATCH 6/9] net: reallocate new socket option number for IPV6_AUTOFLOWLABEL cb1ce2e ("ipv6: Implement automatic flow label generation on transmit") accidentally uses socket option 64, which is already used by ip6tables: IP6T_SO_SET_REPLACE / IP6T_SO_GET_INFO 64 IP6T_SO_SET_ADD_COUNTERS / IP6T_SO_GET_ENTRIES 65 There is a comment in include/uapi/linux/in6.h warning about that. Allocate 70 for this instead, which seems to be unused. Cc: Tom Herbert Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. 
Miller --- include/uapi/linux/in6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 22b7a69619d8..74a2a1773494 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -233,7 +233,6 @@ struct in6_flowlabel_req { #if 0 /* not yet */ #define IPV6_USE_MIN_MTU 63 #endif -#define IPV6_AUTOFLOWLABEL 64 /* * Netfilter (1) @@ -262,6 +261,7 @@ struct in6_flowlabel_req { * IP6T_SO_ORIGINAL_DST 80 */ +#define IPV6_AUTOFLOWLABEL 70 /* RFC5014: Source address selection */ #define IPV6_ADDR_PREFERENCES 72 From 269f8cb2608c7bdebadeb9a2061ba42dcd6d4ff7 Mon Sep 17 00:00:00 2001 From: Francois Romieu Date: Thu, 7 Aug 2014 00:36:40 +0200 Subject: [PATCH 7/9] net: fix USB network driver config option. It must be tristate to avoid broken dependencies with kernel built-in usb network drivers when usb support is module only. When net config option is set, least surprise default should match usb. Wireless RNDIS USB driver used to select USB_USBNET. USB_USBNET now depends on USB_NET_DRIVERS so the latter should be selected as well. Signed-off-by: Francois Romieu Signed-off-by: David S. 
Miller --- drivers/net/usb/Kconfig | 4 ++-- drivers/net/wireless/Kconfig | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index 9f194a0bef7c..37eed4d84e9c 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -5,8 +5,8 @@ comment "Host-side USB support is needed for USB Network Adapter support" depends on !USB && NET menuconfig USB_NET_DRIVERS - bool "USB Network Adapters" - default y + tristate "USB Network Adapters" + default USB if USB depends on USB && NET if USB_NET_DRIVERS diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig index b2137e8f7ca6..16604bdf5197 100644 --- a/drivers/net/wireless/Kconfig +++ b/drivers/net/wireless/Kconfig @@ -189,6 +189,7 @@ config USB_NET_RNDIS_WLAN tristate "Wireless RNDIS USB support" depends on USB depends on CFG80211 + select USB_NET_DRIVERS select USB_USBNET select USB_NET_CDCETHER select USB_NET_RNDIS_HOST From 9ea88a153001ffeb3d8810917e8eea62ca9b6f25 Mon Sep 17 00:00:00 2001 From: Dmitry Popov Date: Thu, 7 Aug 2014 02:38:22 +0400 Subject: [PATCH 8/9] tcp: md5: check md5 signature without socket lock Since a8afca032 (tcp: md5: protects md5sig_info with RCU) tcp_md5_do_lookup doesn't require socket lock, rcu_read_lock is enough. Therefore socket lock is no longer required for tcp_v{4,6}_inbound_md5_hash too, so we can move these calls (wrapped with rcu_read_{,un}lock) before bh_lock_sock: from tcp_v{4,6}_do_rcv to tcp_v{4,6}_rcv. Signed-off-by: Dmitry Popov Signed-off-by: David S. 
Miller --- net/ipv4/tcp_ipv4.c | 36 +++++++++++++++++++++++++----------- net/ipv6/tcp_ipv6.c | 25 +++++++++++++++++++------ 2 files changed, 44 insertions(+), 17 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 992a1f926009..dceff5fe8e66 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1167,7 +1167,8 @@ clear_hash_noput: } EXPORT_SYMBOL(tcp_v4_md5_hash_skb); -static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) +static bool __tcp_v4_inbound_md5_hash(struct sock *sk, + const struct sk_buff *skb) { /* * This gets called for each TCP segment that arrives @@ -1220,6 +1221,17 @@ static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) return false; } +static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) +{ + bool ret; + + rcu_read_lock(); + ret = __tcp_v4_inbound_md5_hash(sk, skb); + rcu_read_unlock(); + + return ret; +} + #endif static void tcp_v4_init_req(struct request_sock *req, struct sock *sk, @@ -1432,16 +1444,6 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) { struct sock *rsk; -#ifdef CONFIG_TCP_MD5SIG - /* - * We really want to reject the packet as early as possible - * if: - * o We're expecting an MD5'd packet and this is no MD5 tcp option - * o There is an MD5 option and we're not expecting one - */ - if (tcp_v4_inbound_md5_hash(sk, skb)) - goto discard; -#endif if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ struct dst_entry *dst = sk->sk_rx_dst; @@ -1644,6 +1646,18 @@ process: if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; + +#ifdef CONFIG_TCP_MD5SIG + /* + * We really want to reject the packet as early as possible + * if: + * o We're expecting an MD5'd packet and this is no MD5 tcp option + * o There is an MD5 option and we're not expecting one + */ + if (tcp_v4_inbound_md5_hash(sk, skb)) + goto discard_and_relse; +#endif + 
nf_reset(skb); if (sk_filter(sk, skb)) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 22055b098428..f2ce95502392 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -667,7 +667,8 @@ clear_hash_noput: return 1; } -static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) +static int __tcp_v6_inbound_md5_hash(struct sock *sk, + const struct sk_buff *skb) { const __u8 *hash_location = NULL; struct tcp_md5sig_key *hash_expected; @@ -707,6 +708,18 @@ static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) } return 0; } + +static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) +{ + int ret; + + rcu_read_lock(); + ret = __tcp_v6_inbound_md5_hash(sk, skb); + rcu_read_unlock(); + + return ret; +} + #endif static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, @@ -1247,11 +1260,6 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_do_rcv(sk, skb); -#ifdef CONFIG_TCP_MD5SIG - if (tcp_v6_inbound_md5_hash(sk, skb)) - goto discard; -#endif - if (sk_filter(sk, skb)) goto discard; @@ -1424,6 +1432,11 @@ process: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; +#ifdef CONFIG_TCP_MD5SIG + if (tcp_v6_inbound_md5_hash(sk, skb)) + goto discard_and_relse; +#endif + if (sk_filter(sk, skb)) goto discard_and_relse; From 6c8f7e70837468da4e658080d4448930fb597e1b Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 7 Aug 2014 00:18:47 +0100 Subject: [PATCH 9/9] netlink: hold nl_sk_hash_lock during diag dump Although RCU protection would be possible during diag dump, doing so allows for concurrent table mutations which can render the in-table offset between individual Netlink messages invalid and thus cause legitimate sockets to be skipped in the dump. Since the diag dump is relatively low volume and consistency is more important than performance, the table mutex is held during dump. 
Reported-by: Andrey Wagin Signed-off-by: Thomas Graf Fixes: e341694e3eb57fc ("netlink: Convert netlink_lookup() to use RCU protected hash table") Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 1 + net/netlink/af_netlink.h | 1 + net/netlink/diag.c | 3 +++ 3 files changed, 5 insertions(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 479a344563d8..a324b4b34c90 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -104,6 +104,7 @@ static atomic_t nl_table_users = ATOMIC_INIT(0); /* Protects netlink socket hash table mutations */ DEFINE_MUTEX(nl_sk_hash_lock); +EXPORT_SYMBOL_GPL(nl_sk_hash_lock); static int lockdep_nl_sk_hash_is_held(void) { diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 60f631fb7087..b20a1731759b 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -73,5 +73,6 @@ struct netlink_table { extern struct netlink_table *nl_table; extern rwlock_t nl_table_lock; +extern struct mutex nl_sk_hash_lock; #endif diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 7301850eb56f..de8c74a3c061 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -170,6 +170,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) req = nlmsg_data(cb->nlh); + mutex_lock(&nl_sk_hash_lock); read_lock(&nl_table_lock); if (req->sdiag_protocol == NDIAG_PROTO_ALL) { @@ -183,6 +184,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) } else { if (req->sdiag_protocol >= MAX_LINKS) { read_unlock(&nl_table_lock); + mutex_unlock(&nl_sk_hash_lock); return -ENOENT; } @@ -190,6 +192,7 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) } read_unlock(&nl_table_lock); + mutex_unlock(&nl_sk_hash_lock); return skb->len; }