From 0c36820e2ab7d943ab1188230fdf2149826d33c0 Mon Sep 17 00:00:00 2001 From: Jonathan Davies Date: Tue, 31 Mar 2015 11:05:15 +0100 Subject: [PATCH 01/12] xen-netfront: transmit fully GSO-sized packets xen-netfront limits transmitted skbs to be at most 44 segments in size. However, GSO permits up to 65536 bytes, which means a maximum of 45 segments of 1448 bytes each. This slight reduction in the size of packets means a slight loss in efficiency. Since c/s 9ecd1a75d, xen-netfront sets gso_max_size to XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER, where XEN_NETIF_MAX_TX_SIZE is 65535 bytes. The calculation used by tcp_tso_autosize (and also tcp_xmit_size_goal since c/s 6c09fa09d) in determining when to split an skb into two is sk->sk_gso_max_size - 1 - MAX_TCP_HEADER. So the maximum permitted size of an skb is calculated to be (XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER) - 1 - MAX_TCP_HEADER. Intuitively, this looks like the wrong formula -- we don't need two TCP headers. Instead, there is no need to deviate from the default gso_max_size of 65536 as this already accommodates the size of the header. Currently, the largest skb transmitted by netfront is 63712 bytes (44 segments of 1448 bytes each), as observed via tcpdump. This patch makes netfront send skbs of up to 65160 bytes (45 segments of 1448 bytes each). Similarly, the maximum allowable mtu does not need to subtract MAX_TCP_HEADER as it relates to the size of the whole packet, including the header. Fixes: 9ecd1a75d977 ("xen-netfront: reduce gso_max_size to account for max TCP header") Signed-off-by: Jonathan Davies Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index e9b960f0ff32..720aaf6313d2 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1008,8 +1008,7 @@ err: static int xennet_change_mtu(struct net_device *dev, int mtu) { - int max = xennet_can_sg(dev) ? - XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER : ETH_DATA_LEN; + int max = xennet_can_sg(dev) ? XEN_NETIF_MAX_TX_SIZE : ETH_DATA_LEN; if (mtu > max) return -EINVAL; @@ -1279,8 +1278,6 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) netdev->ethtool_ops = &xennet_ethtool_ops; SET_NETDEV_DEV(netdev, &dev->dev); - netif_set_gso_max_size(netdev, XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER); - np->netdev = netdev; netif_carrier_off(netdev); From 666b805150efd62f05810ff0db08f44a2370c937 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 1 Apr 2015 20:26:46 -0400 Subject: [PATCH 02/12] tcp: fix FRTO undo on cumulative ACK of SACKed range On processing cumulative ACKs, the FRTO code was not checking the SACKed bit, meaning that there could be a spurious FRTO undo on a cumulative ACK of a previously SACKed skb. The FRTO code should only consider a cumulative ACK to indicate that an original/unretransmitted skb is newly ACKed if the skb was not yet SACKed. The effect of the spurious FRTO undo would typically be to make the connection think that all previously-sent packets were in flight when they really weren't, leading to a stall and an RTO. Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Fixes: e33099f96d99c ("tcp: implement RFC5682 F-RTO") Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fb4cf8b8e121..f501ac048366 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3105,10 +3105,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (!first_ackt.v64) first_ackt = last_ackt; - if (!(sacked & TCPCB_SACKED_ACKED)) + if (!(sacked & TCPCB_SACKED_ACKED)) { reord = min(pkts_acked, reord); - if (!after(scb->end_seq, tp->high_seq)) - flag |= FLAG_ORIG_SACK_ACKED; + if (!after(scb->end_seq, tp->high_seq)) + flag |= FLAG_ORIG_SACK_ACKED; + } } if (sacked & TCPCB_SACKED_ACKED) From ed785309c94445dd90e242370e1f7bb034e008fd Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 31 Mar 2015 11:01:45 -0700 Subject: [PATCH 03/12] ipv4: take rtnl_lock and mark mrt table as freed on namespace cleanup This is the IPv4 part for commit 905a6f96a1b1 (ipv6: take rtnl_lock and mark mrt6 table as freed on namespace cleanup). Cc: Hannes Frederic Sowa Acked-by: Hannes Frederic Sowa Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 92825443fad6..bc40115bc394 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -278,10 +278,12 @@ static void __net_exit ipmr_rules_exit(struct net *net) { struct mr_table *mrt, *next; + rtnl_lock(); list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { list_del(&mrt->list); ipmr_free_table(mrt); } + rtnl_unlock(); fib_rules_unregister(net->ipv4.mr_rules_ops); } #else @@ -308,7 +310,10 @@ static int __net_init ipmr_rules_init(struct net *net) static void __net_exit ipmr_rules_exit(struct net *net) { + rtnl_lock(); ipmr_free_table(net->ipv4.mrt); + net->ipv4.mrt = NULL; + rtnl_unlock(); } #endif From 419df12fb5fa558451319276838c1842f2b11f8f Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 31 Mar 2015 11:01:46 -0700 Subject: [PATCH 04/12] net: move fib_rules_unregister() under rtnl lock We have to hold rtnl lock for fib_rules_unregister() otherwise the following race could happen: fib_rules_unregister(): fib_nl_delrule(): ... ... ... ops = lookup_rules_ops(); list_del_rcu(&ops->list); list_for_each_entry(ops->rules) { fib_rules_cleanup_ops(ops); ... list_del_rcu(); list_del_rcu(); } Note, net->rules_mod_lock is actually not needed at all, either upper layer netns code or rtnl lock guarantees we are safe. Cc: Alexander Duyck Cc: Thomas Graf Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/core/fib_rules.c | 2 +- net/decnet/dn_rules.c | 2 ++ net/ipv4/fib_frontend.c | 3 +-- net/ipv4/ipmr.c | 2 +- net/ipv6/fib6_rules.c | 2 ++ net/ipv6/ip6mr.c | 2 +- 6 files changed, 8 insertions(+), 5 deletions(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 44706e81b2e0..e4fdc9dfb2c7 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -175,9 +175,9 @@ void fib_rules_unregister(struct fib_rules_ops *ops) spin_lock(&net->rules_mod_lock); list_del_rcu(&ops->list); - fib_rules_cleanup_ops(ops); spin_unlock(&net->rules_mod_lock); + fib_rules_cleanup_ops(ops); call_rcu(&ops->rcu, fib_rules_put_rcu); } EXPORT_SYMBOL_GPL(fib_rules_unregister); diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index faf7cc3483fe..9d66a0f72f90 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -248,7 +248,9 @@ void __init dn_fib_rules_init(void) void __exit dn_fib_rules_cleanup(void) { + rtnl_lock(); fib_rules_unregister(dn_fib_rules_ops); + rtnl_unlock(); rcu_barrier(); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 57be71dd6a9e..23b9b3e86f4c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1111,11 +1111,10 @@ static void ip_fib_net_exit(struct net *net) { unsigned int i; + rtnl_lock(); #ifdef CONFIG_IP_MULTIPLE_TABLES fib4_rules_exit(net); #endif - - rtnl_lock(); for (i = 0; i < FIB_TABLE_HASHSZ; i++) { struct fib_table *tb; struct hlist_head *head; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index bc40115bc394..fe54eba6d00d 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -283,8 +283,8 @@ static void __net_exit ipmr_rules_exit(struct net *net) list_del(&mrt->list); ipmr_free_table(mrt); } - rtnl_unlock(); fib_rules_unregister(net->ipv4.mr_rules_ops); + rtnl_unlock(); } #else #define ipmr_for_each_table(mrt, net) \ diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 27ca79682efb..70bc6abc0639 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -322,7 +322,9 @@ out_fib6_rules_ops: static void __net_exit fib6_rules_net_exit(struct net *net) { + rtnl_lock(); fib_rules_unregister(net->ipv6.fib6_rules_ops); + rtnl_unlock(); } static struct pernet_operations fib6_rules_net_ops = { diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 52028f449a89..2f1fd9ffcb34 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -267,8 +267,8 @@ static void __net_exit ip6mr_rules_exit(struct net *net) list_del(&mrt->list); ip6mr_free_table(mrt); } - rtnl_unlock(); fib_rules_unregister(net->ipv6.mr6_rules_ops); + rtnl_unlock(); } #else #define ip6mr_for_each_table(mrt, net) \ From 7ba0c47c34a1ea5bc7a24ca67309996cce0569b5 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 31 Mar 2015 11:01:47 -0700 Subject: [PATCH 05/12] ip6mr: call del_timer_sync() in ip6mr_free_table() We need to wait for the flying timers, since we are going to free the mrtable right after it. Cc: Hannes Frederic Sowa Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 2f1fd9ffcb34..312e0ff47339 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -336,7 +336,7 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id) static void ip6mr_free_table(struct mr6_table *mrt) { - del_timer(&mrt->ipmr_expire_timer); + del_timer_sync(&mrt->ipmr_expire_timer); mroute_clean_tables(mrt); kfree(mrt); } From 6d458f5b4ece8542a5c2281e40008823fec91814 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 3 Apr 2015 12:02:36 +0200 Subject: [PATCH 06/12] Revert "netns: don't clear nsid too early on removal" This reverts commit 4217291e592d ("netns: don't clear nsid too early on removal"). This is not the right fix, it introduces races. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/net_namespace.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 5221f975a4cc..cb5290b8c428 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -349,7 +349,7 @@ static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ static void cleanup_net(struct work_struct *work) { const struct pernet_operations *ops; - struct net *net, *tmp, *peer; + struct net *net, *tmp; struct list_head net_kill_list; LIST_HEAD(net_exit_list); @@ -365,6 +365,14 @@ static void cleanup_net(struct work_struct *work) list_for_each_entry(net, &net_kill_list, cleanup_list) { list_del_rcu(&net->list); list_add_tail(&net->exit_list, &net_exit_list); + for_each_net(tmp) { + int id = __peernet2id(tmp, net, false); + + if (id >= 0) + idr_remove(&tmp->netns_ids, id); + } + idr_destroy(&net->netns_ids); + } rtnl_unlock(); @@ -390,26 +398,12 @@ static void cleanup_net(struct work_struct *work) */ rcu_barrier(); - rtnl_lock(); /* Finally it is safe to free my network namespace structure */ list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { - /* Unreference net from all peers (no need to loop over - * net_exit_list because idr_destroy() will be called for each - * element of this list. - */ - for_each_net(peer) { - int id = __peernet2id(peer, net, false); - - if (id >= 0) - idr_remove(&peer->netns_ids, id); - } - idr_destroy(&net->netns_ids); - list_del_init(&net->exit_list); put_user_ns(net->user_ns); net_drop_ns(net); } - rtnl_unlock(); } static DECLARE_WORK(net_cleanup_work, cleanup_net); From 576b7cd2f6ff1e90b3fc0a000d2fe74f8a50a4bb Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 3 Apr 2015 12:02:37 +0200 Subject: [PATCH 07/12] netns: don't allocate an id for dead netns First, let's explain the problem. Suppose you have an ipip interface that stands in the netns foo and its link part in the netns bar (so the netns bar has an nsid into the netns foo). Now, you remove the netns bar: - the bar nsid into the netns foo is removed - the netns exit method of ipip is called, thus our ipip iface is removed: => a netlink message is built in the netns foo to advertise this deletion => this netlink message requests an nsid for bar, thus a new nsid is allocated for bar and never removed. This patch adds a check in peernet2id() so that an id cannot be allocated for a netns which is currently destroyed. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/net_namespace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index cb5290b8c428..70d3450588b2 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -198,8 +198,10 @@ static int __peernet2id(struct net *net, struct net *peer, bool alloc) */ int peernet2id(struct net *net, struct net *peer) { - int id = __peernet2id(net, peer, true); + bool alloc = atomic_read(&peer->count) == 0 ? false : true; + int id; + id = __peernet2id(net, peer, alloc); return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED; } EXPORT_SYMBOL(peernet2id); From f60e5990d9c1424af9dbca60a23ba2a1c7c1ce90 Mon Sep 17 00:00:00 2001 From: "hannes@stressinduktion.org" Date: Wed, 1 Apr 2015 17:07:44 +0200 Subject: [PATCH 08/12] ipv6: protect skb->sk accesses from recursive dereference inside the stack We should not consult skb->sk for output decisions in xmit recursion levels > 0 in the stack. Otherwise local socket settings could influence the result of e.g. tunnel encapsulation process. ipv6 does not conform with this in three places: 1) ip6_fragment: we do consult ipv6_npinfo for frag_size 2) sk_mc_loop in ipv6 uses skb->sk and checks if we should loop the packet back to the local socket 3) ip6_skb_dst_mtu could query the settings from the user socket and force a wrong MTU Furthermore: In sk_mc_loop we could potentially land in WARN_ON(1) if we use a PF_PACKET socket ontop of an IPv6-backed vxlan device. Reuse xmit_recursion as we are currently only interested in protecting tunnel devices. Cc: Jiri Pirko Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++++ include/net/ip.h | 16 ---------------- include/net/ip6_route.h | 3 ++- include/net/sock.h | 2 ++ net/core/dev.c | 4 +++- net/core/sock.c | 19 +++++++++++++++++++ net/ipv6/ip6_output.c | 3 ++- 7 files changed, 34 insertions(+), 19 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dcf6ec27739b..278738873703 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2185,6 +2185,12 @@ void netdev_freemem(struct net_device *dev); void synchronize_net(void); int init_dummy_netdev(struct net_device *dev); +DECLARE_PER_CPU(int, xmit_recursion); +static inline int dev_recursion_level(void) +{ + return this_cpu_read(xmit_recursion); +} + struct net_device *dev_get_by_index(struct net *net, int ifindex); struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); diff --git a/include/net/ip.h b/include/net/ip.h index 025c61c0dffb..6cc1eafb153a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -453,22 +453,6 @@ static __inline__ void inet_reset_saddr(struct sock *sk) #endif -static inline int sk_mc_loop(struct sock *sk) -{ - if (!sk) - return 1; - switch (sk->sk_family) { - case AF_INET: - return inet_sk(sk)->mc_loop; -#if IS_ENABLED(CONFIG_IPV6) - case AF_INET6: - return inet6_sk(sk)->mc_loop; -#endif - } - WARN_ON(1); - return 1; -} - bool ip_call_ra_chain(struct sk_buff *skb); /* diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 1d09b46c1e48..eda131d179d9 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -174,7 +174,8 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); static inline int ip6_skb_dst_mtu(struct sk_buff *skb) { - struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; + struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? + inet6_sk(skb->sk) : NULL; return (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) ? skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); diff --git a/include/net/sock.h b/include/net/sock.h index ab186b1d31ff..e4079c28e6b8 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1762,6 +1762,8 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie); struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie); +bool sk_mc_loop(struct sock *sk); + static inline bool sk_can_gso(const struct sock *sk) { return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type); diff --git a/net/core/dev.c b/net/core/dev.c index 962ee9d71964..45109b70664e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2848,7 +2848,9 @@ static void skb_update_prio(struct sk_buff *skb) #define skb_update_prio(skb) #endif -static DEFINE_PER_CPU(int, xmit_recursion); +DEFINE_PER_CPU(int, xmit_recursion); +EXPORT_SYMBOL(xmit_recursion); + #define RECURSION_LIMIT 10 /** diff --git a/net/core/sock.c b/net/core/sock.c index 78e89eb7eb70..71e3e5f1eaa0 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -653,6 +653,25 @@ static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool) sock_reset_flag(sk, bit); } +bool sk_mc_loop(struct sock *sk) +{ + if (dev_recursion_level()) + return false; + if (!sk) + return true; + switch (sk->sk_family) { + case AF_INET: + return inet_sk(sk)->mc_loop; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + return inet6_sk(sk)->mc_loop; +#endif + } + WARN_ON(1); + return true; +} +EXPORT_SYMBOL(sk_mc_loop); + /* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7e80b61b51ff..36cf0ab685a0 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -542,7 +542,8 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); - struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; + struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? + inet6_sk(skb->sk) : NULL; struct ipv6hdr *tmp_hdr; struct frag_hdr *fh; unsigned int mtu, hlen, left, len; From ecf7b361a6bc1fd5441e4d6a3d7433abbe577064 Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Wed, 1 Apr 2015 19:23:29 +0300 Subject: [PATCH 09/12] mvneta: dont call mvneta_adjust_link() manually mvneta_adjust_link() is a callback for of_phy_connect() and should not be called directly. The result of calling it directly is as below: Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 96208f17bb53..2db653225a0e 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2658,16 +2658,11 @@ static int mvneta_stop(struct net_device *dev) static int mvneta_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct mvneta_port *pp = netdev_priv(dev); - int ret; if (!pp->phy_dev) return -ENOTSUPP; - ret = phy_mii_ioctl(pp->phy_dev, ifr, cmd); - if (!ret) - mvneta_adjust_link(dev); - - return ret; + return phy_mii_ioctl(pp->phy_dev, ifr, cmd); } /* Ethtool methods */ From 67e04c29ec0daad9ba29341b4dab4b89526994cf Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 3 Apr 2015 13:46:09 -0700 Subject: [PATCH 10/12] l2tp: unregister l2tp_net_ops on failure path Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 895348e44c7d..a29a504492af 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1871,6 +1871,7 @@ static int __init l2tp_init(void) l2tp_wq = alloc_workqueue("l2tp", WQ_UNBOUND, 0); if (!l2tp_wq) { pr_err("alloc_workqueue failed\n"); + unregister_pernet_device(&l2tp_net_ops); rc = -ENOMEM; goto out; } From 303038135afbd0520d1e241c02592be6e4ea7204 Mon Sep 17 00:00:00 2001 From: Pavel Nakonechny Date: Sun, 5 Apr 2015 00:46:21 +0300 Subject: [PATCH 11/12] net: dsa: fix filling routing table from OF description According to description in 'include/net/dsa.h', in cascade switches configurations where there are more than one interconnected devices, 'rtable' array in 'dsa_chip_data' structure is used to indicate which port on this switch should be used to send packets to that are destined for corresponding switch. However, dsa_of_setup_routing_table() fills 'rtable' with port numbers of the _target_ switch, but not current one. This commit removes redundant devicetree parsing and adds needed port number as a function argument. So dsa_of_setup_routing_table() now just looks for target switch number by parsing parent of 'link' device node. To remove possible misunderstandings with the way of determining target switch number, a corresponding comment was added to the source code and to the DSA device tree bindings documentation file. This was tested on a custom board with two Marvell 88E6095 switches with following corresponding routing tables: { -1, 10 } and { 8, -1 }. Signed-off-by: Pavel Nakonechny Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- .../devicetree/bindings/net/dsa/dsa.txt | 4 +++- net/dsa/dsa.c | 23 ++++++------------- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.txt b/Documentation/devicetree/bindings/net/dsa/dsa.txt index e124847443f8..f0b4cd72411d 100644 --- a/Documentation/devicetree/bindings/net/dsa/dsa.txt +++ b/Documentation/devicetree/bindings/net/dsa/dsa.txt @@ -19,7 +19,9 @@ the parent DSA node. The maximum number of allowed child nodes is 4 (DSA_MAX_SWITCHES). Each of these switch child nodes should have the following required properties: -- reg : Describes the switch address on the MII bus +- reg : Contains two fields. The first one describes the + address on the MII bus. The second is the switch + number that must be unique in cascaded configurations - #address-cells : Must be 1 - #size-cells : Must be 0 diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 2173402d87e0..4dea2e0681d1 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -501,12 +501,10 @@ static struct net_device *dev_to_net_device(struct device *dev) #ifdef CONFIG_OF static int dsa_of_setup_routing_table(struct dsa_platform_data *pd, struct dsa_chip_data *cd, - int chip_index, + int chip_index, int port_index, struct device_node *link) { - int ret; const __be32 *reg; - int link_port_addr; int link_sw_addr; struct device_node *parent_sw; int len; @@ -519,6 +517,10 @@ static int dsa_of_setup_routing_table(struct dsa_platform_data *pd, if (!reg || (len != sizeof(*reg) * 2)) return -EINVAL; + /* + * Get the destination switch number from the second field of its 'reg' + * property, i.e. for "reg = <0x19 1>" sw_addr is '1'. + */ link_sw_addr = be32_to_cpup(reg + 1); if (link_sw_addr >= pd->nr_chips) @@ -535,20 +537,9 @@ static int dsa_of_setup_routing_table(struct dsa_platform_data *pd, memset(cd->rtable, -1, pd->nr_chips * sizeof(s8)); } - reg = of_get_property(link, "reg", NULL); - if (!reg) { - ret = -EINVAL; - goto out; - } - - link_port_addr = be32_to_cpup(reg); - - cd->rtable[link_sw_addr] = link_port_addr; + cd->rtable[link_sw_addr] = port_index; return 0; -out: - kfree(cd->rtable); - return ret; } static void dsa_of_free_platform_data(struct dsa_platform_data *pd) @@ -658,7 +649,7 @@ static int dsa_of_probe(struct platform_device *pdev) if (!strcmp(port_name, "dsa") && link && pd->nr_chips > 1) { ret = dsa_of_setup_routing_table(pd, cd, - chip_index, link); + chip_index, port_index, link); if (ret) goto out_free_chip; } From fde913e25496761a4e2a4c81230c913aba6289a2 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 5 Apr 2015 17:50:48 +0300 Subject: [PATCH 12/12] net/mlx4_core: Fix error message deprecation for ConnectX-2 cards Commit 1daa4303b4ca ("net/mlx4_core: Deprecate error message at ConnectX-2 cards startup to debug") did the deprecation only for port 1 of the card. Need to deprecate for port 2 as well. Fixes: 1daa4303b4ca ("net/mlx4_core: Deprecate error message at ConnectX-2 cards startup to debug") Signed-off-by: Jack Morgenstein Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 3350721bf515..546ca4226916 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -724,7 +724,8 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, * on the host, we deprecate the error message for this * specific command/input_mod/opcode_mod/fw-status to be debug. */ - if (op == MLX4_CMD_SET_PORT && in_modifier == 1 && + if (op == MLX4_CMD_SET_PORT && + (in_modifier == 1 || in_modifier == 2) && op_modifier == 0 && context->fw_status == CMD_STAT_BAD_SIZE) mlx4_dbg(dev, "command 0x%x failed: fw status = 0x%x\n", op, context->fw_status);