From d865616e1889d0b6528b5d9b620e13b1607003a5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 7 Mar 2015 16:19:41 -0600 Subject: [PATCH 1/6] mpls: Fix the kzalloc argument order in mpls_rt_alloc *Blink* I got the argument order wrong to kzalloc and the code was working properly when tested. *Blink* Fix that. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 4f265c677eca..59cc32564d50 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -239,7 +239,7 @@ static struct mpls_route *mpls_rt_alloc(size_t alen) { struct mpls_route *rt; - rt = kzalloc(GFP_KERNEL, sizeof(*rt) + alen); + rt = kzalloc(sizeof(*rt) + alen, GFP_KERNEL); if (rt) rt->rt_via_alen = alen; return rt; From 19d0c341d9d5cd186661fef58e7264a9701ef71d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 7 Mar 2015 16:21:56 -0600 Subject: [PATCH 2/6] mpls: Cleanup the rcu usage in the code. Sparse was generating a lot of warnings mostly from missing annotations in the code. Add missing annotations and in a few cases tweak the code for performance by moving work before loops. This also fixes a problematic omission of rcu_assign_pointer and rcu_dereference. Hopefully with complete rcu annotations any new rcu errors will stick out like a sore thumb. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. 
Miller --- net/mpls/af_mpls.c | 73 +++++++++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 26 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 59cc32564d50..0f2833e1b233 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -24,7 +24,7 @@ #define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))) struct mpls_route { /* next hop label forwarding entry */ - struct net_device *rt_dev; + struct net_device __rcu *rt_dev; struct rcu_head rt_rcu; u32 rt_label[MAX_NEW_LABELS]; u8 rt_protocol; /* routing protocol that set this entry */ @@ -152,7 +152,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, goto drop; /* Find the output device */ - out_dev = rt->rt_dev; + out_dev = rcu_dereference(rt->rt_dev); if (!mpls_output_possible(out_dev)) goto drop; @@ -269,13 +269,15 @@ static void mpls_route_update(struct net *net, unsigned index, struct net_device *dev, struct mpls_route *new, const struct nl_info *info) { + struct mpls_route __rcu **platform_label; struct mpls_route *rt, *old = NULL; ASSERT_RTNL(); - rt = net->mpls.platform_label[index]; - if (!dev || (rt && (rt->rt_dev == dev))) { - rcu_assign_pointer(net->mpls.platform_label[index], new); + platform_label = rtnl_dereference(net->mpls.platform_label); + rt = rtnl_dereference(platform_label[index]); + if (!dev || (rt && (rtnl_dereference(rt->rt_dev) == dev))) { + rcu_assign_pointer(platform_label[index], new); old = rt; } @@ -287,9 +289,14 @@ static void mpls_route_update(struct net *net, unsigned index, static unsigned find_free_label(struct net *net) { + struct mpls_route __rcu **platform_label; + size_t platform_labels; unsigned index; - for (index = 16; index < net->mpls.platform_labels; index++) { - if (!net->mpls.platform_label[index]) + + platform_label = rtnl_dereference(net->mpls.platform_label); + platform_labels = net->mpls.platform_labels; + for (index = 16; index < platform_labels; index++) { + if 
(!rtnl_dereference(platform_label[index])) return index; } return LABEL_NOT_SPECIFIED; @@ -297,6 +304,7 @@ static unsigned find_free_label(struct net *net) static int mpls_route_add(struct mpls_route_config *cfg) { + struct mpls_route __rcu **platform_label; struct net *net = cfg->rc_nlinfo.nl_net; struct net_device *dev = NULL; struct mpls_route *rt, *old; @@ -345,7 +353,8 @@ static int mpls_route_add(struct mpls_route_config *cfg) goto errout; err = -EEXIST; - old = net->mpls.platform_label[index]; + platform_label = rtnl_dereference(net->mpls.platform_label); + old = rtnl_dereference(platform_label[index]); if ((cfg->rc_nlflags & NLM_F_EXCL) && old) goto errout; @@ -366,7 +375,7 @@ static int mpls_route_add(struct mpls_route_config *cfg) for (i = 0; i < rt->rt_labels; i++) rt->rt_label[i] = cfg->rc_output_label[i]; rt->rt_protocol = cfg->rc_protocol; - rt->rt_dev = dev; + RCU_INIT_POINTER(rt->rt_dev, dev); rt->rt_via_family = cfg->rc_via_family; memcpy(rt->rt_via, cfg->rc_via, cfg->rc_via_alen); @@ -406,14 +415,16 @@ errout: static void mpls_ifdown(struct net_device *dev) { + struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); unsigned index; + platform_label = rtnl_dereference(net->mpls.platform_label); for (index = 0; index < net->mpls.platform_labels; index++) { - struct mpls_route *rt = net->mpls.platform_label[index]; + struct mpls_route *rt = rtnl_dereference(platform_label[index]); if (!rt) continue; - if (rt->rt_dev != dev) + if (rtnl_dereference(rt->rt_dev) != dev) continue; rt->rt_dev = NULL; } @@ -653,6 +664,7 @@ static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, u32 label, struct mpls_route *rt, int flags) { + struct net_device *dev; struct nlmsghdr *nlh; struct rtmsg *rtm; @@ -676,7 +688,8 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, goto nla_put_failure; if (nla_put_via(skb, 
rt->rt_via_family, rt->rt_via, rt->rt_via_alen)) goto nla_put_failure; - if (rt->rt_dev && nla_put_u32(skb, RTA_OIF, rt->rt_dev->ifindex)) + dev = rtnl_dereference(rt->rt_dev); + if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex)) goto nla_put_failure; if (nla_put_labels(skb, RTA_DST, 1, &label)) goto nla_put_failure; @@ -692,6 +705,8 @@ nla_put_failure: static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); + struct mpls_route __rcu **platform_label; + size_t platform_labels; unsigned int index; ASSERT_RTNL(); @@ -700,9 +715,11 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) if (index < 16) index = 16; - for (; index < net->mpls.platform_labels; index++) { + platform_label = rtnl_dereference(net->mpls.platform_label); + platform_labels = net->mpls.platform_labels; + for (; index < platform_labels; index++) { struct mpls_route *rt; - rt = net->mpls.platform_label[index]; + rt = rtnl_dereference(platform_label[index]); if (!rt) continue; @@ -780,7 +797,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) rt0 = mpls_rt_alloc(lo->addr_len); if (!rt0) goto nort0; - rt0->rt_dev = lo; + RCU_INIT_POINTER(rt0->rt_dev, lo); rt0->rt_protocol = RTPROT_KERNEL; rt0->rt_via_family = AF_PACKET; memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len); @@ -790,7 +807,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) rt2 = mpls_rt_alloc(lo->addr_len); if (!rt2) goto nort2; - rt2->rt_dev = lo; + RCU_INIT_POINTER(rt2->rt_dev, lo); rt2->rt_protocol = RTPROT_KERNEL; rt2->rt_via_family = AF_PACKET; memcpy(rt2->rt_via, lo->dev_addr, lo->addr_len); @@ -798,7 +815,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) rtnl_lock(); /* Remember the original table */ - old = net->mpls.platform_label; + old = rtnl_dereference(net->mpls.platform_label); old_limit = net->mpls.platform_labels; /* Free any labels beyond the new table */ @@ -815,19 
+832,19 @@ static int resize_platform_label_table(struct net *net, size_t limit) /* If needed set the predefined labels */ if ((old_limit <= LABEL_IPV6_EXPLICIT_NULL) && (limit > LABEL_IPV6_EXPLICIT_NULL)) { - labels[LABEL_IPV6_EXPLICIT_NULL] = rt2; + RCU_INIT_POINTER(labels[LABEL_IPV6_EXPLICIT_NULL], rt2); rt2 = NULL; } if ((old_limit <= LABEL_IPV4_EXPLICIT_NULL) && (limit > LABEL_IPV4_EXPLICIT_NULL)) { - labels[LABEL_IPV4_EXPLICIT_NULL] = rt0; + RCU_INIT_POINTER(labels[LABEL_IPV4_EXPLICIT_NULL], rt0); rt0 = NULL; } /* Update the global pointers */ net->mpls.platform_labels = limit; - net->mpls.platform_label = labels; + rcu_assign_pointer(net->mpls.platform_label, labels); rtnl_unlock(); @@ -903,6 +920,8 @@ static int mpls_net_init(struct net *net) static void mpls_net_exit(struct net *net) { + struct mpls_route __rcu **platform_label; + size_t platform_labels; struct ctl_table *table; unsigned int index; @@ -910,8 +929,8 @@ static void mpls_net_exit(struct net *net) unregister_net_sysctl_table(net->mpls.ctl); kfree(table); - /* An rcu grace period haselapsed since there was a device in - * the network namespace (and thus the last in fqlight packet) + /* An rcu grace period has passed since there was a device in + * the network namespace (and thus the last in flight packet) * left this network namespace. This is because * unregister_netdevice_many and netdev_run_todo has completed * for each network device that was in this network namespace. @@ -920,14 +939,16 @@ static void mpls_net_exit(struct net *net) * freeing the platform_label table. 
*/ rtnl_lock(); - for (index = 0; index < net->mpls.platform_labels; index++) { - struct mpls_route *rt = net->mpls.platform_label[index]; - rcu_assign_pointer(net->mpls.platform_label[index], NULL); + platform_label = rtnl_dereference(net->mpls.platform_label); + platform_labels = net->mpls.platform_labels; + for (index = 0; index < platform_labels; index++) { + struct mpls_route *rt = rtnl_dereference(platform_label[index]); + RCU_INIT_POINTER(platform_label[index], NULL); mpls_rt_free(rt); } rtnl_unlock(); - kvfree(net->mpls.platform_label); + kvfree(platform_label); } static struct pernet_operations mpls_net_ops = { From 0f7bbd5805e3d32e3ee58d1a802a8404a724f2fc Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 7 Mar 2015 16:22:40 -0600 Subject: [PATCH 3/6] mpls: Better error code for unsupported option. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 0f2833e1b233..5c99e3fc1b72 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -348,7 +348,7 @@ static int mpls_route_add(struct mpls_route_config *cfg) goto errout; /* Append makes no sense with mpls */ - err = -EINVAL; + err = -EOPNOTSUPP; if (cfg->rc_nlflags & NLM_F_APPEND) goto errout; From aa7da9375677d31dd53ed6253f55cb19e3075811 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 7 Mar 2015 16:23:23 -0600 Subject: [PATCH 4/6] mpls: Correct the ttl decrement. According to RFC3032 section 2.4.2 packets with an outgoing ttl of 0 MUST NOT be forwarded. According to section 2.4.1 an outgoing TTL of 0 comes from an incoming TTL <= 1. Therefore any packet that is received with a ttl <= 1 should not have its ttl decremented and forwarded. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. 
Miller --- net/mpls/af_mpls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 5c99e3fc1b72..e120074157de 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -162,7 +162,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, skb_forward_csum(skb); /* Verify ttl is valid */ - if (dec.ttl <= 2) + if (dec.ttl <= 1) goto drop; dec.ttl -= 1; From 7d5f41f276b376d567e919530f8b5fd70be25426 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 7 Mar 2015 16:24:23 -0600 Subject: [PATCH 5/6] mpls: Fix the openvswitch select of NET_MPLS_GSO Fix the OPENVSWITCH Kconfig option and old Kconfigs by having OPENVSWITCH select both NET_MPLS_GSO and MPLS. A Kbuild test robot reported that when NET_MPLS_GSO is selected by OPENVSWITCH the generated .config is broken because MPLS is not selected. Cc: Simon Horman Fixes: cec9166ca4e mpls: Refactor how the mpls module is built Reported-by: kbuild test robot Signed-off-by: "Eric W. Biederman" Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/openvswitch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index b7d818c59423..ed6b0f8dd1bb 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -6,6 +6,7 @@ config OPENVSWITCH tristate "Open vSwitch" depends on INET select LIBCRC32C + select MPLS select NET_MPLS_GSO ---help--- Open vSwitch is a multilayer Ethernet switch targeted at virtualized From b79bda3d38ae67940f1740f7e015f284eb551680 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 7 Mar 2015 16:25:56 -0600 Subject: [PATCH 6/6] neigh: Use neigh table index for neigh_packet_xmit Remove a little bit of unnecessary work when transmitting a packet with neigh_packet_xmit. Use the neighbour table index not the address family as a parameter. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. 
Miller --- include/net/neighbour.h | 1 + net/core/neighbour.c | 22 +++++++++++----------- net/mpls/af_mpls.c | 35 ++++++++++++++++++++++------------- 3 files changed, 34 insertions(+), 24 deletions(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index afb8237b0a8c..d48b8ec8b5f4 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -226,6 +226,7 @@ enum { NEIGH_ND_TABLE = 1, NEIGH_DN_TABLE = 2, NEIGH_NR_TABLES, + NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */ }; static inline int neigh_parms_family(struct neigh_parms *p) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index cffaf00561e7..ad07990e943d 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2391,22 +2391,15 @@ void __neigh_for_each_release(struct neigh_table *tbl, } EXPORT_SYMBOL(__neigh_for_each_release); -int neigh_xmit(int family, struct net_device *dev, +int neigh_xmit(int index, struct net_device *dev, const void *addr, struct sk_buff *skb) { - int err; - if (family == AF_PACKET) { - err = dev_hard_header(skb, dev, ntohs(skb->protocol), - addr, NULL, skb->len); - if (err < 0) - goto out_kfree_skb; - err = dev_queue_xmit(skb); - } else { + int err = -EAFNOSUPPORT; + if (likely(index < NEIGH_NR_TABLES)) { struct neigh_table *tbl; struct neighbour *neigh; - err = -ENETDOWN; - tbl = neigh_find_table(family); + tbl = neigh_tables[index]; if (!tbl) goto out; neigh = __neigh_lookup_noref(tbl, addr, dev); @@ -2417,6 +2410,13 @@ int neigh_xmit(int family, struct net_device *dev, goto out_kfree_skb; err = neigh->output(neigh, skb); } + else if (index == NEIGH_LINK_TABLE) { + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + addr, NULL, skb->len); + if (err < 0) + goto out_kfree_skb; + err = dev_queue_xmit(skb); + } out: return err; out_kfree_skb: diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index e120074157de..0ad8f7141be2 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -28,9 +28,9 @@ struct mpls_route { /* 
next hop label forwarding entry */ struct rcu_head rt_rcu; u32 rt_label[MAX_NEW_LABELS]; u8 rt_protocol; /* routing protocol that set this entry */ - u8 rt_labels:2, - rt_via_alen:6; - unsigned short rt_via_family; + u8 rt_labels; + u8 rt_via_alen; + u8 rt_via_table; u8 rt_via[0]; }; @@ -201,7 +201,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, } } - err = neigh_xmit(rt->rt_via_family, out_dev, rt->rt_via, skb); + err = neigh_xmit(rt->rt_via_table, out_dev, rt->rt_via, skb); if (err) net_dbg_ratelimited("%s: packet transmission failed: %d\n", __func__, err); @@ -225,7 +225,7 @@ static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = { struct mpls_route_config { u32 rc_protocol; u32 rc_ifindex; - u16 rc_via_family; + u16 rc_via_table; u16 rc_via_alen; u8 rc_via[MAX_VIA_ALEN]; u32 rc_label; @@ -343,7 +343,7 @@ static int mpls_route_add(struct mpls_route_config *cfg) goto errout; err = -EINVAL; - if ((cfg->rc_via_family == AF_PACKET) && + if ((cfg->rc_via_table == NEIGH_LINK_TABLE) && (dev->addr_len != cfg->rc_via_alen)) goto errout; @@ -376,7 +376,7 @@ static int mpls_route_add(struct mpls_route_config *cfg) rt->rt_label[i] = cfg->rc_output_label[i]; rt->rt_protocol = cfg->rc_protocol; RCU_INIT_POINTER(rt->rt_dev, dev); - rt->rt_via_family = cfg->rc_via_family; + rt->rt_via_table = cfg->rc_via_table; memcpy(rt->rt_via, cfg->rc_via, cfg->rc_via_alen); mpls_route_update(net, index, NULL, rt, &cfg->rc_nlinfo); @@ -448,15 +448,22 @@ static struct notifier_block mpls_dev_notifier = { }; static int nla_put_via(struct sk_buff *skb, - u16 family, const void *addr, int alen) + u8 table, const void *addr, int alen) { + static const int table_to_family[NEIGH_NR_TABLES + 1] = { + AF_INET, AF_INET6, AF_DECnet, AF_PACKET, + }; struct nlattr *nla; struct rtvia *via; + int family = AF_UNSPEC; nla = nla_reserve(skb, RTA_VIA, alen + 2); if (!nla) return -EMSGSIZE; + if (table <= NEIGH_NR_TABLES) + family = table_to_family[table]; + via = nla_data(nla); 
via->rtvia_family = family; memcpy(via->rtvia_addr, addr, alen); @@ -599,21 +606,23 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, struct rtvia *via = nla_data(nla); if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) goto errout; - cfg->rc_via_family = via->rtvia_family; cfg->rc_via_alen = nla_len(nla) - offsetof(struct rtvia, rtvia_addr); if (cfg->rc_via_alen > MAX_VIA_ALEN) goto errout; /* Validate the address family */ - switch(cfg->rc_via_family) { + switch(via->rtvia_family) { case AF_PACKET: + cfg->rc_via_table = NEIGH_LINK_TABLE; break; case AF_INET: + cfg->rc_via_table = NEIGH_ARP_TABLE; if (cfg->rc_via_alen != 4) goto errout; break; case AF_INET6: + cfg->rc_via_table = NEIGH_ND_TABLE; if (cfg->rc_via_alen != 16) goto errout; break; @@ -686,7 +695,7 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, if (rt->rt_labels && nla_put_labels(skb, RTA_NEWDST, rt->rt_labels, rt->rt_label)) goto nla_put_failure; - if (nla_put_via(skb, rt->rt_via_family, rt->rt_via, rt->rt_via_alen)) + if (nla_put_via(skb, rt->rt_via_table, rt->rt_via, rt->rt_via_alen)) goto nla_put_failure; dev = rtnl_dereference(rt->rt_dev); if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex)) @@ -799,7 +808,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) goto nort0; RCU_INIT_POINTER(rt0->rt_dev, lo); rt0->rt_protocol = RTPROT_KERNEL; - rt0->rt_via_family = AF_PACKET; + rt0->rt_via_table = NEIGH_LINK_TABLE; memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len); } if (limit > LABEL_IPV6_EXPLICIT_NULL) { @@ -809,7 +818,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) goto nort2; RCU_INIT_POINTER(rt2->rt_dev, lo); rt2->rt_protocol = RTPROT_KERNEL; - rt2->rt_via_family = AF_PACKET; + rt2->rt_via_table = NEIGH_LINK_TABLE; memcpy(rt2->rt_via, lo->dev_addr, lo->addr_len); }