From 0f457a36626fa94026e483836fbf29e451434567 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 30 Apr 2019 07:45:48 -0700 Subject: [PATCH 1/3] ipv4: Move cached routes to fib_nh_common While the cached routes, nh_pcpu_rth_output and nh_rth_input, are IPv4 specific, a later patch wants to make them accessible for IPv6 nexthops with IPv4 routes using a fib6_nh. Move the cached routes from fib_nh to fib_nh_common and update references. Initialization of the cached entries is moved to fib_nh_common_init, and free is moved to fib_nh_common_release. Change in location only, from fib_nh up to fib_nh_common; no functional change intended. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/ip_fib.h | 6 ++++-- net/ipv4/fib_semantics.c | 36 +++++++++++++++++++----------------- net/ipv4/route.c | 18 +++++++++--------- 3 files changed, 32 insertions(+), 28 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 772a9e61bd84..659c5081c40b 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -96,6 +96,10 @@ struct fib_nh_common { int nhc_weight; atomic_t nhc_upper_bound; + + /* v4 specific, but allows fib6_nh with v4 routes */ + struct rtable __rcu * __percpu *nhc_pcpu_rth_output; + struct rtable __rcu *nhc_rth_input; }; struct fib_nh { @@ -107,8 +111,6 @@ struct fib_nh { #endif __be32 nh_saddr; int nh_saddr_genid; - struct rtable __rcu * __percpu *nh_pcpu_rth_output; - struct rtable __rcu *nh_rth_input; struct fnhe_hash_bucket __rcu *nh_exceptions; #define fib_nh_family nh_common.nhc_family #define fib_nh_dev nh_common.nhc_dev diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 71c2165a2ce3..4402ec6dc426 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -212,6 +212,8 @@ void fib_nh_common_release(struct fib_nh_common *nhc) dev_put(nhc->nhc_dev); lwtstate_put(nhc->nhc_lwtstate); + rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output); + 
rt_fibinfo_free(&nhc->nhc_rth_input); } EXPORT_SYMBOL_GPL(fib_nh_common_release); @@ -223,8 +225,6 @@ void fib_nh_release(struct net *net, struct fib_nh *fib_nh) #endif fib_nh_common_release(&fib_nh->nh_common); free_nh_exceptions(fib_nh); - rt_fibinfo_free_cpus(fib_nh->nh_pcpu_rth_output); - rt_fibinfo_free(&fib_nh->nh_rth_input); } /* Release a nexthop info record */ @@ -491,23 +491,35 @@ int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap, u16 encap_type, void *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) { + int err; + + nhc->nhc_pcpu_rth_output = alloc_percpu_gfp(struct rtable __rcu *, + gfp_flags); + if (!nhc->nhc_pcpu_rth_output) + return -ENOMEM; + if (encap) { struct lwtunnel_state *lwtstate; - int err; if (encap_type == LWTUNNEL_ENCAP_NONE) { NL_SET_ERR_MSG(extack, "LWT encap type not specified"); - return -EINVAL; + err = -EINVAL; + goto lwt_failure; } err = lwtunnel_build_state(encap_type, encap, nhc->nhc_family, cfg, &lwtstate, extack); if (err) - return err; + goto lwt_failure; nhc->nhc_lwtstate = lwtstate_get(lwtstate); } return 0; + +lwt_failure: + rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output); + nhc->nhc_pcpu_rth_output = NULL; + return err; } EXPORT_SYMBOL_GPL(fib_nh_common_init); @@ -515,18 +527,14 @@ int fib_nh_init(struct net *net, struct fib_nh *nh, struct fib_config *cfg, int nh_weight, struct netlink_ext_ack *extack) { - int err = -ENOMEM; + int err; nh->fib_nh_family = AF_INET; - nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *); - if (!nh->nh_pcpu_rth_output) - goto err_out; - err = fib_nh_common_init(&nh->nh_common, cfg->fc_encap, cfg->fc_encap_type, cfg, GFP_KERNEL, extack); if (err) - goto init_failure; + return err; nh->fib_nh_oif = cfg->fc_oif; nh->fib_nh_gw_family = cfg->fc_gw_family; @@ -546,12 +554,6 @@ int fib_nh_init(struct net *net, struct fib_nh *nh, nh->fib_nh_weight = nh_weight; #endif return 0; - -init_failure: - rt_fibinfo_free_cpus(nh->nh_pcpu_rth_output); - nh->nh_pcpu_rth_output = 
NULL; -err_out: - return err; } #ifdef CONFIG_IP_ROUTE_MULTIPATH diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 795aed6e4720..662ac9bd956e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -646,6 +646,7 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, u32 pmtu, bool lock, unsigned long expires) { + struct fib_nh_common *nhc = &nh->nh_common; struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe; struct rtable *rt; @@ -715,13 +716,13 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, * stale, so anyone caching it rechecks if this exception * applies to them. */ - rt = rcu_dereference(nh->nh_rth_input); + rt = rcu_dereference(nhc->nhc_rth_input); if (rt) rt->dst.obsolete = DST_OBSOLETE_KILL; for_each_possible_cpu(i) { struct rtable __rcu **prt; - prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i); + prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i); rt = rcu_dereference(*prt); if (rt) rt->dst.obsolete = DST_OBSOLETE_KILL; @@ -1471,13 +1472,14 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) { + struct fib_nh_common *nhc = &nh->nh_common; struct rtable *orig, *prev, **p; bool ret = true; if (rt_is_input_route(rt)) { - p = (struct rtable **)&nh->nh_rth_input; + p = (struct rtable **)&nhc->nhc_rth_input; } else { - p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output); + p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output); } orig = *p; @@ -1810,7 +1812,7 @@ static int __mkroute_input(struct sk_buff *skb, if (fnhe) rth = rcu_dereference(fnhe->fnhe_rth_input); else - rth = rcu_dereference(nh->nh_rth_input); + rth = rcu_dereference(nhc->nhc_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); goto out; @@ -2105,10 +2107,8 @@ local_input: if (res->fi) { if (!itag) { struct fib_nh_common *nhc 
= FIB_RES_NHC(*res); - struct fib_nh *nh; - nh = container_of(nhc, struct fib_nh, nh_common); - rth = rcu_dereference(nh->nh_rth_input); + rth = rcu_dereference(nhc->nhc_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); err = 0; @@ -2337,7 +2337,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, do_cache = false; goto add; } - prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); + prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output); } rth = rcu_dereference(*prth); if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst)) From 87063a1fa66740302f08add95ad3d4d316376bef Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 30 Apr 2019 07:45:49 -0700 Subject: [PATCH 2/3] ipv4: Pass fib_nh_common to rt_cache_route Now that the cached routes are in fib_nh_common, pass it to rt_cache_route and simplify its callers. For rt_set_nexthop, the tclassid becomes the last user of fib_nh so move the container_of under the #ifdef CONFIG_IP_ROUTE_CLASSID. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. 
Miller --- net/ipv4/route.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 662ac9bd956e..9b50d0440940 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1470,9 +1470,8 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, return ret; } -static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) +static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt) { - struct fib_nh_common *nhc = &nh->nh_common; struct rtable *orig, *prev, **p; bool ret = true; @@ -1576,7 +1575,6 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, if (fi) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); - struct fib_nh *nh; if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) { rt->rt_gw_family = nhc->nhc_gw_family; @@ -1589,15 +1587,19 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, ip_dst_init_metrics(&rt->dst, fi->fib_metrics); - nh = container_of(nhc, struct fib_nh, nh_common); #ifdef CONFIG_IP_ROUTE_CLASSID - rt->dst.tclassid = nh->nh_tclassid; + { + struct fib_nh *nh; + + nh = container_of(nhc, struct fib_nh, nh_common); + rt->dst.tclassid = nh->nh_tclassid; + } #endif - rt->dst.lwtstate = lwtstate_get(nh->fib_nh_lws); + rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); if (unlikely(fnhe)) cached = rt_bind_exception(rt, fnhe, daddr, do_cache); else if (do_cache) - cached = rt_cache_route(nh, rt); + cached = rt_cache_route(nhc, rt); if (unlikely(!cached)) { /* Routes we intend to cache in nexthop exception or * FIB nexthop have the DST_NOCACHE bit clear. 
@@ -2139,7 +2141,6 @@ local_input: if (do_cache) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); - struct fib_nh *nh; rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); if (lwtunnel_input_redirect(rth->dst.lwtstate)) { @@ -2148,8 +2149,7 @@ local_input: rth->dst.input = lwtunnel_input; } - nh = container_of(nhc, struct fib_nh, nh_common); - if (unlikely(!rt_cache_route(nh, rth))) + if (unlikely(!rt_cache_route(nhc, rth))) rt_add_uncached_list(rth); } skb_dst_set(skb, &rth->dst); From a5995e7107eb3d9c44744d3cf47d49fabfef01f5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 30 Apr 2019 07:45:50 -0700 Subject: [PATCH 3/3] ipv4: Move exception bucket to nh_common Similar to the cached routes, make IPv4 exceptions accessible when using an IPv6 nexthop struct with IPv4 routes. Simplify the exception functions by passing in fib_nh_common since that is all they need, and then clean up the call sites that have extraneous fib_nh conversions. As with the cached routes, this is a change in location only, from fib_nh up to fib_nh_common; no functional change intended. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S.
Miller --- include/net/ip_fib.h | 2 +- net/ipv4/fib_semantics.c | 12 ++++++------ net/ipv4/route.c | 41 +++++++++++++++++----------------------- 3 files changed, 24 insertions(+), 31 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 659c5081c40b..d0e28f4ab099 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -100,6 +100,7 @@ struct fib_nh_common { /* v4 specific, but allows fib6_nh with v4 routes */ struct rtable __rcu * __percpu *nhc_pcpu_rth_output; struct rtable __rcu *nhc_rth_input; + struct fnhe_hash_bucket __rcu *nhc_exceptions; }; struct fib_nh { @@ -111,7 +112,6 @@ struct fib_nh { #endif __be32 nh_saddr; int nh_saddr_genid; - struct fnhe_hash_bucket __rcu *nh_exceptions; #define fib_nh_family nh_common.nhc_family #define fib_nh_dev nh_common.nhc_dev #define fib_nh_oif nh_common.nhc_oif diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 4402ec6dc426..d3da6a10f86f 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -159,12 +159,12 @@ static void rt_fibinfo_free(struct rtable __rcu **rtp) dst_release_immediate(&rt->dst); } -static void free_nh_exceptions(struct fib_nh *nh) +static void free_nh_exceptions(struct fib_nh_common *nhc) { struct fnhe_hash_bucket *hash; int i; - hash = rcu_dereference_protected(nh->nh_exceptions, 1); + hash = rcu_dereference_protected(nhc->nhc_exceptions, 1); if (!hash) return; for (i = 0; i < FNHE_HASH_SIZE; i++) { @@ -214,6 +214,7 @@ void fib_nh_common_release(struct fib_nh_common *nhc) lwtstate_put(nhc->nhc_lwtstate); rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output); rt_fibinfo_free(&nhc->nhc_rth_input); + free_nh_exceptions(nhc); } EXPORT_SYMBOL_GPL(fib_nh_common_release); @@ -224,7 +225,6 @@ void fib_nh_release(struct net *net, struct fib_nh *fib_nh) net->ipv4.fib_num_tclassid_users--; #endif fib_nh_common_release(&fib_nh->nh_common); - free_nh_exceptions(fib_nh); } /* Release a nexthop info record */ @@ -1713,12 +1713,12 @@ static int 
call_fib_nh_notifiers(struct fib_nh *nh, * - if the new MTU is greater than the PMTU, don't make any change * - otherwise, unlock and set PMTU */ -static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig) +static void nh_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig) { struct fnhe_hash_bucket *bucket; int i; - bucket = rcu_dereference_protected(nh->nh_exceptions, 1); + bucket = rcu_dereference_protected(nhc->nhc_exceptions, 1); if (!bucket) return; @@ -1749,7 +1749,7 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) hlist_for_each_entry(nh, head, nh_hash) { if (nh->fib_nh_dev == dev) - nh_update_mtu(nh, dev->mtu, orig_mtu); + nh_update_mtu(&nh->nh_common, dev->mtu, orig_mtu); } } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 9b50d0440940..11ddc276776e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -643,10 +643,10 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh } } -static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, - u32 pmtu, bool lock, unsigned long expires) +static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr, + __be32 gw, u32 pmtu, bool lock, + unsigned long expires) { - struct fib_nh_common *nhc = &nh->nh_common; struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe; struct rtable *rt; @@ -654,17 +654,17 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, unsigned int i; int depth; - genid = fnhe_genid(dev_net(nh->fib_nh_dev)); + genid = fnhe_genid(dev_net(nhc->nhc_dev)); hval = fnhe_hashfun(daddr); spin_lock_bh(&fnhe_lock); - hash = rcu_dereference(nh->nh_exceptions); + hash = rcu_dereference(nhc->nhc_exceptions); if (!hash) { hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC); if (!hash) goto out_unlock; - rcu_assign_pointer(nh->nh_exceptions, hash); + rcu_assign_pointer(nhc->nhc_exceptions, hash); } hash += hval; @@ -789,10 +789,8 @@ static void __ip_do_redirect(struct rtable *rt, struct 
sk_buff *skb, struct flow } else { if (fib_lookup(net, fl4, &res, 0) == 0) { struct fib_nh_common *nhc = FIB_RES_NHC(res); - struct fib_nh *nh; - nh = container_of(nhc, struct fib_nh, nh_common); - update_or_create_fnhe(nh, fl4->daddr, new_gw, + update_or_create_fnhe(nhc, fl4->daddr, new_gw, 0, false, jiffies + ip_rt_gc_timeout); } @@ -1040,10 +1038,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) rcu_read_lock(); if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { struct fib_nh_common *nhc = FIB_RES_NHC(res); - struct fib_nh *nh; - nh = container_of(nhc, struct fib_nh, nh_common); - update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock, + update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, jiffies + ip_rt_mtu_expires); } rcu_read_unlock(); @@ -1329,7 +1325,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } -static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) +static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr) { struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe, __rcu **fnhe_p; @@ -1337,7 +1333,7 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) spin_lock_bh(&fnhe_lock); - hash = rcu_dereference_protected(nh->nh_exceptions, + hash = rcu_dereference_protected(nhc->nhc_exceptions, lockdep_is_held(&fnhe_lock)); hash += hval; @@ -1363,9 +1359,10 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) spin_unlock_bh(&fnhe_lock); } -static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) +static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc, + __be32 daddr) { - struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions); + struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions); struct fib_nh_exception *fnhe; u32 hval; @@ -1379,7 +1376,7 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) if (fnhe->fnhe_daddr == daddr) { if 
(fnhe->fnhe_expires && time_after(jiffies, fnhe->fnhe_expires)) { - ip_del_fnhe(nh, daddr); + ip_del_fnhe(nhc, daddr); break; } return fnhe; @@ -1406,10 +1403,9 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) mtu = fi->fib_mtu; if (likely(!mtu)) { - struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common); struct fib_nh_exception *fnhe; - fnhe = find_exception(nh, daddr); + fnhe = find_exception(nhc, daddr); if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires)) mtu = fnhe->fnhe_pmtu; } @@ -1760,7 +1756,6 @@ static int __mkroute_input(struct sk_buff *skb, struct net_device *dev = nhc->nhc_dev; struct fib_nh_exception *fnhe; struct rtable *rth; - struct fib_nh *nh; int err; struct in_device *out_dev; bool do_cache; @@ -1808,8 +1803,7 @@ static int __mkroute_input(struct sk_buff *skb, } } - nh = container_of(nhc, struct fib_nh, nh_common); - fnhe = find_exception(nh, daddr); + fnhe = find_exception(nhc, daddr); if (do_cache) { if (fnhe) rth = rcu_dereference(fnhe->fnhe_rth_input); @@ -2321,10 +2315,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res, do_cache &= fi != NULL; if (fi) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); - struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common); struct rtable __rcu **prth; - fnhe = find_exception(nh, fl4->daddr); + fnhe = find_exception(nhc, fl4->daddr); if (!do_cache) goto add; if (fnhe) {