Merge branch 'ipv4-Move-location-of-pcpu-route-cache-and-exceptions'

David Ahern says:

====================
ipv4: Move location of pcpu route cache and exceptions

This series moves IPv4 pcpu cached routes from fib_nh to fib_nh_common
to make the caches available for IPv6 nexthops (fib6_nh) with IPv4
routes. This allows a fib6_nh struct to be used with both IPv4 and
and IPv6 routes.

v4
- fixed memleak if encap_type is not set as noticed by Ido

v3
- dropped ipv6 patches for now. Will resubmit those once the existing
  refcnt problem is fixed

v2
- reverted patch 2 to use ifdef CONFIG_IP_ROUTE_CLASSID instead
  of IS_ENABLED(CONFIG_IP_ROUTE_CLASSID) to fix compile issues
  reported by kbuild test robot
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2019-05-05 00:47:16 -07:00
commit f4b47a5945
3 changed files with 64 additions and 67 deletions

View File

@ -96,6 +96,11 @@ struct fib_nh_common {
int nhc_weight;
atomic_t nhc_upper_bound;
/* v4 specific, but allows fib6_nh with v4 routes */
struct rtable __rcu * __percpu *nhc_pcpu_rth_output;
struct rtable __rcu *nhc_rth_input;
struct fnhe_hash_bucket __rcu *nhc_exceptions;
};
struct fib_nh {
@ -107,9 +112,6 @@ struct fib_nh {
#endif
__be32 nh_saddr;
int nh_saddr_genid;
struct rtable __rcu * __percpu *nh_pcpu_rth_output;
struct rtable __rcu *nh_rth_input;
struct fnhe_hash_bucket __rcu *nh_exceptions;
#define fib_nh_family nh_common.nhc_family
#define fib_nh_dev nh_common.nhc_dev
#define fib_nh_oif nh_common.nhc_oif

View File

@ -159,12 +159,12 @@ static void rt_fibinfo_free(struct rtable __rcu **rtp)
dst_release_immediate(&rt->dst);
}
static void free_nh_exceptions(struct fib_nh *nh)
static void free_nh_exceptions(struct fib_nh_common *nhc)
{
struct fnhe_hash_bucket *hash;
int i;
hash = rcu_dereference_protected(nh->nh_exceptions, 1);
hash = rcu_dereference_protected(nhc->nhc_exceptions, 1);
if (!hash)
return;
for (i = 0; i < FNHE_HASH_SIZE; i++) {
@ -212,6 +212,9 @@ void fib_nh_common_release(struct fib_nh_common *nhc)
dev_put(nhc->nhc_dev);
lwtstate_put(nhc->nhc_lwtstate);
rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
rt_fibinfo_free(&nhc->nhc_rth_input);
free_nh_exceptions(nhc);
}
EXPORT_SYMBOL_GPL(fib_nh_common_release);
@ -222,9 +225,6 @@ void fib_nh_release(struct net *net, struct fib_nh *fib_nh)
net->ipv4.fib_num_tclassid_users--;
#endif
fib_nh_common_release(&fib_nh->nh_common);
free_nh_exceptions(fib_nh);
rt_fibinfo_free_cpus(fib_nh->nh_pcpu_rth_output);
rt_fibinfo_free(&fib_nh->nh_rth_input);
}
/* Release a nexthop info record */
@ -491,23 +491,35 @@ int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap,
u16 encap_type, void *cfg, gfp_t gfp_flags,
struct netlink_ext_ack *extack)
{
int err;
nhc->nhc_pcpu_rth_output = alloc_percpu_gfp(struct rtable __rcu *,
gfp_flags);
if (!nhc->nhc_pcpu_rth_output)
return -ENOMEM;
if (encap) {
struct lwtunnel_state *lwtstate;
int err;
if (encap_type == LWTUNNEL_ENCAP_NONE) {
NL_SET_ERR_MSG(extack, "LWT encap type not specified");
return -EINVAL;
err = -EINVAL;
goto lwt_failure;
}
err = lwtunnel_build_state(encap_type, encap, nhc->nhc_family,
cfg, &lwtstate, extack);
if (err)
return err;
goto lwt_failure;
nhc->nhc_lwtstate = lwtstate_get(lwtstate);
}
return 0;
lwt_failure:
rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
nhc->nhc_pcpu_rth_output = NULL;
return err;
}
EXPORT_SYMBOL_GPL(fib_nh_common_init);
@ -515,18 +527,14 @@ int fib_nh_init(struct net *net, struct fib_nh *nh,
struct fib_config *cfg, int nh_weight,
struct netlink_ext_ack *extack)
{
int err = -ENOMEM;
int err;
nh->fib_nh_family = AF_INET;
nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *);
if (!nh->nh_pcpu_rth_output)
goto err_out;
err = fib_nh_common_init(&nh->nh_common, cfg->fc_encap,
cfg->fc_encap_type, cfg, GFP_KERNEL, extack);
if (err)
goto init_failure;
return err;
nh->fib_nh_oif = cfg->fc_oif;
nh->fib_nh_gw_family = cfg->fc_gw_family;
@ -546,12 +554,6 @@ int fib_nh_init(struct net *net, struct fib_nh *nh,
nh->fib_nh_weight = nh_weight;
#endif
return 0;
init_failure:
rt_fibinfo_free_cpus(nh->nh_pcpu_rth_output);
nh->nh_pcpu_rth_output = NULL;
err_out:
return err;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@ -1711,12 +1713,12 @@ static int call_fib_nh_notifiers(struct fib_nh *nh,
* - if the new MTU is greater than the PMTU, don't make any change
* - otherwise, unlock and set PMTU
*/
static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig)
static void nh_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
{
struct fnhe_hash_bucket *bucket;
int i;
bucket = rcu_dereference_protected(nh->nh_exceptions, 1);
bucket = rcu_dereference_protected(nhc->nhc_exceptions, 1);
if (!bucket)
return;
@ -1747,7 +1749,7 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
hlist_for_each_entry(nh, head, nh_hash) {
if (nh->fib_nh_dev == dev)
nh_update_mtu(nh, dev->mtu, orig_mtu);
nh_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
}
}

View File

@ -643,8 +643,9 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh
}
}
static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
u32 pmtu, bool lock, unsigned long expires)
static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
__be32 gw, u32 pmtu, bool lock,
unsigned long expires)
{
struct fnhe_hash_bucket *hash;
struct fib_nh_exception *fnhe;
@ -653,17 +654,17 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
unsigned int i;
int depth;
genid = fnhe_genid(dev_net(nh->fib_nh_dev));
genid = fnhe_genid(dev_net(nhc->nhc_dev));
hval = fnhe_hashfun(daddr);
spin_lock_bh(&fnhe_lock);
hash = rcu_dereference(nh->nh_exceptions);
hash = rcu_dereference(nhc->nhc_exceptions);
if (!hash) {
hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC);
if (!hash)
goto out_unlock;
rcu_assign_pointer(nh->nh_exceptions, hash);
rcu_assign_pointer(nhc->nhc_exceptions, hash);
}
hash += hval;
@ -715,13 +716,13 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
* stale, so anyone caching it rechecks if this exception
* applies to them.
*/
rt = rcu_dereference(nh->nh_rth_input);
rt = rcu_dereference(nhc->nhc_rth_input);
if (rt)
rt->dst.obsolete = DST_OBSOLETE_KILL;
for_each_possible_cpu(i) {
struct rtable __rcu **prt;
prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i);
rt = rcu_dereference(*prt);
if (rt)
rt->dst.obsolete = DST_OBSOLETE_KILL;
@ -788,10 +789,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
} else {
if (fib_lookup(net, fl4, &res, 0) == 0) {
struct fib_nh_common *nhc = FIB_RES_NHC(res);
struct fib_nh *nh;
nh = container_of(nhc, struct fib_nh, nh_common);
update_or_create_fnhe(nh, fl4->daddr, new_gw,
update_or_create_fnhe(nhc, fl4->daddr, new_gw,
0, false,
jiffies + ip_rt_gc_timeout);
}
@ -1039,10 +1038,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
rcu_read_lock();
if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
struct fib_nh_common *nhc = FIB_RES_NHC(res);
struct fib_nh *nh;
nh = container_of(nhc, struct fib_nh, nh_common);
update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock,
update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
jiffies + ip_rt_mtu_expires);
}
rcu_read_unlock();
@ -1328,7 +1325,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr)
{
struct fnhe_hash_bucket *hash;
struct fib_nh_exception *fnhe, __rcu **fnhe_p;
@ -1336,7 +1333,7 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
spin_lock_bh(&fnhe_lock);
hash = rcu_dereference_protected(nh->nh_exceptions,
hash = rcu_dereference_protected(nhc->nhc_exceptions,
lockdep_is_held(&fnhe_lock));
hash += hval;
@ -1362,9 +1359,10 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
spin_unlock_bh(&fnhe_lock);
}
static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc,
__be32 daddr)
{
struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions);
struct fib_nh_exception *fnhe;
u32 hval;
@ -1378,7 +1376,7 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
if (fnhe->fnhe_daddr == daddr) {
if (fnhe->fnhe_expires &&
time_after(jiffies, fnhe->fnhe_expires)) {
ip_del_fnhe(nh, daddr);
ip_del_fnhe(nhc, daddr);
break;
}
return fnhe;
@ -1405,10 +1403,9 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
mtu = fi->fib_mtu;
if (likely(!mtu)) {
struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common);
struct fib_nh_exception *fnhe;
fnhe = find_exception(nh, daddr);
fnhe = find_exception(nhc, daddr);
if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires))
mtu = fnhe->fnhe_pmtu;
}
@ -1469,15 +1466,15 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
return ret;
}
static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt)
{
struct rtable *orig, *prev, **p;
bool ret = true;
if (rt_is_input_route(rt)) {
p = (struct rtable **)&nh->nh_rth_input;
p = (struct rtable **)&nhc->nhc_rth_input;
} else {
p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output);
p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
}
orig = *p;
@ -1574,7 +1571,6 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
if (fi) {
struct fib_nh_common *nhc = FIB_RES_NHC(*res);
struct fib_nh *nh;
if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) {
rt->rt_gw_family = nhc->nhc_gw_family;
@ -1587,15 +1583,19 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
ip_dst_init_metrics(&rt->dst, fi->fib_metrics);
nh = container_of(nhc, struct fib_nh, nh_common);
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
{
struct fib_nh *nh;
nh = container_of(nhc, struct fib_nh, nh_common);
rt->dst.tclassid = nh->nh_tclassid;
}
#endif
rt->dst.lwtstate = lwtstate_get(nh->fib_nh_lws);
rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
if (unlikely(fnhe))
cached = rt_bind_exception(rt, fnhe, daddr, do_cache);
else if (do_cache)
cached = rt_cache_route(nh, rt);
cached = rt_cache_route(nhc, rt);
if (unlikely(!cached)) {
/* Routes we intend to cache in nexthop exception or
* FIB nexthop have the DST_NOCACHE bit clear.
@ -1756,7 +1756,6 @@ static int __mkroute_input(struct sk_buff *skb,
struct net_device *dev = nhc->nhc_dev;
struct fib_nh_exception *fnhe;
struct rtable *rth;
struct fib_nh *nh;
int err;
struct in_device *out_dev;
bool do_cache;
@ -1804,13 +1803,12 @@ static int __mkroute_input(struct sk_buff *skb,
}
}
nh = container_of(nhc, struct fib_nh, nh_common);
fnhe = find_exception(nh, daddr);
fnhe = find_exception(nhc, daddr);
if (do_cache) {
if (fnhe)
rth = rcu_dereference(fnhe->fnhe_rth_input);
else
rth = rcu_dereference(nh->nh_rth_input);
rth = rcu_dereference(nhc->nhc_rth_input);
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
goto out;
@ -2105,10 +2103,8 @@ local_input:
if (res->fi) {
if (!itag) {
struct fib_nh_common *nhc = FIB_RES_NHC(*res);
struct fib_nh *nh;
nh = container_of(nhc, struct fib_nh, nh_common);
rth = rcu_dereference(nh->nh_rth_input);
rth = rcu_dereference(nhc->nhc_rth_input);
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
err = 0;
@ -2139,7 +2135,6 @@ local_input:
if (do_cache) {
struct fib_nh_common *nhc = FIB_RES_NHC(*res);
struct fib_nh *nh;
rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate);
if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
@ -2148,8 +2143,7 @@ local_input:
rth->dst.input = lwtunnel_input;
}
nh = container_of(nhc, struct fib_nh, nh_common);
if (unlikely(!rt_cache_route(nh, rth)))
if (unlikely(!rt_cache_route(nhc, rth)))
rt_add_uncached_list(rth);
}
skb_dst_set(skb, &rth->dst);
@ -2321,10 +2315,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
do_cache &= fi != NULL;
if (fi) {
struct fib_nh_common *nhc = FIB_RES_NHC(*res);
struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common);
struct rtable __rcu **prth;
fnhe = find_exception(nh, fl4->daddr);
fnhe = find_exception(nhc, fl4->daddr);
if (!do_cache)
goto add;
if (fnhe) {
@ -2337,7 +2330,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
do_cache = false;
goto add;
}
prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output);
}
rth = rcu_dereference(*prth);
if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst))