net/ipv6: Cleanup exception and cache route handling

The IPv6 FIB will contain only FIB entries, with exception routes added to
the FIB entry. Once this transformation is complete, FIB lookups will
return a fib6_info, with the lookup functions still returning a dst-based
rt6_info. The current code uses rt6_info for both paths and
overloads the rt6_info variable usually called 'rt'.

This patch introduces a new 'f6i' variable name for the result of the FIB
lookup and keeps 'rt' as the dst-based return variable. 'f6i' becomes a
fib6_info in a later patch, which is why it is introduced as f6i now;
doing so avoids additional churn in that later patch.

In addition, remove the RTF_CACHE and dst checks from fib6 add and delete,
since those cases cannot happen now and will never happen after the data
type flip.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David Ahern 2018-04-17 17:33:23 -07:00 committed by David S. Miller
parent acb54e3cba
commit 23fb93a4d3
3 changed files with 81 additions and 78 deletions

View File

@ -106,7 +106,6 @@ int ip6_ins_rt(struct net *net, struct rt6_info *rt);
int ip6_del_rt(struct net *net, struct rt6_info *rt); int ip6_del_rt(struct net *net, struct rt6_info *rt);
void rt6_flush_exceptions(struct rt6_info *rt); void rt6_flush_exceptions(struct rt6_info *rt);
int rt6_remove_exception_rt(struct rt6_info *rt);
void rt6_age_exceptions(struct rt6_info *rt, struct fib6_gc_args *gc_args, void rt6_age_exceptions(struct rt6_info *rt, struct fib6_gc_args *gc_args,
unsigned long now); unsigned long now);

View File

@ -1074,7 +1074,7 @@ add:
static void fib6_start_gc(struct net *net, struct rt6_info *rt) static void fib6_start_gc(struct net *net, struct rt6_info *rt)
{ {
if (!timer_pending(&net->ipv6.ip6_fib_timer) && if (!timer_pending(&net->ipv6.ip6_fib_timer) &&
(rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE))) (rt->rt6i_flags & RTF_EXPIRES))
mod_timer(&net->ipv6.ip6_fib_timer, mod_timer(&net->ipv6.ip6_fib_timer,
jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
} }
@ -1125,8 +1125,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt))) if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt)))
return -EINVAL; return -EINVAL;
if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE))
return -EINVAL;
if (info->nlh) { if (info->nlh) {
if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
@ -1650,8 +1648,6 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
RT6_TRACE("fib6_del_route\n"); RT6_TRACE("fib6_del_route\n");
WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
/* Unlink it */ /* Unlink it */
*rtp = rt->rt6_next; *rtp = rt->rt6_next;
rt->rt6i_node = NULL; rt->rt6i_node = NULL;
@ -1720,21 +1716,11 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
struct rt6_info __rcu **rtp; struct rt6_info __rcu **rtp;
struct rt6_info __rcu **rtp_next; struct rt6_info __rcu **rtp_next;
#if RT6_DEBUG >= 2
if (rt->dst.obsolete > 0) {
WARN_ON(fn);
return -ENOENT;
}
#endif
if (!fn || rt == net->ipv6.fib6_null_entry) if (!fn || rt == net->ipv6.fib6_null_entry)
return -ENOENT; return -ENOENT;
WARN_ON(!(fn->fn_flags & RTN_RTINFO)); WARN_ON(!(fn->fn_flags & RTN_RTINFO));
/* remove cached dst from exception table */
if (rt->rt6i_flags & RTF_CACHE)
return rt6_remove_exception_rt(rt);
/* /*
* Walk the leaf entries looking for ourself * Walk the leaf entries looking for ourself
*/ */

View File

@ -1013,8 +1013,8 @@ static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
BUG_ON(from->from); BUG_ON(from->from);
rt->rt6i_flags &= ~RTF_EXPIRES; rt->rt6i_flags &= ~RTF_EXPIRES;
dst_hold(&from->dst); if (dst_hold_safe(&from->dst))
rt->from = from; rt->from = from;
dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true); dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
if (from->fib6_metrics != &dst_default_metrics) { if (from->fib6_metrics != &dst_default_metrics) {
rt->dst._metrics |= DST_METRICS_REFCOUNTED; rt->dst._metrics |= DST_METRICS_REFCOUNTED;
@ -1097,8 +1097,9 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
const struct sk_buff *skb, const struct sk_buff *skb,
int flags) int flags)
{ {
struct rt6_info *rt, *rt_cache; struct rt6_info *f6i;
struct fib6_node *fn; struct fib6_node *fn;
struct rt6_info *rt;
if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
flags &= ~RT6_LOOKUP_F_IFACE; flags &= ~RT6_LOOKUP_F_IFACE;
@ -1106,36 +1107,36 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
rcu_read_lock(); rcu_read_lock();
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart: restart:
rt = rcu_dereference(fn->leaf); f6i = rcu_dereference(fn->leaf);
if (!rt) { if (!f6i) {
rt = net->ipv6.fib6_null_entry; f6i = net->ipv6.fib6_null_entry;
} else { } else {
rt = rt6_device_match(net, rt, &fl6->saddr, f6i = rt6_device_match(net, f6i, &fl6->saddr,
fl6->flowi6_oif, flags); fl6->flowi6_oif, flags);
if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) if (f6i->rt6i_nsiblings && fl6->flowi6_oif == 0)
rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif, f6i = rt6_multipath_select(net, f6i, fl6,
skb, flags); fl6->flowi6_oif, skb, flags);
} }
if (rt == net->ipv6.fib6_null_entry) { if (f6i == net->ipv6.fib6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr); fn = fib6_backtrack(fn, &fl6->saddr);
if (fn) if (fn)
goto restart; goto restart;
} }
/* Search through exception table */ /* Search through exception table */
rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
if (rt_cache) { if (rt) {
rt = rt_cache;
if (ip6_hold_safe(net, &rt, true)) if (ip6_hold_safe(net, &rt, true))
dst_use_noref(&rt->dst, jiffies); dst_use_noref(&rt->dst, jiffies);
} else if (dst_hold_safe(&rt->dst)) { } else if (f6i == net->ipv6.fib6_null_entry) {
struct rt6_info *nrt;
nrt = ip6_create_rt_rcu(rt);
dst_release(&rt->dst);
rt = nrt;
} else {
rt = net->ipv6.ip6_null_entry; rt = net->ipv6.ip6_null_entry;
dst_hold(&rt->dst); dst_hold(&rt->dst);
} else {
rt = ip6_create_rt_rcu(f6i);
if (!rt) {
rt = net->ipv6.ip6_null_entry;
dst_hold(&rt->dst);
}
} }
rcu_read_unlock(); rcu_read_unlock();
@ -1218,9 +1219,6 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
* Clone the route. * Clone the route.
*/ */
if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
ort = ort->from;
rcu_read_lock(); rcu_read_lock();
dev = ip6_rt_get_dev_rcu(ort); dev = ip6_rt_get_dev_rcu(ort);
rt = __ip6_dst_alloc(dev_net(dev), dev, 0); rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
@ -1446,11 +1444,6 @@ static int rt6_insert_exception(struct rt6_info *nrt,
struct rt6_exception *rt6_ex; struct rt6_exception *rt6_ex;
int err = 0; int err = 0;
/* ort can't be a cache or pcpu route */
if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
ort = ort->from;
WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
spin_lock_bh(&rt6_exception_lock); spin_lock_bh(&rt6_exception_lock);
if (ort->exception_bucket_flushed) { if (ort->exception_bucket_flushed) {
@ -1589,7 +1582,7 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
} }
/* Remove the passed in cached rt from the hash table that contains it */ /* Remove the passed in cached rt from the hash table that contains it */
int rt6_remove_exception_rt(struct rt6_info *rt) static int rt6_remove_exception_rt(struct rt6_info *rt)
{ {
struct rt6_exception_bucket *bucket; struct rt6_exception_bucket *bucket;
struct rt6_info *from = rt->from; struct rt6_info *from = rt->from;
@ -1854,7 +1847,8 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
const struct sk_buff *skb, int flags) const struct sk_buff *skb, int flags)
{ {
struct fib6_node *fn, *saved_fn; struct fib6_node *fn, *saved_fn;
struct rt6_info *rt, *rt_cache; struct rt6_info *f6i;
struct rt6_info *rt;
int strict = 0; int strict = 0;
strict |= flags & RT6_LOOKUP_F_IFACE; strict |= flags & RT6_LOOKUP_F_IFACE;
@ -1871,10 +1865,10 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
oif = 0; oif = 0;
redo_rt6_select: redo_rt6_select:
rt = rt6_select(net, fn, oif, strict); f6i = rt6_select(net, fn, oif, strict);
if (rt->rt6i_nsiblings) if (f6i->rt6i_nsiblings)
rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict); f6i = rt6_multipath_select(net, f6i, fl6, oif, skb, strict);
if (rt == net->ipv6.fib6_null_entry) { if (f6i == net->ipv6.fib6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr); fn = fib6_backtrack(fn, &fl6->saddr);
if (fn) if (fn)
goto redo_rt6_select; goto redo_rt6_select;
@ -1886,18 +1880,17 @@ redo_rt6_select:
} }
} }
/*Search through exception table */ if (f6i == net->ipv6.fib6_null_entry) {
rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
if (rt_cache)
rt = rt_cache;
if (rt == net->ipv6.fib6_null_entry) {
rt = net->ipv6.ip6_null_entry; rt = net->ipv6.ip6_null_entry;
rcu_read_unlock(); rcu_read_unlock();
dst_hold(&rt->dst); dst_hold(&rt->dst);
trace_fib6_table_lookup(net, rt, table, fl6); trace_fib6_table_lookup(net, rt, table, fl6);
return rt; return rt;
} else if (rt->rt6i_flags & RTF_CACHE) { }
/*Search through exception table */
rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
if (rt) {
if (ip6_hold_safe(net, &rt, true)) if (ip6_hold_safe(net, &rt, true))
dst_use_noref(&rt->dst, jiffies); dst_use_noref(&rt->dst, jiffies);
@ -1905,7 +1898,7 @@ redo_rt6_select:
trace_fib6_table_lookup(net, rt, table, fl6); trace_fib6_table_lookup(net, rt, table, fl6);
return rt; return rt;
} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
!(rt->rt6i_flags & RTF_GATEWAY))) { !(f6i->rt6i_flags & RTF_GATEWAY))) {
/* Create a RTF_CACHE clone which will not be /* Create a RTF_CACHE clone which will not be
* owned by the fib6 tree. It is for the special case where * owned by the fib6 tree. It is for the special case where
* the daddr in the skb during the neighbor look-up is different * the daddr in the skb during the neighbor look-up is different
@ -1914,16 +1907,16 @@ redo_rt6_select:
struct rt6_info *uncached_rt; struct rt6_info *uncached_rt;
if (ip6_hold_safe(net, &rt, true)) { if (ip6_hold_safe(net, &f6i, true)) {
dst_use_noref(&rt->dst, jiffies); dst_use_noref(&f6i->dst, jiffies);
} else { } else {
rcu_read_unlock(); rcu_read_unlock();
uncached_rt = rt; uncached_rt = f6i;
goto uncached_rt_out; goto uncached_rt_out;
} }
rcu_read_unlock(); rcu_read_unlock();
uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL); uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
dst_release(&rt->dst); dst_release(&rt->dst);
if (uncached_rt) { if (uncached_rt) {
@ -1946,18 +1939,18 @@ uncached_rt_out:
struct rt6_info *pcpu_rt; struct rt6_info *pcpu_rt;
dst_use_noref(&rt->dst, jiffies); dst_use_noref(&f6i->dst, jiffies);
local_bh_disable(); local_bh_disable();
pcpu_rt = rt6_get_pcpu_route(rt); pcpu_rt = rt6_get_pcpu_route(f6i);
if (!pcpu_rt) { if (!pcpu_rt) {
/* atomic_inc_not_zero() is needed when using rcu */ /* atomic_inc_not_zero() is needed when using rcu */
if (atomic_inc_not_zero(&rt->rt6i_ref)) { if (atomic_inc_not_zero(&f6i->rt6i_ref)) {
/* No dst_hold() on rt is needed because grabbing /* No dst_hold() on rt is needed because grabbing
* rt->rt6i_ref makes sure rt can't be released. * rt->rt6i_ref makes sure rt can't be released.
*/ */
pcpu_rt = rt6_make_pcpu_route(net, rt); pcpu_rt = rt6_make_pcpu_route(net, f6i);
rt6_release(rt); rt6_release(f6i);
} else { } else {
/* rt is already removed from tree */ /* rt is already removed from tree */
pcpu_rt = net->ipv6.ip6_null_entry; pcpu_rt = net->ipv6.ip6_null_entry;
@ -2419,7 +2412,8 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
int flags) int flags)
{ {
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
struct rt6_info *rt, *rt_cache; struct rt6_info *ret = NULL, *rt_cache;
struct rt6_info *rt;
struct fib6_node *fn; struct fib6_node *fn;
/* Get the "current" route for this destination and /* Get the "current" route for this destination and
@ -2458,7 +2452,7 @@ restart:
if (rt_cache && if (rt_cache &&
ipv6_addr_equal(&rdfl->gateway, ipv6_addr_equal(&rdfl->gateway,
&rt_cache->rt6i_gateway)) { &rt_cache->rt6i_gateway)) {
rt = rt_cache; ret = rt_cache;
break; break;
} }
continue; continue;
@ -2469,7 +2463,7 @@ restart:
if (!rt) if (!rt)
rt = net->ipv6.fib6_null_entry; rt = net->ipv6.fib6_null_entry;
else if (rt->rt6i_flags & RTF_REJECT) { else if (rt->rt6i_flags & RTF_REJECT) {
rt = net->ipv6.ip6_null_entry; ret = net->ipv6.ip6_null_entry;
goto out; goto out;
} }
@ -2480,12 +2474,15 @@ restart:
} }
out: out:
ip6_hold_safe(net, &rt, true); if (ret)
dst_hold(&ret->dst);
else
ret = ip6_create_rt_rcu(rt);
rcu_read_unlock(); rcu_read_unlock();
trace_fib6_table_lookup(net, rt, table, fl6); trace_fib6_table_lookup(net, ret, table, fl6);
return rt; return ret;
}; };
static struct dst_entry *ip6_route_redirect(struct net *net, static struct dst_entry *ip6_route_redirect(struct net *net,
@ -3182,6 +3179,22 @@ out_put:
return err; return err;
} }
/*
 * Delete a cached (exception) route on behalf of a route-delete request.
 *
 * @rt:  cached route the caller found in the exception table
 * @cfg: delete request; fc_ifindex (when non-zero) and fc_gateway (when
 *       RTF_GATEWAY is set in fc_flags) act as optional match filters
 *
 * Returns -ESRCH when @rt does not match the filters in @cfg; the caller
 * treats that value as "no match, keep walking". Otherwise returns
 * whatever rt6_remove_exception_rt() returns.
 */
static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
{
	/* Device filter: only applied when the request names an interface. */
	if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
		return -ESRCH;

	/* Gateway filter: only applied when the request carries a gateway. */
	if ((cfg->fc_flags & RTF_GATEWAY) &&
	    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
		return -ESRCH;

	/*
	 * Do not take a dst reference here. The previous dst_hold_safe()
	 * call was never paired with a release in this function, so every
	 * successful delete leaked one reference on the cached route.
	 * NOTE(review): this relies on the caller keeping @rt alive for the
	 * duration of the call (presumably via its RCU read-side section) --
	 * confirm against ip6_route_del().
	 */
	return rt6_remove_exception_rt(rt);
}
static int ip6_route_del(struct fib6_config *cfg, static int ip6_route_del(struct fib6_config *cfg,
struct netlink_ext_ack *extack) struct netlink_ext_ack *extack)
{ {
@ -3206,11 +3219,16 @@ static int ip6_route_del(struct fib6_config *cfg,
if (fn) { if (fn) {
for_each_fib6_node_rt_rcu(fn) { for_each_fib6_node_rt_rcu(fn) {
if (cfg->fc_flags & RTF_CACHE) { if (cfg->fc_flags & RTF_CACHE) {
int rc;
rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst, rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
&cfg->fc_src); &cfg->fc_src);
if (!rt_cache) if (rt_cache) {
continue; rc = ip6_del_cached_rt(rt_cache, cfg);
rt = rt_cache; if (rc != -ESRCH)
return rc;
}
continue;
} }
if (cfg->fc_ifindex && if (cfg->fc_ifindex &&
(!rt->fib6_nh.nh_dev || (!rt->fib6_nh.nh_dev ||
@ -3327,7 +3345,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
NEIGH_UPDATE_F_ISROUTER)), NEIGH_UPDATE_F_ISROUTER)),
NDISC_REDIRECT, &ndopts); NDISC_REDIRECT, &ndopts);
nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL); nrt = ip6_rt_cache_alloc(rt->from, &msg->dest, NULL);
if (!nrt) if (!nrt)
goto out; goto out;