net/ipv6: Move nexthop data to fib6_nh

Introduce fib6_nh structure and move nexthop related data from
rt6_info and rt6_info.dst to fib6_nh. References to dev, gateway or
lwtstate from a FIB lookup perspective are converted to use fib6_nh;
datapath references to dst version are left as is.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David Ahern 2018-04-17 17:33:14 -07:00 committed by David S. Miller
parent e8478e80e5
commit 5e670d844b
6 changed files with 125 additions and 99 deletions

View File

@ -2770,9 +2770,9 @@ mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
struct in6_addr *gw; struct in6_addr *gw;
int ifindex, weight; int ifindex, weight;
ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex; ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex;
weight = mlxsw_sp_rt6->rt->rt6i_nh_weight; weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight;
gw = &mlxsw_sp_rt6->rt->rt6i_gateway; gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw;
if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex, if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
weight)) weight))
return false; return false;
@ -2838,7 +2838,7 @@ mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
struct net_device *dev; struct net_device *dev;
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
dev = mlxsw_sp_rt6->rt->dst.dev; dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev;
val ^= dev->ifindex; val ^= dev->ifindex;
} }
@ -3836,9 +3836,9 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
struct rt6_info *rt = mlxsw_sp_rt6->rt; struct rt6_info *rt = mlxsw_sp_rt6->rt;
if (nh->rif && nh->rif->dev == rt->dst.dev && if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev &&
ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr, ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
&rt->rt6i_gateway)) &rt->fib6_nh.nh_gw))
return nh; return nh;
continue; continue;
} }
@ -3895,7 +3895,7 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) { if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) {
list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD; list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
return; return;
} }
@ -3905,9 +3905,9 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6); nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
if (nh && nh->offloaded) if (nh && nh->offloaded)
mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD; mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD;
else else
mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD; mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
} }
} }
@ -3922,7 +3922,7 @@ mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
struct rt6_info *rt = mlxsw_sp_rt6->rt; struct rt6_info *rt = mlxsw_sp_rt6->rt;
rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD; rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD;
} }
} }
@ -4818,8 +4818,8 @@ static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
const struct rt6_info *rt, const struct rt6_info *rt,
enum mlxsw_sp_ipip_type *ret) enum mlxsw_sp_ipip_type *ret)
{ {
return rt->dst.dev && return rt->fib6_nh.nh_dev &&
mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret); mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret);
} }
static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
@ -4829,7 +4829,7 @@ static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
{ {
const struct mlxsw_sp_ipip_ops *ipip_ops; const struct mlxsw_sp_ipip_ops *ipip_ops;
struct mlxsw_sp_ipip_entry *ipip_entry; struct mlxsw_sp_ipip_entry *ipip_entry;
struct net_device *dev = rt->dst.dev; struct net_device *dev = rt->fib6_nh.nh_dev;
struct mlxsw_sp_rif *rif; struct mlxsw_sp_rif *rif;
int err; int err;
@ -4872,11 +4872,11 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh, struct mlxsw_sp_nexthop *nh,
const struct rt6_info *rt) const struct rt6_info *rt)
{ {
struct net_device *dev = rt->dst.dev; struct net_device *dev = rt->fib6_nh.nh_dev;
nh->nh_grp = nh_grp; nh->nh_grp = nh_grp;
nh->nh_weight = rt->rt6i_nh_weight; nh->nh_weight = rt->fib6_nh.nh_weight;
memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr)); memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr));
mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);

View File

@ -127,6 +127,16 @@ struct rt6_exception {
#define FIB6_EXCEPTION_BUCKET_SIZE (1 << FIB6_EXCEPTION_BUCKET_SIZE_SHIFT) #define FIB6_EXCEPTION_BUCKET_SIZE (1 << FIB6_EXCEPTION_BUCKET_SIZE_SHIFT)
#define FIB6_MAX_DEPTH 5 #define FIB6_MAX_DEPTH 5
/*
 * Nexthop data for an IPv6 FIB entry, embedded in struct rt6_info as
 * the fib6_nh member. Holds the fields FIB lookup code uses to describe
 * where a route points: gateway, egress device, lwtunnel state and the
 * multipath bookkeeping.
 */
struct fib6_nh {
/* Gateway address; compared and reported when the route has RTF_GATEWAY. */
struct in6_addr nh_gw;
/* Nexthop net_device; callers in this change use it where dst.dev was used. */
struct net_device *nh_dev;
/* Lightweight tunnel encap state for this nexthop. */
struct lwtunnel_state *nh_lwtstate;
/* RTNH_F_* flags (e.g. RTNH_F_DEAD, RTNH_F_LINKDOWN, RTNH_F_ONLINK, RTNH_F_OFFLOAD). */
unsigned int nh_flags;
/* Multipath selection bound: compared against the flow's mp_hash; set to -1 when the nexthop is dead. */
atomic_t nh_upper_bound;
/* Multipath weight; set to 1 at route creation and to rtnh_hops + 1 for multipath adds. */
int nh_weight;
};
struct rt6_info { struct rt6_info {
struct dst_entry dst; struct dst_entry dst;
struct rt6_info __rcu *rt6_next; struct rt6_info __rcu *rt6_next;
@ -149,12 +159,9 @@ struct rt6_info {
*/ */
struct list_head rt6i_siblings; struct list_head rt6i_siblings;
unsigned int rt6i_nsiblings; unsigned int rt6i_nsiblings;
atomic_t rt6i_nh_upper_bound;
atomic_t rt6i_ref; atomic_t rt6i_ref;
unsigned int rt6i_nh_flags;
/* These are in a separate cache line. */ /* These are in a separate cache line. */
struct rt6key rt6i_dst ____cacheline_aligned_in_smp; struct rt6key rt6i_dst ____cacheline_aligned_in_smp;
u32 rt6i_flags; u32 rt6i_flags;
@ -171,13 +178,14 @@ struct rt6_info {
u32 rt6i_metric; u32 rt6i_metric;
u32 rt6i_pmtu; u32 rt6i_pmtu;
/* more non-fragment space at head required */ /* more non-fragment space at head required */
int rt6i_nh_weight;
unsigned short rt6i_nfheader_len; unsigned short rt6i_nfheader_len;
u8 rt6i_protocol; u8 rt6i_protocol;
u8 fib6_type; u8 fib6_type;
u8 exception_bucket_flushed:1, u8 exception_bucket_flushed:1,
should_flush:1, should_flush:1,
unused:6; unused:6;
struct fib6_nh fib6_nh;
}; };
#define for_each_fib6_node_rt_rcu(fn) \ #define for_each_fib6_node_rt_rcu(fn) \

View File

@ -273,10 +273,10 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt,
static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b) static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b)
{ {
return a->dst.dev == b->dst.dev && return a->fib6_nh.nh_dev == b->fib6_nh.nh_dev &&
a->rt6i_idev == b->rt6i_idev && a->rt6i_idev == b->rt6i_idev &&
ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) && ipv6_addr_equal(&a->fib6_nh.nh_gw, &b->fib6_nh.nh_gw) &&
!lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate); !lwtunnel_cmp_encap(a->fib6_nh.nh_lwtstate, b->fib6_nh.nh_lwtstate);
} }
#endif #endif

View File

@ -2369,7 +2369,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
goto out; goto out;
for_each_fib6_node_rt_rcu(fn) { for_each_fib6_node_rt_rcu(fn) {
if (rt->dst.dev->ifindex != dev->ifindex) if (rt->fib6_nh.nh_dev->ifindex != dev->ifindex)
continue; continue;
if ((rt->rt6i_flags & flags) != flags) if ((rt->rt6i_flags & flags) != flags)
continue; continue;

View File

@ -2221,6 +2221,7 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
{ {
struct rt6_info *rt = v; struct rt6_info *rt = v;
struct ipv6_route_iter *iter = seq->private; struct ipv6_route_iter *iter = seq->private;
const struct net_device *dev;
seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
@ -2230,14 +2231,15 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, "00000000000000000000000000000000 00 "); seq_puts(seq, "00000000000000000000000000000000 00 ");
#endif #endif
if (rt->rt6i_flags & RTF_GATEWAY) if (rt->rt6i_flags & RTF_GATEWAY)
seq_printf(seq, "%pi6", &rt->rt6i_gateway); seq_printf(seq, "%pi6", &rt->fib6_nh.nh_gw);
else else
seq_puts(seq, "00000000000000000000000000000000"); seq_puts(seq, "00000000000000000000000000000000");
dev = rt->fib6_nh.nh_dev;
seq_printf(seq, " %08x %08x %08x %08x %8s\n", seq_printf(seq, " %08x %08x %08x %08x %8s\n",
rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
rt->dst.__use, rt->rt6i_flags, rt->dst.__use, rt->rt6i_flags,
rt->dst.dev ? rt->dst.dev->name : ""); dev ? dev->name : "");
iter->w.leaf = NULL; iter->w.leaf = NULL;
return 0; return 0;
} }

View File

@ -466,12 +466,15 @@ static struct rt6_info *rt6_multipath_select(const struct net *net,
if (!fl6->mp_hash) if (!fl6->mp_hash)
fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL); fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound)) if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
return match; return match;
list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings, list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
rt6i_siblings) { rt6i_siblings) {
if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound)) int nh_upper_bound;
nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
if (fl6->mp_hash > nh_upper_bound)
continue; continue;
if (rt6_score_route(sibling, oif, strict) < 0) if (rt6_score_route(sibling, oif, strict) < 0)
break; break;
@ -495,13 +498,14 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
struct rt6_info *local = NULL; struct rt6_info *local = NULL;
struct rt6_info *sprt; struct rt6_info *sprt;
if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD)) if (!oif && ipv6_addr_any(saddr) &&
!(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
return rt; return rt;
for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) { for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
struct net_device *dev = sprt->dst.dev; const struct net_device *dev = sprt->fib6_nh.nh_dev;
if (sprt->rt6i_nh_flags & RTNH_F_DEAD) if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
continue; continue;
if (oif) { if (oif) {
@ -533,7 +537,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
return net->ipv6.ip6_null_entry; return net->ipv6.ip6_null_entry;
} }
return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt; return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt;
} }
#ifdef CONFIG_IPV6_ROUTER_PREF #ifdef CONFIG_IPV6_ROUTER_PREF
@ -558,7 +562,10 @@ static void rt6_probe_deferred(struct work_struct *w)
static void rt6_probe(struct rt6_info *rt) static void rt6_probe(struct rt6_info *rt)
{ {
struct __rt6_probe_work *work; struct __rt6_probe_work *work;
const struct in6_addr *nh_gw;
struct neighbour *neigh; struct neighbour *neigh;
struct net_device *dev;
/* /*
* Okay, this does not seem to be appropriate * Okay, this does not seem to be appropriate
* for now, however, we need to check if it * for now, however, we need to check if it
@ -569,8 +576,11 @@ static void rt6_probe(struct rt6_info *rt)
*/ */
if (!rt || !(rt->rt6i_flags & RTF_GATEWAY)) if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
return; return;
nh_gw = &rt->fib6_nh.nh_gw;
dev = rt->fib6_nh.nh_dev;
rcu_read_lock_bh(); rcu_read_lock_bh();
neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
if (neigh) { if (neigh) {
if (neigh->nud_state & NUD_VALID) if (neigh->nud_state & NUD_VALID)
goto out; goto out;
@ -592,9 +602,9 @@ static void rt6_probe(struct rt6_info *rt)
if (work) { if (work) {
INIT_WORK(&work->work, rt6_probe_deferred); INIT_WORK(&work->work, rt6_probe_deferred);
work->target = rt->rt6i_gateway; work->target = *nh_gw;
dev_hold(rt->dst.dev); dev_hold(dev);
work->dev = rt->dst.dev; work->dev = dev;
schedule_work(&work->work); schedule_work(&work->work);
} }
@ -612,7 +622,8 @@ static inline void rt6_probe(struct rt6_info *rt)
*/ */
static inline int rt6_check_dev(struct rt6_info *rt, int oif) static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{ {
struct net_device *dev = rt->dst.dev; const struct net_device *dev = rt->fib6_nh.nh_dev;
if (!oif || dev->ifindex == oif) if (!oif || dev->ifindex == oif)
return 2; return 2;
if ((dev->flags & IFF_LOOPBACK) && if ((dev->flags & IFF_LOOPBACK) &&
@ -623,15 +634,16 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif)
static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt) static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{ {
struct neighbour *neigh;
enum rt6_nud_state ret = RT6_NUD_FAIL_HARD; enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
struct neighbour *neigh;
if (rt->rt6i_flags & RTF_NONEXTHOP || if (rt->rt6i_flags & RTF_NONEXTHOP ||
!(rt->rt6i_flags & RTF_GATEWAY)) !(rt->rt6i_flags & RTF_GATEWAY))
return RT6_NUD_SUCCEED; return RT6_NUD_SUCCEED;
rcu_read_lock_bh(); rcu_read_lock_bh();
neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
&rt->fib6_nh.nh_gw);
if (neigh) { if (neigh) {
read_lock(&neigh->lock); read_lock(&neigh->lock);
if (neigh->nud_state & NUD_VALID) if (neigh->nud_state & NUD_VALID)
@ -679,11 +691,11 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
bool match_do_rr = false; bool match_do_rr = false;
struct inet6_dev *idev = rt->rt6i_idev; struct inet6_dev *idev = rt->rt6i_idev;
if (rt->rt6i_nh_flags & RTNH_F_DEAD) if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
goto out; goto out;
if (idev->cnf.ignore_routes_with_linkdown && if (idev->cnf.ignore_routes_with_linkdown &&
rt->rt6i_nh_flags & RTNH_F_LINKDOWN && rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
!(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE)) !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
goto out; goto out;
@ -888,7 +900,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
/* called with rcu_lock held */ /* called with rcu_lock held */
static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt) static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
{ {
struct net_device *dev = rt->dst.dev; struct net_device *dev = rt->fib6_nh.nh_dev;
if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) { if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
/* for copies of local routes, dst->dev needs to be the /* for copies of local routes, dst->dev needs to be the
@ -928,7 +940,7 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
if (rt->rt6i_idev) if (rt->rt6i_idev)
in6_dev_hold(rt->rt6i_idev); in6_dev_hold(rt->rt6i_idev);
rt->dst.lastuse = jiffies; rt->dst.lastuse = jiffies;
rt->rt6i_gateway = ort->rt6i_gateway; rt->rt6i_gateway = ort->fib6_nh.nh_gw;
rt->rt6i_flags = ort->rt6i_flags; rt->rt6i_flags = ort->rt6i_flags;
rt6_set_from(rt, ort); rt6_set_from(rt, ort);
rt->rt6i_metric = ort->rt6i_metric; rt->rt6i_metric = ort->rt6i_metric;
@ -937,7 +949,7 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
#endif #endif
rt->rt6i_prefsrc = ort->rt6i_prefsrc; rt->rt6i_prefsrc = ort->rt6i_prefsrc;
rt->rt6i_table = ort->rt6i_table; rt->rt6i_table = ort->rt6i_table;
rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate); rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
} }
static struct fib6_node* fib6_backtrack(struct fib6_node *fn, static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
@ -1308,7 +1320,7 @@ __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
static int rt6_insert_exception(struct rt6_info *nrt, static int rt6_insert_exception(struct rt6_info *nrt,
struct rt6_info *ort) struct rt6_info *ort)
{ {
struct net *net = dev_net(ort->dst.dev); struct net *net = dev_net(nrt->dst.dev);
struct rt6_exception_bucket *bucket; struct rt6_exception_bucket *bucket;
struct in6_addr *src_key = NULL; struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex; struct rt6_exception *rt6_ex;
@ -2313,7 +2325,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart: restart:
for_each_fib6_node_rt_rcu(fn) { for_each_fib6_node_rt_rcu(fn) {
if (rt->rt6i_nh_flags & RTNH_F_DEAD) if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
continue; continue;
if (rt6_check_expired(rt)) if (rt6_check_expired(rt))
continue; continue;
@ -2321,14 +2333,14 @@ restart:
break; break;
if (!(rt->rt6i_flags & RTF_GATEWAY)) if (!(rt->rt6i_flags & RTF_GATEWAY))
continue; continue;
if (fl6->flowi6_oif != rt->dst.dev->ifindex) if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
continue; continue;
/* rt_cache's gateway might be different from its 'parent' /* rt_cache's gateway might be different from its 'parent'
* in the case of an ip redirect. * in the case of an ip redirect.
* So we keep searching in the exception table if the gateway * So we keep searching in the exception table if the gateway
* is different. * is different.
*/ */
if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) { if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
rt_cache = rt6_find_cached_rt(rt, rt_cache = rt6_find_cached_rt(rt,
&fl6->daddr, &fl6->daddr,
&fl6->saddr); &fl6->saddr);
@ -2905,7 +2917,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
&lwtstate, extack); &lwtstate, extack);
if (err) if (err)
goto out; goto out;
rt->dst.lwtstate = lwtstate_get(lwtstate); rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
lwtunnel_set_redirect(&rt->dst); lwtunnel_set_redirect(&rt->dst);
} }
@ -2920,7 +2932,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
#endif #endif
rt->rt6i_metric = cfg->fc_metric; rt->rt6i_metric = cfg->fc_metric;
rt->rt6i_nh_weight = 1; rt->fib6_nh.nh_weight = 1;
rt->fib6_type = cfg->fc_type; rt->fib6_type = cfg->fc_type;
@ -2975,7 +2987,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
if (err) if (err)
goto out; goto out;
rt->rt6i_gateway = cfg->fc_gateway; rt->fib6_nh.nh_gw = rt->rt6i_gateway = cfg->fc_gateway;
} }
err = -ENODEV; err = -ENODEV;
@ -3010,9 +3022,9 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
install_route: install_route:
if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) && if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
!netif_carrier_ok(dev)) !netif_carrier_ok(dev))
rt->rt6i_nh_flags |= RTNH_F_LINKDOWN; rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK); rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
rt->dst.dev = dev; rt->fib6_nh.nh_dev = rt->dst.dev = dev;
rt->rt6i_idev = idev; rt->rt6i_idev = idev;
rt->rt6i_table = table; rt->rt6i_table = table;
@ -3171,11 +3183,11 @@ static int ip6_route_del(struct fib6_config *cfg,
rt = rt_cache; rt = rt_cache;
} }
if (cfg->fc_ifindex && if (cfg->fc_ifindex &&
(!rt->dst.dev || (!rt->fib6_nh.nh_dev ||
rt->dst.dev->ifindex != cfg->fc_ifindex)) rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
continue; continue;
if (cfg->fc_flags & RTF_GATEWAY && if (cfg->fc_flags & RTF_GATEWAY &&
!ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
continue; continue;
if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
continue; continue;
@ -3337,11 +3349,11 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
goto out; goto out;
for_each_fib6_node_rt_rcu(fn) { for_each_fib6_node_rt_rcu(fn) {
if (rt->dst.dev->ifindex != ifindex) if (rt->fib6_nh.nh_dev->ifindex != ifindex)
continue; continue;
if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
continue; continue;
if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
continue; continue;
ip6_hold_safe(NULL, &rt, false); ip6_hold_safe(NULL, &rt, false);
break; break;
@ -3398,9 +3410,9 @@ struct rt6_info *rt6_get_dflt_router(struct net *net,
rcu_read_lock(); rcu_read_lock();
for_each_fib6_node_rt_rcu(&table->tb6_root) { for_each_fib6_node_rt_rcu(&table->tb6_root) {
if (dev == rt->dst.dev && if (dev == rt->fib6_nh.nh_dev &&
((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
ipv6_addr_equal(&rt->rt6i_gateway, addr)) ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
break; break;
} }
if (rt) if (rt)
@ -3627,6 +3639,8 @@ struct rt6_info *addrconf_dst_alloc(struct net *net,
rt->rt6i_flags |= RTF_LOCAL; rt->rt6i_flags |= RTF_LOCAL;
} }
rt->fib6_nh.nh_gw = *addr;
rt->fib6_nh.nh_dev = dev;
rt->rt6i_gateway = *addr; rt->rt6i_gateway = *addr;
rt->rt6i_dst.addr = *addr; rt->rt6i_dst.addr = *addr;
rt->rt6i_dst.plen = 128; rt->rt6i_dst.plen = 128;
@ -3649,7 +3663,7 @@ static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
struct net *net = ((struct arg_dev_net_ip *)arg)->net; struct net *net = ((struct arg_dev_net_ip *)arg)->net;
struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
if (((void *)rt->dst.dev == dev || !dev) && if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
rt != net->ipv6.ip6_null_entry && rt != net->ipv6.ip6_null_entry &&
ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) { ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
spin_lock_bh(&rt6_exception_lock); spin_lock_bh(&rt6_exception_lock);
@ -3681,7 +3695,7 @@ static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
struct in6_addr *gateway = (struct in6_addr *)arg; struct in6_addr *gateway = (struct in6_addr *)arg;
if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) && if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
ipv6_addr_equal(gateway, &rt->rt6i_gateway)) { ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
return -1; return -1;
} }
@ -3729,8 +3743,8 @@ static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
static bool rt6_is_dead(const struct rt6_info *rt) static bool rt6_is_dead(const struct rt6_info *rt)
{ {
if (rt->rt6i_nh_flags & RTNH_F_DEAD || if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
(rt->rt6i_nh_flags & RTNH_F_LINKDOWN && (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
rt->rt6i_idev->cnf.ignore_routes_with_linkdown)) rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
return true; return true;
@ -3743,11 +3757,11 @@ static int rt6_multipath_total_weight(const struct rt6_info *rt)
int total = 0; int total = 0;
if (!rt6_is_dead(rt)) if (!rt6_is_dead(rt))
total += rt->rt6i_nh_weight; total += rt->fib6_nh.nh_weight;
list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) { list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
if (!rt6_is_dead(iter)) if (!rt6_is_dead(iter))
total += iter->rt6i_nh_weight; total += iter->fib6_nh.nh_weight;
} }
return total; return total;
@ -3758,11 +3772,11 @@ static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
int upper_bound = -1; int upper_bound = -1;
if (!rt6_is_dead(rt)) { if (!rt6_is_dead(rt)) {
*weight += rt->rt6i_nh_weight; *weight += rt->fib6_nh.nh_weight;
upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31, upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
total) - 1; total) - 1;
} }
atomic_set(&rt->rt6i_nh_upper_bound, upper_bound); atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
} }
static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total) static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
@ -3805,8 +3819,8 @@ static int fib6_ifup(struct rt6_info *rt, void *p_arg)
const struct arg_netdev_event *arg = p_arg; const struct arg_netdev_event *arg = p_arg;
struct net *net = dev_net(arg->dev); struct net *net = dev_net(arg->dev);
if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) { if (rt != net->ipv6.ip6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
rt->rt6i_nh_flags &= ~arg->nh_flags; rt->fib6_nh.nh_flags &= ~arg->nh_flags;
fib6_update_sernum_upto_root(net, rt); fib6_update_sernum_upto_root(net, rt);
rt6_multipath_rebalance(rt); rt6_multipath_rebalance(rt);
} }
@ -3834,10 +3848,10 @@ static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
{ {
struct rt6_info *iter; struct rt6_info *iter;
if (rt->dst.dev == dev) if (rt->fib6_nh.nh_dev == dev)
return true; return true;
list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
if (iter->dst.dev == dev) if (iter->fib6_nh.nh_dev == dev)
return true; return true;
return false; return false;
@ -3858,11 +3872,12 @@ static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
struct rt6_info *iter; struct rt6_info *iter;
unsigned int dead = 0; unsigned int dead = 0;
if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD) if (rt->fib6_nh.nh_dev == down_dev ||
rt->fib6_nh.nh_flags & RTNH_F_DEAD)
dead++; dead++;
list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
if (iter->dst.dev == down_dev || if (iter->fib6_nh.nh_dev == down_dev ||
iter->rt6i_nh_flags & RTNH_F_DEAD) iter->fib6_nh.nh_flags & RTNH_F_DEAD)
dead++; dead++;
return dead; return dead;
@ -3874,11 +3889,11 @@ static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
{ {
struct rt6_info *iter; struct rt6_info *iter;
if (rt->dst.dev == dev) if (rt->fib6_nh.nh_dev == dev)
rt->rt6i_nh_flags |= nh_flags; rt->fib6_nh.nh_flags |= nh_flags;
list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
if (iter->dst.dev == dev) if (iter->fib6_nh.nh_dev == dev)
iter->rt6i_nh_flags |= nh_flags; iter->fib6_nh.nh_flags |= nh_flags;
} }
/* called with write lock held for table with rt */ /* called with write lock held for table with rt */
@ -3893,12 +3908,12 @@ static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
switch (arg->event) { switch (arg->event) {
case NETDEV_UNREGISTER: case NETDEV_UNREGISTER:
return rt->dst.dev == dev ? -1 : 0; return rt->fib6_nh.nh_dev == dev ? -1 : 0;
case NETDEV_DOWN: case NETDEV_DOWN:
if (rt->should_flush) if (rt->should_flush)
return -1; return -1;
if (!rt->rt6i_nsiblings) if (!rt->rt6i_nsiblings)
return rt->dst.dev == dev ? -1 : 0; return rt->fib6_nh.nh_dev == dev ? -1 : 0;
if (rt6_multipath_uses_dev(rt, dev)) { if (rt6_multipath_uses_dev(rt, dev)) {
unsigned int count; unsigned int count;
@ -3914,10 +3929,10 @@ static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
} }
return -2; return -2;
case NETDEV_CHANGE: case NETDEV_CHANGE:
if (rt->dst.dev != dev || if (rt->fib6_nh.nh_dev != dev ||
rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
break; break;
rt->rt6i_nh_flags |= RTNH_F_LINKDOWN; rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
rt6_multipath_rebalance(rt); rt6_multipath_rebalance(rt);
break; break;
} }
@ -3969,7 +3984,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
Since RFC 1981 doesn't include administrative MTU increase Since RFC 1981 doesn't include administrative MTU increase
update PMTU increase is a MUST. (i.e. jumbo frame) update PMTU increase is a MUST. (i.e. jumbo frame)
*/ */
if (rt->dst.dev == arg->dev && if (rt->fib6_nh.nh_dev == arg->dev &&
!dst_metric_locked(&rt->dst, RTAX_MTU)) { !dst_metric_locked(&rt->dst, RTAX_MTU)) {
spin_lock_bh(&rt6_exception_lock); spin_lock_bh(&rt6_exception_lock);
if (dst_metric_raw(&rt->dst, RTAX_MTU) && if (dst_metric_raw(&rt->dst, RTAX_MTU) &&
@ -4255,7 +4270,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
goto cleanup; goto cleanup;
} }
rt->rt6i_nh_weight = rtnh->rtnh_hops + 1; rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg); err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
if (err) { if (err) {
@ -4412,7 +4427,7 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt)
nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */ nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
+ NLA_ALIGN(sizeof(struct rtnexthop)) + NLA_ALIGN(sizeof(struct rtnexthop))
+ nla_total_size(16) /* RTA_GATEWAY */ + nla_total_size(16) /* RTA_GATEWAY */
+ lwtunnel_get_encap_size(rt->dst.lwtstate); + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
nexthop_len *= rt->rt6i_nsiblings; nexthop_len *= rt->rt6i_nsiblings;
} }
@ -4430,38 +4445,38 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt)
+ nla_total_size(sizeof(struct rta_cacheinfo)) + nla_total_size(sizeof(struct rta_cacheinfo))
+ nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
+ nla_total_size(1) /* RTA_PREF */ + nla_total_size(1) /* RTA_PREF */
+ lwtunnel_get_encap_size(rt->dst.lwtstate) + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
+ nexthop_len; + nexthop_len;
} }
static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
unsigned int *flags, bool skip_oif) unsigned int *flags, bool skip_oif)
{ {
if (rt->rt6i_nh_flags & RTNH_F_DEAD) if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
*flags |= RTNH_F_DEAD; *flags |= RTNH_F_DEAD;
if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) { if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
*flags |= RTNH_F_LINKDOWN; *flags |= RTNH_F_LINKDOWN;
if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown) if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
*flags |= RTNH_F_DEAD; *flags |= RTNH_F_DEAD;
} }
if (rt->rt6i_flags & RTF_GATEWAY) { if (rt->rt6i_flags & RTF_GATEWAY) {
if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0) if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
goto nla_put_failure; goto nla_put_failure;
} }
*flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK); *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD) if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
*flags |= RTNH_F_OFFLOAD; *flags |= RTNH_F_OFFLOAD;
/* not needed for multipath encoding b/c it has a rtnexthop struct */ /* not needed for multipath encoding b/c it has a rtnexthop struct */
if (!skip_oif && rt->dst.dev && if (!skip_oif && rt->fib6_nh.nh_dev &&
nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
goto nla_put_failure; goto nla_put_failure;
if (rt->dst.lwtstate && if (rt->fib6_nh.nh_lwtstate &&
lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0) lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
goto nla_put_failure; goto nla_put_failure;
return 0; return 0;
@ -4473,6 +4488,7 @@ nla_put_failure:
/* add multipath next hop */ /* add multipath next hop */
static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt) static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
{ {
const struct net_device *dev = rt->fib6_nh.nh_dev;
struct rtnexthop *rtnh; struct rtnexthop *rtnh;
unsigned int flags = 0; unsigned int flags = 0;
@ -4480,8 +4496,8 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
if (!rtnh) if (!rtnh)
goto nla_put_failure; goto nla_put_failure;
rtnh->rtnh_hops = rt->rt6i_nh_weight - 1; rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0; rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
if (rt6_nexthop_info(skb, rt, &flags, true) < 0) if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
goto nla_put_failure; goto nla_put_failure;