diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 0a21fbefdfbe..a4328cea376a 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -120,6 +120,9 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) || (!IN_DEV_FORWARD(in_dev) && \ IN_DEV_ORCONF((in_dev), ACCEPT_REDIRECTS))) +#define IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) \ + IN_DEV_CONF_GET((in_dev), IGNORE_ROUTES_WITH_LINKDOWN) + #define IN_DEV_ARPFILTER(in_dev) IN_DEV_ORCONF((in_dev), ARPFILTER) #define IN_DEV_ARP_ACCEPT(in_dev) IN_DEV_ORCONF((in_dev), ARP_ACCEPT) #define IN_DEV_ARP_ANNOUNCE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 6d67383a5114..903a55efbffe 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -36,7 +36,8 @@ struct fib_lookup_arg { void *result; struct fib_rule *rule; int flags; -#define FIB_LOOKUP_NOREF 1 +#define FIB_LOOKUP_NOREF 1 +#define FIB_LOOKUP_IGNORE_LINKSTATE 2 }; struct fib_rules_ops { diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index f73d27c5575a..49c142bdf01e 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -226,7 +226,7 @@ static inline struct fib_table *fib_new_table(struct net *net, u32 id) } static inline int fib_lookup(struct net *net, const struct flowi4 *flp, - struct fib_result *res) + struct fib_result *res, unsigned int flags) { struct fib_table *tb; int err = -ENETUNREACH; @@ -234,7 +234,7 @@ static inline int fib_lookup(struct net *net, const struct flowi4 *flp, rcu_read_lock(); tb = fib_get_table(net, RT_TABLE_MAIN); - if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF)) + if (tb && !fib_table_lookup(tb, flp, res, flags | FIB_LOOKUP_NOREF)) err = 0; rcu_read_unlock(); @@ -249,16 +249,18 @@ void __net_exit fib4_rules_exit(struct net *net); struct fib_table *fib_new_table(struct net *net, u32 id); struct fib_table *fib_get_table(struct net *net, u32 id); -int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res); +int __fib_lookup(struct net *net, struct flowi4 *flp, + struct fib_result *res, unsigned int flags); static inline int fib_lookup(struct net *net, struct flowi4 *flp, - struct fib_result *res) + struct fib_result *res, unsigned int flags) { struct fib_table *tb; int err; + flags |= FIB_LOOKUP_NOREF; if (net->ipv4.fib_has_custom_rules) - return __fib_lookup(net, flp, res); + return __fib_lookup(net, flp, res, flags); rcu_read_lock(); @@ -266,11 +268,11 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp, for (err = 0; !err; err = -ENETUNREACH) { tb = rcu_dereference_rtnl(net->ipv4.fib_main); - if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF)) + if (tb && !fib_table_lookup(tb, flp, res, flags)) break; tb = rcu_dereference_rtnl(net->ipv4.fib_default); - if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF)) + if (tb && !fib_table_lookup(tb, flp, res, flags)) break; } diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index 411959405ab6..08f894d2ddbd 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -164,6 +164,7 @@ enum IPV4_DEVCONF_ROUTE_LOCALNET, IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL, IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL, + IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, __IPV4_DEVCONF_MAX }; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 419d23c53ec7..7498716e8f54 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2169,6 +2169,8 @@ static struct devinet_sysctl_table { "igmpv2_unsolicited_report_interval"), DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL, "igmpv3_unsolicited_report_interval"), + DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN, + "ignore_routes_with_linkdown"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 534eb1485045..6bbc54940eb4 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -280,7 +280,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_scope = scope; fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0; - if (!fib_lookup(net, &fl4, &res)) + if (!fib_lookup(net, &fl4, &res, 0)) return FIB_RES_PREFSRC(net, res); } else { scope = RT_SCOPE_LINK; @@ -319,7 +319,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0; net = dev_net(dev); - if (fib_lookup(net, &fl4, &res)) + if (fib_lookup(net, &fl4, &res, 0)) goto last_resort; if (res.type != RTN_UNICAST && (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev))) @@ -354,7 +354,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_oif = dev->ifindex; ret = 0; - if (fib_lookup(net, &fl4, &res) == 0) { + if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) { if (res.type == RTN_UNICAST) ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; } diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 56151982f74e..18123d50f576 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -47,11 +47,12 @@ struct fib4_rule { #endif }; -int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) +int __fib_lookup(struct net *net, struct flowi4 *flp, + struct fib_result *res, unsigned int flags) { struct fib_lookup_arg arg = { .result = res, - .flags = FIB_LOOKUP_NOREF, + .flags = flags, }; int err; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index b1b305b1e340..3bfccd83551c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -623,7 +623,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, /* It is not necessary, but requires a bit of thinking */ if (fl4.flowi4_scope < RT_SCOPE_LINK) fl4.flowi4_scope = RT_SCOPE_LINK; - err = fib_lookup(net, &fl4, &res); + err = fib_lookup(net, &fl4, &res, + FIB_LOOKUP_IGNORE_LINKSTATE); if (err) { rcu_read_unlock(); return err; @@ -1035,12 +1036,20 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) goto nla_put_failure; if (fi->fib_nhs == 1) { + struct in_device *in_dev; + if (fi->fib_nh->nh_gw && nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw)) goto nla_put_failure; if (fi->fib_nh->nh_oif && nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif)) goto nla_put_failure; + if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) { + in_dev = __in_dev_get_rcu(fi->fib_nh->nh_dev); + if (in_dev && + IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) + rtm->rtm_flags |= RTNH_F_DEAD; + } #ifdef CONFIG_IP_ROUTE_CLASSID if (fi->fib_nh[0].nh_tclassid && nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) @@ -1057,11 +1066,19 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, goto nla_put_failure; for_nexthops(fi) { + struct in_device *in_dev; + rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); if (!rtnh) goto nla_put_failure; rtnh->rtnh_flags = nh->nh_flags & 0xFF; + if (nh->nh_flags & RTNH_F_LINKDOWN) { + in_dev = __in_dev_get_rcu(nh->nh_dev); + if (in_dev && + IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) + rtnh->rtnh_flags |= RTNH_F_DEAD; + } rtnh->rtnh_hops = nh->nh_weight - 1; rtnh->rtnh_ifindex = nh->nh_oif; @@ -1310,16 +1327,22 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags) void fib_select_multipath(struct fib_result *res) { struct fib_info *fi = res->fi; + struct in_device *in_dev; int w; spin_lock_bh(&fib_multipath_lock); if (fi->fib_power <= 0) { int power = 0; change_nexthops(fi) { - if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) { - power += nexthop_nh->nh_weight; - nexthop_nh->nh_power = nexthop_nh->nh_weight; - } + in_dev = __in_dev_get_rcu(nexthop_nh->nh_dev); + if (nexthop_nh->nh_flags & RTNH_F_DEAD) + continue; + if (in_dev && + IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && + nexthop_nh->nh_flags & RTNH_F_LINKDOWN) + continue; + power += nexthop_nh->nh_weight; + nexthop_nh->nh_power = nexthop_nh->nh_weight; } endfor_nexthops(fi); fi->fib_power = power; if (power <= 0) { diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 6c666a9f1bd5..15d32612e3c6 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1412,9 +1412,15 @@ found: continue; for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { const struct fib_nh *nh = &fi->fib_nh[nhsel]; + struct in_device *in_dev = __in_dev_get_rcu(nh->nh_dev); if (nh->nh_flags & RTNH_F_DEAD) continue; + if (in_dev && + IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && + nh->nh_flags & RTNH_F_LINKDOWN && + !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) + continue; if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) continue; diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 4bfaedf9b34e..8618fd150c96 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -40,7 +40,7 @@ static bool rpfilter_lookup_reverse(struct flowi4 *fl4, struct net *net = dev_net(dev); int ret __maybe_unused; - if (fib_lookup(net, fl4, &res)) + if (fib_lookup(net, fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE)) return false; if (res.type != RTN_UNICAST) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f6055984c307..d0362a2de3d3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -747,7 +747,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow if (!(n->nud_state & NUD_VALID)) { neigh_event_send(n, NULL); } else { - if (fib_lookup(net, fl4, &res) == 0) { + if (fib_lookup(net, fl4, &res, 0) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, new_gw, @@ -975,7 +975,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) return; rcu_read_lock(); - if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) { + if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, 0, mtu, @@ -1186,7 +1186,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) fl4.flowi4_mark = skb->mark; rcu_read_lock(); - if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) + if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0) src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res); else src = inet_select_addr(rt->dst.dev, @@ -1716,7 +1716,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.daddr = daddr; fl4.saddr = saddr; - err = fib_lookup(net, &fl4, &res); + err = fib_lookup(net, &fl4, &res, 0); if (err != 0) { if (!IN_DEV_FORWARD(in_dev)) err = -EHOSTUNREACH; @@ -2123,7 +2123,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) goto make_route; } - if (fib_lookup(net, fl4, &res)) { + if (fib_lookup(net, fl4, &res, 0)) { res.fi = NULL; res.table = NULL; if (fl4->flowi4_oif) {