From 22e6c58b8c2843337ec4e8464b1ce6e869ca5bf4 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Oct 2018 18:56:41 -0700 Subject: [PATCH 01/11] netlink: Add answer_flags to netlink_callback With dump filtering we need a way to ensure the NLM_F_DUMP_FILTERED flag is set on a message back to the user if the data returned is influenced by some input attributes. Normally this can be done as messages are added to the skb, but if the filter results in no data being returned, the user could be confused as to why. This patch adds answer_flags to the netlink_callback allowing dump handlers to set the NLM_F_DUMP_FILTERED at a minimum in the NLMSG_DONE message ensuring the flag gets back to the user. The netlink_callback space is initialized to 0 via a memset in __netlink_dump_start, so init of the new answer_flags is covered. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 + net/netlink/af_netlink.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 72580f1a72a2..4da90a6ab536 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -180,6 +180,7 @@ struct netlink_callback { u16 family; u16 min_dump_alloc; bool strict_check; + u16 answer_flags; unsigned int prev_seq, seq; long args[6]; }; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index e613a9f89600..6bb9f3cde0b0 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2257,7 +2257,8 @@ static int netlink_dump(struct sock *sk) } nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, - sizeof(nlk->dump_done_errno), NLM_F_MULTI); + sizeof(nlk->dump_done_errno), + NLM_F_MULTI | cb->answer_flags); if (WARN_ON(!nlh)) goto errout_skb; From 4724676d551c0961659b1da3fb4b5928169fb184 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Oct 2018 18:56:42 -0700 Subject: [PATCH 02/11] net: Add struct for fib dump filter Add struct fib_dump_filter for options on limiting which routes are returned in a dump request. The current list is table id, protocol, route type, rtm_flags and nexthop device index. struct net is needed to lookup the net_device from the index. Declare the filter for each route dump handler and plumb the new arguments from dump handlers to ip_valid_fib_dump_req. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_route.h | 1 + include/net/ip_fib.h | 13 ++++++++++++- net/ipv4/fib_frontend.c | 6 ++++-- net/ipv4/ipmr.c | 8 ++++++-- net/ipv6/ip6_fib.c | 5 +++-- net/ipv6/ip6mr.c | 5 ++++- net/mpls/af_mpls.c | 12 ++++++++---- 7 files changed, 38 insertions(+), 12 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index cef186dbd2ce..7ab119936e69 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -174,6 +174,7 @@ struct rt6_rtnl_dump_arg { struct sk_buff *skb; struct netlink_callback *cb; struct net *net; + struct fib_dump_filter filter; }; int rt6_dump_route(struct fib6_info *f6i, void *p_arg); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 852e4ebf2209..667013bf4266 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -222,6 +222,16 @@ struct fib_table { unsigned long __data[0]; }; +struct fib_dump_filter { + u32 table_id; + /* filter_set is an optimization that an entry is set */ + bool filter_set; + unsigned char protocol; + unsigned char rt_type; + unsigned int flags; + struct net_device *dev; +}; + int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags); int fib_table_insert(struct net *, struct fib_table *, struct fib_config *, @@ -453,6 +463,7 @@ static inline void fib_proc_exit(struct net *net) u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr); -int ip_valid_fib_dump_req(const struct nlmsghdr *nlh, +int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, + struct fib_dump_filter *filter, struct netlink_ext_ack *extack); #endif /* _NET_FIB_H */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 0f1beceb47d5..850850dd80e1 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -802,7 +802,8 @@ errout: return err; } -int ip_valid_fib_dump_req(const struct nlmsghdr *nlh, +int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, + struct fib_dump_filter *filter, struct netlink_ext_ack *extack) { struct rtmsg *rtm; @@ -837,6 +838,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); + struct fib_dump_filter filter = {}; unsigned int h, s_h; unsigned int e = 0, s_e; struct fib_table *tb; @@ -844,7 +846,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) int dumped = 0, err; if (cb->strict_check) { - err = ip_valid_fib_dump_req(nlh, cb->extack); + err = ip_valid_fib_dump_req(net, nlh, &filter, cb->extack); if (err < 0) return err; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 91b0d5671649..44d777058960 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2527,9 +2527,13 @@ errout_free: static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) { - if (cb->strict_check) { - int err = ip_valid_fib_dump_req(cb->nlh, cb->extack); + struct fib_dump_filter filter = {}; + if (cb->strict_check) { + int err; + + err = ip_valid_fib_dump_req(sock_net(skb->sk), cb->nlh, + &filter, cb->extack); if (err < 0) return err; } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 0783af11b0b7..94e61fe47ff8 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -569,17 +569,18 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); + struct rt6_rtnl_dump_arg arg = {}; unsigned int h, s_h; unsigned int e = 0, s_e; - struct rt6_rtnl_dump_arg arg; struct fib6_walker *w; struct fib6_table *tb; struct hlist_head *head; int res = 0; if (cb->strict_check) { - int err = ip_valid_fib_dump_req(nlh, cb->extack); + int err; + err = ip_valid_fib_dump_req(net, nlh, &arg.filter, cb->extack); if (err < 0) return err; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index d7563ef76518..dbd5166c5599 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2458,10 +2458,13 @@ errout: static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; + struct fib_dump_filter filter = {}; if (cb->strict_check) { - int err = ip_valid_fib_dump_req(nlh, cb->extack); + int err; + err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh, + &filter, cb->extack); if (err < 0) return err; } diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 5fe274c47c41..bfcb4759c9ee 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -2032,13 +2032,15 @@ nla_put_failure: } #if IS_ENABLED(CONFIG_INET) -static int mpls_valid_fib_dump_req(const struct nlmsghdr *nlh, +static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, + struct fib_dump_filter *filter, struct netlink_ext_ack *extack) { - return ip_valid_fib_dump_req(nlh, extack); + return ip_valid_fib_dump_req(net, nlh, filter, extack); } #else -static int mpls_valid_fib_dump_req(const struct nlmsghdr *nlh, +static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, + struct fib_dump_filter *filter, struct netlink_ext_ack *extack) { struct rtmsg *rtm; @@ -2070,14 +2072,16 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); struct mpls_route __rcu **platform_label; + struct fib_dump_filter filter = {}; size_t platform_labels; unsigned int index; ASSERT_RTNL(); if (cb->strict_check) { - int err = mpls_valid_fib_dump_req(nlh, cb->extack); + int err; + err = mpls_valid_fib_dump_req(net, nlh, &filter, cb->extack); if (err < 0) return err; } From 18a8021a7be3207686851208f91a2f105b2d4703 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Oct 2018 18:56:43 -0700 Subject: [PATCH 03/11] net/ipv4: Plumb support for filtering route dumps Implement kernel side filtering of routes by table id, egress device index, protocol and route type. If the table id is given in the filter, lookup the table and call fib_table_dump directly for it. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 +- net/ipv4/fib_frontend.c | 13 ++++++++++++- net/ipv4/fib_trie.c | 37 ++++++++++++++++++++++++++----------- 3 files changed, 39 insertions(+), 13 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 667013bf4266..1eabc9edd2b9 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -239,7 +239,7 @@ int fib_table_insert(struct net *, struct fib_table *, struct fib_config *, int fib_table_delete(struct net *, struct fib_table *, struct fib_config *, struct netlink_ext_ack *extack); int fib_table_dump(struct fib_table *table, struct sk_buff *skb, - struct netlink_callback *cb); + struct netlink_callback *cb, struct fib_dump_filter *filter); int fib_table_flush(struct net *net, struct fib_table *table); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); void fib_table_flush_external(struct fib_table *table); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 850850dd80e1..37dc8ac366fd 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -855,6 +855,17 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) ((struct rtmsg *)nlmsg_data(nlh))->rtm_flags & RTM_F_CLONED) return skb->len; + if (filter.table_id) { + tb = fib_get_table(net, filter.table_id); + if (!tb) { + NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist"); + return -ENOENT; + } + + err = fib_table_dump(tb, skb, cb, &filter); + return skb->len ? : err; + } + s_h = cb->args[0]; s_e = cb->args[1]; @@ -869,7 +880,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (dumped) memset(&cb->args[2], 0, sizeof(cb->args) - 2 * sizeof(cb->args[0])); - err = fib_table_dump(tb, skb, cb); + err = fib_table_dump(tb, skb, cb, &filter); if (err < 0) { if (likely(skb->len)) goto out; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 5bc0c89e81e4..237c9f72b265 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2003,12 +2003,17 @@ void fib_free_table(struct fib_table *tb) } static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, - struct sk_buff *skb, struct netlink_callback *cb) + struct sk_buff *skb, struct netlink_callback *cb, + struct fib_dump_filter *filter) { + unsigned int flags = NLM_F_MULTI; __be32 xkey = htonl(l->key); struct fib_alias *fa; int i, s_i; + if (filter->filter_set) + flags |= NLM_F_DUMP_FILTERED; + s_i = cb->args[4]; i = 0; @@ -2016,25 +2021,35 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { int err; - if (i < s_i) { - i++; - continue; - } + if (i < s_i) + goto next; - if (tb->tb_id != fa->tb_id) { - i++; - continue; + if (tb->tb_id != fa->tb_id) + goto next; + + if (filter->filter_set) { + if (filter->rt_type && fa->fa_type != filter->rt_type) + goto next; + + if ((filter->protocol && + fa->fa_info->fib_protocol != filter->protocol)) + goto next; + + if (filter->dev && + !fib_info_nh_uses_dev(fa->fa_info, filter->dev)) + goto next; } err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, tb->tb_id, fa->fa_type, xkey, KEYLENGTH - fa->fa_slen, - fa->fa_tos, fa->fa_info, NLM_F_MULTI); + fa->fa_tos, fa->fa_info, flags); if (err < 0) { cb->args[4] = i; return err; } +next: i++; } @@ -2044,7 +2059,7 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, /* rcu_read_lock needs to be hold by caller from readside */ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, - struct netlink_callback *cb) + struct netlink_callback *cb, struct fib_dump_filter *filter) { struct trie *t = (struct trie *)tb->tb_data; struct key_vector *l, *tp = t->kv; @@ -2057,7 +2072,7 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, while ((l = leaf_walk_rcu(&tp, key)) != NULL) { int err; - err = fn_trie_dump_leaf(l, tb, skb, cb); + err = fn_trie_dump_leaf(l, tb, skb, cb, filter); if (err < 0) { cb->args[3] = key; cb->args[2] = count; From 13e38901d46ca60156c53c0d5e0b8bb58db66da5 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Oct 2018 18:56:44 -0700 Subject: [PATCH 04/11] net/ipv6: Plumb support for filtering route dumps Implement kernel side filtering of routes by table id, egress device index, protocol, and route type. If the table id is given in the filter, lookup the table and call fib6_dump_table directly for it. Move the existing route flags check for prefix only routes to the new filter. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 28 ++++++++++++++++++++++------ net/ipv6/route.c | 40 ++++++++++++++++++++++++++++++++-------- 2 files changed, 54 insertions(+), 14 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 94e61fe47ff8..a51fc357a05c 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -583,10 +583,12 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) err = ip_valid_fib_dump_req(net, nlh, &arg.filter, cb->extack); if (err < 0) return err; - } + } else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) { + struct rtmsg *rtm = nlmsg_data(nlh); - s_h = cb->args[0]; - s_e = cb->args[1]; + if (rtm->rtm_flags & RTM_F_PREFIX) + arg.filter.flags = RTM_F_PREFIX; + } w = (void *)cb->args[2]; if (!w) { @@ -612,6 +614,20 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) arg.net = net; w->args = &arg; + if (arg.filter.table_id) { + tb = fib6_get_table(net, arg.filter.table_id); + if (!tb) { + NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist"); + return -ENOENT; + } + + res = fib6_dump_table(tb, skb, cb); + goto out; + } + + s_h = cb->args[0]; + s_e = cb->args[1]; + rcu_read_lock(); for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { e = 0; @@ -621,16 +637,16 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) goto next; res = fib6_dump_table(tb, skb, cb); if (res != 0) - goto out; + goto out_unlock; next: e++; } } -out: +out_unlock: rcu_read_unlock(); cb->args[1] = e; cb->args[0] = h; - +out: res = res < 0 ? res : skb->len; if (res <= 0) fib6_dump_end(cb); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f4e08b0689a8..9fd600e42f9d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4767,28 +4767,52 @@ nla_put_failure: return -EMSGSIZE; } +static bool fib6_info_uses_dev(const struct fib6_info *f6i, + const struct net_device *dev) +{ + if (f6i->fib6_nh.nh_dev == dev) + return true; + + if (f6i->fib6_nsiblings) { + struct fib6_info *sibling, *next_sibling; + + list_for_each_entry_safe(sibling, next_sibling, + &f6i->fib6_siblings, fib6_siblings) { + if (sibling->fib6_nh.nh_dev == dev) + return true; + } + } + + return false; +} + int rt6_dump_route(struct fib6_info *rt, void *p_arg) { struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; + struct fib_dump_filter *filter = &arg->filter; + unsigned int flags = NLM_F_MULTI; struct net *net = arg->net; if (rt == net->ipv6.fib6_null_entry) return 0; - if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { - struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); - - /* user wants prefix routes only */ - if (rtm->rtm_flags & RTM_F_PREFIX && - !(rt->fib6_flags & RTF_PREFIX_RT)) { - /* success since this is not a prefix route */ + if ((filter->flags & RTM_F_PREFIX) && + !(rt->fib6_flags & RTF_PREFIX_RT)) { + /* success since this is not a prefix route */ + return 1; + } + if (filter->filter_set) { + if ((filter->rt_type && rt->fib6_type != filter->rt_type) || + (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) || + (filter->protocol && rt->fib6_protocol != filter->protocol)) { return 1; } + flags |= NLM_F_DUMP_FILTERED; } return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0, RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid, - arg->cb->nlh->nlmsg_seq, NLM_F_MULTI); + arg->cb->nlh->nlmsg_seq, flags); } static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, From bae9a78b17b87a7cef56cf492916007a58f65172 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Oct 2018 18:56:45 -0700 Subject: [PATCH 05/11] net/mpls: Plumb support for filtering route dumps Implement kernel side filtering of routes by egress device index and protocol. MPLS uses only a single table and route type. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index bfcb4759c9ee..48f4cbd9fb38 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -2067,12 +2067,35 @@ static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, } #endif +static bool mpls_rt_uses_dev(struct mpls_route *rt, + const struct net_device *dev) +{ + struct net_device *nh_dev; + + if (rt->rt_nhn == 1) { + struct mpls_nh *nh = rt->rt_nh; + + nh_dev = rtnl_dereference(nh->nh_dev); + if (dev == nh_dev) + return true; + } else { + for_nexthops(rt) { + nh_dev = rtnl_dereference(nh->nh_dev); + if (nh_dev == dev) + return true; + } endfor_nexthops(rt); + } + + return false; +} + static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); struct mpls_route __rcu **platform_label; struct fib_dump_filter filter = {}; + unsigned int flags = NLM_F_MULTI; size_t platform_labels; unsigned int index; @@ -2084,6 +2107,14 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) err = mpls_valid_fib_dump_req(net, nlh, &filter, cb->extack); if (err < 0) return err; + + /* for MPLS, there is only 1 table with fixed type and flags. + * If either are set in the filter then return nothing. + */ + if ((filter.table_id && filter.table_id != RT_TABLE_MAIN) || + (filter.rt_type && filter.rt_type != RTN_UNICAST) || + filter.flags) + return skb->len; } index = cb->args[0]; @@ -2092,15 +2123,24 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) platform_label = rtnl_dereference(net->mpls.platform_label); platform_labels = net->mpls.platform_labels; + + if (filter.filter_set) + flags |= NLM_F_DUMP_FILTERED; + for (; index < platform_labels; index++) { struct mpls_route *rt; + rt = rtnl_dereference(platform_label[index]); if (!rt) continue; + if ((filter.dev && !mpls_rt_uses_dev(rt, filter.dev)) || + (filter.protocol && rt->rt_protocol != filter.protocol)) + continue; + if (mpls_dump_route(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, - index, rt, NLM_F_MULTI) < 0) + index, rt, flags) < 0) break; } cb->args[0] = index; From e1cedae1ba6b09ae8376c1486712bf91ea0dfc41 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Oct 2018 18:56:46 -0700 Subject: [PATCH 06/11] ipmr: Refactor mr_rtm_dumproute Move per-table loops from mr_rtm_dumproute to mr_table_dump and export mr_table_dump for dumps by specific table id. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/mroute_base.h | 6 +++ net/ipv4/ipmr_base.c | 86 +++++++++++++++++++++++-------------- 2 files changed, 60 insertions(+), 32 deletions(-) diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 6675b9f81979..db85373c8d15 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -283,6 +283,12 @@ void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg); int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mr_mfc *c, struct rtmsg *rtm); +int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb, + struct netlink_callback *cb, + int (*fill)(struct mr_table *mrt, struct sk_buff *skb, + u32 portid, u32 seq, struct mr_mfc *c, + int cmd, int flags), + spinlock_t *lock); int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, struct mr_table *(*iter)(struct net *net, struct mr_table *mrt), diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index 1ad9aa62a97b..132dd2613ca5 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -268,6 +268,55 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, } EXPORT_SYMBOL(mr_fill_mroute); +int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb, + struct netlink_callback *cb, + int (*fill)(struct mr_table *mrt, struct sk_buff *skb, + u32 portid, u32 seq, struct mr_mfc *c, + int cmd, int flags), + spinlock_t *lock) +{ + unsigned int e = 0, s_e = cb->args[1]; + unsigned int flags = NLM_F_MULTI; + struct mr_mfc *mfc; + int err; + + list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) { + if (e < s_e) + goto next_entry; + + err = fill(mrt, skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags); + if (err < 0) + goto out; +next_entry: + e++; + } + e = 0; + s_e = 0; + + spin_lock_bh(lock); + list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { + if (e < s_e) + goto next_entry2; + + err = fill(mrt, skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags); + if (err < 0) { + spin_unlock_bh(lock); + goto out; + } +next_entry2: + e++; + } + spin_unlock_bh(lock); + err = 0; + e = 0; + +out: + cb->args[1] = e; + return err; +} + int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, struct mr_table *(*iter)(struct net *net, struct mr_table *mrt), @@ -277,51 +326,24 @@ int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, int cmd, int flags), spinlock_t *lock) { - unsigned int t = 0, e = 0, s_t = cb->args[0], s_e = cb->args[1]; + unsigned int t = 0, s_t = cb->args[0]; struct net *net = sock_net(skb->sk); struct mr_table *mrt; - struct mr_mfc *mfc; + int err; rcu_read_lock(); for (mrt = iter(net, NULL); mrt; mrt = iter(net, mrt)) { if (t < s_t) goto next_table; - list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) { - if (e < s_e) - goto next_entry; - if (fill(mrt, skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, mfc, - RTM_NEWROUTE, NLM_F_MULTI) < 0) - goto done; -next_entry: - e++; - } - e = 0; - s_e = 0; - spin_lock_bh(lock); - list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { - if (e < s_e) - goto next_entry2; - if (fill(mrt, skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, mfc, - RTM_NEWROUTE, NLM_F_MULTI) < 0) { - spin_unlock_bh(lock); - goto done; - } -next_entry2: - e++; - } - spin_unlock_bh(lock); - e = 0; - s_e = 0; + err = mr_table_dump(mrt, skb, cb, fill, lock); + if (err < 0) + break; next_table: t++; } -done: rcu_read_unlock(); - cb->args[1] = e; cb->args[0] = t; return skb->len; From cb167893f41e21e6bd283d78e53489289dc0592d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Oct 2018 18:56:47 -0700 Subject: [PATCH 07/11] net: Plumb support for filtering ipv4 and ipv6 multicast route dumps Implement kernel side filtering of routes by egress device index and table id. If the table id is given in the filter, lookup table and call mr_table_dump directly for it. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/mroute_base.h | 7 +++--- net/ipv4/ipmr.c | 18 +++++++++++++--- net/ipv4/ipmr_base.c | 43 ++++++++++++++++++++++++++++++++++--- net/ipv6/ip6mr.c | 18 +++++++++++++--- 4 files changed, 74 insertions(+), 12 deletions(-) diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index db85373c8d15..34de06b426ef 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -7,6 +7,7 @@ #include #include #include +#include /** * struct vif_device - interface representor for multicast routing @@ -288,7 +289,7 @@ int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb, int (*fill)(struct mr_table *mrt, struct sk_buff *skb, u32 portid, u32 seq, struct mr_mfc *c, int cmd, int flags), - spinlock_t *lock); + spinlock_t *lock, struct fib_dump_filter *filter); int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, struct mr_table *(*iter)(struct net *net, struct mr_table *mrt), @@ -296,7 +297,7 @@ int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, struct sk_buff *skb, u32 portid, u32 seq, struct mr_mfc *c, int cmd, int flags), - spinlock_t *lock); + spinlock_t *lock, struct fib_dump_filter *filter); int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, int (*rules_dump)(struct net *net, @@ -346,7 +347,7 @@ mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, struct sk_buff *skb, u32 portid, u32 seq, struct mr_mfc *c, int cmd, int flags), - spinlock_t *lock) + spinlock_t *lock, struct fib_dump_filter *filter) { return -EINVAL; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 44d777058960..3fa988e6a3df 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2528,18 +2528,30 @@ errout_free: static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) { struct fib_dump_filter filter = {}; + int err; if (cb->strict_check) { - int err; - err = ip_valid_fib_dump_req(sock_net(skb->sk), cb->nlh, &filter, cb->extack); if (err < 0) return err; } + if (filter.table_id) { + struct mr_table *mrt; + + mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id); + if (!mrt) { + NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist"); + return -ENOENT; + } + err = mr_table_dump(mrt, skb, cb, _ipmr_fill_mroute, + &mfc_unres_lock, &filter); + return skb->len ? : err; + } + return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter, - _ipmr_fill_mroute, &mfc_unres_lock); + _ipmr_fill_mroute, &mfc_unres_lock, &filter); } static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = { diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index 132dd2613ca5..844806120f44 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -268,21 +268,45 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, } EXPORT_SYMBOL(mr_fill_mroute); +static bool mr_mfc_uses_dev(const struct mr_table *mrt, + const struct mr_mfc *c, + const struct net_device *dev) +{ + int ct; + + for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { + if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { + const struct vif_device *vif; + + vif = &mrt->vif_table[ct]; + if (vif->dev == dev) + return true; + } + } + return false; +} + int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb, struct netlink_callback *cb, int (*fill)(struct mr_table *mrt, struct sk_buff *skb, u32 portid, u32 seq, struct mr_mfc *c, int cmd, int flags), - spinlock_t *lock) + spinlock_t *lock, struct fib_dump_filter *filter) { unsigned int e = 0, s_e = cb->args[1]; unsigned int flags = NLM_F_MULTI; struct mr_mfc *mfc; int err; + if (filter->filter_set) + flags |= NLM_F_DUMP_FILTERED; + list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) { if (e < s_e) goto next_entry; + if (filter->dev && + !mr_mfc_uses_dev(mrt, mfc, filter->dev)) + goto next_entry; err = fill(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags); @@ -298,6 +322,9 @@ next_entry: list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { if (e < s_e) goto next_entry2; + if (filter->dev && + !mr_mfc_uses_dev(mrt, mfc, filter->dev)) + goto next_entry2; err = fill(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags); @@ -316,6 +343,7 @@ out: cb->args[1] = e; return err; } +EXPORT_SYMBOL(mr_table_dump); int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, struct mr_table *(*iter)(struct net *net, @@ -324,19 +352,28 @@ int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, struct sk_buff *skb, u32 portid, u32 seq, struct mr_mfc *c, int cmd, int flags), - spinlock_t *lock) + spinlock_t *lock, struct fib_dump_filter *filter) { unsigned int t = 0, s_t = cb->args[0]; struct net *net = sock_net(skb->sk); struct mr_table *mrt; int err; + /* multicast does not track protocol or have route type other + * than RTN_MULTICAST + */ + if (filter->filter_set) { + if (filter->protocol || filter->flags || + (filter->rt_type && filter->rt_type != RTN_MULTICAST)) + return skb->len; + } + rcu_read_lock(); for (mrt = iter(net, NULL); mrt; mrt = iter(net, mrt)) { if (t < s_t) goto next_table; - err = mr_table_dump(mrt, skb, cb, fill, lock); + err = mr_table_dump(mrt, skb, cb, fill, lock, filter); if (err < 0) break; next_table: diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index dbd5166c5599..9759b0aecdd6 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2459,16 +2459,28 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; struct fib_dump_filter filter = {}; + int err; if (cb->strict_check) { - int err; - err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh, &filter, cb->extack); if (err < 0) return err; } + if (filter.table_id) { + struct mr_table *mrt; + + mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id); + if (!mrt) { + NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist"); + return -ENOENT; + } + err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute, + &mfc_unres_lock, &filter); + return skb->len ? : err; + } + return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter, - _ip6mr_fill_mroute, &mfc_unres_lock); + _ip6mr_fill_mroute, &mfc_unres_lock, &filter); } From effe6792662495ad9c175bf0d9c53459a51fdbbd Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Oct 2018 18:56:48 -0700 Subject: [PATCH 08/11] net: Enable kernel side filtering of route dumps Update parsing of route dump request to enable kernel side filtering. Allow filtering results by protocol (e.g., which routing daemon installed the route), route type (e.g., unicast), table id and nexthop device. These amount to the low hanging fruit, yet a huge improvement, for dumping routes. ip_valid_fib_dump_req is called with RTNL held, so __dev_get_by_index can be used to look up the device index without taking a reference. From there filter->dev is only used during dump loops with the lock still held. Set NLM_F_DUMP_FILTERED in the answer_flags so the user knows the results have been filtered should no entries be returned. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 +- net/ipv4/fib_frontend.c | 51 +++++++++++++++++++++++++++++++++++------ net/ipv4/ipmr.c | 2 +- net/ipv6/ip6_fib.c | 2 +- net/ipv6/ip6mr.c | 2 +- net/mpls/af_mpls.c | 9 ++++---- 6 files changed, 53 insertions(+), 15 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 1eabc9edd2b9..e8d9456bf36e 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -465,5 +465,5 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr); int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, struct fib_dump_filter *filter, - struct netlink_ext_ack *extack); + struct netlink_callback *cb); #endif /* _NET_FIB_H */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 37dc8ac366fd..e86ca2255181 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -804,9 +804,14 @@ errout: int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, struct fib_dump_filter *filter, - struct netlink_ext_ack *extack) + struct netlink_callback *cb) { + struct netlink_ext_ack *extack = cb->extack; + struct nlattr *tb[RTA_MAX + 1]; struct rtmsg *rtm; + int err, i; + + ASSERT_RTNL(); if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { NL_SET_ERR_MSG(extack, "Invalid header for FIB dump request"); @@ -815,8 +820,7 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, rtm = nlmsg_data(nlh); if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos || - rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope || - rtm->rtm_type) { + rtm->rtm_scope) { NL_SET_ERR_MSG(extack, "Invalid values in header for FIB dump request"); return -EINVAL; } @@ -825,9 +829,42 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, return -EINVAL; } - if (nlmsg_attrlen(nlh, sizeof(*rtm))) { - NL_SET_ERR_MSG(extack, "Invalid data after header in FIB dump request"); - return -EINVAL; + filter->flags = rtm->rtm_flags; + filter->protocol = rtm->rtm_protocol; + filter->rt_type = rtm->rtm_type; + filter->table_id = rtm->rtm_table; + + err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv4_policy, extack); + if (err < 0) + return err; + + for (i = 0; i <= RTA_MAX; ++i) { + int ifindex; + + if (!tb[i]) + continue; + + switch (i) { + case RTA_TABLE: + filter->table_id = nla_get_u32(tb[i]); + break; + case RTA_OIF: + ifindex = nla_get_u32(tb[i]); + filter->dev = __dev_get_by_index(net, ifindex); + if (!filter->dev) + return -ENODEV; + break; + default: + NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request"); + return -EINVAL; + } + } + + if (filter->flags || filter->protocol || filter->rt_type || + filter->table_id || filter->dev) { + filter->filter_set = 1; + cb->answer_flags = NLM_F_DUMP_FILTERED; } return 0; @@ -846,7 +883,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) int dumped = 0, err; if (cb->strict_check) { - err = ip_valid_fib_dump_req(net, nlh, &filter, cb->extack); + err = ip_valid_fib_dump_req(net, nlh, &filter, cb); if (err < 0) return err; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 3fa988e6a3df..7a3e2acda94c 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2532,7 +2532,7 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (cb->strict_check) { err = ip_valid_fib_dump_req(sock_net(skb->sk), cb->nlh, - &filter, cb->extack); + &filter, cb); if (err < 0) return err; } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index a51fc357a05c..5562c77022c6 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -580,7 +580,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (cb->strict_check) { int err; - err = ip_valid_fib_dump_req(net, nlh, &arg.filter, cb->extack); + err = ip_valid_fib_dump_req(net, nlh, &arg.filter, cb); if (err < 0) return err; } else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) { diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 9759b0aecdd6..c3317ffb09eb 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2463,7 +2463,7 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (cb->strict_check) { err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh, - &filter, cb->extack); + &filter, cb); if (err < 0) return err; } diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 48f4cbd9fb38..24381696932a 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -2034,15 +2034,16 @@ nla_put_failure: #if IS_ENABLED(CONFIG_INET) static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, struct fib_dump_filter *filter, - struct netlink_ext_ack *extack) + struct netlink_callback *cb) { - return ip_valid_fib_dump_req(net, nlh, filter, extack); + return ip_valid_fib_dump_req(net, nlh, filter, cb); } #else static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, struct fib_dump_filter *filter, - struct netlink_ext_ack *extack) + struct netlink_callback *cb) { + struct netlink_ext_ack *extack = cb->extack; struct rtmsg *rtm; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { @@ -2104,7 +2105,7 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) if (cb->strict_check) { int err; - err = mpls_valid_fib_dump_req(net, nlh, &filter, cb->extack); + err = mpls_valid_fib_dump_req(net, nlh, &filter, cb); if (err < 0) return err; From 196cfebf897266c3450519e916bab9daff74e52c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Oct 2018 18:56:49 -0700 Subject: [PATCH 09/11] net/mpls: Handle kernel side filtering of route dumps Update the dump request parsing in MPLS for the non-INET case to enable kernel side filtering. If INET is disabled the only filters that make sense for MPLS are protocol and nexthop device. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 24381696932a..7d55d4c04088 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -2044,7 +2044,9 @@ static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, struct netlink_callback *cb) { struct netlink_ext_ack *extack = cb->extack; + struct nlattr *tb[RTA_MAX + 1]; struct rtmsg *rtm; + int err, i; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { NL_SET_ERR_MSG_MOD(extack, "Invalid header for FIB dump request"); @@ -2053,15 +2055,36 @@ static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, rtm = nlmsg_data(nlh); if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos || - rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope || - rtm->rtm_type || rtm->rtm_flags) { + rtm->rtm_table || rtm->rtm_scope || rtm->rtm_type || + rtm->rtm_flags) { NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for FIB dump request"); return -EINVAL; } - if (nlmsg_attrlen(nlh, sizeof(*rtm))) { - NL_SET_ERR_MSG_MOD(extack, "Invalid data after header in FIB dump request"); - return -EINVAL; + if (rtm->rtm_protocol) { + filter->protocol = rtm->rtm_protocol; + filter->filter_set = 1; + cb->answer_flags = NLM_F_DUMP_FILTERED; + } + + err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_mpls_policy, extack); + if (err < 0) + return err; + + for (i = 0; i <= RTA_MAX; ++i) { + int ifindex; + + if (i == RTA_OIF) { + ifindex = nla_get_u32(tb[i]); + filter->dev = __dev_get_by_index(net, ifindex); + if (!filter->dev) + return -ENODEV; + filter->filter_set = 1; + } else if (tb[i]) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in dump request"); + return -EINVAL; + } } return 0; From 08e814c9e8eb5a982cbd1e8f6bd255d97c51026f Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Oct 2018 18:56:50 -0700 Subject: [PATCH 10/11] net/ipv6: Bail early if user only wants cloned entries Similar to IPv4, IPv6 fib no longer contains cloned routes. If a user requests a route dump for only cloned entries, no sense walking the FIB and returning everything. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5562c77022c6..2a058b408a6a 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -586,10 +586,13 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) } else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) { struct rtmsg *rtm = nlmsg_data(nlh); - if (rtm->rtm_flags & RTM_F_PREFIX) - arg.filter.flags = RTM_F_PREFIX; + arg.filter.flags = rtm->rtm_flags & (RTM_F_PREFIX|RTM_F_CLONED); } + /* fib entries are never clones */ + if (arg.filter.flags & RTM_F_CLONED) + return skb->len; + w = (void *)cb->args[2]; if (!w) { /* New dump: From e4e92fb160d7bef689c6ad00108b4e52599ca05e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 15 Oct 2018 18:56:51 -0700 Subject: [PATCH 11/11] net/ipv4: Bail early if user only wants prefix entries Unlike IPv6, IPv4 does not have routes marked with RTF_PREFIX_RT. If the flag is set in the dump request, just return. In the process of this change, move the CLONE check to use the new filter flags. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index e86ca2255181..5bf653f36911 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -886,10 +886,14 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) err = ip_valid_fib_dump_req(net, nlh, &filter, cb); if (err < 0) return err; + } else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) { + struct rtmsg *rtm = nlmsg_data(nlh); + + filter.flags = rtm->rtm_flags & (RTM_F_PREFIX | RTM_F_CLONED); } - if (nlmsg_len(nlh) >= sizeof(struct rtmsg) && - ((struct rtmsg *)nlmsg_data(nlh))->rtm_flags & RTM_F_CLONED) + /* fib entries are never clones and ipv4 does not use prefix flag */ + if (filter.flags & (RTM_F_PREFIX | RTM_F_CLONED)) return skb->len; if (filter.table_id) {