nexthop: Add support for nexthop groups

Allow the creation of nexthop groups which reference other nexthop
objects to create multipath routes:

                      +--------------+
   +------------+   +--------------+ |
   | nh  nh_grp --->| nh_grp_entry |-+
   +------------+   +---------|----+
     ^                |       |    +------------+
     +----------------+       +--->| nh, weight |
        nh_parent                  +------------+

A group entry points to a nexthop with a weight for that hop within the
group. The nexthop has a list_head, grp_list, for tracking which groups
it is a member of and the group entry has a reference back to the parent.
The grp_list is used when a nexthop is deleted - to efficiently remove
it from groups using it.

If a nexthop group spec is given, no other attributes can be set. Each
nexthop id in a group spec must already exist.

Similar to single nexthops, the specification of a nexthop group can be
updated so that data is managed with rcu locking.

Add path selection function to account for multiple paths and add
ipv{4,6}_good_nh helpers to know that if a neighbor entry exists it is
in a good state.

Update NETDEV event handling to rebalance multipath nexthop groups if
a nexthop is deleted due to a link event (down or unregister).

When a nexthop is removed any groups using it are updated. Groups using a
nexthop are tracked via a grp_list.

Nexthop dumps can be limited to groups only by adding NHA_GROUPS to the
request.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David Ahern 2019-05-24 14:43:08 -07:00 committed by David S. Miller
parent b513bd035f
commit 430a049190
2 changed files with 578 additions and 24 deletions

View File

@ -35,6 +35,9 @@ struct nh_config {
struct in6_addr ipv6; struct in6_addr ipv6;
} gw; } gw;
struct nlattr *nh_grp;
u16 nh_grp_type;
struct nlattr *nh_encap; struct nlattr *nh_encap;
u16 nh_encap_type; u16 nh_encap_type;
@ -56,20 +59,39 @@ struct nh_info {
}; };
}; };
/* One member of a nexthop group: the member nexthop plus its per-group state */
struct nh_grp_entry {
/* member nexthop; the group holds a reference on it */
struct nexthop *nh;
/* stored as the netlink supplied weight + 1 */
u8 weight;
/* written by nh_group_rebalance(), compared against the flow hash
 * in nexthop_select_path()
 */
atomic_t upper_bound;
/* linkage on nh->grp_list */
struct list_head nh_list;
struct nexthop *nh_parent; /* nexthop of group with this entry */
};
/* Group specification hanging off a nexthop with is_group set.
 * Allocated with room for num_nh trailing entries.
 */
struct nh_group {
	u16			num_nh;	/* number of valid nh_entries */
	bool			mpath;	/* group type is NEXTHOP_GRP_TYPE_MPATH */
	bool			has_v4;	/* at least one member is AF_INET */
	/* C99 flexible array member rather than the GNU [0] extension */
	struct nh_grp_entry	nh_entries[];
};
struct nexthop { struct nexthop {
struct rb_node rb_node; /* entry on netns rbtree */ struct rb_node rb_node; /* entry on netns rbtree */
struct list_head grp_list; /* nh group entries using this nh */
struct net *net; struct net *net;
u32 id; u32 id;
u8 protocol; /* app managing this nh */ u8 protocol; /* app managing this nh */
u8 nh_flags; u8 nh_flags;
bool is_group;
refcount_t refcnt; refcount_t refcnt;
struct rcu_head rcu; struct rcu_head rcu;
union { union {
struct nh_info __rcu *nh_info; struct nh_info __rcu *nh_info;
struct nh_group __rcu *nh_grp;
}; };
}; };
@ -88,12 +110,86 @@ static inline void nexthop_put(struct nexthop *nh)
call_rcu(&nh->rcu, nexthop_free_rcu); call_rcu(&nh->rcu, nexthop_free_rcu);
} }
static inline bool nexthop_is_multipath(const struct nexthop *nh)
{
if (nh->is_group) {
struct nh_group *nh_grp;
nh_grp = rcu_dereference_rtnl(nh->nh_grp);
return nh_grp->mpath;
}
return false;
}
struct nexthop *nexthop_select_path(struct nexthop *nh, int hash);
static inline unsigned int nexthop_num_path(const struct nexthop *nh)
{
unsigned int rc = 1;
if (nexthop_is_multipath(nh)) {
struct nh_group *nh_grp;
nh_grp = rcu_dereference_rtnl(nh->nh_grp);
rc = nh_grp->num_nh;
} else {
const struct nh_info *nhi;
nhi = rcu_dereference_rtnl(nh->nh_info);
if (nhi->reject_nh)
rc = 0;
}
return rc;
}
/* Return the nexthop stored at index @nhsel of group @nh, or NULL when
 * @nhsel does not name a valid entry.
 */
static inline
struct nexthop *nexthop_mpath_select(const struct nexthop *nh, int nhsel)
{
	const struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);

	/* for_nexthops macros in fib_semantics.c grabs a pointer to
	 * the nexthop before checking nhsel, so nhsel == num_nh is a
	 * legitimate input here. Valid indices are 0 .. num_nh - 1;
	 * anything else must not touch nh_entries[].
	 */
	if (nhsel < 0 || nhsel >= nhg->num_nh)
		return NULL;

	return nhg->nh_entries[nhsel].nh;
}
static inline
int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh)
{
struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
int i;
for (i = 0; i < nhg->num_nh; i++) {
struct nexthop *nhe = nhg->nh_entries[i].nh;
struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info);
struct fib_nh_common *nhc = &nhi->fib_nhc;
int weight = nhg->nh_entries[i].weight;
if (fib_add_nexthop(skb, nhc, weight) < 0)
return -EMSGSIZE;
}
return 0;
}
/* called with rcu lock */ /* called with rcu lock */
static inline bool nexthop_is_blackhole(const struct nexthop *nh) static inline bool nexthop_is_blackhole(const struct nexthop *nh)
{ {
const struct nh_info *nhi; const struct nh_info *nhi;
nhi = rcu_dereference(nh->nh_info); if (nexthop_is_multipath(nh)) {
if (nexthop_num_path(nh) > 1)
return false;
nh = nexthop_mpath_select(nh, 0);
if (!nh)
return false;
}
nhi = rcu_dereference_rtnl(nh->nh_info);
return nhi->reject_nh; return nhi->reject_nh;
} }
#endif #endif

View File

@ -8,12 +8,17 @@
#include <linux/nexthop.h> #include <linux/nexthop.h>
#include <linux/rtnetlink.h> #include <linux/rtnetlink.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <net/arp.h>
#include <net/ipv6_stubs.h> #include <net/ipv6_stubs.h>
#include <net/lwtunnel.h> #include <net/lwtunnel.h>
#include <net/ndisc.h>
#include <net/nexthop.h> #include <net/nexthop.h>
#include <net/route.h> #include <net/route.h>
#include <net/sock.h> #include <net/sock.h>
static void remove_nexthop(struct net *net, struct nexthop *nh,
struct nl_info *nlinfo);
#define NH_DEV_HASHBITS 8 #define NH_DEV_HASHBITS 8
#define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS) #define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS)
@ -53,9 +58,20 @@ static void nexthop_devhash_add(struct net *net, struct nh_info *nhi)
hlist_add_head(&nhi->dev_hash, head); hlist_add_head(&nhi->dev_hash, head);
} }
void nexthop_free_rcu(struct rcu_head *head) static void nexthop_free_mpath(struct nexthop *nh)
{
struct nh_group *nhg;
int i;
nhg = rcu_dereference_raw(nh->nh_grp);
for (i = 0; i < nhg->num_nh; ++i)
WARN_ON(nhg->nh_entries[i].nh);
kfree(nhg);
}
static void nexthop_free_single(struct nexthop *nh)
{ {
struct nexthop *nh = container_of(head, struct nexthop, rcu);
struct nh_info *nhi; struct nh_info *nhi;
nhi = rcu_dereference_raw(nh->nh_info); nhi = rcu_dereference_raw(nh->nh_info);
@ -68,6 +84,16 @@ void nexthop_free_rcu(struct rcu_head *head)
break; break;
} }
kfree(nhi); kfree(nhi);
}
void nexthop_free_rcu(struct rcu_head *head)
{
struct nexthop *nh = container_of(head, struct nexthop, rcu);
if (nh->is_group)
nexthop_free_mpath(nh);
else
nexthop_free_single(nh);
kfree(nh); kfree(nh);
} }
@ -78,9 +104,26 @@ static struct nexthop *nexthop_alloc(void)
struct nexthop *nh; struct nexthop *nh;
nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL); nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL);
if (nh) {
INIT_LIST_HEAD(&nh->grp_list);
}
return nh; return nh;
} }
/* Allocate a zeroed nh_group with room for @num_nh entries.
 *
 * Returns the new group with num_nh already set, or NULL if the
 * allocation fails.
 */
static struct nh_group *nexthop_grp_alloc(u16 num_nh)
{
	/* header plus the trailing entry array; nothing from struct nexthop
	 * lives in this allocation, so its layout must not enter the size
	 */
	size_t sz = sizeof(struct nh_group) +
		    sizeof(struct nh_grp_entry) * num_nh;
	struct nh_group *nhg;

	nhg = kzalloc(sz, GFP_KERNEL);
	if (nhg)
		nhg->num_nh = num_nh;

	return nhg;
}
static void nh_base_seq_inc(struct net *net) static void nh_base_seq_inc(struct net *net)
{ {
while (++net->nexthop.seq == 0) while (++net->nexthop.seq == 0)
@ -129,6 +172,37 @@ static u32 nh_find_unused_id(struct net *net)
return 0; return 0;
} }
static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
{
struct nexthop_grp *p;
size_t len = nhg->num_nh * sizeof(*p);
struct nlattr *nla;
u16 group_type = 0;
int i;
if (nhg->mpath)
group_type = NEXTHOP_GRP_TYPE_MPATH;
if (nla_put_u16(skb, NHA_GROUP_TYPE, group_type))
goto nla_put_failure;
nla = nla_reserve(skb, NHA_GROUP, len);
if (!nla)
goto nla_put_failure;
p = nla_data(nla);
for (i = 0; i < nhg->num_nh; ++i) {
p->id = nhg->nh_entries[i].nh->id;
p->weight = nhg->nh_entries[i].weight - 1;
p += 1;
}
return 0;
nla_put_failure:
return -EMSGSIZE;
}
static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
int event, u32 portid, u32 seq, unsigned int nlflags) int event, u32 portid, u32 seq, unsigned int nlflags)
{ {
@ -152,6 +226,14 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
if (nla_put_u32(skb, NHA_ID, nh->id)) if (nla_put_u32(skb, NHA_ID, nh->id))
goto nla_put_failure; goto nla_put_failure;
if (nh->is_group) {
struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
if (nla_put_nh_group(skb, nhg))
goto nla_put_failure;
goto out;
}
nhi = rtnl_dereference(nh->nh_info); nhi = rtnl_dereference(nh->nh_info);
nhm->nh_family = nhi->family; nhm->nh_family = nhi->family;
if (nhi->reject_nh) { if (nhi->reject_nh) {
@ -196,15 +278,24 @@ nla_put_failure:
return -EMSGSIZE; return -EMSGSIZE;
} }
static size_t nh_nlmsg_size(struct nexthop *nh) static size_t nh_nlmsg_size_grp(struct nexthop *nh)
{
struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh;
return nla_total_size(sz) +
nla_total_size(2); /* NHA_GROUP_TYPE */
}
static size_t nh_nlmsg_size_single(struct nexthop *nh)
{ {
struct nh_info *nhi = rtnl_dereference(nh->nh_info); struct nh_info *nhi = rtnl_dereference(nh->nh_info);
size_t sz = nla_total_size(4); /* NHA_ID */ size_t sz;
/* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE /* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE
* are mutually exclusive * are mutually exclusive
*/ */
sz += nla_total_size(4); /* NHA_OIF */ sz = nla_total_size(4); /* NHA_OIF */
switch (nhi->family) { switch (nhi->family) {
case AF_INET: case AF_INET:
@ -227,6 +318,18 @@ static size_t nh_nlmsg_size(struct nexthop *nh)
return sz; return sz;
} }
/* Attribute space needed to describe @nh: NHA_ID plus either the group
 * or the single-nexthop attributes.
 */
static size_t nh_nlmsg_size(struct nexthop *nh)
{
	size_t id_sz = nla_total_size(4); /* NHA_ID */

	if (nh->is_group)
		return id_sz + nh_nlmsg_size_grp(nh);

	return id_sz + nh_nlmsg_size_single(nh);
}
static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info) static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
{ {
unsigned int nlflags = info->nlh ? info->nlh->nlmsg_flags : 0; unsigned int nlflags = info->nlh ? info->nlh->nlmsg_flags : 0;
@ -254,17 +357,274 @@ errout:
rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err); rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
} }
static void __remove_nexthop(struct net *net, struct nexthop *nh) static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
struct netlink_ext_ack *extack)
{ {
struct nh_info *nhi; if (nh->is_group) {
struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
nhi = rtnl_dereference(nh->nh_info); /* nested multipath (group within a group) is not
if (nhi->fib_nhc.nhc_dev) * supported
hlist_del(&nhi->dev_hash); */
if (nhg->mpath) {
NL_SET_ERR_MSG(extack,
"Multipath group can not be a nexthop within a group");
return false;
}
} else {
struct nh_info *nhi = rtnl_dereference(nh->nh_info);
if (nhi->reject_nh && npaths > 1) {
NL_SET_ERR_MSG(extack,
"Blackhole nexthop can not be used in a group with more than 1 path");
return false;
}
}
return true;
}
static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
unsigned int len = nla_len(tb[NHA_GROUP]);
struct nexthop_grp *nhg;
unsigned int i, j;
if (len & (sizeof(struct nexthop_grp) - 1)) {
NL_SET_ERR_MSG(extack,
"Invalid length for nexthop group attribute");
return -EINVAL;
}
/* convert len to number of nexthop ids */
len /= sizeof(*nhg);
nhg = nla_data(tb[NHA_GROUP]);
for (i = 0; i < len; ++i) {
if (nhg[i].resvd1 || nhg[i].resvd2) {
NL_SET_ERR_MSG(extack, "Reserved fields in nexthop_grp must be 0");
return -EINVAL;
}
if (nhg[i].weight > 254) {
NL_SET_ERR_MSG(extack, "Invalid value for weight");
return -EINVAL;
}
for (j = i + 1; j < len; ++j) {
if (nhg[i].id == nhg[j].id) {
NL_SET_ERR_MSG(extack, "Nexthop id can not be used twice in a group");
return -EINVAL;
}
}
}
nhg = nla_data(tb[NHA_GROUP]);
for (i = 0; i < len; ++i) {
struct nexthop *nh;
nh = nexthop_find_by_id(net, nhg[i].id);
if (!nh) {
NL_SET_ERR_MSG(extack, "Invalid nexthop id");
return -EINVAL;
}
if (!valid_group_nh(nh, len, extack))
return -EINVAL;
}
for (i = NHA_GROUP + 1; i < __NHA_MAX; ++i) {
if (!tb[i])
continue;
NL_SET_ERR_MSG(extack,
"No other attributes can be set in nexthop groups");
return -EINVAL;
}
return 0;
}
/* An IPv6 nexthop is good when no neighbour entry exists for its gateway
 * or the existing entry is in a NUD_VALID state.
 */
static bool ipv6_good_nh(const struct fib6_nh *nh)
{
	struct neighbour *neigh;
	int nud = NUD_REACHABLE;	/* assumed when there is no entry */

	rcu_read_lock_bh();

	neigh = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev,
					       &nh->fib_nh_gw6);
	if (neigh)
		nud = neigh->nud_state;

	rcu_read_unlock_bh();

	return (nud & NUD_VALID) != 0;
}
/* An IPv4 nexthop is good when no neighbour entry exists for its gateway
 * or the existing entry is in a NUD_VALID state.
 */
static bool ipv4_good_nh(const struct fib_nh *nh)
{
	struct neighbour *neigh;
	int nud = NUD_REACHABLE;	/* assumed when there is no entry */

	rcu_read_lock_bh();

	neigh = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
					  (__force u32)nh->fib_nh_gw4);
	if (neigh)
		nud = neigh->nud_state;

	rcu_read_unlock_bh();

	return (nud & NUD_VALID) != 0;
}
struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
{
struct nexthop *rc = NULL;
struct nh_group *nhg;
int i;
if (!nh->is_group)
return nh;
nhg = rcu_dereference(nh->nh_grp);
for (i = 0; i < nhg->num_nh; ++i) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
struct nh_info *nhi;
if (hash > atomic_read(&nhge->upper_bound))
continue;
/* nexthops always check if it is good and does
* not rely on a sysctl for this behavior
*/
nhi = rcu_dereference(nhge->nh->nh_info);
switch (nhi->family) {
case AF_INET:
if (ipv4_good_nh(&nhi->fib_nh))
return nhge->nh;
break;
case AF_INET6:
if (ipv6_good_nh(&nhi->fib6_nh))
return nhge->nh;
break;
}
if (!rc)
rc = nhge->nh;
}
return rc;
}
EXPORT_SYMBOL_GPL(nexthop_select_path);
static void nh_group_rebalance(struct nh_group *nhg)
{
int total = 0;
int w = 0;
int i;
for (i = 0; i < nhg->num_nh; ++i)
total += nhg->nh_entries[i].weight;
for (i = 0; i < nhg->num_nh; ++i) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
int upper_bound;
w += nhge->weight;
upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1;
atomic_set(&nhge->upper_bound, upper_bound);
}
}
/* Take the nexthop referenced by @nhge out of group @nhg.
 *
 * Entries following the removed one are moved down one slot, the group is
 * rebalanced and the reference held on the removed nexthop is dropped.
 * If @nlinfo is set, RTM_NEWNEXTHOP is sent for the parent group nexthop.
 */
static void remove_nh_grp_entry(struct nh_grp_entry *nhge,
struct nh_group *nhg,
struct nl_info *nlinfo)
{
/* remember the nexthop now; the slot @nhge points at may be overwritten
 * by the shift below
 */
struct nexthop *nh = nhge->nh;
struct nh_grp_entry *nhges;
bool found = false;
int i;
WARN_ON(!nh);
nhges = nhg->nh_entries;
for (i = 0; i < nhg->num_nh; ++i) {
if (found) {
/* move entry i into slot i-1 (nh and weight only) and relink
 * the moved slot on its nexthop's grp_list
 */
nhges[i-1].nh = nhges[i].nh;
nhges[i-1].weight = nhges[i].weight;
list_del(&nhges[i].nh_list);
list_add(&nhges[i-1].nh_list, &nhges[i-1].nh->grp_list);
} else if (nhg->nh_entries[i].nh == nh) {
found = true;
}
}
if (WARN_ON(!found))
return;
/* the last slot is now unused */
nhg->num_nh--;
nhg->nh_entries[nhg->num_nh].nh = NULL;
nh_group_rebalance(nhg);
nexthop_put(nh);
if (nlinfo)
nexthop_notify(RTM_NEWNEXTHOP, nhge->nh_parent, nlinfo);
}
/* Walk nh->grp_list and take @nh out of every group using it; a group
 * left without entries is removed as well.
 */
static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh,
				       struct nl_info *nlinfo)
{
	struct nh_grp_entry *ent, *next;

	list_for_each_entry_safe(ent, next, &nh->grp_list, nh_list) {
		struct nh_group *grp;

		list_del(&ent->nh_list);
		grp = rtnl_dereference(ent->nh_parent->nh_grp);
		remove_nh_grp_entry(ent, grp, nlinfo);

		if (grp->num_nh)
			continue;

		/* if this group has no more entries then remove it */
		remove_nexthop(net, ent->nh_parent, nlinfo);
	}
}
static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
{
struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
int i, num_nh = nhg->num_nh;
for (i = 0; i < num_nh; ++i) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
if (WARN_ON(!nhge->nh))
continue;
list_del(&nhge->nh_list);
nexthop_put(nhge->nh);
nhge->nh = NULL;
nhg->num_nh--;
}
}
static void __remove_nexthop(struct net *net, struct nexthop *nh,
struct nl_info *nlinfo)
{
if (nh->is_group) {
remove_nexthop_group(nh, nlinfo);
} else {
struct nh_info *nhi;
nhi = rtnl_dereference(nh->nh_info);
if (nhi->fib_nhc.nhc_dev)
hlist_del(&nhi->dev_hash);
remove_nexthop_from_groups(net, nh, nlinfo);
}
} }
static void remove_nexthop(struct net *net, struct nexthop *nh, static void remove_nexthop(struct net *net, struct nexthop *nh,
bool skip_fib, struct nl_info *nlinfo) struct nl_info *nlinfo)
{ {
/* remove from the tree */ /* remove from the tree */
rb_erase(&nh->rb_node, &net->nexthop.rb_root); rb_erase(&nh->rb_node, &net->nexthop.rb_root);
@ -272,7 +632,7 @@ static void remove_nexthop(struct net *net, struct nexthop *nh,
if (nlinfo) if (nlinfo)
nexthop_notify(RTM_DELNEXTHOP, nh, nlinfo); nexthop_notify(RTM_DELNEXTHOP, nh, nlinfo);
__remove_nexthop(net, nh); __remove_nexthop(net, nh, nlinfo);
nh_base_seq_inc(net); nh_base_seq_inc(net);
nexthop_put(nh); nexthop_put(nh);
@ -353,7 +713,7 @@ static void nexthop_flush_dev(struct net_device *dev)
if (nhi->fib_nhc.nhc_dev != dev) if (nhi->fib_nhc.nhc_dev != dev)
continue; continue;
remove_nexthop(net, nhi->nh_parent, false, NULL); remove_nexthop(net, nhi->nh_parent, NULL);
} }
} }
@ -366,11 +726,69 @@ static void flush_all_nexthops(struct net *net)
while ((node = rb_first(root))) { while ((node = rb_first(root))) {
nh = rb_entry(node, struct nexthop, rb_node); nh = rb_entry(node, struct nexthop, rb_node);
remove_nexthop(net, nh, false, NULL); remove_nexthop(net, nh, NULL);
cond_resched(); cond_resched();
} }
} }
/* Build a group nexthop from the NHA_GROUP attribute in @cfg.
 *
 * Each listed id is looked up and a reference taken on it; the entry is
 * linked on the member's grp_list and points back at the new nexthop.
 * Multipath groups are rebalanced before the group is published.
 *
 * Returns the new nexthop, ERR_PTR(-ENOMEM) on allocation failure or
 * ERR_PTR(-ENOENT) if a reference on a member could not be taken.
 */
static struct nexthop *nexthop_create_group(struct net *net,
					    struct nh_config *cfg)
{
	struct nlattr *grps_attr = cfg->nh_grp;
	struct nexthop_grp *entry = nla_data(grps_attr);
	struct nh_group *nhg;
	struct nexthop *nh;
	int i;

	nh = nexthop_alloc();
	if (!nh)
		return ERR_PTR(-ENOMEM);

	nh->is_group = 1;

	nhg = nexthop_grp_alloc(nla_len(grps_attr) / sizeof(*entry));
	if (!nhg) {
		kfree(nh);
		return ERR_PTR(-ENOMEM);
	}

	for (i = 0; i < nhg->num_nh; ++i) {
		struct nexthop *nhe;
		struct nh_info *nhi;

		nhe = nexthop_find_by_id(net, entry[i].id);
		if (!nexthop_get(nhe))
			goto out_no_nh;

		nhi = rtnl_dereference(nhe->nh_info);
		if (nhi->family == AF_INET)
			nhg->has_v4 = true;

		nhg->nh_entries[i].nh = nhe;
		/* user weight 0..254 is stored as 1..255 */
		nhg->nh_entries[i].weight = entry[i].weight + 1;
		list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list);
		nhg->nh_entries[i].nh_parent = nh;
	}

	if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) {
		nhg->mpath = 1;
		nh_group_rebalance(nhg);
	}

	rcu_assign_pointer(nh->nh_grp, nhg);

	return nh;

out_no_nh:
	/* Entry i was never filled in (its nh is still NULL), so unwind
	 * only entries 0 .. i-1. Each of those is linked on its member's
	 * grp_list and must be unlinked before the group memory is freed.
	 */
	for (i--; i >= 0; --i) {
		list_del(&nhg->nh_entries[i].nh_list);
		nexthop_put(nhg->nh_entries[i].nh);
	}

	kfree(nhg);
	kfree(nh);

	return ERR_PTR(-ENOENT);
}
static int nh_create_ipv4(struct net *net, struct nexthop *nh, static int nh_create_ipv4(struct net *net, struct nexthop *nh,
struct nh_info *nhi, struct nh_config *cfg, struct nh_info *nhi, struct nh_config *cfg,
struct netlink_ext_ack *extack) struct netlink_ext_ack *extack)
@ -506,7 +924,11 @@ static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
} }
} }
nh = nexthop_create(net, cfg, extack); if (cfg->nh_grp)
nh = nexthop_create_group(net, cfg);
else
nh = nexthop_create(net, cfg, extack);
if (IS_ERR(nh)) if (IS_ERR(nh))
return nh; return nh;
@ -517,7 +939,7 @@ static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
err = insert_nexthop(net, nh, cfg, extack); err = insert_nexthop(net, nh, cfg, extack);
if (err) { if (err) {
__remove_nexthop(net, nh); __remove_nexthop(net, nh, NULL);
nexthop_put(nh); nexthop_put(nh);
nh = ERR_PTR(err); nh = ERR_PTR(err);
} }
@ -552,6 +974,10 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
case AF_INET: case AF_INET:
case AF_INET6: case AF_INET6:
break; break;
case AF_UNSPEC:
if (tb[NHA_GROUP])
break;
/* fallthrough */
default: default:
NL_SET_ERR_MSG(extack, "Invalid address family"); NL_SET_ERR_MSG(extack, "Invalid address family");
goto out; goto out;
@ -575,6 +1001,27 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
if (tb[NHA_ID]) if (tb[NHA_ID])
cfg->nh_id = nla_get_u32(tb[NHA_ID]); cfg->nh_id = nla_get_u32(tb[NHA_ID]);
if (tb[NHA_GROUP]) {
if (nhm->nh_family != AF_UNSPEC) {
NL_SET_ERR_MSG(extack, "Invalid family for group");
goto out;
}
cfg->nh_grp = tb[NHA_GROUP];
cfg->nh_grp_type = NEXTHOP_GRP_TYPE_MPATH;
if (tb[NHA_GROUP_TYPE])
cfg->nh_grp_type = nla_get_u16(tb[NHA_GROUP_TYPE]);
if (cfg->nh_grp_type > NEXTHOP_GRP_TYPE_MAX) {
NL_SET_ERR_MSG(extack, "Invalid group type");
goto out;
}
err = nh_check_attr_group(net, tb, extack);
/* no other attributes should be set */
goto out;
}
if (tb[NHA_BLACKHOLE]) { if (tb[NHA_BLACKHOLE]) {
if (tb[NHA_GATEWAY] || tb[NHA_OIF] || if (tb[NHA_GATEWAY] || tb[NHA_OIF] ||
tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) { tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) {
@ -752,7 +1199,7 @@ static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!nh) if (!nh)
return -ENOENT; return -ENOENT;
remove_nexthop(net, nh, false, &nlinfo); remove_nexthop(net, nh, &nlinfo);
return 0; return 0;
} }
@ -796,15 +1243,21 @@ errout_free:
goto out; goto out;
} }
static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx,
int master_idx, u8 family) bool group_filter, u8 family)
{ {
const struct net_device *dev; const struct net_device *dev;
const struct nh_info *nhi; const struct nh_info *nhi;
if (group_filter && !nh->is_group)
return true;
if (!dev_idx && !master_idx && !family) if (!dev_idx && !master_idx && !family)
return false; return false;
if (nh->is_group)
return true;
nhi = rtnl_dereference(nh->nh_info); nhi = rtnl_dereference(nh->nh_info);
if (family && nhi->family != family) if (family && nhi->family != family)
return true; return true;
@ -827,8 +1280,8 @@ static bool nh_dump_filtered(struct nexthop *nh, int dev_idx,
return false; return false;
} }
static int nh_valid_dump_req(const struct nlmsghdr *nlh, static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx,
int *dev_idx, int *master_idx, int *master_idx, bool *group_filter,
struct netlink_callback *cb) struct netlink_callback *cb)
{ {
struct netlink_ext_ack *extack = cb->extack; struct netlink_ext_ack *extack = cb->extack;
@ -863,6 +1316,9 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh,
} }
*master_idx = idx; *master_idx = idx;
break; break;
case NHA_GROUPS:
*group_filter = true;
break;
default: default:
NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request"); NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request");
return -EINVAL; return -EINVAL;
@ -885,11 +1341,13 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
int dev_filter_idx = 0, master_idx = 0; int dev_filter_idx = 0, master_idx = 0;
struct net *net = sock_net(skb->sk); struct net *net = sock_net(skb->sk);
struct rb_root *root = &net->nexthop.rb_root; struct rb_root *root = &net->nexthop.rb_root;
bool group_filter = false;
struct rb_node *node; struct rb_node *node;
int idx = 0, s_idx; int idx = 0, s_idx;
int err; int err;
err = nh_valid_dump_req(cb->nlh, &dev_filter_idx, &master_idx, cb); err = nh_valid_dump_req(cb->nlh, &dev_filter_idx, &master_idx,
&group_filter, cb);
if (err < 0) if (err < 0)
return err; return err;
@ -902,7 +1360,7 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
nh = rb_entry(node, struct nexthop, rb_node); nh = rb_entry(node, struct nexthop, rb_node);
if (nh_dump_filtered(nh, dev_filter_idx, master_idx, if (nh_dump_filtered(nh, dev_filter_idx, master_idx,
nhm->nh_family)) group_filter, nhm->nh_family))
goto cont; goto cont;
err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP,