Merge branch 'mlxsw-Improve-IPv6-route-insertion-rate'
Ido Schimmel says: ==================== mlxsw: Improve IPv6 route insertion rate Unlike IPv4, an IPv6 multipath route in the kernel is composed from multiple sibling routes, each representing a single nexthop. Therefore, an addition of a multipath route with N nexthops translates to N in-kernel notifications. This is inefficient for device drivers that need to program the route to the underlying device. Each time a new nexthop is appended, a new nexthop group needs to be constructed and the old one deleted. This patchset improves the situation by sending a single notification for a multipath route addition / deletion instead of one per-nexthop. When adding thousands of multipath routes with 16 nexthops, I measured an improvement of about x10 in the insertion rate. Patches #1-#3 add a flag that indicates that in-kernel notifications need to be suppressed and extend the IPv6 FIB notification info with information about the number of sibling routes that are being notified. Patches #4-#5 adjust the two current listeners to these notifications to ignore notifications about IPv6 multipath routes. Patches #6-#7 add add / delete notifications for IPv6 multipath routes. Patches #8-#14 do the same for mlxsw. Patch #15 finally removes the limitations added in patches #4-#5 and stops the kernel from sending a notification for each added / deleted nexthop. Patch #16 adds test cases. v2 (David Ahern): * Remove patch adjusting netdevsim to consume resources for each fib6_info. Instead, consume one resource for the entire multipath route * Remove 'multipath_rt' usage in patch #10 * Remove 'multipath_rt' from 'struct fib6_entry_notifier_info' in patch #15. The member is only removed in this patch to prevent drivers from processing multipath routes twice during the series ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
2ae6b594fb
|
@ -5278,17 +5278,21 @@ err_nexthop6_group_get:
|
|||
static int
|
||||
mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
|
||||
struct mlxsw_sp_fib6_entry *fib6_entry,
|
||||
struct fib6_info *rt)
|
||||
struct fib6_info **rt_arr, unsigned int nrt6)
|
||||
{
|
||||
struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
|
||||
int err;
|
||||
int err, i;
|
||||
|
||||
mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
|
||||
if (IS_ERR(mlxsw_sp_rt6))
|
||||
return PTR_ERR(mlxsw_sp_rt6);
|
||||
for (i = 0; i < nrt6; i++) {
|
||||
mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
|
||||
if (IS_ERR(mlxsw_sp_rt6)) {
|
||||
err = PTR_ERR(mlxsw_sp_rt6);
|
||||
goto err_rt6_create;
|
||||
}
|
||||
|
||||
list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
|
||||
fib6_entry->nrt6++;
|
||||
list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
|
||||
fib6_entry->nrt6++;
|
||||
}
|
||||
|
||||
err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
|
||||
if (err)
|
||||
|
@ -5297,27 +5301,38 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
|
|||
return 0;
|
||||
|
||||
err_nexthop6_group_update:
|
||||
fib6_entry->nrt6--;
|
||||
list_del(&mlxsw_sp_rt6->list);
|
||||
mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
|
||||
i = nrt6;
|
||||
err_rt6_create:
|
||||
for (i--; i >= 0; i--) {
|
||||
fib6_entry->nrt6--;
|
||||
mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
|
||||
struct mlxsw_sp_rt6, list);
|
||||
list_del(&mlxsw_sp_rt6->list);
|
||||
mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static void
|
||||
mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
|
||||
struct mlxsw_sp_fib6_entry *fib6_entry,
|
||||
struct fib6_info *rt)
|
||||
struct fib6_info **rt_arr, unsigned int nrt6)
|
||||
{
|
||||
struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
|
||||
int i;
|
||||
|
||||
mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
|
||||
if (WARN_ON(!mlxsw_sp_rt6))
|
||||
return;
|
||||
for (i = 0; i < nrt6; i++) {
|
||||
mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry,
|
||||
rt_arr[i]);
|
||||
if (WARN_ON_ONCE(!mlxsw_sp_rt6))
|
||||
continue;
|
||||
|
||||
fib6_entry->nrt6--;
|
||||
list_del(&mlxsw_sp_rt6->list);
|
||||
mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
|
||||
}
|
||||
|
||||
fib6_entry->nrt6--;
|
||||
list_del(&mlxsw_sp_rt6->list);
|
||||
mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
|
||||
mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
|
||||
}
|
||||
|
||||
static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
|
||||
|
@ -5358,29 +5373,32 @@ mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
|
|||
static struct mlxsw_sp_fib6_entry *
|
||||
mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
|
||||
struct mlxsw_sp_fib_node *fib_node,
|
||||
struct fib6_info *rt)
|
||||
struct fib6_info **rt_arr, unsigned int nrt6)
|
||||
{
|
||||
struct mlxsw_sp_fib6_entry *fib6_entry;
|
||||
struct mlxsw_sp_fib_entry *fib_entry;
|
||||
struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
|
||||
int err;
|
||||
int err, i;
|
||||
|
||||
fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
|
||||
if (!fib6_entry)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
fib_entry = &fib6_entry->common;
|
||||
|
||||
mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
|
||||
if (IS_ERR(mlxsw_sp_rt6)) {
|
||||
err = PTR_ERR(mlxsw_sp_rt6);
|
||||
goto err_rt6_create;
|
||||
INIT_LIST_HEAD(&fib6_entry->rt6_list);
|
||||
|
||||
for (i = 0; i < nrt6; i++) {
|
||||
mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
|
||||
if (IS_ERR(mlxsw_sp_rt6)) {
|
||||
err = PTR_ERR(mlxsw_sp_rt6);
|
||||
goto err_rt6_create;
|
||||
}
|
||||
list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
|
||||
fib6_entry->nrt6++;
|
||||
}
|
||||
|
||||
mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
|
||||
mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]);
|
||||
|
||||
INIT_LIST_HEAD(&fib6_entry->rt6_list);
|
||||
list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
|
||||
fib6_entry->nrt6 = 1;
|
||||
err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
|
||||
if (err)
|
||||
goto err_nexthop6_group_get;
|
||||
|
@ -5390,9 +5408,15 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
|
|||
return fib6_entry;
|
||||
|
||||
err_nexthop6_group_get:
|
||||
list_del(&mlxsw_sp_rt6->list);
|
||||
mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
|
||||
i = nrt6;
|
||||
err_rt6_create:
|
||||
for (i--; i >= 0; i--) {
|
||||
fib6_entry->nrt6--;
|
||||
mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
|
||||
struct mlxsw_sp_rt6, list);
|
||||
list_del(&mlxsw_sp_rt6->list);
|
||||
mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
|
||||
}
|
||||
kfree(fib6_entry);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
@ -5435,16 +5459,16 @@ mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
|
|||
|
||||
static int
|
||||
mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
|
||||
bool replace)
|
||||
bool *p_replace)
|
||||
{
|
||||
struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
|
||||
struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
|
||||
struct mlxsw_sp_fib6_entry *fib6_entry;
|
||||
|
||||
fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
|
||||
fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, *p_replace);
|
||||
|
||||
if (replace && WARN_ON(!fib6_entry))
|
||||
return -EINVAL;
|
||||
if (*p_replace && !fib6_entry)
|
||||
*p_replace = false;
|
||||
|
||||
if (fib6_entry) {
|
||||
list_add_tail(&new6_entry->common.list,
|
||||
|
@ -5479,11 +5503,11 @@ mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
|
|||
|
||||
static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
|
||||
struct mlxsw_sp_fib6_entry *fib6_entry,
|
||||
bool replace)
|
||||
bool *p_replace)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
|
||||
err = mlxsw_sp_fib6_node_list_insert(fib6_entry, p_replace);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
|
@ -5556,10 +5580,12 @@ static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
|
|||
}
|
||||
|
||||
static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
|
||||
struct fib6_info *rt, bool replace)
|
||||
struct fib6_info **rt_arr,
|
||||
unsigned int nrt6, bool replace)
|
||||
{
|
||||
struct mlxsw_sp_fib6_entry *fib6_entry;
|
||||
struct mlxsw_sp_fib_node *fib_node;
|
||||
struct fib6_info *rt = rt_arr[0];
|
||||
int err;
|
||||
|
||||
if (mlxsw_sp->router->aborted)
|
||||
|
@ -5584,19 +5610,21 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
|
|||
*/
|
||||
fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
|
||||
if (fib6_entry) {
|
||||
err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
|
||||
err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry,
|
||||
rt_arr, nrt6);
|
||||
if (err)
|
||||
goto err_fib6_entry_nexthop_add;
|
||||
return 0;
|
||||
}
|
||||
|
||||
fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
|
||||
fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr,
|
||||
nrt6);
|
||||
if (IS_ERR(fib6_entry)) {
|
||||
err = PTR_ERR(fib6_entry);
|
||||
goto err_fib6_entry_create;
|
||||
}
|
||||
|
||||
err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
|
||||
err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, &replace);
|
||||
if (err)
|
||||
goto err_fib6_node_entry_link;
|
||||
|
||||
|
@ -5613,10 +5641,12 @@ err_fib6_entry_nexthop_add:
|
|||
}
|
||||
|
||||
static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
|
||||
struct fib6_info *rt)
|
||||
struct fib6_info **rt_arr,
|
||||
unsigned int nrt6)
|
||||
{
|
||||
struct mlxsw_sp_fib6_entry *fib6_entry;
|
||||
struct mlxsw_sp_fib_node *fib_node;
|
||||
struct fib6_info *rt = rt_arr[0];
|
||||
|
||||
if (mlxsw_sp->router->aborted)
|
||||
return;
|
||||
|
@ -5628,11 +5658,12 @@ static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
|
|||
if (WARN_ON(!fib6_entry))
|
||||
return;
|
||||
|
||||
/* If route is part of a multipath entry, but not the last one
|
||||
* removed, then only reduce its nexthop group.
|
||||
/* If not all the nexthops are deleted, then only reduce the nexthop
|
||||
* group.
|
||||
*/
|
||||
if (!list_is_singular(&fib6_entry->rt6_list)) {
|
||||
mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
|
||||
if (nrt6 != fib6_entry->nrt6) {
|
||||
mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr,
|
||||
nrt6);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -5893,10 +5924,15 @@ static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
|
|||
dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
|
||||
}
|
||||
|
||||
struct mlxsw_sp_fib6_event_work {
|
||||
struct fib6_info **rt_arr;
|
||||
unsigned int nrt6;
|
||||
};
|
||||
|
||||
struct mlxsw_sp_fib_event_work {
|
||||
struct work_struct work;
|
||||
union {
|
||||
struct fib6_entry_notifier_info fen6_info;
|
||||
struct mlxsw_sp_fib6_event_work fib6_work;
|
||||
struct fib_entry_notifier_info fen_info;
|
||||
struct fib_rule_notifier_info fr_info;
|
||||
struct fib_nh_notifier_info fnh_info;
|
||||
|
@ -5907,6 +5943,54 @@ struct mlxsw_sp_fib_event_work {
|
|||
unsigned long event;
|
||||
};
|
||||
|
||||
static int
|
||||
mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work,
|
||||
struct fib6_entry_notifier_info *fen6_info)
|
||||
{
|
||||
struct fib6_info *rt = fen6_info->rt;
|
||||
struct fib6_info **rt_arr;
|
||||
struct fib6_info *iter;
|
||||
unsigned int nrt6;
|
||||
int i = 0;
|
||||
|
||||
nrt6 = fen6_info->nsiblings + 1;
|
||||
|
||||
rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC);
|
||||
if (!rt_arr)
|
||||
return -ENOMEM;
|
||||
|
||||
fib6_work->rt_arr = rt_arr;
|
||||
fib6_work->nrt6 = nrt6;
|
||||
|
||||
rt_arr[0] = rt;
|
||||
fib6_info_hold(rt);
|
||||
|
||||
if (!fen6_info->nsiblings)
|
||||
return 0;
|
||||
|
||||
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
|
||||
if (i == fen6_info->nsiblings)
|
||||
break;
|
||||
|
||||
rt_arr[i + 1] = iter;
|
||||
fib6_info_hold(iter);
|
||||
i++;
|
||||
}
|
||||
WARN_ON_ONCE(i != fen6_info->nsiblings);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < fib6_work->nrt6; i++)
|
||||
mlxsw_sp_rt6_release(fib6_work->rt_arr[i]);
|
||||
kfree(fib6_work->rt_arr);
|
||||
}
|
||||
|
||||
static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
|
||||
{
|
||||
struct mlxsw_sp_fib_event_work *fib_work =
|
||||
|
@ -5965,18 +6049,21 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
|
|||
|
||||
switch (fib_work->event) {
|
||||
case FIB_EVENT_ENTRY_REPLACE: /* fall through */
|
||||
case FIB_EVENT_ENTRY_APPEND: /* fall through */
|
||||
case FIB_EVENT_ENTRY_ADD:
|
||||
replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
|
||||
err = mlxsw_sp_router_fib6_add(mlxsw_sp,
|
||||
fib_work->fen6_info.rt, replace);
|
||||
fib_work->fib6_work.rt_arr,
|
||||
fib_work->fib6_work.nrt6,
|
||||
replace);
|
||||
if (err)
|
||||
mlxsw_sp_router_fib_abort(mlxsw_sp);
|
||||
mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
|
||||
mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
|
||||
break;
|
||||
case FIB_EVENT_ENTRY_DEL:
|
||||
mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
|
||||
mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
|
||||
mlxsw_sp_router_fib6_del(mlxsw_sp,
|
||||
fib_work->fib6_work.rt_arr,
|
||||
fib_work->fib6_work.nrt6);
|
||||
mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
|
||||
break;
|
||||
case FIB_EVENT_RULE_ADD:
|
||||
/* if we get here, a rule was added that we do not support.
|
||||
|
@ -6065,22 +6152,26 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
|
|||
}
|
||||
}
|
||||
|
||||
static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
|
||||
struct fib_notifier_info *info)
|
||||
static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
|
||||
struct fib_notifier_info *info)
|
||||
{
|
||||
struct fib6_entry_notifier_info *fen6_info;
|
||||
int err;
|
||||
|
||||
switch (fib_work->event) {
|
||||
case FIB_EVENT_ENTRY_REPLACE: /* fall through */
|
||||
case FIB_EVENT_ENTRY_APPEND: /* fall through */
|
||||
case FIB_EVENT_ENTRY_ADD: /* fall through */
|
||||
case FIB_EVENT_ENTRY_DEL:
|
||||
fen6_info = container_of(info, struct fib6_entry_notifier_info,
|
||||
info);
|
||||
fib_work->fen6_info = *fen6_info;
|
||||
fib6_info_hold(fib_work->fen6_info.rt);
|
||||
err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work,
|
||||
fen6_info);
|
||||
if (err)
|
||||
return err;
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -6221,7 +6312,9 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
|
|||
break;
|
||||
case AF_INET6:
|
||||
INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
|
||||
mlxsw_sp_router_fib6_event(fib_work, info);
|
||||
err = mlxsw_sp_router_fib6_event(fib_work, info);
|
||||
if (err)
|
||||
goto err_fib_event;
|
||||
break;
|
||||
case RTNL_FAMILY_IP6MR:
|
||||
case RTNL_FAMILY_IPMR:
|
||||
|
@ -6233,6 +6326,10 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
|
|||
mlxsw_core_schedule_work(&fib_work->work);
|
||||
|
||||
return NOTIFY_DONE;
|
||||
|
||||
err_fib_event:
|
||||
kfree(fib_work);
|
||||
return NOTIFY_BAD;
|
||||
}
|
||||
|
||||
struct mlxsw_sp_rif *
|
||||
|
|
|
@ -377,6 +377,7 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *,
|
|||
struct fib6_entry_notifier_info {
|
||||
struct fib_notifier_info info; /* must be first */
|
||||
struct fib6_info *rt;
|
||||
unsigned int nsiblings;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -450,6 +451,11 @@ int call_fib6_entry_notifiers(struct net *net,
|
|||
enum fib_event_type event_type,
|
||||
struct fib6_info *rt,
|
||||
struct netlink_ext_ack *extack);
|
||||
int call_fib6_multipath_entry_notifiers(struct net *net,
|
||||
enum fib_event_type event_type,
|
||||
struct fib6_info *rt,
|
||||
unsigned int nsiblings,
|
||||
struct netlink_ext_ack *extack);
|
||||
void fib6_rt_update(struct net *net, struct fib6_info *rt,
|
||||
struct nl_info *info);
|
||||
void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
|
||||
|
|
|
@ -378,13 +378,17 @@ struct nla_policy {
|
|||
/**
|
||||
* struct nl_info - netlink source information
|
||||
* @nlh: Netlink message header of original request
|
||||
* @nl_net: Network namespace
|
||||
* @portid: Netlink PORTID of requesting application
|
||||
* @skip_notify: Skip netlink notifications to user space
|
||||
* @skip_notify_kernel: Skip selected in-kernel notifications
|
||||
*/
|
||||
struct nl_info {
|
||||
struct nlmsghdr *nlh;
|
||||
struct net *nl_net;
|
||||
u32 portid;
|
||||
bool skip_notify;
|
||||
u8 skip_notify:1,
|
||||
skip_notify_kernel:1;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
@ -381,6 +381,22 @@ int call_fib6_entry_notifiers(struct net *net,
|
|||
return call_fib6_notifiers(net, event_type, &info.info);
|
||||
}
|
||||
|
||||
int call_fib6_multipath_entry_notifiers(struct net *net,
|
||||
enum fib_event_type event_type,
|
||||
struct fib6_info *rt,
|
||||
unsigned int nsiblings,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
struct fib6_entry_notifier_info info = {
|
||||
.info.extack = extack,
|
||||
.rt = rt,
|
||||
.nsiblings = nsiblings,
|
||||
};
|
||||
|
||||
rt->fib6_table->fib_seq++;
|
||||
return call_fib6_notifiers(net, event_type, &info.info);
|
||||
}
|
||||
|
||||
struct fib6_dump_arg {
|
||||
struct net *net;
|
||||
struct notifier_block *nb;
|
||||
|
@ -1123,11 +1139,13 @@ next_iter:
|
|||
add:
|
||||
nlflags |= NLM_F_CREATE;
|
||||
|
||||
err = call_fib6_entry_notifiers(info->nl_net,
|
||||
FIB_EVENT_ENTRY_ADD,
|
||||
rt, extack);
|
||||
if (err)
|
||||
return err;
|
||||
if (!info->skip_notify_kernel) {
|
||||
err = call_fib6_entry_notifiers(info->nl_net,
|
||||
FIB_EVENT_ENTRY_ADD,
|
||||
rt, extack);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
rcu_assign_pointer(rt->fib6_next, iter);
|
||||
fib6_info_hold(rt);
|
||||
|
@ -1152,11 +1170,13 @@ add:
|
|||
return -ENOENT;
|
||||
}
|
||||
|
||||
err = call_fib6_entry_notifiers(info->nl_net,
|
||||
FIB_EVENT_ENTRY_REPLACE,
|
||||
rt, extack);
|
||||
if (err)
|
||||
return err;
|
||||
if (!info->skip_notify_kernel) {
|
||||
err = call_fib6_entry_notifiers(info->nl_net,
|
||||
FIB_EVENT_ENTRY_REPLACE,
|
||||
rt, extack);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
fib6_info_hold(rt);
|
||||
rcu_assign_pointer(rt->fib6_node, fn);
|
||||
|
@ -1839,9 +1859,11 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
|
|||
|
||||
fib6_purge_rt(rt, fn, net);
|
||||
|
||||
call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
|
||||
if (!info->skip_notify_kernel)
|
||||
call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
|
||||
if (!info->skip_notify)
|
||||
inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
|
||||
|
||||
fib6_info_release(rt);
|
||||
}
|
||||
|
||||
|
|
|
@ -3718,6 +3718,12 @@ static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
|
|||
info->skip_notify = 1;
|
||||
}
|
||||
|
||||
info->skip_notify_kernel = 1;
|
||||
call_fib6_multipath_entry_notifiers(net,
|
||||
FIB_EVENT_ENTRY_DEL,
|
||||
rt,
|
||||
rt->fib6_nsiblings,
|
||||
NULL);
|
||||
list_for_each_entry_safe(sibling, next_sibling,
|
||||
&rt->fib6_siblings,
|
||||
fib6_siblings) {
|
||||
|
@ -4965,6 +4971,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
|
|||
{
|
||||
struct fib6_info *rt_notif = NULL, *rt_last = NULL;
|
||||
struct nl_info *info = &cfg->fc_nlinfo;
|
||||
enum fib_event_type event_type;
|
||||
struct fib6_config r_cfg;
|
||||
struct rtnexthop *rtnh;
|
||||
struct fib6_info *rt;
|
||||
|
@ -5042,6 +5049,11 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
|
|||
*/
|
||||
info->skip_notify = 1;
|
||||
|
||||
/* For add and replace, send one notification with all nexthops. For
|
||||
* append, send one notification with all appended nexthops.
|
||||
*/
|
||||
info->skip_notify_kernel = 1;
|
||||
|
||||
err_nh = NULL;
|
||||
list_for_each_entry(nh, &rt6_nh_list, next) {
|
||||
err = __ip6_ins_rt(nh->fib6_info, info, extack);
|
||||
|
@ -5078,6 +5090,15 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
|
|||
nhn++;
|
||||
}
|
||||
|
||||
event_type = replace ? FIB_EVENT_ENTRY_REPLACE : FIB_EVENT_ENTRY_ADD;
|
||||
err = call_fib6_multipath_entry_notifiers(info->nl_net, event_type,
|
||||
rt_notif, nhn - 1, extack);
|
||||
if (err) {
|
||||
/* Delete all the siblings that were just added */
|
||||
err_nh = NULL;
|
||||
goto add_errout;
|
||||
}
|
||||
|
||||
/* success ... tell user about new route */
|
||||
ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
|
||||
goto cleanup;
|
||||
|
|
|
@ -0,0 +1,349 @@
|
|||
#!/bin/bash
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# Test unicast FIB offload indication.
|
||||
|
||||
lib_dir=$(dirname $0)/../../../net/forwarding
|
||||
|
||||
ALL_TESTS="
|
||||
ipv6_route_add
|
||||
ipv6_route_replace
|
||||
ipv6_route_nexthop_group_share
|
||||
ipv6_route_rate
|
||||
"
|
||||
NUM_NETIFS=4
|
||||
source $lib_dir/lib.sh
|
||||
source $lib_dir/devlink_lib.sh
|
||||
|
||||
tor1_create()
|
||||
{
|
||||
simple_if_init $tor1_p1 2001:db8:1::2/128 2001:db8:1::3/128
|
||||
}
|
||||
|
||||
tor1_destroy()
|
||||
{
|
||||
simple_if_fini $tor1_p1 2001:db8:1::2/128 2001:db8:1::3/128
|
||||
}
|
||||
|
||||
tor2_create()
|
||||
{
|
||||
simple_if_init $tor2_p1 2001:db8:2::2/128 2001:db8:2::3/128
|
||||
}
|
||||
|
||||
tor2_destroy()
|
||||
{
|
||||
simple_if_fini $tor2_p1 2001:db8:2::2/128 2001:db8:2::3/128
|
||||
}
|
||||
|
||||
spine_create()
|
||||
{
|
||||
ip link set dev $spine_p1 up
|
||||
ip link set dev $spine_p2 up
|
||||
|
||||
__addr_add_del $spine_p1 add 2001:db8:1::1/64
|
||||
__addr_add_del $spine_p2 add 2001:db8:2::1/64
|
||||
}
|
||||
|
||||
spine_destroy()
|
||||
{
|
||||
__addr_add_del $spine_p2 del 2001:db8:2::1/64
|
||||
__addr_add_del $spine_p1 del 2001:db8:1::1/64
|
||||
|
||||
ip link set dev $spine_p2 down
|
||||
ip link set dev $spine_p1 down
|
||||
}
|
||||
|
||||
# Count the lines of "ip -6 route show match <selector>" that contain
# "offload" and compare the count with the expected value.
#   $1 - route selector; deliberately word-split into separate arguments
#   $2 - expected number of matching lines
# Returns 0 when the count equals the expected value, 1 otherwise.
ipv6_offload_check()
{
	local selector="$1"; shift
	local want=$1; shift
	local have

	# Try to avoid races with route offload
	sleep .1

	have=$(ip -6 route show match ${selector} | grep -c "offload")

	[ $have -eq $want ] || return 1

	return 0
}
|
||||
|
||||
# Check the offload indication of IPv6 prefix (single nexthop) routes for
# 2001:db8:3::/64 as routes with different metrics are added, and after the
# routes are flushed and re-added on another device. Only the route with the
# lowest metric is expected to be reported as offloaded. Results are recorded
# through check_err / log_test; the prefix is flushed on exit.
ipv6_route_add_prefix()
{
	RET=0

	# Add a prefix route and check that it is offloaded.
	ip -6 route add 2001:db8:3::/64 dev $spine_p1 metric 100
	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 1
	check_err $? "prefix route not offloaded"

	# Append an identical prefix route with a higher metric and check that
	# offload indication did not change.
	ip -6 route append 2001:db8:3::/64 dev $spine_p1 metric 200
	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 1
	check_err $? "lowest metric not offloaded after append"
	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 200" 0
	check_err $? "highest metric offloaded when should not"

	# Prepend an identical prefix route with lower metric and check that
	# it is offloaded and the others are not.
	ip -6 route append 2001:db8:3::/64 dev $spine_p1 metric 10
	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 10" 1
	check_err $? "lowest metric not offloaded after prepend"
	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 0
	check_err $? "mid metric offloaded when should not"
	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 200" 0
	check_err $? "highest metric offloaded when should not"

	# Delete the routes and add the same route with a different nexthop
	# device. Check that it is offloaded.
	ip -6 route flush 2001:db8:3::/64 dev $spine_p1
	ip -6 route add 2001:db8:3::/64 dev $spine_p2
	ipv6_offload_check "2001:db8:3::/64 dev $spine_p2" 1
	# Record the result; without this the check above has no effect on
	# the test outcome.
	check_err $? "prefix route not offloaded after changing nexthop device"

	log_test "IPv6 prefix route add"

	ip -6 route flush 2001:db8:3::/64
}
|
||||
|
||||
ipv6_route_add_mpath()
|
||||
{
|
||||
RET=0
|
||||
|
||||
# Add a multipath route and check that it is offloaded.
|
||||
ip -6 route add 2001:db8:3::/64 metric 100 \
|
||||
nexthop via 2001:db8:1::2 dev $spine_p1 \
|
||||
nexthop via 2001:db8:2::2 dev $spine_p2
|
||||
ipv6_offload_check "2001:db8:3::/64 metric 100" 2
|
||||
check_err $? "multipath route not offloaded when should"
|
||||
|
||||
# Append another nexthop and check that it is offloaded as well.
|
||||
ip -6 route append 2001:db8:3::/64 metric 100 \
|
||||
nexthop via 2001:db8:1::3 dev $spine_p1
|
||||
ipv6_offload_check "2001:db8:3::/64 metric 100" 3
|
||||
check_err $? "appended nexthop not offloaded when should"
|
||||
|
||||
# Mimic route replace by removing the route and adding it back with
|
||||
# only two nexthops.
|
||||
ip -6 route del 2001:db8:3::/64
|
||||
ip -6 route add 2001:db8:3::/64 metric 100 \
|
||||
nexthop via 2001:db8:1::2 dev $spine_p1 \
|
||||
nexthop via 2001:db8:2::2 dev $spine_p2
|
||||
ipv6_offload_check "2001:db8:3::/64 metric 100" 2
|
||||
check_err $? "multipath route not offloaded after delete & add"
|
||||
|
||||
# Append a nexthop with a higher metric and check that the offload
|
||||
# indication did not change.
|
||||
ip -6 route append 2001:db8:3::/64 metric 200 \
|
||||
nexthop via 2001:db8:1::3 dev $spine_p1
|
||||
ipv6_offload_check "2001:db8:3::/64 metric 100" 2
|
||||
check_err $? "lowest metric not offloaded after append"
|
||||
ipv6_offload_check "2001:db8:3::/64 metric 200" 0
|
||||
check_err $? "highest metric offloaded when should not"
|
||||
|
||||
# Prepend a nexthop with a lower metric and check that it is offloaded
|
||||
# and the others are not.
|
||||
ip -6 route append 2001:db8:3::/64 metric 10 \
|
||||
nexthop via 2001:db8:1::3 dev $spine_p1
|
||||
ipv6_offload_check "2001:db8:3::/64 metric 10" 1
|
||||
check_err $? "lowest metric not offloaded after prepend"
|
||||
ipv6_offload_check "2001:db8:3::/64 metric 100" 0
|
||||
check_err $? "mid metric offloaded when should not"
|
||||
ipv6_offload_check "2001:db8:3::/64 metric 200" 0
|
||||
check_err $? "highest metric offloaded when should not"
|
||||
|
||||
log_test "IPv6 multipath route add"
|
||||
|
||||
ip -6 route flush 2001:db8:3::/64
|
||||
}
|
||||
|
||||
ipv6_route_add()
|
||||
{
|
||||
ipv6_route_add_prefix
|
||||
ipv6_route_add_mpath
|
||||
}
|
||||
|
||||
ipv6_route_replace()
|
||||
{
|
||||
RET=0
|
||||
|
||||
# Replace prefix route with prefix route.
|
||||
ip -6 route add 2001:db8:3::/64 metric 100 dev $spine_p1
|
||||
ipv6_offload_check "2001:db8:3::/64 metric 100" 1
|
||||
check_err $? "prefix route not offloaded when should"
|
||||
ip -6 route replace 2001:db8:3::/64 metric 100 dev $spine_p2
|
||||
ipv6_offload_check "2001:db8:3::/64 metric 100" 1
|
||||
check_err $? "prefix route not offloaded after replace"
|
||||
|
||||
# Replace prefix route with multipath route.
|
||||
ip -6 route replace 2001:db8:3::/64 metric 100 \
|
||||
nexthop via 2001:db8:1::2 dev $spine_p1 \
|
||||
nexthop via 2001:db8:2::2 dev $spine_p2
|
||||
ipv6_offload_check "2001:db8:3::/64 metric 100" 2
|
||||
check_err $? "multipath route not offloaded after replace"
|
||||
|
||||
# Replace multipath route with prefix route. A prefix route cannot
|
||||
# replace a multipath route, so it is appended.
|
||||
ip -6 route replace 2001:db8:3::/64 metric 100 dev $spine_p1
|
||||
ipv6_offload_check "2001:db8:3::/64 metric 100 dev $spine_p1" 0
|
||||
check_err $? "prefix route offloaded after 'replacing' multipath route"
|
||||
ipv6_offload_check "2001:db8:3::/64 metric 100" 2
|
||||
check_err $? "multipath route not offloaded after being 'replaced' by prefix route"
|
||||
|
||||
# Replace multipath route with multipath route.
|
||||
ip -6 route replace 2001:db8:3::/64 metric 100 \
|
||||
nexthop via 2001:db8:1::3 dev $spine_p1 \
|
||||
nexthop via 2001:db8:2::3 dev $spine_p2
|
||||
ipv6_offload_check "2001:db8:3::/64 metric 100" 2
|
||||
check_err $? "multipath route not offloaded after replacing multipath route"
|
||||
|
||||
# Replace a non-existing multipath route with a multipath route and
|
||||
# check that it is appended and not offloaded.
|
||||
ip -6 route replace 2001:db8:3::/64 metric 200 \
|
||||
nexthop via 2001:db8:1::3 dev $spine_p1 \
|
||||
nexthop via 2001:db8:2::3 dev $spine_p2
|
||||
ipv6_offload_check "2001:db8:3::/64 metric 100" 2
|
||||
check_err $? "multipath route not offloaded after non-existing route was 'replaced'"
|
||||
ipv6_offload_check "2001:db8:3::/64 metric 200" 0
|
||||
check_err $? "multipath route offloaded after 'replacing' non-existing route"
|
||||
|
||||
log_test "IPv6 route replace"
|
||||
|
||||
ip -6 route flush 2001:db8:3::/64
|
||||
}
|
||||
|
||||
ipv6_route_nexthop_group_share()
|
||||
{
|
||||
RET=0
|
||||
|
||||
# The driver consolidates identical nexthop groups in order to reduce
|
||||
# the resource usage in its adjacency table. Check that the deletion
|
||||
# of one multipath route using the group does not affect the other.
|
||||
ip -6 route add 2001:db8:3::/64 \
|
||||
nexthop via 2001:db8:1::2 dev $spine_p1 \
|
||||
nexthop via 2001:db8:2::2 dev $spine_p2
|
||||
ip -6 route add 2001:db8:4::/64 \
|
||||
nexthop via 2001:db8:1::2 dev $spine_p1 \
|
||||
nexthop via 2001:db8:2::2 dev $spine_p2
|
||||
ipv6_offload_check "2001:db8:3::/64" 2
|
||||
check_err $? "multipath route not offloaded when should"
|
||||
ipv6_offload_check "2001:db8:4::/64" 2
|
||||
check_err $? "multipath route not offloaded when should"
|
||||
ip -6 route del 2001:db8:3::/64
|
||||
ipv6_offload_check "2001:db8:4::/64" 2
|
||||
check_err $? "multipath route not offloaded after deletion of route sharing the nexthop group"
|
||||
|
||||
# Check that after unsharing a nexthop group the routes are still
|
||||
# marked as offloaded.
|
||||
ip -6 route add 2001:db8:3::/64 \
|
||||
nexthop via 2001:db8:1::2 dev $spine_p1 \
|
||||
nexthop via 2001:db8:2::2 dev $spine_p2
|
||||
ip -6 route del 2001:db8:4::/64 \
|
||||
nexthop via 2001:db8:1::2 dev $spine_p1
|
||||
ipv6_offload_check "2001:db8:4::/64" 1
|
||||
check_err $? "singlepath route not offloaded after unsharing the nexthop group"
|
||||
ipv6_offload_check "2001:db8:3::/64" 2
|
||||
check_err $? "multipath route not offloaded after unsharing the nexthop group"
|
||||
|
||||
log_test "IPv6 nexthop group sharing"
|
||||
|
||||
ip -6 route flush 2001:db8:3::/64
|
||||
ip -6 route flush 2001:db8:4::/64
|
||||
}
|
||||
|
||||
# Measure how long it takes to insert a large number of IPv6 multipath routes
# and have them reported as offloaded. The test fails if this takes more than
# 60 seconds. Routes and helper addresses are removed again at the end.
ipv6_route_rate()
{
	local batch_dir=$(mktemp -d)
	local num_rts=$((40 * 1024))
	local num_nhs=16
	local nexthops=""
	local deadline
	local total
	local count
	local start
	local diff
	local end
	local i

	RET=0

	# Prepare 40K /64 multipath routes with 16 nexthops each and check how
	# long it takes to add them. A limit of 60 seconds is set. It is much
	# higher than insertion should take and meant to flag a serious
	# regression.
	# Each nexthop is listed on its own "offload" line, hence the product.
	total=$((num_nhs * num_rts))

	for i in $(seq 1 $num_nhs); do
		ip -6 address add 2001:db8:1::10:$i/128 dev $tor1_p1
		nexthops+=" nexthop via 2001:db8:1::10:$i dev $spine_p1"
	done

	for i in $(seq 1 $num_rts); do
		echo "route add 2001:db8:8:$(printf "%x" $i)::/64$nexthops" \
			>> $batch_dir/add.batch
		echo "route del 2001:db8:8:$(printf "%x" $i)::/64$nexthops" \
			>> $batch_dir/del.batch
	done

	start=$(date +%s.%N)
	# Bound the wait below so a route that never gets offloaded cannot hang
	# the test. SECONDS has one-second granularity, so one extra second is
	# allowed to make sure a timed-out wait exceeds the 60 second limit.
	deadline=$((SECONDS + 61))

	ip -batch $batch_dir/add.batch
	count=$(ip -6 route show | grep offload | wc -l)
	while [ $count -lt $total ] && [ $SECONDS -lt $deadline ]; do
		sleep .01
		count=$(ip -6 route show | grep offload | wc -l)
	done

	end=$(date +%s.%N)

	diff=$(echo "$end - $start" | bc -l)
	test "$(echo "$diff > 60" | bc -l)" -eq 0
	check_err $? "route insertion took too long"
	log_info "inserted $num_rts routes in $diff seconds"

	log_test "IPv6 routes insertion rate"

	ip -batch $batch_dir/del.batch
	for i in $(seq 1 $num_nhs); do
		ip -6 address del 2001:db8:1::10:$i/128 dev $tor1_p1
	done
	rm -rf $batch_dir
}
|
||||
|
||||
setup_prepare()
|
||||
{
|
||||
spine_p1=${NETIFS[p1]}
|
||||
tor1_p1=${NETIFS[p2]}
|
||||
|
||||
spine_p2=${NETIFS[p3]}
|
||||
tor2_p1=${NETIFS[p4]}
|
||||
|
||||
vrf_prepare
|
||||
forwarding_enable
|
||||
|
||||
tor1_create
|
||||
tor2_create
|
||||
spine_create
|
||||
}
|
||||
|
||||
cleanup()
|
||||
{
|
||||
pre_cleanup
|
||||
|
||||
spine_destroy
|
||||
tor2_destroy
|
||||
tor1_destroy
|
||||
|
||||
forwarding_restore
|
||||
vrf_cleanup
|
||||
}
|
||||
|
||||
trap cleanup EXIT
|
||||
|
||||
setup_prepare
|
||||
setup_wait
|
||||
|
||||
tests_run
|
||||
|
||||
exit $EXIT_STATUS
|
Loading…
Reference in New Issue