From 3de205cde4ee8e36416f8b1a1510658abb14f408 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 18 Jun 2019 18:12:43 +0300 Subject: [PATCH 01/16] netlink: Document all fields of 'struct nl_info' Some fields were not documented. Add documentation. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/netlink.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/net/netlink.h b/include/net/netlink.h index 28ece67f5312..ce66e43b9b6a 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -378,7 +378,9 @@ struct nla_policy { /** * struct nl_info - netlink source information * @nlh: Netlink message header of original request + * @nl_net: Network namespace * @portid: Netlink PORTID of requesting application + * @skip_notify: Skip netlink notifications to user space */ struct nl_info { struct nlmsghdr *nlh; From c82481f7ea21be8ec960a28aef07bf258f6820b7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 18 Jun 2019 18:12:44 +0300 Subject: [PATCH 02/16] netlink: Add field to skip in-kernel notifications The struct includes a 'skip_notify' flag that indicates if netlink notifications to user space should be suppressed. As explained in commit 3b1137fe7482 ("net: ipv6: Change notifications for multipath add to RTA_MULTIPATH"), this is useful to suppress per-nexthop RTM_NEWROUTE notifications when an IPv6 multipath route is added / deleted. Instead, one notification is sent for the entire multipath route. This concept is also useful for in-kernel notifications. Sending one in-kernel notification for the addition / deletion of an IPv6 multipath route - instead of one per-nexthop - provides a significant increase in the insertion / deletion rate to underlying devices. Add a 'skip_notify_kernel' flag to suppress in-kernel notifications. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/netlink.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/net/netlink.h b/include/net/netlink.h index ce66e43b9b6a..e4650e5b64a1 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -381,12 +381,14 @@ struct nla_policy { * @nl_net: Network namespace * @portid: Netlink PORTID of requesting application * @skip_notify: Skip netlink notifications to user space + * @skip_notify_kernel: Skip selected in-kernel notifications */ struct nl_info { struct nlmsghdr *nlh; struct net *nl_net; u32 portid; - bool skip_notify; + u8 skip_notify:1, + skip_notify_kernel:1; }; /** From d4b96c7b51e8fe9bcf94c8ab8cd5717d2f005b04 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 18 Jun 2019 18:12:45 +0300 Subject: [PATCH 03/16] ipv6: Extend notifier info for multipath routes Extend the IPv6 FIB notifier info with number of sibling routes being notified. This will later allow listeners to process one notification for a multipath routes instead of N, where N is the number of nexthops. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 7 +++++++ net/ipv6/ip6_fib.c | 17 +++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 1e92f1500b87..7c3d5ab05879 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -377,6 +377,8 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *, struct fib6_entry_notifier_info { struct fib_notifier_info info; /* must be first */ struct fib6_info *rt; + unsigned int nsiblings; + bool multipath_rt; }; /* @@ -450,6 +452,11 @@ int call_fib6_entry_notifiers(struct net *net, enum fib_event_type event_type, struct fib6_info *rt, struct netlink_ext_ack *extack); +int call_fib6_multipath_entry_notifiers(struct net *net, + enum fib_event_type event_type, + struct fib6_info *rt, + unsigned int nsiblings, + struct netlink_ext_ack *extack); void fib6_rt_update(struct net *net, struct fib6_info *rt, struct nl_info *info); void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1cce2082279c..df08ba8fe6fc 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -381,6 +381,23 @@ int call_fib6_entry_notifiers(struct net *net, return call_fib6_notifiers(net, event_type, &info.info); } +int call_fib6_multipath_entry_notifiers(struct net *net, + enum fib_event_type event_type, + struct fib6_info *rt, + unsigned int nsiblings, + struct netlink_ext_ack *extack) +{ + struct fib6_entry_notifier_info info = { + .info.extack = extack, + .rt = rt, + .nsiblings = nsiblings, + .multipath_rt = true, + }; + + rt->fib6_table->fib_seq++; + return call_fib6_notifiers(net, event_type, &info.info); +} + struct fib6_dump_arg { struct net *net; struct notifier_block *nb; From f6c3bb75165cb4d0a0beb2ea6df5b392b7131645 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 18 Jun 2019 18:12:46 +0300 Subject: [PATCH 04/16] mlxsw: spectrum_router: Ignore IPv6 multipath notifications IPv6 multipath notifications are about to be sent, but mlxsw is not ready to process them, so ignore them. The limitation will be lifted by a subsequent patch which will also stop the kernel from sending a notification for each nexthop. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index a6055107c5f2..ca47aa74e5d5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -6203,6 +6203,8 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, NL_SET_ERR_MSG_MOD(info->extack, "IPv6 route with nexthop objects is not supported"); return notifier_from_errno(-EINVAL); } + if (fen6_info->multipath_rt) + return NOTIFY_DONE; } break; } From d133e4f1fa12bff58e1203c7d6c75f993fb5dead Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 18 Jun 2019 18:12:47 +0300 Subject: [PATCH 05/16] netdevsim: Ignore IPv6 multipath notifications In a similar fashion to previous patch, have netdevsim ignore IPv6 multipath notifications for now. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/netdevsim/fib.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index 8c57ba747772..83ba5113210d 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -190,6 +190,13 @@ static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event, case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: + if (info->family == AF_INET6) { + struct fib6_entry_notifier_info *fen6_info = ptr; + + if (fen6_info->multipath_rt) + return NOTIFY_DONE; + } + err = nsim_fib_event(data, info, event == FIB_EVENT_ENTRY_ADD); break; From ebee3cad835f7fe7250213225cf6d62c7cf3b2ca Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 18 Jun 2019 18:12:48 +0300 Subject: [PATCH 06/16] ipv6: Add IPv6 multipath notifications for add / replace Emit a notification when a multipath routes is added or replace. Note that unlike the replace notifications sent from fib6_add_rt2node(), it is possible we are sending a 'FIB_EVENT_ENTRY_REPLACE' when a route was merely added and not replaced. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f7257a56072a..da504d36ce54 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4965,6 +4965,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, { struct fib6_info *rt_notif = NULL, *rt_last = NULL; struct nl_info *info = &cfg->fc_nlinfo; + enum fib_event_type event_type; struct fib6_config r_cfg; struct rtnexthop *rtnh; struct fib6_info *rt; @@ -5042,6 +5043,11 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, */ info->skip_notify = 1; + /* For add and replace, send one notification with all nexthops. For + * append, send one notification with all appended nexthops. + */ + info->skip_notify_kernel = 1; + err_nh = NULL; list_for_each_entry(nh, &rt6_nh_list, next) { err = __ip6_ins_rt(nh->fib6_info, info, extack); @@ -5078,6 +5084,15 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, nhn++; } + event_type = replace ? FIB_EVENT_ENTRY_REPLACE : FIB_EVENT_ENTRY_ADD; + err = call_fib6_multipath_entry_notifiers(info->nl_net, event_type, + rt_notif, nhn - 1, extack); + if (err) { + /* Delete all the siblings that were just added */ + err_nh = NULL; + goto add_errout; + } + /* success ... tell user about new route */ ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags); goto cleanup; From 2881fd61b68ef260b65ff25e19e3133d99f6a0a8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 18 Jun 2019 18:12:49 +0300 Subject: [PATCH 07/16] ipv6: Add IPv6 multipath notification for route delete If all the nexthops of a multipath route are being deleted, send one notification for the entire route, instead of one per-nexthop. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index da504d36ce54..c4d285fe0adc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3718,6 +3718,12 @@ static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg) info->skip_notify = 1; } + info->skip_notify_kernel = 1; + call_fib6_multipath_entry_notifiers(net, + FIB_EVENT_ENTRY_DEL, + rt, + rt->fib6_nsiblings, + NULL); list_for_each_entry_safe(sibling, next_sibling, &rt->fib6_siblings, fib6_siblings) { From 20247fcab397dc6339992ee6deb9f4a6e53d05ef Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 18 Jun 2019 18:12:50 +0300 Subject: [PATCH 08/16] mlxsw: spectrum_router: Remove processing of IPv6 append notifications No such notifications are sent by the IPv6 code, so remove them. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index ca47aa74e5d5..8bfd53a1497b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5965,7 +5965,6 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ case FIB_EVENT_ENTRY_ADD: replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; err = mlxsw_sp_router_fib6_add(mlxsw_sp, @@ -6072,7 +6071,6 @@ static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: fen6_info = container_of(info, struct fib6_entry_notifier_info, From ccd56a5f5018ba2b32847a521b50997537d9838c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 18 Jun 2019 18:12:51 +0300 Subject: [PATCH 09/16] mlxsw: spectrum_router: Prepare function to return errors The function mlxsw_sp_router_fib6_event() takes care of preparing the needed information for the work item that actually inserts the route into the device. When processing an IPv6 multipath route, the function will need to allocate an array to store pointers to all the sibling routes. Change the function's signature to return an error code and adjust the single call site. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 8bfd53a1497b..3e2e8be753a4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -6064,8 +6064,8 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, } } -static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, - struct fib_notifier_info *info) +static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, + struct fib_notifier_info *info) { struct fib6_entry_notifier_info *fen6_info; @@ -6079,6 +6079,8 @@ static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, fib6_info_hold(fib_work->fen6_info.rt); break; } + + return 0; } static void @@ -6221,7 +6223,9 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, break; case AF_INET6: INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work); - mlxsw_sp_router_fib6_event(fib_work, info); + err = mlxsw_sp_router_fib6_event(fib_work, info); + if (err) + goto err_fib_event; break; case RTNL_FAMILY_IP6MR: case RTNL_FAMILY_IPMR: @@ -6233,6 +6237,10 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, mlxsw_core_schedule_work(&fib_work->work); return NOTIFY_DONE; + +err_fib_event: + kfree(fib_work); + return NOTIFY_BAD; } struct mlxsw_sp_rif * From 928c0b534f2977a3c20c6237282fb8f685775373 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 18 Jun 2019 18:12:52 +0300 Subject: [PATCH 10/16] mlxsw: spectrum_router: Pass multiple routes to work item Prepare the driver to process IPv6 multipath notifications by passing an array of 'struct fib6_info' instead of just one route. A reference is taken on each sibling route in order to prevent them from being freed until they are processed by the workqueue. v2: * Remove 'multipath_rt' usage Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 72 +++++++++++++++++-- 1 file changed, 65 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 3e2e8be753a4..42e0e9677b91 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5893,10 +5893,15 @@ static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp) dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n"); } +struct mlxsw_sp_fib6_event_work { + struct fib6_info **rt_arr; + unsigned int nrt6; +}; + struct mlxsw_sp_fib_event_work { struct work_struct work; union { - struct fib6_entry_notifier_info fen6_info; + struct mlxsw_sp_fib6_event_work fib6_work; struct fib_entry_notifier_info fen_info; struct fib_rule_notifier_info fr_info; struct fib_nh_notifier_info fnh_info; @@ -5907,6 +5912,54 @@ struct mlxsw_sp_fib_event_work { unsigned long event; }; +static int +mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work, + struct fib6_entry_notifier_info *fen6_info) +{ + struct fib6_info *rt = fen6_info->rt; + struct fib6_info **rt_arr; + struct fib6_info *iter; + unsigned int nrt6; + int i = 0; + + nrt6 = fen6_info->nsiblings + 1; + + rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC); + if (!rt_arr) + return -ENOMEM; + + fib6_work->rt_arr = rt_arr; + fib6_work->nrt6 = nrt6; + + rt_arr[0] = rt; + fib6_info_hold(rt); + + if (!fen6_info->nsiblings) + return 0; + + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { + if (i == fen6_info->nsiblings) + break; + + rt_arr[i + 1] = iter; + fib6_info_hold(iter); + i++; + } + WARN_ON_ONCE(i != fen6_info->nsiblings); + + return 0; +} + +static void +mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work) +{ + int i; + + for (i = 0; i < fib6_work->nrt6; i++) + mlxsw_sp_rt6_release(fib6_work->rt_arr[i]); + kfree(fib6_work->rt_arr); +} + static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) { struct mlxsw_sp_fib_event_work *fib_work = @@ -5968,14 +6021,16 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) case FIB_EVENT_ENTRY_ADD: replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; err = mlxsw_sp_router_fib6_add(mlxsw_sp, - fib_work->fen6_info.rt, replace); + fib_work->fib6_work.rt_arr[0], + replace); if (err) mlxsw_sp_router_fib_abort(mlxsw_sp); - mlxsw_sp_rt6_release(fib_work->fen6_info.rt); + mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); break; case FIB_EVENT_ENTRY_DEL: - mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt); - mlxsw_sp_rt6_release(fib_work->fen6_info.rt); + mlxsw_sp_router_fib6_del(mlxsw_sp, + fib_work->fib6_work.rt_arr[0]); + mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); break; case FIB_EVENT_RULE_ADD: /* if we get here, a rule was added that we do not support. @@ -6068,6 +6123,7 @@ static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, struct fib_notifier_info *info) { struct fib6_entry_notifier_info *fen6_info; + int err; switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ @@ -6075,8 +6131,10 @@ static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, case FIB_EVENT_ENTRY_DEL: fen6_info = container_of(info, struct fib6_entry_notifier_info, info); - fib_work->fen6_info = *fen6_info; - fib6_info_hold(fib_work->fen6_info.rt); + err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work, + fen6_info); + if (err) + return err; break; } From 94d628d1f97b88749caee797ff78d41eb95e26e5 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 18 Jun 2019 18:12:53 +0300 Subject: [PATCH 11/16] mlxsw: spectrum_router: Adjust IPv6 replace logic to new notifications Previously, IPv6 replace notifications were only sent from fib6_add_rt2node(). The function only emitted such notifications if a route actually replaced another route. A previous patch added another call site in ip6_route_multipath_add() from which such notification can be emitted even if a route was merely added and did not replace another route. Adjust the driver to take this into account and potentially set the 'replace' flag to 'false' if the notified route did not replace an existing route. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 42e0e9677b91..dff96d1b0970 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5435,16 +5435,16 @@ mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, static int mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry, - bool replace) + bool *p_replace) { struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node; struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry); struct mlxsw_sp_fib6_entry *fib6_entry; - fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace); + fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, *p_replace); - if (replace && WARN_ON(!fib6_entry)) - return -EINVAL; + if (*p_replace && !fib6_entry) + *p_replace = false; if (fib6_entry) { list_add_tail(&new6_entry->common.list, @@ -5479,11 +5479,11 @@ mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry) static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry, - bool replace) + bool *p_replace) { int err; - err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace); + err = mlxsw_sp_fib6_node_list_insert(fib6_entry, p_replace); if (err) return err; @@ -5596,7 +5596,7 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, goto err_fib6_entry_create; } - err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace); + err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, &replace); if (err) goto err_fib6_node_entry_link; From 921bc539cbcbb89274ff692ae58fc000c1f251bc Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 18 Jun 2019 18:12:54 +0300 Subject: [PATCH 12/16] mlxsw: spectrum_router: Pass array of routes to route handling functions Prepare the driver to handle multiple routes in a single notification by passing an array of routes to the functions that actually add / delete a route. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index dff96d1b0970..f0332d54aea1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5556,10 +5556,12 @@ static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, - struct fib6_info *rt, bool replace) + struct fib6_info **rt_arr, + unsigned int nrt6, bool replace) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; + struct fib6_info *rt = rt_arr[0]; int err; if (mlxsw_sp->router->aborted) @@ -5613,10 +5615,12 @@ err_fib6_entry_nexthop_add: } static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, - struct fib6_info *rt) + struct fib6_info **rt_arr, + unsigned int nrt6) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; + struct fib6_info *rt = rt_arr[0]; if (mlxsw_sp->router->aborted) return; @@ -6021,7 +6025,8 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) case FIB_EVENT_ENTRY_ADD: replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; err = mlxsw_sp_router_fib6_add(mlxsw_sp, - fib_work->fib6_work.rt_arr[0], + fib_work->fib6_work.rt_arr, + fib_work->fib6_work.nrt6, replace); if (err) mlxsw_sp_router_fib_abort(mlxsw_sp); @@ -6029,7 +6034,8 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) break; case FIB_EVENT_ENTRY_DEL: mlxsw_sp_router_fib6_del(mlxsw_sp, - fib_work->fib6_work.rt_arr[0]); + fib_work->fib6_work.rt_arr, + fib_work->fib6_work.nrt6); mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); break; case FIB_EVENT_RULE_ADD: From d21afd3029b940e87539f18b385f2214413fe44c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 18 Jun 2019 18:12:55 +0300 Subject: [PATCH 13/16] mlxsw: spectrum_router: Add / delete multiple IPv6 nexthops Currently, the functions that take care of populating IPv6 nexthop groups only add / delete a single nexthop. Prepare them to handle multiple routes in one notification by passing an array of routes and adding / deleting all of them. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 61 ++++++++++++------- 1 file changed, 39 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index f0332d54aea1..98fd3c582a60 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5278,17 +5278,21 @@ err_nexthop6_group_get: static int mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry, - struct fib6_info *rt) + struct fib6_info **rt_arr, unsigned int nrt6) { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; - int err; + int err, i; - mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt); - if (IS_ERR(mlxsw_sp_rt6)) - return PTR_ERR(mlxsw_sp_rt6); + for (i = 0; i < nrt6; i++) { + mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]); + if (IS_ERR(mlxsw_sp_rt6)) { + err = PTR_ERR(mlxsw_sp_rt6); + goto err_rt6_create; + } - list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); - fib6_entry->nrt6++; + list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); + fib6_entry->nrt6++; + } err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); if (err) @@ -5297,27 +5301,38 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, return 0; err_nexthop6_group_update: - fib6_entry->nrt6--; - list_del(&mlxsw_sp_rt6->list); - mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + i = nrt6; +err_rt6_create: + for (i--; i >= 0; i--) { + fib6_entry->nrt6--; + mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list, + struct mlxsw_sp_rt6, list); + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + } return err; } static void mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry, - struct fib6_info *rt) + struct fib6_info **rt_arr, unsigned int nrt6) { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + int i; - mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt); - if (WARN_ON(!mlxsw_sp_rt6)) - return; + for (i = 0; i < nrt6; i++) { + mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, + rt_arr[i]); + if (WARN_ON_ONCE(!mlxsw_sp_rt6)) + continue; + + fib6_entry->nrt6--; + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + } - fib6_entry->nrt6--; - list_del(&mlxsw_sp_rt6->list); mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); - mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); } static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp, @@ -5586,7 +5601,8 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, */ fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace); if (fib6_entry) { - err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt); + err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, + rt_arr, nrt6); if (err) goto err_fib6_entry_nexthop_add; return 0; @@ -5632,11 +5648,12 @@ static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, if (WARN_ON(!fib6_entry)) return; - /* If route is part of a multipath entry, but not the last one - * removed, then only reduce its nexthop group. + /* If not all the nexthops are deleted, then only reduce the nexthop + * group. */ - if (!list_is_singular(&fib6_entry->rt6_list)) { - mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt); + if (nrt6 != fib6_entry->nrt6) { + mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr, + nrt6); return; } From 2d9dd7ec79fb656852837c3821a3cb49dd464e76 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 18 Jun 2019 18:12:56 +0300 Subject: [PATCH 14/16] mlxsw: spectrum_router: Create IPv6 multipath routes in one go Allow the driver to create an IPv6 multipath route in one go by passing an array of sibling routes and iterating over them. Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 36 ++++++++++++------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 98fd3c582a60..92ec65188e9a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5373,29 +5373,32 @@ mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry) static struct mlxsw_sp_fib6_entry * mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node, - struct fib6_info *rt) + struct fib6_info **rt_arr, unsigned int nrt6) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_entry *fib_entry; struct mlxsw_sp_rt6 *mlxsw_sp_rt6; - int err; + int err, i; fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL); if (!fib6_entry) return ERR_PTR(-ENOMEM); fib_entry = &fib6_entry->common; - mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt); - if (IS_ERR(mlxsw_sp_rt6)) { - err = PTR_ERR(mlxsw_sp_rt6); - goto err_rt6_create; + INIT_LIST_HEAD(&fib6_entry->rt6_list); + + for (i = 0; i < nrt6; i++) { + mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]); + if (IS_ERR(mlxsw_sp_rt6)) { + err = PTR_ERR(mlxsw_sp_rt6); + goto err_rt6_create; + } + list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); + fib6_entry->nrt6++; } - mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt); + mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]); - INIT_LIST_HEAD(&fib6_entry->rt6_list); - list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); - fib6_entry->nrt6 = 1; err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry); if (err) goto err_nexthop6_group_get; @@ -5405,9 +5408,15 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, return fib6_entry; err_nexthop6_group_get: - list_del(&mlxsw_sp_rt6->list); - mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + i = nrt6; err_rt6_create: + for (i--; i >= 0; i--) { + fib6_entry->nrt6--; + mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list, + struct mlxsw_sp_rt6, list); + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + } kfree(fib6_entry); return ERR_PTR(err); } @@ -5608,7 +5617,8 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, return 0; } - fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt); + fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr, + nrt6); if (IS_ERR(fib6_entry)) { err = PTR_ERR(fib6_entry); goto err_fib6_entry_create; From d5382fef70ce273608d6fc652c24f075de3737ef Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 18 Jun 2019 18:12:57 +0300 Subject: [PATCH 15/16] ipv6: Stop sending in-kernel notifications for each nexthop Both listeners - mlxsw and netdevsim - of IPv6 FIB notifications are now ready to handle IPv6 multipath notifications. Therefore, stop ignoring such notifications in both drivers and stop sending notification for each added / deleted nexthop. v2: * Remove 'multipath_rt' from 'struct fib6_entry_notifier_info' Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Reviewed-by: David Ahern Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 2 -- drivers/net/netdevsim/fib.c | 7 ----- include/net/ip6_fib.h | 1 - net/ipv6/ip6_fib.c | 29 +++++++++++-------- 4 files changed, 17 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 92ec65188e9a..e618be7ce6c6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -6294,8 +6294,6 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, NL_SET_ERR_MSG_MOD(info->extack, "IPv6 route with nexthop objects is not supported"); return notifier_from_errno(-EINVAL); } - if (fen6_info->multipath_rt) - return NOTIFY_DONE; } break; } diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index 83ba5113210d..8c57ba747772 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -190,13 +190,6 @@ static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event, case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: - if (info->family == AF_INET6) { - struct fib6_entry_notifier_info *fen6_info = ptr; - - if (fen6_info->multipath_rt) - return NOTIFY_DONE; - } - err = nsim_fib_event(data, info, event == FIB_EVENT_ENTRY_ADD); break; diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 7c3d5ab05879..87331f2c4af0 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -378,7 +378,6 @@ struct fib6_entry_notifier_info { struct fib_notifier_info info; /* must be first */ struct fib6_info *rt; unsigned int nsiblings; - bool multipath_rt; }; /* diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index df08ba8fe6fc..1d16a01eccf5 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -391,7 +391,6 @@ int call_fib6_multipath_entry_notifiers(struct net *net, .info.extack = extack, .rt = rt, .nsiblings = nsiblings, - .multipath_rt = true, }; rt->fib6_table->fib_seq++; @@ -1140,11 +1139,13 @@ next_iter: add: nlflags |= NLM_F_CREATE; - err = call_fib6_entry_notifiers(info->nl_net, - FIB_EVENT_ENTRY_ADD, - rt, extack); - if (err) - return err; + if (!info->skip_notify_kernel) { + err = call_fib6_entry_notifiers(info->nl_net, + FIB_EVENT_ENTRY_ADD, + rt, extack); + if (err) + return err; + } rcu_assign_pointer(rt->fib6_next, iter); fib6_info_hold(rt); @@ -1169,11 +1170,13 @@ add: return -ENOENT; } - err = call_fib6_entry_notifiers(info->nl_net, - FIB_EVENT_ENTRY_REPLACE, - rt, extack); - if (err) - return err; + if (!info->skip_notify_kernel) { + err = call_fib6_entry_notifiers(info->nl_net, + FIB_EVENT_ENTRY_REPLACE, + rt, extack); + if (err) + return err; + } fib6_info_hold(rt); rcu_assign_pointer(rt->fib6_node, fn); @@ -1856,9 +1859,11 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, fib6_purge_rt(rt, fn, net); - call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL); + if (!info->skip_notify_kernel) + call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL); if (!info->skip_notify) inet6_rt_notify(RTM_DELROUTE, rt, info, 0); + fib6_info_release(rt); } From 12ee8220399995cc63fe590d9b01e94921478ad8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 18 Jun 2019 18:12:58 +0300 Subject: [PATCH 16/16] selftests: mlxsw: Add a test for FIB offload indication Test that the offload indication for unicast routes is correctly set in different scenarios. IPv4 support will be added in the future. Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/fib_offload.sh | 349 ++++++++++++++++++ 1 file changed, 349 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/fib_offload.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/fib_offload.sh b/tools/testing/selftests/drivers/net/mlxsw/fib_offload.sh new file mode 100755 index 000000000000..e99ae500f387 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/fib_offload.sh @@ -0,0 +1,349 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test unicast FIB offload indication. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + ipv6_route_add + ipv6_route_replace + ipv6_route_nexthop_group_share + ipv6_route_rate +" +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +tor1_create() +{ + simple_if_init $tor1_p1 2001:db8:1::2/128 2001:db8:1::3/128 +} + +tor1_destroy() +{ + simple_if_fini $tor1_p1 2001:db8:1::2/128 2001:db8:1::3/128 +} + +tor2_create() +{ + simple_if_init $tor2_p1 2001:db8:2::2/128 2001:db8:2::3/128 +} + +tor2_destroy() +{ + simple_if_fini $tor2_p1 2001:db8:2::2/128 2001:db8:2::3/128 +} + +spine_create() +{ + ip link set dev $spine_p1 up + ip link set dev $spine_p2 up + + __addr_add_del $spine_p1 add 2001:db8:1::1/64 + __addr_add_del $spine_p2 add 2001:db8:2::1/64 +} + +spine_destroy() +{ + __addr_add_del $spine_p2 del 2001:db8:2::1/64 + __addr_add_del $spine_p1 del 2001:db8:1::1/64 + + ip link set dev $spine_p2 down + ip link set dev $spine_p1 down +} + +ipv6_offload_check() +{ + local pfx="$1"; shift + local expected_num=$1; shift + local num + + # Try to avoid races with route offload + sleep .1 + + num=$(ip -6 route show match ${pfx} | grep "offload" | wc -l) + + if [ $num -eq $expected_num ]; then + return 0 + fi + + return 1 +} + +ipv6_route_add_prefix() +{ + RET=0 + + # Add a prefix route and check that it is offloaded. + ip -6 route add 2001:db8:3::/64 dev $spine_p1 metric 100 + ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 1 + check_err $? "prefix route not offloaded" + + # Append an identical prefix route with an higher metric and check that + # offload indication did not change. + ip -6 route append 2001:db8:3::/64 dev $spine_p1 metric 200 + ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 1 + check_err $? "lowest metric not offloaded after append" + ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 200" 0 + check_err $? "highest metric offloaded when should not" + + # Prepend an identical prefix route with lower metric and check that + # it is offloaded and the others are not. + ip -6 route append 2001:db8:3::/64 dev $spine_p1 metric 10 + ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 10" 1 + check_err $? "lowest metric not offloaded after prepend" + ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 0 + check_err $? "mid metric offloaded when should not" + ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 200" 0 + check_err $? "highest metric offloaded when should not" + + # Delete the routes and add the same route with a different nexthop + # device. Check that it is offloaded. + ip -6 route flush 2001:db8:3::/64 dev $spine_p1 + ip -6 route add 2001:db8:3::/64 dev $spine_p2 + ipv6_offload_check "2001:db8:3::/64 dev $spine_p2" 1 + + log_test "IPv6 prefix route add" + + ip -6 route flush 2001:db8:3::/64 +} + +ipv6_route_add_mpath() +{ + RET=0 + + # Add a multipath route and check that it is offloaded. + ip -6 route add 2001:db8:3::/64 metric 100 \ + nexthop via 2001:db8:1::2 dev $spine_p1 \ + nexthop via 2001:db8:2::2 dev $spine_p2 + ipv6_offload_check "2001:db8:3::/64 metric 100" 2 + check_err $? "multipath route not offloaded when should" + + # Append another nexthop and check that it is offloaded as well. + ip -6 route append 2001:db8:3::/64 metric 100 \ + nexthop via 2001:db8:1::3 dev $spine_p1 + ipv6_offload_check "2001:db8:3::/64 metric 100" 3 + check_err $? "appended nexthop not offloaded when should" + + # Mimic route replace by removing the route and adding it back with + # only two nexthops. + ip -6 route del 2001:db8:3::/64 + ip -6 route add 2001:db8:3::/64 metric 100 \ + nexthop via 2001:db8:1::2 dev $spine_p1 \ + nexthop via 2001:db8:2::2 dev $spine_p2 + ipv6_offload_check "2001:db8:3::/64 metric 100" 2 + check_err $? "multipath route not offloaded after delete & add" + + # Append a nexthop with an higher metric and check that the offload + # indication did not change. + ip -6 route append 2001:db8:3::/64 metric 200 \ + nexthop via 2001:db8:1::3 dev $spine_p1 + ipv6_offload_check "2001:db8:3::/64 metric 100" 2 + check_err $? "lowest metric not offloaded after append" + ipv6_offload_check "2001:db8:3::/64 metric 200" 0 + check_err $? "highest metric offloaded when should not" + + # Prepend a nexthop with a lower metric and check that it is offloaded + # and the others are not. + ip -6 route append 2001:db8:3::/64 metric 10 \ + nexthop via 2001:db8:1::3 dev $spine_p1 + ipv6_offload_check "2001:db8:3::/64 metric 10" 1 + check_err $? "lowest metric not offloaded after prepend" + ipv6_offload_check "2001:db8:3::/64 metric 100" 0 + check_err $? "mid metric offloaded when should not" + ipv6_offload_check "2001:db8:3::/64 metric 200" 0 + check_err $? "highest metric offloaded when should not" + + log_test "IPv6 multipath route add" + + ip -6 route flush 2001:db8:3::/64 +} + +ipv6_route_add() +{ + ipv6_route_add_prefix + ipv6_route_add_mpath +} + +ipv6_route_replace() +{ + RET=0 + + # Replace prefix route with prefix route. + ip -6 route add 2001:db8:3::/64 metric 100 dev $spine_p1 + ipv6_offload_check "2001:db8:3::/64 metric 100" 1 + check_err $? "prefix route not offloaded when should" + ip -6 route replace 2001:db8:3::/64 metric 100 dev $spine_p2 + ipv6_offload_check "2001:db8:3::/64 metric 100" 1 + check_err $? "prefix route not offloaded after replace" + + # Replace prefix route with multipath route. + ip -6 route replace 2001:db8:3::/64 metric 100 \ + nexthop via 2001:db8:1::2 dev $spine_p1 \ + nexthop via 2001:db8:2::2 dev $spine_p2 + ipv6_offload_check "2001:db8:3::/64 metric 100" 2 + check_err $? "multipath route not offloaded after replace" + + # Replace multipath route with prefix route. A prefix route cannot + # replace a multipath route, so it is appended. + ip -6 route replace 2001:db8:3::/64 metric 100 dev $spine_p1 + ipv6_offload_check "2001:db8:3::/64 metric 100 dev $spine_p1" 0 + check_err $? "prefix route offloaded after 'replacing' multipath route" + ipv6_offload_check "2001:db8:3::/64 metric 100" 2 + check_err $? "multipath route not offloaded after being 'replaced' by prefix route" + + # Replace multipath route with multipath route. + ip -6 route replace 2001:db8:3::/64 metric 100 \ + nexthop via 2001:db8:1::3 dev $spine_p1 \ + nexthop via 2001:db8:2::3 dev $spine_p2 + ipv6_offload_check "2001:db8:3::/64 metric 100" 2 + check_err $? "multipath route not offloaded after replacing multipath route" + + # Replace a non-existing multipath route with a multipath route and + # check that it is appended and not offloaded. + ip -6 route replace 2001:db8:3::/64 metric 200 \ + nexthop via 2001:db8:1::3 dev $spine_p1 \ + nexthop via 2001:db8:2::3 dev $spine_p2 + ipv6_offload_check "2001:db8:3::/64 metric 100" 2 + check_err $? "multipath route not offloaded after non-existing route was 'replaced'" + ipv6_offload_check "2001:db8:3::/64 metric 200" 0 + check_err $? "multipath route offloaded after 'replacing' non-existing route" + + log_test "IPv6 route replace" + + ip -6 route flush 2001:db8:3::/64 +} + +ipv6_route_nexthop_group_share() +{ + RET=0 + + # The driver consolidates identical nexthop groups in order to reduce + # the resource usage in its adjacency table. Check that the deletion + # of one multipath route using the group does not affect the other. + ip -6 route add 2001:db8:3::/64 \ + nexthop via 2001:db8:1::2 dev $spine_p1 \ + nexthop via 2001:db8:2::2 dev $spine_p2 + ip -6 route add 2001:db8:4::/64 \ + nexthop via 2001:db8:1::2 dev $spine_p1 \ + nexthop via 2001:db8:2::2 dev $spine_p2 + ipv6_offload_check "2001:db8:3::/64" 2 + check_err $? "multipath route not offloaded when should" + ipv6_offload_check "2001:db8:4::/64" 2 + check_err $? "multipath route not offloaded when should" + ip -6 route del 2001:db8:3::/64 + ipv6_offload_check "2001:db8:4::/64" 2 + check_err $? "multipath route not offloaded after deletion of route sharing the nexthop group" + + # Check that after unsharing a nexthop group the routes are still + # marked as offloaded. + ip -6 route add 2001:db8:3::/64 \ + nexthop via 2001:db8:1::2 dev $spine_p1 \ + nexthop via 2001:db8:2::2 dev $spine_p2 + ip -6 route del 2001:db8:4::/64 \ + nexthop via 2001:db8:1::2 dev $spine_p1 + ipv6_offload_check "2001:db8:4::/64" 1 + check_err $? "singlepath route not offloaded after unsharing the nexthop group" + ipv6_offload_check "2001:db8:3::/64" 2 + check_err $? "multipath route not offloaded after unsharing the nexthop group" + + log_test "IPv6 nexthop group sharing" + + ip -6 route flush 2001:db8:3::/64 + ip -6 route flush 2001:db8:4::/64 +} + +ipv6_route_rate() +{ + local batch_dir=$(mktemp -d) + local num_rts=$((40 * 1024)) + local num_nhs=16 + local total + local start + local diff + local end + local nhs + local i + + RET=0 + + # Prepare 40K /64 multipath routes with 16 nexthops each and check how + # long it takes to add them. A limit of 60 seconds is set. It is much + # higher than insertion should take and meant to flag a serious + # regression. + total=$((nums_nhs * num_rts)) + + for i in $(seq 1 $num_nhs); do + ip -6 address add 2001:db8:1::10:$i/128 dev $tor1_p1 + nexthops+=" nexthop via 2001:db8:1::10:$i dev $spine_p1" + done + + for i in $(seq 1 $num_rts); do + echo "route add 2001:db8:8:$(printf "%x" $i)::/64$nexthops" \ + >> $batch_dir/add.batch + echo "route del 2001:db8:8:$(printf "%x" $i)::/64$nexthops" \ + >> $batch_dir/del.batch + done + + start=$(date +%s.%N) + + ip -batch $batch_dir/add.batch + count=$(ip -6 route show | grep offload | wc -l) + while [ $count -lt $total ]; do + sleep .01 + count=$(ip -6 route show | grep offload | wc -l) + done + + end=$(date +%s.%N) + + diff=$(echo "$end - $start" | bc -l) + test "$(echo "$diff > 60" | bc -l)" -eq 0 + check_err $? "route insertion took too long" + log_info "inserted $num_rts routes in $diff seconds" + + log_test "IPv6 routes insertion rate" + + ip -batch $batch_dir/del.batch + for i in $(seq 1 $num_nhs); do + ip -6 address del 2001:db8:1::10:$i/128 dev $tor1_p1 + done + rm -rf $batch_dir +} + +setup_prepare() +{ + spine_p1=${NETIFS[p1]} + tor1_p1=${NETIFS[p2]} + + spine_p2=${NETIFS[p3]} + tor2_p1=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + + tor1_create + tor2_create + spine_create +} + +cleanup() +{ + pre_cleanup + + spine_destroy + tor2_destroy + tor1_destroy + + forwarding_restore + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS