From 5d8678365c90b9ce1fd2243ff5ea562609f6cec1 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Mon, 3 Dec 2018 13:59:41 +0100 Subject: [PATCH 01/13] mlx5: update timecounter at least twice per counter overflow The timecounter needs to be updated at least once in half of the cyclecounter interval to prevent timecounter_cyc2time() interpreting a new timestamp as an old value and causing a backward jump. This would be an issue if the timecounter multiplier was so small that the update interval would not be limited by the 64-bit overflow in multiplication. Shorten the calculated interval to make sure the timecounter is updated in time even when the system clock is slowed down by up to 10%, the multiplier is increased by up to 10%, and the scheduled overflow check is late by 15%. Cc: Richard Cochran Cc: Ariel Levkovich Cc: Saeed Mahameed Signed-off-by: Miroslav Lichvar Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index d27c239e7d6c..53f021df4934 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -517,14 +517,14 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) ktime_to_ns(ktime_get_real())); /* Calculate period in seconds to call the overflow watchdog - to make - * sure counter is checked at least once every wrap around. + * sure counter is checked at least twice every wrap around. * The period is calculated as the minimum between max HW cycles count * (The clock source mask) and max amount of cycles that can be * multiplied by clock multiplier where the result doesn't exceed * 64bits. */ overflow_cycles = div64_u64(~0ULL >> 1, clock->cycles.mult); - overflow_cycles = min(overflow_cycles, clock->cycles.mask >> 1); + overflow_cycles = min(overflow_cycles, div_u64(clock->cycles.mask, 3)); ns = cyclecounter_cyc2ns(&clock->cycles, overflow_cycles, frac, &frac); From 4a0475d57ad134429f9bb56068bb738b4afc5992 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Mon, 3 Dec 2018 13:59:42 +0100 Subject: [PATCH 02/13] mlx5: extend PTP gettime function to read system clock Read the system time right before and immediately after reading the low register of the internal timer. This adds support for the PTP_SYS_OFFSET_EXTENDED ioctl. Cc: Richard Cochran Cc: Saeed Mahameed Signed-off-by: Miroslav Lichvar Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/lib/clock.c | 20 +++++++++++-------- .../net/ethernet/mellanox/mlx5/core/main.c | 11 ++++++++-- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 4 +++- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 53f021df4934..ca0ee9916e9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -72,7 +72,7 @@ static u64 read_internal_timer(const struct cyclecounter *cc) struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock); - return mlx5_read_internal_timer(mdev) & cc->mask; + return mlx5_read_internal_timer(mdev, NULL) & cc->mask; } static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev) @@ -156,15 +156,19 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp, return 0; } -static int mlx5_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) +static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, + struct ptp_system_timestamp *sts) { struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); - u64 ns; + struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, + clock); unsigned long flags; + u64 cycles, ns; write_seqlock_irqsave(&clock->lock, flags); - ns = timecounter_read(&clock->tc); + cycles = mlx5_read_internal_timer(mdev, sts); + ns = timecounter_cyc2time(&clock->tc, cycles); write_sequnlock_irqrestore(&clock->lock, flags); *ts = ns_to_timespec64(ns); @@ -307,7 +311,7 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, ts.tv_sec = rq->perout.start.sec; ts.tv_nsec = rq->perout.start.nsec; ns = timespec64_to_ns(&ts); - cycles_now = mlx5_read_internal_timer(mdev); + cycles_now = mlx5_read_internal_timer(mdev, NULL); write_seqlock_irqsave(&clock->lock, flags); nsec_now = timecounter_cyc2time(&clock->tc, cycles_now); nsec_delta = ns - nsec_now; @@ -384,7 +388,7 @@ static const struct ptp_clock_info mlx5_ptp_clock_info = { .pps = 0, .adjfreq = mlx5_ptp_adjfreq, .adjtime = mlx5_ptp_adjtime, - .gettime64 = mlx5_ptp_gettime, + .gettimex64 = mlx5_ptp_gettimex, .settime64 = mlx5_ptp_settime, .enable = NULL, .verify = NULL, @@ -469,8 +473,8 @@ static int mlx5_pps_event(struct notifier_block *nb, ptp_clock_event(clock->ptp, &ptp_event); break; case PTP_PF_PEROUT: - mlx5_ptp_gettime(&clock->ptp_info, &ts); - cycles_now = mlx5_read_internal_timer(mdev); + mlx5_ptp_gettimex(&clock->ptp_info, &ts, NULL); + cycles_now = mlx5_read_internal_timer(mdev, NULL); ts.tv_sec += 1; ts.tv_nsec = 0; ns = timespec64_to_ns(&ts); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c23553164e0d..77896c11f6f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -580,15 +580,22 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id) return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev) +u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev, + struct ptp_system_timestamp *sts) { u32 timer_h, timer_h1, timer_l; timer_h = ioread32be(&dev->iseg->internal_timer_h); + ptp_read_system_prets(sts); timer_l = ioread32be(&dev->iseg->internal_timer_l); + ptp_read_system_postts(sts); timer_h1 = ioread32be(&dev->iseg->internal_timer_h); - if (timer_h != timer_h1) /* wrap around */ + if (timer_h != timer_h1) { + /* wrap around */ + ptp_read_system_prets(sts); timer_l = ioread32be(&dev->iseg->internal_timer_l); + ptp_read_system_postts(sts); + } return (u64)timer_l | (u64)timer_h1 << 32; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 73bf46599ec6..c68dcea5985b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -121,7 +122,8 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, u32 element_id); int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); -u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev); +u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev, + struct ptp_system_timestamp *sts); void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev); int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); From 4c8fb2986d44c5a75949a88ac61deec0ed50a542 Mon Sep 17 00:00:00 2001 From: Gavi Teitz Date: Wed, 12 Dec 2018 21:23:18 +0200 Subject: [PATCH 03/13] net/mlx5e: Increase VF representors' SQ size to 128 The default size for the VF representors' SQ was too small to handle high packet rates. Doubling the size from 64 to 128 drastically improves the packet rate under stress (by about 50%), whereas increasing the size beyond 128 has not shown to make any further difference. The impact of the SQ size was measured with UDP traffic, in the following topology: TG <-> PF <-> TC forwarding <-> VF representor <-> VF in VM over a single core processing bi-directional traffic, with the following results: SQ size of 64: SQ size of 128: Packet rate for 64B UDP packets: 860 [Kpps] 1280 [Kpps] Packet rate for 114B VxLan encapsulated UDP packets: 320 [Kpps] 500 [Kpps] Signed-off-by: Gavi Teitz Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index ed1158b58798..18d0f4f7537b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -45,8 +45,8 @@ #include "en/tc_tun.h" #include "fs_core.h" -#define MLX5E_REP_PARAMS_LOG_SQ_SIZE \ - max(0x6, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) +#define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \ + max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; @@ -1336,7 +1336,7 @@ static void mlx5e_build_rep_params(struct net_device *netdev) if (rep->vport == FDB_UPLINK_VPORT) params->log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; else - params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE; + params->log_sq_size = MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE;; /* RQ */ mlx5e_build_rq_params(mdev, params); From 1ee4457c5c2a6172893eb1f464860c753c9b4e96 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 18 Dec 2018 13:32:46 +0200 Subject: [PATCH 04/13] net/mlx5e: Allow vlans on e-switch uplink reps There are cases (e.g tunneling with vlan on underlay and potentially more) where this makes sense, so allow that. Signed-off-by: Or Gerlitz Reviewed-by: Eli Britstein Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 18d0f4f7537b..91c3eb85f32e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1381,7 +1381,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) netdev->switchdev_ops = &mlx5e_rep_switchdev_ops; - netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL; + netdev->features |= NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL; netdev->hw_features |= NETIF_F_HW_TC; netdev->hw_features |= NETIF_F_SG; @@ -1392,6 +1392,9 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_TSO6; netdev->hw_features |= NETIF_F_RXCSUM; + if (rep->vport != FDB_UPLINK_VPORT) + netdev->features |= NETIF_F_VLAN_CHALLENGED; + netdev->features |= netdev->hw_features; } From a0646c88ed3ac527b71cf365b03ff4fc729d35b6 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Wed, 19 Dec 2018 09:24:58 +0200 Subject: [PATCH 05/13] net/mlx5e: Fail attempt to offload e-switch TC flows with egress upper devices We use the switchdev parent HW id helper to identify if the mirred device shares the same ASIC/port with the ingress device. This can get us wrong in the presence of upper devices such as vlan or bridge set over the HW devices (VF or uplink representors), b/c the switchdev ID is retrieved recursively. To fail offload attempts in such cases, we condition the check on the egress device to have not only the same switchdev ID but also the relevant mlx5 netdev ops. Fixes: 03a9d11e6eeb ('net/mlx5e: Add TC drop and mirred/redirect action parsing for SRIOV offloads') Signed-off-by: Eli Britstein Reviewed-by: Roi Dayan Acked-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 9 +++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 +++ 3 files changed, 15 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 91c3eb85f32e..f414f19c1159 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1316,6 +1316,15 @@ static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { .ndo_get_vf_stats = mlx5e_get_vf_stats, }; +bool mlx5e_eswitch_rep(struct net_device *netdev) +{ + if (netdev->netdev_ops == &mlx5e_netdev_ops_vf_rep || + netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep) + return true; + + return false; +} + static void mlx5e_build_rep_params(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 5645d3cef1bb..edd722824697 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -176,6 +176,9 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e); void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); + +bool mlx5e_eswitch_rep(struct net_device *netdev); + #else /* CONFIG_MLX5_ESWITCH */ static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; } static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index c1a9120412b8..9ba8ade3be47 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2583,6 +2583,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, uplink_upper == out_dev) out_dev = uplink_dev; + if (!mlx5e_eswitch_rep(out_dev)) + return -EOPNOTSUPP; + out_priv = netdev_priv(out_dev); rpriv = out_priv->ppriv; attr->dests[attr->out_count].rep = rpriv->rep; From 442e1228cbaccee8a9365624608544460155c5d1 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Tue, 18 Dec 2018 09:46:00 +0200 Subject: [PATCH 06/13] net/mlx5e: Tunnel routing output devs helper function For tunnel we determine the output devs for IPv4/6 cases, in two separate functions, with a duplicated code. Move that code from IPv4/6 functions to a helper function, with no functional change. Signed-off-by: Eli Britstein Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/tc_tun.c | 70 +++++++++---------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index b92f8b3ff6b2..c90a0f9cb6e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -6,6 +6,34 @@ #include "lib/vxlan.h" #include "en/tc_tun.h" +static int get_route_and_out_devs(struct mlx5e_priv *priv, + struct net_device *dev, + struct net_device **route_dev, + struct net_device **out_dev) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct net_device *uplink_dev, *uplink_upper; + bool dst_is_lag_dev; + + uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); + uplink_upper = netdev_master_upper_dev_get(uplink_dev); + dst_is_lag_dev = (uplink_upper && + netif_is_lag_master(uplink_upper) && + dev == uplink_upper && + mlx5_lag_is_sriov(priv->mdev)); + + /* if the egress device isn't on the same HW e-switch or + * it's a LAG device, use the uplink + */ + if (!switchdev_port_same_parent_id(priv->netdev, dev) || + dst_is_lag_dev) + *out_dev = uplink_dev; + else + *out_dev = dev; + + return 0; +} + static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct net_device **out_dev, @@ -13,9 +41,6 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, struct neighbour **out_n, u8 *out_ttl) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct net_device *uplink_dev, *uplink_upper; - bool dst_is_lag_dev; struct rtable *rt; struct neighbour *n = NULL; @@ -30,21 +55,9 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, return -EOPNOTSUPP; #endif - uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); - uplink_upper = netdev_master_upper_dev_get(uplink_dev); - dst_is_lag_dev = (uplink_upper && - netif_is_lag_master(uplink_upper) && - rt->dst.dev == uplink_upper && - mlx5_lag_is_sriov(priv->mdev)); - - /* if the egress device isn't on the same HW e-switch or - * it's a LAG device, use the uplink - */ - if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev) || - dst_is_lag_dev) - *out_dev = uplink_dev; - else - *out_dev = rt->dst.dev; + ret = get_route_and_out_devs(priv, rt->dst.dev, NULL, out_dev); + if (ret < 0) + return ret; if (!(*out_ttl)) *out_ttl = ip4_dst_hoplimit(&rt->dst); @@ -76,9 +89,6 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, struct dst_entry *dst; #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct net_device *uplink_dev, *uplink_upper; - bool dst_is_lag_dev; int ret; ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst, @@ -89,21 +99,9 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, if (!(*out_ttl)) *out_ttl = ip6_dst_hoplimit(dst); - uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); - uplink_upper = netdev_master_upper_dev_get(uplink_dev); - dst_is_lag_dev = (uplink_upper && - netif_is_lag_master(uplink_upper) && - dst->dev == uplink_upper && - mlx5_lag_is_sriov(priv->mdev)); - - /* if the egress device isn't on the same HW e-switch or - * it's a LAG device, use the uplink - */ - if (!switchdev_port_same_parent_id(priv->netdev, dst->dev) || - dst_is_lag_dev) - *out_dev = uplink_dev; - else - *out_dev = dst->dev; + ret = get_route_and_out_devs(priv, dst->dev, NULL, out_dev); + if (ret < 0) + return ret; #else return -EOPNOTSUPP; #endif From b168cff0b9b216e68215cdb28db999f542482a56 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Wed, 19 Dec 2018 09:29:10 +0200 Subject: [PATCH 07/13] net/mlx5e: Fail attempt to offload e-switch TC encap flows with vlan on underlay Currently we don't support nor fail attempts to offload encap flows routed to vlan device on the underlay network. We wrongly consider a vlan underlay device to be on the same e-switch b/c the switchdev ID is retrieved recursively. Add explicit check for that and fail such attempts. Also align to a more strict check for the ingress and the underlay devices to practically be on the same eswitch. Fixes: ce99f6b97fcd ('net/mlx5e: Support SRIOV TC encapsulation offloads for IPv6 tunnels') Fixes: 3e621b19b0bb ('net/mlx5e: Support TC encapsulation offloads with upper devices') Signed-off-by: Eli Britstein Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index c90a0f9cb6e1..0d24e9648c21 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -15,6 +15,10 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, struct net_device *uplink_dev, *uplink_upper; bool dst_is_lag_dev; + /* we currently don't offload vlan on underlay */ + if (is_vlan_dev(dev)) + return -EOPNOTSUPP; + uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); uplink_upper = netdev_master_upper_dev_get(uplink_dev); dst_is_lag_dev = (uplink_upper && @@ -28,6 +32,8 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, if (!switchdev_port_same_parent_id(priv->netdev, dev) || dst_is_lag_dev) *out_dev = uplink_dev; + else if (!mlx5e_eswitch_rep(dev)) + return -EOPNOTSUPP; else *out_dev = dev; From 05ada1adb65acd533f978651e7001e3568e7b98a Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Sun, 9 Dec 2018 09:17:18 +0200 Subject: [PATCH 08/13] net/mlx5e: Tunnel encap ETH header helper function In tunnel encap we prepare the encap header for IPv4/6 cases, in two separate functions. For ETH header generation the code is almost duplicated. Move the ETH header generation code from IPv4/6 functions to a helper function, with no functional change. Signed-off-by: Eli Britstein Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/tc_tun.c | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 0d24e9648c21..f90c67c9add0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -180,6 +180,19 @@ static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto, return err; } +static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev, + struct mlx5e_encap_entry *e, + u16 proto) +{ + struct ethhdr *eth = (struct ethhdr *)buf; + + ether_addr_copy(eth->h_dest, e->h_dest); + ether_addr_copy(eth->h_source, dev->dev_addr); + eth->h_proto = htons(proto); + + return (char *)eth + ETH_HLEN; +} + int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct mlx5e_encap_entry *e) @@ -193,7 +206,6 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, struct neighbour *n = NULL; struct flowi4 fl4 = {}; char *encap_header; - struct ethhdr *eth; u8 nud_state, ttl; struct iphdr *ip; int err; @@ -242,13 +254,10 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, read_unlock_bh(&n->lock); /* add ethernet header */ - eth = (struct ethhdr *)encap_header; - ether_addr_copy(eth->h_dest, e->h_dest); - ether_addr_copy(eth->h_source, out_dev->dev_addr); - eth->h_proto = htons(ETH_P_IP); + ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, out_dev, e, + ETH_P_IP); /* add ip header */ - ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr)); ip->tos = tun_key->tos; ip->version = 0x4; ip->ihl = 0x5; @@ -308,7 +317,6 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, struct flowi6 fl6 = {}; struct ipv6hdr *ip6h; char *encap_header; - struct ethhdr *eth; u8 nud_state, ttl; int err; @@ -356,13 +364,10 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, read_unlock_bh(&n->lock); /* add ethernet header */ - eth = (struct ethhdr *)encap_header; - ether_addr_copy(eth->h_dest, e->h_dest); - ether_addr_copy(eth->h_source, out_dev->dev_addr); - eth->h_proto = htons(ETH_P_IPV6); + ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, out_dev, e, + ETH_P_IPV6); /* add ip header */ - ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr)); ip6_flow_hdr(ip6h, tun_key->tos, 0); /* the HW fills up ipv6 payload len */ ip6h->hop_limit = ttl; From c7bcb277bda46d75819fb2d475fa817c10dc209b Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Mon, 3 Dec 2018 17:09:54 +0200 Subject: [PATCH 09/13] net/mlx5e: Re-order route and encap header memory allocation Change the order to first route IPv4/6 and return if error. Only after successful route continue to allocate an encap header, with no functional change. Signed-off-by: Eli Britstein Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/tc_tun.c | 62 ++++++++++--------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index f90c67c9add0..bcf08f5abc2f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -198,28 +198,16 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); - int ipv4_encap_size = ETH_HLEN + - sizeof(struct iphdr) + - e->tunnel_hlen; struct ip_tunnel_key *tun_key = &e->tun_info.key; struct net_device *out_dev; struct neighbour *n = NULL; struct flowi4 fl4 = {}; + int ipv4_encap_size; char *encap_header; u8 nud_state, ttl; struct iphdr *ip; int err; - if (max_encap_size < ipv4_encap_size) { - mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", - ipv4_encap_size, max_encap_size); - return -EOPNOTSUPP; - } - - encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL); - if (!encap_header) - return -ENOMEM; - /* add the IP fields */ fl4.flowi4_tos = tun_key->tos; fl4.daddr = tun_key->u.ipv4.dst; @@ -229,7 +217,22 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &fl4, &n, &ttl); if (err) - goto free_encap; + return err; + + ipv4_encap_size = + ETH_HLEN + + sizeof(struct iphdr) + + e->tunnel_hlen; + + if (max_encap_size < ipv4_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv4_encap_size, max_encap_size); + return -EOPNOTSUPP; + } + + encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL); + if (!encap_header) + return -ENOMEM; /* used by mlx5e_detach_encap to lookup a neigh hash table * entry in the neigh hash table when a user deletes a rule @@ -308,28 +311,16 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); - int ipv6_encap_size = ETH_HLEN + - sizeof(struct ipv6hdr) + - e->tunnel_hlen; struct ip_tunnel_key *tun_key = &e->tun_info.key; struct net_device *out_dev; struct neighbour *n = NULL; struct flowi6 fl6 = {}; struct ipv6hdr *ip6h; + int ipv6_encap_size; char *encap_header; u8 nud_state, ttl; int err; - if (max_encap_size < ipv6_encap_size) { - mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", - ipv6_encap_size, max_encap_size); - return -EOPNOTSUPP; - } - - encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL); - if (!encap_header) - return -ENOMEM; - ttl = tun_key->ttl; fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label); @@ -339,7 +330,22 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, &fl6, &n, &ttl); if (err) - goto free_encap; + return err; + + ipv6_encap_size = + ETH_HLEN + + sizeof(struct ipv6hdr) + + e->tunnel_hlen; + + if (max_encap_size < ipv6_encap_size) { + mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", + ipv6_encap_size, max_encap_size); + return -EOPNOTSUPP; + } + + encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL); + if (!encap_header) + return -ENOMEM; /* used by mlx5e_detach_encap to lookup a neigh hash table * entry in the neigh hash table when a user deletes a rule From aa331450b81f071bc8d7b68d5baab1b78f93598a Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Sun, 9 Dec 2018 09:17:18 +0200 Subject: [PATCH 10/13] net/mlx5e: Support VLAN encap ETH header generation Support generation of native or tagged Ethernet header for encap header, depending on provided net device. Signed-off-by: Eli Britstein Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index bcf08f5abc2f..a6990744730a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -185,12 +185,23 @@ static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev, u16 proto) { struct ethhdr *eth = (struct ethhdr *)buf; + char *ip; ether_addr_copy(eth->h_dest, e->h_dest); ether_addr_copy(eth->h_source, dev->dev_addr); - eth->h_proto = htons(proto); + if (is_vlan_dev(dev)) { + struct vlan_hdr *vlan = (struct vlan_hdr *) + ((char *)eth + ETH_HLEN); + ip = (char *)vlan + VLAN_HLEN; + eth->h_proto = vlan_dev_vlan_proto(dev); + vlan->h_vlan_TCI = htons(vlan_dev_vlan_id(dev)); + vlan->h_vlan_encapsulated_proto = htons(proto); + } else { + eth->h_proto = htons(proto); + ip = (char *)eth + ETH_HLEN; + } - return (char *)eth + ETH_HLEN; + return ip; } int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, From e32ee6c78efa6a32bff782bbe7a9970b018996ca Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Mon, 3 Dec 2018 17:09:54 +0200 Subject: [PATCH 11/13] net/mlx5e: Support tunnel encap over tagged Ethernet Generate encap header depending on the routed device to support native/tagged Ethernet header. Signed-off-by: Eli Britstein Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/tc_tun.c | 44 ++++++++++--------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index a6990744730a..046948ead152 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -15,10 +15,6 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, struct net_device *uplink_dev, *uplink_upper; bool dst_is_lag_dev; - /* we currently don't offload vlan on underlay */ - if (is_vlan_dev(dev)) - return -EOPNOTSUPP; - uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); uplink_upper = netdev_master_upper_dev_get(uplink_dev); dst_is_lag_dev = (uplink_upper && @@ -30,12 +26,18 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, * it's a LAG device, use the uplink */ if (!switchdev_port_same_parent_id(priv->netdev, dev) || - dst_is_lag_dev) - *out_dev = uplink_dev; - else if (!mlx5e_eswitch_rep(dev)) - return -EOPNOTSUPP; - else - *out_dev = dev; + dst_is_lag_dev) { + *route_dev = uplink_dev; + *out_dev = *route_dev; + } else { + *route_dev = dev; + if (is_vlan_dev(*route_dev)) + *out_dev = uplink_dev; + else if (mlx5e_eswitch_rep(dev)) + *out_dev = *route_dev; + else + return -EOPNOTSUPP; + } return 0; } @@ -43,6 +45,7 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct net_device **out_dev, + struct net_device **route_dev, struct flowi4 *fl4, struct neighbour **out_n, u8 *out_ttl) @@ -61,7 +64,7 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, return -EOPNOTSUPP; #endif - ret = get_route_and_out_devs(priv, rt->dst.dev, NULL, out_dev); + ret = get_route_and_out_devs(priv, rt->dst.dev, route_dev, out_dev); if (ret < 0) return ret; @@ -87,6 +90,7 @@ static const char *mlx5e_netdev_kind(struct net_device *dev) static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct net_device **out_dev, + struct net_device **route_dev, struct flowi6 *fl6, struct neighbour **out_n, u8 *out_ttl) @@ -105,7 +109,7 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, if (!(*out_ttl)) *out_ttl = ip6_dst_hoplimit(dst); - ret = get_route_and_out_devs(priv, dst->dev, NULL, out_dev); + ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev); if (ret < 0) return ret; #else @@ -210,7 +214,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); struct ip_tunnel_key *tun_key = &e->tun_info.key; - struct net_device *out_dev; + struct net_device *out_dev, *route_dev; struct neighbour *n = NULL; struct flowi4 fl4 = {}; int ipv4_encap_size; @@ -225,13 +229,13 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, fl4.saddr = tun_key->u.ipv4.src; ttl = tun_key->ttl; - err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, + err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &route_dev, &fl4, &n, &ttl); if (err) return err; ipv4_encap_size = - ETH_HLEN + + (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + sizeof(struct iphdr) + e->tunnel_hlen; @@ -268,7 +272,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, read_unlock_bh(&n->lock); /* add ethernet header */ - ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, out_dev, e, + ip = (struct iphdr *)gen_eth_tnl_hdr(encap_header, route_dev, e, ETH_P_IP); /* add ip header */ @@ -323,7 +327,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); struct ip_tunnel_key *tun_key = &e->tun_info.key; - struct net_device *out_dev; + struct net_device *out_dev, *route_dev; struct neighbour *n = NULL; struct flowi6 fl6 = {}; struct ipv6hdr *ip6h; @@ -338,13 +342,13 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, fl6.daddr = tun_key->u.ipv6.dst; fl6.saddr = tun_key->u.ipv6.src; - err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, + err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, &route_dev, &fl6, &n, &ttl); if (err) return err; ipv6_encap_size = - ETH_HLEN + + (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + sizeof(struct ipv6hdr) + e->tunnel_hlen; @@ -381,7 +385,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, read_unlock_bh(&n->lock); /* add ethernet header */ - ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, out_dev, e, + ip6h = (struct ipv6hdr *)gen_eth_tnl_hdr(encap_header, route_dev, e, ETH_P_IPV6); /* add ip header */ From 0a5b589111c395f2b18c2a508e241207daaa8cca Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Thu, 20 Dec 2018 01:37:17 +0200 Subject: [PATCH 12/13] net/mlx5: Fix query_nic_sys_image_guid() error during init vport system image guid should be queried using vport nic API for Ethernet ports, and vport hca API for Infiniband ports. Fixes: fadd59fc50d0 ("net/mlx5: Introduce inter-device communication mechanism") Signed-off-by: Aviv Heller Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index cfbea66b4879..9b150ce9d315 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -1204,9 +1204,19 @@ EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport); u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev) { - if (!mdev->sys_image_guid) - mlx5_query_nic_vport_system_image_guid(mdev, &mdev->sys_image_guid); + int port_type_cap = MLX5_CAP_GEN(mdev, port_type); + u64 tmp = 0; - return mdev->sys_image_guid; + if (mdev->sys_image_guid) + return mdev->sys_image_guid; + + if (port_type_cap == MLX5_CAP_PORT_TYPE_ETH) + mlx5_query_nic_vport_system_image_guid(mdev, &tmp); + else + mlx5_query_hca_vport_system_image_guid(mdev, &tmp); + + mdev->sys_image_guid = tmp; + + return tmp; } EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); From a64917446eafc7212e962561622d697fce04e9a6 Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Tue, 18 Dec 2018 19:03:27 +0200 Subject: [PATCH 13/13] net/mlx5: Fix LAG requirement when CONFIG_MLX5_ESWITCH is off If CONFIG_MLX5_ESWITCH is not defined, test for SR-IOV being disabled, instead of calling e-switch LAG prereq routine. Since LAG with SRIOV is allowed only when switchdev mode is on. Fixes: eff849b2c669 ("net/mlx5: Allow/disallow LAG according to pre-req only") Signed-off-by: Aviv Heller Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 5187dc7a72a3..3a6baed722d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -291,12 +291,15 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev) static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) { - if (ldev->pf[0].dev && - ldev->pf[1].dev && - mlx5_esw_lag_prereq(ldev->pf[0].dev, ldev->pf[1].dev)) - return true; - else + if (!ldev->pf[0].dev || !ldev->pf[1].dev) return false; + +#ifdef CONFIG_MLX5_ESWITCH + return mlx5_esw_lag_prereq(ldev->pf[0].dev, ldev->pf[1].dev); +#else + return (!mlx5_sriov_is_enabled(ldev->pf[0].dev) && + !mlx5_sriov_is_enabled(ldev->pf[1].dev)); +#endif } static void mlx5_lag_add_ib_devices(struct mlx5_lag *ldev)