From 8b54874ef1617185048029a3083d510569e93751 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 26 Jul 2021 09:20:14 +0300 Subject: [PATCH 01/12] net/mlx5: Fix flow table chaining Fix a bug when flow table is created in priority that already has other flow tables as shown in the below diagram. If the new flow table (FT-B) has the lowest level in the priority, we need to connect the flow tables from the previous priority (p0) to this new table. In addition when this flow table is destroyed (FT-B), we need to connect the flow tables from the previous priority (p0) to the next level flow table (FT-C) in the same priority of the destroyed table (if exists). --------- |root_ns| --------- | -------------------------------- | | | ---------- ---------- --------- |p(prio)-x| | p-y | | p-n | ---------- ---------- --------- | | ---------------- ------------------ |ns(e.g bypass)| |ns(e.g. kernel) | ---------------- ------------------ | | | ------- ------ ---- | p0 | | p1 | |p2| ------- ------ ---- | | \ -------- ------- ------ | FT-A | |FT-B | |FT-C| -------- ------- ------ Fixes: f90edfd279f3 ("net/mlx5_core: Connect flow tables") Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index d7bf0a3e4a52..c0697e1b7118 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1024,17 +1024,19 @@ static int connect_fwd_rules(struct mlx5_core_dev *dev, static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, struct fs_prio *prio) { - struct mlx5_flow_table *next_ft; + struct mlx5_flow_table *next_ft, *first_ft; int err = 0; /* Connect_prev_fts and update_root_ft_create are mutually exclusive */ - if (list_empty(&prio->node.children)) { + first_ft = list_first_entry_or_null(&prio->node.children, + struct mlx5_flow_table, node.list); + if (!first_ft || first_ft->level > ft->level) { err = connect_prev_fts(dev, ft, prio); if (err) return err; - next_ft = find_next_chained_ft(prio); + next_ft = first_ft ? first_ft : find_next_chained_ft(prio); err = connect_fwd_rules(dev, ft, next_ft); if (err) return err; @@ -2120,7 +2122,7 @@ static int disconnect_flow_table(struct mlx5_flow_table *ft) node.list) == ft)) return 0; - next_ft = find_next_chained_ft(prio); + next_ft = find_next_ft(ft); err = connect_fwd_rules(dev, next_ft, ft); if (err) return err; From 90b22b9bcd242a3ba238f2c6f7eab771799001f8 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Thu, 8 Jul 2021 15:24:58 +0300 Subject: [PATCH 02/12] net/mlx5e: Disable Rx ntuple offload for uplink representor Rx ntuple offload is not supported in switchdev mode. Tryng to enable it cause kernel panic. BUG: kernel NULL pointer dereference, address: 0000000000000008 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 80000001065a5067 P4D 80000001065a5067 PUD 106594067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 7 PID: 1089 Comm: ethtool Not tainted 5.13.0-rc7_for_upstream_min_debug_2021_06_23_16_44 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:mlx5e_arfs_enable+0x70/0xd0 [mlx5_core] Code: 44 24 10 00 00 00 00 48 c7 44 24 18 00 00 00 00 49 63 c4 48 89 e2 44 89 e6 48 69 c0 20 08 00 00 48 89 ef 48 03 85 68 ac 00 00 <48> 8b 40 08 48 89 44 24 08 e8 d2 aa fd ff 48 83 05 82 96 18 00 01 RSP: 0018:ffff8881047679e0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000004000000000 RCX: 0000004000000000 RDX: ffff8881047679e0 RSI: 0000000000000000 RDI: ffff888115100880 RBP: ffff888115100880 R08: ffffffffa00f6cb0 R09: ffff888104767a18 R10: ffff8881151000a0 R11: ffff888109479540 R12: 0000000000000000 R13: ffff888104767bb8 R14: ffff888115100000 R15: ffff8881151000a0 FS: 00007f41a64ab740(0000) GS:ffff8882f5dc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 0000000104cbc005 CR4: 0000000000370ea0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: set_feature_arfs+0x1e/0x40 [mlx5_core] mlx5e_handle_feature+0x43/0xa0 [mlx5_core] mlx5e_set_features+0x139/0x1b0 [mlx5_core] __netdev_update_features+0x2b3/0xaf0 ethnl_set_features+0x176/0x3a0 ? __nla_parse+0x22/0x30 genl_family_rcv_msg_doit+0xe2/0x140 genl_rcv_msg+0xde/0x1d0 ? features_reply_size+0xe0/0xe0 ? genl_get_cmd+0xd0/0xd0 netlink_rcv_skb+0x4e/0xf0 genl_rcv+0x24/0x40 netlink_unicast+0x1f6/0x2b0 netlink_sendmsg+0x225/0x450 sock_sendmsg+0x33/0x40 __sys_sendto+0xd4/0x120 ? __sys_recvmsg+0x4e/0x90 ? exc_page_fault+0x219/0x740 __x64_sys_sendto+0x25/0x30 do_syscall_64+0x3f/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f41a65b0cba Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c RSP: 002b:00007ffd8d688358 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00000000010f42a0 RCX: 00007f41a65b0cba RDX: 0000000000000058 RSI: 00000000010f43b0 RDI: 0000000000000003 RBP: 000000000047ae60 R08: 00007f41a667c000 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 00000000010f4340 R13: 00000000010f4350 R14: 00007ffd8d688400 R15: 00000000010f42a0 Modules linked in: mlx5_vdpa vhost_iotlb vdpa xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 br_netfilter rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad ib_ipoib rdma_cm iw_cm ib_cm mlx5_ib ib_uverbs ib_core overlay mlx5_core ptp pps_core fuse CR2: 0000000000000008 ---[ end trace c66523f2aba94b43 ]--- Fixes: 7a9fb35e8c3a ("net/mlx5e: Do not reload ethernet ports when changing eswitch mode") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 29 +++++++++++++------ 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d09e65557e75..c6f99fc77411 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3829,6 +3829,24 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) return 0; } +static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev, + netdev_features_t features) +{ + features &= ~NETIF_F_HW_TLS_RX; + if (netdev->features & NETIF_F_HW_TLS_RX) + netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n"); + + features &= ~NETIF_F_HW_TLS_TX; + if (netdev->features & NETIF_F_HW_TLS_TX) + netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n"); + + features &= ~NETIF_F_NTUPLE; + if (netdev->features & NETIF_F_NTUPLE) + netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n"); + + return features; +} + static netdev_features_t mlx5e_fix_features(struct net_device *netdev, netdev_features_t features) { @@ -3860,15 +3878,8 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n"); } - if (mlx5e_is_uplink_rep(priv)) { - features &= ~NETIF_F_HW_TLS_RX; - if (netdev->features & NETIF_F_HW_TLS_RX) - netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n"); - - features &= ~NETIF_F_HW_TLS_TX; - if (netdev->features & NETIF_F_HW_TLS_TX) - netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n"); - } + if (mlx5e_is_uplink_rep(priv)) + features = mlx5e_fix_uplink_rep_features(netdev, features); mutex_unlock(&priv->state_lock); From c671972534c6f7fce789ac8156a2bc3bd146f806 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Tue, 22 Jun 2021 17:07:02 +0300 Subject: [PATCH 03/12] net/mlx5: E-Switch, Set destination vport vhca id only when merged eswitch is supported Destination vport vhca id is valid flag is set only merged eswitch isn't supported. Change destination vport vhca id value to be set also only when merged eswitch is supported. Fixes: e4ad91f23f10 ("net/mlx5e: Split offloaded eswitch TC rules for port mirroring") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7579f3402776..b0a2ca9037ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -382,10 +382,11 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f { dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest[dest_idx].vport.num = esw_attr->dests[attr_idx].rep->vport; - dest[dest_idx].vport.vhca_id = - MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { + dest[dest_idx].vport.vhca_id = + MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id); dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + } if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) { if (pkt_reformat) { flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; From dd3fddb82780bfa24124834edd90bbc63bd689cc Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 2 Jun 2021 14:17:07 +0300 Subject: [PATCH 04/12] net/mlx5: E-Switch, handle devcom events only for ports on the same device This is the same check as LAG mode checks if to enable lag. This will fix adding peer miss rules if lag is not supported and even an incorrect rules in socket direct mode. Also fix the incorrect comment on mlx5_get_next_phys_dev() as flow #1 doesn't exists. Fixes: ac004b832128 ("net/mlx5e: E-Switch, Add peer miss rules") Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 5 +---- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 3 +++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index ceebfc20f65e..def2156e50ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -500,10 +500,7 @@ static int next_phys_dev(struct device *dev, const void *data) return 1; } -/* This function is called with two flows: - * 1. During initialization of mlx5_core_dev and we don't need to lock it. - * 2. During LAG configure stage and caller holds &mlx5_intf_mutex. - */ +/* Must be called with intf_mutex held */ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) { struct auxiliary_device *adev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index b0a2ca9037ac..011e766e4f67 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2368,6 +2368,9 @@ static int mlx5_esw_offloads_devcom_event(int event, switch (event) { case ESW_OFFLOADS_DEVCOM_PAIR: + if (mlx5_get_next_phys_dev(esw->dev) != peer_esw->dev) + break; + if (mlx5_eswitch_vport_match_metadata_enabled(esw) != mlx5_eswitch_vport_match_metadata_enabled(peer_esw)) break; From e2351e517068718724f1d3b4010e2a41ec91fa76 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 30 Jun 2021 13:45:05 +0300 Subject: [PATCH 05/12] net/mlx5e: RX, Avoid possible data corruption when relaxed ordering and LRO combined When HW aggregates packets for an LRO session, it writes the payload of two consecutive packets of a flow contiguously, so that they usually share a cacheline. The first byte of a packet's payload is written immediately after the last byte of the preceding packet. In this flow, there are two consecutive write requests to the shared cacheline: 1. Regular write for the earlier packet. 2. Read-modify-write for the following packet. In case of relaxed-ordering on, these two writes might be re-ordered. Using the end padding optimization (to avoid partial write for the last cacheline of a packet) becomes problematic if the two writes occur out-of-order, as the padding would overwrite payload that belongs to the following packet, causing data corruption. Avoid this by disabling the end padding optimization when both LRO and relaxed-ordering are enabled. Fixes: 17347d5430c4 ("net/mlx5e: Add support for PCI relaxed ordering") Signed-off-by: Tariq Toukan Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 150c8e82c738..2cbf18c967f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -471,6 +471,15 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev, param->cq_period_mode = params->rx_cq_moderation.cq_period_mode; } +static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + bool ro = pcie_relaxed_ordering_enabled(mdev->pdev) && + MLX5_CAP_GEN(mdev, relaxed_ordering_write); + + return ro && params->lro_en ? + MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN; +} + int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, @@ -508,7 +517,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, } MLX5_SET(wq, wq, wq_type, params->rq_wq_type); - MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); + MLX5_SET(wq, wq, end_padding_mode, rq_end_pad_mode(mdev, params)); MLX5_SET(wq, wq, log_wq_stride, mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs)); MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn); From 9841d58f3550d11c6181424427e8ad8c9c80f1b6 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 30 Jun 2021 13:33:31 +0300 Subject: [PATCH 06/12] net/mlx5e: Add NETIF_F_HW_TC to hw_features when HTB offload is available If a feature flag is only present in features, but not in hw_features, the user can't reset it. Although hw_features may contain NETIF_F_HW_TC by the point where the driver checks whether HTB offload is supported, this flag is controlled by another condition that may not hold. Set it explicitly to make sure the user can disable it. Fixes: 214baf22870c ("net/mlx5e: Support HTB offload") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c6f99fc77411..c5a2e3e6fe4b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4870,6 +4870,9 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) if (MLX5_CAP_ETH(mdev, scatter_fcs)) netdev->hw_features |= NETIF_F_RXFCS; + if (mlx5_qos_is_supported(mdev)) + netdev->hw_features |= NETIF_F_HW_TC; + netdev->features = netdev->hw_features; /* Defaults */ @@ -4890,8 +4893,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_NTUPLE; #endif } - if (mlx5_qos_is_supported(mdev)) - netdev->features |= NETIF_F_HW_TC; netdev->features |= NETIF_F_HIGHDMA; netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER; From a759f845d1f78634b54744db0fa48524ef6d0e14 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 30 Jun 2021 11:17:12 +0300 Subject: [PATCH 07/12] net/mlx5e: Consider PTP-RQ when setting RX VLAN stripping Add PTP-RQ to the loop when setting rx-vlan-offload feature via ethtool. On PTP-RQ's creation, set rx-vlan-offload into its parameters. Fixes: a099da8ffcf6 ("net/mlx5e: Add RQ to PTP channel") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 5 ++++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 778e229310a9..07b429b94d93 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -482,8 +482,11 @@ static void mlx5e_ptp_build_params(struct mlx5e_ptp *c, params->log_sq_size = orig->log_sq_size; mlx5e_ptp_build_sq_param(c->mdev, params, &cparams->txq_sq_param); } - if (test_bit(MLX5E_PTP_STATE_RX, c->state)) + /* RQ */ + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) { + params->vlan_strip_disable = orig->vlan_strip_disable; mlx5e_ptp_build_rq_param(c->mdev, c->netdev, c->priv->q_counter, cparams); + } } static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c5a2e3e6fe4b..37c440837945 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3384,7 +3384,7 @@ static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool en static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) { - int err = 0; + int err; int i; for (i = 0; i < chs->num; i++) { @@ -3392,6 +3392,8 @@ static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) if (err) return err; } + if (chs->ptp && test_bit(MLX5E_PTP_STATE_RX, chs->ptp->state)) + return mlx5e_modify_rq_vsd(&chs->ptp->rq, vsd); return 0; } From 497008e783452a2ec45c7ec5835cfe6950dcb097 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 21 Jun 2021 18:04:07 +0300 Subject: [PATCH 08/12] net/mlx5e: Fix page allocation failure for trap-RQ over SF Set the correct device pointer to the trap-RQ, to allow access to dma_mask and avoid allocation request with the wrong pci-dev. WARNING: CPU: 1 PID: 12005 at kernel/dma/mapping.c:151 dma_map_page_attrs+0x139/0x1c0 ... all Trace: ? __page_pool_alloc_pages_slow+0x5a/0x210 mlx5e_post_rx_wqes+0x258/0x400 [mlx5_core] mlx5e_trap_napi_poll+0x44/0xc0 [mlx5_core] __napi_poll+0x24/0x150 net_rx_action+0x22b/0x280 __do_softirq+0xc7/0x27e do_softirq+0x61/0x80 __local_bh_enable_ip+0x4b/0x50 mlx5e_handle_action_trap+0x2dd/0x4d0 [mlx5_core] blocking_notifier_call_chain+0x5a/0x80 mlx5_devlink_trap_action_set+0x8b/0x100 [mlx5_core] Fixes: 5543e989fe5e ("net/mlx5e: Add trap entity to ETH driver") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/trap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index 86ab4e864fe6..7f94508594fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -37,7 +37,7 @@ static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params struct mlx5e_priv *priv = t->priv; rq->wq_type = params->rq_wq_type; - rq->pdev = mdev->device; + rq->pdev = t->pdev; rq->netdev = priv->netdev; rq->priv = priv; rq->clock = &mdev->clock; From 678b1ae1af4aef488fcc42baa663e737b9a531ba Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 22 Jun 2021 10:24:17 +0300 Subject: [PATCH 09/12] net/mlx5e: Fix page allocation failure for ptp-RQ over SF Set the correct pci-device pointer to the ptp-RQ. This allows access to dma_mask and avoids allocation request with wrong pci-device. Fixes: a099da8ffcf6 ("net/mlx5e: Add RQ to PTP channel") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 07b429b94d93..efef4adce086 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -497,7 +497,7 @@ static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, int err; rq->wq_type = params->rq_wq_type; - rq->pdev = mdev->device; + rq->pdev = c->pdev; rq->netdev = priv->netdev; rq->priv = priv; rq->clock = &mdev->clock; From 7f331bf0f060c2727e36d64f9b098b4ee5f3dfad Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 16 Jun 2021 19:11:03 +0300 Subject: [PATCH 10/12] net/mlx5: Unload device upon firmware fatal error When fw_fatal reporter reports an error, the firmware in not responding. Unload the device to ensure that the driver closes all its resources, even if recovery is not due (user disabled auto-recovery or reporter is in grace period). On successful recovery the device is loaded back up. Fixes: b3bd076f7501 ("net/mlx5: Report devlink health on FW fatal issues") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 9ff163c5bcde..9abeb80ffa31 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -626,8 +626,16 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) } fw_reporter_ctx.err_synd = health->synd; fw_reporter_ctx.miss_counter = health->miss_counter; - devlink_health_report(health->fw_fatal_reporter, - "FW fatal error reported", &fw_reporter_ctx); + if (devlink_health_report(health->fw_fatal_reporter, + "FW fatal error reported", &fw_reporter_ctx) == -ECANCELED) { + /* If recovery wasn't performed, due to grace period, + * unload the driver. This ensures that the driver + * closes all its resources and it is not subjected to + * requests from the kernel. + */ + mlx5_core_err(dev, "Driver is in error state. Unloading\n"); + mlx5_unload_one(dev); + } } static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = { From b1c2f6312c5005c928a72e668bf305a589d828d4 Mon Sep 17 00:00:00 2001 From: Dima Chumak Date: Mon, 26 Apr 2021 15:16:26 +0300 Subject: [PATCH 11/12] net/mlx5e: Fix nullptr in mlx5e_hairpin_get_mdev() The result of __dev_get_by_index() is not checked for NULL and then gets dereferenced immediately. Also, __dev_get_by_index() must be called while holding either RTNL lock or @dev_base_lock, which isn't satisfied by mlx5e_hairpin_get_mdev() or its callers. This makes the underlying hlist_for_each_entry() loop not safe, and can have adverse effects in itself. Fix by using dev_get_by_index() and handling nullptr return value when ifindex device is not found. Update mlx5e_hairpin_get_mdev() callers to check for possible PTR_ERR() result. Fixes: 77ab67b7f0f9 ("net/mlx5e: Basic setup of hairpin object") Addresses-Coverity: ("Dereference null return value") Signed-off-by: Dima Chumak Reviewed-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 629a61e8022f..d273758255c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -452,12 +452,32 @@ static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv, static struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex) { + struct mlx5_core_dev *mdev; struct net_device *netdev; struct mlx5e_priv *priv; - netdev = __dev_get_by_index(net, ifindex); + netdev = dev_get_by_index(net, ifindex); + if (!netdev) + return ERR_PTR(-ENODEV); + priv = netdev_priv(netdev); - return priv->mdev; + mdev = priv->mdev; + dev_put(netdev); + + /* Mirred tc action holds a refcount on the ifindex net_device (see + * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev + * after dev_put(netdev), while we're in the context of adding a tc flow. + * + * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then + * stored in a hairpin object, which exists until all flows, that refer to it, get + * removed. + * + * On the other hand, after a hairpin object has been created, the peer net_device may + * be removed/unbound while there are still some hairpin flows that are using it. This + * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to + * NETDEV_UNREGISTER event of the peer net_device. + */ + return mdev; } static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp) @@ -666,6 +686,10 @@ mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params func_mdev = priv->mdev; peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex); + if (IS_ERR(peer_mdev)) { + err = PTR_ERR(peer_mdev); + goto create_pair_err; + } pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params); if (IS_ERR(pair)) { @@ -804,6 +828,11 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, int err; peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex); + if (IS_ERR(peer_mdev)) { + NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device"); + return PTR_ERR(peer_mdev); + } + if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) { NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported"); return -EOPNOTSUPP; From 740452e09cf5fc489ce60831cf11abef117b5d26 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Mon, 26 Apr 2021 11:06:37 +0800 Subject: [PATCH 12/12] net/mlx5: Fix mlx5_vport_tbl_attr chain from u16 to u32 The offending refactor commit uses u16 chain wrongly. Actually, it should be u32. Fixes: c620b772152b ("net/mlx5: Refactor tc flow attributes structure") CC: Ariel Levkovich Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 48cac5bf606d..d562edf5b0bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -636,7 +636,7 @@ struct esw_vport_tbl_namespace { }; struct mlx5_vport_tbl_attr { - u16 chain; + u32 chain; u16 prio; u16 vport; const struct esw_vport_tbl_namespace *vport_ns;