From 7d47433cf74f942a414171867d89c08640cfef45 Mon Sep 17 00:00:00 2001 From: Yamin Friedman Date: Mon, 7 Oct 2019 16:59:31 +0300 Subject: [PATCH 01/39] net/mlx5: Expose optimal performance scatter entries capability Expose maximum scatter entries per RDMA READ for optimal performance. Signed-off-by: Yamin Friedman Reviewed-by: Or Gerlitz Reviewed-by: Christoph Hellwig Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 138c50d5a353..c0bfb1d90dd2 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1153,7 +1153,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_srq[0x5]; u8 reserved_at_b0[0x10]; - u8 reserved_at_c0[0x8]; + u8 max_sgl_for_optimized_performance[0x8]; u8 log_max_cq_sz[0x8]; u8 reserved_at_d0[0xb]; u8 log_max_cq[0x5]; From e019cb536d046ea1c7ba5d370845ea362610b348 Mon Sep 17 00:00:00 2001 From: Qing Huang Date: Mon, 28 Oct 2019 23:34:56 +0000 Subject: [PATCH 02/39] net/mlx5: Fixed a typo in a comment in esw_del_uc_addr() Changed "managerss" to "managers". Fixes: a1b3839ac4a4 ("net/mlx5: E-Switch, Properly refer to the esw manager vport") Signed-off-by: Qing Huang Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 30aae76b6a1d..4c18ac1299ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -530,7 +530,7 @@ static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) u16 vport = vaddr->vport; int err = 0; - /* Skip mlx5_mpfs_del_mac for eswitch managerss, + /* Skip mlx5_mpfs_del_mac for eswitch managers, * it is already done by its netdev in mlx5e_execute_l2_action */ if (!vaddr->mpfs || esw->manager_vport == vport) From 6d94e610e4b6a77007d50952d3c859d3e300c0ab Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Mon, 28 Oct 2019 23:34:58 +0000 Subject: [PATCH 03/39] net/mlx5: E-Switch, Rename egress config to generic name Refactor vport egress config in offloads mode Refactoring vport egress configuration in offloads mode that includes egress prio tag configuration. This makes code symmetric to ingress configuration. Signed-off-by: Vu Pham Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 50 ++++++++++--------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 00d71db15f22..506cea8181f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1864,32 +1864,16 @@ static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec; int err = 0; - if (!MLX5_CAP_GEN(esw->dev, prio_tag_required)) - return 0; - /* For prio tag mode, there is only 1 FTEs: * 1) prio tag packets - pop the prio tag VLAN, allow * Unmatched traffic is allowed by default */ - - esw_vport_cleanup_egress_rules(esw, vport); - - err = esw_vport_enable_egress_acl(esw, vport); - if (err) { - mlx5_core_warn(esw->dev, - "failed to enable egress acl (%d) on vport[%d]\n", - err, vport->vport); - return err; - } - esw_debug(esw->dev, "vport[%d] configure prio tag egress rules\n", vport->vport); spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) { - err = -ENOMEM; - goto out_no_mem; - } + if (!spec) + return -ENOMEM; /* prio tag vlan rule - pop it so VF receives untagged packets */ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); @@ -1909,14 +1893,9 @@ static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw, "vport[%d] configure egress pop prio tag vlan rule failed, err(%d)\n", vport->vport, err); vport->egress.allowed_vlan = NULL; - goto out; } -out: kvfree(spec); -out_no_mem: - if (err) - esw_vport_cleanup_egress_rules(esw, vport); return err; } @@ -1961,6 +1940,29 @@ out: return err; } +static int esw_vport_egress_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int err; + + if (!MLX5_CAP_GEN(esw->dev, prio_tag_required)) + return 0; + + esw_vport_cleanup_egress_rules(esw, vport); + + err = esw_vport_enable_egress_acl(esw, vport); + if (err) + return err; + + esw_debug(esw->dev, "vport(%d) configure egress rules\n", vport->vport); + + err = esw_vport_egress_prio_tag_config(esw, vport); + if (err) + esw_vport_disable_egress_acl(esw, vport); + + return err; +} + static bool esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw) { @@ -1996,7 +1998,7 @@ static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw) goto err_ingress; if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) { - err = esw_vport_egress_prio_tag_config(esw, vport); + err = esw_vport_egress_config(esw, vport); if (err) goto err_egress; } From b1a3380aa709082761c1dba89234ac16c19037c6 Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Mon, 28 Oct 2019 23:35:00 +0000 Subject: [PATCH 04/39] net/mlx5: E-Switch, Rename ingress acl config in offloads mode Changing the function name esw_ingress_acl_common_config() to esw_ingress_acl_config() to be consistent with egress config function naming in offloads mode. Signed-off-by: Vu Pham Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 506cea8181f9..48adec168a7c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1899,8 +1899,8 @@ static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw, return err; } -static int esw_vport_ingress_common_config(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +static int esw_vport_ingress_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { int err; @@ -1993,7 +1993,7 @@ static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw) esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; mlx5_esw_for_all_vports(esw, i, vport) { - err = esw_vport_ingress_common_config(esw, vport); + err = esw_vport_ingress_config(esw, vport); if (err) goto err_ingress; From fdde49e00b9d2041086568b52670043a8def96ff Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 28 Oct 2019 23:35:03 +0000 Subject: [PATCH 05/39] net/mlx5: E-switch, Introduce and use vlan rule config helper Between legacy mode and switchdev mode, only two fields are changed, vlan_tag and flow action. Hence to avoid duplicte code between two modes, introduce and and use helper function to configure allowed VLAN rule. While at it, get rid of duplicate debug message. Signed-off-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 70 ++++++++++++------- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 4 ++ .../mellanox/mlx5/core/eswitch_offloads.c | 54 +++----------- 3 files changed, 59 insertions(+), 69 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 4c18ac1299ae..ef7d84a1dbc2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1328,6 +1328,43 @@ out: return err; } +int mlx5_esw_create_vport_egress_acl_vlan(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + u16 vlan_id, u32 flow_action) +{ + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec; + int err = 0; + + if (vport->egress.allowed_vlan) + return -EEXIST; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vlan_id); + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + flow_act.action = flow_action; + vport->egress.allowed_vlan = + mlx5_add_flow_rules(vport->egress.acl, spec, + &flow_act, NULL, 0); + if (IS_ERR(vport->egress.allowed_vlan)) { + err = PTR_ERR(vport->egress.allowed_vlan); + esw_warn(esw->dev, + "vport[%d] configure egress vlan rule failed, err(%d)\n", + vport->vport, err); + vport->egress.allowed_vlan = NULL; + } + + kvfree(spec); + return err; +} + static int esw_vport_egress_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { @@ -1358,34 +1395,17 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, "vport[%d] configure egress rules, vlan(%d) qos(%d)\n", vport->vport, vport->info.vlan, vport->info.qos); - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) { - err = -ENOMEM; - goto out; - } - /* Allowed vlan rule */ - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); - MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag); - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->info.vlan); - - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; - vport->egress.allowed_vlan = - mlx5_add_flow_rules(vport->egress.acl, spec, - &flow_act, NULL, 0); - if (IS_ERR(vport->egress.allowed_vlan)) { - err = PTR_ERR(vport->egress.allowed_vlan); - esw_warn(esw->dev, - "vport[%d] configure egress allowed vlan rule failed, err(%d)\n", - vport->vport, err); - vport->egress.allowed_vlan = NULL; - goto out; - } + err = mlx5_esw_create_vport_egress_acl_vlan(esw, vport, vport->info.vlan, + MLX5_FLOW_CONTEXT_ACTION_ALLOW); + if (err) + return err; /* Drop others rule (star rule) */ - memset(spec, 0, sizeof(*spec)); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + goto out; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; /* Attach egress drop flow counter */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 6bd6f5895244..1824b0ad7c9f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -421,6 +421,10 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, u16 vport, u16 vlan, u8 qos, u8 set_flags); +int mlx5_esw_create_vport_egress_acl_vlan(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + u16 vlan_id, u32 flow_action); + static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev, u8 vlan_depth) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 48adec168a7c..f0c7abd09120 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1857,48 +1857,6 @@ void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, } } -static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) -{ - struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_spec *spec; - int err = 0; - - /* For prio tag mode, there is only 1 FTEs: - * 1) prio tag packets - pop the prio tag VLAN, allow - * Unmatched traffic is allowed by default - */ - esw_debug(esw->dev, - "vport[%d] configure prio tag egress rules\n", vport->vport); - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - return -ENOMEM; - - /* prio tag vlan rule - pop it so VF receives untagged packets */ - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); - MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag); - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, 0); - - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_VLAN_POP | - MLX5_FLOW_CONTEXT_ACTION_ALLOW; - vport->egress.allowed_vlan = - mlx5_add_flow_rules(vport->egress.acl, spec, - &flow_act, NULL, 0); - if (IS_ERR(vport->egress.allowed_vlan)) { - err = PTR_ERR(vport->egress.allowed_vlan); - esw_warn(esw->dev, - "vport[%d] configure egress pop prio tag vlan rule failed, err(%d)\n", - vport->vport, err); - vport->egress.allowed_vlan = NULL; - } - - kvfree(spec); - return err; -} - static int esw_vport_ingress_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { @@ -1954,9 +1912,17 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, if (err) return err; - esw_debug(esw->dev, "vport(%d) configure egress rules\n", vport->vport); + /* For prio tag mode, there is only 1 FTEs: + * 1) prio tag packets - pop the prio tag VLAN, allow + * Unmatched traffic is allowed by default + */ + esw_debug(esw->dev, + "vport[%d] configure prio tag egress rules\n", vport->vport); - err = esw_vport_egress_prio_tag_config(esw, vport); + /* prio tag vlan rule - pop it so VF receives untagged packets */ + err = mlx5_esw_create_vport_egress_acl_vlan(esw, vport, 0, + MLX5_FLOW_CONTEXT_ACTION_VLAN_POP | + MLX5_FLOW_CONTEXT_ACTION_ALLOW); if (err) esw_vport_disable_egress_acl(esw, vport); From ea2300e02a71207b11111a44cbe7185a94f78a72 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 28 Oct 2019 23:35:05 +0000 Subject: [PATCH 06/39] net/mlx5: Introduce and use mlx5_esw_is_manager_vport() Currently esw_enable_vport() does vport check for zero to enable drop counters regardless of execution on ECPF/PF. While esw_disable_vport() considers such scenario. To keep consistency across code for checking for manager_vport, introduce and use mlx5_esw_is_manager_vport() to check if a specified vport is eswitch manager vport or not. Signed-off-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 13 +++++++------ drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 6 ++++++ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index ef7d84a1dbc2..fa1228a8005f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -501,7 +501,7 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) /* Skip mlx5_mpfs_add_mac for eswitch_managers, * it is already done by its netdev in mlx5e_execute_l2_action */ - if (esw->manager_vport == vport) + if (mlx5_esw_is_manager_vport(esw, vport)) goto fdb_add; err = mlx5_mpfs_add_mac(esw->dev, mac); @@ -533,7 +533,7 @@ static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) /* Skip mlx5_mpfs_del_mac for eswitch managers, * it is already done by its netdev in mlx5e_execute_l2_action */ - if (!vaddr->mpfs || esw->manager_vport == vport) + if (!vaddr->mpfs || mlx5_esw_is_manager_vport(esw, vport)) goto fdb_del; err = mlx5_mpfs_del_mac(esw->dev, mac); @@ -1639,7 +1639,7 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw, u16 vport_num = vport->vport; int flags; - if (esw->manager_vport == vport_num) + if (mlx5_esw_is_manager_vport(esw, vport_num)) return; mlx5_modify_vport_admin_state(esw->dev, @@ -1713,7 +1713,8 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); /* Create steering drop counters for ingress and egress ACLs */ - if (vport_num && esw->mode == MLX5_ESWITCH_LEGACY) + if (!mlx5_esw_is_manager_vport(esw, vport_num) && + esw->mode == MLX5_ESWITCH_LEGACY) esw_vport_create_drop_counters(vport); /* Restore old vport configuration */ @@ -1731,7 +1732,7 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, /* Esw manager is trusted by default. Host PF (vport 0) is trusted as well * in smartNIC as it's a vport group manager. */ - if (esw->manager_vport == vport_num || + if (mlx5_esw_is_manager_vport(esw, vport_num) || (!vport_num && mlx5_core_is_ecpf(esw->dev))) vport->info.trusted = true; @@ -1766,7 +1767,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, esw_vport_change_handle_locked(vport); vport->enabled_events = 0; esw_vport_disable_qos(esw, vport); - if (esw->manager_vport != vport_num && + if (!mlx5_esw_is_manager_vport(esw, vport_num) && esw->mode == MLX5_ESWITCH_LEGACY) { mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 1824b0ad7c9f..75e69644d70e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -463,6 +463,12 @@ static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) MLX5_VPORT_ECPF : MLX5_VPORT_PF; } +static inline bool +mlx5_esw_is_manager_vport(const struct mlx5_eswitch *esw, u16 vport_num) +{ + return esw->manager_vport == vport_num; +} + static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev) { return mlx5_core_is_ecpf_esw_manager(dev) ? From 99ecd64631ef9873619be493c711afb83aff75fc Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 28 Oct 2019 23:35:07 +0000 Subject: [PATCH 07/39] net/mlx5: Correct comment for legacy fields fdb_table is used for both legacy and offloads mode. It was incorrect to comment that fdb_table is legacy specific. Hence, fix the comment to reflect that fdb_table is used in legacy and offloads mode. Fixes: 131ce7014043 ("net/mlx5: E-Switch, Remove redundant mc_promisc NULL check") Signed-off-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 75e69644d70e..a41d4aad9d28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -217,8 +217,8 @@ enum { struct mlx5_eswitch { struct mlx5_core_dev *dev; struct mlx5_nb nb; - /* legacy data structures */ struct mlx5_eswitch_fdb fdb_table; + /* legacy data structures */ struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; struct esw_mc_addr mc_promisc; /* end of legacy */ From d68316b5a1046b489097c5e5e24139548b79971f Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 28 Oct 2019 23:35:10 +0000 Subject: [PATCH 08/39] net/mlx5: Move metdata fields under offloads structure Metadata fields are offload mode specific. To improve code readability, move metadata under offloads structure. Signed-off-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 6 ++-- .../mellanox/mlx5/core/eswitch_offloads.c | 33 ++++++++++--------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index a41d4aad9d28..5f862992b9c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -69,11 +69,13 @@ struct vport_ingress { struct mlx5_flow_group *allow_spoofchk_only_grp; struct mlx5_flow_group *allow_untagged_only_grp; struct mlx5_flow_group *drop_grp; - struct mlx5_modify_hdr *modify_metadata; - struct mlx5_flow_handle *modify_metadata_rule; struct mlx5_flow_handle *allow_rule; struct mlx5_flow_handle *drop_rule; struct mlx5_fc *drop_counter; + struct { + struct mlx5_modify_hdr *modify_metadata; + struct mlx5_flow_handle *modify_metadata_rule; + } offloads; }; struct vport_egress { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index f0c7abd09120..874e70e3792a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1778,9 +1778,9 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw, flow_act.vlan[0].vid = 0; flow_act.vlan[0].prio = 0; - if (vport->ingress.modify_metadata_rule) { + if (vport->ingress.offloads.modify_metadata_rule) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - flow_act.modify_hdr = vport->ingress.modify_metadata; + flow_act.modify_hdr = vport->ingress.offloads.modify_metadata; } vport->ingress.allow_rule = @@ -1816,11 +1816,11 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, MLX5_SET(set_action_in, action, data, mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport)); - vport->ingress.modify_metadata = + vport->ingress.offloads.modify_metadata = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, 1, action); - if (IS_ERR(vport->ingress.modify_metadata)) { - err = PTR_ERR(vport->ingress.modify_metadata); + if (IS_ERR(vport->ingress.offloads.modify_metadata)) { + err = PTR_ERR(vport->ingress.offloads.modify_metadata); esw_warn(esw->dev, "failed to alloc modify header for vport %d ingress acl (%d)\n", vport->vport, err); @@ -1828,32 +1828,33 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, } flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW; - flow_act.modify_hdr = vport->ingress.modify_metadata; - vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl, - &spec, &flow_act, NULL, 0); - if (IS_ERR(vport->ingress.modify_metadata_rule)) { - err = PTR_ERR(vport->ingress.modify_metadata_rule); + flow_act.modify_hdr = vport->ingress.offloads.modify_metadata; + vport->ingress.offloads.modify_metadata_rule = + mlx5_add_flow_rules(vport->ingress.acl, + &spec, &flow_act, NULL, 0); + if (IS_ERR(vport->ingress.offloads.modify_metadata_rule)) { + err = PTR_ERR(vport->ingress.offloads.modify_metadata_rule); esw_warn(esw->dev, "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n", vport->vport, err); - vport->ingress.modify_metadata_rule = NULL; + vport->ingress.offloads.modify_metadata_rule = NULL; goto out; } out: if (err) - mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata); + mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata); return err; } void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - if (vport->ingress.modify_metadata_rule) { - mlx5_del_flow_rules(vport->ingress.modify_metadata_rule); - mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata); + if (vport->ingress.offloads.modify_metadata_rule) { + mlx5_del_flow_rules(vport->ingress.offloads.modify_metadata_rule); + mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata); - vport->ingress.modify_metadata_rule = NULL; + vport->ingress.offloads.modify_metadata_rule = NULL; } } From 853b53520c9d11db7652e3603665b0ad475741a5 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 28 Oct 2019 23:35:11 +0000 Subject: [PATCH 09/39] net/mlx5: Move legacy drop counter and rule under legacy structure To improve code readability, move legacy drop counters and droup rule under legacy structure. While at it, (a) prefix drop flow counters helper with legacy_. (b) nullify the rule pointers only if they were valid. Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 82 ++++++++++--------- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 12 ++- 2 files changed, 50 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index fa1228a8005f..0dd5e5d5ea35 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1040,14 +1040,15 @@ out: void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan)) + if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan)) { mlx5_del_flow_rules(vport->egress.allowed_vlan); + vport->egress.allowed_vlan = NULL; + } - if (!IS_ERR_OR_NULL(vport->egress.drop_rule)) - mlx5_del_flow_rules(vport->egress.drop_rule); - - vport->egress.allowed_vlan = NULL; - vport->egress.drop_rule = NULL; + if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_rule)) { + mlx5_del_flow_rules(vport->egress.legacy.drop_rule); + vport->egress.legacy.drop_rule = NULL; + } } void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, @@ -1202,14 +1203,15 @@ out: void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - if (!IS_ERR_OR_NULL(vport->ingress.drop_rule)) - mlx5_del_flow_rules(vport->ingress.drop_rule); + if (!IS_ERR_OR_NULL(vport->ingress.legacy.drop_rule)) { + mlx5_del_flow_rules(vport->ingress.legacy.drop_rule); + vport->ingress.legacy.drop_rule = NULL; + } - if (!IS_ERR_OR_NULL(vport->ingress.allow_rule)) + if (!IS_ERR_OR_NULL(vport->ingress.allow_rule)) { mlx5_del_flow_rules(vport->ingress.allow_rule); - - vport->ingress.drop_rule = NULL; - vport->ingress.allow_rule = NULL; + vport->ingress.allow_rule = NULL; + } esw_vport_del_ingress_acl_modify_metadata(esw, vport); } @@ -1238,7 +1240,7 @@ void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, static int esw_vport_ingress_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - struct mlx5_fc *counter = vport->ingress.drop_counter; + struct mlx5_fc *counter = vport->ingress.legacy.drop_counter; struct mlx5_flow_destination drop_ctr_dst = {0}; struct mlx5_flow_destination *dst = NULL; struct mlx5_flow_act flow_act = {0}; @@ -1309,15 +1311,15 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, dst = &drop_ctr_dst; dest_num++; } - vport->ingress.drop_rule = + vport->ingress.legacy.drop_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, &flow_act, dst, dest_num); - if (IS_ERR(vport->ingress.drop_rule)) { - err = PTR_ERR(vport->ingress.drop_rule); + if (IS_ERR(vport->ingress.legacy.drop_rule)) { + err = PTR_ERR(vport->ingress.legacy.drop_rule); esw_warn(esw->dev, "vport[%d] configure ingress drop rule, err(%d)\n", vport->vport, err); - vport->ingress.drop_rule = NULL; + vport->ingress.legacy.drop_rule = NULL; goto out; } @@ -1368,7 +1370,7 @@ int mlx5_esw_create_vport_egress_acl_vlan(struct mlx5_eswitch *esw, static int esw_vport_egress_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - struct mlx5_fc *counter = vport->egress.drop_counter; + struct mlx5_fc *counter = vport->egress.legacy.drop_counter; struct mlx5_flow_destination drop_ctr_dst = {0}; struct mlx5_flow_destination *dst = NULL; struct mlx5_flow_act flow_act = {0}; @@ -1416,15 +1418,15 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, dst = &drop_ctr_dst; dest_num++; } - vport->egress.drop_rule = + vport->egress.legacy.drop_rule = mlx5_add_flow_rules(vport->egress.acl, spec, &flow_act, dst, dest_num); - if (IS_ERR(vport->egress.drop_rule)) { - err = PTR_ERR(vport->egress.drop_rule); + if (IS_ERR(vport->egress.legacy.drop_rule)) { + err = PTR_ERR(vport->egress.legacy.drop_rule); esw_warn(esw->dev, "vport[%d] configure egress drop rule failed, err(%d)\n", vport->vport, err); - vport->egress.drop_rule = NULL; + vport->egress.legacy.drop_rule = NULL; } out: kvfree(spec); @@ -1667,39 +1669,39 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw, } } -static void esw_vport_create_drop_counters(struct mlx5_vport *vport) +static void esw_legacy_vport_create_drop_counters(struct mlx5_vport *vport) { struct mlx5_core_dev *dev = vport->dev; if (MLX5_CAP_ESW_INGRESS_ACL(dev, flow_counter)) { - vport->ingress.drop_counter = mlx5_fc_create(dev, false); - if (IS_ERR(vport->ingress.drop_counter)) { + vport->ingress.legacy.drop_counter = mlx5_fc_create(dev, false); + if (IS_ERR(vport->ingress.legacy.drop_counter)) { esw_warn(dev, "vport[%d] configure ingress drop rule counter failed\n", vport->vport); - vport->ingress.drop_counter = NULL; + vport->ingress.legacy.drop_counter = NULL; } } if (MLX5_CAP_ESW_EGRESS_ACL(dev, flow_counter)) { - vport->egress.drop_counter = mlx5_fc_create(dev, false); - if (IS_ERR(vport->egress.drop_counter)) { + vport->egress.legacy.drop_counter = mlx5_fc_create(dev, false); + if (IS_ERR(vport->egress.legacy.drop_counter)) { esw_warn(dev, "vport[%d] configure egress drop rule counter failed\n", vport->vport); - vport->egress.drop_counter = NULL; + vport->egress.legacy.drop_counter = NULL; } } } -static void esw_vport_destroy_drop_counters(struct mlx5_vport *vport) +static void esw_legacy_vport_destroy_drop_counters(struct mlx5_vport *vport) { struct mlx5_core_dev *dev = vport->dev; - if (vport->ingress.drop_counter) - mlx5_fc_destroy(dev, vport->ingress.drop_counter); - if (vport->egress.drop_counter) - mlx5_fc_destroy(dev, vport->egress.drop_counter); + if (vport->ingress.legacy.drop_counter) + mlx5_fc_destroy(dev, vport->ingress.legacy.drop_counter); + if (vport->egress.legacy.drop_counter) + mlx5_fc_destroy(dev, vport->egress.legacy.drop_counter); } static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, @@ -1715,7 +1717,7 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, /* Create steering drop counters for ingress and egress ACLs */ if (!mlx5_esw_is_manager_vport(esw, vport_num) && esw->mode == MLX5_ESWITCH_LEGACY) - esw_vport_create_drop_counters(vport); + esw_legacy_vport_create_drop_counters(vport); /* Restore old vport configuration */ esw_apply_vport_conf(esw, vport); @@ -1775,7 +1777,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, MLX5_VPORT_ADMIN_STATE_DOWN); esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); - esw_vport_destroy_drop_counters(vport); + esw_legacy_vport_destroy_drop_counters(vport); } esw->enabled_vports--; mutex_unlock(&esw->state_lock); @@ -2495,12 +2497,12 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, if (!vport->enabled || esw->mode != MLX5_ESWITCH_LEGACY) return 0; - if (vport->egress.drop_counter) - mlx5_fc_query(dev, vport->egress.drop_counter, + if (vport->egress.legacy.drop_counter) + mlx5_fc_query(dev, vport->egress.legacy.drop_counter, &stats->rx_dropped, &bytes); - if (vport->ingress.drop_counter) - mlx5_fc_query(dev, vport->ingress.drop_counter, + if (vport->ingress.legacy.drop_counter) + mlx5_fc_query(dev, vport->ingress.legacy.drop_counter, &stats->tx_dropped, &bytes); if (!MLX5_CAP_GEN(dev, receive_discard_vport_down) && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 5f862992b9c8..ec0ef7c5d539 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -70,8 +70,10 @@ struct vport_ingress { struct mlx5_flow_group *allow_untagged_only_grp; struct mlx5_flow_group *drop_grp; struct mlx5_flow_handle *allow_rule; - struct mlx5_flow_handle *drop_rule; - struct mlx5_fc *drop_counter; + struct { + struct mlx5_flow_handle *drop_rule; + struct mlx5_fc *drop_counter; + } legacy; struct { struct mlx5_modify_hdr *modify_metadata; struct mlx5_flow_handle *modify_metadata_rule; @@ -83,8 +85,10 @@ struct vport_egress { struct mlx5_flow_group *allowed_vlans_grp; struct mlx5_flow_group *drop_grp; struct mlx5_flow_handle *allowed_vlan; - struct mlx5_flow_handle *drop_rule; - struct mlx5_fc *drop_counter; + struct { + struct mlx5_flow_handle *drop_rule; + struct mlx5_fc *drop_counter; + } legacy; }; struct mlx5_vport_drop_stats { From 77b094305b1ba23e716bb34d3e33c8fe30a5f487 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 28 Oct 2019 23:35:13 +0000 Subject: [PATCH 10/39] net/mlx5: Tide up state_lock and vport enabled flag usage When eswitch is disabled, vport event handler is unregistered. This unregistration already synchronizes with running EQ event handler in below code flow. mlx5_eswitch_disable() mlx5_eswitch_event_handlers_unregister() mlx5_eq_notifier_unregister() atomic_notifier_chain_unregister() synchronize_rcu() notifier_callchain eswitch_vport_event() queue_work() Additionally vport->enabled flag is set under state_lock during esw_enable_vport() but is not read under state_lock in (a) esw_disable_vport() and (b) under atomic context eswitch_vport_event(). It is also necessary to synchronize with already scheduled vport event. This is already achieved using below sequence. mlx5_eswitch_event_handlers_unregister() [..] flush_workqueue() Hence, (a) Remove vport->enabled check in eswitch_vport_event() which doesn't make any sense. (b) Remove redundant flush_workqueue() on every vport disable. (c) Keep esw_disable_vport() symmetric with esw_enable_vport() for state_lock. Signed-off-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 0dd5e5d5ea35..f15ffb8f5cac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1750,18 +1750,16 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, { u16 vport_num = vport->vport; + mutex_lock(&esw->state_lock); if (!vport->enabled) - return; + goto done; esw_debug(esw->dev, "Disabling vport(%d)\n", vport_num); /* Mark this vport as disabled to discard new events */ vport->enabled = false; - /* Wait for current already scheduled events to complete */ - flush_workqueue(esw->work_queue); /* Disable events from this vport */ arm_vport_context_events_cmd(esw->dev, vport->vport, 0); - mutex_lock(&esw->state_lock); /* We don't assume VFs will cleanup after themselves. * Calling vport change handler while vport is disabled will cleanup * the vport resources. @@ -1780,6 +1778,8 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, esw_legacy_vport_destroy_drop_counters(vport); } esw->enabled_vports--; + +done: mutex_unlock(&esw->state_lock); } @@ -1793,12 +1793,8 @@ static int eswitch_vport_event(struct notifier_block *nb, vport_num = be16_to_cpu(eqe->data.vport_change.vport_num); vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) - return NOTIFY_OK; - - if (vport->enabled) + if (!IS_ERR(vport)) queue_work(esw->work_queue, &vport->vport_change_handler); - return NOTIFY_OK; } From 925a6acc77a70f8b5bfd0df75e36557aa400b0a0 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 28 Oct 2019 23:35:15 +0000 Subject: [PATCH 11/39] net/mlx5: E-switch, Prepare code to handle vport enable error In subsequent patch, esw_enable_vport() could fail and return error. Prepare code to handle such error. Signed-off-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 62 ++++++++++++++----- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 2 +- .../mellanox/mlx5/core/eswitch_offloads.c | 5 +- 3 files changed, 50 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index f15ffb8f5cac..0bdaef508e74 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -452,6 +452,13 @@ static int esw_create_legacy_table(struct mlx5_eswitch *esw) return err; } +static void esw_destroy_legacy_table(struct mlx5_eswitch *esw) +{ + esw_cleanup_vepa_rules(esw); + esw_destroy_legacy_fdb_table(esw); + esw_destroy_legacy_vepa_table(esw); +} + #define MLX5_LEGACY_SRIOV_VPORT_EVENTS (MLX5_VPORT_UC_ADDR_CHANGE | \ MLX5_VPORT_MC_ADDR_CHANGE | \ MLX5_VPORT_PROMISC_CHANGE) @@ -464,15 +471,10 @@ static int esw_legacy_enable(struct mlx5_eswitch *esw) if (ret) return ret; - mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS); - return 0; -} - -static void esw_destroy_legacy_table(struct mlx5_eswitch *esw) -{ - esw_cleanup_vepa_rules(esw); - esw_destroy_legacy_fdb_table(esw); - esw_destroy_legacy_vepa_table(esw); + ret = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS); + if (ret) + esw_destroy_legacy_table(esw); + return ret; } static void esw_legacy_disable(struct mlx5_eswitch *esw) @@ -1704,8 +1706,8 @@ static void esw_legacy_vport_destroy_drop_counters(struct mlx5_vport *vport) mlx5_fc_destroy(dev, vport->egress.legacy.drop_counter); } -static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, - enum mlx5_eswitch_vport_event enabled_events) +static int esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + enum mlx5_eswitch_vport_event enabled_events) { u16 vport_num = vport->vport; @@ -1743,6 +1745,7 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, esw->enabled_vports++; esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num); mutex_unlock(&esw->state_lock); + return 0; } static void esw_disable_vport(struct mlx5_eswitch *esw, @@ -1856,26 +1859,51 @@ static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw) /* mlx5_eswitch_enable_pf_vf_vports() enables vports of PF, ECPF and VFs * whichever are present on the eswitch. */ -void +int mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, enum mlx5_eswitch_vport_event enabled_events) { struct mlx5_vport *vport; + int num_vfs; + int ret; int i; /* Enable PF vport */ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); - esw_enable_vport(esw, vport, enabled_events); + ret = esw_enable_vport(esw, vport, enabled_events); + if (ret) + return ret; - /* Enable ECPF vports */ + /* Enable ECPF vport */ if (mlx5_ecpf_vport_exists(esw->dev)) { vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); - esw_enable_vport(esw, vport, enabled_events); + ret = esw_enable_vport(esw, vport, enabled_events); + if (ret) + goto ecpf_err; } /* Enable VF vports */ - mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) - esw_enable_vport(esw, vport, enabled_events); + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) { + ret = esw_enable_vport(esw, vport, enabled_events); + if (ret) + goto vf_err; + } + return 0; + +vf_err: + num_vfs = i - 1; + mlx5_esw_for_each_vf_vport_reverse(esw, i, vport, num_vfs) + esw_disable_vport(esw, vport); + + if (mlx5_ecpf_vport_exists(esw->dev)) { + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); + esw_disable_vport(esw, vport); + } + +ecpf_err: + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); + esw_disable_vport(esw, vport); + return ret; } /* mlx5_eswitch_disable_pf_vf_vports() disables vports of PF, ECPF and VFs diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index ec0ef7c5d539..36edee35f155 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -609,7 +609,7 @@ bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num); void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs); int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data); -void +int mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, enum mlx5_eswitch_vport_event enabled_events); void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 874e70e3792a..98df1eeee873 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2139,7 +2139,9 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) if (err) goto err_vport_metadata; - mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE); + err = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_VPORT_UC_ADDR_CHANGE); + if (err) + goto err_vports; err = esw_offloads_load_all_reps(esw); if (err) @@ -2152,6 +2154,7 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) err_reps: mlx5_eswitch_disable_pf_vf_vports(esw); +err_vports: esw_set_passing_vport_metadata(esw, false); err_vport_metadata: esw_offloads_steering_cleanup(esw); From f5d0c01d65adba2b898836894d200e85c8a8def3 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 28 Oct 2019 23:35:17 +0000 Subject: [PATCH 12/39] net/mlx5: E-switch, Legacy introduce and use per vport acl tables APIs Introduce and use per vport ACL tables creation and destroy APIs, so that subsequently patch can use them during enabling/disabling a vport in unified way for legacy vs offloads mode. Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 73 +++++++++++++++---- 1 file changed, 60 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 0bdaef508e74..47555e272dda 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1663,12 +1663,6 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw, SET_VLAN_STRIP | SET_VLAN_INSERT : 0; modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos, flags); - - /* Only legacy mode needs ACLs */ - if (esw->mode == MLX5_ESWITCH_LEGACY) { - esw_vport_ingress_config(esw, vport); - esw_vport_egress_config(esw, vport); - } } static void esw_legacy_vport_create_drop_counters(struct mlx5_vport *vport) @@ -1706,10 +1700,59 @@ static void esw_legacy_vport_destroy_drop_counters(struct mlx5_vport *vport) mlx5_fc_destroy(dev, vport->egress.legacy.drop_counter); } +static int esw_vport_create_legacy_acl_tables(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int ret; + + /* Only non manager vports need ACL in legacy mode */ + if (mlx5_esw_is_manager_vport(esw, vport->vport)) + return 0; + + ret = esw_vport_ingress_config(esw, vport); + if (ret) + return ret; + + ret = esw_vport_egress_config(esw, vport); + if (ret) + esw_vport_disable_ingress_acl(esw, vport); + + return ret; +} + +static int esw_vport_setup_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (esw->mode == MLX5_ESWITCH_LEGACY) + return esw_vport_create_legacy_acl_tables(esw, vport); + + return 0; +} + +static void esw_vport_destroy_legacy_acl_tables(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) + +{ + if (mlx5_esw_is_manager_vport(esw, vport->vport)) + return; + + esw_vport_disable_egress_acl(esw, vport); + esw_vport_disable_ingress_acl(esw, vport); + esw_legacy_vport_destroy_drop_counters(vport); +} + +static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (esw->mode == MLX5_ESWITCH_LEGACY) + esw_vport_destroy_legacy_acl_tables(esw, vport); +} + static int esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, enum mlx5_eswitch_vport_event enabled_events) { u16 vport_num = vport->vport; + int ret; mutex_lock(&esw->state_lock); WARN_ON(vport->enabled); @@ -1724,6 +1767,10 @@ static int esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, /* Restore old vport configuration */ esw_apply_vport_conf(esw, vport); + ret = esw_vport_setup_acl(esw, vport); + if (ret) + goto done; + /* Attach vport to the eswitch rate limiter */ if (esw_vport_enable_qos(esw, vport, vport->info.max_rate, vport->qos.bw_share)) @@ -1744,8 +1791,9 @@ static int esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, esw->enabled_vports++; esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num); +done: mutex_unlock(&esw->state_lock); - return 0; + return ret; } static void esw_disable_vport(struct mlx5_eswitch *esw, @@ -1770,16 +1818,15 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, esw_vport_change_handle_locked(vport); vport->enabled_events = 0; esw_vport_disable_qos(esw, vport); - if (!mlx5_esw_is_manager_vport(esw, vport_num) && - esw->mode == MLX5_ESWITCH_LEGACY) { + + if (!mlx5_esw_is_manager_vport(esw, vport->vport) && + esw->mode == MLX5_ESWITCH_LEGACY) mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, vport_num, 1, MLX5_VPORT_ADMIN_STATE_DOWN); - esw_vport_disable_egress_acl(esw, vport); - esw_vport_disable_ingress_acl(esw, vport); - esw_legacy_vport_destroy_drop_counters(vport); - } + + esw_vport_cleanup_acl(esw, vport); esw->enabled_vports--; done: From b7752f8341c4fecc4720fbd58f868e114a57fdea Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 28 Oct 2019 23:35:19 +0000 Subject: [PATCH 13/39] net/mlx5: Move ACL drop counters life cycle close to ACL lifecycle It is better to create/destroy ACL related drop counters where the actual drop rule ACLs are created/destroyed, so that ACL configuration is self contained for ingress and egress. Signed-off-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 82 +++++++++---------- 1 file changed, 39 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 47555e272dda..0e5113167739 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1665,41 +1665,6 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw, flags); } -static void esw_legacy_vport_create_drop_counters(struct mlx5_vport *vport) -{ - struct mlx5_core_dev *dev = vport->dev; - - if (MLX5_CAP_ESW_INGRESS_ACL(dev, flow_counter)) { - vport->ingress.legacy.drop_counter = mlx5_fc_create(dev, false); - if (IS_ERR(vport->ingress.legacy.drop_counter)) { - esw_warn(dev, - "vport[%d] configure ingress drop rule counter failed\n", - vport->vport); - vport->ingress.legacy.drop_counter = NULL; - } - } - - if (MLX5_CAP_ESW_EGRESS_ACL(dev, flow_counter)) { - vport->egress.legacy.drop_counter = mlx5_fc_create(dev, false); - if (IS_ERR(vport->egress.legacy.drop_counter)) { - esw_warn(dev, - "vport[%d] configure egress drop rule counter failed\n", - vport->vport); - vport->egress.legacy.drop_counter = NULL; - } - } -} - -static void esw_legacy_vport_destroy_drop_counters(struct mlx5_vport *vport) -{ - struct mlx5_core_dev *dev = vport->dev; - - if (vport->ingress.legacy.drop_counter) - mlx5_fc_destroy(dev, vport->ingress.legacy.drop_counter); - if (vport->egress.legacy.drop_counter) - mlx5_fc_destroy(dev, vport->egress.legacy.drop_counter); -} - static int esw_vport_create_legacy_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { @@ -1709,14 +1674,46 @@ static int esw_vport_create_legacy_acl_tables(struct mlx5_eswitch *esw, if (mlx5_esw_is_manager_vport(esw, vport->vport)) return 0; + if (!mlx5_esw_is_manager_vport(esw, vport->vport) && + MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) { + vport->ingress.legacy.drop_counter = mlx5_fc_create(esw->dev, false); + if (IS_ERR(vport->ingress.legacy.drop_counter)) { + esw_warn(esw->dev, + "vport[%d] configure ingress drop rule counter failed\n", + vport->vport); + vport->ingress.legacy.drop_counter = NULL; + } + } + ret = esw_vport_ingress_config(esw, vport); if (ret) - return ret; + goto ingress_err; + + if (!mlx5_esw_is_manager_vport(esw, vport->vport) && + MLX5_CAP_ESW_EGRESS_ACL(esw->dev, flow_counter)) { + vport->egress.legacy.drop_counter = mlx5_fc_create(esw->dev, false); + if (IS_ERR(vport->egress.legacy.drop_counter)) { + esw_warn(esw->dev, + "vport[%d] configure egress drop rule counter failed\n", + vport->vport); + vport->egress.legacy.drop_counter = NULL; + } + } ret = esw_vport_egress_config(esw, vport); if (ret) - esw_vport_disable_ingress_acl(esw, vport); + goto egress_err; + return 0; + +egress_err: + esw_vport_disable_ingress_acl(esw, vport); + mlx5_fc_destroy(esw->dev, vport->egress.legacy.drop_counter); + vport->egress.legacy.drop_counter = NULL; + +ingress_err: + mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter); + vport->ingress.legacy.drop_counter = NULL; return ret; } @@ -1737,8 +1734,12 @@ static void esw_vport_destroy_legacy_acl_tables(struct mlx5_eswitch *esw, return; esw_vport_disable_egress_acl(esw, vport); + mlx5_fc_destroy(esw->dev, vport->egress.legacy.drop_counter); + vport->egress.legacy.drop_counter = NULL; + esw_vport_disable_ingress_acl(esw, vport); - esw_legacy_vport_destroy_drop_counters(vport); + mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter); + vport->ingress.legacy.drop_counter = NULL; } static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw, @@ -1759,11 +1760,6 @@ static int esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); - /* Create steering drop counters for ingress and egress ACLs */ - if (!mlx5_esw_is_manager_vport(esw, vport_num) && - esw->mode == MLX5_ESWITCH_LEGACY) - esw_legacy_vport_create_drop_counters(vport); - /* Restore old vport configuration */ esw_apply_vport_conf(esw, vport); From 89a0f1fb16adca959ea1485a856fbcfcd1d24208 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 28 Oct 2019 23:35:20 +0000 Subject: [PATCH 14/39] net/mlx5: E-switch, Offloads introduce and use per vport acl tables APIs Introduce and use per vport ACL tables creation and destroy APIs, so that subsequently patch can use them during enabling/disabling a vport. Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 49 ++++++++++++------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 98df1eeee873..94eb18ae33a4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1950,6 +1950,32 @@ esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw) return true; } +static int +esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int err; + + err = esw_vport_ingress_config(esw, vport); + if (err) + return err; + + if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) { + err = esw_vport_egress_config(esw, vport); + if (err) + esw_vport_disable_ingress_acl(esw, vport); + } + return err; +} + +static void +esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + esw_vport_disable_egress_acl(esw, vport); + esw_vport_disable_ingress_acl(esw, vport); +} + static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw) { struct mlx5_vport *vport; @@ -1960,15 +1986,9 @@ static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw) esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; mlx5_esw_for_all_vports(esw, i, vport) { - err = esw_vport_ingress_config(esw, vport); + err = esw_vport_create_offloads_acl_tables(esw, vport); if (err) - goto err_ingress; - - if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) { - err = esw_vport_egress_config(esw, vport); - if (err) - goto err_egress; - } + goto err_acl_table; } if (mlx5_eswitch_vport_match_metadata_enabled(esw)) @@ -1976,13 +1996,10 @@ static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw) return 0; -err_egress: - esw_vport_disable_ingress_acl(esw, vport); -err_ingress: +err_acl_table: for (j = MLX5_VPORT_PF; j < i; j++) { vport = &esw->vports[j]; - esw_vport_disable_egress_acl(esw, vport); - esw_vport_disable_ingress_acl(esw, vport); + esw_vport_destroy_offloads_acl_tables(esw, vport); } return err; @@ -1993,10 +2010,8 @@ static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw) struct mlx5_vport *vport; int i; - mlx5_esw_for_all_vports(esw, i, vport) { - esw_vport_disable_egress_acl(esw, vport); - esw_vport_disable_ingress_acl(esw, vport); - } + mlx5_esw_for_all_vports(esw, i, vport) + esw_vport_destroy_offloads_acl_tables(esw, vport); esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; } From 748da30b376e034ae54b53e7e38e15cfa2bf4dda Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Mon, 28 Oct 2019 23:35:22 +0000 Subject: [PATCH 15/39] net/mlx5: E-switch, Offloads shift ACL programming during enable/disable vport Currently legacy mode enables ACL while enabling vport, while offloads mode enable ACL when moving to offloads mode. Bring consistency to both modes by enabling/disabling ACL when enabling/disabling a vport. It also eliminates creating ingress ACL table on unused ECPF vport in offloads mode. Signed-off-by: Vu Pham Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 6 ++- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 7 ++++ .../mellanox/mlx5/core/eswitch_offloads.c | 42 ++++++------------- 3 files changed, 24 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 0e5113167739..1ce6ae1c446e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1722,8 +1722,8 @@ static int esw_vport_setup_acl(struct mlx5_eswitch *esw, { if (esw->mode == MLX5_ESWITCH_LEGACY) return esw_vport_create_legacy_acl_tables(esw, vport); - - return 0; + else + return esw_vport_create_offloads_acl_tables(esw, vport); } static void esw_vport_destroy_legacy_acl_tables(struct mlx5_eswitch *esw, @@ -1747,6 +1747,8 @@ static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw, { if (esw->mode == MLX5_ESWITCH_LEGACY) esw_vport_destroy_legacy_acl_tables(esw, vport); + else + esw_vport_destroy_offloads_acl_tables(esw, vport); } static int esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 36edee35f155..d926bdacbdcc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -614,6 +614,13 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, enum mlx5_eswitch_vport_event enabled_events); void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw); +int +esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); +void +esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 94eb18ae33a4..ce30ead90617 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1950,7 +1950,7 @@ esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw) return true; } -static int +int esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { @@ -1968,7 +1968,7 @@ esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, return err; } -static void +void esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { @@ -1976,43 +1976,27 @@ esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, esw_vport_disable_ingress_acl(esw, vport); } -static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw) +static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) { struct mlx5_vport *vport; - int i, j; int err; if (esw_check_vport_match_metadata_supported(esw)) esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; - mlx5_esw_for_all_vports(esw, i, vport) { - err = esw_vport_create_offloads_acl_tables(esw, vport); - if (err) - goto err_acl_table; - } - - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) - esw_info(esw->dev, "Use metadata reg_c as source vport to match\n"); - - return 0; - -err_acl_table: - for (j = MLX5_VPORT_PF; j < i; j++) { - vport = &esw->vports[j]; - esw_vport_destroy_offloads_acl_tables(esw, vport); - } - + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + err = esw_vport_create_offloads_acl_tables(esw, vport); + if (err) + esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; return err; } -static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw) +static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) { struct mlx5_vport *vport; - int i; - - mlx5_esw_for_all_vports(esw, i, vport) - esw_vport_destroy_offloads_acl_tables(esw, vport); + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + esw_vport_destroy_offloads_acl_tables(esw, vport); esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; } @@ -2030,7 +2014,7 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw) memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); - err = esw_create_offloads_acl_tables(esw); + err = esw_create_uplink_offloads_acl_tables(esw); if (err) return err; @@ -2055,7 +2039,7 @@ create_ft_err: esw_destroy_offloads_fdb_tables(esw); create_fdb_err: - esw_destroy_offloads_acl_tables(esw); + esw_destroy_uplink_offloads_acl_tables(esw); return err; } @@ -2065,7 +2049,7 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) esw_destroy_vport_rx_group(esw); esw_destroy_offloads_table(esw); esw_destroy_offloads_fdb_tables(esw); - esw_destroy_offloads_acl_tables(esw); + esw_destroy_uplink_offloads_acl_tables(esw); } static void From a962d7a61e2404cda6a89bfa5cc193c62223bb5e Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 28 Oct 2019 23:35:24 +0000 Subject: [PATCH 16/39] net/mlx5: Restrict metadata disablement to offloads mode Now that there is clear separation for acl setup/cleanup between legacy and offloads mode, limit metdata disablement to offloads mode. Signed-off-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 -- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 2 -- .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 9 ++++++--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 1ce6ae1c446e..61459c06f56c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1214,8 +1214,6 @@ void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, mlx5_del_flow_rules(vport->ingress.allow_rule); vport->ingress.allow_rule = NULL; } - - esw_vport_del_ingress_acl_modify_metadata(esw, vport); } void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index d926bdacbdcc..aa3588446cba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -267,8 +267,6 @@ void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, struct mlx5_vport *vport); void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, struct mlx5_vport *vport); -void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, - struct mlx5_vport *vport); int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index ce30ead90617..b536c8fa0061 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1847,8 +1847,8 @@ out: return err; } -void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +static void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { if (vport->ingress.offloads.modify_metadata_rule) { mlx5_del_flow_rules(vport->ingress.offloads.modify_metadata_rule); @@ -1962,8 +1962,10 @@ esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) { err = esw_vport_egress_config(esw, vport); - if (err) + if (err) { + esw_vport_del_ingress_acl_modify_metadata(esw, vport); esw_vport_disable_ingress_acl(esw, vport); + } } return err; } @@ -1973,6 +1975,7 @@ esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { esw_vport_disable_egress_acl(esw, vport); + esw_vport_del_ingress_acl_modify_metadata(esw, vport); esw_vport_disable_ingress_acl(esw, vport); } From 10652f39943ec19d32a6fa44a8523b0d40abcbcf Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 28 Oct 2019 23:35:26 +0000 Subject: [PATCH 17/39] net/mlx5: Refactor ingress acl configuration Drop, untagged, spoof check and untagged spoof check flow groups are limited to legacy mode only. Therefore, following refactoring is done to (a) improve code readability (b) have better code split between legacy and offloads mode 1. Move legacy flow groups under legacy structure 2. Add validity check for group deletion 3. Restrict scope of esw_vport_disable_ingress_acl to legacy mode 4. Rename esw_vport_enable_ingress_acl() to esw_vport_create_ingress_acl_table() and limit its scope to table creation 5. Introduce legacy flow groups creation helper esw_legacy_create_ingress_acl_groups() and keep its scope to legacy mode 6. Reduce offloads ingress groups from 4 to just 1 metadata group per vport 7. Removed redundant IS_ERR_OR_NULL as entries are marked NULL on free. 8. Shortern error message to remove redundant 'E-switch' Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 228 ++++++++++-------- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 19 +- .../mellanox/mlx5/core/eswitch_offloads.c | 67 ++++- 3 files changed, 200 insertions(+), 114 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 61459c06f56c..cc8d43d8c469 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1070,57 +1070,21 @@ void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, vport->egress.acl = NULL; } -int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +static int +esw_vport_create_legacy_ingress_acl_groups(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_core_dev *dev = esw->dev; - struct mlx5_flow_namespace *root_ns; - struct mlx5_flow_table *acl; struct mlx5_flow_group *g; void *match_criteria; u32 *flow_group_in; - /* The ingress acl table contains 4 groups - * (2 active rules at the same time - - * 1 allow rule from one of the first 3 groups. - * 1 drop rule from the last group): - * 1)Allow untagged traffic with smac=original mac. - * 2)Allow untagged traffic. - * 3)Allow traffic with smac=original mac. - * 4)Drop all other traffic. - */ - int table_size = 4; - int err = 0; - - if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) - return -EOPNOTSUPP; - - if (!IS_ERR_OR_NULL(vport->ingress.acl)) - return 0; - - esw_debug(dev, "Create vport[%d] ingress ACL log_max_size(%d)\n", - vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size)); - - root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, - mlx5_eswitch_vport_num_to_index(esw, vport->vport)); - if (!root_ns) { - esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n", vport->vport); - return -EOPNOTSUPP; - } + int err; flow_group_in = kvzalloc(inlen, GFP_KERNEL); if (!flow_group_in) return -ENOMEM; - acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport); - if (IS_ERR(acl)) { - err = PTR_ERR(acl); - esw_warn(dev, "Failed to create E-Switch vport[%d] ingress flow Table, err(%d)\n", - vport->vport, err); - goto out; - } - vport->ingress.acl = acl; - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); @@ -1130,14 +1094,14 @@ int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); - g = mlx5_create_flow_group(acl, flow_group_in); + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); if (IS_ERR(g)) { err = PTR_ERR(g); - esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged spoofchk flow group, err(%d)\n", + esw_warn(dev, "vport[%d] ingress create untagged spoofchk flow group, err(%d)\n", vport->vport, err); - goto out; + goto spoof_err; } - vport->ingress.allow_untagged_spoofchk_grp = g; + vport->ingress.legacy.allow_untagged_spoofchk_grp = g; memset(flow_group_in, 0, inlen); MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); @@ -1145,14 +1109,14 @@ int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); - g = mlx5_create_flow_group(acl, flow_group_in); + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); if (IS_ERR(g)) { err = PTR_ERR(g); - esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged flow group, err(%d)\n", + esw_warn(dev, "vport[%d] ingress create untagged flow group, err(%d)\n", vport->vport, err); - goto out; + goto untagged_err; } - vport->ingress.allow_untagged_only_grp = g; + vport->ingress.legacy.allow_untagged_only_grp = g; memset(flow_group_in, 0, inlen); MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); @@ -1161,80 +1125,134 @@ int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 2); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2); - g = mlx5_create_flow_group(acl, flow_group_in); + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); if (IS_ERR(g)) { err = PTR_ERR(g); - esw_warn(dev, "Failed to create E-Switch vport[%d] ingress spoofchk flow group, err(%d)\n", + esw_warn(dev, "vport[%d] ingress create spoofchk flow group, err(%d)\n", vport->vport, err); - goto out; + goto allow_spoof_err; } - vport->ingress.allow_spoofchk_only_grp = g; + vport->ingress.legacy.allow_spoofchk_only_grp = g; memset(flow_group_in, 0, inlen); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 3); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 3); - g = mlx5_create_flow_group(acl, flow_group_in); + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); if (IS_ERR(g)) { err = PTR_ERR(g); - esw_warn(dev, "Failed to create E-Switch vport[%d] ingress drop flow group, err(%d)\n", + esw_warn(dev, "vport[%d] ingress create drop flow group, err(%d)\n", vport->vport, err); - goto out; + goto drop_err; } - vport->ingress.drop_grp = g; + vport->ingress.legacy.drop_grp = g; + kvfree(flow_group_in); + return 0; -out: - if (err) { - if (!IS_ERR_OR_NULL(vport->ingress.allow_spoofchk_only_grp)) - mlx5_destroy_flow_group( - vport->ingress.allow_spoofchk_only_grp); - if (!IS_ERR_OR_NULL(vport->ingress.allow_untagged_only_grp)) - mlx5_destroy_flow_group( - vport->ingress.allow_untagged_only_grp); - if (!IS_ERR_OR_NULL(vport->ingress.allow_untagged_spoofchk_grp)) - mlx5_destroy_flow_group( - vport->ingress.allow_untagged_spoofchk_grp); - if (!IS_ERR_OR_NULL(vport->ingress.acl)) - mlx5_destroy_flow_table(vport->ingress.acl); +drop_err: + if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_spoofchk_only_grp)) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp); + vport->ingress.legacy.allow_spoofchk_only_grp = NULL; } - +allow_spoof_err: + if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_only_grp)) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp); + vport->ingress.legacy.allow_untagged_only_grp = NULL; + } +untagged_err: + if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_spoofchk_grp)) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp); + vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL; + } +spoof_err: kvfree(flow_group_in); return err; } +int esw_vport_create_ingress_acl_table(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, int table_size) +{ + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *acl; + int vport_index; + int err; + + if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) + return -EOPNOTSUPP; + + esw_debug(dev, "Create vport[%d] ingress ACL log_max_size(%d)\n", + vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size)); + + vport_index = mlx5_eswitch_vport_num_to_index(esw, vport->vport); + root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, + vport_index); + if (!root_ns) { + esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n", + vport->vport); + return -EOPNOTSUPP; + } + + acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport); + if (IS_ERR(acl)) { + err = PTR_ERR(acl); + esw_warn(dev, "vport[%d] ingress create flow Table, err(%d)\n", + vport->vport, err); + return err; + } + vport->ingress.acl = acl; + return 0; +} + +void esw_vport_destroy_ingress_acl_table(struct mlx5_vport *vport) +{ + if (!vport->ingress.acl) + return; + + mlx5_destroy_flow_table(vport->ingress.acl); + vport->ingress.acl = NULL; +} + void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - if (!IS_ERR_OR_NULL(vport->ingress.legacy.drop_rule)) { + if (vport->ingress.legacy.drop_rule) { mlx5_del_flow_rules(vport->ingress.legacy.drop_rule); vport->ingress.legacy.drop_rule = NULL; } - if (!IS_ERR_OR_NULL(vport->ingress.allow_rule)) { + if (vport->ingress.allow_rule) { mlx5_del_flow_rules(vport->ingress.allow_rule); vport->ingress.allow_rule = NULL; } } -void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +static void esw_vport_disable_legacy_ingress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - if (IS_ERR_OR_NULL(vport->ingress.acl)) + if (!vport->ingress.acl) return; esw_debug(esw->dev, "Destroy vport[%d] E-Switch ingress ACL\n", vport->vport); esw_vport_cleanup_ingress_rules(esw, vport); - mlx5_destroy_flow_group(vport->ingress.allow_spoofchk_only_grp); - mlx5_destroy_flow_group(vport->ingress.allow_untagged_only_grp); - mlx5_destroy_flow_group(vport->ingress.allow_untagged_spoofchk_grp); - mlx5_destroy_flow_group(vport->ingress.drop_grp); - mlx5_destroy_flow_table(vport->ingress.acl); - vport->ingress.acl = NULL; - vport->ingress.drop_grp = NULL; - vport->ingress.allow_spoofchk_only_grp = NULL; - vport->ingress.allow_untagged_only_grp = NULL; - vport->ingress.allow_untagged_spoofchk_grp = NULL; + if (vport->ingress.legacy.allow_spoofchk_only_grp) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp); + vport->ingress.legacy.allow_spoofchk_only_grp = NULL; + } + if (vport->ingress.legacy.allow_untagged_only_grp) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp); + vport->ingress.legacy.allow_untagged_only_grp = NULL; + } + if (vport->ingress.legacy.allow_untagged_spoofchk_grp) { + mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp); + vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL; + } + if (vport->ingress.legacy.drop_grp) { + mlx5_destroy_flow_group(vport->ingress.legacy.drop_grp); + vport->ingress.legacy.drop_grp = NULL; + } + esw_vport_destroy_ingress_acl_table(vport); } static int esw_vport_ingress_config(struct mlx5_eswitch *esw, @@ -1249,19 +1267,36 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, int err = 0; u8 *smac_v; + /* The ingress acl table contains 4 groups + * (2 active rules at the same time - + * 1 allow rule from one of the first 3 groups. + * 1 drop rule from the last group): + * 1)Allow untagged traffic with smac=original mac. + * 2)Allow untagged traffic. + * 3)Allow traffic with smac=original mac. + * 4)Drop all other traffic. + */ + int table_size = 4; + esw_vport_cleanup_ingress_rules(esw, vport); if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) { - esw_vport_disable_ingress_acl(esw, vport); + esw_vport_disable_legacy_ingress_acl(esw, vport); return 0; } - err = esw_vport_enable_ingress_acl(esw, vport); - if (err) { - mlx5_core_warn(esw->dev, - "failed to enable ingress acl (%d) on vport[%d]\n", - err, vport->vport); - return err; + if (!vport->ingress.acl) { + err = esw_vport_create_ingress_acl_table(esw, vport, table_size); + if (err) { + esw_warn(esw->dev, + "vport[%d] enable ingress acl err (%d)\n", + err, vport->vport); + return err; + } + + err = esw_vport_create_legacy_ingress_acl_groups(esw, vport); + if (err) + goto out; } esw_debug(esw->dev, @@ -1322,10 +1357,11 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, vport->ingress.legacy.drop_rule = NULL; goto out; } + kvfree(spec); + return 0; out: - if (err) - esw_vport_cleanup_ingress_rules(esw, vport); + esw_vport_disable_legacy_ingress_acl(esw, vport); kvfree(spec); return err; } @@ -1705,7 +1741,7 @@ static int esw_vport_create_legacy_acl_tables(struct mlx5_eswitch *esw, return 0; egress_err: - esw_vport_disable_ingress_acl(esw, vport); + esw_vport_disable_legacy_ingress_acl(esw, vport); mlx5_fc_destroy(esw->dev, vport->egress.legacy.drop_counter); vport->egress.legacy.drop_counter = NULL; @@ -1735,7 +1771,7 @@ static void esw_vport_destroy_legacy_acl_tables(struct mlx5_eswitch *esw, mlx5_fc_destroy(esw->dev, vport->egress.legacy.drop_counter); vport->egress.legacy.drop_counter = NULL; - esw_vport_disable_ingress_acl(esw, vport); + esw_vport_disable_legacy_ingress_acl(esw, vport); mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter); vport->ingress.legacy.drop_counter = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index aa3588446cba..5e91735726b7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -65,16 +65,17 @@ struct vport_ingress { struct mlx5_flow_table *acl; - struct mlx5_flow_group *allow_untagged_spoofchk_grp; - struct mlx5_flow_group *allow_spoofchk_only_grp; - struct mlx5_flow_group *allow_untagged_only_grp; - struct mlx5_flow_group *drop_grp; - struct mlx5_flow_handle *allow_rule; + struct mlx5_flow_handle *allow_rule; struct { + struct mlx5_flow_group *allow_spoofchk_only_grp; + struct mlx5_flow_group *allow_untagged_spoofchk_grp; + struct mlx5_flow_group *allow_untagged_only_grp; + struct mlx5_flow_group *drop_grp; struct mlx5_flow_handle *drop_rule; struct mlx5_fc *drop_counter; } legacy; struct { + struct mlx5_flow_group *metadata_grp; struct mlx5_modify_hdr *modify_metadata; struct mlx5_flow_handle *modify_metadata_rule; } offloads; @@ -257,16 +258,16 @@ void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); int esw_offloads_init_reps(struct mlx5_eswitch *esw); void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, struct mlx5_vport *vport); -int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, - struct mlx5_vport *vport); +int esw_vport_create_ingress_acl_table(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + int table_size); +void esw_vport_destroy_ingress_acl_table(struct mlx5_vport *vport); void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw, struct mlx5_vport *vport); int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, struct mlx5_vport *vport); void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, struct mlx5_vport *vport); -void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, - struct mlx5_vport *vport); int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index b536c8fa0061..807372a7211b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1858,6 +1858,44 @@ static void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, } } +static int esw_vport_create_ingress_acl_group(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + u32 *flow_group_in; + int ret = 0; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + + g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in); + if (IS_ERR(g)) { + ret = PTR_ERR(g); + esw_warn(esw->dev, + "Failed to create vport[%d] ingress metdata group, err(%d)\n", + vport->vport, ret); + goto grp_err; + } + vport->ingress.offloads.metadata_grp = g; +grp_err: + kvfree(flow_group_in); + return ret; +} + +static void esw_vport_destroy_ingress_acl_group(struct mlx5_vport *vport) +{ + if (vport->ingress.offloads.metadata_grp) { + mlx5_destroy_flow_group(vport->ingress.offloads.metadata_grp); + vport->ingress.offloads.metadata_grp = NULL; + } +} + static int esw_vport_ingress_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { @@ -1868,8 +1906,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, return 0; esw_vport_cleanup_ingress_rules(esw, vport); - - err = esw_vport_enable_ingress_acl(esw, vport); + err = esw_vport_create_ingress_acl_table(esw, vport, 1); if (err) { esw_warn(esw->dev, "failed to enable ingress acl (%d) on vport[%d]\n", @@ -1877,25 +1914,34 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, return err; } + err = esw_vport_create_ingress_acl_group(esw, vport); + if (err) + goto group_err; + esw_debug(esw->dev, "vport[%d] configure ingress rules\n", vport->vport); if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { err = esw_vport_add_ingress_acl_modify_metadata(esw, vport); if (err) - goto out; + goto metadata_err; } if (MLX5_CAP_GEN(esw->dev, prio_tag_required) && mlx5_eswitch_is_vf_vport(esw, vport->vport)) { err = esw_vport_ingress_prio_tag_config(esw, vport); if (err) - goto out; + goto prio_tag_err; } + return 0; -out: - if (err) - esw_vport_disable_ingress_acl(esw, vport); +prio_tag_err: + esw_vport_del_ingress_acl_modify_metadata(esw, vport); +metadata_err: + esw_vport_cleanup_ingress_rules(esw, vport); + esw_vport_destroy_ingress_acl_group(vport); +group_err: + esw_vport_destroy_ingress_acl_table(vport); return err; } @@ -1964,7 +2010,8 @@ esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, err = esw_vport_egress_config(esw, vport); if (err) { esw_vport_del_ingress_acl_modify_metadata(esw, vport); - esw_vport_disable_ingress_acl(esw, vport); + esw_vport_cleanup_ingress_rules(esw, vport); + esw_vport_destroy_ingress_acl_table(vport); } } return err; @@ -1976,7 +2023,9 @@ esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, { esw_vport_disable_egress_acl(esw, vport); esw_vport_del_ingress_acl_modify_metadata(esw, vport); - esw_vport_disable_ingress_acl(esw, vport); + esw_vport_cleanup_ingress_rules(esw, vport); + esw_vport_destroy_ingress_acl_group(vport); + esw_vport_destroy_ingress_acl_table(vport); } static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) From 238302fae0216cb2e6a087ba403f3ecc3450b18b Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 28 Oct 2019 23:35:28 +0000 Subject: [PATCH 18/39] net/mlx5: E-switch, Enable metadata on own vport Currently on ECPF, metadata is enabled on the ECPF vport = 0xfffe (manager vport). Metadata when supported, must be enabled on own vport which is used to pass metadata to vport of NIC Rx Flow Table. Due to this error, traffic tagged by ingress ACL is not processed correctly at NIC rx flow table level which is supposed to work on metadata tag. Hence, instead of working on eswitch manager vport, always working on eswitch own vport regardless of PF or ECPF. Given that mlx5_eswitch_query/modify_esw_vport_context() is used to access other vport in legacy mode and own vport settings in switchdev mode, extend low level API to explicitly specify other_vport. Fixes: c1286050cf47 ("net/mlx5: E-Switch, Pass metadata from FDB to eswitch manager") Signed-off-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 29 +++++++------------ .../net/ethernet/mellanox/mlx5/core/eswitch.h | 6 ++-- .../mellanox/mlx5/core/eswitch_offloads.c | 4 +-- 3 files changed, 16 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index cc8d43d8c469..24c2217a4ce8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -111,42 +111,32 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, } /* E-Switch vport context HW commands */ -static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, - void *in, int inlen) +int mlx5_eswitch_modify_esw_vport_context(struct mlx5_core_dev *dev, u16 vport, + bool other_vport, + void *in, int inlen) { u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)] = {0}; MLX5_SET(modify_esw_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT); MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); - MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_esw_vport_context_in, in, other_vport, other_vport); return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } -int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, - void *in, int inlen) -{ - return modify_esw_vport_context_cmd(esw->dev, vport, in, inlen); -} - -static int query_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, - void *out, int outlen) +int mlx5_eswitch_query_esw_vport_context(struct mlx5_core_dev *dev, u16 vport, + bool other_vport, + void *out, int outlen) { u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {}; MLX5_SET(query_esw_vport_context_in, in, opcode, MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); - MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_esw_vport_context_in, in, other_vport, other_vport); return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } -int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, - void *out, int outlen) -{ - return query_esw_vport_context_cmd(esw->dev, vport, out, outlen); -} - static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport, u16 vlan, u8 qos, u8 set_flags) { @@ -179,7 +169,8 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport, MLX5_SET(modify_esw_vport_context_in, in, field_select.vport_cvlan_insert, 1); - return modify_esw_vport_context_cmd(dev, vport, in, sizeof(in)); + return mlx5_eswitch_modify_esw_vport_context(dev, vport, true, + in, sizeof(in)); } /* E-Switch FDB */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 5e91735726b7..a05b948a6287 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -297,9 +297,11 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, struct ifla_vf_stats *vf_stats); void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule); -int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, +int mlx5_eswitch_modify_esw_vport_context(struct mlx5_core_dev *dev, u16 vport, + bool other_vport, void *in, int inlen); -int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, +int mlx5_eswitch_query_esw_vport_context(struct mlx5_core_dev *dev, u16 vport, + bool other_vport, void *out, int outlen); struct mlx5_flow_spec; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 807372a7211b..59eebcae5df6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -600,7 +600,7 @@ static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable) if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) return 0; - err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport, + err = mlx5_eswitch_query_esw_vport_context(esw->dev, 0, false, out, sizeof(out)); if (err) return err; @@ -619,7 +619,7 @@ static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable) MLX5_SET(modify_esw_vport_context_in, in, field_select.fdb_to_vport_reg_c_id, 1); - return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport, + return mlx5_eswitch_modify_esw_vport_context(esw->dev, 0, false, in, sizeof(in)); } From e53a9d26cf80565cfb7172fc52a0dfac73613a0f Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 28 Oct 2019 23:35:30 +0000 Subject: [PATCH 19/39] IB/mlx5: Introduce and use mlx5_core_is_vf() Instead of deciding a given device is virtual function or not based on a device is PF or not, use already defined MLX5_COREDEV_VF by introducing an helper API mlx5_core_is_vf(). This enables to clearly identify PF, VF and non virtual functions. Signed-off-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/main.c | 2 +- include/linux/mlx5/driver.h | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 831539419c30..8343a740c91e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1031,7 +1031,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_GEN(mdev, cd)) props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL; - if (!mlx5_core_is_pf(mdev)) + if (mlx5_core_is_vf(mdev)) props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION; if (mlx5_ib_port_link_layer(ibdev, 1) == diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3e80f03a387f..7b4801e96feb 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1121,6 +1121,11 @@ static inline bool mlx5_core_is_pf(const struct mlx5_core_dev *dev) return dev->coredev_type == MLX5_COREDEV_PF; } +static inline bool mlx5_core_is_vf(const struct mlx5_core_dev *dev) +{ + return dev->coredev_type == MLX5_COREDEV_VF; +} + static inline bool mlx5_core_is_ecpf(struct mlx5_core_dev *dev) { return dev->caps.embedded_cpu; From 8b3f2eb038d3098b37715afced1e62bbc72da90f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 5 Nov 2019 18:27:40 +0000 Subject: [PATCH 20/39] net/mlx5: fix kvfree of uninitialized pointer spec Currently when a call to esw_vport_create_legacy_ingress_acl_group fails the error exit path to label 'out' will cause a kvfree on the uninitialized pointer spec. Fix this by ensuring pointer spec is initialized to NULL to avoid this issue. Addresses-Coverity: ("Uninitialized pointer read") Fixes: 10652f39943e ("net/mlx5: Refactor ingress acl configuration") Signed-off-by: Colin Ian King Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 24c2217a4ce8..48627472a691 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1253,7 +1253,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, struct mlx5_flow_destination drop_ctr_dst = {0}; struct mlx5_flow_destination *dst = NULL; struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_spec *spec; + struct mlx5_flow_spec *spec = NULL; int dest_num = 0; int err = 0; u8 *smac_v; From 9ea7f01f470a25bb795224cc0ecc57c91a1519c6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 5 Nov 2019 14:54:16 +0000 Subject: [PATCH 21/39] net/mlx5: fix spelling mistake "metdata" -> "metadata" There is a spelling mistake in a esw_warn warning message. Fix it. Signed-off-by: Colin Ian King Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 59eebcae5df6..d8e25416a15d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1878,7 +1878,7 @@ static int esw_vport_create_ingress_acl_group(struct mlx5_eswitch *esw, if (IS_ERR(g)) { ret = PTR_ERR(g); esw_warn(esw->dev, - "Failed to create vport[%d] ingress metdata group, err(%d)\n", + "Failed to create vport[%d] ingress metadata group, err(%d)\n", vport->vport, ret); goto grp_err; } From 6c7295e13ffd5623b02f1adc1442f1d8a3d52424 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Fri, 8 Nov 2019 23:45:20 +0000 Subject: [PATCH 22/39] devlink: Add new "enable_roce" generic device param New device parameter to enable/disable handling of RoCE traffic in the device. Signed-off-by: Michael Guralnik Acked-by: Jiri Pirko Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- Documentation/networking/devlink-params.txt | 4 ++++ include/net/devlink.h | 4 ++++ net/core/devlink.c | 5 +++++ 3 files changed, 13 insertions(+) diff --git a/Documentation/networking/devlink-params.txt b/Documentation/networking/devlink-params.txt index ddba3e9b55b1..04e234e9acc9 100644 --- a/Documentation/networking/devlink-params.txt +++ b/Documentation/networking/devlink-params.txt @@ -65,3 +65,7 @@ reset_dev_on_drv_probe [DEVICE, GENERIC] Reset only if device firmware can be found in the filesystem. Type: u8 + +enable_roce [DEVICE, GENERIC] + Enable handling of RoCE traffic in the device. + Type: Boolean diff --git a/include/net/devlink.h b/include/net/devlink.h index 23e4b65ec9df..39fb4d957838 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -400,6 +400,7 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN, DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY, DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -434,6 +435,9 @@ enum devlink_param_generic_id { "reset_dev_on_drv_probe" #define DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_TYPE DEVLINK_PARAM_TYPE_U8 +#define DEVLINK_PARAM_GENERIC_ENABLE_ROCE_NAME "enable_roce" +#define DEVLINK_PARAM_GENERIC_ENABLE_ROCE_TYPE DEVLINK_PARAM_TYPE_BOOL + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ diff --git a/net/core/devlink.c b/net/core/devlink.c index f80151eeaf51..0fbcd44aa64f 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2884,6 +2884,11 @@ static const struct devlink_param devlink_param_generic[] = { .name = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_NAME, .type = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_TYPE, }, + { + .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, + .name = DEVLINK_PARAM_GENERIC_ENABLE_ROCE_NAME, + .type = DEVLINK_PARAM_GENERIC_ENABLE_ROCE_TYPE, + }, }; static int devlink_param_generic_verify(const struct devlink_param *param) From e90cde0d76f01fd3b60da0a983d2e93c5c35bedc Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Fri, 8 Nov 2019 23:45:22 +0000 Subject: [PATCH 23/39] net/mlx5: Document flow_steering_mode devlink param Add documentation for current mlx5 supported devlink param. Signed-off-by: Michael Guralnik Acked-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- Documentation/networking/devlink-params-mlx5.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 Documentation/networking/devlink-params-mlx5.txt diff --git a/Documentation/networking/devlink-params-mlx5.txt b/Documentation/networking/devlink-params-mlx5.txt new file mode 100644 index 000000000000..8c0b82d655dc --- /dev/null +++ b/Documentation/networking/devlink-params-mlx5.txt @@ -0,0 +1,12 @@ +flow_steering_mode [DEVICE, DRIVER-SPECIFIC] + Controls the flow steering mode of the driver. + Two modes are supported: + 1. 'dmfs' - Device managed flow steering. + 2. 'smfs - Software/Driver managed flow steering. + In DMFS mode, the HW steering entities are created and + managed through the Firmware. + In SMFS mode, the HW steering entities are created and + managed though by the driver directly into Hardware + without firmware intervention. + Type: String + Configuration mode: runtime From cc9defcbb8fae52810f7795b039223edae51ef95 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Fri, 8 Nov 2019 23:45:24 +0000 Subject: [PATCH 24/39] net/mlx5: Handle "enable_roce" devlink param Register "enable_roce" param, default value is RoCE enabled. Current configuration is stored on mlx5_core_dev and exposed to user through the cmode runtime devlink param. Changing configuration requires changing the cmode driverinit devlink param and calling devlink reload. Signed-off-by: Michael Guralnik Acked-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- .../device_drivers/mellanox/mlx5.rst | 21 ++++++++++++++++++ .../networking/devlink-params-mlx5.txt | 5 +++++ .../net/ethernet/mellanox/mlx5/core/devlink.c | 22 +++++++++++++++++++ include/linux/mlx5/driver.h | 11 ++++++++++ 4 files changed, 59 insertions(+) diff --git a/Documentation/networking/device_drivers/mellanox/mlx5.rst b/Documentation/networking/device_drivers/mellanox/mlx5.rst index d071c6b49e1f..7599dceba9f1 100644 --- a/Documentation/networking/device_drivers/mellanox/mlx5.rst +++ b/Documentation/networking/device_drivers/mellanox/mlx5.rst @@ -154,6 +154,27 @@ User command examples: values: cmode runtime value smfs +enable_roce: RoCE enablement state +---------------------------------- +RoCE enablement state controls driver support for RoCE traffic. +When RoCE is disabled, there is no gid table, only raw ethernet QPs are supported and traffic on the well known UDP RoCE port is handled as raw ethernet traffic. + +To change RoCE enablement state a user must change the driverinit cmode value and run devlink reload. + +User command examples: + +- Disable RoCE:: + + $ devlink dev param set pci/0000:06:00.0 name enable_roce value false cmode driverinit + $ devlink dev reload pci/0000:06:00.0 + +- Read RoCE enablement state:: + + $ devlink dev param show pci/0000:06:00.0 name enable_roce + pci/0000:06:00.0: + name enable_roce type generic + values: + cmode driverinit value true Devlink health reporters ======================== diff --git a/Documentation/networking/devlink-params-mlx5.txt b/Documentation/networking/devlink-params-mlx5.txt index 8c0b82d655dc..5071467118bd 100644 --- a/Documentation/networking/devlink-params-mlx5.txt +++ b/Documentation/networking/devlink-params-mlx5.txt @@ -10,3 +10,8 @@ flow_steering_mode [DEVICE, DRIVER-SPECIFIC] without firmware intervention. Type: String Configuration mode: runtime + +enable_roce [DEVICE, GENERIC] + Enable handling of RoCE traffic in the device. + Defaultly enabled. + Configuration mode: driverinit diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 381925c90d94..b2c26388edb1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -177,12 +177,29 @@ enum mlx5_devlink_param_id { MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE, }; +static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + bool new_state = val.vbool; + + if (new_state && !MLX5_CAP_GEN(dev, roce)) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support RoCE"); + return -EOPNOTSUPP; + } + + return 0; +} + static const struct devlink_param mlx5_devlink_params[] = { DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE, "flow_steering_mode", DEVLINK_PARAM_TYPE_STRING, BIT(DEVLINK_PARAM_CMODE_RUNTIME), mlx5_devlink_fs_mode_get, mlx5_devlink_fs_mode_set, mlx5_devlink_fs_mode_validate), + DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx5_devlink_enable_roce_validate), }; static void mlx5_devlink_set_params_init_values(struct devlink *devlink) @@ -197,6 +214,11 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink) devlink_param_driverinit_value_set(devlink, MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE, value); + + value.vbool = MLX5_CAP_GEN(dev, roce); + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, + value); } int mlx5_devlink_register(struct devlink *devlink, struct device *dev) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7b4801e96feb..1884513aac90 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1191,4 +1191,15 @@ enum { MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, }; +static inline bool mlx5_is_roce_enabled(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + + devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, + &val); + return val.vbool; +} + #endif /* MLX5_DRIVER_H */ From b5a498baf929a15536a4275967cf2377ad1b6015 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Fri, 8 Nov 2019 23:45:26 +0000 Subject: [PATCH 25/39] IB/mlx5: Rename profile and init methods Rename uplink_rep_profile and its unique init and cleanup stages to suit its upcoming use as the profile when RoCE is disabled. Signed-off-by: Michael Guralnik Reviewed-by: Maor Gottlieb Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/ib_rep.c | 2 +- drivers/infiniband/hw/mlx5/ib_rep.h | 2 +- drivers/infiniband/hw/mlx5/main.c | 14 +++++++------- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 74ce9249e75a..5c3d052ac30b 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -35,7 +35,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) int vport_index; if (rep->vport == MLX5_VPORT_UPLINK) - profile = &uplink_rep_profile; + profile = &raw_eth_profile; else return mlx5_ib_set_vport_rep(dev, rep); diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h index de43b423bafc..3b6750cba796 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.h +++ b/drivers/infiniband/hw/mlx5/ib_rep.h @@ -10,7 +10,7 @@ #include "mlx5_ib.h" #ifdef CONFIG_MLX5_ESWITCH -extern const struct mlx5_ib_profile uplink_rep_profile; +extern const struct mlx5_ib_profile raw_eth_profile; u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw); struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8343a740c91e..d6afe33d56ac 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6444,7 +6444,7 @@ static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = { .query_port = mlx5_ib_rep_query_port, }; -static int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_raw_eth_non_default_cb(struct mlx5_ib_dev *dev) { ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_rep_ops); return 0; @@ -6484,7 +6484,7 @@ static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev) mlx5_remove_netdev_notifier(dev, port_num); } -static int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_raw_eth_roce_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; @@ -6500,7 +6500,7 @@ static int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev) return err; } -static void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_raw_eth_roce_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_stage_common_roce_cleanup(dev); } @@ -6807,7 +6807,7 @@ static const struct mlx5_ib_profile pf_profile = { mlx5_ib_stage_delay_drop_cleanup), }; -const struct mlx5_ib_profile uplink_rep_profile = { +const struct mlx5_ib_profile raw_eth_profile = { STAGE_CREATE(MLX5_IB_STAGE_INIT, mlx5_ib_stage_init_init, mlx5_ib_stage_init_cleanup), @@ -6818,11 +6818,11 @@ const struct mlx5_ib_profile uplink_rep_profile = { mlx5_ib_stage_caps_init, NULL), STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB, - mlx5_ib_stage_rep_non_default_cb, + mlx5_ib_stage_raw_eth_non_default_cb, NULL), STAGE_CREATE(MLX5_IB_STAGE_ROCE, - mlx5_ib_stage_rep_roce_init, - mlx5_ib_stage_rep_roce_cleanup), + mlx5_ib_stage_raw_eth_roce_init, + mlx5_ib_stage_raw_eth_roce_cleanup), STAGE_CREATE(MLX5_IB_STAGE_SRQ, mlx5_init_srq_table, mlx5_cleanup_srq_table), From 94de879c28d8e6c20bfec308de84703625221712 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Fri, 8 Nov 2019 23:45:28 +0000 Subject: [PATCH 26/39] IB/mlx5: Load profile according to RoCE enablement state When RoCE is disabled load mlx5_ib in raw_eth profile. Clean pf_profile roce capability checks as it will not be used without roce capability. Signed-off-by: Michael Guralnik Reviewed-by: Maor Gottlieb Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/main.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d6afe33d56ac..46ea4f0b9b51 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -5145,8 +5145,7 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; immutable->core_cap_flags = get_core_cap_flags(ibdev, &rep); - if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce)) - immutable->max_mad_size = IB_MGMT_MAD_SIZE; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; return 0; } @@ -5249,11 +5248,9 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev) { int err; - if (MLX5_CAP_GEN(dev->mdev, roce)) { - err = mlx5_nic_vport_enable_roce(dev->mdev); - if (err) - return err; - } + err = mlx5_nic_vport_enable_roce(dev->mdev); + if (err) + return err; err = mlx5_eth_lag_init(dev); if (err) @@ -5262,8 +5259,7 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev) return 0; err_disable_roce: - if (MLX5_CAP_GEN(dev->mdev, roce)) - mlx5_nic_vport_disable_roce(dev->mdev); + mlx5_nic_vport_disable_roce(dev->mdev); return err; } @@ -5271,8 +5267,7 @@ err_disable_roce: static void mlx5_disable_eth(struct mlx5_ib_dev *dev) { mlx5_eth_lag_cleanup(dev); - if (MLX5_CAP_GEN(dev->mdev, roce)) - mlx5_nic_vport_disable_roce(dev->mdev); + mlx5_nic_vport_disable_roce(dev->mdev); } struct mlx5_ib_counter { @@ -6898,6 +6893,7 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev) static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { + const struct mlx5_ib_profile *profile; enum rdma_link_layer ll; struct mlx5_ib_dev *dev; int port_type_cap; @@ -6933,7 +6929,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->mdev = mdev; dev->num_ports = num_ports; - return __mlx5_ib_add(dev, &pf_profile); + if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_is_roce_enabled(mdev)) + profile = &raw_eth_profile; + else + profile = &pf_profile; + + return __mlx5_ib_add(dev, profile); } static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) From 12063c2e4c0e38f36c0e6f0942cd138feed022b3 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 12 Nov 2019 00:34:24 +0100 Subject: [PATCH 27/39] net/mlx5: Simplify fdb chain and prio eswitch defines FDB_MAX_CHAIN and FDB_MAX_PRIO were defined differently depending on if CONFIG_MLX5_ESWITCH is enabled to save space on allocations. This is a minor space saving, and there is no real need for it. Simplify things instead, and define them the same in both cases. Signed-off-by: Paul Blakey Reviewed-by: Mark Bloch Acked-by: Pablo Neira Ayuso Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index a05b948a6287..ab8cdd3dd8d0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -43,6 +43,10 @@ #include #include "lib/mpfs.h" +#define FDB_MAX_CHAIN 3 +#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) +#define FDB_MAX_PRIO 16 + #ifdef CONFIG_MLX5_ESWITCH #define MLX5_MAX_UC_PER_VPORT(dev) \ @@ -59,10 +63,6 @@ #define mlx5_esw_has_fwd_fdb(dev) \ MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table) -#define FDB_MAX_CHAIN 3 -#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) -#define FDB_MAX_PRIO 16 - struct vport_ingress { struct mlx5_flow_table *acl; struct mlx5_flow_handle *allow_rule; @@ -637,10 +637,6 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) static inline void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) {} -#define FDB_MAX_CHAIN 1 -#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) -#define FDB_MAX_PRIO 1 - #endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESWITCH_H__ */ From 2cf2954bd7ffd8250ae257b45b96915003c26d7d Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 12 Nov 2019 00:34:25 +0100 Subject: [PATCH 28/39] net/mlx5: Rename FDB_* tc related defines to FDB_TC_* defines Rename it to prepare for next patch that will add a different type of offload to the FDB. Signed-off-by: Paul Blakey Reviewed-by: Mark Bloch Acked-by: Pablo Neira Ayuso Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 8 ++++---- .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 10 +++++----- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 8 ++++---- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3e78a727f3e6..15b771b6c09d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1074,7 +1074,7 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; slow_attr->split_count = 0; - slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN; + slow_attr->dest_chain = FDB_TC_SLOW_PATH_CHAIN; rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); if (!IS_ERR(rule)) @@ -1091,7 +1091,7 @@ mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; slow_attr->split_count = 0; - slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN; + slow_attr->dest_chain = FDB_TC_SLOW_PATH_CHAIN; mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); flow_flag_clear(flow, SLOW); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index ab8cdd3dd8d0..d73187bdbc06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -43,9 +43,9 @@ #include #include "lib/mpfs.h" -#define FDB_MAX_CHAIN 3 -#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) -#define FDB_MAX_PRIO 16 +#define FDB_TC_MAX_CHAIN 3 +#define FDB_TC_SLOW_PATH_CHAIN (FDB_TC_MAX_CHAIN + 1) +#define FDB_TC_MAX_PRIO 16 #ifdef CONFIG_MLX5_ESWITCH @@ -173,7 +173,7 @@ struct mlx5_eswitch_fdb { struct { struct mlx5_flow_table *fdb; u32 num_rules; - } fdb_prio[FDB_MAX_CHAIN + 1][FDB_MAX_PRIO + 1][PRIO_LEVELS]; + } fdb_prio[FDB_TC_MAX_CHAIN + 1][FDB_TC_MAX_PRIO + 1][PRIO_LEVELS]; /* Protects fdb_prio table */ struct mutex fdb_prio_lock; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d8e25416a15d..9c51fedd890f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -75,7 +75,7 @@ bool mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw) u32 mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw) { if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED) - return FDB_MAX_CHAIN; + return FDB_TC_MAX_CHAIN; return 0; } @@ -83,7 +83,7 @@ u32 mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw) u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw) { if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED) - return FDB_MAX_PRIO; + return FDB_TC_MAX_PRIO; return 1; } @@ -928,7 +928,7 @@ esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level) int table_prio, l = 0; u32 flags = 0; - if (chain == FDB_SLOW_PATH_CHAIN) + if (chain == FDB_TC_SLOW_PATH_CHAIN) return esw->fdb_table.offloads.slow_fdb; mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock); @@ -953,7 +953,7 @@ esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level) flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); - table_prio = (chain * FDB_MAX_PRIO) + prio - 1; + table_prio = (chain * FDB_TC_MAX_PRIO) + prio - 1; /* create earlier levels for correct fs_core lookup when * connecting tables @@ -990,7 +990,7 @@ esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level) { int l; - if (chain == FDB_SLOW_PATH_CHAIN) + if (chain == FDB_TC_SLOW_PATH_CHAIN) return; mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 3bbb49354829..b33f77892d10 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -2565,7 +2565,7 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering) return -ENOMEM; steering->fdb_sub_ns = kzalloc(sizeof(steering->fdb_sub_ns) * - (FDB_MAX_CHAIN + 1), GFP_KERNEL); + (FDB_TC_MAX_CHAIN + 1), GFP_KERNEL); if (!steering->fdb_sub_ns) return -ENOMEM; @@ -2576,7 +2576,7 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering) goto out_err; } - levels = 2 * FDB_MAX_PRIO * (FDB_MAX_CHAIN + 1); + levels = 2 * FDB_TC_MAX_PRIO * (FDB_TC_MAX_CHAIN + 1); maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns, FDB_FAST_PATH, levels); @@ -2585,14 +2585,14 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering) goto out_err; } - for (chain = 0; chain <= FDB_MAX_CHAIN; chain++) { + for (chain = 0; chain <= FDB_TC_MAX_CHAIN; chain++) { ns = fs_create_namespace(maj_prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF); if (IS_ERR(ns)) { err = PTR_ERR(ns); goto out_err; } - for (prio = 0; prio < FDB_MAX_PRIO * (chain + 1); prio++) { + for (prio = 0; prio < FDB_TC_MAX_PRIO * (chain + 1); prio++) { min_prio = fs_create_prio(ns, prio, 2); if (IS_ERR(min_prio)) { err = PTR_ERR(min_prio); From 4db7b98e943225dc2a7435811767e44f63640462 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 12 Nov 2019 00:34:26 +0100 Subject: [PATCH 29/39] net/mlx5: Define fdb tc levels per prio Define FDB_TC_LEVELS_PER_PRIO instead of magic number 2. This is the number of levels used by each tc prio table in the fdb. Signed-off-by: Paul Blakey Reviewed-by: Mark Bloch Acked-by: Pablo Neira Ayuso Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index d73187bdbc06..8c9d8dc85861 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -46,6 +46,7 @@ #define FDB_TC_MAX_CHAIN 3 #define FDB_TC_SLOW_PATH_CHAIN (FDB_TC_MAX_CHAIN + 1) #define FDB_TC_MAX_PRIO 16 +#define FDB_TC_LEVELS_PER_PRIO 2 #ifdef CONFIG_MLX5_ESWITCH @@ -146,7 +147,6 @@ enum offloads_fdb_flags { extern const unsigned int ESW_POOLS[4]; -#define PRIO_LEVELS 2 struct mlx5_eswitch_fdb { union { struct legacy_fdb { @@ -173,7 +173,7 @@ struct mlx5_eswitch_fdb { struct { struct mlx5_flow_table *fdb; u32 num_rules; - } fdb_prio[FDB_TC_MAX_CHAIN + 1][FDB_TC_MAX_PRIO + 1][PRIO_LEVELS]; + } fdb_prio[FDB_TC_MAX_CHAIN + 1][FDB_TC_MAX_PRIO + 1][FDB_TC_LEVELS_PER_PRIO]; /* Protects fdb_prio table */ struct mutex fdb_prio_lock; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index b33f77892d10..190c5c71b534 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -2576,7 +2576,8 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering) goto out_err; } - levels = 2 * FDB_TC_MAX_PRIO * (FDB_TC_MAX_CHAIN + 1); + levels = FDB_TC_LEVELS_PER_PRIO * + FDB_TC_MAX_PRIO * (FDB_TC_MAX_CHAIN + 1); maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns, FDB_FAST_PATH, levels); @@ -2593,7 +2594,8 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering) } for (prio = 0; prio < FDB_TC_MAX_PRIO * (chain + 1); prio++) { - min_prio = fs_create_prio(ns, prio, 2); + min_prio = fs_create_prio(ns, prio, + FDB_TC_LEVELS_PER_PRIO); if (IS_ERR(min_prio)) { err = PTR_ERR(min_prio); goto out_err; From 34b13cb3eaa5ad205f4497da6420262da4940b9e Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 12 Nov 2019 00:34:27 +0100 Subject: [PATCH 30/39] net/mlx5: Accumulate levels for chains prio namespaces Tc chains are implemented by creating a chained prio steering type, and inside it there is a namespace for each chain (FDB_TC_MAX_CHAINS). Each of those has a list of priorities. Currently, all namespaces in a prio start at the parent prio level. But since we can jump from chain (namespace) to another chain in the same prio, we need the levels for higher chains to be higher as well. So we created unused prios to account for levels in previous namespaces. Fix that by accumulating the namespaces levels if we are inside a chained type prio, and removing the unused prios. Fixes: 328edb499f99 ('net/mlx5: Split FDB fast path prio to multiple namespaces') Signed-off-by: Paul Blakey Reviewed-by: Mark Bloch Acked-by: Pablo Neira Ayuso Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 9c51fedd890f..60d3d88e406c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -953,7 +953,7 @@ esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level) flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); - table_prio = (chain * FDB_TC_MAX_PRIO) + prio - 1; + table_prio = prio - 1; /* create earlier levels for correct fs_core lookup when * connecting tables diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 190c5c71b534..3cdad1d1021f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -2359,9 +2359,17 @@ static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level) int acc_level_ns = acc_level; prio->start_level = acc_level; - fs_for_each_ns(ns, prio) + fs_for_each_ns(ns, prio) { /* This updates start_level and num_levels of ns's priority descendants */ acc_level_ns = set_prio_attrs_in_ns(ns, acc_level); + + /* If this a prio with chains, and we can jump from one chain + * (namepsace) to another, so we accumulate the levels + */ + if (prio->node.type == FS_TYPE_PRIO_CHAINS) + acc_level = acc_level_ns; + } + if (!prio->num_levels) prio->num_levels = acc_level_ns - prio->start_level; WARN_ON(prio->num_levels < acc_level_ns - prio->start_level); From 439e843f1f43640fd52530433d803db8585cd028 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 12 Nov 2019 00:34:28 +0100 Subject: [PATCH 31/39] net/mlx5: Refactor creating fast path prio chains Next patch will re-use this to add a new chain but in a different prio. Signed-off-by: Paul Blakey Reviewed-by: Mark Bloch Acked-by: Pablo Neira Ayuso Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 118 ++++++++++++------ 1 file changed, 82 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 3cdad1d1021f..4aa6990a38b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -2558,60 +2558,106 @@ out_err: steering->rdma_rx_root_ns = NULL; return err; } -static int init_fdb_root_ns(struct mlx5_flow_steering *steering) + +/* FT and tc chains are stored in the same array so we can re-use the + * mlx5_get_fdb_sub_ns() and tc api for FT chains. + * When creating a new ns for each chain store it in the first available slot. + * Assume tc chains are created and stored first and only then the FT chain. + */ +static void store_fdb_sub_ns_prio_chain(struct mlx5_flow_steering *steering, + struct mlx5_flow_namespace *ns) +{ + int chain = 0; + + while (steering->fdb_sub_ns[chain]) + ++chain; + + steering->fdb_sub_ns[chain] = ns; +} + +static int create_fdb_sub_ns_prio_chain(struct mlx5_flow_steering *steering, + struct fs_prio *maj_prio) { struct mlx5_flow_namespace *ns; - struct fs_prio *maj_prio; struct fs_prio *min_prio; + int prio; + + ns = fs_create_namespace(maj_prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF); + if (IS_ERR(ns)) + return PTR_ERR(ns); + + for (prio = 0; prio < FDB_TC_MAX_PRIO; prio++) { + min_prio = fs_create_prio(ns, prio, FDB_TC_LEVELS_PER_PRIO); + if (IS_ERR(min_prio)) + return PTR_ERR(min_prio); + } + + store_fdb_sub_ns_prio_chain(steering, ns); + + return 0; +} + +static int create_fdb_chains(struct mlx5_flow_steering *steering, + int fs_prio, + int chains) +{ + struct fs_prio *maj_prio; int levels; int chain; - int prio; + int err; + + levels = FDB_TC_LEVELS_PER_PRIO * FDB_TC_MAX_PRIO * chains; + maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns, + fs_prio, + levels); + if (IS_ERR(maj_prio)) + return PTR_ERR(maj_prio); + + for (chain = 0; chain < chains; chain++) { + err = create_fdb_sub_ns_prio_chain(steering, maj_prio); + if (err) + return err; + } + + return 0; +} + +static int create_fdb_fast_path(struct mlx5_flow_steering *steering) +{ + const int total_chains = FDB_TC_MAX_CHAIN + 1; + int err; + + steering->fdb_sub_ns = kcalloc(total_chains, + sizeof(*steering->fdb_sub_ns), + GFP_KERNEL); + if (!steering->fdb_sub_ns) + return -ENOMEM; + + err = create_fdb_chains(steering, FDB_FAST_PATH, FDB_TC_MAX_CHAIN + 1); + if (err) + return err; + + return 0; +} + +static int init_fdb_root_ns(struct mlx5_flow_steering *steering) +{ + struct fs_prio *maj_prio; int err; steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB); if (!steering->fdb_root_ns) return -ENOMEM; - steering->fdb_sub_ns = kzalloc(sizeof(steering->fdb_sub_ns) * - (FDB_TC_MAX_CHAIN + 1), GFP_KERNEL); - if (!steering->fdb_sub_ns) - return -ENOMEM; - maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BYPASS_PATH, 1); if (IS_ERR(maj_prio)) { err = PTR_ERR(maj_prio); goto out_err; } - - levels = FDB_TC_LEVELS_PER_PRIO * - FDB_TC_MAX_PRIO * (FDB_TC_MAX_CHAIN + 1); - maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns, - FDB_FAST_PATH, - levels); - if (IS_ERR(maj_prio)) { - err = PTR_ERR(maj_prio); + err = create_fdb_fast_path(steering); + if (err) goto out_err; - } - - for (chain = 0; chain <= FDB_TC_MAX_CHAIN; chain++) { - ns = fs_create_namespace(maj_prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF); - if (IS_ERR(ns)) { - err = PTR_ERR(ns); - goto out_err; - } - - for (prio = 0; prio < FDB_TC_MAX_PRIO * (chain + 1); prio++) { - min_prio = fs_create_prio(ns, prio, - FDB_TC_LEVELS_PER_PRIO); - if (IS_ERR(min_prio)) { - err = PTR_ERR(min_prio); - goto out_err; - } - } - - steering->fdb_sub_ns[chain] = ns; - } maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_SLOW_PATH, 1); if (IS_ERR(maj_prio)) { From 975b992fdd4b38028d7c1dcf38286d6e7991c1b2 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 12 Nov 2019 00:34:29 +0100 Subject: [PATCH 32/39] net/mlx5: Add new chain for netfilter flow table offload Netfilter tables (nftables) implements a software datapath that comes after tc ingress datapath. The datapath supports offloading such rules via the flow table offload API. This API is currently only used by NFT and it doesn't provide the global priority in regards to tc offload, so we assume offloading such rules must come after tc. It does provide a flow table priority parameter, so we need to provide some supported priority range. For that, split fastpath prio to two, flow table offload and tc offload, with one dedicated priority chain for flow table offload. Next patch will re-use the multi chain API to access this chain by allowing access to this chain by the fdb_sub_namespace. Signed-off-by: Paul Blakey Reviewed-by: Mark Bloch Acked-by: Pablo Neira Ayuso Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 9 +++++++-- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 9 ++++++--- include/linux/mlx5/fs.h | 3 ++- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 8c9d8dc85861..2b563700c664 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -44,7 +44,12 @@ #include "lib/mpfs.h" #define FDB_TC_MAX_CHAIN 3 -#define FDB_TC_SLOW_PATH_CHAIN (FDB_TC_MAX_CHAIN + 1) +#define FDB_FT_CHAIN (FDB_TC_MAX_CHAIN + 1) +#define FDB_TC_SLOW_PATH_CHAIN (FDB_FT_CHAIN + 1) + +/* The index of the last real chain (FT) + 1 as chain zero is valid as well */ +#define FDB_NUM_CHAINS (FDB_FT_CHAIN + 1) + #define FDB_TC_MAX_PRIO 16 #define FDB_TC_LEVELS_PER_PRIO 2 @@ -173,7 +178,7 @@ struct mlx5_eswitch_fdb { struct { struct mlx5_flow_table *fdb; u32 num_rules; - } fdb_prio[FDB_TC_MAX_CHAIN + 1][FDB_TC_MAX_PRIO + 1][FDB_TC_LEVELS_PER_PRIO]; + } fdb_prio[FDB_NUM_CHAINS][FDB_TC_MAX_PRIO + 1][FDB_TC_LEVELS_PER_PRIO]; /* Protects fdb_prio table */ struct mutex fdb_prio_lock; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 4aa6990a38b3..84e90b21e148 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -2624,16 +2624,19 @@ static int create_fdb_chains(struct mlx5_flow_steering *steering, static int create_fdb_fast_path(struct mlx5_flow_steering *steering) { - const int total_chains = FDB_TC_MAX_CHAIN + 1; int err; - steering->fdb_sub_ns = kcalloc(total_chains, + steering->fdb_sub_ns = kcalloc(FDB_NUM_CHAINS, sizeof(*steering->fdb_sub_ns), GFP_KERNEL); if (!steering->fdb_sub_ns) return -ENOMEM; - err = create_fdb_chains(steering, FDB_FAST_PATH, FDB_TC_MAX_CHAIN + 1); + err = create_fdb_chains(steering, FDB_TC_OFFLOAD, FDB_TC_MAX_CHAIN + 1); + if (err) + return err; + + err = create_fdb_chains(steering, FDB_FT_OFFLOAD, 1); if (err) return err; diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 724d276ea133..4e5b84e66822 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -80,7 +80,8 @@ enum mlx5_flow_namespace_type { enum { FDB_BYPASS_PATH, - FDB_FAST_PATH, + FDB_TC_OFFLOAD, + FDB_FT_OFFLOAD, FDB_SLOW_PATH, }; From 86bb811b08086f98744ecc600907673af7f2aec5 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Sun, 10 Nov 2019 18:10:13 +0200 Subject: [PATCH 33/39] net/mlx5: DR, Fix matcher builders select check When selecting a matcher ste_builder_arr will always be evaluated as true, instead check if num_of_builders is set for validity. Fixes: 667f264676c7 ("net/mlx5: DR, Support IPv4 and IPv6 mixed matcher") Signed-off-by: Alex Vesker Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c index 5db947df8763..c6548980daf0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -154,7 +154,7 @@ int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher, nic_matcher->num_of_builders = nic_matcher->num_of_builders_arr[outer_ipv][inner_ipv]; - if (!nic_matcher->ste_builder) { + if (!nic_matcher->num_of_builders) { mlx5dr_dbg(matcher->tbl->dmn, "Rule not supported on this matcher due to IP related fields\n"); return -EINVAL; From a7cba0a4d508d2d78f2932ee944feadd38c97c2c Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 28 Oct 2019 13:29:17 -0500 Subject: [PATCH 34/39] net/mlx5: Read num_vfs before disabling SR-IOV mlx5_device_disable_sriov() currently reads num_vfs from the PCI core. However when mlx5_device_disable_sriov() is executed, SR-IOV is already disabled at the PCI level. Due to this disable_hca() cleanup is not done during SR-IOV disable flow. mlx5_sriov_disable() pci_enable_sriov() mlx5_device_disable_sriov() <- num_vfs is zero here. When SR-IOV enablement fails during mlx5_sriov_enable(), HCA's are left in enabled stage because mlx5_device_disable_sriov() relies on num_vfs from PCI core. mlx5_sriov_enable() mlx5_device_enable_sriov() pci_enable_sriov() <- Fails Hence, to overcome above issues, (a) Read num_vfs before disabling SR-IOV and use it. (b) Use num_vfs given when enabling sriov in error unwinding path. Fixes: d886aba677a0 ("net/mlx5: Reduce dependency on enabled_vfs counter and num_vfs") Signed-off-by: Parav Pandit Reviewed-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index f641f1336402..03f037811f1d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -108,10 +108,10 @@ enable_vfs_hca: return 0; } -static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev, bool clear_vf) +static void +mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; - int num_vfs = pci_num_vf(dev->pdev); int err; int vf; @@ -147,7 +147,7 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) err = pci_enable_sriov(pdev, num_vfs); if (err) { mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err); - mlx5_device_disable_sriov(dev, true); + mlx5_device_disable_sriov(dev, num_vfs, true); } return err; } @@ -155,9 +155,10 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) static void mlx5_sriov_disable(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + int num_vfs = pci_num_vf(dev->pdev); pci_disable_sriov(pdev); - mlx5_device_disable_sriov(dev, true); + mlx5_device_disable_sriov(dev, num_vfs, true); } int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) @@ -192,7 +193,7 @@ void mlx5_sriov_detach(struct mlx5_core_dev *dev) if (!mlx5_core_is_pf(dev)) return; - mlx5_device_disable_sriov(dev, false); + mlx5_device_disable_sriov(dev, pci_num_vf(dev->pdev), false); } static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev) From e6014afd1c5717d556778ec1307cf7ab27ba5a2d Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 30 Oct 2019 16:48:15 +0200 Subject: [PATCH 35/39] net/mlx5: Remove redundant NULL initializations Neighbour initializations to NULL are not necessary as the pointers are not used if an error is returned, and if success returned, pointers are initialized. Signed-off-by: Eli Cohen Reviewed-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 13af72556987..4f78efeb6ee8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -77,8 +77,8 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, struct neighbour **out_n, u8 *out_ttl) { + struct neighbour *n; struct rtable *rt; - struct neighbour *n = NULL; #if IS_ENABLED(CONFIG_INET) struct mlx5_core_dev *mdev = priv->mdev; @@ -138,8 +138,8 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, struct neighbour **out_n, u8 *out_ttl) { - struct neighbour *n = NULL; struct dst_entry *dst; + struct neighbour *n; #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6) int ret; @@ -212,8 +212,8 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); const struct ip_tunnel_key *tun_key = &e->tun_info->key; struct net_device *out_dev, *route_dev; - struct neighbour *n = NULL; struct flowi4 fl4 = {}; + struct neighbour *n; int ipv4_encap_size; char *encap_header; u8 nud_state, ttl; @@ -328,9 +328,9 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); const struct ip_tunnel_key *tun_key = &e->tun_info->key; struct net_device *out_dev, *route_dev; - struct neighbour *n = NULL; struct flowi6 fl6 = {}; struct ipv6hdr *ip6h; + struct neighbour *n; int ipv6_encap_size; char *encap_header; u8 nud_state, ttl; From 85bf490af1e2e4b6263898f0d47af13ee1bb4d28 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 31 Oct 2019 09:00:43 +0200 Subject: [PATCH 36/39] net/mlx5e: Fix error flow cleanup in mlx5e_tc_tun_create_header_ipv4/6 Be sure to release the neighbour in case of failures after successful route lookup. Signed-off-by: Eli Cohen Reviewed-by: Roi Dayan Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/tc_tun.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 4f78efeb6ee8..5316cedd78bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -239,12 +239,15 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, if (max_encap_size < ipv4_encap_size) { mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", ipv4_encap_size, max_encap_size); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto out; } encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL); - if (!encap_header) - return -ENOMEM; + if (!encap_header) { + err = -ENOMEM; + goto out; + } /* used by mlx5e_detach_encap to lookup a neigh hash table * entry in the neigh hash table when a user deletes a rule @@ -355,12 +358,15 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, if (max_encap_size < ipv6_encap_size) { mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", ipv6_encap_size, max_encap_size); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto out; } encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL); - if (!encap_header) - return -ENOMEM; + if (!encap_header) { + err = -ENOMEM; + goto out; + } /* used by mlx5e_detach_encap to lookup a neigh hash table * entry in the neigh hash table when a user deletes a rule From 71c6eaebf06aa8353b0dcd57786b801b96fe2c08 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Tue, 29 Oct 2019 17:04:30 +0200 Subject: [PATCH 37/39] net/mlx5e: Set netdev name space on creation Use devlink instance name space to set the netdev net namespace. Preparation patch for devlink reload implementation. Signed-off-by: Michael Guralnik Acked-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h | 5 +++++ 3 files changed, 9 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 772bfdbdeb9c..06a592fb62bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -63,6 +63,7 @@ #include "en/xsk/rx.h" #include "en/xsk/tx.h" #include "en/hv_vhca_stats.h" +#include "lib/mlx5.h" bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) @@ -5427,6 +5428,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) return NULL; } + dev_net_set(netdev, mlx5_core_net(mdev)); priv = netdev_priv(netdev); err = mlx5e_attach(mdev, priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index cd9bb7c7b341..c7f98f1fd9b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -47,6 +47,7 @@ #include "en/tc_tun.h" #include "fs_core.h" #include "lib/port_tun.h" +#include "lib/mlx5.h" #define CREATE_TRACE_POINTS #include "diag/en_rep_tracepoint.h" @@ -1877,6 +1878,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) return -EINVAL; } + dev_net_set(netdev, mlx5_core_net(dev)); rpriv->netdev = netdev; rep->rep_data[REP_ETH].priv = rpriv; INIT_LIST_HEAD(&rpriv->vport_sqs_list); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index b99d469e4e64..249539247e2e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -84,4 +84,9 @@ int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, void *key, u32 sz_bytes, u32 *p_key_id); void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id); +static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev) +{ + return devlink_net(priv_to_devlink(dev)); +} + #endif From 4383cfcc65e7879e1858da56954dae9fc20dfae9 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Sun, 27 Oct 2019 14:34:11 +0200 Subject: [PATCH 38/39] net/mlx5: Add devlink reload Implement devlink reload for mlx5. Usage example: devlink dev reload pci/0000:06:00.0 Signed-off-by: Michael Guralnik Acked-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/devlink.c | 20 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/main.c | 4 ++-- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 3 +++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index b2c26388edb1..ac108f1e5bd6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -85,6 +85,22 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, return 0; } +static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + return mlx5_unload_one(dev, false); +} + +static int mlx5_devlink_reload_up(struct devlink *devlink, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + return mlx5_load_one(dev, false); +} + static const struct devlink_ops mlx5_devlink_ops = { #ifdef CONFIG_MLX5_ESWITCH .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, @@ -96,6 +112,8 @@ static const struct devlink_ops mlx5_devlink_ops = { #endif .flash_update = mlx5_devlink_flash_update, .info_get = mlx5_devlink_info_get, + .reload_down = mlx5_devlink_reload_down, + .reload_up = mlx5_devlink_reload_up, }; struct devlink *mlx5_devlink_alloc(void) @@ -235,6 +253,7 @@ int mlx5_devlink_register(struct devlink *devlink, struct device *dev) goto params_reg_err; mlx5_devlink_set_params_init_values(devlink); devlink_params_publish(devlink); + devlink_reload_enable(devlink); return 0; params_reg_err: @@ -244,6 +263,7 @@ params_reg_err: void mlx5_devlink_unregister(struct devlink *devlink) { + devlink_reload_disable(devlink); devlink_params_unregister(devlink, mlx5_devlink_params, ARRAY_SIZE(mlx5_devlink_params)); devlink_unregister(devlink); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c9a091d3226c..31fbfd6e8bb9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1168,7 +1168,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_put_uars_page(dev, dev->priv.uar); } -static int mlx5_load_one(struct mlx5_core_dev *dev, bool boot) +int mlx5_load_one(struct mlx5_core_dev *dev, bool boot) { int err = 0; @@ -1226,7 +1226,7 @@ function_teardown: return err; } -static int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) +int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) { if (cleanup) { mlx5_unregister_device(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index b100489dc85c..da67b28d6e23 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -243,4 +243,7 @@ enum { u8 mlx5_get_nic_state(struct mlx5_core_dev *dev); void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state); + +int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup); +int mlx5_load_one(struct mlx5_core_dev *dev, bool boot); #endif /* __MLX5_CORE_H__ */ From 84179981317fb4fb3e9df5acd42ea33cf6037793 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 12 Nov 2019 00:34:30 +0100 Subject: [PATCH 39/39] net/mlx5: TC: Offload flow table rules Since both tc rules and flow table rules are of the same format, we can re-use tc parsing for that, and move the flow table rules to their steering domain - In this case, the next chain after max tc chain. Signed-off-by: Paul Blakey Reviewed-by: Mark Bloch Acked-by: Pablo Neira Ayuso Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 45 +++++++++++++++++-- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 28 +++++++++++- .../net/ethernet/mellanox/mlx5/core/en_tc.h | 3 +- 3 files changed, 71 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index c7f98f1fd9b1..f175cb24bb67 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1244,21 +1244,60 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, } } -static LIST_HEAD(mlx5e_rep_block_cb_list); +static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct flow_cls_offload *f = type_data; + struct flow_cls_offload cls_flower; + struct mlx5e_priv *priv = cb_priv; + struct mlx5_eswitch *esw; + unsigned long flags; + int err; + flags = MLX5_TC_FLAG(INGRESS) | + MLX5_TC_FLAG(ESW_OFFLOAD) | + MLX5_TC_FLAG(FT_OFFLOAD); + esw = priv->mdev->priv.eswitch; + + switch (type) { + case TC_SETUP_CLSFLOWER: + if (!mlx5_eswitch_prios_supported(esw) || f->common.chain_index) + return -EOPNOTSUPP; + + /* Re-use tc offload path by moving the ft flow to the + * reserved ft chain. + */ + memcpy(&cls_flower, f, sizeof(*f)); + cls_flower.common.chain_index = FDB_FT_CHAIN; + err = mlx5e_rep_setup_tc_cls_flower(priv, &cls_flower, flags); + memcpy(&f->stats, &cls_flower.stats, sizeof(f->stats)); + return err; + default: + return -EOPNOTSUPP; + } +} + +static LIST_HEAD(mlx5e_rep_block_tc_cb_list); +static LIST_HEAD(mlx5e_rep_block_ft_cb_list); static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { struct mlx5e_priv *priv = netdev_priv(dev); struct flow_block_offload *f = type_data; + f->unlocked_driver_cb = true; + switch (type) { case TC_SETUP_BLOCK: - f->unlocked_driver_cb = true; return flow_block_cb_setup_simple(type_data, - &mlx5e_rep_block_cb_list, + &mlx5e_rep_block_tc_cb_list, mlx5e_rep_setup_tc_cb, priv, priv, true); + case TC_SETUP_FT: + return flow_block_cb_setup_simple(type_data, + &mlx5e_rep_block_ft_cb_list, + mlx5e_rep_setup_ft_cb, + priv, priv, true); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 0c1022cda128..3a707d788022 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -74,6 +74,7 @@ enum { MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT, MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT, MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT, + MLX5E_TC_FLOW_FLAG_FT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT, MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT, MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE, MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1, @@ -276,6 +277,11 @@ static bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow) return flow_flag_test(flow, ESWITCH); } +static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow) +{ + return flow_flag_test(flow, FT); +} + static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow) { return flow_flag_test(flow, OFFLOADED); @@ -1168,7 +1174,12 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (attr->chain > max_chain) { + /* We check chain range only for tc flows. + * For ft flows, we checked attr->chain was originally 0 and set it to + * FDB_FT_CHAIN which is outside tc range. + * See mlx5e_rep_setup_ft_cb(). + */ + if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) { NL_SET_ERR_MSG(extack, "Requested chain is out of supported range"); return -EOPNOTSUPP; } @@ -3217,6 +3228,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr; struct mlx5e_rep_priv *rpriv = priv->ppriv; const struct ip_tunnel_info *info = NULL; + bool ft_flow = mlx5e_is_ft_flow(flow); const struct flow_action_entry *act; bool encap = false; u32 action = 0; @@ -3261,6 +3273,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return -EINVAL; } + if (ft_flow && out_dev == priv->netdev) { + /* Ignore forward to self rules generated + * by adding both mlx5 devs to the flow table + * block on a normal nft offload setup. + */ + return -EOPNOTSUPP; + } + if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) { NL_SET_ERR_MSG_MOD(extack, "can't support more output ports, can't offload forwarding"); @@ -3385,6 +3405,10 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, u32 dest_chain = act->chain_index; u32 max_chain = mlx5_eswitch_get_chain_range(esw); + if (ft_flow) { + NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported"); + return -EOPNOTSUPP; + } if (dest_chain <= attr->chain) { NL_SET_ERR_MSG(extack, "Goto earlier chain isn't supported"); return -EOPNOTSUPP; @@ -3475,6 +3499,8 @@ static void get_flags(int flags, unsigned long *flow_flags) __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); if (flags & MLX5_TC_FLAG(NIC_OFFLOAD)) __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC); + if (flags & MLX5_TC_FLAG(FT_OFFLOAD)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT); *flow_flags = __flow_flags; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 924c6ef86a14..262cdb7b69b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -44,7 +44,8 @@ enum { MLX5E_TC_FLAG_EGRESS_BIT, MLX5E_TC_FLAG_NIC_OFFLOAD_BIT, MLX5E_TC_FLAG_ESW_OFFLOAD_BIT, - MLX5E_TC_FLAG_LAST_EXPORTED_BIT = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT, + MLX5E_TC_FLAG_FT_OFFLOAD_BIT, + MLX5E_TC_FLAG_LAST_EXPORTED_BIT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT, }; #define MLX5_TC_FLAG(flag) BIT(MLX5E_TC_FLAG_##flag##_BIT)