From 4ecc8633056b8d2f214c6ce860a50837e5c8fb41 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 13 Sep 2021 11:22:19 -0700 Subject: [PATCH 01/10] ice: Add support for VF rate limiting Implement ndo_set_vf_rate to support setting of min_tx_rate and max_tx_rate; set the appropriate bandwidth in the scheduler for the node representing the specified VF VSI. Co-developed-by: Tarun Singh Signed-off-by: Tarun Singh Signed-off-by: Brett Creeley Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 174 ++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_lib.h | 4 +- drivers/net/ethernet/intel/ice/ice_main.c | 1 + drivers/net/ethernet/intel/ice/ice_sched.c | 130 +++++++++++++ drivers/net/ethernet/intel/ice/ice_sched.h | 6 + .../net/ethernet/intel/ice/ice_virtchnl_pf.c | 160 +++++++++++++++- .../net/ethernet/intel/ice/ice_virtchnl_pf.h | 15 +- 7 files changed, 486 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index f981e77f72ad..acff8d3b1c97 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3600,6 +3600,180 @@ int ice_clear_dflt_vsi(struct ice_sw *sw) return 0; } +/** + * ice_get_link_speed_mbps - get link speed in Mbps + * @vsi: the VSI whose link speed is being queried + * + * Return current VSI link speed and 0 if the speed is unknown. + */ +int ice_get_link_speed_mbps(struct ice_vsi *vsi) +{ + switch (vsi->port_info->phy.link_info.link_speed) { + case ICE_AQ_LINK_SPEED_100GB: + return SPEED_100000; + case ICE_AQ_LINK_SPEED_50GB: + return SPEED_50000; + case ICE_AQ_LINK_SPEED_40GB: + return SPEED_40000; + case ICE_AQ_LINK_SPEED_25GB: + return SPEED_25000; + case ICE_AQ_LINK_SPEED_20GB: + return SPEED_20000; + case ICE_AQ_LINK_SPEED_10GB: + return SPEED_10000; + case ICE_AQ_LINK_SPEED_5GB: + return SPEED_5000; + case ICE_AQ_LINK_SPEED_2500MB: + return SPEED_2500; + case ICE_AQ_LINK_SPEED_1000MB: + return SPEED_1000; + case ICE_AQ_LINK_SPEED_100MB: + return SPEED_100; + case ICE_AQ_LINK_SPEED_10MB: + return SPEED_10; + case ICE_AQ_LINK_SPEED_UNKNOWN: + default: + return 0; + } +} + +/** + * ice_get_link_speed_kbps - get link speed in Kbps + * @vsi: the VSI whose link speed is being queried + * + * Return current VSI link speed and 0 if the speed is unknown. + */ +static int ice_get_link_speed_kbps(struct ice_vsi *vsi) +{ + int speed_mbps; + + speed_mbps = ice_get_link_speed_mbps(vsi); + + return speed_mbps * 1000; +} + +/** + * ice_set_min_bw_limit - setup minimum BW limit for Tx based on min_tx_rate + * @vsi: VSI to be configured + * @min_tx_rate: min Tx rate in Kbps to be configured as BW limit + * + * If the min_tx_rate is specified as 0 that means to clear the minimum BW limit + * profile, otherwise a non-zero value will force a minimum BW limit for the VSI + * on TC 0. + */ +int ice_set_min_bw_limit(struct ice_vsi *vsi, u64 min_tx_rate) +{ + struct ice_pf *pf = vsi->back; + enum ice_status status; + struct device *dev; + int speed; + + dev = ice_pf_to_dev(pf); + if (!vsi->port_info) { + dev_dbg(dev, "VSI %d, type %u specified doesn't have valid port_info\n", + vsi->idx, vsi->type); + return -EINVAL; + } + + speed = ice_get_link_speed_kbps(vsi); + if (min_tx_rate > (u64)speed) { + dev_err(dev, "invalid min Tx rate %llu Kbps specified for %s %d is greater than current link speed %u Kbps\n", + min_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx, + speed); + return -EINVAL; + } + + /* Configure min BW for VSI limit */ + if (min_tx_rate) { + status = ice_cfg_vsi_bw_lmt_per_tc(vsi->port_info, vsi->idx, 0, + ICE_MIN_BW, min_tx_rate); + if (status) { + dev_err(dev, "failed to set min Tx rate(%llu Kbps) for %s %d\n", + min_tx_rate, ice_vsi_type_str(vsi->type), + vsi->idx); + return -EIO; + } + + dev_dbg(dev, "set min Tx rate(%llu Kbps) for %s\n", + min_tx_rate, ice_vsi_type_str(vsi->type)); + } else { + status = ice_cfg_vsi_bw_dflt_lmt_per_tc(vsi->port_info, + vsi->idx, 0, + ICE_MIN_BW); + if (status) { + dev_err(dev, "failed to clear min Tx rate configuration for %s %d\n", + ice_vsi_type_str(vsi->type), vsi->idx); + return -EIO; + } + + dev_dbg(dev, "cleared min Tx rate configuration for %s %d\n", + ice_vsi_type_str(vsi->type), vsi->idx); + } + + return 0; +} + +/** + * ice_set_max_bw_limit - setup maximum BW limit for Tx based on max_tx_rate + * @vsi: VSI to be configured + * @max_tx_rate: max Tx rate in Kbps to be configured as BW limit + * + * If the max_tx_rate is specified as 0 that means to clear the maximum BW limit + * profile, otherwise a non-zero value will force a maximum BW limit for the VSI + * on TC 0. + */ +int ice_set_max_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate) +{ + struct ice_pf *pf = vsi->back; + enum ice_status status; + struct device *dev; + int speed; + + dev = ice_pf_to_dev(pf); + if (!vsi->port_info) { + dev_dbg(dev, "VSI %d, type %u specified doesn't have valid port_info\n", + vsi->idx, vsi->type); + return -EINVAL; + } + + speed = ice_get_link_speed_kbps(vsi); + if (max_tx_rate > (u64)speed) { + dev_err(dev, "invalid max Tx rate %llu Kbps specified for %s %d is greater than current link speed %u Kbps\n", + max_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx, + speed); + return -EINVAL; + } + + /* Configure max BW for VSI limit */ + if (max_tx_rate) { + status = ice_cfg_vsi_bw_lmt_per_tc(vsi->port_info, vsi->idx, 0, + ICE_MAX_BW, max_tx_rate); + if (status) { + dev_err(dev, "failed setting max Tx rate(%llu Kbps) for %s %d\n", + max_tx_rate, ice_vsi_type_str(vsi->type), + vsi->idx); + return -EIO; + } + + dev_dbg(dev, "set max Tx rate(%llu Kbps) for %s %d\n", + max_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx); + } else { + status = ice_cfg_vsi_bw_dflt_lmt_per_tc(vsi->port_info, + vsi->idx, 0, + ICE_MAX_BW); + if (status) { + dev_err(dev, "failed clearing max Tx rate configuration for %s %d\n", + ice_vsi_type_str(vsi->type), vsi->idx); + return -EIO; + } + + dev_dbg(dev, "cleared max Tx rate configuration for %s %d\n", + ice_vsi_type_str(vsi->type), vsi->idx); + } + + return 0; +} + /** * ice_set_link - turn on/off physical link * @vsi: VSI to modify physical link on diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index b6c429c5875d..d395bd590e84 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -116,7 +116,9 @@ bool ice_is_vsi_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi); int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi); int ice_clear_dflt_vsi(struct ice_sw *sw); - +int ice_set_min_bw_limit(struct ice_vsi *vsi, u64 min_tx_rate); +int ice_set_max_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate); +int ice_get_link_speed_mbps(struct ice_vsi *vsi); int ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *)); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index f531691a3e12..99647dceefc4 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -7390,6 +7390,7 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_set_vf_vlan = ice_set_vf_port_vlan, .ndo_set_vf_link_state = ice_set_vf_link_state, .ndo_get_vf_stats = ice_get_vf_stats, + .ndo_set_vf_rate = ice_set_vf_bw, .ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid, .ndo_setup_tc = ice_setup_tc, diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 9f07b6641705..560e52b99f83 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -3770,6 +3770,136 @@ ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, ICE_SCHED_DFLT_BW); } +/** + * ice_sched_get_node_by_id_type - get node from ID type + * @pi: port information structure + * @id: identifier + * @agg_type: type of aggregator + * @tc: traffic class + * + * This function returns node identified by ID of type aggregator, and + * based on traffic class (TC). This function needs to be called with + * the scheduler lock held. + */ +static struct ice_sched_node * +ice_sched_get_node_by_id_type(struct ice_port_info *pi, u32 id, + enum ice_agg_type agg_type, u8 tc) +{ + struct ice_sched_node *node = NULL; + + switch (agg_type) { + case ICE_AGG_TYPE_VSI: { + struct ice_vsi_ctx *vsi_ctx; + u16 vsi_handle = (u16)id; + + if (!ice_is_vsi_valid(pi->hw, vsi_handle)) + break; + /* Get sched_vsi_info */ + vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle); + if (!vsi_ctx) + break; + node = vsi_ctx->sched.vsi_node[tc]; + break; + } + + case ICE_AGG_TYPE_AGG: { + struct ice_sched_node *tc_node; + + tc_node = ice_sched_get_tc_node(pi, tc); + if (tc_node) + node = ice_sched_get_agg_node(pi, tc_node, id); + break; + } + + default: + break; + } + + return node; +} + +/** + * ice_sched_set_node_bw_lmt_per_tc - set node BW limit per TC + * @pi: port information structure + * @id: ID (software VSI handle or AGG ID) + * @agg_type: aggregator type (VSI or AGG type node) + * @tc: traffic class + * @rl_type: min or max + * @bw: bandwidth in Kbps + * + * This function sets BW limit of VSI or Aggregator scheduling node + * based on TC information from passed in argument BW. + */ +static enum ice_status +ice_sched_set_node_bw_lmt_per_tc(struct ice_port_info *pi, u32 id, + enum ice_agg_type agg_type, u8 tc, + enum ice_rl_type rl_type, u32 bw) +{ + enum ice_status status = ICE_ERR_PARAM; + struct ice_sched_node *node; + + if (!pi) + return status; + + if (rl_type == ICE_UNKNOWN_BW) + return status; + + mutex_lock(&pi->sched_lock); + node = ice_sched_get_node_by_id_type(pi, id, agg_type, tc); + if (!node) { + ice_debug(pi->hw, ICE_DBG_SCHED, "Wrong id, agg type, or tc\n"); + goto exit_set_node_bw_lmt_per_tc; + } + if (bw == ICE_SCHED_DFLT_BW) + status = ice_sched_set_node_bw_dflt_lmt(pi, node, rl_type); + else + status = ice_sched_set_node_bw_lmt(pi, node, rl_type, bw); + +exit_set_node_bw_lmt_per_tc: + mutex_unlock(&pi->sched_lock); + return status; +} + +/** + * ice_cfg_vsi_bw_lmt_per_tc - configure VSI BW limit per TC + * @pi: port information structure + * @vsi_handle: software VSI handle + * @tc: traffic class + * @rl_type: min or max + * @bw: bandwidth in Kbps + * + * This function configures BW limit of VSI scheduling node based on TC + * information. + */ +enum ice_status +ice_cfg_vsi_bw_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + enum ice_rl_type rl_type, u32 bw) +{ + return ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle, + ICE_AGG_TYPE_VSI, + tc, rl_type, bw); +} + +/** + * ice_cfg_vsi_bw_dflt_lmt_per_tc - configure default VSI BW limit per TC + * @pi: port information structure + * @vsi_handle: software VSI handle + * @tc: traffic class + * @rl_type: min or max + * + * This function configures default BW limit of VSI scheduling node based on TC + * information. + */ +enum ice_status +ice_cfg_vsi_bw_dflt_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + enum ice_rl_type rl_type) +{ + return ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle, + ICE_AGG_TYPE_VSI, + tc, rl_type, + ICE_SCHED_DFLT_BW); +} + /** * ice_cfg_rl_burst_size - Set burst size value * @hw: pointer to the HW struct diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h index 9beef8f0ec76..f89b80ba3499 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.h +++ b/drivers/net/ethernet/intel/ice/ice_sched.h @@ -103,6 +103,12 @@ ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, enum ice_status ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle, enum ice_rl_type rl_type); +enum ice_status +ice_cfg_vsi_bw_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + enum ice_rl_type rl_type, u32 bw); +enum ice_status +ice_cfg_vsi_bw_dflt_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + enum ice_rl_type rl_type); enum ice_status ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes); void ice_sched_replay_agg_vsi_preinit(struct ice_hw *hw); void ice_sched_replay_agg(struct ice_hw *hw); diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index d90a3b7be713..86f265268ac8 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -5,6 +5,7 @@ #include "ice_base.h" #include "ice_lib.h" #include "ice_fltr.h" +#include "ice_dcb_lib.h" #include "ice_flow.h" #include "ice_eswitch.h" #include "ice_virtchnl_allowlist.h" @@ -884,6 +885,40 @@ static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf) return pf->sriov_base_vector + vf->vf_id * pf->num_msix_per_vf; } +/** + * ice_vf_rebuild_host_tx_rate_cfg - re-apply the Tx rate limiting configuration + * @vf: VF to re-apply the configuration for + * + * Called after a VF VSI has been re-added/rebuild during reset. The PF driver + * needs to re-apply the host configured Tx rate limiting configuration. + */ +static int ice_vf_rebuild_host_tx_rate_cfg(struct ice_vf *vf) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_vsi *vsi = ice_get_vf_vsi(vf); + int err; + + if (vf->min_tx_rate) { + err = ice_set_min_bw_limit(vsi, (u64)vf->min_tx_rate * 1000); + if (err) { + dev_err(dev, "failed to set min Tx rate to %d Mbps for VF %u, error %d\n", + vf->min_tx_rate, vf->vf_id, err); + return err; + } + } + + if (vf->max_tx_rate) { + err = ice_set_max_bw_limit(vsi, (u64)vf->max_tx_rate * 1000); + if (err) { + dev_err(dev, "failed to set max Tx rate to %d Mbps for VF %u, error %d\n", + vf->max_tx_rate, vf->vf_id, err); + return err; + } + } + + return 0; +} + /** * ice_vf_rebuild_host_vlan_cfg - add VLAN 0 filter or rebuild the Port VLAN * @vf: VF to add MAC filters for @@ -1420,6 +1455,11 @@ static void ice_vf_rebuild_host_cfg(struct ice_vf *vf) if (ice_vf_rebuild_host_vlan_cfg(vf)) dev_err(dev, "failed to rebuild VLAN configuration for VF %u\n", vf->vf_id); + + if (ice_vf_rebuild_host_tx_rate_cfg(vf)) + dev_err(dev, "failed to rebuild Tx rate limiting configuration for VF %u\n", + vf->vf_id); + /* rebuild aggregator node config for main VF VSI */ ice_vf_rebuild_aggregator_node_cfg(vsi); } @@ -4747,8 +4787,8 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi) ivi->linkstate = IFLA_VF_LINK_STATE_ENABLE; else ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE; - ivi->max_tx_rate = vf->tx_rate; - ivi->min_tx_rate = 0; + ivi->max_tx_rate = vf->max_tx_rate; + ivi->min_tx_rate = vf->min_tx_rate; return 0; } @@ -4926,6 +4966,122 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) return 0; } +/** + * ice_calc_all_vfs_min_tx_rate - calculate cumulative min Tx rate on all VFs + * @pf: PF associated with VFs + */ +static int ice_calc_all_vfs_min_tx_rate(struct ice_pf *pf) +{ + int rate = 0, i; + + ice_for_each_vf(pf, i) + rate += pf->vf[i].min_tx_rate; + + return rate; +} + +/** + * ice_min_tx_rate_oversubscribed - check if min Tx rate causes oversubscription + * @vf: VF trying to configure min_tx_rate + * @min_tx_rate: min Tx rate in Mbps + * + * Check if the min_tx_rate being passed in will cause oversubscription of total + * min_tx_rate based on the current link speed and all other VFs configured + * min_tx_rate + * + * Return true if the passed min_tx_rate would cause oversubscription, else + * return false + */ +static bool +ice_min_tx_rate_oversubscribed(struct ice_vf *vf, int min_tx_rate) +{ + int link_speed_mbps = ice_get_link_speed_mbps(ice_get_vf_vsi(vf)); + int all_vfs_min_tx_rate = ice_calc_all_vfs_min_tx_rate(vf->pf); + + /* this VF's previous rate is being overwritten */ + all_vfs_min_tx_rate -= vf->min_tx_rate; + + if (all_vfs_min_tx_rate + min_tx_rate > link_speed_mbps) { + dev_err(ice_pf_to_dev(vf->pf), "min_tx_rate of %d Mbps on VF %u would cause oversubscription of %d Mbps based on the current link speed %d Mbps\n", + min_tx_rate, vf->vf_id, + all_vfs_min_tx_rate + min_tx_rate - link_speed_mbps, + link_speed_mbps); + return true; + } + + return false; +} + +/** + * ice_set_vf_bw - set min/max VF bandwidth + * @netdev: network interface device structure + * @vf_id: VF identifier + * @min_tx_rate: Minimum Tx rate in Mbps + * @max_tx_rate: Maximum Tx rate in Mbps + */ +int +ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, + int max_tx_rate) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_vsi *vsi; + struct device *dev; + struct ice_vf *vf; + int ret; + + dev = ice_pf_to_dev(pf); + if (ice_validate_vf_id(pf, vf_id)) + return -EINVAL; + + vf = &pf->vf[vf_id]; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + return ret; + + vsi = ice_get_vf_vsi(vf); + + /* when max_tx_rate is zero that means no max Tx rate limiting, so only + * check if max_tx_rate is non-zero + */ + if (max_tx_rate && min_tx_rate > max_tx_rate) { + dev_err(dev, "Cannot set min Tx rate %d Mbps greater than max Tx rate %d Mbps\n", + min_tx_rate, max_tx_rate); + return -EINVAL; + } + + if (min_tx_rate && ice_is_dcb_active(pf)) { + dev_err(dev, "DCB on PF is currently enabled. VF min Tx rate limiting not allowed on this PF.\n"); + return -EOPNOTSUPP; + } + + if (ice_min_tx_rate_oversubscribed(vf, min_tx_rate)) + return -EINVAL; + + if (vf->min_tx_rate != (unsigned int)min_tx_rate) { + ret = ice_set_min_bw_limit(vsi, (u64)min_tx_rate * 1000); + if (ret) { + dev_err(dev, "Unable to set min-tx-rate for VF %d\n", + vf->vf_id); + return ret; + } + + vf->min_tx_rate = min_tx_rate; + } + + if (vf->max_tx_rate != (unsigned int)max_tx_rate) { + ret = ice_set_max_bw_limit(vsi, (u64)max_tx_rate * 1000); + if (ret) { + dev_err(dev, "Unable to set max-tx-rate for VF %d\n", + vf->vf_id); + return ret; + } + + vf->max_tx_rate = max_tx_rate; + } + + return 0; +} + /** * ice_get_vf_stats - populate some stats for the VF * @netdev: the netdev of the PF diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h index 3115284e5411..5ff93a08f54c 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h @@ -125,7 +125,8 @@ struct ice_vf { * the main LAN VSI for the PF. */ u16 lan_vsi_num; /* ID as used by firmware */ - unsigned int tx_rate; /* Tx bandwidth limit in Mbps */ + unsigned int min_tx_rate; /* Minimum Tx bandwidth limit in Mbps */ + unsigned int max_tx_rate; /* Maximum Tx bandwidth limit in Mbps */ DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */ u64 num_inval_msgs; /* number of continuous invalid msgs */ @@ -172,6 +173,10 @@ int ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, __be16 vlan_proto); +int +ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, + int max_tx_rate); + int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted); int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state); @@ -303,6 +308,14 @@ ice_set_vf_link_state(struct net_device __always_unused *netdev, return -EOPNOTSUPP; } +static inline int +ice_set_vf_bw(struct net_device __always_unused *netdev, + int __always_unused vf_id, int __always_unused min_tx_rate, + int __always_unused max_tx_rate) +{ + return -EOPNOTSUPP; +} + static inline int ice_calc_vf_reg_idx(struct ice_vf __always_unused *vf, struct ice_q_vector __always_unused *q_vector) From d8eb7ad5e46c65d57fa72571cf2ccca7da1e7196 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Mon, 20 Sep 2021 12:30:12 -0700 Subject: [PATCH 02/10] ice: update dim usage and moderation The driver was having trouble with unreliable latency when doing single threaded ping-pong tests. This was root caused to the DIM algorithm landing on a too slow interrupt value, which caused high latency, and it was especially present when queues were being switched frequently by the scheduler as happens on default setups today. In attempting to improve this, we allow the upper rate limit for interrupts to move to rate limit of 4 microseconds as a max, which means that no vector can generate more than 250,000 interrupts per second. The old config was up to 100,000. The driver previously tried to program the rate limit too frequently and if the receive and transmit side were both active on the same vector, the INTRL would be set incorrectly, and this change fixes that issue as a side effect of the redesign. This driver will operate from now on with a slightly changed DIM table with more emphasis towards latency sensitivity by having more table entries with lower latency than with high latency (high being >= 64 microseconds). The driver also resets the DIM algorithm state with a new stats set when there is no work done and the data becomes stale (older than 1 second), for the respective receive or transmit portion of the interrupt. Add a new helper for setting rate limit, which will be used more in a followup patch. Signed-off-by: Jesse Brandeburg Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 25 +++++ drivers/net/ethernet/intel/ice/ice_lib.h | 1 + drivers/net/ethernet/intel/ice/ice_main.c | 119 ++++++++++++---------- drivers/net/ethernet/intel/ice/ice_txrx.c | 84 ++++++++------- 4 files changed, 142 insertions(+), 87 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index acff8d3b1c97..fd894e89be3b 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1941,6 +1941,31 @@ void ice_write_itr(struct ice_ring_container *rc, u16 itr) __ice_write_itr(q_vector, rc, itr); } +/** + * ice_set_q_vector_intrl - set up interrupt rate limiting + * @q_vector: the vector to be configured + * + * Interrupt rate limiting is local to the vector, not per-queue so we must + * detect if either ring container has dynamic moderation enabled to decide + * what to set the interrupt rate limit to via INTRL settings. In the case that + * dynamic moderation is disabled on both, write the value with the cached + * setting to make sure INTRL register matches the user visible value. + */ +void ice_set_q_vector_intrl(struct ice_q_vector *q_vector) +{ + if (ITR_IS_DYNAMIC(&q_vector->tx) || ITR_IS_DYNAMIC(&q_vector->rx)) { + /* in the case of dynamic enabled, cap each vector to no more + * than (4 us) 250,000 ints/sec, which allows low latency + * but still less than 500,000 interrupts per second, which + * reduces CPU a bit in the case of the lowest latency + * setting. The 4 here is a value in microseconds. + */ + ice_write_intrl(q_vector, 4); + } else { + ice_write_intrl(q_vector, q_vector->intrl); + } +} + /** * ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW * @vsi: the VSI being configured diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index d395bd590e84..c79fcbf82d8f 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -103,6 +103,7 @@ int ice_status_to_errno(enum ice_status err); void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl); void ice_write_itr(struct ice_ring_container *rc, u16 itr); +void ice_set_q_vector_intrl(struct ice_q_vector *q_vector); enum ice_status ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 99647dceefc4..846623a97723 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5502,77 +5502,59 @@ int ice_vsi_cfg(struct ice_vsi *vsi) } /* THEORY OF MODERATION: - * The below code creates custom DIM profiles for use by this driver, because - * the ice driver hardware works differently than the hardware that DIMLIB was + * The ice driver hardware works differently than the hardware that DIMLIB was * originally made for. ice hardware doesn't have packet count limits that * can trigger an interrupt, but it *does* have interrupt rate limit support, - * and this code adds that capability to be used by the driver when it's using - * DIMLIB. The DIMLIB code was always designed to be a suggestion to the driver - * for how to "respond" to traffic and interrupts, so this driver uses a - * slightly different set of moderation parameters to get best performance. + * which is hard-coded to a limit of 250,000 ints/second. + * If not using dynamic moderation, the INTRL value can be modified + * by ethtool rx-usecs-high. */ struct ice_dim { /* the throttle rate for interrupts, basically worst case delay before * an initial interrupt fires, value is stored in microseconds. */ u16 itr; - /* the rate limit for interrupts, which can cap a delay from a small - * ITR at a certain amount of interrupts per second. f.e. a 2us ITR - * could yield as much as 500,000 interrupts per second, but with a - * 10us rate limit, it limits to 100,000 interrupts per second. Value - * is stored in microseconds. - */ - u16 intrl; }; /* Make a different profile for Rx that doesn't allow quite so aggressive - * moderation at the high end (it maxes out at 128us or about 8k interrupts a - * second. The INTRL/rate parameters here are only useful to cap small ITR - * values, which is why for larger ITR's - like 128, which can only generate - * 8k interrupts per second, there is no point to rate limit and the values - * are set to zero. The rate limit values do affect latency, and so must - * be reasonably small so to not impact latency sensitive tests. + * moderation at the high end (it maxes out at 126us or about 8k interrupts a + * second. */ static const struct ice_dim rx_profile[] = { - {2, 10}, - {8, 16}, - {32, 0}, - {96, 0}, - {128, 0} + {2}, /* 500,000 ints/s, capped at 250K by INTRL */ + {8}, /* 125,000 ints/s */ + {16}, /* 62,500 ints/s */ + {62}, /* 16,129 ints/s */ + {126} /* 7,936 ints/s */ }; /* The transmit profile, which has the same sorts of values * as the previous struct */ static const struct ice_dim tx_profile[] = { - {2, 10}, - {8, 16}, - {64, 0}, - {128, 0}, - {256, 0} + {2}, /* 500,000 ints/s, capped at 250K by INTRL */ + {8}, /* 125,000 ints/s */ + {40}, /* 16,125 ints/s */ + {128}, /* 7,812 ints/s */ + {256} /* 3,906 ints/s */ }; static void ice_tx_dim_work(struct work_struct *work) { struct ice_ring_container *rc; - struct ice_q_vector *q_vector; struct dim *dim; - u16 itr, intrl; + u16 itr; dim = container_of(work, struct dim, work); - rc = container_of(dim, struct ice_ring_container, dim); - q_vector = container_of(rc, struct ice_q_vector, tx); + rc = (struct ice_ring_container *)dim->priv; - if (dim->profile_ix >= ARRAY_SIZE(tx_profile)) - dim->profile_ix = ARRAY_SIZE(tx_profile) - 1; + WARN_ON(dim->profile_ix >= ARRAY_SIZE(tx_profile)); /* look up the values in our local table */ itr = tx_profile[dim->profile_ix].itr; - intrl = tx_profile[dim->profile_ix].intrl; - ice_trace(tx_dim_work, q_vector, dim); + ice_trace(tx_dim_work, container_of(rc, struct ice_q_vector, tx), dim); ice_write_itr(rc, itr); - ice_write_intrl(q_vector, intrl); dim->state = DIM_START_MEASURE; } @@ -5580,28 +5562,65 @@ static void ice_tx_dim_work(struct work_struct *work) static void ice_rx_dim_work(struct work_struct *work) { struct ice_ring_container *rc; - struct ice_q_vector *q_vector; struct dim *dim; - u16 itr, intrl; + u16 itr; dim = container_of(work, struct dim, work); - rc = container_of(dim, struct ice_ring_container, dim); - q_vector = container_of(rc, struct ice_q_vector, rx); + rc = (struct ice_ring_container *)dim->priv; - if (dim->profile_ix >= ARRAY_SIZE(rx_profile)) - dim->profile_ix = ARRAY_SIZE(rx_profile) - 1; + WARN_ON(dim->profile_ix >= ARRAY_SIZE(rx_profile)); /* look up the values in our local table */ itr = rx_profile[dim->profile_ix].itr; - intrl = rx_profile[dim->profile_ix].intrl; - ice_trace(rx_dim_work, q_vector, dim); + ice_trace(rx_dim_work, container_of(rc, struct ice_q_vector, rx), dim); ice_write_itr(rc, itr); - ice_write_intrl(q_vector, intrl); dim->state = DIM_START_MEASURE; } +#define ICE_DIM_DEFAULT_PROFILE_IX 1 + +/** + * ice_init_moderation - set up interrupt moderation + * @q_vector: the vector containing rings to be configured + * + * Set up interrupt moderation registers, with the intent to do the right thing + * when called from reset or from probe, and whether or not dynamic moderation + * is enabled or not. Take special care to write all the registers in both + * dynamic moderation mode or not in order to make sure hardware is in a known + * state. + */ +static void ice_init_moderation(struct ice_q_vector *q_vector) +{ + struct ice_ring_container *rc; + bool tx_dynamic, rx_dynamic; + + rc = &q_vector->tx; + INIT_WORK(&rc->dim.work, ice_tx_dim_work); + rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX; + rc->dim.priv = rc; + tx_dynamic = ITR_IS_DYNAMIC(rc); + + /* set the initial TX ITR to match the above */ + ice_write_itr(rc, tx_dynamic ? + tx_profile[rc->dim.profile_ix].itr : rc->itr_setting); + + rc = &q_vector->rx; + INIT_WORK(&rc->dim.work, ice_rx_dim_work); + rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX; + rc->dim.priv = rc; + rx_dynamic = ITR_IS_DYNAMIC(rc); + + /* set the initial RX ITR to match the above */ + ice_write_itr(rc, rx_dynamic ? rx_profile[rc->dim.profile_ix].itr : + rc->itr_setting); + + ice_set_q_vector_intrl(q_vector); +} + /** * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI * @vsi: the VSI being configured @@ -5616,11 +5635,7 @@ static void ice_napi_enable_all(struct ice_vsi *vsi) ice_for_each_q_vector(vsi, q_idx) { struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; - INIT_WORK(&q_vector->tx.dim.work, ice_tx_dim_work); - q_vector->tx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; - - INIT_WORK(&q_vector->rx.dim.work, ice_rx_dim_work); - q_vector->rx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + ice_init_moderation(q_vector); if (q_vector->rx.rx_ring || q_vector->tx.tx_ring) napi_enable(&q_vector->napi); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 01ae331927bd..1373b97b117a 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1259,6 +1259,41 @@ construct_skb: return failure ? budget : (int)total_rx_pkts; } +static void __ice_update_sample(struct ice_q_vector *q_vector, + struct ice_ring_container *rc, + struct dim_sample *sample, + bool is_tx) +{ + u64 packets = 0, bytes = 0; + + if (is_tx) { + struct ice_tx_ring *tx_ring; + + ice_for_each_tx_ring(tx_ring, *rc) { + packets += tx_ring->stats.pkts; + bytes += tx_ring->stats.bytes; + } + } else { + struct ice_rx_ring *rx_ring; + + ice_for_each_rx_ring(rx_ring, *rc) { + packets += rx_ring->stats.pkts; + bytes += rx_ring->stats.bytes; + } + } + + dim_update_sample(q_vector->total_events, packets, bytes, sample); + sample->comp_ctr = 0; + + /* if dim settings get stale, like when not updated for 1 + * second or longer, force it to start again. This addresses the + * frequent case of an idle queue being switched to by the + * scheduler. The 1,000 here means 1,000 milliseconds. + */ + if (ktime_ms_delta(sample->time, rc->dim.start_sample.time) >= 1000) + rc->dim.state = DIM_START_MEASURE; +} + /** * ice_net_dim - Update net DIM algorithm * @q_vector: the vector associated with the interrupt @@ -1274,34 +1309,16 @@ static void ice_net_dim(struct ice_q_vector *q_vector) struct ice_ring_container *rx = &q_vector->rx; if (ITR_IS_DYNAMIC(tx)) { - struct dim_sample dim_sample = {}; - u64 packets = 0, bytes = 0; - struct ice_tx_ring *ring; - - ice_for_each_tx_ring(ring, q_vector->tx) { - packets += ring->stats.pkts; - bytes += ring->stats.bytes; - } - - dim_update_sample(q_vector->total_events, packets, bytes, - &dim_sample); + struct dim_sample dim_sample; + __ice_update_sample(q_vector, tx, &dim_sample, true); net_dim(&tx->dim, dim_sample); } if (ITR_IS_DYNAMIC(rx)) { - struct dim_sample dim_sample = {}; - u64 packets = 0, bytes = 0; - struct ice_rx_ring *ring; - - ice_for_each_rx_ring(ring, q_vector->rx) { - packets += ring->stats.pkts; - bytes += ring->stats.bytes; - } - - dim_update_sample(q_vector->total_events, packets, bytes, - &dim_sample); + struct dim_sample dim_sample; + __ice_update_sample(q_vector, rx, &dim_sample, false); net_dim(&rx->dim, dim_sample); } } @@ -1328,15 +1345,14 @@ static u32 ice_buildreg_itr(u16 itr_idx, u16 itr) } /** - * ice_update_ena_itr - Update ITR moderation and re-enable MSI-X interrupt + * ice_enable_interrupt - re-enable MSI-X interrupt * @q_vector: the vector associated with the interrupt to enable * - * Update the net_dim() algorithm and re-enable the interrupt associated with - * this vector. - * - * If the VSI is down, the interrupt will not be re-enabled. + * If the VSI is down, the interrupt will not be re-enabled. Also, + * when enabling the interrupt always reset the wb_on_itr to false + * and trigger a software interrupt to clean out internal state. */ -static void ice_update_ena_itr(struct ice_q_vector *q_vector) +static void ice_enable_interrupt(struct ice_q_vector *q_vector) { struct ice_vsi *vsi = q_vector->vsi; bool wb_en = q_vector->wb_on_itr; @@ -1351,10 +1367,6 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector) if (wb_en) q_vector->wb_on_itr = false; - /* This will do nothing if dynamic updates are not enabled. */ - ice_net_dim(q_vector); - - /* net_dim() updates ITR out-of-band using a work item */ itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0); /* trigger an immediate software interrupt when exiting * busy poll, to make sure to catch any pending cleanups @@ -1482,10 +1494,12 @@ int ice_napi_poll(struct napi_struct *napi, int budget) /* Exit the polling mode, but don't re-enable interrupts if stack might * poll us due to busy-polling */ - if (likely(napi_complete_done(napi, work_done))) - ice_update_ena_itr(q_vector); - else + if (likely(napi_complete_done(napi, work_done))) { + ice_net_dim(q_vector); + ice_enable_interrupt(q_vector); + } else { ice_set_wb_on_itr(q_vector); + } return min_t(int, work_done, budget - 1); } From d16a4f45f3a3afcb56910a7242cc621c071e80e4 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Mon, 20 Sep 2021 12:30:13 -0700 Subject: [PATCH 03/10] ice: fix rate limit update after coalesce change If the adaptive settings are changed with ethtool -C ethx adaptive-rx off adaptive-tx off then the interrupt rate limit should be maintained as a user set value, but only if BOTH adaptive settings are off. Fix a bug where the rate limit that was being used in adaptive mode was staying set in the register but was not reported correctly by ethtool -c ethx. Due to long lines include a small refactor of q_vector variable. Fixes: b8b4772377dd ("ice: refactor interrupt moderation writes") Signed-off-by: Jesse Brandeburg Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 17 +++++++++++------ drivers/net/ethernet/intel/ice/ice_lib.c | 4 ++-- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 6e0af72c2020..f4b3c5b73c7d 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3640,6 +3640,9 @@ ice_set_rc_coalesce(struct ethtool_coalesce *ec, switch (rc->type) { case ICE_RX_CONTAINER: + { + struct ice_q_vector *q_vector = rc->rx_ring->q_vector; + if (ec->rx_coalesce_usecs_high > ICE_MAX_INTRL || (ec->rx_coalesce_usecs_high && ec->rx_coalesce_usecs_high < pf->hw.intrl_gran)) { @@ -3648,22 +3651,20 @@ ice_set_rc_coalesce(struct ethtool_coalesce *ec, ICE_MAX_INTRL); return -EINVAL; } - if (ec->rx_coalesce_usecs_high != rc->rx_ring->q_vector->intrl && + if (ec->rx_coalesce_usecs_high != q_vector->intrl && (ec->use_adaptive_rx_coalesce || ec->use_adaptive_tx_coalesce)) { netdev_info(vsi->netdev, "Invalid value, %s-usecs-high cannot be changed if adaptive-tx or adaptive-rx is enabled\n", c_type_str); return -EINVAL; } - if (ec->rx_coalesce_usecs_high != rc->rx_ring->q_vector->intrl) { - rc->rx_ring->q_vector->intrl = ec->rx_coalesce_usecs_high; - ice_write_intrl(rc->rx_ring->q_vector, - ec->rx_coalesce_usecs_high); - } + if (ec->rx_coalesce_usecs_high != q_vector->intrl) + q_vector->intrl = ec->rx_coalesce_usecs_high; use_adaptive_coalesce = ec->use_adaptive_rx_coalesce; coalesce_usecs = ec->rx_coalesce_usecs; break; + } case ICE_TX_CONTAINER: use_adaptive_coalesce = ec->use_adaptive_tx_coalesce; coalesce_usecs = ec->tx_coalesce_usecs; @@ -3808,6 +3809,8 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, if (ice_set_q_coalesce(vsi, ec, v_idx)) return -EINVAL; + + ice_set_q_vector_intrl(vsi->q_vectors[v_idx]); } goto set_complete; } @@ -3815,6 +3818,8 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, if (ice_set_q_coalesce(vsi, ec, q_num)) return -EINVAL; + ice_set_q_vector_intrl(vsi->q_vectors[q_num]); + set_complete: return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index fd894e89be3b..231f8bea2519 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3121,7 +3121,7 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, } vsi->q_vectors[i]->intrl = coalesce[i].intrl; - ice_write_intrl(vsi->q_vectors[i], coalesce[i].intrl); + ice_set_q_vector_intrl(vsi->q_vectors[i]); } /* the number of queue vectors increased so write whatever is in @@ -3139,7 +3139,7 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, ice_write_itr(rc, rc->itr_setting); vsi->q_vectors[i]->intrl = coalesce[0].intrl; - ice_write_intrl(vsi->q_vectors[i], coalesce[0].intrl); + ice_set_q_vector_intrl(vsi->q_vectors[i]); } } From 23be7075b31863da9cf3983aebcc8ff7537c06ac Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Mon, 20 Sep 2021 12:30:14 -0700 Subject: [PATCH 04/10] ice: fix software generating extra interrupts The driver tried to work around missing completion events that occurred while interrupts are disabled, by triggering a software interrupt whenever we exit polling (but we had to have polled at least once). This was causing a *lot* of extra interrupts for some workloads like NVMe over TCP, which resulted in regressions in performance. It was also visible when polling didn't prevent interrupts when busy_poll was enabled. Fix the extra interrupts by utilizing our previously unused 3rd ITR (interrupt throttle) index and set it to 20K interrupts per second, and then trigger a software interrupt within that rate limit. While here, slightly refactor the code to avoid an overwrite of a local variable in the case of wb_en = true. Fixes: b7306b42beaf ("ice: manage interrupts during poll exit") Signed-off-by: Jesse Brandeburg Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/ice/ice_hw_autogen.h | 1 + drivers/net/ethernet/intel/ice/ice_txrx.c | 26 +++++++++++-------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 76021d977b60..a49082485642 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -182,6 +182,7 @@ #define GLINT_DYN_CTL_INTERVAL_S 5 #define GLINT_DYN_CTL_INTERVAL_M ICE_M(0xFFF, 5) #define GLINT_DYN_CTL_SW_ITR_INDX_ENA_M BIT(24) +#define GLINT_DYN_CTL_SW_ITR_INDX_S 25 #define GLINT_DYN_CTL_SW_ITR_INDX_M ICE_M(0x3, 25) #define GLINT_DYN_CTL_WB_ON_ITR_M BIT(30) #define GLINT_DYN_CTL_INTENA_MSK_M BIT(31) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 1373b97b117a..8f908af9bdd5 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1361,21 +1361,25 @@ static void ice_enable_interrupt(struct ice_q_vector *q_vector) if (test_bit(ICE_DOWN, vsi->state)) return; - /* When exiting WB_ON_ITR, let ITR resume its normal - * interrupts-enabled path. + /* trigger an ITR delayed software interrupt when exiting busy poll, to + * make sure to catch any pending cleanups that might have been missed + * due to interrupt state transition. If busy poll or poll isn't + * enabled, then don't update ITR, and just enable the interrupt. */ - if (wb_en) + if (!wb_en) { + itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0); + } else { q_vector->wb_on_itr = false; - itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0); - /* trigger an immediate software interrupt when exiting - * busy poll, to make sure to catch any pending cleanups - * that might have been missed due to interrupt state - * transition. - */ - if (wb_en) { + /* do two things here with a single write. Set up the third ITR + * index to be used for software interrupt moderation, and then + * trigger a software interrupt with a rate limit of 20K on + * software interrupts, this will help avoid high interrupt + * loads due to frequently polling and exiting polling. + */ + itr_val = ice_buildreg_itr(ICE_IDX_ITR2, ICE_ITR_20K); itr_val |= GLINT_DYN_CTL_SWINT_TRIG_M | - GLINT_DYN_CTL_SW_ITR_INDX_M | + ICE_IDX_ITR2 << GLINT_DYN_CTL_SW_ITR_INDX_S | GLINT_DYN_CTL_SW_ITR_INDX_ENA_M; } wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val); From 1281b7459657abcd6db74fbfbf9b20717de8966c Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Fri, 15 Oct 2021 10:27:19 +0200 Subject: [PATCH 05/10] ice: Forbid trusted VFs in switchdev mode Merge issues caused the check for switchdev mode has been inserted in wrong place. It should be in ice_set_vf_trust not in ice_set_vf_mac. Trusted VFs are forbidden in switchdev mode because they should be configured only from the host side. Fixes: 1c54c839935b ("ice: enable/disable switchdev when managing VFs") Signed-off-by: Wojciech Drewek Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 86f265268ac8..862d191284ba 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -4839,11 +4839,6 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) struct ice_vf *vf; int ret; - if (ice_is_eswitch_mode_switchdev(pf)) { - dev_info(ice_pf_to_dev(pf), "Trusted VF is forbidden in switchdev mode\n"); - return -EOPNOTSUPP; - } - if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; @@ -4903,6 +4898,11 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) struct ice_vf *vf; int ret; + if (ice_is_eswitch_mode_switchdev(pf)) { + dev_info(ice_pf_to_dev(pf), "Trusted VF is forbidden in switchdev mode\n"); + return -EOPNOTSUPP; + } + if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; From 73b483b7902918cd7de3481313ed4943eb9c3f3b Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Thu, 23 Sep 2021 14:43:48 +0200 Subject: [PATCH 06/10] ice: Manage act flags for switchdev offloads Currently it is not possible to set/unset lb_en and lan_en flags for advanced rules during their creation. Both flags are enabled by default. In case of switchdev offloads for egress traffic we need lb_en to be disabled. Because of that, we work around it by updating the rule immediately after its creation. This change allows us to set/unset those flags right away and it gets rid of old workaround as well. Using ice_adv_rule_flags_info structure we can pass info about flags we want to be set for a given advanced rule. Flags are stored in flags_info.act. Values from act would be used only if act_valid was set to true, otherwise default values would be used. Signed-off-by: Wojciech Drewek Acked-by: Paul Menzel Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_fltr.c | 127 -------------------- drivers/net/ethernet/intel/ice/ice_fltr.h | 4 - drivers/net/ethernet/intel/ice/ice_switch.c | 9 +- drivers/net/ethernet/intel/ice/ice_switch.h | 11 ++ drivers/net/ethernet/intel/ice/ice_tc_lib.c | 8 +- 5 files changed, 21 insertions(+), 138 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.c b/drivers/net/ethernet/intel/ice/ice_fltr.c index f8c59df4ff9c..c2e78eaf4ccb 100644 --- a/drivers/net/ethernet/intel/ice/ice_fltr.c +++ b/drivers/net/ethernet/intel/ice/ice_fltr.c @@ -453,133 +453,6 @@ static u32 ice_fltr_build_action(u16 vsi_id) ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_VALID_BIT; } -/** - * ice_fltr_find_adv_entry - find advanced rule - * @rules: list of rules - * @rule_id: id of wanted rule - */ -static struct ice_adv_fltr_mgmt_list_entry * -ice_fltr_find_adv_entry(struct list_head *rules, u16 rule_id) -{ - struct ice_adv_fltr_mgmt_list_entry *entry; - - list_for_each_entry(entry, rules, list_entry) { - if (entry->rule_info.fltr_rule_id == rule_id) - return entry; - } - - return NULL; -} - -/** - * ice_fltr_update_adv_rule_flags - update flags on advanced rule - * @vsi: pointer to VSI - * @recipe_id: id of recipe - * @entry: advanced rule entry - * @new_flags: flags to update - */ -static enum ice_status -ice_fltr_update_adv_rule_flags(struct ice_vsi *vsi, u16 recipe_id, - struct ice_adv_fltr_mgmt_list_entry *entry, - u32 new_flags) -{ - struct ice_adv_rule_info *info = &entry->rule_info; - struct ice_sw_act_ctrl *act = &info->sw_act; - u32 action; - - if (act->fltr_act != ICE_FWD_TO_VSI) - return ICE_ERR_NOT_SUPPORTED; - - action = ice_fltr_build_action(act->fwd_id.hw_vsi_id); - - return ice_fltr_update_rule_flags(&vsi->back->hw, info->fltr_rule_id, - recipe_id, action, info->sw_act.flag, - act->src, new_flags); -} - -/** - * ice_fltr_find_regular_entry - find regular rule - * @rules: list of rules - * @rule_id: id of wanted rule - */ -static struct ice_fltr_mgmt_list_entry * -ice_fltr_find_regular_entry(struct list_head *rules, u16 rule_id) -{ - struct ice_fltr_mgmt_list_entry *entry; - - list_for_each_entry(entry, rules, list_entry) { - if (entry->fltr_info.fltr_rule_id == rule_id) - return entry; - } - - return NULL; -} - -/** - * ice_fltr_update_regular_rule - update flags on regular rule - * @vsi: pointer to VSI - * @recipe_id: id of recipe - * @entry: regular rule entry - * @new_flags: flags to update - */ -static enum ice_status -ice_fltr_update_regular_rule(struct ice_vsi *vsi, u16 recipe_id, - struct ice_fltr_mgmt_list_entry *entry, - u32 new_flags) -{ - struct ice_fltr_info *info = &entry->fltr_info; - u32 action; - - if (info->fltr_act != ICE_FWD_TO_VSI) - return ICE_ERR_NOT_SUPPORTED; - - action = ice_fltr_build_action(info->fwd_id.hw_vsi_id); - - return ice_fltr_update_rule_flags(&vsi->back->hw, info->fltr_rule_id, - recipe_id, action, info->flag, - info->src, new_flags); -} - -/** - * ice_fltr_update_flags - update flags on rule - * @vsi: pointer to VSI - * @rule_id: id of rule - * @recipe_id: id of recipe - * @new_flags: flags to update - * - * Function updates flags on regular and advance rule. - * - * Flags should be a combination of ICE_SINGLE_ACT_LB_ENABLE and - * ICE_SINGLE_ACT_LAN_ENABLE. - */ -enum ice_status -ice_fltr_update_flags(struct ice_vsi *vsi, u16 rule_id, u16 recipe_id, - u32 new_flags) -{ - struct ice_adv_fltr_mgmt_list_entry *adv_entry; - struct ice_fltr_mgmt_list_entry *regular_entry; - struct ice_hw *hw = &vsi->back->hw; - struct ice_sw_recipe *recp_list; - struct list_head *fltr_rules; - - recp_list = &hw->switch_info->recp_list[recipe_id]; - if (!recp_list) - return ICE_ERR_DOES_NOT_EXIST; - - fltr_rules = &recp_list->filt_rules; - regular_entry = ice_fltr_find_regular_entry(fltr_rules, rule_id); - if (regular_entry) - return ice_fltr_update_regular_rule(vsi, recipe_id, - regular_entry, new_flags); - - adv_entry = ice_fltr_find_adv_entry(fltr_rules, rule_id); - if (adv_entry) - return ice_fltr_update_adv_rule_flags(vsi, recipe_id, - adv_entry, new_flags); - - return ICE_ERR_DOES_NOT_EXIST; -} - /** * ice_fltr_update_flags_dflt_rule - update flags on default rule * @vsi: pointer to VSI diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.h b/drivers/net/ethernet/intel/ice/ice_fltr.h index 949e38ce016c..8eec4febead1 100644 --- a/drivers/net/ethernet/intel/ice/ice_fltr.h +++ b/drivers/net/ethernet/intel/ice/ice_fltr.h @@ -36,10 +36,6 @@ enum ice_status ice_fltr_remove_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag, enum ice_sw_fwd_act_type action); void ice_fltr_remove_all(struct ice_vsi *vsi); - -enum ice_status -ice_fltr_update_flags(struct ice_vsi *vsi, u16 rule_id, u16 recipe_id, - u32 new_flags); enum ice_status ice_fltr_update_flags_dflt_rule(struct ice_vsi *vsi, u16 rule_id, u8 direction, u32 new_flags); diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 0d07547b40f1..a4a299012f9f 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -4783,7 +4783,14 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, s_rule = kzalloc(rule_buf_sz, GFP_KERNEL); if (!s_rule) return ICE_ERR_NO_MEMORY; - act |= ICE_SINGLE_ACT_LB_ENABLE | ICE_SINGLE_ACT_LAN_ENABLE; + if (!rinfo->flags_info.act_valid) { + act |= ICE_SINGLE_ACT_LAN_ENABLE; + act |= ICE_SINGLE_ACT_LB_ENABLE; + } else { + act |= rinfo->flags_info.act & (ICE_SINGLE_ACT_LAN_ENABLE | + ICE_SINGLE_ACT_LB_ENABLE); + } + switch (rinfo->sw_act.fltr_act) { case ICE_FWD_TO_VSI: act |= (rinfo->sw_act.fwd_id.hw_vsi_id << diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index 34b7f74b1ab8..d4c0a3b594af 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -160,11 +160,22 @@ struct ice_rule_query_data { u16 vsi_handle; }; +/* This structure allows to pass info about lb_en and lan_en + * flags to ice_add_adv_rule. Values in act would be used + * only if act_valid was set to true, otherwise default + * values would be used. + */ +struct ice_adv_rule_flags_info { + u32 act; + u8 act_valid; /* indicate if flags in act are valid */ +}; + struct ice_adv_rule_info { struct ice_sw_act_ctrl sw_act; u32 priority; u8 rx; /* true means LOOKUP_RX otherwise LOOKUP_TX */ u16 fltr_rule_id; + struct ice_adv_rule_flags_info flags_info; }; /* A collection of one or more four word recipe */ diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index 4c1daa1a02a1..1dccfd116bc9 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -274,6 +274,8 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) rule_info.sw_act.flag |= ICE_FLTR_TX; rule_info.sw_act.src = vsi->idx; rule_info.rx = false; + rule_info.flags_info.act = ICE_SINGLE_ACT_LAN_ENABLE; + rule_info.flags_info.act_valid = true; } /* specify the cookie as filter_rule_id */ @@ -296,12 +298,6 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) fltr->rid = rule_added.rid; fltr->rule_id = rule_added.rule_id; - if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS) { - if (ice_fltr_update_flags(vsi, fltr->rule_id, fltr->rid, - ICE_SINGLE_ACT_LAN_ENABLE)) - ice_rem_adv_rule_by_id(hw, &rule_added); - } - exit: kfree(list); return ret; From 3f13f570ff2c413dbdf088058a3e019db376f49d Mon Sep 17 00:00:00 2001 From: Wojciech Drewek Date: Fri, 8 Oct 2021 10:44:03 +0200 Subject: [PATCH 07/10] ice: Refactor PR ethtool ops This patch improves a few things: - it fixes issue where ethtool -i reports that PR supports priv-flags and tests when in fact it does not support them - instead of using the same functions for both PF and PR ethtool ops, this patch introduces separate ops for both cases and internal functions with core logic. - prevent accessing VF VSI while VF is not ready by calling ice_check_vf_ready_for_cfg - all PR specific functions in ethtool.c were moved to one place in file - instead overwriting n_priv_flags in ice_repr_get_drvinfo, priv-flags code was moved from __ice_get_drvinfo to ice_get_drvinfo Signed-off-by: Wojciech Drewek Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool.c | 99 +++++++++++++++----- 1 file changed, 74 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index f4b3c5b73c7d..8b3eef6632e9 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -192,7 +192,6 @@ __ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo, strscpy(drvinfo->bus_info, pci_name(pf->pdev), sizeof(drvinfo->bus_info)); - drvinfo->n_priv_flags = ICE_PRIV_FLAG_ARRAY_SIZE; } static void @@ -201,18 +200,8 @@ ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) struct ice_netdev_priv *np = netdev_priv(netdev); __ice_get_drvinfo(netdev, drvinfo, np->vsi); -} -static void -ice_repr_get_drvinfo(struct net_device *netdev, - struct ethtool_drvinfo *drvinfo) -{ - struct ice_repr *repr = ice_netdev_to_repr(netdev); - - if (ice_check_vf_ready_for_cfg(repr->vf)) - return; - - __ice_get_drvinfo(netdev, drvinfo, repr->src_vsi); + drvinfo->n_priv_flags = ICE_PRIV_FLAG_ARRAY_SIZE; } static int ice_get_regs_len(struct net_device __always_unused *netdev) @@ -886,10 +875,10 @@ skip_ol_tests: netdev_info(netdev, "testing finished\n"); } -static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +static void +__ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data, + struct ice_vsi *vsi) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = ice_get_netdev_priv_vsi(np); unsigned int i; u8 *p = data; @@ -940,6 +929,13 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) } } +static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + + __ice_get_strings(netdev, stringset, data, np->vsi); +} + static int ice_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state) { @@ -1331,9 +1327,6 @@ static int ice_get_sset_count(struct net_device *netdev, int sset) * order of strings will suffer from race conditions and are * not safe. */ - if (ice_is_port_repr_netdev(netdev)) - return ICE_VSI_STATS_LEN; - return ICE_ALL_STATS_LEN(netdev); case ETH_SS_TEST: return ICE_TEST_LEN; @@ -1345,11 +1338,10 @@ static int ice_get_sset_count(struct net_device *netdev, int sset) } static void -ice_get_ethtool_stats(struct net_device *netdev, - struct ethtool_stats __always_unused *stats, u64 *data) +__ice_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats __always_unused *stats, u64 *data, + struct ice_vsi *vsi) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = ice_get_netdev_priv_vsi(np); struct ice_pf *pf = vsi->back; struct ice_tx_ring *tx_ring; struct ice_rx_ring *rx_ring; @@ -1416,6 +1408,15 @@ ice_get_ethtool_stats(struct net_device *netdev, } } +static void +ice_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats __always_unused *stats, u64 *data) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + + __ice_get_ethtool_stats(netdev, stats, data, np->vsi); +} + #define ICE_PHY_TYPE_LOW_MASK_MIN_1G (ICE_PHY_TYPE_LOW_100BASE_TX | \ ICE_PHY_TYPE_LOW_100M_SGMII) @@ -3839,6 +3840,54 @@ ice_set_per_q_coalesce(struct net_device *netdev, u32 q_num, return __ice_set_coalesce(netdev, ec, q_num); } +static void +ice_repr_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *drvinfo) +{ + struct ice_repr *repr = ice_netdev_to_repr(netdev); + + if (ice_check_vf_ready_for_cfg(repr->vf)) + return; + + __ice_get_drvinfo(netdev, drvinfo, repr->src_vsi); +} + +static void +ice_repr_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + struct ice_repr *repr = ice_netdev_to_repr(netdev); + + /* for port representors only ETH_SS_STATS is supported */ + if (ice_check_vf_ready_for_cfg(repr->vf) || + stringset != ETH_SS_STATS) + return; + + __ice_get_strings(netdev, stringset, data, repr->src_vsi); +} + +static void +ice_repr_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats __always_unused *stats, + u64 *data) +{ + struct ice_repr *repr = ice_netdev_to_repr(netdev); + + if (ice_check_vf_ready_for_cfg(repr->vf)) + return; + + __ice_get_ethtool_stats(netdev, stats, data, repr->src_vsi); +} + +static int ice_repr_get_sset_count(struct net_device *netdev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ICE_VSI_STATS_LEN; + default: + return -EOPNOTSUPP; + } +} + #define ICE_I2C_EEPROM_DEV_ADDR 0xA0 #define ICE_I2C_EEPROM_DEV_ADDR2 0xA2 #define ICE_MODULE_TYPE_SFP 0x03 @@ -4093,9 +4142,9 @@ void ice_set_ethtool_safe_mode_ops(struct net_device *netdev) static const struct ethtool_ops ice_ethtool_repr_ops = { .get_drvinfo = ice_repr_get_drvinfo, .get_link = ethtool_op_get_link, - .get_strings = ice_get_strings, - .get_ethtool_stats = ice_get_ethtool_stats, - .get_sset_count = ice_get_sset_count, + .get_strings = ice_repr_get_strings, + .get_ethtool_stats = ice_repr_get_ethtool_stats, + .get_sset_count = ice_repr_get_sset_count, }; /** From 7c1b694adab1a4a24477c393843730a90f595124 Mon Sep 17 00:00:00 2001 From: Cai Huoqing Date: Wed, 22 Sep 2021 20:59:46 +0800 Subject: [PATCH 08/10] ice: Make use of the helper function devm_add_action_or_reset() The helper function devm_add_action_or_reset() will internally call devm_add_action(), and if devm_add_action() fails then it will execute the action mentioned and return the error code. So use devm_add_action_or_reset() instead of devm_add_action() to simplify the error handling, reduce the code. Signed-off-by: Cai Huoqing Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_devlink.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 55353bf4cbef..e3d9f3f3a631 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -452,10 +452,8 @@ struct ice_pf *ice_allocate_pf(struct device *dev) return NULL; /* Add an action to teardown the devlink when unwinding the driver */ - if (devm_add_action(dev, ice_devlink_free, devlink)) { - devlink_free(devlink); + if (devm_add_action_or_reset(dev, ice_devlink_free, devlink)) return NULL; - } return devlink_priv(devlink); } From 6f3323536aa8895b1e3fc6fe6762e7ddee916de1 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 6 Oct 2021 13:09:08 -0500 Subject: [PATCH 09/10] ice: use devm_kcalloc() instead of devm_kzalloc() Use 2-factor multiplication argument form devm_kcalloc() instead of devm_kzalloc(). Link: https://github.com/KSPP/linux/issues/162 Signed-off-by: Gustavo A. R. Silva Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c | 4 ++-- drivers/net/ethernet/intel/ice/ice_txrx.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index 16de603b280c..38960bcc384c 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -706,7 +706,7 @@ ice_create_init_fdir_rule(struct ice_pf *pf, enum ice_fltr_ptype flow) if (!seg) return -ENOMEM; - tun_seg = devm_kzalloc(dev, sizeof(*seg) * ICE_FD_HW_SEG_MAX, + tun_seg = devm_kcalloc(dev, sizeof(*seg), ICE_FD_HW_SEG_MAX, GFP_KERNEL); if (!tun_seg) { devm_kfree(dev, seg); @@ -1068,7 +1068,7 @@ ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp, if (!seg) return -ENOMEM; - tun_seg = devm_kzalloc(dev, sizeof(*seg) * ICE_FD_HW_SEG_MAX, + tun_seg = devm_kcalloc(dev, sizeof(*seg), ICE_FD_HW_SEG_MAX, GFP_KERNEL); if (!tun_seg) { devm_kfree(dev, seg); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 8f908af9bdd5..bc3ba19dc88f 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -343,7 +343,7 @@ int ice_setup_tx_ring(struct ice_tx_ring *tx_ring) /* warn if we are about to overwrite the pointer */ WARN_ON(tx_ring->tx_buf); tx_ring->tx_buf = - devm_kzalloc(dev, sizeof(*tx_ring->tx_buf) * tx_ring->count, + devm_kcalloc(dev, sizeof(*tx_ring->tx_buf), tx_ring->count, GFP_KERNEL); if (!tx_ring->tx_buf) return -ENOMEM; @@ -475,7 +475,7 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) /* warn if we are about to overwrite the pointer */ WARN_ON(rx_ring->rx_buf); rx_ring->rx_buf = - devm_kzalloc(dev, sizeof(*rx_ring->rx_buf) * rx_ring->count, + devm_kcalloc(dev, sizeof(*rx_ring->rx_buf), rx_ring->count, GFP_KERNEL); if (!rx_ring->rx_buf) return -ENOMEM; From 8702ed0b0de146c980883896b4cbb40ce860142e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Oct 2021 11:00:12 +0300 Subject: [PATCH 10/10] ice: fix an error code in ice_ena_vfs() Return the error code if ice_eswitch_configure() fails. Don't return success. Fixes: 1c54c839935b ("ice: enable/disable switchdev when managing VFs") Signed-off-by: Dan Carpenter Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 862d191284ba..8e3f9ec4e35b 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -2015,7 +2015,8 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) clear_bit(ICE_VF_DIS, pf->state); - if (ice_eswitch_configure(pf)) + ret = ice_eswitch_configure(pf); + if (ret) goto err_unroll_sriov; return 0;