diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 55353bf4cbef..e3d9f3f3a631 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -452,10 +452,8 @@ struct ice_pf *ice_allocate_pf(struct device *dev) return NULL; /* Add an action to teardown the devlink when unwinding the driver */ - if (devm_add_action(dev, ice_devlink_free, devlink)) { - devlink_free(devlink); + if (devm_add_action_or_reset(dev, ice_devlink_free, devlink)) return NULL; - } return devlink_priv(devlink); } diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 6e0af72c2020..8b3eef6632e9 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -192,7 +192,6 @@ __ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo, strscpy(drvinfo->bus_info, pci_name(pf->pdev), sizeof(drvinfo->bus_info)); - drvinfo->n_priv_flags = ICE_PRIV_FLAG_ARRAY_SIZE; } static void @@ -201,18 +200,8 @@ ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) struct ice_netdev_priv *np = netdev_priv(netdev); __ice_get_drvinfo(netdev, drvinfo, np->vsi); -} -static void -ice_repr_get_drvinfo(struct net_device *netdev, - struct ethtool_drvinfo *drvinfo) -{ - struct ice_repr *repr = ice_netdev_to_repr(netdev); - - if (ice_check_vf_ready_for_cfg(repr->vf)) - return; - - __ice_get_drvinfo(netdev, drvinfo, repr->src_vsi); + drvinfo->n_priv_flags = ICE_PRIV_FLAG_ARRAY_SIZE; } static int ice_get_regs_len(struct net_device __always_unused *netdev) @@ -886,10 +875,10 @@ skip_ol_tests: netdev_info(netdev, "testing finished\n"); } -static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +static void +__ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data, + struct ice_vsi *vsi) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = ice_get_netdev_priv_vsi(np); unsigned int i; u8 *p = data; @@ -940,6 +929,13 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) } } +static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + + __ice_get_strings(netdev, stringset, data, np->vsi); +} + static int ice_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state) { @@ -1331,9 +1327,6 @@ static int ice_get_sset_count(struct net_device *netdev, int sset) * order of strings will suffer from race conditions and are * not safe. */ - if (ice_is_port_repr_netdev(netdev)) - return ICE_VSI_STATS_LEN; - return ICE_ALL_STATS_LEN(netdev); case ETH_SS_TEST: return ICE_TEST_LEN; @@ -1345,11 +1338,10 @@ static int ice_get_sset_count(struct net_device *netdev, int sset) } static void -ice_get_ethtool_stats(struct net_device *netdev, - struct ethtool_stats __always_unused *stats, u64 *data) +__ice_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats __always_unused *stats, u64 *data, + struct ice_vsi *vsi) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = ice_get_netdev_priv_vsi(np); struct ice_pf *pf = vsi->back; struct ice_tx_ring *tx_ring; struct ice_rx_ring *rx_ring; @@ -1416,6 +1408,15 @@ ice_get_ethtool_stats(struct net_device *netdev, } } +static void +ice_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats __always_unused *stats, u64 *data) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + + __ice_get_ethtool_stats(netdev, stats, data, np->vsi); +} + #define ICE_PHY_TYPE_LOW_MASK_MIN_1G (ICE_PHY_TYPE_LOW_100BASE_TX | \ ICE_PHY_TYPE_LOW_100M_SGMII) @@ -3640,6 +3641,9 @@ ice_set_rc_coalesce(struct ethtool_coalesce *ec, switch (rc->type) { case ICE_RX_CONTAINER: + { + struct ice_q_vector *q_vector = rc->rx_ring->q_vector; + if (ec->rx_coalesce_usecs_high > ICE_MAX_INTRL || (ec->rx_coalesce_usecs_high && ec->rx_coalesce_usecs_high < pf->hw.intrl_gran)) { @@ -3648,22 +3652,20 @@ ice_set_rc_coalesce(struct ethtool_coalesce *ec, ICE_MAX_INTRL); return -EINVAL; } - if (ec->rx_coalesce_usecs_high != rc->rx_ring->q_vector->intrl && + if (ec->rx_coalesce_usecs_high != q_vector->intrl && (ec->use_adaptive_rx_coalesce || ec->use_adaptive_tx_coalesce)) { netdev_info(vsi->netdev, "Invalid value, %s-usecs-high cannot be changed if adaptive-tx or adaptive-rx is enabled\n", c_type_str); return -EINVAL; } - if (ec->rx_coalesce_usecs_high != rc->rx_ring->q_vector->intrl) { - rc->rx_ring->q_vector->intrl = ec->rx_coalesce_usecs_high; - ice_write_intrl(rc->rx_ring->q_vector, - ec->rx_coalesce_usecs_high); - } + if (ec->rx_coalesce_usecs_high != q_vector->intrl) + q_vector->intrl = ec->rx_coalesce_usecs_high; use_adaptive_coalesce = ec->use_adaptive_rx_coalesce; coalesce_usecs = ec->rx_coalesce_usecs; break; + } case ICE_TX_CONTAINER: use_adaptive_coalesce = ec->use_adaptive_tx_coalesce; coalesce_usecs = ec->tx_coalesce_usecs; @@ -3808,6 +3810,8 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, if (ice_set_q_coalesce(vsi, ec, v_idx)) return -EINVAL; + + ice_set_q_vector_intrl(vsi->q_vectors[v_idx]); } goto set_complete; } @@ -3815,6 +3819,8 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, if (ice_set_q_coalesce(vsi, ec, q_num)) return -EINVAL; + ice_set_q_vector_intrl(vsi->q_vectors[q_num]); + set_complete: return 0; } @@ -3834,6 +3840,54 @@ ice_set_per_q_coalesce(struct net_device *netdev, u32 q_num, return __ice_set_coalesce(netdev, ec, q_num); } +static void +ice_repr_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *drvinfo) +{ + struct ice_repr *repr = ice_netdev_to_repr(netdev); + + if (ice_check_vf_ready_for_cfg(repr->vf)) + return; + + __ice_get_drvinfo(netdev, drvinfo, repr->src_vsi); +} + +static void +ice_repr_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + struct ice_repr *repr = ice_netdev_to_repr(netdev); + + /* for port representors only ETH_SS_STATS is supported */ + if (ice_check_vf_ready_for_cfg(repr->vf) || + stringset != ETH_SS_STATS) + return; + + __ice_get_strings(netdev, stringset, data, repr->src_vsi); +} + +static void +ice_repr_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats __always_unused *stats, + u64 *data) +{ + struct ice_repr *repr = ice_netdev_to_repr(netdev); + + if (ice_check_vf_ready_for_cfg(repr->vf)) + return; + + __ice_get_ethtool_stats(netdev, stats, data, repr->src_vsi); +} + +static int ice_repr_get_sset_count(struct net_device *netdev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ICE_VSI_STATS_LEN; + default: + return -EOPNOTSUPP; + } +} + #define ICE_I2C_EEPROM_DEV_ADDR 0xA0 #define ICE_I2C_EEPROM_DEV_ADDR2 0xA2 #define ICE_MODULE_TYPE_SFP 0x03 @@ -4088,9 +4142,9 @@ void ice_set_ethtool_safe_mode_ops(struct net_device *netdev) static const struct ethtool_ops ice_ethtool_repr_ops = { .get_drvinfo = ice_repr_get_drvinfo, .get_link = ethtool_op_get_link, - .get_strings = ice_get_strings, - .get_ethtool_stats = ice_get_ethtool_stats, - .get_sset_count = ice_get_sset_count, + .get_strings = ice_repr_get_strings, + .get_ethtool_stats = ice_repr_get_ethtool_stats, + .get_sset_count = ice_repr_get_sset_count, }; /** diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index 16de603b280c..38960bcc384c 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -706,7 +706,7 @@ ice_create_init_fdir_rule(struct ice_pf *pf, enum ice_fltr_ptype flow) if (!seg) return -ENOMEM; - tun_seg = devm_kzalloc(dev, sizeof(*seg) * ICE_FD_HW_SEG_MAX, + tun_seg = devm_kcalloc(dev, sizeof(*seg), ICE_FD_HW_SEG_MAX, GFP_KERNEL); if (!tun_seg) { devm_kfree(dev, seg); @@ -1068,7 +1068,7 @@ ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp, if (!seg) return -ENOMEM; - tun_seg = devm_kzalloc(dev, sizeof(*seg) * ICE_FD_HW_SEG_MAX, + tun_seg = devm_kcalloc(dev, sizeof(*seg), ICE_FD_HW_SEG_MAX, GFP_KERNEL); if (!tun_seg) { devm_kfree(dev, seg); diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.c b/drivers/net/ethernet/intel/ice/ice_fltr.c index f8c59df4ff9c..c2e78eaf4ccb 100644 --- a/drivers/net/ethernet/intel/ice/ice_fltr.c +++ b/drivers/net/ethernet/intel/ice/ice_fltr.c @@ -453,133 +453,6 @@ static u32 ice_fltr_build_action(u16 vsi_id) ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_VALID_BIT; } -/** - * ice_fltr_find_adv_entry - find advanced rule - * @rules: list of rules - * @rule_id: id of wanted rule - */ -static struct ice_adv_fltr_mgmt_list_entry * -ice_fltr_find_adv_entry(struct list_head *rules, u16 rule_id) -{ - struct ice_adv_fltr_mgmt_list_entry *entry; - - list_for_each_entry(entry, rules, list_entry) { - if (entry->rule_info.fltr_rule_id == rule_id) - return entry; - } - - return NULL; -} - -/** - * ice_fltr_update_adv_rule_flags - update flags on advanced rule - * @vsi: pointer to VSI - * @recipe_id: id of recipe - * @entry: advanced rule entry - * @new_flags: flags to update - */ -static enum ice_status -ice_fltr_update_adv_rule_flags(struct ice_vsi *vsi, u16 recipe_id, - struct ice_adv_fltr_mgmt_list_entry *entry, - u32 new_flags) -{ - struct ice_adv_rule_info *info = &entry->rule_info; - struct ice_sw_act_ctrl *act = &info->sw_act; - u32 action; - - if (act->fltr_act != ICE_FWD_TO_VSI) - return ICE_ERR_NOT_SUPPORTED; - - action = ice_fltr_build_action(act->fwd_id.hw_vsi_id); - - return ice_fltr_update_rule_flags(&vsi->back->hw, info->fltr_rule_id, - recipe_id, action, info->sw_act.flag, - act->src, new_flags); -} - -/** - * ice_fltr_find_regular_entry - find regular rule - * @rules: list of rules - * @rule_id: id of wanted rule - */ -static struct ice_fltr_mgmt_list_entry * -ice_fltr_find_regular_entry(struct list_head *rules, u16 rule_id) -{ - struct ice_fltr_mgmt_list_entry *entry; - - list_for_each_entry(entry, rules, list_entry) { - if (entry->fltr_info.fltr_rule_id == rule_id) - return entry; - } - - return NULL; -} - -/** - * ice_fltr_update_regular_rule - update flags on regular rule - * @vsi: pointer to VSI - * @recipe_id: id of recipe - * @entry: regular rule entry - * @new_flags: flags to update - */ -static enum ice_status -ice_fltr_update_regular_rule(struct ice_vsi *vsi, u16 recipe_id, - struct ice_fltr_mgmt_list_entry *entry, - u32 new_flags) -{ - struct ice_fltr_info *info = &entry->fltr_info; - u32 action; - - if (info->fltr_act != ICE_FWD_TO_VSI) - return ICE_ERR_NOT_SUPPORTED; - - action = ice_fltr_build_action(info->fwd_id.hw_vsi_id); - - return ice_fltr_update_rule_flags(&vsi->back->hw, info->fltr_rule_id, - recipe_id, action, info->flag, - info->src, new_flags); -} - -/** - * ice_fltr_update_flags - update flags on rule - * @vsi: pointer to VSI - * @rule_id: id of rule - * @recipe_id: id of recipe - * @new_flags: flags to update - * - * Function updates flags on regular and advance rule. - * - * Flags should be a combination of ICE_SINGLE_ACT_LB_ENABLE and - * ICE_SINGLE_ACT_LAN_ENABLE. - */ -enum ice_status -ice_fltr_update_flags(struct ice_vsi *vsi, u16 rule_id, u16 recipe_id, - u32 new_flags) -{ - struct ice_adv_fltr_mgmt_list_entry *adv_entry; - struct ice_fltr_mgmt_list_entry *regular_entry; - struct ice_hw *hw = &vsi->back->hw; - struct ice_sw_recipe *recp_list; - struct list_head *fltr_rules; - - recp_list = &hw->switch_info->recp_list[recipe_id]; - if (!recp_list) - return ICE_ERR_DOES_NOT_EXIST; - - fltr_rules = &recp_list->filt_rules; - regular_entry = ice_fltr_find_regular_entry(fltr_rules, rule_id); - if (regular_entry) - return ice_fltr_update_regular_rule(vsi, recipe_id, - regular_entry, new_flags); - - adv_entry = ice_fltr_find_adv_entry(fltr_rules, rule_id); - if (adv_entry) - return ice_fltr_update_adv_rule_flags(vsi, recipe_id, - adv_entry, new_flags); - - return ICE_ERR_DOES_NOT_EXIST; -} - /** * ice_fltr_update_flags_dflt_rule - update flags on default rule * @vsi: pointer to VSI diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.h b/drivers/net/ethernet/intel/ice/ice_fltr.h index 949e38ce016c..8eec4febead1 100644 --- a/drivers/net/ethernet/intel/ice/ice_fltr.h +++ b/drivers/net/ethernet/intel/ice/ice_fltr.h @@ -36,10 +36,6 @@ enum ice_status ice_fltr_remove_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag, enum ice_sw_fwd_act_type action); void ice_fltr_remove_all(struct ice_vsi *vsi); - -enum ice_status -ice_fltr_update_flags(struct ice_vsi *vsi, u16 rule_id, u16 recipe_id, - u32 new_flags); enum ice_status ice_fltr_update_flags_dflt_rule(struct ice_vsi *vsi, u16 rule_id, u8 direction, u32 new_flags); diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 76021d977b60..a49082485642 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -182,6 +182,7 @@ #define GLINT_DYN_CTL_INTERVAL_S 5 #define GLINT_DYN_CTL_INTERVAL_M ICE_M(0xFFF, 5) #define GLINT_DYN_CTL_SW_ITR_INDX_ENA_M BIT(24) +#define GLINT_DYN_CTL_SW_ITR_INDX_S 25 #define GLINT_DYN_CTL_SW_ITR_INDX_M ICE_M(0x3, 25) #define GLINT_DYN_CTL_WB_ON_ITR_M BIT(30) #define GLINT_DYN_CTL_INTENA_MSK_M BIT(31) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index f981e77f72ad..231f8bea2519 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1941,6 +1941,31 @@ void ice_write_itr(struct ice_ring_container *rc, u16 itr) __ice_write_itr(q_vector, rc, itr); } +/** + * ice_set_q_vector_intrl - set up interrupt rate limiting + * @q_vector: the vector to be configured + * + * Interrupt rate limiting is local to the vector, not per-queue so we must + * detect if either ring container has dynamic moderation enabled to decide + * what to set the interrupt rate limit to via INTRL settings. In the case that + * dynamic moderation is disabled on both, write the value with the cached + * setting to make sure INTRL register matches the user visible value. + */ +void ice_set_q_vector_intrl(struct ice_q_vector *q_vector) +{ + if (ITR_IS_DYNAMIC(&q_vector->tx) || ITR_IS_DYNAMIC(&q_vector->rx)) { + /* in the case of dynamic enabled, cap each vector to no more + * than (4 us) 250,000 ints/sec, which allows low latency + * but still less than 500,000 interrupts per second, which + * reduces CPU a bit in the case of the lowest latency + * setting. The 4 here is a value in microseconds. + */ + ice_write_intrl(q_vector, 4); + } else { + ice_write_intrl(q_vector, q_vector->intrl); + } +} + /** * ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW * @vsi: the VSI being configured @@ -3096,7 +3121,7 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, } vsi->q_vectors[i]->intrl = coalesce[i].intrl; - ice_write_intrl(vsi->q_vectors[i], coalesce[i].intrl); + ice_set_q_vector_intrl(vsi->q_vectors[i]); } /* the number of queue vectors increased so write whatever is in @@ -3114,7 +3139,7 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, ice_write_itr(rc, rc->itr_setting); vsi->q_vectors[i]->intrl = coalesce[0].intrl; - ice_write_intrl(vsi->q_vectors[i], coalesce[0].intrl); + ice_set_q_vector_intrl(vsi->q_vectors[i]); } } @@ -3600,6 +3625,180 @@ int ice_clear_dflt_vsi(struct ice_sw *sw) return 0; } +/** + * ice_get_link_speed_mbps - get link speed in Mbps + * @vsi: the VSI whose link speed is being queried + * + * Return current VSI link speed and 0 if the speed is unknown. + */ +int ice_get_link_speed_mbps(struct ice_vsi *vsi) +{ + switch (vsi->port_info->phy.link_info.link_speed) { + case ICE_AQ_LINK_SPEED_100GB: + return SPEED_100000; + case ICE_AQ_LINK_SPEED_50GB: + return SPEED_50000; + case ICE_AQ_LINK_SPEED_40GB: + return SPEED_40000; + case ICE_AQ_LINK_SPEED_25GB: + return SPEED_25000; + case ICE_AQ_LINK_SPEED_20GB: + return SPEED_20000; + case ICE_AQ_LINK_SPEED_10GB: + return SPEED_10000; + case ICE_AQ_LINK_SPEED_5GB: + return SPEED_5000; + case ICE_AQ_LINK_SPEED_2500MB: + return SPEED_2500; + case ICE_AQ_LINK_SPEED_1000MB: + return SPEED_1000; + case ICE_AQ_LINK_SPEED_100MB: + return SPEED_100; + case ICE_AQ_LINK_SPEED_10MB: + return SPEED_10; + case ICE_AQ_LINK_SPEED_UNKNOWN: + default: + return 0; + } +} + +/** + * ice_get_link_speed_kbps - get link speed in Kbps + * @vsi: the VSI whose link speed is being queried + * + * Return current VSI link speed and 0 if the speed is unknown. + */ +static int ice_get_link_speed_kbps(struct ice_vsi *vsi) +{ + int speed_mbps; + + speed_mbps = ice_get_link_speed_mbps(vsi); + + return speed_mbps * 1000; +} + +/** + * ice_set_min_bw_limit - setup minimum BW limit for Tx based on min_tx_rate + * @vsi: VSI to be configured + * @min_tx_rate: min Tx rate in Kbps to be configured as BW limit + * + * If the min_tx_rate is specified as 0 that means to clear the minimum BW limit + * profile, otherwise a non-zero value will force a minimum BW limit for the VSI + * on TC 0. + */ +int ice_set_min_bw_limit(struct ice_vsi *vsi, u64 min_tx_rate) +{ + struct ice_pf *pf = vsi->back; + enum ice_status status; + struct device *dev; + int speed; + + dev = ice_pf_to_dev(pf); + if (!vsi->port_info) { + dev_dbg(dev, "VSI %d, type %u specified doesn't have valid port_info\n", + vsi->idx, vsi->type); + return -EINVAL; + } + + speed = ice_get_link_speed_kbps(vsi); + if (min_tx_rate > (u64)speed) { + dev_err(dev, "invalid min Tx rate %llu Kbps specified for %s %d is greater than current link speed %u Kbps\n", + min_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx, + speed); + return -EINVAL; + } + + /* Configure min BW for VSI limit */ + if (min_tx_rate) { + status = ice_cfg_vsi_bw_lmt_per_tc(vsi->port_info, vsi->idx, 0, + ICE_MIN_BW, min_tx_rate); + if (status) { + dev_err(dev, "failed to set min Tx rate(%llu Kbps) for %s %d\n", + min_tx_rate, ice_vsi_type_str(vsi->type), + vsi->idx); + return -EIO; + } + + dev_dbg(dev, "set min Tx rate(%llu Kbps) for %s\n", + min_tx_rate, ice_vsi_type_str(vsi->type)); + } else { + status = ice_cfg_vsi_bw_dflt_lmt_per_tc(vsi->port_info, + vsi->idx, 0, + ICE_MIN_BW); + if (status) { + dev_err(dev, "failed to clear min Tx rate configuration for %s %d\n", + ice_vsi_type_str(vsi->type), vsi->idx); + return -EIO; + } + + dev_dbg(dev, "cleared min Tx rate configuration for %s %d\n", + ice_vsi_type_str(vsi->type), vsi->idx); + } + + return 0; +} + +/** + * ice_set_max_bw_limit - setup maximum BW limit for Tx based on max_tx_rate + * @vsi: VSI to be configured + * @max_tx_rate: max Tx rate in Kbps to be configured as BW limit + * + * If the max_tx_rate is specified as 0 that means to clear the maximum BW limit + * profile, otherwise a non-zero value will force a maximum BW limit for the VSI + * on TC 0. + */ +int ice_set_max_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate) +{ + struct ice_pf *pf = vsi->back; + enum ice_status status; + struct device *dev; + int speed; + + dev = ice_pf_to_dev(pf); + if (!vsi->port_info) { + dev_dbg(dev, "VSI %d, type %u specified doesn't have valid port_info\n", + vsi->idx, vsi->type); + return -EINVAL; + } + + speed = ice_get_link_speed_kbps(vsi); + if (max_tx_rate > (u64)speed) { + dev_err(dev, "invalid max Tx rate %llu Kbps specified for %s %d is greater than current link speed %u Kbps\n", + max_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx, + speed); + return -EINVAL; + } + + /* Configure max BW for VSI limit */ + if (max_tx_rate) { + status = ice_cfg_vsi_bw_lmt_per_tc(vsi->port_info, vsi->idx, 0, + ICE_MAX_BW, max_tx_rate); + if (status) { + dev_err(dev, "failed setting max Tx rate(%llu Kbps) for %s %d\n", + max_tx_rate, ice_vsi_type_str(vsi->type), + vsi->idx); + return -EIO; + } + + dev_dbg(dev, "set max Tx rate(%llu Kbps) for %s %d\n", + max_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx); + } else { + status = ice_cfg_vsi_bw_dflt_lmt_per_tc(vsi->port_info, + vsi->idx, 0, + ICE_MAX_BW); + if (status) { + dev_err(dev, "failed clearing max Tx rate configuration for %s %d\n", + ice_vsi_type_str(vsi->type), vsi->idx); + return -EIO; + } + + dev_dbg(dev, "cleared max Tx rate configuration for %s %d\n", + ice_vsi_type_str(vsi->type), vsi->idx); + } + + return 0; +} + /** * ice_set_link - turn on/off physical link * @vsi: VSI to modify physical link on diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index b6c429c5875d..c79fcbf82d8f 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -103,6 +103,7 @@ int ice_status_to_errno(enum ice_status err); void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl); void ice_write_itr(struct ice_ring_container *rc, u16 itr); +void ice_set_q_vector_intrl(struct ice_q_vector *q_vector); enum ice_status ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set); @@ -116,7 +117,9 @@ bool ice_is_vsi_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi); int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi); int ice_clear_dflt_vsi(struct ice_sw *sw); - +int ice_set_min_bw_limit(struct ice_vsi *vsi, u64 min_tx_rate); +int ice_set_max_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate); +int ice_get_link_speed_mbps(struct ice_vsi *vsi); int ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *)); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index f531691a3e12..846623a97723 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5502,77 +5502,59 @@ int ice_vsi_cfg(struct ice_vsi *vsi) } /* THEORY OF MODERATION: - * The below code creates custom DIM profiles for use by this driver, because - * the ice driver hardware works differently than the hardware that DIMLIB was + * The ice driver hardware works differently than the hardware that DIMLIB was * originally made for. ice hardware doesn't have packet count limits that * can trigger an interrupt, but it *does* have interrupt rate limit support, - * and this code adds that capability to be used by the driver when it's using - * DIMLIB. The DIMLIB code was always designed to be a suggestion to the driver - * for how to "respond" to traffic and interrupts, so this driver uses a - * slightly different set of moderation parameters to get best performance. + * which is hard-coded to a limit of 250,000 ints/second. + * If not using dynamic moderation, the INTRL value can be modified + * by ethtool rx-usecs-high. */ struct ice_dim { /* the throttle rate for interrupts, basically worst case delay before * an initial interrupt fires, value is stored in microseconds. */ u16 itr; - /* the rate limit for interrupts, which can cap a delay from a small - * ITR at a certain amount of interrupts per second. f.e. a 2us ITR - * could yield as much as 500,000 interrupts per second, but with a - * 10us rate limit, it limits to 100,000 interrupts per second. Value - * is stored in microseconds. - */ - u16 intrl; }; /* Make a different profile for Rx that doesn't allow quite so aggressive - * moderation at the high end (it maxes out at 128us or about 8k interrupts a - * second. The INTRL/rate parameters here are only useful to cap small ITR - * values, which is why for larger ITR's - like 128, which can only generate - * 8k interrupts per second, there is no point to rate limit and the values - * are set to zero. The rate limit values do affect latency, and so must - * be reasonably small so to not impact latency sensitive tests. + * moderation at the high end (it maxes out at 126us or about 8k interrupts a + * second. */ static const struct ice_dim rx_profile[] = { - {2, 10}, - {8, 16}, - {32, 0}, - {96, 0}, - {128, 0} + {2}, /* 500,000 ints/s, capped at 250K by INTRL */ + {8}, /* 125,000 ints/s */ + {16}, /* 62,500 ints/s */ + {62}, /* 16,129 ints/s */ + {126} /* 7,936 ints/s */ }; /* The transmit profile, which has the same sorts of values * as the previous struct */ static const struct ice_dim tx_profile[] = { - {2, 10}, - {8, 16}, - {64, 0}, - {128, 0}, - {256, 0} + {2}, /* 500,000 ints/s, capped at 250K by INTRL */ + {8}, /* 125,000 ints/s */ + {40}, /* 16,125 ints/s */ + {128}, /* 7,812 ints/s */ + {256} /* 3,906 ints/s */ }; static void ice_tx_dim_work(struct work_struct *work) { struct ice_ring_container *rc; - struct ice_q_vector *q_vector; struct dim *dim; - u16 itr, intrl; + u16 itr; dim = container_of(work, struct dim, work); - rc = container_of(dim, struct ice_ring_container, dim); - q_vector = container_of(rc, struct ice_q_vector, tx); + rc = (struct ice_ring_container *)dim->priv; - if (dim->profile_ix >= ARRAY_SIZE(tx_profile)) - dim->profile_ix = ARRAY_SIZE(tx_profile) - 1; + WARN_ON(dim->profile_ix >= ARRAY_SIZE(tx_profile)); /* look up the values in our local table */ itr = tx_profile[dim->profile_ix].itr; - intrl = tx_profile[dim->profile_ix].intrl; - ice_trace(tx_dim_work, q_vector, dim); + ice_trace(tx_dim_work, container_of(rc, struct ice_q_vector, tx), dim); ice_write_itr(rc, itr); - ice_write_intrl(q_vector, intrl); dim->state = DIM_START_MEASURE; } @@ -5580,28 +5562,65 @@ static void ice_tx_dim_work(struct work_struct *work) static void ice_rx_dim_work(struct work_struct *work) { struct ice_ring_container *rc; - struct ice_q_vector *q_vector; struct dim *dim; - u16 itr, intrl; + u16 itr; dim = container_of(work, struct dim, work); - rc = container_of(dim, struct ice_ring_container, dim); - q_vector = container_of(rc, struct ice_q_vector, rx); + rc = (struct ice_ring_container *)dim->priv; - if (dim->profile_ix >= ARRAY_SIZE(rx_profile)) - dim->profile_ix = ARRAY_SIZE(rx_profile) - 1; + WARN_ON(dim->profile_ix >= ARRAY_SIZE(rx_profile)); /* look up the values in our local table */ itr = rx_profile[dim->profile_ix].itr; - intrl = rx_profile[dim->profile_ix].intrl; - ice_trace(rx_dim_work, q_vector, dim); + ice_trace(rx_dim_work, container_of(rc, struct ice_q_vector, rx), dim); ice_write_itr(rc, itr); - ice_write_intrl(q_vector, intrl); dim->state = DIM_START_MEASURE; } +#define ICE_DIM_DEFAULT_PROFILE_IX 1 + +/** + * ice_init_moderation - set up interrupt moderation + * @q_vector: the vector containing rings to be configured + * + * Set up interrupt moderation registers, with the intent to do the right thing + * when called from reset or from probe, and whether or not dynamic moderation + * is enabled or not. Take special care to write all the registers in both + * dynamic moderation mode or not in order to make sure hardware is in a known + * state. + */ +static void ice_init_moderation(struct ice_q_vector *q_vector) +{ + struct ice_ring_container *rc; + bool tx_dynamic, rx_dynamic; + + rc = &q_vector->tx; + INIT_WORK(&rc->dim.work, ice_tx_dim_work); + rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX; + rc->dim.priv = rc; + tx_dynamic = ITR_IS_DYNAMIC(rc); + + /* set the initial TX ITR to match the above */ + ice_write_itr(rc, tx_dynamic ? + tx_profile[rc->dim.profile_ix].itr : rc->itr_setting); + + rc = &q_vector->rx; + INIT_WORK(&rc->dim.work, ice_rx_dim_work); + rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX; + rc->dim.priv = rc; + rx_dynamic = ITR_IS_DYNAMIC(rc); + + /* set the initial RX ITR to match the above */ + ice_write_itr(rc, rx_dynamic ? rx_profile[rc->dim.profile_ix].itr : + rc->itr_setting); + + ice_set_q_vector_intrl(q_vector); +} + /** * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI * @vsi: the VSI being configured @@ -5616,11 +5635,7 @@ static void ice_napi_enable_all(struct ice_vsi *vsi) ice_for_each_q_vector(vsi, q_idx) { struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; - INIT_WORK(&q_vector->tx.dim.work, ice_tx_dim_work); - q_vector->tx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; - - INIT_WORK(&q_vector->rx.dim.work, ice_rx_dim_work); - q_vector->rx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + ice_init_moderation(q_vector); if (q_vector->rx.rx_ring || q_vector->tx.tx_ring) napi_enable(&q_vector->napi); @@ -7390,6 +7405,7 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_set_vf_vlan = ice_set_vf_port_vlan, .ndo_set_vf_link_state = ice_set_vf_link_state, .ndo_get_vf_stats = ice_get_vf_stats, + .ndo_set_vf_rate = ice_set_vf_bw, .ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid, .ndo_setup_tc = ice_setup_tc, diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 9f07b6641705..560e52b99f83 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -3770,6 +3770,136 @@ ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, ICE_SCHED_DFLT_BW); } +/** + * ice_sched_get_node_by_id_type - get node from ID type + * @pi: port information structure + * @id: identifier + * @agg_type: type of aggregator + * @tc: traffic class + * + * This function returns node identified by ID of type aggregator, and + * based on traffic class (TC). This function needs to be called with + * the scheduler lock held. + */ +static struct ice_sched_node * +ice_sched_get_node_by_id_type(struct ice_port_info *pi, u32 id, + enum ice_agg_type agg_type, u8 tc) +{ + struct ice_sched_node *node = NULL; + + switch (agg_type) { + case ICE_AGG_TYPE_VSI: { + struct ice_vsi_ctx *vsi_ctx; + u16 vsi_handle = (u16)id; + + if (!ice_is_vsi_valid(pi->hw, vsi_handle)) + break; + /* Get sched_vsi_info */ + vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle); + if (!vsi_ctx) + break; + node = vsi_ctx->sched.vsi_node[tc]; + break; + } + + case ICE_AGG_TYPE_AGG: { + struct ice_sched_node *tc_node; + + tc_node = ice_sched_get_tc_node(pi, tc); + if (tc_node) + node = ice_sched_get_agg_node(pi, tc_node, id); + break; + } + + default: + break; + } + + return node; +} + +/** + * ice_sched_set_node_bw_lmt_per_tc - set node BW limit per TC + * @pi: port information structure + * @id: ID (software VSI handle or AGG ID) + * @agg_type: aggregator type (VSI or AGG type node) + * @tc: traffic class + * @rl_type: min or max + * @bw: bandwidth in Kbps + * + * This function sets BW limit of VSI or Aggregator scheduling node + * based on TC information from passed in argument BW. + */ +static enum ice_status +ice_sched_set_node_bw_lmt_per_tc(struct ice_port_info *pi, u32 id, + enum ice_agg_type agg_type, u8 tc, + enum ice_rl_type rl_type, u32 bw) +{ + enum ice_status status = ICE_ERR_PARAM; + struct ice_sched_node *node; + + if (!pi) + return status; + + if (rl_type == ICE_UNKNOWN_BW) + return status; + + mutex_lock(&pi->sched_lock); + node = ice_sched_get_node_by_id_type(pi, id, agg_type, tc); + if (!node) { + ice_debug(pi->hw, ICE_DBG_SCHED, "Wrong id, agg type, or tc\n"); + goto exit_set_node_bw_lmt_per_tc; + } + if (bw == ICE_SCHED_DFLT_BW) + status = ice_sched_set_node_bw_dflt_lmt(pi, node, rl_type); + else + status = ice_sched_set_node_bw_lmt(pi, node, rl_type, bw); + +exit_set_node_bw_lmt_per_tc: + mutex_unlock(&pi->sched_lock); + return status; +} + +/** + * ice_cfg_vsi_bw_lmt_per_tc - configure VSI BW limit per TC + * @pi: port information structure + * @vsi_handle: software VSI handle + * @tc: traffic class + * @rl_type: min or max + * @bw: bandwidth in Kbps + * + * This function configures BW limit of VSI scheduling node based on TC + * information. + */ +enum ice_status +ice_cfg_vsi_bw_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + enum ice_rl_type rl_type, u32 bw) +{ + return ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle, + ICE_AGG_TYPE_VSI, + tc, rl_type, bw); +} + +/** + * ice_cfg_vsi_bw_dflt_lmt_per_tc - configure default VSI BW limit per TC + * @pi: port information structure + * @vsi_handle: software VSI handle + * @tc: traffic class + * @rl_type: min or max + * + * This function configures default BW limit of VSI scheduling node based on TC + * information. + */ +enum ice_status +ice_cfg_vsi_bw_dflt_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + enum ice_rl_type rl_type) +{ + return ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle, + ICE_AGG_TYPE_VSI, + tc, rl_type, + ICE_SCHED_DFLT_BW); +} + /** * ice_cfg_rl_burst_size - Set burst size value * @hw: pointer to the HW struct diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h index 9beef8f0ec76..f89b80ba3499 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.h +++ b/drivers/net/ethernet/intel/ice/ice_sched.h @@ -103,6 +103,12 @@ ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, enum ice_status ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle, enum ice_rl_type rl_type); +enum ice_status +ice_cfg_vsi_bw_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + enum ice_rl_type rl_type, u32 bw); +enum ice_status +ice_cfg_vsi_bw_dflt_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + enum ice_rl_type rl_type); enum ice_status ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes); void ice_sched_replay_agg_vsi_preinit(struct ice_hw *hw); void ice_sched_replay_agg(struct ice_hw *hw); diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 0d07547b40f1..a4a299012f9f 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -4783,7 +4783,14 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, s_rule = kzalloc(rule_buf_sz, GFP_KERNEL); if (!s_rule) return ICE_ERR_NO_MEMORY; - act |= ICE_SINGLE_ACT_LB_ENABLE | ICE_SINGLE_ACT_LAN_ENABLE; + if (!rinfo->flags_info.act_valid) { + act |= ICE_SINGLE_ACT_LAN_ENABLE; + act |= ICE_SINGLE_ACT_LB_ENABLE; + } else { + act |= rinfo->flags_info.act & (ICE_SINGLE_ACT_LAN_ENABLE | + ICE_SINGLE_ACT_LB_ENABLE); + } + switch (rinfo->sw_act.fltr_act) { case ICE_FWD_TO_VSI: act |= (rinfo->sw_act.fwd_id.hw_vsi_id << diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index 34b7f74b1ab8..d4c0a3b594af 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -160,11 +160,22 @@ struct ice_rule_query_data { u16 vsi_handle; }; +/* This structure allows to pass info about lb_en and lan_en + * flags to ice_add_adv_rule. Values in act would be used + * only if act_valid was set to true, otherwise default + * values would be used. + */ +struct ice_adv_rule_flags_info { + u32 act; + u8 act_valid; /* indicate if flags in act are valid */ +}; + struct ice_adv_rule_info { struct ice_sw_act_ctrl sw_act; u32 priority; u8 rx; /* true means LOOKUP_RX otherwise LOOKUP_TX */ u16 fltr_rule_id; + struct ice_adv_rule_flags_info flags_info; }; /* A collection of one or more four word recipe */ diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index 4c1daa1a02a1..1dccfd116bc9 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -274,6 +274,8 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) rule_info.sw_act.flag |= ICE_FLTR_TX; rule_info.sw_act.src = vsi->idx; rule_info.rx = false; + rule_info.flags_info.act = ICE_SINGLE_ACT_LAN_ENABLE; + rule_info.flags_info.act_valid = true; } /* specify the cookie as filter_rule_id */ @@ -296,12 +298,6 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) fltr->rid = rule_added.rid; fltr->rule_id = rule_added.rule_id; - if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS) { - if (ice_fltr_update_flags(vsi, fltr->rule_id, fltr->rid, - ICE_SINGLE_ACT_LAN_ENABLE)) - ice_rem_adv_rule_by_id(hw, &rule_added); - } - exit: kfree(list); return ret; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 01ae331927bd..bc3ba19dc88f 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -343,7 +343,7 @@ int ice_setup_tx_ring(struct ice_tx_ring *tx_ring) /* warn if we are about to overwrite the pointer */ WARN_ON(tx_ring->tx_buf); tx_ring->tx_buf = - devm_kzalloc(dev, sizeof(*tx_ring->tx_buf) * tx_ring->count, + devm_kcalloc(dev, sizeof(*tx_ring->tx_buf), tx_ring->count, GFP_KERNEL); if (!tx_ring->tx_buf) return -ENOMEM; @@ -475,7 +475,7 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) /* warn if we are about to overwrite the pointer */ WARN_ON(rx_ring->rx_buf); rx_ring->rx_buf = - devm_kzalloc(dev, sizeof(*rx_ring->rx_buf) * rx_ring->count, + devm_kcalloc(dev, sizeof(*rx_ring->rx_buf), rx_ring->count, GFP_KERNEL); if (!rx_ring->rx_buf) return -ENOMEM; @@ -1259,6 +1259,41 @@ construct_skb: return failure ? budget : (int)total_rx_pkts; } +static void __ice_update_sample(struct ice_q_vector *q_vector, + struct ice_ring_container *rc, + struct dim_sample *sample, + bool is_tx) +{ + u64 packets = 0, bytes = 0; + + if (is_tx) { + struct ice_tx_ring *tx_ring; + + ice_for_each_tx_ring(tx_ring, *rc) { + packets += tx_ring->stats.pkts; + bytes += tx_ring->stats.bytes; + } + } else { + struct ice_rx_ring *rx_ring; + + ice_for_each_rx_ring(rx_ring, *rc) { + packets += rx_ring->stats.pkts; + bytes += rx_ring->stats.bytes; + } + } + + dim_update_sample(q_vector->total_events, packets, bytes, sample); + sample->comp_ctr = 0; + + /* if dim settings get stale, like when not updated for 1 + * second or longer, force it to start again. This addresses the + * frequent case of an idle queue being switched to by the + * scheduler. The 1,000 here means 1,000 milliseconds. + */ + if (ktime_ms_delta(sample->time, rc->dim.start_sample.time) >= 1000) + rc->dim.state = DIM_START_MEASURE; +} + /** * ice_net_dim - Update net DIM algorithm * @q_vector: the vector associated with the interrupt @@ -1274,34 +1309,16 @@ static void ice_net_dim(struct ice_q_vector *q_vector) struct ice_ring_container *rx = &q_vector->rx; if (ITR_IS_DYNAMIC(tx)) { - struct dim_sample dim_sample = {}; - u64 packets = 0, bytes = 0; - struct ice_tx_ring *ring; - - ice_for_each_tx_ring(ring, q_vector->tx) { - packets += ring->stats.pkts; - bytes += ring->stats.bytes; - } - - dim_update_sample(q_vector->total_events, packets, bytes, - &dim_sample); + struct dim_sample dim_sample; + __ice_update_sample(q_vector, tx, &dim_sample, true); net_dim(&tx->dim, dim_sample); } if (ITR_IS_DYNAMIC(rx)) { - struct dim_sample dim_sample = {}; - u64 packets = 0, bytes = 0; - struct ice_rx_ring *ring; - - ice_for_each_rx_ring(ring, q_vector->rx) { - packets += ring->stats.pkts; - bytes += ring->stats.bytes; - } - - dim_update_sample(q_vector->total_events, packets, bytes, - &dim_sample); + struct dim_sample dim_sample; + __ice_update_sample(q_vector, rx, &dim_sample, false); net_dim(&rx->dim, dim_sample); } } @@ -1328,15 +1345,14 @@ static u32 ice_buildreg_itr(u16 itr_idx, u16 itr) } /** - * ice_update_ena_itr - Update ITR moderation and re-enable MSI-X interrupt + * ice_enable_interrupt - re-enable MSI-X interrupt * @q_vector: the vector associated with the interrupt to enable * - * Update the net_dim() algorithm and re-enable the interrupt associated with - * this vector. - * - * If the VSI is down, the interrupt will not be re-enabled. + * If the VSI is down, the interrupt will not be re-enabled. Also, + * when enabling the interrupt always reset the wb_on_itr to false + * and trigger a software interrupt to clean out internal state. */ -static void ice_update_ena_itr(struct ice_q_vector *q_vector) +static void ice_enable_interrupt(struct ice_q_vector *q_vector) { struct ice_vsi *vsi = q_vector->vsi; bool wb_en = q_vector->wb_on_itr; @@ -1345,25 +1361,25 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector) if (test_bit(ICE_DOWN, vsi->state)) return; - /* When exiting WB_ON_ITR, let ITR resume its normal - * interrupts-enabled path. + /* trigger an ITR delayed software interrupt when exiting busy poll, to + * make sure to catch any pending cleanups that might have been missed + * due to interrupt state transition. If busy poll or poll isn't + * enabled, then don't update ITR, and just enable the interrupt. */ - if (wb_en) + if (!wb_en) { + itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0); + } else { q_vector->wb_on_itr = false; - /* This will do nothing if dynamic updates are not enabled. */ - ice_net_dim(q_vector); - - /* net_dim() updates ITR out-of-band using a work item */ - itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0); - /* trigger an immediate software interrupt when exiting - * busy poll, to make sure to catch any pending cleanups - * that might have been missed due to interrupt state - * transition. - */ - if (wb_en) { + /* do two things here with a single write. Set up the third ITR + * index to be used for software interrupt moderation, and then + * trigger a software interrupt with a rate limit of 20K on + * software interrupts, this will help avoid high interrupt + * loads due to frequently polling and exiting polling. + */ + itr_val = ice_buildreg_itr(ICE_IDX_ITR2, ICE_ITR_20K); itr_val |= GLINT_DYN_CTL_SWINT_TRIG_M | - GLINT_DYN_CTL_SW_ITR_INDX_M | + ICE_IDX_ITR2 << GLINT_DYN_CTL_SW_ITR_INDX_S | GLINT_DYN_CTL_SW_ITR_INDX_ENA_M; } wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val); @@ -1482,10 +1498,12 @@ int ice_napi_poll(struct napi_struct *napi, int budget) /* Exit the polling mode, but don't re-enable interrupts if stack might * poll us due to busy-polling */ - if (likely(napi_complete_done(napi, work_done))) - ice_update_ena_itr(q_vector); - else + if (likely(napi_complete_done(napi, work_done))) { + ice_net_dim(q_vector); + ice_enable_interrupt(q_vector); + } else { ice_set_wb_on_itr(q_vector); + } return min_t(int, work_done, budget - 1); } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index d90a3b7be713..8e3f9ec4e35b 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -5,6 +5,7 @@ #include "ice_base.h" #include "ice_lib.h" #include "ice_fltr.h" +#include "ice_dcb_lib.h" #include "ice_flow.h" #include "ice_eswitch.h" #include "ice_virtchnl_allowlist.h" @@ -884,6 +885,40 @@ static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf) return pf->sriov_base_vector + vf->vf_id * pf->num_msix_per_vf; } +/** + * ice_vf_rebuild_host_tx_rate_cfg - re-apply the Tx rate limiting configuration + * @vf: VF to re-apply the configuration for + * + * Called after a VF VSI has been re-added/rebuild during reset. The PF driver + * needs to re-apply the host configured Tx rate limiting configuration. + */ +static int ice_vf_rebuild_host_tx_rate_cfg(struct ice_vf *vf) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_vsi *vsi = ice_get_vf_vsi(vf); + int err; + + if (vf->min_tx_rate) { + err = ice_set_min_bw_limit(vsi, (u64)vf->min_tx_rate * 1000); + if (err) { + dev_err(dev, "failed to set min Tx rate to %d Mbps for VF %u, error %d\n", + vf->min_tx_rate, vf->vf_id, err); + return err; + } + } + + if (vf->max_tx_rate) { + err = ice_set_max_bw_limit(vsi, (u64)vf->max_tx_rate * 1000); + if (err) { + dev_err(dev, "failed to set max Tx rate to %d Mbps for VF %u, error %d\n", + vf->max_tx_rate, vf->vf_id, err); + return err; + } + } + + return 0; +} + /** * ice_vf_rebuild_host_vlan_cfg - add VLAN 0 filter or rebuild the Port VLAN * @vf: VF to add MAC filters for @@ -1420,6 +1455,11 @@ static void ice_vf_rebuild_host_cfg(struct ice_vf *vf) if (ice_vf_rebuild_host_vlan_cfg(vf)) dev_err(dev, "failed to rebuild VLAN configuration for VF %u\n", vf->vf_id); + + if (ice_vf_rebuild_host_tx_rate_cfg(vf)) + dev_err(dev, "failed to rebuild Tx rate limiting configuration for VF %u\n", + vf->vf_id); + /* rebuild aggregator node config for main VF VSI */ ice_vf_rebuild_aggregator_node_cfg(vsi); } @@ -1975,7 +2015,8 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) clear_bit(ICE_VF_DIS, pf->state); - if (ice_eswitch_configure(pf)) + ret = ice_eswitch_configure(pf); + if (ret) goto err_unroll_sriov; return 0; @@ -4747,8 +4788,8 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi) ivi->linkstate = IFLA_VF_LINK_STATE_ENABLE; else ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE; - ivi->max_tx_rate = vf->tx_rate; - ivi->min_tx_rate = 0; + ivi->max_tx_rate = vf->max_tx_rate; + ivi->min_tx_rate = vf->min_tx_rate; return 0; } @@ -4799,11 +4840,6 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) struct ice_vf *vf; int ret; - if (ice_is_eswitch_mode_switchdev(pf)) { - dev_info(ice_pf_to_dev(pf), "Trusted VF is forbidden in switchdev mode\n"); - return -EOPNOTSUPP; - } - if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; @@ -4863,6 +4899,11 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) struct ice_vf *vf; int ret; + if (ice_is_eswitch_mode_switchdev(pf)) { + dev_info(ice_pf_to_dev(pf), "Trusted VF is forbidden in switchdev mode\n"); + return -EOPNOTSUPP; + } + if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; @@ -4926,6 +4967,122 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) return 0; } +/** + * ice_calc_all_vfs_min_tx_rate - calculate cumulative min Tx rate on all VFs + * @pf: PF associated with VFs + */ +static int ice_calc_all_vfs_min_tx_rate(struct ice_pf *pf) +{ + int rate = 0, i; + + ice_for_each_vf(pf, i) + rate += pf->vf[i].min_tx_rate; + + return rate; +} + +/** + * ice_min_tx_rate_oversubscribed - check if min Tx rate causes oversubscription + * @vf: VF trying to configure min_tx_rate + * @min_tx_rate: min Tx rate in Mbps + * + * Check if the min_tx_rate being passed in will cause oversubscription of total + * min_tx_rate based on the current link speed and all other VFs configured + * min_tx_rate + * + * Return true if the passed min_tx_rate would cause oversubscription, else + * return false + */ +static bool +ice_min_tx_rate_oversubscribed(struct ice_vf *vf, int min_tx_rate) +{ + int link_speed_mbps = ice_get_link_speed_mbps(ice_get_vf_vsi(vf)); + int all_vfs_min_tx_rate = ice_calc_all_vfs_min_tx_rate(vf->pf); + + /* this VF's previous rate is being overwritten */ + all_vfs_min_tx_rate -= vf->min_tx_rate; + + if (all_vfs_min_tx_rate + min_tx_rate > link_speed_mbps) { + dev_err(ice_pf_to_dev(vf->pf), "min_tx_rate of %d Mbps on VF %u would cause oversubscription of %d Mbps based on the current link speed %d Mbps\n", + min_tx_rate, vf->vf_id, + all_vfs_min_tx_rate + min_tx_rate - link_speed_mbps, + link_speed_mbps); + return true; + } + + return false; +} + +/** + * ice_set_vf_bw - set min/max VF bandwidth + * @netdev: network interface device structure + * @vf_id: VF identifier + * @min_tx_rate: Minimum Tx rate in Mbps + * @max_tx_rate: Maximum Tx rate in Mbps + */ +int +ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, + int max_tx_rate) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_vsi *vsi; + struct device *dev; + struct ice_vf *vf; + int ret; + + dev = ice_pf_to_dev(pf); + if (ice_validate_vf_id(pf, vf_id)) + return -EINVAL; + + vf = &pf->vf[vf_id]; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + return ret; + + vsi = ice_get_vf_vsi(vf); + + /* when max_tx_rate is zero that means no max Tx rate limiting, so only + * check if max_tx_rate is non-zero + */ + if (max_tx_rate && min_tx_rate > max_tx_rate) { + dev_err(dev, "Cannot set min Tx rate %d Mbps greater than max Tx rate %d Mbps\n", + min_tx_rate, max_tx_rate); + return -EINVAL; + } + + if (min_tx_rate && ice_is_dcb_active(pf)) { + dev_err(dev, "DCB on PF is currently enabled. VF min Tx rate limiting not allowed on this PF.\n"); + return -EOPNOTSUPP; + } + + if (ice_min_tx_rate_oversubscribed(vf, min_tx_rate)) + return -EINVAL; + + if (vf->min_tx_rate != (unsigned int)min_tx_rate) { + ret = ice_set_min_bw_limit(vsi, (u64)min_tx_rate * 1000); + if (ret) { + dev_err(dev, "Unable to set min-tx-rate for VF %d\n", + vf->vf_id); + return ret; + } + + vf->min_tx_rate = min_tx_rate; + } + + if (vf->max_tx_rate != (unsigned int)max_tx_rate) { + ret = ice_set_max_bw_limit(vsi, (u64)max_tx_rate * 1000); + if (ret) { + dev_err(dev, "Unable to set max-tx-rate for VF %d\n", + vf->vf_id); + return ret; + } + + vf->max_tx_rate = max_tx_rate; + } + + return 0; +} + /** * ice_get_vf_stats - populate some stats for the VF * @netdev: the netdev of the PF diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h index 3115284e5411..5ff93a08f54c 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h @@ -125,7 +125,8 @@ struct ice_vf { * the main LAN VSI for the PF. */ u16 lan_vsi_num; /* ID as used by firmware */ - unsigned int tx_rate; /* Tx bandwidth limit in Mbps */ + unsigned int min_tx_rate; /* Minimum Tx bandwidth limit in Mbps */ + unsigned int max_tx_rate; /* Maximum Tx bandwidth limit in Mbps */ DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */ u64 num_inval_msgs; /* number of continuous invalid msgs */ @@ -172,6 +173,10 @@ int ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, __be16 vlan_proto); +int +ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, + int max_tx_rate); + int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted); int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state); @@ -303,6 +308,14 @@ ice_set_vf_link_state(struct net_device __always_unused *netdev, return -EOPNOTSUPP; } +static inline int +ice_set_vf_bw(struct net_device __always_unused *netdev, + int __always_unused vf_id, int __always_unused min_tx_rate, + int __always_unused max_tx_rate) +{ + return -EOPNOTSUPP; +} + static inline int ice_calc_vf_reg_idx(struct ice_vf __always_unused *vf, struct ice_q_vector __always_unused *q_vector)