Merge branch '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue

Tony Nguyen says:

====================
100GbE Intel Wired LAN Driver Updates 2021-10-19

This series contains updates to ice driver only.

Brett implements support for ndo_set_vf_rate allowing for min_tx_rate
and max_tx_rate to be set for a VF.

Jesse updates DIM moderation to improve latency and resolves problems
with reported rate limit and extra software generated interrupts.

Wojciech moves a check for trusted VFs to the correct function,
disables lb_en for switchdev offloads, and refactors ethtool ops because
PFs and port representors support different sets of operations.

Cai Huoqing utilizes the helper function devm_add_action_or_reset().

Gustavo A. R. Silva replaces open-coded array allocations with
devm_kcalloc() where applicable.

Dan Carpenter propagates an error instead of returning success.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2021-10-20 11:43:11 +01:00
commit 37ba803dbd
17 changed files with 764 additions and 286 deletions

View File

@ -452,10 +452,8 @@ struct ice_pf *ice_allocate_pf(struct device *dev)
return NULL;
/* Add an action to teardown the devlink when unwinding the driver */
if (devm_add_action(dev, ice_devlink_free, devlink)) {
devlink_free(devlink);
if (devm_add_action_or_reset(dev, ice_devlink_free, devlink))
return NULL;
}
return devlink_priv(devlink);
}

View File

@ -192,7 +192,6 @@ __ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo,
strscpy(drvinfo->bus_info, pci_name(pf->pdev),
sizeof(drvinfo->bus_info));
drvinfo->n_priv_flags = ICE_PRIV_FLAG_ARRAY_SIZE;
}
static void
@ -201,18 +200,8 @@ ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
struct ice_netdev_priv *np = netdev_priv(netdev);
__ice_get_drvinfo(netdev, drvinfo, np->vsi);
}
static void
ice_repr_get_drvinfo(struct net_device *netdev,
struct ethtool_drvinfo *drvinfo)
{
struct ice_repr *repr = ice_netdev_to_repr(netdev);
if (ice_check_vf_ready_for_cfg(repr->vf))
return;
__ice_get_drvinfo(netdev, drvinfo, repr->src_vsi);
drvinfo->n_priv_flags = ICE_PRIV_FLAG_ARRAY_SIZE;
}
static int ice_get_regs_len(struct net_device __always_unused *netdev)
@ -886,10 +875,10 @@ skip_ol_tests:
netdev_info(netdev, "testing finished\n");
}
static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
static void
__ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data,
struct ice_vsi *vsi)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *vsi = ice_get_netdev_priv_vsi(np);
unsigned int i;
u8 *p = data;
@ -940,6 +929,13 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
}
}
/* ethtool .get_strings for a regular netdev: forward to the common helper
 * using the VSI stored in the netdev private data.
 */
static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
{
	struct ice_netdev_priv *priv = netdev_priv(netdev);
	struct ice_vsi *netdev_vsi = priv->vsi;

	__ice_get_strings(netdev, stringset, data, netdev_vsi);
}
static int
ice_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state)
{
@ -1331,9 +1327,6 @@ static int ice_get_sset_count(struct net_device *netdev, int sset)
* order of strings will suffer from race conditions and are
* not safe.
*/
if (ice_is_port_repr_netdev(netdev))
return ICE_VSI_STATS_LEN;
return ICE_ALL_STATS_LEN(netdev);
case ETH_SS_TEST:
return ICE_TEST_LEN;
@ -1345,11 +1338,10 @@ static int ice_get_sset_count(struct net_device *netdev, int sset)
}
static void
ice_get_ethtool_stats(struct net_device *netdev,
struct ethtool_stats __always_unused *stats, u64 *data)
__ice_get_ethtool_stats(struct net_device *netdev,
struct ethtool_stats __always_unused *stats, u64 *data,
struct ice_vsi *vsi)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *vsi = ice_get_netdev_priv_vsi(np);
struct ice_pf *pf = vsi->back;
struct ice_tx_ring *tx_ring;
struct ice_rx_ring *rx_ring;
@ -1416,6 +1408,15 @@ ice_get_ethtool_stats(struct net_device *netdev,
}
}
/* ethtool .get_ethtool_stats for a regular netdev: forward to the common
 * helper using the VSI stored in the netdev private data.
 */
static void
ice_get_ethtool_stats(struct net_device *netdev,
		      struct ethtool_stats __always_unused *stats, u64 *data)
{
	struct ice_netdev_priv *priv = netdev_priv(netdev);
	struct ice_vsi *netdev_vsi = priv->vsi;

	__ice_get_ethtool_stats(netdev, stats, data, netdev_vsi);
}
#define ICE_PHY_TYPE_LOW_MASK_MIN_1G (ICE_PHY_TYPE_LOW_100BASE_TX | \
ICE_PHY_TYPE_LOW_100M_SGMII)
@ -3640,6 +3641,9 @@ ice_set_rc_coalesce(struct ethtool_coalesce *ec,
switch (rc->type) {
case ICE_RX_CONTAINER:
{
struct ice_q_vector *q_vector = rc->rx_ring->q_vector;
if (ec->rx_coalesce_usecs_high > ICE_MAX_INTRL ||
(ec->rx_coalesce_usecs_high &&
ec->rx_coalesce_usecs_high < pf->hw.intrl_gran)) {
@ -3648,22 +3652,20 @@ ice_set_rc_coalesce(struct ethtool_coalesce *ec,
ICE_MAX_INTRL);
return -EINVAL;
}
if (ec->rx_coalesce_usecs_high != rc->rx_ring->q_vector->intrl &&
if (ec->rx_coalesce_usecs_high != q_vector->intrl &&
(ec->use_adaptive_rx_coalesce || ec->use_adaptive_tx_coalesce)) {
netdev_info(vsi->netdev, "Invalid value, %s-usecs-high cannot be changed if adaptive-tx or adaptive-rx is enabled\n",
c_type_str);
return -EINVAL;
}
if (ec->rx_coalesce_usecs_high != rc->rx_ring->q_vector->intrl) {
rc->rx_ring->q_vector->intrl = ec->rx_coalesce_usecs_high;
ice_write_intrl(rc->rx_ring->q_vector,
ec->rx_coalesce_usecs_high);
}
if (ec->rx_coalesce_usecs_high != q_vector->intrl)
q_vector->intrl = ec->rx_coalesce_usecs_high;
use_adaptive_coalesce = ec->use_adaptive_rx_coalesce;
coalesce_usecs = ec->rx_coalesce_usecs;
break;
}
case ICE_TX_CONTAINER:
use_adaptive_coalesce = ec->use_adaptive_tx_coalesce;
coalesce_usecs = ec->tx_coalesce_usecs;
@ -3808,6 +3810,8 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
if (ice_set_q_coalesce(vsi, ec, v_idx))
return -EINVAL;
ice_set_q_vector_intrl(vsi->q_vectors[v_idx]);
}
goto set_complete;
}
@ -3815,6 +3819,8 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
if (ice_set_q_coalesce(vsi, ec, q_num))
return -EINVAL;
ice_set_q_vector_intrl(vsi->q_vectors[q_num]);
set_complete:
return 0;
}
@ -3834,6 +3840,54 @@ ice_set_per_q_coalesce(struct net_device *netdev, u32 q_num,
return __ice_set_coalesce(netdev, ec, q_num);
}
/**
 * ice_repr_get_drvinfo - ethtool .get_drvinfo for a port representor
 * @netdev: port representor network interface device structure
 * @drvinfo: ethtool driver info structure to fill in
 *
 * @drvinfo is left untouched when ice_check_vf_ready_for_cfg() returns
 * non-zero for the representor's VF; otherwise it is filled in by the common
 * helper using the representor's source VSI.
 */
static void
ice_repr_get_drvinfo(struct net_device *netdev,
		     struct ethtool_drvinfo *drvinfo)
{
	struct ice_repr *port_repr = ice_netdev_to_repr(netdev);

	if (!ice_check_vf_ready_for_cfg(port_repr->vf))
		__ice_get_drvinfo(netdev, drvinfo, port_repr->src_vsi);
}
/**
 * ice_repr_get_strings - ethtool .get_strings for a port representor
 * @netdev: port representor network interface device structure
 * @stringset: requested ethtool string set
 * @data: output buffer for the strings
 *
 * Nothing is written to @data when ice_check_vf_ready_for_cfg() returns
 * non-zero for the representor's VF, or when @stringset is anything other
 * than ETH_SS_STATS.
 */
static void
ice_repr_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
{
	struct ice_repr *port_repr = ice_netdev_to_repr(netdev);

	if (ice_check_vf_ready_for_cfg(port_repr->vf))
		return;

	/* for port representors only ETH_SS_STATS is supported */
	if (stringset != ETH_SS_STATS)
		return;

	__ice_get_strings(netdev, stringset, data, port_repr->src_vsi);
}
/**
 * ice_repr_get_ethtool_stats - ethtool .get_ethtool_stats for a port representor
 * @netdev: port representor network interface device structure
 * @stats: ethtool statistics request, passed through to the common helper
 * @data: output buffer for the statistic values
 *
 * @data is left untouched when ice_check_vf_ready_for_cfg() returns non-zero
 * for the representor's VF; otherwise the common helper fills it in from the
 * representor's source VSI.
 */
static void
ice_repr_get_ethtool_stats(struct net_device *netdev,
			   struct ethtool_stats __always_unused *stats,
			   u64 *data)
{
	struct ice_repr *port_repr = ice_netdev_to_repr(netdev);

	if (!ice_check_vf_ready_for_cfg(port_repr->vf))
		__ice_get_ethtool_stats(netdev, stats, data,
					port_repr->src_vsi);
}
/**
 * ice_repr_get_sset_count - ethtool .get_sset_count for a port representor
 * @netdev: port representor network interface device structure (not used)
 * @sset: requested ethtool string set
 *
 * Return: ICE_VSI_STATS_LEN for ETH_SS_STATS, -EOPNOTSUPP for every other
 * string set.
 */
static int ice_repr_get_sset_count(struct net_device *netdev, int sset)
{
	if (sset == ETH_SS_STATS)
		return ICE_VSI_STATS_LEN;

	return -EOPNOTSUPP;
}
#define ICE_I2C_EEPROM_DEV_ADDR 0xA0
#define ICE_I2C_EEPROM_DEV_ADDR2 0xA2
#define ICE_MODULE_TYPE_SFP 0x03
@ -4088,9 +4142,9 @@ void ice_set_ethtool_safe_mode_ops(struct net_device *netdev)
static const struct ethtool_ops ice_ethtool_repr_ops = {
.get_drvinfo = ice_repr_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_strings = ice_get_strings,
.get_ethtool_stats = ice_get_ethtool_stats,
.get_sset_count = ice_get_sset_count,
.get_strings = ice_repr_get_strings,
.get_ethtool_stats = ice_repr_get_ethtool_stats,
.get_sset_count = ice_repr_get_sset_count,
};
/**

View File

@ -706,7 +706,7 @@ ice_create_init_fdir_rule(struct ice_pf *pf, enum ice_fltr_ptype flow)
if (!seg)
return -ENOMEM;
tun_seg = devm_kzalloc(dev, sizeof(*seg) * ICE_FD_HW_SEG_MAX,
tun_seg = devm_kcalloc(dev, sizeof(*seg), ICE_FD_HW_SEG_MAX,
GFP_KERNEL);
if (!tun_seg) {
devm_kfree(dev, seg);
@ -1068,7 +1068,7 @@ ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp,
if (!seg)
return -ENOMEM;
tun_seg = devm_kzalloc(dev, sizeof(*seg) * ICE_FD_HW_SEG_MAX,
tun_seg = devm_kcalloc(dev, sizeof(*seg), ICE_FD_HW_SEG_MAX,
GFP_KERNEL);
if (!tun_seg) {
devm_kfree(dev, seg);

View File

@ -453,133 +453,6 @@ static u32 ice_fltr_build_action(u16 vsi_id)
ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_VALID_BIT;
}
/**
* ice_fltr_find_adv_entry - find advanced rule
* @rules: list of rules
* @rule_id: id of wanted rule
*/
static struct ice_adv_fltr_mgmt_list_entry *
ice_fltr_find_adv_entry(struct list_head *rules, u16 rule_id)
{
struct ice_adv_fltr_mgmt_list_entry *entry;
list_for_each_entry(entry, rules, list_entry) {
if (entry->rule_info.fltr_rule_id == rule_id)
return entry;
}
return NULL;
}
/**
* ice_fltr_update_adv_rule_flags - update flags on advanced rule
* @vsi: pointer to VSI
* @recipe_id: id of recipe
* @entry: advanced rule entry
* @new_flags: flags to update
*/
static enum ice_status
ice_fltr_update_adv_rule_flags(struct ice_vsi *vsi, u16 recipe_id,
struct ice_adv_fltr_mgmt_list_entry *entry,
u32 new_flags)
{
struct ice_adv_rule_info *info = &entry->rule_info;
struct ice_sw_act_ctrl *act = &info->sw_act;
u32 action;
if (act->fltr_act != ICE_FWD_TO_VSI)
return ICE_ERR_NOT_SUPPORTED;
action = ice_fltr_build_action(act->fwd_id.hw_vsi_id);
return ice_fltr_update_rule_flags(&vsi->back->hw, info->fltr_rule_id,
recipe_id, action, info->sw_act.flag,
act->src, new_flags);
}
/**
* ice_fltr_find_regular_entry - find regular rule
* @rules: list of rules
* @rule_id: id of wanted rule
*/
static struct ice_fltr_mgmt_list_entry *
ice_fltr_find_regular_entry(struct list_head *rules, u16 rule_id)
{
struct ice_fltr_mgmt_list_entry *entry;
list_for_each_entry(entry, rules, list_entry) {
if (entry->fltr_info.fltr_rule_id == rule_id)
return entry;
}
return NULL;
}
/**
* ice_fltr_update_regular_rule - update flags on regular rule
* @vsi: pointer to VSI
* @recipe_id: id of recipe
* @entry: regular rule entry
* @new_flags: flags to update
*/
static enum ice_status
ice_fltr_update_regular_rule(struct ice_vsi *vsi, u16 recipe_id,
struct ice_fltr_mgmt_list_entry *entry,
u32 new_flags)
{
struct ice_fltr_info *info = &entry->fltr_info;
u32 action;
if (info->fltr_act != ICE_FWD_TO_VSI)
return ICE_ERR_NOT_SUPPORTED;
action = ice_fltr_build_action(info->fwd_id.hw_vsi_id);
return ice_fltr_update_rule_flags(&vsi->back->hw, info->fltr_rule_id,
recipe_id, action, info->flag,
info->src, new_flags);
}
/**
* ice_fltr_update_flags - update flags on rule
* @vsi: pointer to VSI
* @rule_id: id of rule
* @recipe_id: id of recipe
* @new_flags: flags to update
*
* Function updates flags on regular and advance rule.
*
* Flags should be a combination of ICE_SINGLE_ACT_LB_ENABLE and
* ICE_SINGLE_ACT_LAN_ENABLE.
*/
enum ice_status
ice_fltr_update_flags(struct ice_vsi *vsi, u16 rule_id, u16 recipe_id,
u32 new_flags)
{
struct ice_adv_fltr_mgmt_list_entry *adv_entry;
struct ice_fltr_mgmt_list_entry *regular_entry;
struct ice_hw *hw = &vsi->back->hw;
struct ice_sw_recipe *recp_list;
struct list_head *fltr_rules;
recp_list = &hw->switch_info->recp_list[recipe_id];
if (!recp_list)
return ICE_ERR_DOES_NOT_EXIST;
fltr_rules = &recp_list->filt_rules;
regular_entry = ice_fltr_find_regular_entry(fltr_rules, rule_id);
if (regular_entry)
return ice_fltr_update_regular_rule(vsi, recipe_id,
regular_entry, new_flags);
adv_entry = ice_fltr_find_adv_entry(fltr_rules, rule_id);
if (adv_entry)
return ice_fltr_update_adv_rule_flags(vsi, recipe_id,
adv_entry, new_flags);
return ICE_ERR_DOES_NOT_EXIST;
}
/**
* ice_fltr_update_flags_dflt_rule - update flags on default rule
* @vsi: pointer to VSI

View File

@ -36,10 +36,6 @@ enum ice_status
ice_fltr_remove_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag,
enum ice_sw_fwd_act_type action);
void ice_fltr_remove_all(struct ice_vsi *vsi);
enum ice_status
ice_fltr_update_flags(struct ice_vsi *vsi, u16 rule_id, u16 recipe_id,
u32 new_flags);
enum ice_status
ice_fltr_update_flags_dflt_rule(struct ice_vsi *vsi, u16 rule_id, u8 direction,
u32 new_flags);

View File

@ -182,6 +182,7 @@
#define GLINT_DYN_CTL_INTERVAL_S 5
#define GLINT_DYN_CTL_INTERVAL_M ICE_M(0xFFF, 5)
#define GLINT_DYN_CTL_SW_ITR_INDX_ENA_M BIT(24)
#define GLINT_DYN_CTL_SW_ITR_INDX_S 25
#define GLINT_DYN_CTL_SW_ITR_INDX_M ICE_M(0x3, 25)
#define GLINT_DYN_CTL_WB_ON_ITR_M BIT(30)
#define GLINT_DYN_CTL_INTENA_MSK_M BIT(31)

View File

@ -1941,6 +1941,31 @@ void ice_write_itr(struct ice_ring_container *rc, u16 itr)
__ice_write_itr(q_vector, rc, itr);
}
/**
* ice_set_q_vector_intrl - set up interrupt rate limiting
* @q_vector: the vector to be configured
*
* Interrupt rate limiting is local to the vector, not per-queue so we must
* detect if either ring container has dynamic moderation enabled to decide
* what to set the interrupt rate limit to via INTRL settings. In the case that
* dynamic moderation is disabled on both, write the value with the cached
* setting to make sure INTRL register matches the user visible value.
*/
void ice_set_q_vector_intrl(struct ice_q_vector *q_vector)
{
if (ITR_IS_DYNAMIC(&q_vector->tx) || ITR_IS_DYNAMIC(&q_vector->rx)) {
/* in the case of dynamic enabled, cap each vector to no more
* than (4 us) 250,000 ints/sec, which allows low latency
* but still less than 500,000 interrupts per second, which
* reduces CPU a bit in the case of the lowest latency
* setting. The 4 here is a value in microseconds.
*/
ice_write_intrl(q_vector, 4);
} else {
ice_write_intrl(q_vector, q_vector->intrl);
}
}
/**
* ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW
* @vsi: the VSI being configured
@ -3096,7 +3121,7 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
}
vsi->q_vectors[i]->intrl = coalesce[i].intrl;
ice_write_intrl(vsi->q_vectors[i], coalesce[i].intrl);
ice_set_q_vector_intrl(vsi->q_vectors[i]);
}
/* the number of queue vectors increased so write whatever is in
@ -3114,7 +3139,7 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
ice_write_itr(rc, rc->itr_setting);
vsi->q_vectors[i]->intrl = coalesce[0].intrl;
ice_write_intrl(vsi->q_vectors[i], coalesce[0].intrl);
ice_set_q_vector_intrl(vsi->q_vectors[i]);
}
}
@ -3600,6 +3625,180 @@ int ice_clear_dflt_vsi(struct ice_sw *sw)
return 0;
}
/**
 * ice_get_link_speed_mbps - get link speed in Mbps
 * @vsi: the VSI whose link speed is being queried
 *
 * Translates the link speed recorded in the VSI's port_info into the
 * matching SPEED_* value.
 *
 * Return: current VSI link speed in Mbps, or 0 if the speed is unknown.
 */
int ice_get_link_speed_mbps(struct ice_vsi *vsi)
{
	int mbps;

	switch (vsi->port_info->phy.link_info.link_speed) {
	case ICE_AQ_LINK_SPEED_100GB:
		mbps = SPEED_100000;
		break;
	case ICE_AQ_LINK_SPEED_50GB:
		mbps = SPEED_50000;
		break;
	case ICE_AQ_LINK_SPEED_40GB:
		mbps = SPEED_40000;
		break;
	case ICE_AQ_LINK_SPEED_25GB:
		mbps = SPEED_25000;
		break;
	case ICE_AQ_LINK_SPEED_20GB:
		mbps = SPEED_20000;
		break;
	case ICE_AQ_LINK_SPEED_10GB:
		mbps = SPEED_10000;
		break;
	case ICE_AQ_LINK_SPEED_5GB:
		mbps = SPEED_5000;
		break;
	case ICE_AQ_LINK_SPEED_2500MB:
		mbps = SPEED_2500;
		break;
	case ICE_AQ_LINK_SPEED_1000MB:
		mbps = SPEED_1000;
		break;
	case ICE_AQ_LINK_SPEED_100MB:
		mbps = SPEED_100;
		break;
	case ICE_AQ_LINK_SPEED_10MB:
		mbps = SPEED_10;
		break;
	case ICE_AQ_LINK_SPEED_UNKNOWN:
	default:
		mbps = 0;
		break;
	}

	return mbps;
}
/**
 * ice_get_link_speed_kbps - get link speed in Kbps
 * @vsi: the VSI whose link speed is being queried
 *
 * Return: current VSI link speed in Kbps (the Mbps value scaled by 1000),
 * or 0 if the speed is unknown.
 */
static int ice_get_link_speed_kbps(struct ice_vsi *vsi)
{
	return ice_get_link_speed_mbps(vsi) * 1000;
}
/**
 * ice_set_min_bw_limit - setup minimum BW limit for Tx based on min_tx_rate
 * @vsi: VSI to be configured
 * @min_tx_rate: min Tx rate in Kbps to be configured as BW limit
 *
 * If the min_tx_rate is specified as 0 that means to clear the minimum BW limit
 * profile, otherwise a non-zero value will force a minimum BW limit for the VSI
 * on TC 0.
 *
 * Return: 0 on success, -EINVAL if the VSI has no port_info or @min_tx_rate
 * exceeds the current link speed, -EIO if the scheduler update fails.
 */
int ice_set_min_bw_limit(struct ice_vsi *vsi, u64 min_tx_rate)
{
	struct ice_pf *pf = vsi->back;
	enum ice_status status;
	struct device *dev;
	int speed;

	dev = ice_pf_to_dev(pf);
	if (!vsi->port_info) {
		dev_dbg(dev, "VSI %d, type %u specified doesn't have valid port_info\n",
			vsi->idx, vsi->type);
		return -EINVAL;
	}

	/* an unknown link speed is reported as 0, so every non-zero rate is
	 * rejected until the speed is known
	 */
	speed = ice_get_link_speed_kbps(vsi);
	if (min_tx_rate > (u64)speed) {
		dev_err(dev, "invalid min Tx rate %llu Kbps specified for %s %d is greater than current link speed %d Kbps\n",
			min_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx,
			speed);
		return -EINVAL;
	}

	/* Configure min BW for VSI limit */
	if (min_tx_rate) {
		/* min_tx_rate is bounded by speed (an int) at this point, so
		 * narrowing it to the u32 bw argument cannot truncate
		 */
		status = ice_cfg_vsi_bw_lmt_per_tc(vsi->port_info, vsi->idx, 0,
						   ICE_MIN_BW, min_tx_rate);
		if (status) {
			dev_err(dev, "failed to set min Tx rate(%llu Kbps) for %s %d\n",
				min_tx_rate, ice_vsi_type_str(vsi->type),
				vsi->idx);
			return -EIO;
		}

		dev_dbg(dev, "set min Tx rate(%llu Kbps) for %s %d\n",
			min_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx);
	} else {
		status = ice_cfg_vsi_bw_dflt_lmt_per_tc(vsi->port_info,
							vsi->idx, 0,
							ICE_MIN_BW);
		if (status) {
			dev_err(dev, "failed to clear min Tx rate configuration for %s %d\n",
				ice_vsi_type_str(vsi->type), vsi->idx);
			return -EIO;
		}

		dev_dbg(dev, "cleared min Tx rate configuration for %s %d\n",
			ice_vsi_type_str(vsi->type), vsi->idx);
	}

	return 0;
}
/**
 * ice_set_max_bw_limit - setup maximum BW limit for Tx based on max_tx_rate
 * @vsi: VSI to be configured
 * @max_tx_rate: max Tx rate in Kbps to be configured as BW limit
 *
 * If the max_tx_rate is specified as 0 that means to clear the maximum BW limit
 * profile, otherwise a non-zero value will force a maximum BW limit for the VSI
 * on TC 0.
 *
 * Return: 0 on success, -EINVAL if the VSI has no port_info or @max_tx_rate
 * exceeds the current link speed, -EIO if the scheduler update fails.
 */
int ice_set_max_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate)
{
	struct ice_pf *pf = vsi->back;
	enum ice_status status;
	struct device *dev;
	int speed;

	dev = ice_pf_to_dev(pf);
	if (!vsi->port_info) {
		dev_dbg(dev, "VSI %d, type %u specified doesn't have valid port_info\n",
			vsi->idx, vsi->type);
		return -EINVAL;
	}

	/* an unknown link speed is reported as 0, so every non-zero rate is
	 * rejected until the speed is known
	 */
	speed = ice_get_link_speed_kbps(vsi);
	if (max_tx_rate > (u64)speed) {
		dev_err(dev, "invalid max Tx rate %llu Kbps specified for %s %d is greater than current link speed %d Kbps\n",
			max_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx,
			speed);
		return -EINVAL;
	}

	/* Configure max BW for VSI limit */
	if (max_tx_rate) {
		/* max_tx_rate is bounded by speed (an int) at this point, so
		 * narrowing it to the u32 bw argument cannot truncate
		 */
		status = ice_cfg_vsi_bw_lmt_per_tc(vsi->port_info, vsi->idx, 0,
						   ICE_MAX_BW, max_tx_rate);
		if (status) {
			dev_err(dev, "failed setting max Tx rate(%llu Kbps) for %s %d\n",
				max_tx_rate, ice_vsi_type_str(vsi->type),
				vsi->idx);
			return -EIO;
		}

		dev_dbg(dev, "set max Tx rate(%llu Kbps) for %s %d\n",
			max_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx);
	} else {
		status = ice_cfg_vsi_bw_dflt_lmt_per_tc(vsi->port_info,
							vsi->idx, 0,
							ICE_MAX_BW);
		if (status) {
			dev_err(dev, "failed clearing max Tx rate configuration for %s %d\n",
				ice_vsi_type_str(vsi->type), vsi->idx);
			return -EIO;
		}

		dev_dbg(dev, "cleared max Tx rate configuration for %s %d\n",
			ice_vsi_type_str(vsi->type), vsi->idx);
	}

	return 0;
}
/**
* ice_set_link - turn on/off physical link
* @vsi: VSI to modify physical link on

View File

@ -103,6 +103,7 @@ int ice_status_to_errno(enum ice_status err);
void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl);
void ice_write_itr(struct ice_ring_container *rc, u16 itr);
void ice_set_q_vector_intrl(struct ice_q_vector *q_vector);
enum ice_status
ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set);
@ -116,7 +117,9 @@ bool ice_is_vsi_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi);
int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi);
int ice_clear_dflt_vsi(struct ice_sw *sw);
int ice_set_min_bw_limit(struct ice_vsi *vsi, u64 min_tx_rate);
int ice_set_max_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate);
int ice_get_link_speed_mbps(struct ice_vsi *vsi);
int
ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *));

View File

@ -5502,77 +5502,59 @@ int ice_vsi_cfg(struct ice_vsi *vsi)
}
/* THEORY OF MODERATION:
* The below code creates custom DIM profiles for use by this driver, because
* the ice driver hardware works differently than the hardware that DIMLIB was
* The ice driver hardware works differently than the hardware that DIMLIB was
* originally made for. ice hardware doesn't have packet count limits that
* can trigger an interrupt, but it *does* have interrupt rate limit support,
* and this code adds that capability to be used by the driver when it's using
* DIMLIB. The DIMLIB code was always designed to be a suggestion to the driver
* for how to "respond" to traffic and interrupts, so this driver uses a
* slightly different set of moderation parameters to get best performance.
* which is hard-coded to a limit of 250,000 ints/second.
* If not using dynamic moderation, the INTRL value can be modified
* by ethtool rx-usecs-high.
*/
struct ice_dim {
/* the throttle rate for interrupts, basically worst case delay before
* an initial interrupt fires, value is stored in microseconds.
*/
u16 itr;
/* the rate limit for interrupts, which can cap a delay from a small
* ITR at a certain amount of interrupts per second. f.e. a 2us ITR
* could yield as much as 500,000 interrupts per second, but with a
* 10us rate limit, it limits to 100,000 interrupts per second. Value
* is stored in microseconds.
*/
u16 intrl;
};
/* Make a different profile for Rx that doesn't allow quite so aggressive
* moderation at the high end (it maxes out at 128us or about 8k interrupts a
* second. The INTRL/rate parameters here are only useful to cap small ITR
* values, which is why for larger ITR's - like 128, which can only generate
* 8k interrupts per second, there is no point to rate limit and the values
* are set to zero. The rate limit values do affect latency, and so must
* be reasonably small so to not impact latency sensitive tests.
* moderation at the high end (it maxes out at 126us or about 8k interrupts a
* second).
*/
static const struct ice_dim rx_profile[] = {
{2, 10},
{8, 16},
{32, 0},
{96, 0},
{128, 0}
{2}, /* 500,000 ints/s, capped at 250K by INTRL */
{8}, /* 125,000 ints/s */
{16}, /* 62,500 ints/s */
{62}, /* 16,129 ints/s */
{126} /* 7,936 ints/s */
};
/* The transmit profile, which has the same sorts of values
* as the previous struct
*/
static const struct ice_dim tx_profile[] = {
{2, 10},
{8, 16},
{64, 0},
{128, 0},
{256, 0}
{2}, /* 500,000 ints/s, capped at 250K by INTRL */
{8}, /* 125,000 ints/s */
{40}, /* 16,125 ints/s */
{128}, /* 7,812 ints/s */
{256} /* 3,906 ints/s */
};
static void ice_tx_dim_work(struct work_struct *work)
{
struct ice_ring_container *rc;
struct ice_q_vector *q_vector;
struct dim *dim;
u16 itr, intrl;
u16 itr;
dim = container_of(work, struct dim, work);
rc = container_of(dim, struct ice_ring_container, dim);
q_vector = container_of(rc, struct ice_q_vector, tx);
rc = (struct ice_ring_container *)dim->priv;
if (dim->profile_ix >= ARRAY_SIZE(tx_profile))
dim->profile_ix = ARRAY_SIZE(tx_profile) - 1;
WARN_ON(dim->profile_ix >= ARRAY_SIZE(tx_profile));
/* look up the values in our local table */
itr = tx_profile[dim->profile_ix].itr;
intrl = tx_profile[dim->profile_ix].intrl;
ice_trace(tx_dim_work, q_vector, dim);
ice_trace(tx_dim_work, container_of(rc, struct ice_q_vector, tx), dim);
ice_write_itr(rc, itr);
ice_write_intrl(q_vector, intrl);
dim->state = DIM_START_MEASURE;
}
@ -5580,28 +5562,65 @@ static void ice_tx_dim_work(struct work_struct *work)
static void ice_rx_dim_work(struct work_struct *work)
{
struct ice_ring_container *rc;
struct ice_q_vector *q_vector;
struct dim *dim;
u16 itr, intrl;
u16 itr;
dim = container_of(work, struct dim, work);
rc = container_of(dim, struct ice_ring_container, dim);
q_vector = container_of(rc, struct ice_q_vector, rx);
rc = (struct ice_ring_container *)dim->priv;
if (dim->profile_ix >= ARRAY_SIZE(rx_profile))
dim->profile_ix = ARRAY_SIZE(rx_profile) - 1;
WARN_ON(dim->profile_ix >= ARRAY_SIZE(rx_profile));
/* look up the values in our local table */
itr = rx_profile[dim->profile_ix].itr;
intrl = rx_profile[dim->profile_ix].intrl;
ice_trace(rx_dim_work, q_vector, dim);
ice_trace(rx_dim_work, container_of(rc, struct ice_q_vector, rx), dim);
ice_write_itr(rc, itr);
ice_write_intrl(q_vector, intrl);
dim->state = DIM_START_MEASURE;
}
#define ICE_DIM_DEFAULT_PROFILE_IX 1
/**
* ice_init_moderation - set up interrupt moderation
* @q_vector: the vector containing rings to be configured
*
* Set up interrupt moderation registers, with the intent to do the right thing
* when called from reset or from probe, and whether or not dynamic moderation
* is enabled or not. Take special care to write all the registers in both
* dynamic moderation mode or not in order to make sure hardware is in a known
* state.
*/
static void ice_init_moderation(struct ice_q_vector *q_vector)
{
struct ice_ring_container *rc;
bool tx_dynamic, rx_dynamic;
rc = &q_vector->tx;
INIT_WORK(&rc->dim.work, ice_tx_dim_work);
rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX;
rc->dim.priv = rc;
tx_dynamic = ITR_IS_DYNAMIC(rc);
/* set the initial TX ITR to match the above */
ice_write_itr(rc, tx_dynamic ?
tx_profile[rc->dim.profile_ix].itr : rc->itr_setting);
rc = &q_vector->rx;
INIT_WORK(&rc->dim.work, ice_rx_dim_work);
rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX;
rc->dim.priv = rc;
rx_dynamic = ITR_IS_DYNAMIC(rc);
/* set the initial RX ITR to match the above */
ice_write_itr(rc, rx_dynamic ? rx_profile[rc->dim.profile_ix].itr :
rc->itr_setting);
ice_set_q_vector_intrl(q_vector);
}
/**
* ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI
* @vsi: the VSI being configured
@ -5616,11 +5635,7 @@ static void ice_napi_enable_all(struct ice_vsi *vsi)
ice_for_each_q_vector(vsi, q_idx) {
struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
INIT_WORK(&q_vector->tx.dim.work, ice_tx_dim_work);
q_vector->tx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
INIT_WORK(&q_vector->rx.dim.work, ice_rx_dim_work);
q_vector->rx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
ice_init_moderation(q_vector);
if (q_vector->rx.rx_ring || q_vector->tx.tx_ring)
napi_enable(&q_vector->napi);
@ -7390,6 +7405,7 @@ static const struct net_device_ops ice_netdev_ops = {
.ndo_set_vf_vlan = ice_set_vf_port_vlan,
.ndo_set_vf_link_state = ice_set_vf_link_state,
.ndo_get_vf_stats = ice_get_vf_stats,
.ndo_set_vf_rate = ice_set_vf_bw,
.ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid,
.ndo_setup_tc = ice_setup_tc,

View File

@ -3770,6 +3770,136 @@ ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
ICE_SCHED_DFLT_BW);
}
/**
 * ice_sched_get_node_by_id_type - get node from ID type
 * @pi: port information structure
 * @id: identifier (software VSI handle or aggregator ID)
 * @agg_type: type of aggregator
 * @tc: traffic class
 *
 * Looks up the scheduler node selected by @id, @agg_type and @tc. This
 * function needs to be called with the scheduler lock held.
 *
 * Return: the matching node, or NULL when the VSI handle is not valid, no
 * VSI context or TC node exists, or @agg_type is not handled here.
 */
static struct ice_sched_node *
ice_sched_get_node_by_id_type(struct ice_port_info *pi, u32 id,
			      enum ice_agg_type agg_type, u8 tc)
{
	if (agg_type == ICE_AGG_TYPE_VSI) {
		u16 vsi_handle = (u16)id;
		struct ice_vsi_ctx *ctx;

		if (!ice_is_vsi_valid(pi->hw, vsi_handle))
			return NULL;

		/* Get sched_vsi_info */
		ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
		if (!ctx)
			return NULL;

		return ctx->sched.vsi_node[tc];
	}

	if (agg_type == ICE_AGG_TYPE_AGG) {
		struct ice_sched_node *tc_node = ice_sched_get_tc_node(pi, tc);

		if (!tc_node)
			return NULL;

		return ice_sched_get_agg_node(pi, tc_node, id);
	}

	return NULL;
}
/**
 * ice_sched_set_node_bw_lmt_per_tc - set node BW limit per TC
 * @pi: port information structure
 * @id: ID (software VSI handle or AGG ID)
 * @agg_type: aggregator type (VSI or AGG type node)
 * @tc: traffic class
 * @rl_type: min or max
 * @bw: bandwidth in Kbps
 *
 * Sets the BW limit of the VSI or aggregator scheduling node selected by
 * @id, @agg_type and @tc. Passing ICE_SCHED_DFLT_BW as @bw applies the
 * default limit instead. The scheduler lock is taken for the lookup and the
 * update.
 *
 * Return: ICE_ERR_PARAM when @pi is NULL, @rl_type is ICE_UNKNOWN_BW or no
 * node is found; otherwise the status of the underlying limit update.
 */
static enum ice_status
ice_sched_set_node_bw_lmt_per_tc(struct ice_port_info *pi, u32 id,
				 enum ice_agg_type agg_type, u8 tc,
				 enum ice_rl_type rl_type, u32 bw)
{
	struct ice_sched_node *sched_node;
	enum ice_status ret = ICE_ERR_PARAM;

	if (!pi || rl_type == ICE_UNKNOWN_BW)
		return ICE_ERR_PARAM;

	mutex_lock(&pi->sched_lock);

	sched_node = ice_sched_get_node_by_id_type(pi, id, agg_type, tc);
	if (!sched_node) {
		/* ret keeps its ICE_ERR_PARAM default on a failed lookup */
		ice_debug(pi->hw, ICE_DBG_SCHED, "Wrong id, agg type, or tc\n");
	} else if (bw == ICE_SCHED_DFLT_BW) {
		ret = ice_sched_set_node_bw_dflt_lmt(pi, sched_node, rl_type);
	} else {
		ret = ice_sched_set_node_bw_lmt(pi, sched_node, rl_type, bw);
	}

	mutex_unlock(&pi->sched_lock);

	return ret;
}
/**
 * ice_cfg_vsi_bw_lmt_per_tc - configure VSI BW limit per TC
 * @pi: port information structure
 * @vsi_handle: software VSI handle
 * @tc: traffic class
 * @rl_type: min or max
 * @bw: bandwidth in Kbps
 *
 * Applies @bw as the BW limit of the VSI scheduling node for @tc by calling
 * the generic per-TC helper with the VSI aggregator type.
 *
 * Return: the status reported by the generic per-TC helper.
 */
enum ice_status
ice_cfg_vsi_bw_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
			  enum ice_rl_type rl_type, u32 bw)
{
	/* the generic helper identifies a VSI node by its software handle */
	u32 node_id = vsi_handle;

	return ice_sched_set_node_bw_lmt_per_tc(pi, node_id, ICE_AGG_TYPE_VSI,
						tc, rl_type, bw);
}
/**
 * ice_cfg_vsi_bw_dflt_lmt_per_tc - configure default VSI BW limit per TC
 * @pi: port information structure
 * @vsi_handle: software VSI handle
 * @tc: traffic class
 * @rl_type: min or max
 *
 * Applies the default BW limit to the VSI scheduling node for @tc by calling
 * the generic per-TC helper with ICE_SCHED_DFLT_BW as the bandwidth.
 *
 * Return: the status reported by the generic per-TC helper.
 */
enum ice_status
ice_cfg_vsi_bw_dflt_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
			       enum ice_rl_type rl_type)
{
	/* the generic helper identifies a VSI node by its software handle */
	u32 node_id = vsi_handle;

	return ice_sched_set_node_bw_lmt_per_tc(pi, node_id, ICE_AGG_TYPE_VSI,
						tc, rl_type, ICE_SCHED_DFLT_BW);
}
/**
* ice_cfg_rl_burst_size - Set burst size value
* @hw: pointer to the HW struct

View File

@ -103,6 +103,12 @@ ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
enum ice_status
ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
u16 q_handle, enum ice_rl_type rl_type);
enum ice_status
ice_cfg_vsi_bw_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
enum ice_rl_type rl_type, u32 bw);
enum ice_status
ice_cfg_vsi_bw_dflt_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
enum ice_rl_type rl_type);
enum ice_status ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes);
void ice_sched_replay_agg_vsi_preinit(struct ice_hw *hw);
void ice_sched_replay_agg(struct ice_hw *hw);

View File

@ -4783,7 +4783,14 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
s_rule = kzalloc(rule_buf_sz, GFP_KERNEL);
if (!s_rule)
return ICE_ERR_NO_MEMORY;
act |= ICE_SINGLE_ACT_LB_ENABLE | ICE_SINGLE_ACT_LAN_ENABLE;
if (!rinfo->flags_info.act_valid) {
act |= ICE_SINGLE_ACT_LAN_ENABLE;
act |= ICE_SINGLE_ACT_LB_ENABLE;
} else {
act |= rinfo->flags_info.act & (ICE_SINGLE_ACT_LAN_ENABLE |
ICE_SINGLE_ACT_LB_ENABLE);
}
switch (rinfo->sw_act.fltr_act) {
case ICE_FWD_TO_VSI:
act |= (rinfo->sw_act.fwd_id.hw_vsi_id <<

View File

@ -160,11 +160,22 @@ struct ice_rule_query_data {
u16 vsi_handle;
};
/* Optional override of the lb_en and lan_en action flags that
 * ice_add_adv_rule applies to a rule. When act_valid is set, only the
 * ICE_SINGLE_ACT_LAN_ENABLE and ICE_SINGLE_ACT_LB_ENABLE bits of act are
 * used; when it is not set, both flags are enabled by default.
 */
struct ice_adv_rule_flags_info {
u32 act;
u8 act_valid; /* non-zero when the flags in act are to be used */
};
struct ice_adv_rule_info {
struct ice_sw_act_ctrl sw_act;
u32 priority;
u8 rx; /* true means LOOKUP_RX otherwise LOOKUP_TX */
u16 fltr_rule_id;
struct ice_adv_rule_flags_info flags_info;
};
/* A collection of one or more four word recipe */

View File

@ -274,6 +274,8 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
rule_info.sw_act.flag |= ICE_FLTR_TX;
rule_info.sw_act.src = vsi->idx;
rule_info.rx = false;
rule_info.flags_info.act = ICE_SINGLE_ACT_LAN_ENABLE;
rule_info.flags_info.act_valid = true;
}
/* specify the cookie as filter_rule_id */
@ -296,12 +298,6 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
fltr->rid = rule_added.rid;
fltr->rule_id = rule_added.rule_id;
if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS) {
if (ice_fltr_update_flags(vsi, fltr->rule_id, fltr->rid,
ICE_SINGLE_ACT_LAN_ENABLE))
ice_rem_adv_rule_by_id(hw, &rule_added);
}
exit:
kfree(list);
return ret;

View File

@ -343,7 +343,7 @@ int ice_setup_tx_ring(struct ice_tx_ring *tx_ring)
/* warn if we are about to overwrite the pointer */
WARN_ON(tx_ring->tx_buf);
tx_ring->tx_buf =
devm_kzalloc(dev, sizeof(*tx_ring->tx_buf) * tx_ring->count,
devm_kcalloc(dev, sizeof(*tx_ring->tx_buf), tx_ring->count,
GFP_KERNEL);
if (!tx_ring->tx_buf)
return -ENOMEM;
@ -475,7 +475,7 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring)
/* warn if we are about to overwrite the pointer */
WARN_ON(rx_ring->rx_buf);
rx_ring->rx_buf =
devm_kzalloc(dev, sizeof(*rx_ring->rx_buf) * rx_ring->count,
devm_kcalloc(dev, sizeof(*rx_ring->rx_buf), rx_ring->count,
GFP_KERNEL);
if (!rx_ring->rx_buf)
return -ENOMEM;
@ -1259,6 +1259,41 @@ construct_skb:
return failure ? budget : (int)total_rx_pkts;
}
/**
 * __ice_update_sample - fill a DIM sample from a ring container's counters
 * @q_vector: vector whose total_events count is recorded in the sample
 * @rc: ring container (Tx or Rx) whose rings are summed
 * @sample: sample to populate
 * @is_tx: true when @rc holds Tx rings, false when it holds Rx rings
 *
 * Sums packets and bytes over every ring in @rc, passes the totals to
 * dim_update_sample() and restarts the DIM measurement if the previous
 * start sample is at least one second old.
 */
static void __ice_update_sample(struct ice_q_vector *q_vector,
				struct ice_ring_container *rc,
				struct dim_sample *sample,
				bool is_tx)
{
	u64 pkt_total = 0, byte_total = 0;

	if (!is_tx) {
		struct ice_rx_ring *rxr;

		ice_for_each_rx_ring(rxr, *rc) {
			pkt_total += rxr->stats.pkts;
			byte_total += rxr->stats.bytes;
		}
	} else {
		struct ice_tx_ring *txr;

		ice_for_each_tx_ring(txr, *rc) {
			pkt_total += txr->stats.pkts;
			byte_total += txr->stats.bytes;
		}
	}

	dim_update_sample(q_vector->total_events, pkt_total, byte_total,
			  sample);
	/* clear comp_ctr explicitly instead of relying on the caller
	 * having zeroed @sample
	 */
	sample->comp_ctr = 0;

	/* A DIM state that has not been refreshed for 1,000 ms or more is
	 * stale (typically an idle queue the scheduler has just switched
	 * back to); anything fresher is left untouched.
	 */
	if (ktime_ms_delta(sample->time, rc->dim.start_sample.time) < 1000)
		return;

	rc->dim.state = DIM_START_MEASURE;
}
/**
* ice_net_dim - Update net DIM algorithm
* @q_vector: the vector associated with the interrupt
@ -1274,34 +1309,16 @@ static void ice_net_dim(struct ice_q_vector *q_vector)
struct ice_ring_container *rx = &q_vector->rx;
if (ITR_IS_DYNAMIC(tx)) {
struct dim_sample dim_sample = {};
u64 packets = 0, bytes = 0;
struct ice_tx_ring *ring;
ice_for_each_tx_ring(ring, q_vector->tx) {
packets += ring->stats.pkts;
bytes += ring->stats.bytes;
}
dim_update_sample(q_vector->total_events, packets, bytes,
&dim_sample);
struct dim_sample dim_sample;
__ice_update_sample(q_vector, tx, &dim_sample, true);
net_dim(&tx->dim, dim_sample);
}
if (ITR_IS_DYNAMIC(rx)) {
struct dim_sample dim_sample = {};
u64 packets = 0, bytes = 0;
struct ice_rx_ring *ring;
ice_for_each_rx_ring(ring, q_vector->rx) {
packets += ring->stats.pkts;
bytes += ring->stats.bytes;
}
dim_update_sample(q_vector->total_events, packets, bytes,
&dim_sample);
struct dim_sample dim_sample;
__ice_update_sample(q_vector, rx, &dim_sample, false);
net_dim(&rx->dim, dim_sample);
}
}
@ -1328,15 +1345,14 @@ static u32 ice_buildreg_itr(u16 itr_idx, u16 itr)
}
/**
* ice_update_ena_itr - Update ITR moderation and re-enable MSI-X interrupt
* ice_enable_interrupt - re-enable MSI-X interrupt
* @q_vector: the vector associated with the interrupt to enable
*
* Update the net_dim() algorithm and re-enable the interrupt associated with
* this vector.
*
* If the VSI is down, the interrupt will not be re-enabled.
* If the VSI is down, the interrupt will not be re-enabled. Also,
* when enabling the interrupt always reset the wb_on_itr to false
* and trigger a software interrupt to clean out internal state.
*/
static void ice_update_ena_itr(struct ice_q_vector *q_vector)
static void ice_enable_interrupt(struct ice_q_vector *q_vector)
{
struct ice_vsi *vsi = q_vector->vsi;
bool wb_en = q_vector->wb_on_itr;
@ -1345,25 +1361,25 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector)
if (test_bit(ICE_DOWN, vsi->state))
return;
/* When exiting WB_ON_ITR, let ITR resume its normal
* interrupts-enabled path.
/* trigger an ITR delayed software interrupt when exiting busy poll, to
* make sure to catch any pending cleanups that might have been missed
* due to interrupt state transition. If busy poll or poll isn't
* enabled, then don't update ITR, and just enable the interrupt.
*/
if (wb_en)
if (!wb_en) {
itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0);
} else {
q_vector->wb_on_itr = false;
/* This will do nothing if dynamic updates are not enabled. */
ice_net_dim(q_vector);
/* net_dim() updates ITR out-of-band using a work item */
itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0);
/* trigger an immediate software interrupt when exiting
* busy poll, to make sure to catch any pending cleanups
* that might have been missed due to interrupt state
* transition.
*/
if (wb_en) {
/* do two things here with a single write. Set up the third ITR
* index to be used for software interrupt moderation, and then
* trigger a software interrupt with a rate limit of 20K on
* software interrupts, this will help avoid high interrupt
* loads due to frequently polling and exiting polling.
*/
itr_val = ice_buildreg_itr(ICE_IDX_ITR2, ICE_ITR_20K);
itr_val |= GLINT_DYN_CTL_SWINT_TRIG_M |
GLINT_DYN_CTL_SW_ITR_INDX_M |
ICE_IDX_ITR2 << GLINT_DYN_CTL_SW_ITR_INDX_S |
GLINT_DYN_CTL_SW_ITR_INDX_ENA_M;
}
wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val);
@ -1482,10 +1498,12 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
/* Exit the polling mode, but don't re-enable interrupts if stack might
* poll us due to busy-polling
*/
if (likely(napi_complete_done(napi, work_done)))
ice_update_ena_itr(q_vector);
else
if (likely(napi_complete_done(napi, work_done))) {
ice_net_dim(q_vector);
ice_enable_interrupt(q_vector);
} else {
ice_set_wb_on_itr(q_vector);
}
return min_t(int, work_done, budget - 1);
}

View File

@ -5,6 +5,7 @@
#include "ice_base.h"
#include "ice_lib.h"
#include "ice_fltr.h"
#include "ice_dcb_lib.h"
#include "ice_flow.h"
#include "ice_eswitch.h"
#include "ice_virtchnl_allowlist.h"
@ -884,6 +885,40 @@ static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf)
return pf->sriov_base_vector + vf->vf_id * pf->num_msix_per_vf;
}
/**
* ice_vf_rebuild_host_tx_rate_cfg - re-apply the Tx rate limiting configuration
* @vf: VF to re-apply the configuration for
*
* Called after a VF VSI has been re-added/rebuild during reset. The PF driver
* needs to re-apply the host configured Tx rate limiting configuration.
*/
static int ice_vf_rebuild_host_tx_rate_cfg(struct ice_vf *vf)
{
struct device *dev = ice_pf_to_dev(vf->pf);
struct ice_vsi *vsi = ice_get_vf_vsi(vf);
int err;
if (vf->min_tx_rate) {
err = ice_set_min_bw_limit(vsi, (u64)vf->min_tx_rate * 1000);
if (err) {
dev_err(dev, "failed to set min Tx rate to %d Mbps for VF %u, error %d\n",
vf->min_tx_rate, vf->vf_id, err);
return err;
}
}
if (vf->max_tx_rate) {
err = ice_set_max_bw_limit(vsi, (u64)vf->max_tx_rate * 1000);
if (err) {
dev_err(dev, "failed to set max Tx rate to %d Mbps for VF %u, error %d\n",
vf->max_tx_rate, vf->vf_id, err);
return err;
}
}
return 0;
}
/**
* ice_vf_rebuild_host_vlan_cfg - add VLAN 0 filter or rebuild the Port VLAN
* @vf: VF to add MAC filters for
@ -1420,6 +1455,11 @@ static void ice_vf_rebuild_host_cfg(struct ice_vf *vf)
if (ice_vf_rebuild_host_vlan_cfg(vf))
dev_err(dev, "failed to rebuild VLAN configuration for VF %u\n",
vf->vf_id);
if (ice_vf_rebuild_host_tx_rate_cfg(vf))
dev_err(dev, "failed to rebuild Tx rate limiting configuration for VF %u\n",
vf->vf_id);
/* rebuild aggregator node config for main VF VSI */
ice_vf_rebuild_aggregator_node_cfg(vsi);
}
@ -1975,7 +2015,8 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs)
clear_bit(ICE_VF_DIS, pf->state);
if (ice_eswitch_configure(pf))
ret = ice_eswitch_configure(pf);
if (ret)
goto err_unroll_sriov;
return 0;
@ -4747,8 +4788,8 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi)
ivi->linkstate = IFLA_VF_LINK_STATE_ENABLE;
else
ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE;
ivi->max_tx_rate = vf->tx_rate;
ivi->min_tx_rate = 0;
ivi->max_tx_rate = vf->max_tx_rate;
ivi->min_tx_rate = vf->min_tx_rate;
return 0;
}
@ -4799,11 +4840,6 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
struct ice_vf *vf;
int ret;
if (ice_is_eswitch_mode_switchdev(pf)) {
dev_info(ice_pf_to_dev(pf), "Trusted VF is forbidden in switchdev mode\n");
return -EOPNOTSUPP;
}
if (ice_validate_vf_id(pf, vf_id))
return -EINVAL;
@ -4863,6 +4899,11 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
struct ice_vf *vf;
int ret;
if (ice_is_eswitch_mode_switchdev(pf)) {
dev_info(ice_pf_to_dev(pf), "Trusted VF is forbidden in switchdev mode\n");
return -EOPNOTSUPP;
}
if (ice_validate_vf_id(pf, vf_id))
return -EINVAL;
@ -4926,6 +4967,122 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state)
return 0;
}
/**
* ice_calc_all_vfs_min_tx_rate - calculate cumulative min Tx rate on all VFs
* @pf: PF associated with VFs
*/
static int ice_calc_all_vfs_min_tx_rate(struct ice_pf *pf)
{
int rate = 0, i;
ice_for_each_vf(pf, i)
rate += pf->vf[i].min_tx_rate;
return rate;
}
/**
 * ice_min_tx_rate_oversubscribed - check if min Tx rate causes oversubscription
 * @vf: VF trying to configure min_tx_rate
 * @min_tx_rate: min Tx rate in Mbps
 *
 * Compare the current link speed against the sum of the min_tx_rate of every
 * VF, with this VF's currently stored min_tx_rate replaced by the requested
 * @min_tx_rate. An error is logged when the sum exceeds the link speed.
 *
 * Return: true if the requested @min_tx_rate would oversubscribe the link,
 * false otherwise
 */
static bool
ice_min_tx_rate_oversubscribed(struct ice_vf *vf, int min_tx_rate)
{
	int link_mbps = ice_get_link_speed_mbps(ice_get_vf_vsi(vf));
	int wanted_total = ice_calc_all_vfs_min_tx_rate(vf->pf);

	/* swap this VF's old reservation for the requested one */
	wanted_total -= vf->min_tx_rate;
	wanted_total += min_tx_rate;

	if (wanted_total <= link_mbps)
		return false;

	dev_err(ice_pf_to_dev(vf->pf), "min_tx_rate of %d Mbps on VF %u would cause oversubscription of %d Mbps based on the current link speed %d Mbps\n",
		min_tx_rate, vf->vf_id, wanted_total - link_mbps, link_mbps);

	return true;
}
/**
 * ice_set_vf_bw - set min/max VF bandwidth
 * @netdev: network interface device structure
 * @vf_id: VF identifier
 * @min_tx_rate: Minimum Tx rate in Mbps
 * @max_tx_rate: Maximum Tx rate in Mbps; zero means no max Tx rate limiting
 *
 * Validate the request, then program whichever of the two limits differs
 * from the value currently stored for the VF. The min limit is applied
 * first; if programming the max limit then fails, the new min limit stays
 * in effect.
 *
 * Return: 0 on success; -EINVAL for an invalid @vf_id, a negative rate, a
 * min rate above a non-zero max rate, or a min rate that would oversubscribe
 * the link; -EOPNOTSUPP when a non-zero min rate is requested while DCB is
 * active on the PF; otherwise the error returned by
 * ice_check_vf_ready_for_cfg(), ice_set_min_bw_limit() or
 * ice_set_max_bw_limit().
 */
int
ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
	      int max_tx_rate)
{
	struct ice_pf *pf = ice_netdev_to_pf(netdev);
	struct ice_vsi *vsi;
	struct device *dev;
	struct ice_vf *vf;
	int ret;

	dev = ice_pf_to_dev(pf);
	if (ice_validate_vf_id(pf, vf_id))
		return -EINVAL;

	vf = &pf->vf[vf_id];
	ret = ice_check_vf_ready_for_cfg(vf);
	if (ret)
		return ret;

	vsi = ice_get_vf_vsi(vf);

	/* The rates arrive as signed ints but are cast to u64/unsigned int
	 * below; a negative value would sign-extend into an enormous
	 * bandwidth request, so reject it up front.
	 */
	if (min_tx_rate < 0 || max_tx_rate < 0) {
		dev_err(dev, "Cannot set negative Tx rate (min %d Mbps, max %d Mbps) for VF %d\n",
			min_tx_rate, max_tx_rate, vf->vf_id);
		return -EINVAL;
	}

	/* when max_tx_rate is zero that means no max Tx rate limiting, so only
	 * check if max_tx_rate is non-zero
	 */
	if (max_tx_rate && min_tx_rate > max_tx_rate) {
		dev_err(dev, "Cannot set min Tx rate %d Mbps greater than max Tx rate %d Mbps\n",
			min_tx_rate, max_tx_rate);
		return -EINVAL;
	}

	if (min_tx_rate && ice_is_dcb_active(pf)) {
		dev_err(dev, "DCB on PF is currently enabled. VF min Tx rate limiting not allowed on this PF.\n");
		return -EOPNOTSUPP;
	}

	if (ice_min_tx_rate_oversubscribed(vf, min_tx_rate))
		return -EINVAL;

	/* only touch a limit whose requested value differs from the stored one */
	if (vf->min_tx_rate != (unsigned int)min_tx_rate) {
		/* rates are in Mbps; the limit helper takes Mbps * 1000 */
		ret = ice_set_min_bw_limit(vsi, (u64)min_tx_rate * 1000);
		if (ret) {
			dev_err(dev, "Unable to set min-tx-rate for VF %d\n",
				vf->vf_id);
			return ret;
		}

		vf->min_tx_rate = min_tx_rate;
	}

	if (vf->max_tx_rate != (unsigned int)max_tx_rate) {
		ret = ice_set_max_bw_limit(vsi, (u64)max_tx_rate * 1000);
		if (ret) {
			dev_err(dev, "Unable to set max-tx-rate for VF %d\n",
				vf->vf_id);
			return ret;
		}

		vf->max_tx_rate = max_tx_rate;
	}

	return 0;
}
/**
* ice_get_vf_stats - populate some stats for the VF
* @netdev: the netdev of the PF

View File

@ -125,7 +125,8 @@ struct ice_vf {
* the main LAN VSI for the PF.
*/
u16 lan_vsi_num; /* ID as used by firmware */
unsigned int tx_rate; /* Tx bandwidth limit in Mbps */
unsigned int min_tx_rate; /* Minimum Tx bandwidth limit in Mbps */
unsigned int max_tx_rate; /* Maximum Tx bandwidth limit in Mbps */
DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */
u64 num_inval_msgs; /* number of continuous invalid msgs */
@ -172,6 +173,10 @@ int
ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
__be16 vlan_proto);
int
ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
int max_tx_rate);
int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted);
int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state);
@ -303,6 +308,14 @@ ice_set_vf_link_state(struct net_device __always_unused *netdev,
return -EOPNOTSUPP;
}
/* Fallback ice_set_vf_bw() that ignores all of its arguments and reports the
 * operation as unsupported (presumably compiled when SR-IOV support is
 * configured out - confirm against the enclosing preprocessor guard).
 */
static inline int
ice_set_vf_bw(struct net_device __always_unused *netdev,
	      int __always_unused vf_id,
	      int __always_unused min_tx_rate,
	      int __always_unused max_tx_rate)
{
	return -EOPNOTSUPP;
}
static inline int
ice_calc_vf_reg_idx(struct ice_vf __always_unused *vf,
struct ice_q_vector __always_unused *q_vector)