Merge branch 'mlxsw-Offload-TBF'

Ido Schimmel says:

====================
mlxsw: Offload TBF

Petr says:

In order to allow configuration of shapers on Spectrum family of
machines, recognize TBF either as root Qdisc, or as a child of ETS or
PRIO. Configure rate of maximum shaper according to TBF rate setting,
and maximum shaper burst size according to TBF burst setting.

- Patches #1 and #2 make the TBF shaper suitable for offloading.
- Patches #3, #4 and #5 are refactoring aimed at easier support of leaf
  Qdiscs in general.
- Patches #6 to #10 gradually introduce TBF offload.
- Patches #11 to #14 add selftests.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2020-01-25 10:56:31 +01:00
commit 3333e50b64
18 changed files with 793 additions and 90 deletions

View File

@ -3563,8 +3563,8 @@ MLXSW_ITEM32(reg, qeec, min_shaper_rate, 0x0C, 0, 28);
*/
MLXSW_ITEM32(reg, qeec, mase, 0x10, 31, 1);
/* A large max rate will disable the max shaper. */
#define MLXSW_REG_QEEC_MAS_DIS 200000000 /* Kbps */
/* The largest max shaper value possible to disable the shaper. */
#define MLXSW_REG_QEEC_MAS_DIS ((1u << 31) - 1) /* Kbps */
/* reg_qeec_max_shaper_rate
* Max shaper information rate.
@ -3602,6 +3602,21 @@ MLXSW_ITEM32(reg, qeec, dwrr, 0x18, 15, 1);
*/
MLXSW_ITEM32(reg, qeec, dwrr_weight, 0x18, 0, 8);
/* reg_qeec_max_shaper_bs
* Max shaper burst size
* Burst size is 2^max_shaper_bs * 512 bits
* For Spectrum-1: Range is: 5..25
* For Spectrum-2: Range is: 11..25
* Reserved when ptps = 1
* Access: RW
*/
MLXSW_ITEM32(reg, qeec, max_shaper_bs, 0x1C, 0, 6);
#define MLXSW_REG_QEEC_HIGHEST_SHAPER_BS 25
#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1 5
#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2 11
#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3 5
static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port,
enum mlxsw_reg_qeec_hr hr, u8 index,
u8 next_index)

View File

@ -1796,6 +1796,8 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type,
return mlxsw_sp_setup_tc_prio(mlxsw_sp_port, type_data);
case TC_SETUP_QDISC_ETS:
return mlxsw_sp_setup_tc_ets(mlxsw_sp_port, type_data);
case TC_SETUP_QDISC_TBF:
return mlxsw_sp_setup_tc_tbf(mlxsw_sp_port, type_data);
default:
return -EOPNOTSUPP;
}
@ -3577,7 +3579,7 @@ int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port,
enum mlxsw_reg_qeec_hr hr, u8 index,
u8 next_index, u32 maxrate)
u8 next_index, u32 maxrate, u8 burst_size)
{
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
char qeec_pl[MLXSW_REG_QEEC_LEN];
@ -3586,6 +3588,7 @@ int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port,
next_index);
mlxsw_reg_qeec_mase_set(qeec_pl, true);
mlxsw_reg_qeec_max_shaper_rate_set(qeec_pl, maxrate);
mlxsw_reg_qeec_max_shaper_bs_set(qeec_pl, burst_size);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl);
}
@ -3654,14 +3657,14 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
*/
err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HR_PORT, 0, 0,
MLXSW_REG_QEEC_MAS_DIS);
MLXSW_REG_QEEC_MAS_DIS, 0);
if (err)
return err;
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HR_SUBGROUP,
i, 0,
MLXSW_REG_QEEC_MAS_DIS);
MLXSW_REG_QEEC_MAS_DIS, 0);
if (err)
return err;
}
@ -3669,14 +3672,14 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HR_TC,
i, i,
MLXSW_REG_QEEC_MAS_DIS);
MLXSW_REG_QEEC_MAS_DIS, 0);
if (err)
return err;
err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HR_TC,
i + 8, i,
MLXSW_REG_QEEC_MAS_DIS);
MLXSW_REG_QEEC_MAS_DIS, 0);
if (err)
return err;
}
@ -5173,6 +5176,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->span_ops = &mlxsw_sp1_span_ops;
mlxsw_sp->listeners = mlxsw_sp1_listener;
mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener);
mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1;
return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack);
}
@ -5197,6 +5201,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops;
mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops;
mlxsw_sp->span_ops = &mlxsw_sp2_span_ops;
mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2;
return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack);
}
@ -5219,6 +5224,7 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops;
mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops;
mlxsw_sp->span_ops = &mlxsw_sp2_span_ops;
mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3;
return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack);
}

View File

@ -189,6 +189,7 @@ struct mlxsw_sp {
const struct mlxsw_sp_span_ops *span_ops;
const struct mlxsw_listener *listeners;
size_t listeners_count;
u32 lowest_shaper_bs;
};
static inline struct mlxsw_sp_upper *
@ -487,7 +488,7 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu,
struct ieee_pfc *my_pfc);
int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port,
enum mlxsw_reg_qeec_hr hr, u8 index,
u8 next_index, u32 maxrate);
u8 next_index, u32 maxrate, u8 burst_size);
enum mlxsw_reg_spms_state mlxsw_sp_stp_spms_state(u8 stp_state);
int mlxsw_sp_port_vid_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid,
u8 state);
@ -861,6 +862,8 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_prio_qopt_offload *p);
int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_ets_qopt_offload *p);
int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_tbf_qopt_offload *p);
/* spectrum_fid.c */
bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index);

View File

@ -526,7 +526,7 @@ static int mlxsw_sp_dcbnl_ieee_setmaxrate(struct net_device *dev,
err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HR_SUBGROUP,
i, 0,
maxrate->tc_maxrate[i]);
maxrate->tc_maxrate[i], 0);
if (err) {
netdev_err(dev, "Failed to set maxrate for TC %d\n", i);
goto err_port_ets_maxrate_set;
@ -541,7 +541,8 @@ err_port_ets_maxrate_set:
for (i--; i >= 0; i--)
mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HR_SUBGROUP,
i, 0, my_maxrate->tc_maxrate[i]);
i, 0,
my_maxrate->tc_maxrate[i], 0);
return err;
}

View File

@ -19,6 +19,7 @@ enum mlxsw_sp_qdisc_type {
MLXSW_SP_QDISC_RED,
MLXSW_SP_QDISC_PRIO,
MLXSW_SP_QDISC_ETS,
MLXSW_SP_QDISC_TBF,
};
struct mlxsw_sp_qdisc_ops {
@ -227,6 +228,70 @@ mlxsw_sp_qdisc_bstats_per_priority_get(struct mlxsw_sp_port_xstats *xstats,
}
}
static void
mlxsw_sp_qdisc_collect_tc_stats(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
u64 *p_tx_bytes, u64 *p_tx_packets,
u64 *p_drops, u64 *p_backlog)
{
u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
struct mlxsw_sp_port_xstats *xstats;
u64 tx_bytes, tx_packets;
xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
mlxsw_sp_qdisc->prio_bitmap,
&tx_packets, &tx_bytes);
*p_tx_packets += tx_packets;
*p_tx_bytes += tx_bytes;
*p_drops += xstats->wred_drop[tclass_num] +
mlxsw_sp_xstats_tail_drop(xstats, tclass_num);
*p_backlog += mlxsw_sp_xstats_backlog(xstats, tclass_num);
}
static void
mlxsw_sp_qdisc_update_stats(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
u64 tx_bytes, u64 tx_packets,
u64 drops, u64 backlog,
struct tc_qopt_offload_stats *stats_ptr)
{
struct mlxsw_sp_qdisc_stats *stats_base = &mlxsw_sp_qdisc->stats_base;
tx_bytes -= stats_base->tx_bytes;
tx_packets -= stats_base->tx_packets;
drops -= stats_base->drops;
backlog -= stats_base->backlog;
_bstats_update(stats_ptr->bstats, tx_bytes, tx_packets);
stats_ptr->qstats->drops += drops;
stats_ptr->qstats->backlog += mlxsw_sp_cells_bytes(mlxsw_sp, backlog);
stats_base->backlog += backlog;
stats_base->drops += drops;
stats_base->tx_bytes += tx_bytes;
stats_base->tx_packets += tx_packets;
}
static void
mlxsw_sp_qdisc_get_tc_stats(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
struct tc_qopt_offload_stats *stats_ptr)
{
u64 tx_packets = 0;
u64 tx_bytes = 0;
u64 backlog = 0;
u64 drops = 0;
mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
&tx_bytes, &tx_packets,
&drops, &backlog);
mlxsw_sp_qdisc_update_stats(mlxsw_sp_port->mlxsw_sp, mlxsw_sp_qdisc,
tx_bytes, tx_packets, drops, backlog,
stats_ptr);
}
static int
mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port,
int tclass_num, u32 min, u32 max,
@ -356,18 +421,27 @@ mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port,
max, prob, p->is_ecn);
}
static void
mlxsw_sp_qdisc_leaf_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
struct gnet_stats_queue *qstats)
{
u64 backlog;
backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
mlxsw_sp_qdisc->stats_base.backlog);
qstats->backlog -= backlog;
mlxsw_sp_qdisc->stats_base.backlog = 0;
}
static void
mlxsw_sp_qdisc_red_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
void *params)
{
struct tc_red_qopt_offload_params *p = params;
u64 backlog;
backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
mlxsw_sp_qdisc->stats_base.backlog);
p->qstats->backlog -= backlog;
mlxsw_sp_qdisc->stats_base.backlog = 0;
mlxsw_sp_qdisc_leaf_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, p->qstats);
}
static int
@ -403,41 +477,21 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
struct tc_qopt_offload_stats *stats_ptr)
{
u64 tx_bytes, tx_packets, overlimits, drops, backlog;
u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
struct mlxsw_sp_qdisc_stats *stats_base;
struct mlxsw_sp_port_xstats *xstats;
u64 overlimits;
xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
stats_base = &mlxsw_sp_qdisc->stats_base;
mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
mlxsw_sp_qdisc->prio_bitmap,
&tx_packets, &tx_bytes);
tx_bytes = tx_bytes - stats_base->tx_bytes;
tx_packets = tx_packets - stats_base->tx_packets;
mlxsw_sp_qdisc_get_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, stats_ptr);
overlimits = xstats->wred_drop[tclass_num] + xstats->ecn -
stats_base->overlimits;
drops = xstats->wred_drop[tclass_num] +
mlxsw_sp_xstats_tail_drop(xstats, tclass_num) -
stats_base->drops;
backlog = mlxsw_sp_xstats_backlog(xstats, tclass_num);
_bstats_update(stats_ptr->bstats, tx_bytes, tx_packets);
stats_ptr->qstats->overlimits += overlimits;
stats_ptr->qstats->drops += drops;
stats_ptr->qstats->backlog +=
mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
backlog) -
mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
stats_base->backlog);
stats_base->backlog = backlog;
stats_base->drops += drops;
stats_base->overlimits += overlimits;
stats_base->tx_bytes += tx_bytes;
stats_base->tx_packets += tx_packets;
return 0;
}
@ -487,6 +541,204 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
}
}
static void
mlxsw_sp_setup_tc_qdisc_leaf_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
{
u64 backlog_cells = 0;
u64 tx_packets = 0;
u64 tx_bytes = 0;
u64 drops = 0;
mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
&tx_bytes, &tx_packets,
&drops, &backlog_cells);
mlxsw_sp_qdisc->stats_base.tx_packets = tx_packets;
mlxsw_sp_qdisc->stats_base.tx_bytes = tx_bytes;
mlxsw_sp_qdisc->stats_base.drops = drops;
mlxsw_sp_qdisc->stats_base.backlog = 0;
}
static int
mlxsw_sp_qdisc_tbf_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
{
struct mlxsw_sp_qdisc *root_qdisc = mlxsw_sp_port->root_qdisc;
if (root_qdisc != mlxsw_sp_qdisc)
root_qdisc->stats_base.backlog -=
mlxsw_sp_qdisc->stats_base.backlog;
return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HR_SUBGROUP,
mlxsw_sp_qdisc->tclass_num, 0,
MLXSW_REG_QEEC_MAS_DIS, 0);
}
static int
mlxsw_sp_qdisc_tbf_bs(struct mlxsw_sp_port *mlxsw_sp_port,
u32 max_size, u8 *p_burst_size)
{
/* TBF burst size is configured in bytes. The ASIC burst size value is
* ((2 ^ bs) * 512 bits. Convert the TBF bytes to 512-bit units.
*/
u32 bs512 = max_size / 64;
u8 bs = fls(bs512);
if (!bs)
return -EINVAL;
--bs;
/* Demand a power of two. */
if ((1 << bs) != bs512)
return -EINVAL;
if (bs < mlxsw_sp_port->mlxsw_sp->lowest_shaper_bs ||
bs > MLXSW_REG_QEEC_HIGHEST_SHAPER_BS)
return -EINVAL;
*p_burst_size = bs;
return 0;
}
static u32
mlxsw_sp_qdisc_tbf_max_size(u8 bs)
{
return (1U << bs) * 64;
}
static u64
mlxsw_sp_qdisc_tbf_rate_kbps(struct tc_tbf_qopt_offload_replace_params *p)
{
/* TBF interface is in bytes/s, whereas Spectrum ASIC is configured in
* Kbits/s.
*/
return p->rate.rate_bytes_ps / 1000 * 8;
}
static int
mlxsw_sp_qdisc_tbf_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
void *params)
{
struct tc_tbf_qopt_offload_replace_params *p = params;
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
u64 rate_kbps = mlxsw_sp_qdisc_tbf_rate_kbps(p);
u8 burst_size;
int err;
if (rate_kbps >= MLXSW_REG_QEEC_MAS_DIS) {
dev_err(mlxsw_sp_port->mlxsw_sp->bus_info->dev,
"spectrum: TBF: rate of %lluKbps must be below %u\n",
rate_kbps, MLXSW_REG_QEEC_MAS_DIS);
return -EINVAL;
}
err = mlxsw_sp_qdisc_tbf_bs(mlxsw_sp_port, p->max_size, &burst_size);
if (err) {
u8 highest_shaper_bs = MLXSW_REG_QEEC_HIGHEST_SHAPER_BS;
dev_err(mlxsw_sp->bus_info->dev,
"spectrum: TBF: invalid burst size of %u, must be a power of two between %u and %u",
p->max_size,
mlxsw_sp_qdisc_tbf_max_size(mlxsw_sp->lowest_shaper_bs),
mlxsw_sp_qdisc_tbf_max_size(highest_shaper_bs));
return -EINVAL;
}
return 0;
}
static int
mlxsw_sp_qdisc_tbf_replace(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
void *params)
{
struct tc_tbf_qopt_offload_replace_params *p = params;
u64 rate_kbps = mlxsw_sp_qdisc_tbf_rate_kbps(p);
u8 burst_size;
int err;
err = mlxsw_sp_qdisc_tbf_bs(mlxsw_sp_port, p->max_size, &burst_size);
if (WARN_ON_ONCE(err))
/* check_params above was supposed to reject this value. */
return -EINVAL;
/* Configure subgroup shaper, so that both UC and MC traffic is subject
* to shaping. That is unlike RED, however UC queue lengths are going to
* be different than MC ones due to different pool and quota
* configurations, so the configuration is not applicable. For shaper on
* the other hand, subjecting the overall stream to the configured
* shaper makes sense. Also note that that is what we do for
* ieee_setmaxrate().
*/
return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HR_SUBGROUP,
mlxsw_sp_qdisc->tclass_num, 0,
rate_kbps, burst_size);
}
static void
mlxsw_sp_qdisc_tbf_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
void *params)
{
struct tc_tbf_qopt_offload_replace_params *p = params;
mlxsw_sp_qdisc_leaf_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, p->qstats);
}
static int
mlxsw_sp_qdisc_get_tbf_stats(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
struct tc_qopt_offload_stats *stats_ptr)
{
mlxsw_sp_qdisc_get_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
stats_ptr);
return 0;
}
static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_tbf = {
.type = MLXSW_SP_QDISC_TBF,
.check_params = mlxsw_sp_qdisc_tbf_check_params,
.replace = mlxsw_sp_qdisc_tbf_replace,
.unoffload = mlxsw_sp_qdisc_tbf_unoffload,
.destroy = mlxsw_sp_qdisc_tbf_destroy,
.get_stats = mlxsw_sp_qdisc_get_tbf_stats,
.clean_stats = mlxsw_sp_setup_tc_qdisc_leaf_clean_stats,
};
int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_tbf_qopt_offload *p)
{
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false);
if (!mlxsw_sp_qdisc)
return -EOPNOTSUPP;
if (p->command == TC_TBF_REPLACE)
return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
mlxsw_sp_qdisc,
&mlxsw_sp_qdisc_ops_tbf,
&p->replace_params);
if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle,
MLXSW_SP_QDISC_TBF))
return -EOPNOTSUPP;
switch (p->command) {
case TC_TBF_DESTROY:
return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
case TC_TBF_STATS:
return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
&p->stats);
default:
return -EOPNOTSUPP;
}
}
static int
__mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port)
{
@ -628,37 +880,23 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
struct tc_qopt_offload_stats *stats_ptr)
{
u64 tx_bytes, tx_packets, drops = 0, backlog = 0;
struct mlxsw_sp_qdisc_stats *stats_base;
struct mlxsw_sp_port_xstats *xstats;
struct rtnl_link_stats64 *stats;
struct mlxsw_sp_qdisc *tc_qdisc;
u64 tx_packets = 0;
u64 tx_bytes = 0;
u64 backlog = 0;
u64 drops = 0;
int i;
xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
stats = &mlxsw_sp_port->periodic_hw_stats.stats;
stats_base = &mlxsw_sp_qdisc->stats_base;
tx_bytes = stats->tx_bytes - stats_base->tx_bytes;
tx_packets = stats->tx_packets - stats_base->tx_packets;
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
drops += mlxsw_sp_xstats_tail_drop(xstats, i);
drops += xstats->wred_drop[i];
backlog += mlxsw_sp_xstats_backlog(xstats, i);
tc_qdisc = &mlxsw_sp_port->tclass_qdiscs[i];
mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, tc_qdisc,
&tx_bytes, &tx_packets,
&drops, &backlog);
}
drops = drops - stats_base->drops;
_bstats_update(stats_ptr->bstats, tx_bytes, tx_packets);
stats_ptr->qstats->drops += drops;
stats_ptr->qstats->backlog +=
mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
backlog) -
mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
stats_base->backlog);
stats_base->backlog = backlog;
stats_base->drops += drops;
stats_base->tx_bytes += tx_bytes;
stats_base->tx_packets += tx_packets;
mlxsw_sp_qdisc_update_stats(mlxsw_sp_port->mlxsw_sp, mlxsw_sp_qdisc,
tx_bytes, tx_packets, drops, backlog,
stats_ptr);
return 0;
}

View File

@ -850,6 +850,7 @@ enum tc_setup_type {
TC_SETUP_QDISC_TAPRIO,
TC_SETUP_FT,
TC_SETUP_QDISC_ETS,
TC_SETUP_QDISC_TBF,
};
/* These structures hold the attributes of bpf state that are being passed

View File

@ -854,4 +854,26 @@ struct tc_ets_qopt_offload {
};
};
enum tc_tbf_command {
TC_TBF_REPLACE,
TC_TBF_DESTROY,
TC_TBF_STATS,
};
struct tc_tbf_qopt_offload_replace_params {
struct psched_ratecfg rate;
u32 max_size;
struct gnet_stats_queue *qstats;
};
struct tc_tbf_qopt_offload {
enum tc_tbf_command command;
u32 handle;
u32 parent;
union {
struct tc_tbf_qopt_offload_replace_params replace_params;
struct tc_qopt_offload_stats stats;
};
};
#endif

View File

@ -15,6 +15,7 @@
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/pkt_sched.h>
@ -137,6 +138,52 @@ static u64 psched_ns_t2l(const struct psched_ratecfg *r,
return len;
}
static void tbf_offload_change(struct Qdisc *sch)
{
struct tbf_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
struct tc_tbf_qopt_offload qopt;
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return;
qopt.command = TC_TBF_REPLACE;
qopt.handle = sch->handle;
qopt.parent = sch->parent;
qopt.replace_params.rate = q->rate;
qopt.replace_params.max_size = q->max_size;
qopt.replace_params.qstats = &sch->qstats;
dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
}
static void tbf_offload_destroy(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
struct tc_tbf_qopt_offload qopt;
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return;
qopt.command = TC_TBF_DESTROY;
qopt.handle = sch->handle;
qopt.parent = sch->parent;
dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
}
static int tbf_offload_dump(struct Qdisc *sch)
{
struct tc_tbf_qopt_offload qopt;
qopt.command = TC_TBF_STATS;
qopt.handle = sch->handle;
qopt.parent = sch->parent;
qopt.stats.bstats = &sch->bstats;
qopt.stats.qstats = &sch->qstats;
return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_TBF, &qopt);
}
/* GSO packet is too big, segment it so that tbf can transmit
* each segment in time
*/
@ -407,6 +454,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
sch_tree_unlock(sch);
err = 0;
tbf_offload_change(sch);
done:
return err;
}
@ -432,6 +481,7 @@ static void tbf_destroy(struct Qdisc *sch)
struct tbf_sched_data *q = qdisc_priv(sch);
qdisc_watchdog_cancel(&q->watchdog);
tbf_offload_destroy(sch);
qdisc_put(q->qdisc);
}
@ -440,8 +490,12 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
struct tbf_sched_data *q = qdisc_priv(sch);
struct nlattr *nest;
struct tc_tbf_qopt opt;
int err;
err = tbf_offload_dump(sch);
if (err)
return err;
sch->qstats.backlog = q->qdisc->qstats.backlog;
nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;

View File

@ -1,29 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
humanize()
{
local speed=$1; shift
for unit in bps Kbps Mbps Gbps; do
if (($(echo "$speed < 1024" | bc))); then
break
fi
speed=$(echo "scale=1; $speed / 1024" | bc)
done
echo "$speed${unit}"
}
rate()
{
local t0=$1; shift
local t1=$1; shift
local interval=$1; shift
echo $((8 * (t1 - t0) / interval))
}
check_rate()
{
local rate=$1; shift

View File

@ -0,0 +1,9 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
source qos_lib.sh
bail_on_lldpad
lib_dir=$(dirname $0)/../../../net/forwarding
TCFLAGS=skip_sw
source $lib_dir/sch_tbf_ets.sh

View File

@ -0,0 +1,9 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
source qos_lib.sh
bail_on_lldpad
lib_dir=$(dirname $0)/../../../net/forwarding
TCFLAGS=skip_sw
source $lib_dir/sch_tbf_prio.sh

View File

@ -0,0 +1,9 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
source qos_lib.sh
bail_on_lldpad
lib_dir=$(dirname $0)/../../../net/forwarding
TCFLAGS=skip_sw
source $lib_dir/sch_tbf_root.sh

View File

@ -248,6 +248,24 @@ busywait()
done
}
until_counter_is()
{
local value=$1; shift
local current=$("$@")
echo $((current))
((current >= value))
}
busywait_for_counter()
{
local timeout=$1; shift
local delta=$1; shift
local base=$("$@")
busywait "$timeout" until_counter_is $((base + delta)) "$@"
}
setup_wait_dev()
{
local dev=$1; shift
@ -575,9 +593,10 @@ tc_rule_stats_get()
local dev=$1; shift
local pref=$1; shift
local dir=$1; shift
local selector=${1:-.packets}; shift
tc -j -s filter show dev $dev ${dir:-ingress} pref $pref \
| jq '.[1].options.actions[].stats.packets'
| jq ".[1].options.actions[].stats$selector"
}
ethtool_stats_get()
@ -588,6 +607,30 @@ ethtool_stats_get()
ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2
}
humanize()
{
local speed=$1; shift
for unit in bps Kbps Mbps Gbps; do
if (($(echo "$speed < 1024" | bc))); then
break
fi
speed=$(echo "scale=1; $speed / 1024" | bc)
done
echo "$speed${unit}"
}
rate()
{
local t0=$1; shift
local t1=$1; shift
local interval=$1; shift
echo $((8 * (t1 - t0) / interval))
}
mac_get()
{
local if_name=$1

View File

@ -0,0 +1,233 @@
# SPDX-License-Identifier: GPL-2.0
# This test sends a stream of traffic from H1 through a switch, to H2. On the
# egress port from the switch ($swp2), a shaper is installed. The test verifies
# that the rates on the port match the configured shaper.
#
# In order to test per-class shaping, $swp2 actually contains TBF under PRIO or
# ETS, with two different configurations. Traffic is prioritized using 802.1p.
#
# +-------------------------------------------+
# | H1 |
# | + $h1.10 $h1.11 + |
# | | 192.0.2.1/28 192.0.2.17/28 | |
# | | | |
# | \______________ _____________/ |
# | \ / |
# | + $h1 |
# +---------------------|---------------------+
# |
# +---------------------|---------------------+
# | SW + $swp1 |
# | _______________/ \_______________ |
# | / \ |
# | +-|--------------+ +--------------|-+ |
# | | + $swp1.10 | | $swp1.11 + | |
# | | | | | |
# | | BR10 | | BR11 | |
# | | | | | |
# | | + $swp2.10 | | $swp2.11 + | |
# | +-|--------------+ +--------------|-+ |
# | \_______________ ______________/ |
# | \ / |
# | + $swp2 |
# +---------------------|---------------------+
# |
# +---------------------|---------------------+
# | H2 + $h2 |
# | ______________/ \______________ |
# | / \ |
# | | | |
# | + $h2.10 $h2.11 + |
# | 192.0.2.2/28 192.0.2.18/28 |
# +-------------------------------------------+
NUM_NETIFS=4
CHECK_TC="yes"
source $lib_dir/lib.sh
ipaddr()
{
local host=$1; shift
local vlan=$1; shift
echo 192.0.2.$((16 * (vlan - 10) + host))
}
host_create()
{
local dev=$1; shift
local host=$1; shift
simple_if_init $dev
mtu_set $dev 10000
vlan_create $dev 10 v$dev $(ipaddr $host 10)/28
ip link set dev $dev.10 type vlan egress 0:0
vlan_create $dev 11 v$dev $(ipaddr $host 11)/28
ip link set dev $dev.11 type vlan egress 0:1
}
host_destroy()
{
local dev=$1; shift
vlan_destroy $dev 11
vlan_destroy $dev 10
mtu_restore $dev
simple_if_fini $dev
}
h1_create()
{
host_create $h1 1
}
h1_destroy()
{
host_destroy $h1
}
h2_create()
{
host_create $h2 2
tc qdisc add dev $h2 clsact
tc filter add dev $h2 ingress pref 1010 prot 802.1q \
flower $TCFLAGS vlan_id 10 action pass
tc filter add dev $h2 ingress pref 1011 prot 802.1q \
flower $TCFLAGS vlan_id 11 action pass
}
h2_destroy()
{
tc qdisc del dev $h2 clsact
host_destroy $h2
}
switch_create()
{
local intf
local vlan
ip link add dev br10 type bridge
ip link add dev br11 type bridge
for intf in $swp1 $swp2; do
ip link set dev $intf up
mtu_set $intf 10000
for vlan in 10 11; do
vlan_create $intf $vlan
ip link set dev $intf.$vlan master br$vlan
ip link set dev $intf.$vlan up
done
done
for vlan in 10 11; do
ip link set dev $swp1.$vlan type vlan ingress 0:0 1:1
done
ip link set dev br10 up
ip link set dev br11 up
}
switch_destroy()
{
local intf
local vlan
# A test may have been interrupted mid-run, with Qdisc installed. Delete
# it here.
tc qdisc del dev $swp2 root 2>/dev/null
ip link set dev br11 down
ip link set dev br10 down
for intf in $swp2 $swp1; do
for vlan in 11 10; do
ip link set dev $intf.$vlan down
ip link set dev $intf.$vlan nomaster
vlan_destroy $intf $vlan
done
mtu_restore $intf
ip link set dev $intf down
done
ip link del dev br11
ip link del dev br10
}
setup_prepare()
{
h1=${NETIFS[p1]}
swp1=${NETIFS[p2]}
swp2=${NETIFS[p3]}
h2=${NETIFS[p4]}
swp3=${NETIFS[p5]}
h3=${NETIFS[p6]}
swp4=${NETIFS[p7]}
swp5=${NETIFS[p8]}
h2_mac=$(mac_get $h2)
vrf_prepare
h1_create
h2_create
switch_create
}
cleanup()
{
pre_cleanup
switch_destroy
h2_destroy
h1_destroy
vrf_cleanup
}
ping_ipv4()
{
ping_test $h1.10 $(ipaddr 2 10) " vlan 10"
ping_test $h1.11 $(ipaddr 2 11) " vlan 11"
}
tbf_get_counter()
{
local vlan=$1; shift
tc_rule_stats_get $h2 10$vlan ingress .bytes
}
do_tbf_test()
{
local vlan=$1; shift
local mbit=$1; shift
start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 2 $vlan) $h2_mac
sleep 5 # Wait for the burst to dwindle
local t2=$(busywait_for_counter 1000 +1 tbf_get_counter $vlan)
sleep 10
local t3=$(tbf_get_counter $vlan)
stop_traffic
RET=0
# Note: TBF uses 10^6 Mbits, not 2^20 ones.
local er=$((mbit * 1000 * 1000))
local nr=$(rate $t2 $t3 10)
local nr_pct=$((100 * (nr - er) / er))
((-5 <= nr_pct && nr_pct <= 5))
check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-5%."
log_test "TC $((vlan - 10)): TBF rate ${mbit}Mbit"
}

View File

@ -0,0 +1,6 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
QDISC="ets strict"
: ${lib_dir:=.}
source $lib_dir/sch_tbf_etsprio.sh

View File

@ -0,0 +1,39 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
ALL_TESTS="
ping_ipv4
tbf_test
"
source $lib_dir/sch_tbf_core.sh
tbf_test_one()
{
local bs=$1; shift
tc qdisc replace dev $swp2 parent 10:3 handle 103: tbf \
rate 400Mbit burst $bs limit 1M
tc qdisc replace dev $swp2 parent 10:2 handle 102: tbf \
rate 800Mbit burst $bs limit 1M
do_tbf_test 10 400 $bs
do_tbf_test 11 800 $bs
}
tbf_test()
{
# This test is used for both ETS and PRIO. Even though we only need two
# bands, PRIO demands a minimum of three.
tc qdisc add dev $swp2 root handle 10: $QDISC 3 priomap 2 1 0
tbf_test_one 128K
tc qdisc del dev $swp2 root
}
trap cleanup EXIT
setup_prepare
setup_wait
tests_run
exit $EXIT_STATUS

View File

@ -0,0 +1,6 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
QDISC="prio bands"
: ${lib_dir:=.}
source $lib_dir/sch_tbf_etsprio.sh

View File

@ -0,0 +1,33 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
ALL_TESTS="
ping_ipv4
tbf_test
"
: ${lib_dir:=.}
source $lib_dir/sch_tbf_core.sh
tbf_test_one()
{
local bs=$1; shift
tc qdisc replace dev $swp2 root handle 108: tbf \
rate 400Mbit burst $bs limit 1M
do_tbf_test 10 400 $bs
}
tbf_test()
{
tbf_test_one 128K
tc qdisc del dev $swp2 root
}
trap cleanup EXIT
setup_prepare
setup_wait
tests_run
exit $EXIT_STATUS