From 6e0972e0c5d8f78b64274fd1a512136aee404610 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 30 Sep 2020 12:49:07 +0200 Subject: [PATCH 1/6] selftests: forwarding: devlink_lib: Split devlink_..._set() into save & set Changing pool type from static to dynamic causes reinterpretation of threshold values. They therefore need to be saved before pool type is changed, then the pool type can be changed, and then the new values need to be set up. For that reason, set cannot subsume save, because it would be saving the wrong thing, with possibly a nonsensical value, and restore would then fail to restore the nonsensical value. Thus extract a _save() from each of the relevant _set()'s. This way it is possible to save everything up front, then to tweak it, and then restore in the required order. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/qos_ets_strict.sh | 9 +++ .../drivers/net/mlxsw/qos_mc_aware.sh | 5 ++ .../selftests/drivers/net/mlxsw/sch_ets.sh | 6 ++ .../drivers/net/mlxsw/sch_red_core.sh | 1 + .../selftests/net/forwarding/devlink_lib.sh | 62 +++++++++++++++---- 5 files changed, 72 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh index 6d1790b5de7a..e9f8718af979 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh @@ -147,17 +147,26 @@ switch_create() # Make sure that ingress quotas are smaller than egress so that there is # room for both streams of traffic to be admitted to shared buffer. + devlink_pool_size_thtype_save 0 devlink_pool_size_thtype_set 0 dynamic 10000000 + devlink_pool_size_thtype_save 4 devlink_pool_size_thtype_set 4 dynamic 10000000 + devlink_port_pool_th_save $swp1 0 devlink_port_pool_th_set $swp1 0 6 + devlink_tc_bind_pool_th_save $swp1 1 ingress devlink_tc_bind_pool_th_set $swp1 1 ingress 0 6 + devlink_port_pool_th_save $swp2 0 devlink_port_pool_th_set $swp2 0 6 + devlink_tc_bind_pool_th_save $swp2 2 ingress devlink_tc_bind_pool_th_set $swp2 2 ingress 0 6 + devlink_tc_bind_pool_th_save $swp3 1 egress devlink_tc_bind_pool_th_set $swp3 1 egress 4 7 + devlink_tc_bind_pool_th_save $swp3 2 egress devlink_tc_bind_pool_th_set $swp3 2 egress 4 7 + devlink_port_pool_th_save $swp3 4 devlink_port_pool_th_set $swp3 4 7 } diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index b025daea062d..8f164c80e215 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -145,12 +145,17 @@ switch_create() # Make sure that ingress quotas are smaller than egress so that there is # room for both streams of traffic to be admitted to shared buffer. + devlink_port_pool_th_save $swp1 0 devlink_port_pool_th_set $swp1 0 5 + devlink_tc_bind_pool_th_save $swp1 0 ingress devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5 + devlink_port_pool_th_save $swp2 0 devlink_port_pool_th_set $swp2 0 5 + devlink_tc_bind_pool_th_save $swp2 1 ingress devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5 + devlink_port_pool_th_save $swp3 4 devlink_port_pool_th_set $swp3 4 12 } diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh index 94c37124a840..af64bc9ea8ab 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh @@ -27,11 +27,17 @@ switch_create() # amount of traffic that is admitted to the shared buffers. This makes # sure that there is always enough traffic of all types to select from # for the DWRR process. + devlink_port_pool_th_save $swp1 0 devlink_port_pool_th_set $swp1 0 12 + devlink_tc_bind_pool_th_save $swp1 0 ingress devlink_tc_bind_pool_th_set $swp1 0 ingress 0 12 + devlink_port_pool_th_save $swp2 4 devlink_port_pool_th_set $swp2 4 12 + devlink_tc_bind_pool_th_save $swp2 7 egress devlink_tc_bind_pool_th_set $swp2 7 egress 4 5 + devlink_tc_bind_pool_th_save $swp2 6 egress devlink_tc_bind_pool_th_set $swp2 6 egress 4 5 + devlink_tc_bind_pool_th_save $swp2 5 egress devlink_tc_bind_pool_th_set $swp2 5 egress 4 5 # Note: sch_ets_core.sh uses VLAN ingress-qos-map to assign packet diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index 517297a14ecf..b0cb1aaffdda 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -208,6 +208,7 @@ switch_create() ip link set dev br2_11 up local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1) + devlink_port_pool_th_save $swp3 8 devlink_port_pool_th_set $swp3 8 $size } diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 75fe24bcb9cd..ba6aca848702 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -117,6 +117,12 @@ devlink_reload() declare -A DEVLINK_ORIG +# Changing pool type from static to dynamic causes reinterpretation of threshold +# values. They therefore need to be saved before pool type is changed, then the +# pool type can be changed, and then the new values need to be set up. Therefore +# instead of saving the current state implicitly in the _set call, provide +# functions for all three primitives: save, set, and restore. + devlink_port_pool_threshold() { local port=$1; shift @@ -126,14 +132,21 @@ devlink_port_pool_threshold() | jq '.port_pool."'"$port"'"[].threshold' } +devlink_port_pool_th_save() +{ + local port=$1; shift + local pool=$1; shift + local key="port_pool($port,$pool).threshold" + + DEVLINK_ORIG[$key]=$(devlink_port_pool_threshold $port $pool) +} + devlink_port_pool_th_set() { local port=$1; shift local pool=$1; shift local th=$1; shift - local key="port_pool($port,$pool).threshold" - DEVLINK_ORIG[$key]=$(devlink_port_pool_threshold $port $pool) devlink sb port pool set $port pool $pool th $th } @@ -142,8 +155,13 @@ devlink_port_pool_th_restore() local port=$1; shift local pool=$1; shift local key="port_pool($port,$pool).threshold" + local -a orig=(${DEVLINK_ORIG[$key]}) - devlink sb port pool set $port pool $pool th ${DEVLINK_ORIG[$key]} + if [[ -z $orig ]]; then + echo "WARNING: Mismatched devlink_port_pool_th_restore" + else + devlink sb port pool set $port pool $pool th $orig + fi } devlink_pool_size_thtype() @@ -154,14 +172,20 @@ devlink_pool_size_thtype() | jq -r '.pool[][] | (.size, .thtype)' } +devlink_pool_size_thtype_save() +{ + local pool=$1; shift + local key="pool($pool).size_thtype" + + DEVLINK_ORIG[$key]=$(devlink_pool_size_thtype $pool) +} + devlink_pool_size_thtype_set() { local pool=$1; shift local thtype=$1; shift local size=$1; shift - local key="pool($pool).size_thtype" - DEVLINK_ORIG[$key]=$(devlink_pool_size_thtype $pool) devlink sb pool set "$DEVLINK_DEV" pool $pool size $size thtype $thtype } @@ -171,8 +195,12 @@ devlink_pool_size_thtype_restore() local key="pool($pool).size_thtype" local -a orig=(${DEVLINK_ORIG[$key]}) - devlink sb pool set "$DEVLINK_DEV" pool $pool \ - size ${orig[0]} thtype ${orig[1]} + if [[ -z ${orig[0]} ]]; then + echo "WARNING: Mismatched devlink_pool_size_thtype_restore" + else + devlink sb pool set "$DEVLINK_DEV" pool $pool \ + size ${orig[0]} thtype ${orig[1]} + fi } devlink_tc_bind_pool_th() @@ -185,6 +213,16 @@ devlink_tc_bind_pool_th() | jq -r '.tc_bind[][] | (.pool, .threshold)' } +devlink_tc_bind_pool_th_save() +{ + local port=$1; shift + local tc=$1; shift + local dir=$1; shift + local key="tc_bind($port,$dir,$tc).pool_th" + + DEVLINK_ORIG[$key]=$(devlink_tc_bind_pool_th $port $tc $dir) +} + devlink_tc_bind_pool_th_set() { local port=$1; shift @@ -192,9 +230,7 @@ devlink_tc_bind_pool_th_set() local dir=$1; shift local pool=$1; shift local th=$1; shift - local key="tc_bind($port,$dir,$tc).pool_th" - DEVLINK_ORIG[$key]=$(devlink_tc_bind_pool_th $port $tc $dir) devlink sb tc bind set $port tc $tc type $dir pool $pool th $th } @@ -206,8 +242,12 @@ devlink_tc_bind_pool_th_restore() local key="tc_bind($port,$dir,$tc).pool_th" local -a orig=(${DEVLINK_ORIG[$key]}) - devlink sb tc bind set $port tc $tc type $dir \ - pool ${orig[0]} th ${orig[1]} + if [[ -z ${orig[0]} ]]; then + echo "WARNING: Mismatched devlink_tc_bind_pool_th_restore" + else + devlink sb tc bind set $port tc $tc type $dir \ + pool ${orig[0]} th ${orig[1]} + fi } devlink_traps_num_get() From 294f44c19fa65b0c813608d7ddfbb378875eb6b0 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 30 Sep 2020 12:49:08 +0200 Subject: [PATCH 2/6] selftests: forwarding: devlink_lib: Add devlink_cell_size_get() Add a helper that answers the cell size of the devlink device. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/devlink_lib.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index ba6aca848702..cf4e5daf767b 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -549,3 +549,9 @@ devlink_cpu_port_get() echo "$DEVLINK_DEV/$cpu_dl_port_num" } + +devlink_cell_size_get() +{ + devlink sb pool show "$DEVLINK_DEV" pool 0 -j \ + | jq '.pool[][].cell_size' +} From 5b3a53c9c843b84955d5a3abda358518a3031630 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 30 Sep 2020 12:49:09 +0200 Subject: [PATCH 3/6] selftests: forwarding: devlink_lib: Support port-less topologies Some selftests may not need any actual ports. Technically those are not forwarding selftests, but devlink_lib can still be handy. Fall back on NETIF_NO_CABLE in those cases. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/devlink_lib.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index cf4e5daf767b..9c12c4fd3afc 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -5,7 +5,7 @@ # Defines if [[ ! -v DEVLINK_DEV ]]; then - DEVLINK_DEV=$(devlink port show "${NETIFS[p1]}" -j \ + DEVLINK_DEV=$(devlink port show "${NETIFS[p1]:-$NETIF_NO_CABLE}" -j \ | jq -r '.port | keys[]' | cut -d/ -f-2) if [ -z "$DEVLINK_DEV" ]; then echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it" From 4b94a2fad835ad5022a099105b7bf27eb584d754 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 30 Sep 2020 12:49:10 +0200 Subject: [PATCH 4/6] selftests: mlxsw: qos_lib: Add a wrapper for running mlnx_qos mlnx_qos is a script for configuration of DCB. Despite the name it is not actually Mellanox-specific in any way. It is currently the only ad-hoc tool available (in contrast to a daemon that manages an interface on an ongoing basis). However, it is very verbose and parsing out error messages is not really possible. Add a wrapper that makes it easier to use the tool. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- .../testing/selftests/drivers/net/mlxsw/qos_lib.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh index faa51012cdac..0bf76f13c030 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh @@ -82,3 +82,17 @@ bail_on_lldpad() fi fi } + +__mlnx_qos() +{ + local err + + mlnx_qos "$@" 2>/dev/null + err=$? + + if ((err)); then + echo "Error ($err) in mlnx_qos $@" >/dev/stderr + fi + + return $err +} From a65cc53a0eb882c5f6d49cda947aac7fdc9b0f73 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 30 Sep 2020 12:49:11 +0200 Subject: [PATCH 5/6] selftests: mlxsw: Add headroom handling test Add a test for headroom configuration. This covers projection of ETS configuration to ingress, PFC, adjustments for MTU, the qdisc / TC mode and the effect of egress SPAN session on buffer configuration. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/qos_headroom.sh | 379 ++++++++++++++++++ 1 file changed, 379 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh new file mode 100755 index 000000000000..27de3d9ed08e --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh @@ -0,0 +1,379 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + test_defaults + test_dcb_ets + test_mtu + test_pfc + test_int_buf + test_tc_priomap + test_tc_mtu + test_tc_sizes + test_tc_int_buf +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=0 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source qos_lib.sh + +swp=$NETIF_NO_CABLE + +cleanup() +{ + pre_cleanup +} + +get_prio_pg() +{ + __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' | + grep buffer | sed 's/ \+/ /g' | cut -d' ' -f 2- +} + +get_prio_pfc() +{ + __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' | + grep enabled | sed 's/ \+/ /g' | cut -d' ' -f 2- +} + +get_prio_tc() +{ + __mlnx_qos -i $swp | sed -n '/^tc/,$p' | + awk '/^tc/ { TC = $2 } + /priority:/ { PRIO[$2]=TC } + END { + for (i in PRIO) + printf("%d ", PRIO[i]) + }' +} + +get_buf_size() +{ + local idx=$1; shift + + __mlnx_qos -i $swp | grep Receive | sed 's/.*: //' | cut -d, -f $((idx + 1)) +} + +get_tot_size() +{ + __mlnx_qos -i $swp | grep Receive | sed 's/.*total_size=//' +} + +check_prio_pg() +{ + local expect=$1; shift + + local current=$(get_prio_pg) + test "$current" = "$expect" + check_err $? "prio2buffer is '$current', expected '$expect'" +} + +check_prio_pfc() +{ + local expect=$1; shift + + local current=$(get_prio_pfc) + test "$current" = "$expect" + check_err $? "prio PFC is '$current', expected '$expect'" +} + +check_prio_tc() +{ + local expect=$1; shift + + local current=$(get_prio_tc) + test "$current" = "$expect" + check_err $? "prio_tc is '$current', expected '$expect'" +} + +__check_buf_size() +{ + local idx=$1; shift + local expr=$1; shift + local what=$1; shift + + local current=$(get_buf_size $idx) + ((current $expr)) + check_err $? "${what}buffer $idx size is '$current', expected '$expr'" + echo $current +} + +check_buf_size() +{ + __check_buf_size "$@" > /dev/null +} + +test_defaults() +{ + RET=0 + + check_prio_pg "0 0 0 0 0 0 0 0 " + check_prio_tc "0 0 0 0 0 0 0 0 " + check_prio_pfc "0 0 0 0 0 0 0 0 " + + log_test "Default headroom configuration" +} + +test_dcb_ets() +{ + RET=0 + + __mlnx_qos -i $swp --prio_tc=0,2,4,6,1,3,5,7 > /dev/null + + check_prio_pg "0 2 4 6 1 3 5 7 " + check_prio_tc "0 2 4 6 1 3 5 7 " + check_prio_pfc "0 0 0 0 0 0 0 0 " + + __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null + + check_prio_pg "0 0 0 0 0 0 0 0 " + check_prio_tc "0 0 0 0 0 0 0 0 " + + __mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 &> /dev/null + check_fail $? "prio2buffer accepted in DCB mode" + + log_test "Configuring headroom through ETS" +} + +test_mtu() +{ + local what=$1; shift + local buf0size_2 + local buf0size + + RET=0 + buf0size=$(__check_buf_size 0 "> 0") + + mtu_set $swp 3000 + buf0size_2=$(__check_buf_size 0 "> $buf0size" "MTU 3000: ") + mtu_restore $swp + + mtu_set $swp 6000 + check_buf_size 0 "> $buf0size_2" "MTU 6000: " + mtu_restore $swp + + check_buf_size 0 "== $buf0size" + + log_test "${what}MTU impacts buffer size" +} + +test_tc_mtu() +{ + # In TC mode, MTU still impacts the threshold below which a buffer is + # not permitted to go. + + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + test_mtu "TC: " + tc qdisc delete dev $swp root +} + +test_pfc() +{ + RET=0 + + __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,1,2,3 > /dev/null + + local buf0size=$(get_buf_size 0) + local buf1size=$(get_buf_size 1) + local buf2size=$(get_buf_size 2) + local buf3size=$(get_buf_size 3) + check_buf_size 0 "> 0" + check_buf_size 1 "> 0" + check_buf_size 2 "> 0" + check_buf_size 3 "> 0" + check_buf_size 4 "== 0" + check_buf_size 5 "== 0" + check_buf_size 6 "== 0" + check_buf_size 7 "== 0" + + log_test "Buffer size sans PFC" + + RET=0 + + __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=0 > /dev/null + + check_prio_pg "0 0 0 0 0 1 2 3 " + check_prio_pfc "0 0 0 0 0 1 1 1 " + check_buf_size 0 "== $buf0size" + check_buf_size 1 "> $buf1size" + check_buf_size 2 "> $buf2size" + check_buf_size 3 "> $buf3size" + + local buf1size=$(get_buf_size 1) + check_buf_size 2 "== $buf1size" + check_buf_size 3 "== $buf1size" + + log_test "PFC: Cable length 0" + + RET=0 + + __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=1000 > /dev/null + + check_buf_size 0 "== $buf0size" + check_buf_size 1 "> $buf1size" + check_buf_size 2 "> $buf1size" + check_buf_size 3 "> $buf1size" + + log_test "PFC: Cable length 1000" + + RET=0 + + __mlnx_qos -i $swp --pfc=0,0,0,0,0,0,0,0 --cable_len=0 > /dev/null + __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null + + check_prio_pg "0 0 0 0 0 0 0 0 " + check_prio_tc "0 0 0 0 0 0 0 0 " + check_buf_size 0 "> 0" + check_buf_size 1 "== 0" + check_buf_size 2 "== 0" + check_buf_size 3 "== 0" + check_buf_size 4 "== 0" + check_buf_size 5 "== 0" + check_buf_size 6 "== 0" + check_buf_size 7 "== 0" + + log_test "PFC: Restore defaults" +} + +test_tc_priomap() +{ + RET=0 + + __mlnx_qos -i $swp --prio_tc=0,1,2,3,4,5,6,7 > /dev/null + check_prio_pg "0 1 2 3 4 5 6 7 " + + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + check_prio_pg "0 0 0 0 0 0 0 0 " + + __mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 > /dev/null + check_prio_pg "1 3 5 7 0 2 4 6 " + + tc qdisc delete dev $swp root + check_prio_pg "0 1 2 3 4 5 6 7 " + + # Clean up. + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + __mlnx_qos -i $swp --prio2buffer=0,0,0,0,0,0,0,0 > /dev/null + tc qdisc delete dev $swp root + __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null + + log_test "TC: priomap" +} + +test_tc_sizes() +{ + local cell_size=$(devlink_cell_size_get) + local size=$((cell_size * 1000)) + + RET=0 + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null + check_fail $? "buffer_size should fail before qdisc is added" + + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + check_err $? "buffer_size should pass after qdisc is added" + check_buf_size 0 "== $size" "set size: " + + mtu_set $swp 6000 + check_buf_size 0 "== $size" "set MTU: " + mtu_restore $swp + + __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null + + # After replacing the qdisc for the same kind, buffer_size still has to + # work. + tc qdisc replace dev $swp root handle 1: bfifo limit 1M + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + check_buf_size 0 "== $size" "post replace, set size: " + + __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null + + # Likewise after replacing for a different kind. + tc qdisc replace dev $swp root handle 2: prio bands 8 + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + check_buf_size 0 "== $size" "post replace different kind, set size: " + + tc qdisc delete dev $swp root + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null + check_fail $? "buffer_size should fail after qdisc is deleted" + + log_test "TC: buffer size" +} + +test_int_buf() +{ + local what=$1; shift + + RET=0 + + local buf0size=$(get_buf_size 0) + local tot_size=$(get_tot_size) + + # Size of internal buffer and buffer 9. + local dsize=$((tot_size - buf0size)) + + tc qdisc add dev $swp clsact + tc filter add dev $swp egress matchall skip_sw action mirred egress mirror dev $swp + + local buf0size_2=$(get_buf_size 0) + local tot_size_2=$(get_tot_size) + local dsize_2=$((tot_size_2 - buf0size_2)) + + # Egress SPAN should have added to the "invisible" buffer configuration. + ((dsize_2 > dsize)) + check_err $? "Invisible buffers account for '$dsize_2', expected '> $dsize'" + + mtu_set $swp 3000 + + local buf0size_3=$(get_buf_size 0) + local tot_size_3=$(get_tot_size) + local dsize_3=$((tot_size_3 - buf0size_3)) + + # MTU change might change buffer 0, which will show at total, but the + # hidden buffers should stay the same size. + ((dsize_3 == dsize_2)) + check_err $? "MTU change: Invisible buffers account for '$dsize_3', expected '== $dsize_2'" + + mtu_restore $swp + tc qdisc del dev $swp clsact + + # After SPAN removal, hidden buffers should be back to the original sizes. + local buf0size_4=$(get_buf_size 0) + local tot_size_4=$(get_tot_size) + local dsize_4=$((tot_size_4 - buf0size_4)) + ((dsize_4 == dsize)) + check_err $? "SPAN removed: Invisible buffers account for '$dsize_4', expected '== $dsize'" + + log_test "${what}internal buffer size" +} + +test_tc_int_buf() +{ + local cell_size=$(devlink_cell_size_get) + local size=$((cell_size * 1000)) + + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + test_int_buf "TC: " + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + test_int_buf "TC+buffsize: " + + __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null + tc qdisc delete dev $swp root +} + +trap cleanup EXIT + +bail_on_lldpad +setup_wait +tests_run + +exit $EXIT_STATUS From bfa804784e321eb6a309a0959a17c64b08fb7ac7 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 30 Sep 2020 12:49:12 +0200 Subject: [PATCH 6/6] selftests: mlxsw: Add a PFC test Add a test for PFC. Runs 10MB of traffic through a bottleneck and checks that none of it gets lost. Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/qos_pfc.sh | 403 ++++++++++++++++++ 1 file changed, 403 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh new file mode 100755 index 000000000000..4d900bc1f76c --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh @@ -0,0 +1,403 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test injects a 10-MB burst of traffic with VLAN tag and 802.1p priority +# of 1. This stream is consistently prioritized as priority 1, is put to PG +# buffer 1, and scheduled at TC 1. +# +# - the stream first ingresses through $swp1, where it is forwarded to $swp3 +# +# - then it ingresses through $swp4. Here it is put to a lossless buffer and put +# to a small pool ("PFC pool"). The traffic is forwarded to $swp2, which is +# shaped, and thus the PFC pool eventually fills, therefore the headroom +# fills, and $swp3 is paused. +# +# - since $swp3 now can't send traffic, the traffic ingressing $swp1 is kept at +# a pool ("overflow pool"). The overflow pool needs to be large enough to +# contain the whole burst. +# +# - eventually the PFC pool gets some traffic out, headroom therefore gets some +# traffic to the pool, and $swp3 is unpaused again. This way the traffic is +# gradually forwarded from the overflow pool, through the PFC pool, out of +# $swp2, and eventually to $h2. +# +# - if PFC works, all lossless flow packets that ingress through $swp1 should +# also be seen ingressing $h2. If it doesn't, there will be drops due to +# discrepancy between the speeds of $swp1 and $h2. +# +# - it should all play out relatively quickly, so that SLL and HLL will not +# cause drops. +# +# +-----------------------+ +# | H1 | +# | + $h1.111 | +# | | 192.0.2.33/28 | +# | | | +# | + $h1 | +# +---|-------------------+ +--------------------+ +# | | | +# +---|----------------------|--------------------|---------------------------+ +# | + $swp1 $swp3 + + $swp4 | +# | | iPOOL1 iPOOL0 | | iPOOL2 | +# | | ePOOL4 ePOOL5 | | ePOOL4 | +# | | 1Gbps | | 1Gbps | +# | | PFC:enabled=1 | | PFC:enabled=1 | +# | +-|----------------------|-+ +-|------------------------+ | +# | | + $swp1.111 $swp3.111 + | | + $swp4.111 | | +# | | | | | | +# | | BR1 | | BR2 | | +# | | | | | | +# | | | | + $swp2.111 | | +# | +--------------------------+ +---------|----------------+ | +# | | | +# | iPOOL0: 500KB dynamic | | +# | iPOOL1: 10MB static | | +# | iPOOL2: 1MB static + $swp2 | +# | ePOOL4: 500KB dynamic | iPOOL0 | +# | ePOOL5: 10MB static | ePOOL6 | +# | ePOOL6: "infinite" static | 200Mbps shaper | +# +-------------------------------------------------------|-------------------+ +# | +# +---|-------------------+ +# | + $h2 H2 | +# | | | +# | + $h2.111 | +# | 192.0.2.34/28 | +# +-----------------------+ +# +# iPOOL0+ePOOL4 is a helper pool for control traffic etc. +# iPOOL1+ePOOL5 are overflow pools. +# iPOOL2+ePOOL6 are PFC pools. + +ALL_TESTS=" + ping_ipv4 + test_qos_pfc +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=6 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source qos_lib.sh + +_1KB=1000 +_100KB=$((100 * _1KB)) +_500KB=$((500 * _1KB)) +_1MB=$((1000 * _1KB)) +_10MB=$((10 * _1MB)) + +h1_create() +{ + simple_if_init $h1 + mtu_set $h1 10000 + + vlan_create $h1 111 v$h1 192.0.2.33/28 +} + +h1_destroy() +{ + vlan_destroy $h1 111 + + mtu_restore $h1 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + mtu_set $h2 10000 + + vlan_create $h2 111 v$h2 192.0.2.34/28 +} + +h2_destroy() +{ + vlan_destroy $h2 111 + + mtu_restore $h2 + simple_if_fini $h2 +} + +switch_create() +{ + # pools + # ----- + + devlink_pool_size_thtype_save 0 + devlink_pool_size_thtype_save 4 + devlink_pool_size_thtype_save 1 + devlink_pool_size_thtype_save 5 + devlink_pool_size_thtype_save 2 + devlink_pool_size_thtype_save 6 + + devlink_port_pool_th_save $swp1 1 + devlink_port_pool_th_save $swp2 6 + devlink_port_pool_th_save $swp3 5 + devlink_port_pool_th_save $swp4 2 + + devlink_tc_bind_pool_th_save $swp1 1 ingress + devlink_tc_bind_pool_th_save $swp2 1 egress + devlink_tc_bind_pool_th_save $swp3 1 egress + devlink_tc_bind_pool_th_save $swp4 1 ingress + + # Control traffic pools. Just reduce the size. Keep them dynamic so that + # we don't need to change all the uninteresting quotas. + devlink_pool_size_thtype_set 0 dynamic $_500KB + devlink_pool_size_thtype_set 4 dynamic $_500KB + + # Overflow pools. + devlink_pool_size_thtype_set 1 static $_10MB + devlink_pool_size_thtype_set 5 static $_10MB + + # PFC pools. As per the writ, the size of egress PFC pool should be + # infinice, but actually it just needs to be large enough to not matter + # in practice, so reuse the 10MB limit. + devlink_pool_size_thtype_set 2 static $_1MB + devlink_pool_size_thtype_set 6 static $_10MB + + # $swp1 + # ----- + + ip link set dev $swp1 up + mtu_set $swp1 10000 + vlan_create $swp1 111 + ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp1 1 $_10MB + devlink_tc_bind_pool_th_set $swp1 1 ingress 1 $_10MB + + # Configure qdisc so that we can configure PG and therefore pool + # assignment. + tc qdisc replace dev $swp1 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + __mlnx_qos -i $swp1 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null + + # $swp2 + # ----- + + ip link set dev $swp2 up + mtu_set $swp2 10000 + vlan_create $swp2 111 + ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp2 6 $_10MB + devlink_tc_bind_pool_th_set $swp2 1 egress 6 $_10MB + + # prio 0->TC0 (band 7), 1->TC1 (band 6). TC1 is shaped. + tc qdisc replace dev $swp2 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + tc qdisc replace dev $swp2 parent 1:7 handle 17: \ + tbf rate 200Mbit burst 131072 limit 1M + + # $swp3 + # ----- + + ip link set dev $swp3 up + mtu_set $swp3 10000 + vlan_create $swp3 111 + ip link set dev $swp3.111 type vlan egress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp3 5 $_10MB + devlink_tc_bind_pool_th_set $swp3 1 egress 5 $_10MB + + # prio 0->TC0 (band 7), 1->TC1 (band 6) + tc qdisc replace dev $swp3 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + + # Need to enable PFC so that PAUSE takes effect. Therefore need to put + # the lossless prio into a buffer of its own. Don't bother with buffer + # sizes though, there is not going to be any pressure in the "backward" + # direction. + __mlnx_qos -i $swp3 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp3 --pfc=0,1,0,0,0,0,0,0 >/dev/null + + # $swp4 + # ----- + + ip link set dev $swp4 up + mtu_set $swp4 10000 + vlan_create $swp4 111 + ip link set dev $swp4.111 type vlan ingress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp4 2 $_1MB + devlink_tc_bind_pool_th_set $swp4 1 ingress 2 $_1MB + + # Configure qdisc so that we can hand-tune headroom. + tc qdisc replace dev $swp4 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + __mlnx_qos -i $swp4 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp4 --pfc=0,1,0,0,0,0,0,0 >/dev/null + # PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which + # is (-2*MTU) about 80K of delay provision. + __mlnx_qos -i $swp3 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null + + # bridges + # ------- + + ip link add name br1 type bridge vlan_filtering 0 + ip link set dev $swp1.111 master br1 + ip link set dev $swp3.111 master br1 + ip link set dev br1 up + + ip link add name br2 type bridge vlan_filtering 0 + ip link set dev $swp2.111 master br2 + ip link set dev $swp4.111 master br2 + ip link set dev br2 up +} + +switch_destroy() +{ + # Do this first so that we can reset the limits to values that are only + # valid for the original static / dynamic setting. + devlink_pool_size_thtype_restore 6 + devlink_pool_size_thtype_restore 5 + devlink_pool_size_thtype_restore 4 + devlink_pool_size_thtype_restore 2 + devlink_pool_size_thtype_restore 1 + devlink_pool_size_thtype_restore 0 + + # bridges + # ------- + + ip link set dev br2 down + ip link set dev $swp4.111 nomaster + ip link set dev $swp2.111 nomaster + ip link del dev br2 + + ip link set dev br1 down + ip link set dev $swp3.111 nomaster + ip link set dev $swp1.111 nomaster + ip link del dev br1 + + # $swp4 + # ----- + + __mlnx_qos -i $swp4 --buffer_size=0,0,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp4 --pfc=0,0,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp4 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null + tc qdisc del dev $swp4 root + + devlink_tc_bind_pool_th_restore $swp4 1 ingress + devlink_port_pool_th_restore $swp4 2 + + vlan_destroy $swp4 111 + mtu_restore $swp4 + ip link set dev $swp4 down + + # $swp3 + # ----- + + __mlnx_qos -i $swp3 --pfc=0,0,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp3 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null + tc qdisc del dev $swp3 root + + devlink_tc_bind_pool_th_restore $swp3 1 egress + devlink_port_pool_th_restore $swp3 5 + + vlan_destroy $swp3 111 + mtu_restore $swp3 + ip link set dev $swp3 down + + # $swp2 + # ----- + + tc qdisc del dev $swp2 parent 1:7 + tc qdisc del dev $swp2 root + + devlink_tc_bind_pool_th_restore $swp2 1 egress + devlink_port_pool_th_restore $swp2 6 + + vlan_destroy $swp2 111 + mtu_restore $swp2 + ip link set dev $swp2 down + + # $swp1 + # ----- + + __mlnx_qos -i $swp1 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null + tc qdisc del dev $swp1 root + + devlink_tc_bind_pool_th_restore $swp1 1 ingress + devlink_port_pool_th_restore $swp1 1 + + vlan_destroy $swp1 111 + mtu_restore $swp1 + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + swp4=${NETIFS[p6]} + + h2mac=$(mac_get $h2) + + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.34 +} + +test_qos_pfc() +{ + RET=0 + + # 10M pool, each packet is 8K of payload + headers + local pkts=$((_10MB / 8050)) + local size=$((pkts * 8050)) + local in0=$(ethtool_stats_get $swp1 rx_octets_prio_1) + local out0=$(ethtool_stats_get $swp2 tx_octets_prio_1) + + $MZ $h1 -p 8000 -Q 1:111 -A 192.0.2.33 -B 192.0.2.34 \ + -a own -b $h2mac -c $pkts -t udp -q + sleep 2 + + local in1=$(ethtool_stats_get $swp1 rx_octets_prio_1) + local out1=$(ethtool_stats_get $swp2 tx_octets_prio_1) + + local din=$((in1 - in0)) + local dout=$((out1 - out0)) + + local pct_in=$((din * 100 / size)) + + ((pct_in > 95 && pct_in < 105)) + check_err $? "Relative ingress out of expected bounds, $pct_in% should be 100%" + + ((dout == din)) + check_err $? "$((din - dout)) bytes out of $din ingressed got lost" + + log_test "PFC" +} + +trap cleanup EXIT + +bail_on_lldpad +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS