From 8b931821aa04823e2e5df0ae93937baabbd23286 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 31 Oct 2018 09:56:42 +0000 Subject: [PATCH 1/4] mlxsw: reg: QEEC: Add minimum shaper fields Add QEEC.mise (minimum shaper enable) and QEEC.min_shaper_rate to enable configuration of minimum shaper. Increase the QEEC length to 0x20 as well: that's the length that the register has had for a long time now, but with the configurations that mlxsw typically exercises, the firmware tolerated 0x1C-sized packets. With mise=true however, FW rejects packets unless they have the full required length. Fixes: b9b7cee40579 ("mlxsw: reg: Add QoS ETS Element Configuration register") Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 32cb6718bb17..db3d2790aeec 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3284,7 +3284,7 @@ static inline void mlxsw_reg_qtct_pack(char *payload, u8 local_port, * Configures the ETS elements. */ #define MLXSW_REG_QEEC_ID 0x400D -#define MLXSW_REG_QEEC_LEN 0x1C +#define MLXSW_REG_QEEC_LEN 0x20 MLXSW_REG_DEFINE(qeec, MLXSW_REG_QEEC_ID, MLXSW_REG_QEEC_LEN); @@ -3326,6 +3326,15 @@ MLXSW_ITEM32(reg, qeec, element_index, 0x04, 0, 8); */ MLXSW_ITEM32(reg, qeec, next_element_index, 0x08, 0, 8); +/* reg_qeec_mise + * Min shaper configuration enable. Enables configuration of the min + * shaper on this ETS element + * 0 - Disable + * 1 - Enable + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, mise, 0x0C, 31, 1); + enum { MLXSW_REG_QEEC_BYTES_MODE, MLXSW_REG_QEEC_PACKETS_MODE, @@ -3342,6 +3351,17 @@ enum { */ MLXSW_ITEM32(reg, qeec, pb, 0x0C, 28, 1); +/* The smallest permitted min shaper rate. */ +#define MLXSW_REG_QEEC_MIS_MIN 200000 /* Kbps */ + +/* reg_qeec_min_shaper_rate + * Min shaper information rate. + * For CPU port, can only be configured for port hierarchy. + * When in bytes mode, value is specified in units of 1000bps. + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, min_shaper_rate, 0x0C, 0, 28); + /* reg_qeec_mase * Max shaper configuration enable. Enables configuration of the max * shaper on this ETS element. From 0fe64023162aef123de2f1993ba13a35a786e1de Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 31 Oct 2018 09:56:44 +0000 Subject: [PATCH 2/4] mlxsw: spectrum: Set minimum shaper on MC TCs An MC-aware mode was introduced in commit 7b8195306694 ("mlxsw: spectrum: Configure MC-aware mode on mlxsw ports"). In MC-aware mode, BUM traffic gets a special treatment by being assigned to a separate set of traffic classes 8..15. Pairs of TCs 0 and 8, 1 and 9, etc., are then configured to strictly prioritize the lower-numbered ones. The intention is to prevent BUM traffic from flooding the switch and push out all UC traffic, which would otherwise happen, and instead give UC traffic precedence. However strictly prioritizing UC traffic has the effect that UC overload pushes out all BUM traffic, such as legitimate ARP queries. These packets are kept in queues for a while, but under sustained UC overload, their lifetime eventually expires and these packets are dropped. That is detrimental to network performance as well. Therefore configure the MC TCs (8..15) with minimum shaper of 200Mbps (a minimum permitted value) to allow a trickle of necessary control traffic to get through. Fixes: 7b8195306694 ("mlxsw: spectrum: Configure MC-aware mode on mlxsw ports") Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 8a4983adae94..a2df12b79f8e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2740,6 +2740,21 @@ int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl); } +static int mlxsw_sp_port_min_bw_set(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_qeec_hr hr, u8 index, + u8 next_index, u32 minrate) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qeec_pl[MLXSW_REG_QEEC_LEN]; + + mlxsw_reg_qeec_pack(qeec_pl, mlxsw_sp_port->local_port, hr, index, + next_index); + mlxsw_reg_qeec_mise_set(qeec_pl, true); + mlxsw_reg_qeec_min_shaper_rate_set(qeec_pl, minrate); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl); +} + int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 switch_prio, u8 tclass) { @@ -2817,6 +2832,16 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) return err; } + /* Configure the min shaper for multicast TCs. */ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_min_bw_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_TC, + i + 8, i, + MLXSW_REG_QEEC_MIS_MIN); + if (err) + return err; + } + /* Map all priorities to traffic class 0. */ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, 0); From 8f3f09358c81248109463b3cae254b7db4ea9af0 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 31 Oct 2018 09:56:45 +0000 Subject: [PATCH 3/4] selftests: mlxsw: qos_mc_aware: Tweak for min shaper Since the minimum shaper is now being enabled for MC TCs, it's unreasonable to expect no UC traffic loss. Minimal min shaper value is 200Mbps, which is 20% of the 1Gbps that this test configures on egress. To cover for glitches, tolerate up to 25% UC degradation under MC overload. Fixes: b5638d46c90a ("selftests: mlxsw: Add a test for UC behavior under MC flood") Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index 0150bb2741eb..a8fc36d670e1 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -311,7 +311,7 @@ test_mc_aware() ret = 100 * ($ucth1 - $ucth2) / $ucth1 if (ret > 0) { ret } else { 0 } ") - check_err $(bc <<< "$deg > 10") + check_err $(bc <<< "$deg > 25") local interval=$((d1 - d0)) local mc_ir=$(rate $u0 $u1 $interval) From a5ee171d087ee632a5f190bb3ce1c0f98e06ec0a Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 31 Oct 2018 09:56:46 +0000 Subject: [PATCH 4/4] selftests: mlxsw: qos_mc_aware: Add a test for UC awareness In a previous patch, mlxsw was updated to configure a minimum bandwidth allowance on MC TCs. Test that this indeed fixes the problem of UC traffic overload pushing out all MC traffic. Fixes: b5638d46c90a ("selftests: mlxsw: Add a test for UC behavior under MC flood") Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/qos_mc_aware.sh | 93 ++++++++++++++----- 1 file changed, 70 insertions(+), 23 deletions(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index a8fc36d670e1..117f6f35d72f 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -25,24 +25,24 @@ # Thus we set MTU to 10K on all involved interfaces. Then both unicast and # multicast traffic uses 8K frames. # -# +-----------------------+ +----------------------------------+ -# | H1 | | H2 | -# | | | unicast --> + $h2.111 | -# | | | traffic | 192.0.2.129/28 | -# | multicast | | | e-qos-map 0:1 | -# | traffic | | | | -# | $h1 + <----- | | + $h2 | -# +-----|-----------------+ +--------------|-------------------+ -# | | -# +-----|-------------------------------------------------|-------------------+ -# | + $swp1 + $swp2 | -# | | >1Gbps | >1Gbps | -# | +---|----------------+ +----------|----------------+ | -# | | + $swp1.1 | | + $swp2.111 | | +# +---------------------------+ +----------------------------------+ +# | H1 | | H2 | +# | | | unicast --> + $h2.111 | +# | multicast | | traffic | 192.0.2.129/28 | +# | traffic | | | e-qos-map 0:1 | +# | $h1 + <----- | | | | +# | 192.0.2.65/28 | | | + $h2 | +# +---------------|-----------+ +--------------|-------------------+ +# | | +# +---------------|---------------------------------------|-------------------+ +# | $swp1 + + $swp2 | +# | >1Gbps | | >1Gbps | +# | +-------------|------+ +----------|----------------+ | +# | | $swp1.1 + | | + $swp2.111 | | # | | BR1 | SW | BR111 | | -# | | + $swp3.1 | | + $swp3.111 | | -# | +---|----------------+ +----------|----------------+ | -# | \_________________________________________________/ | +# | | $swp3.1 + | | + $swp3.111 | | +# | +-------------|------+ +----------|----------------+ | +# | \_______________________________________/ | # | | | # | + $swp3 | # | | 1Gbps bottleneck | @@ -51,6 +51,7 @@ # | # +--|-----------------+ # | + $h3 H3 | +# | | 192.0.2.66/28 | # | | | # | + $h3.111 | # | 192.0.2.130/28 | @@ -59,6 +60,7 @@ ALL_TESTS=" ping_ipv4 test_mc_aware + test_uc_aware " lib_dir=$(dirname $0)/../../../net/forwarding @@ -68,14 +70,14 @@ source $lib_dir/lib.sh h1_create() { - simple_if_init $h1 + simple_if_init $h1 192.0.2.65/28 mtu_set $h1 10000 } h1_destroy() { mtu_restore $h1 - simple_if_fini $h1 + simple_if_fini $h1 192.0.2.65/28 } h2_create() @@ -97,7 +99,7 @@ h2_destroy() h3_create() { - simple_if_init $h3 + simple_if_init $h3 192.0.2.66/28 mtu_set $h3 10000 vlan_create $h3 111 v$h3 192.0.2.130/28 @@ -108,7 +110,7 @@ h3_destroy() vlan_destroy $h3 111 mtu_restore $h3 - simple_if_fini $h3 + simple_if_fini $h3 192.0.2.66/28 } switch_create() @@ -251,7 +253,7 @@ measure_uc_rate() # average ingress rate to somewhat mitigate this. local min_ingress=2147483648 - mausezahn $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \ + $MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \ -a own -b $h3mac -t udp -q & sleep 1 @@ -291,7 +293,7 @@ test_mc_aware() check_err $? "Could not get high enough UC-only ingress rate" local ucth1=${uc_rate[1]} - mausezahn $h1 -p 8000 -c 0 -a own -b bc -t udp -q & + $MZ $h1 -p 8000 -c 0 -a own -b bc -t udp -q & local d0=$(date +%s) local t0=$(ethtool_stats_get $h3 rx_octets_prio_0) @@ -335,6 +337,51 @@ test_mc_aware() echo " egress UC throughput $(humanize ${uc_rate_2[1]})" echo " ingress MC throughput $(humanize $mc_ir)" echo " egress MC throughput $(humanize $mc_er)" + echo +} + +test_uc_aware() +{ + RET=0 + + $MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \ + -a own -b $h3mac -t udp -q & + + local d0=$(date +%s) + local t0=$(ethtool_stats_get $h3 rx_octets_prio_1) + local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1) + sleep 1 + + local attempts=50 + local passes=0 + local i + + for ((i = 0; i < attempts; ++i)); do + if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 0.1; then + ((passes++)) + fi + + sleep 0.1 + done + + local d1=$(date +%s) + local t1=$(ethtool_stats_get $h3 rx_octets_prio_1) + local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1) + + local interval=$((d1 - d0)) + local uc_ir=$(rate $u0 $u1 $interval) + local uc_er=$(rate $t0 $t1 $interval) + + ((attempts == passes)) + check_err $? + + # Suppress noise from killing mausezahn. + { kill %% && wait; } 2>/dev/null + + log_test "MC performace under UC overload" + echo " ingress UC throughput $(humanize ${uc_ir})" + echo " egress UC throughput $(humanize ${uc_er})" + echo " sent $attempts BC ARPs, got $passes responses" } trap cleanup EXIT