Merge branch 'mlxsw-Enable-minimum-shaper-on-MC-TCs'

Ido Schimmel says:

====================
mlxsw: Enable minimum shaper on MC TCs

Petr says:

An MC-aware mode was introduced in commit 7b81953066 ("mlxsw:
spectrum: Configure MC-aware mode on mlxsw ports"). In MC-aware mode,
BUM traffic gets a special treatment by being assigned to a separate set
of traffic classes 8..15. Pairs of TCs 0 and 8, 1 and 9, etc., are then
configured to strictly prioritize the lower-numbered ones. The intention
is to prevent BUM traffic from flooding the switch and push out all UC
traffic, which would otherwise happen, and instead give UC traffic
precedence.

However strictly prioritizing UC traffic has the effect that UC overload
pushes out all BUM traffic, such as legitimate ARP queries. These
packets are kept in queues for a while, but under sustained UC overload,
their lifetime eventually expires and these packets are dropped. That is
detrimental to network performance as well.

In this patchset, MC TCs (8..15) are configured with minimum shaper of
200Mbps (a minimum permitted value) to allow a trickle of necessary
control traffic to get through.

First in patch #1, the QEEC register is extended with fields necessary
to configure the minimum shaper.

In patch #2, minimum shaper is enabled on TCs 8..15.

In patches #3 and #4, first the MC-awareness test is tweaked to support
the minimum shaper, and then a new test is introduced to test that MC
traffic behaves well under UC overload.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2018-10-31 12:56:59 -07:00
commit e2acdddde0
3 changed files with 117 additions and 25 deletions

View File

@ -3284,7 +3284,7 @@ static inline void mlxsw_reg_qtct_pack(char *payload, u8 local_port,
* Configures the ETS elements.
*/
#define MLXSW_REG_QEEC_ID 0x400D
#define MLXSW_REG_QEEC_LEN 0x1C
#define MLXSW_REG_QEEC_LEN 0x20
MLXSW_REG_DEFINE(qeec, MLXSW_REG_QEEC_ID, MLXSW_REG_QEEC_LEN);
@ -3326,6 +3326,15 @@ MLXSW_ITEM32(reg, qeec, element_index, 0x04, 0, 8);
*/
MLXSW_ITEM32(reg, qeec, next_element_index, 0x08, 0, 8);
/* reg_qeec_mise
* Min shaper configuration enable. Enables configuration of the min
* shaper on this ETS element
* 0 - Disable
* 1 - Enable
* Access: RW
*/
MLXSW_ITEM32(reg, qeec, mise, 0x0C, 31, 1);
enum {
MLXSW_REG_QEEC_BYTES_MODE,
MLXSW_REG_QEEC_PACKETS_MODE,
@ -3342,6 +3351,17 @@ enum {
*/
MLXSW_ITEM32(reg, qeec, pb, 0x0C, 28, 1);
/* The smallest permitted min shaper rate. */
#define MLXSW_REG_QEEC_MIS_MIN 200000 /* Kbps */
/* reg_qeec_min_shaper_rate
* Min shaper information rate.
* For CPU port, can only be configured for port hierarchy.
* When in bytes mode, value is specified in units of 1000bps.
* Access: RW
*/
MLXSW_ITEM32(reg, qeec, min_shaper_rate, 0x0C, 0, 28);
/* reg_qeec_mase
* Max shaper configuration enable. Enables configuration of the max
* shaper on this ETS element.

View File

@ -2740,6 +2740,21 @@ int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port,
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl);
}
static int mlxsw_sp_port_min_bw_set(struct mlxsw_sp_port *mlxsw_sp_port,
enum mlxsw_reg_qeec_hr hr, u8 index,
u8 next_index, u32 minrate)
{
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
char qeec_pl[MLXSW_REG_QEEC_LEN];
mlxsw_reg_qeec_pack(qeec_pl, mlxsw_sp_port->local_port, hr, index,
next_index);
mlxsw_reg_qeec_mise_set(qeec_pl, true);
mlxsw_reg_qeec_min_shaper_rate_set(qeec_pl, minrate);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl);
}
int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port,
u8 switch_prio, u8 tclass)
{
@ -2817,6 +2832,16 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
return err;
}
/* Configure the min shaper for multicast TCs. */
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
err = mlxsw_sp_port_min_bw_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HIERARCY_TC,
i + 8, i,
MLXSW_REG_QEEC_MIS_MIN);
if (err)
return err;
}
/* Map all priorities to traffic class 0. */
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, 0);

View File

@ -25,24 +25,24 @@
# Thus we set MTU to 10K on all involved interfaces. Then both unicast and
# multicast traffic uses 8K frames.
#
# +-----------------------+ +----------------------------------+
# +---------------------------+ +----------------------------------+
# | H1 | | H2 |
# | | | unicast --> + $h2.111 |
# | | | traffic | 192.0.2.129/28 |
# | multicast | | | e-qos-map 0:1 |
# | traffic | | | |
# | $h1 + <----- | | + $h2 |
# +-----|-----------------+ +--------------|-------------------+
# | multicast | | traffic | 192.0.2.129/28 |
# | traffic | | | e-qos-map 0:1 |
# | $h1 + <----- | | | |
# | 192.0.2.65/28 | | | + $h2 |
# +---------------|-----------+ +--------------|-------------------+
# | |
# +-----|-------------------------------------------------|-------------------+
# | + $swp1 + $swp2 |
# | | >1Gbps | >1Gbps |
# | +---|----------------+ +----------|----------------+ |
# | | + $swp1.1 | | + $swp2.111 | |
# +---------------|---------------------------------------|-------------------+
# | $swp1 + + $swp2 |
# | >1Gbps | | >1Gbps |
# | +-------------|------+ +----------|----------------+ |
# | | $swp1.1 + | | + $swp2.111 | |
# | | BR1 | SW | BR111 | |
# | | + $swp3.1 | | + $swp3.111 | |
# | +---|----------------+ +----------|----------------+ |
# | \_________________________________________________/ |
# | | $swp3.1 + | | + $swp3.111 | |
# | +-------------|------+ +----------|----------------+ |
# | \_______________________________________/ |
# | | |
# | + $swp3 |
# | | 1Gbps bottleneck |
@ -51,6 +51,7 @@
# |
# +--|-----------------+
# | + $h3 H3 |
# | | 192.0.2.66/28 |
# | | |
# | + $h3.111 |
# | 192.0.2.130/28 |
@ -59,6 +60,7 @@
ALL_TESTS="
ping_ipv4
test_mc_aware
test_uc_aware
"
lib_dir=$(dirname $0)/../../../net/forwarding
@ -68,14 +70,14 @@ source $lib_dir/lib.sh
h1_create()
{
simple_if_init $h1
simple_if_init $h1 192.0.2.65/28
mtu_set $h1 10000
}
h1_destroy()
{
mtu_restore $h1
simple_if_fini $h1
simple_if_fini $h1 192.0.2.65/28
}
h2_create()
@ -97,7 +99,7 @@ h2_destroy()
h3_create()
{
simple_if_init $h3
simple_if_init $h3 192.0.2.66/28
mtu_set $h3 10000
vlan_create $h3 111 v$h3 192.0.2.130/28
@ -108,7 +110,7 @@ h3_destroy()
vlan_destroy $h3 111
mtu_restore $h3
simple_if_fini $h3
simple_if_fini $h3 192.0.2.66/28
}
switch_create()
@ -251,7 +253,7 @@ measure_uc_rate()
# average ingress rate to somewhat mitigate this.
local min_ingress=2147483648
mausezahn $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
$MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
-a own -b $h3mac -t udp -q &
sleep 1
@ -291,7 +293,7 @@ test_mc_aware()
check_err $? "Could not get high enough UC-only ingress rate"
local ucth1=${uc_rate[1]}
mausezahn $h1 -p 8000 -c 0 -a own -b bc -t udp -q &
$MZ $h1 -p 8000 -c 0 -a own -b bc -t udp -q &
local d0=$(date +%s)
local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
@ -311,7 +313,7 @@ test_mc_aware()
ret = 100 * ($ucth1 - $ucth2) / $ucth1
if (ret > 0) { ret } else { 0 }
")
check_err $(bc <<< "$deg > 10")
check_err $(bc <<< "$deg > 25")
local interval=$((d1 - d0))
local mc_ir=$(rate $u0 $u1 $interval)
@ -335,6 +337,51 @@ test_mc_aware()
echo " egress UC throughput $(humanize ${uc_rate_2[1]})"
echo " ingress MC throughput $(humanize $mc_ir)"
echo " egress MC throughput $(humanize $mc_er)"
echo
}
test_uc_aware()
{
RET=0
$MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
-a own -b $h3mac -t udp -q &
local d0=$(date +%s)
local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
sleep 1
local attempts=50
local passes=0
local i
for ((i = 0; i < attempts; ++i)); do
if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 0.1; then
((passes++))
fi
sleep 0.1
done
local d1=$(date +%s)
local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)
local interval=$((d1 - d0))
local uc_ir=$(rate $u0 $u1 $interval)
local uc_er=$(rate $t0 $t1 $interval)
((attempts == passes))
check_err $?
# Suppress noise from killing mausezahn.
{ kill %% && wait; } 2>/dev/null
log_test "MC performace under UC overload"
echo " ingress UC throughput $(humanize ${uc_ir})"
echo " egress UC throughput $(humanize ${uc_er})"
echo " sent $attempts BC ARPs, got $passes responses"
}
trap cleanup EXIT