Commit 982c3e29 authored by David S. Miller

Merge branch 'mlxsw-L3-HW-stats-improvements'

Ido Schimmel says:

====================
mlxsw: L3 HW stats improvements

While testing L3 HW stats [1] on top of mlxsw, two issues were found:

1. Stats cannot be enabled for more than 205 netdevs. This was fixed in
commit 4b7a632a ("mlxsw: spectrum_cnt: Reorder counter pools").

2. ARP packets are counted as errors. Patch #1 takes care of that. See
the commit message for details.

The goal of the majority of the rest of the patches is to add selftests
that would have discovered that only about 205 netdevs can have L3 HW
stats supported, despite the HW supporting much more. The obvious place
to plug this in is the scale test framework.

The scale tests are currently testing two things: that some number of
instances of a given resource can actually be created; and that when an
attempt is made to create more than the supported amount, the failures
are noted and handled gracefully.

However the ability to allocate the resource does not mean that the
resource actually works when passing traffic. For that, make it possible
for a given scale to also test traffic.

To that end, this patchset adds traffic tests. The goal of these is to
run traffic and observe whether a sample of the allocated resource
instances actually perform their task. Traffic tests are only run on the
positive leg of the scale test (no point trying to pass traffic when the
expected outcome is that the resource will not be allocated). They are
opt-in: if a given test does not expose a traffic test, none is run.

The patchset proceeds as follows:

- Patches #2 and #3 add to "devlink resource" support for number of
  allocated RIFs, and the capacity. This is necessary, because when
  evaluating how many L3 HW stats instances it should be possible to
  allocate, the limiting resource on Spectrum-2 and above currently is
  not the counters themselves, but actually the RIFs.

- Patch #6 adds support for invocation of a traffic test, if a given scale
  test exposes it.

- Patch #7 adds support for skipping a given scale test. Because on
  Spectrum-2 and above, the limiting factor to L3 HW stats instances is
  actually the number of RIFs, there is no point in running the failing leg
  of a scale test, because it would test exhaustion of RIFs, not of RIF
  counters.

- With patch #8, the scale test drivers pass the target number to the
  cleanup function of a scale test.

- In patch #9, add a traffic test to the tc_flower selftests. This makes
  sure that the flow counters installed with the ACLs actually do count as
  they are supposed to.

- In patch #10, add a new scale selftest for RIF counter scale, including a
  traffic test.

- In patch #11, the scale target for the tc_flower selftest is
  dynamically set instead of being hard coded.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=ca0a53dcec9495d1dc5bbc369c810c520d728373
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents e42134b5 ed62af45
......@@ -3580,6 +3580,25 @@ mlxsw_sp_resources_rif_mac_profile_register(struct mlxsw_core *mlxsw_core)
&size_params);
}
static int mlxsw_sp_resources_rifs_register(struct mlxsw_core *mlxsw_core)
{
struct devlink *devlink = priv_to_devlink(mlxsw_core);
struct devlink_resource_size_params size_params;
u64 max_rifs;
if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_RIFS))
return -EIO;
max_rifs = MLXSW_CORE_RES_GET(mlxsw_core, MAX_RIFS);
devlink_resource_size_params_init(&size_params, max_rifs, max_rifs,
1, DEVLINK_RESOURCE_UNIT_ENTRY);
return devlink_resource_register(devlink, "rifs", max_rifs,
MLXSW_SP_RESOURCE_RIFS,
DEVLINK_RESOURCE_ID_PARENT_TOP,
&size_params);
}
static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core)
{
int err;
......@@ -3604,8 +3623,13 @@ static int mlxsw_sp1_resources_register(struct mlxsw_core *mlxsw_core)
if (err)
goto err_resources_rif_mac_profile_register;
err = mlxsw_sp_resources_rifs_register(mlxsw_core);
if (err)
goto err_resources_rifs_register;
return 0;
err_resources_rifs_register:
err_resources_rif_mac_profile_register:
err_policer_resources_register:
err_resources_counter_register:
......@@ -3638,8 +3662,13 @@ static int mlxsw_sp2_resources_register(struct mlxsw_core *mlxsw_core)
if (err)
goto err_resources_rif_mac_profile_register;
err = mlxsw_sp_resources_rifs_register(mlxsw_core);
if (err)
goto err_resources_rifs_register;
return 0;
err_resources_rifs_register:
err_resources_rif_mac_profile_register:
err_policer_resources_register:
err_resources_counter_register:
......
......@@ -68,6 +68,7 @@ enum mlxsw_sp_resource_id {
MLXSW_SP_RESOURCE_GLOBAL_POLICERS,
MLXSW_SP_RESOURCE_SINGLE_RATE_POLICERS,
MLXSW_SP_RESOURCE_RIF_MAC_PROFILES,
MLXSW_SP_RESOURCE_RIFS,
};
struct mlxsw_sp_port;
......
......@@ -8134,6 +8134,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_rif_counters_alloc(rif);
}
atomic_inc(&mlxsw_sp->router->rifs_count);
return rif;
err_stats_enable:
......@@ -8163,6 +8164,7 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
struct mlxsw_sp_vr *vr;
int i;
atomic_dec(&mlxsw_sp->router->rifs_count);
mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
vr = &mlxsw_sp->router->vrs[rif->vr_id];
......@@ -8321,6 +8323,13 @@ static u64 mlxsw_sp_rif_mac_profiles_occ_get(void *priv)
return atomic_read(&mlxsw_sp->router->rif_mac_profiles_count);
}
/* Occupancy getter for the "rifs" devlink resource.
 *
 * @priv is the struct mlxsw_sp pointer supplied when the getter was
 * registered; the value returned is the router's current rifs_count.
 */
static u64 mlxsw_sp_rifs_occ_get(void *priv)
{
	const struct mlxsw_sp *sp = priv;

	return atomic_read(&sp->router->rifs_count);
}
static struct mlxsw_sp_rif_mac_profile *
mlxsw_sp_rif_mac_profile_create(struct mlxsw_sp *mlxsw_sp, const char *mac,
struct netlink_ext_ack *extack)
......@@ -9652,6 +9661,7 @@ mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
if (err)
goto ul_rif_op_err;
atomic_inc(&mlxsw_sp->router->rifs_count);
return ul_rif;
ul_rif_op_err:
......@@ -9664,6 +9674,7 @@ static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
{
struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
atomic_dec(&mlxsw_sp->router->rifs_count);
mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
kfree(ul_rif);
......@@ -9819,10 +9830,15 @@ static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
idr_init(&mlxsw_sp->router->rif_mac_profiles_idr);
atomic_set(&mlxsw_sp->router->rif_mac_profiles_count, 0);
atomic_set(&mlxsw_sp->router->rifs_count, 0);
devlink_resource_occ_get_register(devlink,
MLXSW_SP_RESOURCE_RIF_MAC_PROFILES,
mlxsw_sp_rif_mac_profiles_occ_get,
mlxsw_sp);
devlink_resource_occ_get_register(devlink,
MLXSW_SP_RESOURCE_RIFS,
mlxsw_sp_rifs_occ_get,
mlxsw_sp);
return 0;
}
......@@ -9832,9 +9848,11 @@ static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
int i;
WARN_ON_ONCE(atomic_read(&mlxsw_sp->router->rifs_count));
for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
devlink_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_RIFS);
devlink_resource_occ_get_unregister(devlink,
MLXSW_SP_RESOURCE_RIF_MAC_PROFILES);
WARN_ON(!idr_is_empty(&mlxsw_sp->router->rif_mac_profiles_idr));
......
......@@ -20,6 +20,7 @@ struct mlxsw_sp_router {
struct mlxsw_sp_rif **rifs;
struct idr rif_mac_profiles_idr;
atomic_t rif_mac_profiles_count;
atomic_t rifs_count;
u8 max_rif_mac_profile;
struct mlxsw_sp_vr *vrs;
struct rhashtable neigh_ht;
......
......@@ -953,16 +953,16 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
.trap = MLXSW_SP_TRAP_CONTROL(ARP_REQUEST, NEIGH_DISCOVERY,
MIRROR),
.listeners_arr = {
MLXSW_SP_RXL_MARK(ARPBC, NEIGH_DISCOVERY, MIRROR_TO_CPU,
false),
MLXSW_SP_RXL_MARK(ROUTER_ARPBC, NEIGH_DISCOVERY,
TRAP_TO_CPU, false),
},
},
{
.trap = MLXSW_SP_TRAP_CONTROL(ARP_RESPONSE, NEIGH_DISCOVERY,
MIRROR),
.listeners_arr = {
MLXSW_SP_RXL_MARK(ARPUC, NEIGH_DISCOVERY, MIRROR_TO_CPU,
false),
MLXSW_SP_RXL_MARK(ROUTER_ARPUC, NEIGH_DISCOVERY,
TRAP_TO_CPU, false),
},
},
{
......
......@@ -27,8 +27,6 @@ enum {
MLXSW_TRAP_ID_PKT_SAMPLE = 0x38,
MLXSW_TRAP_ID_FID_MISS = 0x3D,
MLXSW_TRAP_ID_DECAP_ECN0 = 0x40,
MLXSW_TRAP_ID_ARPBC = 0x50,
MLXSW_TRAP_ID_ARPUC = 0x51,
MLXSW_TRAP_ID_MTUERROR = 0x52,
MLXSW_TRAP_ID_TTLERROR = 0x53,
MLXSW_TRAP_ID_LBERROR = 0x54,
......@@ -71,6 +69,8 @@ enum {
MLXSW_TRAP_ID_IPV6_BFD = 0xD1,
MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6,
MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7,
MLXSW_TRAP_ID_ROUTER_ARPBC = 0xE0,
MLXSW_TRAP_ID_ROUTER_ARPUC = 0xE1,
MLXSW_TRAP_ID_DISCARD_NON_ROUTABLE = 0x11A,
MLXSW_TRAP_ID_DISCARD_ROUTER2 = 0x130,
MLXSW_TRAP_ID_DISCARD_ROUTER3 = 0x131,
......
# SPDX-License-Identifier: GPL-2.0
RIF_COUNTER_NUM_NETIFS=2
# Print (without a trailing newline) the IPv4 address of host $2 inside the
# four-address block assigned to RIF index $1.
#
# $1 - RIF index; selects the third octet (index / 64) and the base of the
#      fourth octet (4 * index modulo 256).
# $2 - host offset added to the base of the fourth octet.
rif_counter_addr4()
{
	local idx=$1; shift
	local host=$1; shift
	local octet3=$((idx / 64))
	local octet4=$(((4 * idx) % 256 + host))

	printf 192.0.%d.%d $octet3 $octet4
}
# Same as rif_counter_addr4 (arguments are passed through unchanged), with
# the /30 prefix length appended. No trailing newline is printed.
rif_counter_addr4pfx()
{
	printf '%s/30' "$(rif_counter_addr4 $@)"
}
# Set up the sending host side of the test by calling simple_if_init on $h1.
rif_counter_h1_create()
{
simple_if_init $h1
}
# Undo rif_counter_h1_create by calling simple_if_fini on $h1.
rif_counter_h1_destroy()
{
simple_if_fini $h1
}
# Set up $h2, the port that later carries the VLAN uppers under test, by
# calling simple_if_init on it.
rif_counter_h2_create()
{
simple_if_init $h2
}
# Undo rif_counter_h2_create by calling simple_if_fini on $h2.
rif_counter_h2_destroy()
{
simple_if_fini $h2
}
# Prepare the topology for one leg of the RIF counter scale test.
#
# Resolves the two test ports from NETIFS (keys p1 and p2) into the globals
# h1 and h2, which the other rif_counter_* functions read, then calls
# vrf_prepare followed by the h1/h2 create helpers.
rif_counter_setup_prepare()
{
h1=${NETIFS[p1]}
h2=${NETIFS[p2]}
vrf_prepare
rif_counter_h1_create
rif_counter_h2_create
}
# Tear down everything one leg of the RIF counter scale test created.
#
# $1 - number of VLAN uppers of $h2 to remove (VIDs 1..$1), i.e. the count
#      that was given to rif_counter_test.
#
# Also removes the batch file left behind by rif_counter_test, if any.
rif_counter_cleanup()
{
	local count=$1; shift
	# Keep the loop counter out of the caller's scope, the way
	# rif_counter_traffic_test already does.
	local i

	pre_cleanup

	for ((i = 1; i <= count; i++)); do
		vlan_destroy $h2 $i
	done

	rif_counter_h2_destroy
	rif_counter_h1_destroy

	vrf_cleanup

	# The variable is only set once rif_counter_test has run.
	if [[ -v RIF_COUNTER_BATCH_FILE ]]; then
		rm -f "$RIF_COUNTER_BATCH_FILE"
	fi
}
# Create $1 VLAN uppers on $h2 and try to enable L3 HW stats on all of them.
#
# $1 - number of VLAN uppers (VIDs 1..$1) to create; each gets the .2 host
#      address of its /30 from rif_counter_addr4pfx.
# $2 - should_fail flag, handed to check_err_fail together with the exit
#      status of the batched "ip stats set" run.
#
# The path of the generated batch file is left in the global
# RIF_COUNTER_BATCH_FILE so that rif_counter_cleanup can remove it.
rif_counter_test()
{
	local count=$1; shift
	local should_fail=$1; shift
	# Keep the loop counter out of the caller's scope, the way
	# rif_counter_traffic_test already does.
	local i

	RIF_COUNTER_BATCH_FILE="$(mktemp)"

	for ((i = 1; i <= count; i++)); do
		vlan_create $h2 $i v$h2 $(rif_counter_addr4pfx $i 2)
	done

	# One "stats set" line per VLAN upper; the batch file is opened once
	# for the whole loop instead of once per line.
	for ((i = 1; i <= count; i++)); do
		printf 'stats set dev %s.%s l3_stats on\n' "$h2" "$i"
	done >> "$RIF_COUNTER_BATCH_FILE"

	ip -b "$RIF_COUNTER_BATCH_FILE"
	check_err_fail $should_fail $? "RIF counter enablement"
}
# Traffic leg of the RIF counter scale test.
#
# $1 - number of VLAN uppers created on $h2 by rif_counter_test.
#
# Only a sample of the RIFs is exercised: VIDs $1, $1/2, $1/4, ..., 1.
# First a single UDP packet is sent from $h1 with $MZ for every sampled VID
# (passed via -Q), addressed from the .1 to the .2 host of that RIF's block.
# Only after all packets are sent is each sampled $h2.<vid> polled until its
# l3_stats "rx packets" counter reads exactly 1.
rif_counter_traffic_test()
{
	local count=$1; shift
	local vid

	vid=$((count))
	while ((vid > 0)); do
		$MZ $h1 -Q $vid -c 1 -d 20msec -p 100 -a own -b $(mac_get $h2) \
			-A $(rif_counter_addr4 $vid 1) \
			-B $(rif_counter_addr4 $vid 2) \
			-q -t udp sp=54321,dp=12345
		vid=$((vid / 2))
	done

	vid=$((count))
	while ((vid > 0)); do
		busywait "$TC_HIT_TIMEOUT" until_counter_is "== 1" \
			hw_stats_get l3_stats $h2.$vid rx packets > /dev/null
		check_err $? "Traffic not seen at RIF $h2.$vid"
		vid=$((vid / 2))
	done
}
......@@ -25,7 +25,16 @@ cleanup()
trap cleanup EXIT
ALL_TESTS="router tc_flower mirror_gre tc_police port rif_mac_profile"
ALL_TESTS="
router
tc_flower
mirror_gre
tc_police
port
rif_mac_profile
rif_counter
"
for current_test in ${TESTS:-$ALL_TESTS}; do
RET_FIN=0
source ${current_test}_scale.sh
......@@ -36,16 +45,32 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
for should_fail in 0 1; do
RET=0
target=$(${current_test}_get_target "$should_fail")
if ((target == 0)); then
log_test_skip "'$current_test' should_fail=$should_fail test"
continue
fi
${current_test}_setup_prepare
setup_wait $num_netifs
# Update target in case occupancy of a certain resource changed
# following the test setup.
target=$(${current_test}_get_target "$should_fail")
${current_test}_test "$target" "$should_fail"
${current_test}_cleanup
devlink_reload
if [[ "$should_fail" -eq 0 ]]; then
log_test "'$current_test' $target"
if ((!RET)); then
tt=${current_test}_traffic_test
if [[ $(type -t $tt) == "function" ]]; then
$tt "$target"
log_test "'$current_test' $target traffic test"
fi
fi
else
log_test "'$current_test' overflow $target"
fi
${current_test}_cleanup $target
devlink_reload
RET_FIN=$(( RET_FIN || RET ))
done
done
......
../spectrum/rif_counter_scale.sh
\ No newline at end of file
......@@ -4,17 +4,22 @@ source ../tc_flower_scale.sh
tc_flower_get_target()
{
local should_fail=$1; shift
local max_cnts
# The driver associates a counter with each tc filter, which means the
# number of supported filters is bounded by the number of available
# counters.
# Currently, the driver supports 30K (30,720) flow counters and six of
# these are used for multicast routing.
local target=30714
max_cnts=$(devlink_resource_size_get counters flow)
# Remove already allocated counters.
((max_cnts -= $(devlink_resource_occ_get counters flow)))
# Each rule uses two counters, for packets and bytes.
((max_cnts /= 2))
if ((! should_fail)); then
echo $target
echo $max_cnts
else
echo $((target + 1))
echo $((max_cnts + 1))
fi
}
......@@ -22,7 +22,16 @@ cleanup()
devlink_sp_read_kvd_defaults
trap cleanup EXIT
ALL_TESTS="router tc_flower mirror_gre tc_police port rif_mac_profile"
ALL_TESTS="
router
tc_flower
mirror_gre
tc_police
port
rif_mac_profile
rif_counter
"
for current_test in ${TESTS:-$ALL_TESTS}; do
RET_FIN=0
source ${current_test}_scale.sh
......@@ -41,15 +50,31 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
for should_fail in 0 1; do
RET=0
target=$(${current_test}_get_target "$should_fail")
if ((target == 0)); then
log_test_skip "'$current_test' [$profile] should_fail=$should_fail test"
continue
fi
${current_test}_setup_prepare
setup_wait $num_netifs
# Update target in case occupancy of a certain resource
# changed following the test setup.
target=$(${current_test}_get_target "$should_fail")
${current_test}_test "$target" "$should_fail"
${current_test}_cleanup
if [[ "$should_fail" -eq 0 ]]; then
log_test "'$current_test' [$profile] $target"
if ((!RET)); then
tt=${current_test}_traffic_test
if [[ $(type -t $tt) == "function" ]]
then
$tt "$target"
log_test "'$current_test' [$profile] $target traffic test"
fi
fi
else
log_test "'$current_test' [$profile] overflow $target"
fi
${current_test}_cleanup $target
RET_FIN=$(( RET_FIN || RET ))
done
done
......
# SPDX-License-Identifier: GPL-2.0
source ../rif_counter_scale.sh
# Print the number of RIF counter instances the scale test should aim for.
#
# $1 - should_fail: 0 for the leg that is expected to succeed, non-zero for
#      the overflow leg.
#
# Output: the smaller of the free RIFs and the RIF counter capacity for the
# succeeding leg, that number plus one for the overflow leg, or 0 when the
# overflow leg cannot be run meaningfully.
rif_counter_get_target()
{
	local should_fail=$1; shift
	local free_rifs
	local cnt_capacity
	local limit

	free_rifs=$(devlink_resource_size_get rifs)
	cnt_capacity=$(devlink_resource_size_get counters rif)

	# RIFs that are already allocated are not available to the test.
	free_rifs=$((free_rifs - $(devlink_resource_occ_get rifs)))

	# Each counter takes 10 KVD slots and each RIF needs two counters
	# (ingress + egress), hence 20 slots per RIF.
	cnt_capacity=$((cnt_capacity / 20))

	# With fewer RIFs than counters, the overflow leg would exhaust RIFs
	# rather than RIF counters, so it is pointless to run it.
	if ((should_fail && cnt_capacity > free_rifs)); then
		echo 0
		return
	fi

	if ((free_rifs < cnt_capacity)); then
		limit=$free_rifs
	else
		limit=$cnt_capacity
	fi

	echo $((limit + (should_fail ? 1 : 0)))
}
......@@ -77,6 +77,7 @@ tc_flower_rules_create()
filter add dev $h2 ingress \
prot ipv6 \
pref 1000 \
handle 42$i \
flower $tcflags dst_ip $(tc_flower_addr $i) \
action drop
EOF
......@@ -121,3 +122,19 @@ tc_flower_test()
tcflags="skip_sw"
__tc_flower_test $count $should_fail
}
# Traffic leg of the tc_flower scale test.
#
# $1 - number of flower rules that were installed.
#
# Only a sample of the rules is exercised: indices $1 - 1, ($1 - 1) / 2, ...
# down to 1. First a single IPv6 UDP packet is sent from $h1 with $MZ towards
# tc_flower_addr of every sampled index; only afterwards is each sampled
# filter (handle 42<index> on "dev $h2 ingress") checked with
# tc_check_packets for a count of 1.
tc_flower_traffic_test()
{
	local count=$1; shift
	local rule

	rule=$((count - 1))
	while ((rule > 0)); do
		$MZ -6 $h1 -c 1 -d 20msec -p 100 -a own -b $(mac_get $h2) \
			-A $(tc_flower_addr 0) -B $(tc_flower_addr $rule) \
			-q -t udp sp=54321,dp=12345
		rule=$((rule / 2))
	done

	rule=$((count - 1))
	while ((rule > 0)); do
		tc_check_packets "dev $h2 ingress" 42$rule 1
		check_err $? "Traffic not seen at rule #$rule"
		rule=$((rule / 2))
	done
}
......@@ -141,12 +141,13 @@ switch_create()
ip link set dev $swp4 up
ip link add name br1 type bridge vlan_filtering 1
ip link set dev br1 up
__addr_add_del br1 add 192.0.2.129/32
ip -4 route add 192.0.2.130/32 dev br1
team_create lag loadbalance $swp3 $swp4
ip link set dev lag master br1
ip link set dev br1 up
__addr_add_del br1 add 192.0.2.129/32
ip -4 route add 192.0.2.130/32 dev br1
}
switch_destroy()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment