Commit 838b6c9c authored by Jakub Kicinski

Merge branch 'mlxsw-support-for-nexthop-group-statistics'

Petr Machata says:

====================
mlxsw: Support for nexthop group statistics

ECMP is a fundamental component in L3 designs. However, it's fragile. Many
factors influence whether an ECMP group will operate as intended: hash
policy (i.e. the set of fields that contribute to ECMP hash calculation),
neighbor validity, hash seed (which might lead to polarization) or the type
of ECMP group used (hash-threshold or resilient).

At the same time, collecting statistics that would help an operator
determine whether the group performs as desired is difficult.

Support for nexthop group statistics and their HW collection has been
introduced recently. In this patch set, add HW stats collection support
to mlxsw.

This patchset progresses as follows:

- Patches #1 and #2 add nexthop IDs to notifiers.
- Patches #3 and #4 are code-shaping.
- Patches #5, #6 and #7 adjust the flow counter code.
- Patches #8 and #9 add HW nexthop counters.
- Patch #10 adjusts the HW counter code to allow sharing the same counter
  for several resilient group buckets with the same NH ID.
- Patch #11 adds a selftest.
====================

Link: https://lore.kernel.org/r/cover.1709901020.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents e5b7aefe a22b0426
...@@ -176,13 +176,15 @@ MLXSW_ITEM32(tx, hdr, fid, 0x08, 16, 16); ...@@ -176,13 +176,15 @@ MLXSW_ITEM32(tx, hdr, fid, 0x08, 16, 16);
MLXSW_ITEM32(tx, hdr, type, 0x0C, 0, 4); MLXSW_ITEM32(tx, hdr, type, 0x0C, 0, 4);
int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp, int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp,
unsigned int counter_index, u64 *packets, unsigned int counter_index, bool clear,
u64 *bytes) u64 *packets, u64 *bytes)
{ {
enum mlxsw_reg_mgpc_opcode op = clear ? MLXSW_REG_MGPC_OPCODE_CLEAR :
MLXSW_REG_MGPC_OPCODE_NOP;
char mgpc_pl[MLXSW_REG_MGPC_LEN]; char mgpc_pl[MLXSW_REG_MGPC_LEN];
int err; int err;
mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_NOP, mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, op,
MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES); MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES);
err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl);
if (err) if (err)
......
...@@ -706,8 +706,8 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev, ...@@ -706,8 +706,8 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev,
int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin,
u16 vid_end, bool is_member, bool untagged); u16 vid_end, bool is_member, bool untagged);
int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp, int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp,
unsigned int counter_index, u64 *packets, unsigned int counter_index, bool clear,
u64 *bytes); u64 *packets, u64 *bytes);
int mlxsw_sp_flow_counter_alloc(struct mlxsw_sp *mlxsw_sp, int mlxsw_sp_flow_counter_alloc(struct mlxsw_sp *mlxsw_sp,
unsigned int *p_counter_index); unsigned int *p_counter_index);
void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp, void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp,
......
...@@ -1024,7 +1024,7 @@ int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp, ...@@ -1024,7 +1024,7 @@ int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp,
rulei = mlxsw_sp_acl_rule_rulei(rule); rulei = mlxsw_sp_acl_rule_rulei(rule);
if (rulei->counter_valid) { if (rulei->counter_valid) {
err = mlxsw_sp_flow_counter_get(mlxsw_sp, rulei->counter_index, err = mlxsw_sp_flow_counter_get(mlxsw_sp, rulei->counter_index,
&current_packets, false, &current_packets,
&current_bytes); &current_bytes);
if (err) if (err)
return err; return err;
......
...@@ -1181,9 +1181,11 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable) ...@@ -1181,9 +1181,11 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)
char ratr_pl[MLXSW_REG_RATR_LEN]; char ratr_pl[MLXSW_REG_RATR_LEN];
struct mlxsw_sp *mlxsw_sp = priv; struct mlxsw_sp *mlxsw_sp = priv;
struct mlxsw_sp_nexthop *nh; struct mlxsw_sp_nexthop *nh;
unsigned int n_done = 0;
u32 adj_hash_index = 0; u32 adj_hash_index = 0;
u32 adj_index = 0; u32 adj_index = 0;
u32 adj_size = 0; u32 adj_size = 0;
int err;
mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) { mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
if (!mlxsw_sp_nexthop_is_forward(nh) || if (!mlxsw_sp_nexthop_is_forward(nh) ||
...@@ -1192,15 +1194,27 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable) ...@@ -1192,15 +1194,27 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)
mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_size, mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_size,
&adj_hash_index); &adj_hash_index);
if (enable) if (enable) {
mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); err = mlxsw_sp_nexthop_counter_enable(mlxsw_sp, nh);
else if (err)
mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); goto err_counter_enable;
} else {
mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
}
mlxsw_sp_nexthop_eth_update(mlxsw_sp, mlxsw_sp_nexthop_eth_update(mlxsw_sp,
adj_index + adj_hash_index, nh, adj_index + adj_hash_index, nh,
true, ratr_pl); true, ratr_pl);
n_done++;
} }
return 0; return 0;
err_counter_enable:
mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
if (!n_done--)
break;
mlxsw_sp_nexthop_counter_disable(mlxsw_sp, nh);
}
return err;
} }
static u64 static u64
......
...@@ -361,7 +361,7 @@ static int mlxsw_sp_mr_tcam_route_stats(struct mlxsw_sp *mlxsw_sp, ...@@ -361,7 +361,7 @@ static int mlxsw_sp_mr_tcam_route_stats(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_mr_tcam_route *route = route_priv; struct mlxsw_sp_mr_tcam_route *route = route_priv;
return mlxsw_sp_flow_counter_get(mlxsw_sp, route->counter_index, return mlxsw_sp_flow_counter_get(mlxsw_sp, route->counter_index,
packets, bytes); false, packets, bytes);
} }
static int static int
......
...@@ -156,10 +156,10 @@ int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp, ...@@ -156,10 +156,10 @@ int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
struct mlxsw_sp_nexthop *nh, bool force, struct mlxsw_sp_nexthop *nh, bool force,
char *ratr_pl); char *ratr_pl);
void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp, int mlxsw_sp_nexthop_counter_enable(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh); struct mlxsw_sp_nexthop *nh);
void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp, void mlxsw_sp_nexthop_counter_disable(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh); struct mlxsw_sp_nexthop *nh);
static inline bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1, static inline bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1,
const union mlxsw_sp_l3addr *addr2) const union mlxsw_sp_l3addr *addr2)
......
...@@ -185,6 +185,7 @@ struct nh_notifier_single_info { ...@@ -185,6 +185,7 @@ struct nh_notifier_single_info {
__be32 ipv4; __be32 ipv4;
struct in6_addr ipv6; struct in6_addr ipv6;
}; };
u32 id;
u8 is_reject:1, u8 is_reject:1,
is_fdb:1, is_fdb:1,
has_encap:1; has_encap:1;
...@@ -192,7 +193,6 @@ struct nh_notifier_single_info { ...@@ -192,7 +193,6 @@ struct nh_notifier_single_info {
struct nh_notifier_grp_entry_info { struct nh_notifier_grp_entry_info {
u8 weight; u8 weight;
u32 id;
struct nh_notifier_single_info nh; struct nh_notifier_single_info nh;
}; };
......
...@@ -104,6 +104,7 @@ __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info, ...@@ -104,6 +104,7 @@ __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info,
else if (nh_info->gw_family == AF_INET6) else if (nh_info->gw_family == AF_INET6)
nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6; nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6;
nh_info->id = nhi->nh_parent->id;
nh_info->is_reject = nhi->reject_nh; nh_info->is_reject = nhi->reject_nh;
nh_info->is_fdb = nhi->fdb_nh; nh_info->is_fdb = nhi->fdb_nh;
nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate; nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate;
...@@ -150,7 +151,6 @@ static int nh_notifier_mpath_info_init(struct nh_notifier_info *info, ...@@ -150,7 +151,6 @@ static int nh_notifier_mpath_info_init(struct nh_notifier_info *info,
struct nh_info *nhi; struct nh_info *nhi;
nhi = rtnl_dereference(nhge->nh->nh_info); nhi = rtnl_dereference(nhge->nh->nh_info);
info->nh_grp->nh_entries[i].id = nhge->nh->id;
info->nh_grp->nh_entries[i].weight = nhge->weight; info->nh_grp->nh_entries[i].weight = nhge->weight;
__nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh, __nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh,
nhi); nhi);
...@@ -407,6 +407,7 @@ static int call_nexthop_res_table_notifiers(struct net *net, struct nexthop *nh, ...@@ -407,6 +407,7 @@ static int call_nexthop_res_table_notifiers(struct net *net, struct nexthop *nh,
struct nh_notifier_info info = { struct nh_notifier_info info = {
.net = net, .net = net,
.extack = extack, .extack = extack,
.id = nh->id,
}; };
struct nh_group *nhg; struct nh_group *nhg;
int err; int err;
......
...@@ -123,6 +123,7 @@ TEST_FILES := devlink_lib.sh \ ...@@ -123,6 +123,7 @@ TEST_FILES := devlink_lib.sh \
mirror_gre_topo_lib.sh \ mirror_gre_topo_lib.sh \
mirror_lib.sh \ mirror_lib.sh \
mirror_topo_lib.sh \ mirror_topo_lib.sh \
router_mpath_nh_lib.sh \
sch_ets_core.sh \ sch_ets_core.sh \
sch_ets_tests.sh \ sch_ets_tests.sh \
sch_tbf_core.sh \ sch_tbf_core.sh \
......
...@@ -900,6 +900,33 @@ hw_stats_get() ...@@ -900,6 +900,33 @@ hw_stats_get()
jq ".[0].stats64.$dir.$stat" jq ".[0].stats64.$dir.$stat"
} }
# Print one statistic of one member of a nexthop group.
#
# Arguments:
#   $1 - name of the per-member field to print (e.g. "packets")
#   $2 - ID of the nexthop group to query
#   $3 - ID of the member nexthop inside that group
#
# The value is taken from the "group_stats" array of the JSON emitted by
# "ip -j -s -s nexthop show".
__nh_stats_get()
{
	local field=$1; shift
	local grp=$1; shift
	local member=$1; shift
	# $member_id and $key below are jq variables, not shell ones; they are
	# bound by --argjson / --arg on the jq command line.
	local filter='.[].group_stats[] | select(.id == $member_id) | .[$key]'

	ip -j -s -s nexthop show id $grp |
		jq --argjson member_id "$member" --arg key "$field" "$filter"
}
# Print the "packets" statistic of a nexthop group member.
#
# Arguments:
#   $1 - ID of the nexthop group
#   $2 - ID of the member nexthop
nh_stats_get()
{
	local grp=$1; shift
	local member=$1; shift

	__nh_stats_get packets "$grp" "$member"
}
# Print the "packets_hw" statistic of a nexthop group member.
#
# Arguments:
#   $1 - ID of the nexthop group
#   $2 - ID of the member nexthop
nh_stats_get_hw()
{
	local grp=$1; shift
	local member=$1; shift

	__nh_stats_get packets_hw "$grp" "$member"
}
humanize() humanize()
{ {
local speed=$1; shift local speed=$1; shift
...@@ -2010,3 +2037,10 @@ bail_on_lldpad() ...@@ -2010,3 +2037,10 @@ bail_on_lldpad()
fi fi
fi fi
} }
# Print the absolute value of the integer given as $1.
absval()
{
	local num=$1; shift

	echo $((num < 0 ? -num : num))
}
...@@ -7,9 +7,12 @@ ALL_TESTS=" ...@@ -7,9 +7,12 @@ ALL_TESTS="
multipath_test multipath_test
ping_ipv4_blackhole ping_ipv4_blackhole
ping_ipv6_blackhole ping_ipv6_blackhole
nh_stats_test_v4
nh_stats_test_v6
" "
NUM_NETIFS=8 NUM_NETIFS=8
source lib.sh source lib.sh
source router_mpath_nh_lib.sh
h1_create() h1_create()
{ {
...@@ -325,6 +328,16 @@ ping_ipv6_blackhole() ...@@ -325,6 +328,16 @@ ping_ipv6_blackhole()
ip -6 nexthop del id 1001 ip -6 nexthop del id 1001
} }
# IPv4 nexthop group statistics test; "mpath" is handed to
# __nh_stats_test_v4 as the nexthop group type.
nh_stats_test_v4()
{
__nh_stats_test_v4 mpath
}
# IPv6 nexthop group statistics test; "mpath" is handed to
# __nh_stats_test_v6 as the nexthop group type.
nh_stats_test_v6()
{
__nh_stats_test_v6 mpath
}
setup_prepare() setup_prepare()
{ {
h1=${NETIFS[p1]} h1=${NETIFS[p1]}
......
# SPDX-License-Identifier: GPL-2.0
# Check that per-nexthop group statistics track the egress link counters.
#
# Arguments:
#   $1 - label appended to the logged test name
#   $2 - ID of the nexthop compared against the TX counter of $rp12
#   $3 - ID of the nexthop compared against the TX counter of $rp13
#   $4 - ID of the nexthop group holding both nexthops
#   $5 - function used to read a nexthop counter; called as
#        "<function> <group ID> <nexthop ID>"
#   remaining - traffic generator command line
#
# Six bursts of UDP traffic are sent, each over a different 10000-wide
# destination port range. For every burst, the growth of each link TX
# counter must be within 10 packets of the growth of the matching nexthop
# counter.
nh_stats_do_test()
{
	local label=$1; shift
	local first_nh=$1; shift
	local second_nh=$1; shift
	local grp=$1; shift
	local getter=$1; shift
	local mz="$@"
	local port_lo
	local port_hi

	RET=0

	sleep 2
	for ((port_lo = 0; port_lo < 60000; port_lo += 10000)); do
		local gap

		port_hi=$((port_lo + 10000))

		# Snapshot all four counters before the burst.
		local link12_before=$(link_stats_tx_packets_get $rp12)
		local link13_before=$(link_stats_tx_packets_get $rp13)
		local nh1_before=$($getter $grp $first_nh)
		local nh2_before=$($getter $grp $second_nh)

		# $mz is deliberately unquoted: it carries the whole generator
		# command line in one string and relies on word splitting.
		ip vrf exec vrf-h1 \
			$mz -q -p 64 -d 0 -t udp \
			"sp=1024,dp=$port_lo-$port_hi"
		sleep 2

		# Snapshot again, in the same order, after the burst.
		local link12_after=$(link_stats_tx_packets_get $rp12)
		local link13_after=$(link_stats_tx_packets_get $rp13)
		local nh1_after=$($getter $grp $first_nh)
		local nh2_after=$($getter $grp $second_nh)

		local link12_delta=$((link12_after - link12_before))
		local link13_delta=$((link13_after - link13_before))
		local nh1_delta=$((nh1_after - nh1_before))
		local nh2_delta=$((nh2_after - nh2_before))

		gap=$(absval $((link12_delta - nh1_delta)))
		((gap < 10))
		check_err $? "Discrepancy between link and $getter: d_rp12=$link12_delta d_nh1=$nh1_delta"

		gap=$(absval $((link13_delta - nh2_delta)))
		((gap < 10))
		check_err $? "Discrepancy between link and $getter: d_rp13=$link13_delta d_nh2=$nh2_delta"
	done

	log_test "NH stats test $label"
}
# Run the nexthop group statistics test against the software counters and,
# if the group reports its HW stats as used, against the HW counters too.
#
# Arguments:
#   $1 - label for the logged test name
#   $2 - ID of the first member nexthop
#   $3 - ID of the second member nexthop
#   $4 - ID of the nexthop group
#   remaining - traffic generator command line
nh_stats_test_dispatch_swhw()
{
	local what=$1; shift
	local nh1_id=$1; shift
	local nh2_id=$1; shift
	local group_id=$1; shift
	# Hold the generator command line in a real array so that the
	# "${mz[@]}" expansions below forward exactly the words received.
	local mz=("$@")
	local used
	# Declared locally so the assignment below cannot leak into, or
	# clobber, a variable of the same name in an outer scope.
	local kind

	nh_stats_do_test "$what" "$nh1_id" "$nh2_id" "$group_id" \
		nh_stats_get "${mz[@]}"

	used=$(ip -s -j -d nexthop show id $group_id |
		jq '.[].hw_stats.used')
	kind=$(ip -j -d link show dev $rp11 |
		jq -r '.[].linkinfo.info_kind')
	if [[ $used == true ]]; then
		nh_stats_do_test "HW $what" "$nh1_id" "$nh2_id" "$group_id" \
			nh_stats_get_hw "${mz[@]}"
	elif [[ $kind == veth ]]; then
		log_test_skip "HW stats not offloaded on veth topology"
	fi
	# NOTE(review): when HW stats are not used and $rp11 is not a veth,
	# nothing is logged for the HW variant - confirm this is intended.
}
# Enable HW statistics on a nexthop group, run the statistics tests if the
# group reports them as enabled, then turn HW statistics off again.
#
# Arguments:
#   $1 - nexthop group type passed to "ip nexthop replace ... type"
#   $2 - label for the logged test name
#   $3 - ID of the first member nexthop
#   $4 - ID of the second member nexthop
#   $5 - ID of the nexthop group
#   remaining - traffic generator command line
#
# The test is skipped when "ip nexthop help" does not mention hw_stats, or
# when the JSON output carries no hw_stats.enabled value.
nh_stats_test_dispatch()
{
	local nhgtype=$1; shift
	local what=$1; shift
	local nh1_id=$1; shift
	local nh2_id=$1; shift
	local group_id=$1; shift
	# Hold the generator command line in a real array so that the
	# "${mz[@]}" expansion below forwards exactly the words received.
	local mz=("$@")
	local enabled
	# Not read here. Declared so that an undeclared `kind' assignment in
	# a callee stays confined to this call (bash scoping is dynamic).
	local kind

	if ! ip nexthop help 2>&1 | grep -q hw_stats; then
		log_test_skip "NH stats test: ip doesn't support HW stats"
		return
	fi

	ip nexthop replace id $group_id group $nh1_id/$nh2_id \
		hw_stats on type $nhgtype
	enabled=$(ip -s -j -d nexthop show id $group_id |
		jq '.[].hw_stats.enabled')
	if [[ $enabled == true ]]; then
		nh_stats_test_dispatch_swhw "$what" "$nh1_id" "$nh2_id" \
			"$group_id" "${mz[@]}"
	elif [[ $enabled == false ]]; then
		# Start from a clean result, as nh_stats_do_test does, so that
		# what gets logged is this failure rather than state left over
		# from an earlier test.
		RET=0
		check_err 1 "HW stats still disabled after enabling"
		log_test "NH stats test"
	else
		log_test_skip "NH stats test: ip doesn't report hw_stats info"
	fi

	ip nexthop replace id $group_id group $nh1_id/$nh2_id \
		hw_stats off type $nhgtype
}
# IPv4 nexthop group statistics test.
#
# Arguments:
#   $1 - nexthop group type forwarded to nh_stats_test_dispatch
#
# Uses nexthops 101 and 102 in group 103. The IPv4 multipath hash policy is
# set to 1 for the duration of the test and restored afterwards.
# NOTE(review): presumably policy 1 is needed so that the varying UDP
# destination ports spread the flows over both nexthops - confirm.
__nh_stats_test_v4()
{
	local group_type=$1; shift
	local knob=net.ipv4.fib_multipath_hash_policy

	sysctl_set $knob 1
	nh_stats_test_dispatch $group_type "IPv4" 101 102 103 \
		$MZ $h1 -A 192.0.2.2 -B 198.51.100.2
	sysctl_restore $knob
}
# IPv6 nexthop group statistics test.
#
# Arguments:
#   $1 - nexthop group type forwarded to nh_stats_test_dispatch
#
# Uses nexthops 104 and 105 in group 106. The IPv6 multipath hash policy is
# set to 1 for the duration of the test and restored afterwards.
# NOTE(review): presumably policy 1 is needed so that the varying UDP
# destination ports spread the flows over both nexthops - confirm.
__nh_stats_test_v6()
{
	local group_type=$1; shift
	local knob=net.ipv6.fib_multipath_hash_policy

	sysctl_set $knob 1
	nh_stats_test_dispatch $group_type "IPv6" 104 105 106 \
		$MZ -6 $h1 -A 2001:db8:1::2 -B 2001:db8:2::2
	sysctl_restore $knob
}
...@@ -5,9 +5,12 @@ ALL_TESTS=" ...@@ -5,9 +5,12 @@ ALL_TESTS="
ping_ipv4 ping_ipv4
ping_ipv6 ping_ipv6
multipath_test multipath_test
nh_stats_test_v4
nh_stats_test_v6
" "
NUM_NETIFS=8 NUM_NETIFS=8
source lib.sh source lib.sh
source router_mpath_nh_lib.sh
h1_create() h1_create()
{ {
...@@ -333,6 +336,16 @@ multipath_test() ...@@ -333,6 +336,16 @@ multipath_test()
ip nexthop replace id 106 group 104,1/105,1 type resilient ip nexthop replace id 106 group 104,1/105,1 type resilient
} }
# IPv4 nexthop group statistics test; "resilient" is handed to
# __nh_stats_test_v4 as the nexthop group type.
nh_stats_test_v4()
{
__nh_stats_test_v4 resilient
}
# IPv6 nexthop group statistics test; "resilient" is handed to
# __nh_stats_test_v6 as the nexthop group type.
nh_stats_test_v6()
{
__nh_stats_test_v6 resilient
}
setup_prepare() setup_prepare()
{ {
h1=${NETIFS[p1]} h1=${NETIFS[p1]}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment