Commit 3333e50b authored by David S. Miller

Merge branch 'mlxsw-Offload-TBF'

Ido Schimmel says:

====================
mlxsw: Offload TBF

Petr says:

In order to allow configuration of shapers on Spectrum family of
machines, recognize TBF either as root Qdisc, or as a child of ETS or
PRIO. Configure rate of maximum shaper according to TBF rate setting,
and maximum shaper burst size according to TBF burst setting.

- Patches #1 and #2 make the TBF shaper suitable for offloading.
- Patches #3, #4 and #5 are refactoring aimed at easier support of leaf
  Qdiscs in general.
- Patches #6 to #10 gradually introduce TBF offload.
- Patches #11 to #14 add selftests.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 2f64ab27 e814c58d
......@@ -3563,8 +3563,8 @@ MLXSW_ITEM32(reg, qeec, min_shaper_rate, 0x0C, 0, 28);
*/
MLXSW_ITEM32(reg, qeec, mase, 0x10, 31, 1);
/* A large max rate will disable the max shaper. */
#define MLXSW_REG_QEEC_MAS_DIS 200000000 /* Kbps */
/* The largest max shaper value possible to disable the shaper. */
#define MLXSW_REG_QEEC_MAS_DIS ((1u << 31) - 1) /* Kbps */
/* reg_qeec_max_shaper_rate
* Max shaper information rate.
......@@ -3602,6 +3602,21 @@ MLXSW_ITEM32(reg, qeec, dwrr, 0x18, 15, 1);
*/
MLXSW_ITEM32(reg, qeec, dwrr_weight, 0x18, 0, 8);
/* reg_qeec_max_shaper_bs
* Max shaper burst size
* Burst size is 2^max_shaper_bs * 512 bits
* For Spectrum-1: Range is: 5..25
* For Spectrum-2: Range is: 11..25
* Reserved when ptps = 1
* Access: RW
*/
MLXSW_ITEM32(reg, qeec, max_shaper_bs, 0x1C, 0, 6);
/* Bounds for the max_shaper_bs field. The upper bound and the Spectrum-1 and
 * Spectrum-2 lower bounds match the ranges listed in the register comment.
 * NOTE(review): the register comment gives no range for Spectrum-3; the
 * SP3 lower bound below equals the Spectrum-1 one - confirm against the PRM.
 */
#define MLXSW_REG_QEEC_HIGHEST_SHAPER_BS 25
#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1 5
#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2 11
#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3 5
static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port,
enum mlxsw_reg_qeec_hr hr, u8 index,
u8 next_index)
......
......@@ -1796,6 +1796,8 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type,
return mlxsw_sp_setup_tc_prio(mlxsw_sp_port, type_data);
case TC_SETUP_QDISC_ETS:
return mlxsw_sp_setup_tc_ets(mlxsw_sp_port, type_data);
case TC_SETUP_QDISC_TBF:
return mlxsw_sp_setup_tc_tbf(mlxsw_sp_port, type_data);
default:
return -EOPNOTSUPP;
}
......@@ -3577,7 +3579,7 @@ int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port,
enum mlxsw_reg_qeec_hr hr, u8 index,
u8 next_index, u32 maxrate)
u8 next_index, u32 maxrate, u8 burst_size)
{
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
char qeec_pl[MLXSW_REG_QEEC_LEN];
......@@ -3586,6 +3588,7 @@ int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port,
next_index);
mlxsw_reg_qeec_mase_set(qeec_pl, true);
mlxsw_reg_qeec_max_shaper_rate_set(qeec_pl, maxrate);
mlxsw_reg_qeec_max_shaper_bs_set(qeec_pl, burst_size);
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl);
}
......@@ -3654,14 +3657,14 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
*/
err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HR_PORT, 0, 0,
MLXSW_REG_QEEC_MAS_DIS);
MLXSW_REG_QEEC_MAS_DIS, 0);
if (err)
return err;
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HR_SUBGROUP,
i, 0,
MLXSW_REG_QEEC_MAS_DIS);
MLXSW_REG_QEEC_MAS_DIS, 0);
if (err)
return err;
}
......@@ -3669,14 +3672,14 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HR_TC,
i, i,
MLXSW_REG_QEEC_MAS_DIS);
MLXSW_REG_QEEC_MAS_DIS, 0);
if (err)
return err;
err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HR_TC,
i + 8, i,
MLXSW_REG_QEEC_MAS_DIS);
MLXSW_REG_QEEC_MAS_DIS, 0);
if (err)
return err;
}
......@@ -5173,6 +5176,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->span_ops = &mlxsw_sp1_span_ops;
mlxsw_sp->listeners = mlxsw_sp1_listener;
mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener);
mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1;
return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack);
}
......@@ -5197,6 +5201,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops;
mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops;
mlxsw_sp->span_ops = &mlxsw_sp2_span_ops;
mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2;
return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack);
}
......@@ -5219,6 +5224,7 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops;
mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops;
mlxsw_sp->span_ops = &mlxsw_sp2_span_ops;
mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3;
return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack);
}
......
......@@ -189,6 +189,7 @@ struct mlxsw_sp {
const struct mlxsw_sp_span_ops *span_ops;
const struct mlxsw_listener *listeners;
size_t listeners_count;
u32 lowest_shaper_bs;
};
static inline struct mlxsw_sp_upper *
......@@ -487,7 +488,7 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu,
struct ieee_pfc *my_pfc);
int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port,
enum mlxsw_reg_qeec_hr hr, u8 index,
u8 next_index, u32 maxrate);
u8 next_index, u32 maxrate, u8 burst_size);
enum mlxsw_reg_spms_state mlxsw_sp_stp_spms_state(u8 stp_state);
int mlxsw_sp_port_vid_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid,
u8 state);
......@@ -861,6 +862,8 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_prio_qopt_offload *p);
int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_ets_qopt_offload *p);
int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_tbf_qopt_offload *p);
/* spectrum_fid.c */
bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index);
......
......@@ -526,7 +526,7 @@ static int mlxsw_sp_dcbnl_ieee_setmaxrate(struct net_device *dev,
err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HR_SUBGROUP,
i, 0,
maxrate->tc_maxrate[i]);
maxrate->tc_maxrate[i], 0);
if (err) {
netdev_err(dev, "Failed to set maxrate for TC %d\n", i);
goto err_port_ets_maxrate_set;
......@@ -541,7 +541,8 @@ static int mlxsw_sp_dcbnl_ieee_setmaxrate(struct net_device *dev,
for (i--; i >= 0; i--)
mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HR_SUBGROUP,
i, 0, my_maxrate->tc_maxrate[i]);
i, 0,
my_maxrate->tc_maxrate[i], 0);
return err;
}
......
......@@ -850,6 +850,7 @@ enum tc_setup_type {
TC_SETUP_QDISC_TAPRIO,
TC_SETUP_FT,
TC_SETUP_QDISC_ETS,
TC_SETUP_QDISC_TBF,
};
/* These structures hold the attributes of bpf state that are being passed
......
......@@ -854,4 +854,26 @@ struct tc_ets_qopt_offload {
};
};
/* Operation requested in a struct tc_tbf_qopt_offload. */
enum tc_tbf_command {
/* Install or update the offloaded TBF; uses replace_params. */
TC_TBF_REPLACE,
/* Remove the offloaded TBF; no union member is filled in. */
TC_TBF_DESTROY,
/* Collect statistics; uses stats. */
TC_TBF_STATS,
};
/* Parameters carried by a TC_TBF_REPLACE request. */
struct tc_tbf_qopt_offload_replace_params {
/* Rate configuration of the TBF Qdisc being offloaded. */
struct psched_ratecfg rate;
/* The Qdisc's max_size value. */
u32 max_size;
/* Points at the Qdisc's queue statistics. */
struct gnet_stats_queue *qstats;
};
/* Argument passed with TC_SETUP_QDISC_TBF to describe a TBF offload request. */
struct tc_tbf_qopt_offload {
/* Which operation is requested; selects the valid union member. */
enum tc_tbf_command command;
/* Handle of the TBF Qdisc the request refers to. */
u32 handle;
/* Parent of that Qdisc. */
u32 parent;
union {
/* Valid for TC_TBF_REPLACE. */
struct tc_tbf_qopt_offload_replace_params replace_params;
/* Valid for TC_TBF_STATS. */
struct tc_qopt_offload_stats stats;
};
};
#endif
......@@ -15,6 +15,7 @@
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/pkt_sched.h>
......@@ -137,6 +138,52 @@ static u64 psched_ns_t2l(const struct psched_ratecfg *r,
return len;
}
static void tbf_offload_change(struct Qdisc *sch)
{
struct tbf_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
struct tc_tbf_qopt_offload qopt;
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return;
qopt.command = TC_TBF_REPLACE;
qopt.handle = sch->handle;
qopt.parent = sch->parent;
qopt.replace_params.rate = q->rate;
qopt.replace_params.max_size = q->max_size;
qopt.replace_params.qstats = &sch->qstats;
dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
}
static void tbf_offload_destroy(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
struct tc_tbf_qopt_offload qopt;
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return;
qopt.command = TC_TBF_DESTROY;
qopt.handle = sch->handle;
qopt.parent = sch->parent;
dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt);
}
static int tbf_offload_dump(struct Qdisc *sch)
{
struct tc_tbf_qopt_offload qopt;
qopt.command = TC_TBF_STATS;
qopt.handle = sch->handle;
qopt.parent = sch->parent;
qopt.stats.bstats = &sch->bstats;
qopt.stats.qstats = &sch->qstats;
return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_TBF, &qopt);
}
/* GSO packet is too big, segment it so that tbf can transmit
* each segment in time
*/
......@@ -407,6 +454,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
sch_tree_unlock(sch);
err = 0;
tbf_offload_change(sch);
done:
return err;
}
......@@ -432,6 +481,7 @@ static void tbf_destroy(struct Qdisc *sch)
struct tbf_sched_data *q = qdisc_priv(sch);
qdisc_watchdog_cancel(&q->watchdog);
tbf_offload_destroy(sch);
qdisc_put(q->qdisc);
}
......@@ -440,8 +490,12 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
struct tbf_sched_data *q = qdisc_priv(sch);
struct nlattr *nest;
struct tc_tbf_qopt opt;
int err;
err = tbf_offload_dump(sch);
if (err)
return err;
sch->qstats.backlog = q->qdisc->qstats.backlog;
nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
......
# SPDX-License-Identifier: GPL-2.0
humanize()
{
local speed=$1; shift
for unit in bps Kbps Mbps Gbps; do
if (($(echo "$speed < 1024" | bc))); then
break
fi
speed=$(echo "scale=1; $speed / 1024" | bc)
done
echo "$speed${unit}"
}
# Print the bit rate implied by two byte-counter samples taken INTERVAL apart:
# 8 * (second sample - first sample) / interval, in integer arithmetic.
rate()
{
	local start=$1 end=$2 secs=$3

	echo $((8 * (end - start) / secs))
}
check_rate()
{
local rate=$1; shift
......
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Wrapper that runs the generic TBF-under-ETS test from net/forwarding with
# TCFLAGS=skip_sw.
source qos_lib.sh
# NOTE(review): bail_on_lldpad is presumably provided by qos_lib.sh - confirm.
bail_on_lldpad
# Location of the generic forwarding tests, relative to this script.
lib_dir=$(dirname $0)/../../../net/forwarding
# Passed to the flower filters that the generic test installs.
TCFLAGS=skip_sw
source $lib_dir/sch_tbf_ets.sh
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Wrapper that runs the generic TBF-under-PRIO test from net/forwarding with
# TCFLAGS=skip_sw.
source qos_lib.sh
# NOTE(review): bail_on_lldpad is presumably provided by qos_lib.sh - confirm.
bail_on_lldpad
# Location of the generic forwarding tests, relative to this script.
lib_dir=$(dirname $0)/../../../net/forwarding
# Passed to the flower filters that the generic test installs.
TCFLAGS=skip_sw
source $lib_dir/sch_tbf_prio.sh
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Wrapper that runs the generic root-TBF test from net/forwarding with
# TCFLAGS=skip_sw.
source qos_lib.sh
# NOTE(review): bail_on_lldpad is presumably provided by qos_lib.sh - confirm.
bail_on_lldpad
# Location of the generic forwarding tests, relative to this script.
lib_dir=$(dirname $0)/../../../net/forwarding
# Passed to the flower filters that the generic test installs.
TCFLAGS=skip_sw
source $lib_dir/sch_tbf_root.sh
......@@ -248,6 +248,24 @@ busywait()
done
}
# Predicate for busywait: run the command given after the first argument, print
# the number it produced, and succeed once that number has reached the target
# passed as the first argument.
until_counter_is()
{
	local target=$1; shift
	local now=$("$@")

	echo $((now))
	((now >= target))
}
# Sample a counter by running the command given after the first two arguments,
# then busywait (with the given timeout) until the same command reports a value
# of at least the initial sample plus the requested delta.
busywait_for_counter()
{
	local budget=$1; shift
	local step=$1; shift
	local start=$("$@")
	local goal=$((start + step))

	busywait "$budget" until_counter_is $goal "$@"
}
setup_wait_dev()
{
local dev=$1; shift
......@@ -575,9 +593,10 @@ tc_rule_stats_get()
local dev=$1; shift
local pref=$1; shift
local dir=$1; shift
local selector=${1:-.packets}; shift
tc -j -s filter show dev $dev ${dir:-ingress} pref $pref \
| jq '.[1].options.actions[].stats.packets'
| jq ".[1].options.actions[].stats$selector"
}
ethtool_stats_get()
......@@ -588,6 +607,30 @@ ethtool_stats_get()
ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2
}
humanize()
{
local speed=$1; shift
for unit in bps Kbps Mbps Gbps; do
if (($(echo "$speed < 1024" | bc))); then
break
fi
speed=$(echo "scale=1; $speed / 1024" | bc)
done
echo "$speed${unit}"
}
# Print the bit rate implied by two byte-counter samples taken INTERVAL apart:
# 8 * (second sample - first sample) / interval, in integer arithmetic.
rate()
{
	local start=$1 end=$2 secs=$3

	echo $((8 * (end - start) / secs))
}
mac_get()
{
local if_name=$1
......
# SPDX-License-Identifier: GPL-2.0
# This test sends a stream of traffic from H1 through a switch, to H2. On the
# egress port from the switch ($swp2), a shaper is installed. The test verifies
# that the rates on the port match the configured shaper.
#
# In order to test per-class shaping, $swp2 actually contains TBF under PRIO or
# ETS, with two different configurations. Traffic is prioritized using 802.1p.
#
# +-------------------------------------------+
# | H1 |
# | + $h1.10 $h1.11 + |
# | | 192.0.2.1/28 192.0.2.17/28 | |
# | | | |
# | \______________ _____________/ |
# | \ / |
# | + $h1 |
# +---------------------|---------------------+
# |
# +---------------------|---------------------+
# | SW + $swp1 |
# | _______________/ \_______________ |
# | / \ |
# | +-|--------------+ +--------------|-+ |
# | | + $swp1.10 | | $swp1.11 + | |
# | | | | | |
# | | BR10 | | BR11 | |
# | | | | | |
# | | + $swp2.10 | | $swp2.11 + | |
# | +-|--------------+ +--------------|-+ |
# | \_______________ ______________/ |
# | \ / |
# | + $swp2 |
# +---------------------|---------------------+
# |
# +---------------------|---------------------+
# | H2 + $h2 |
# | ______________/ \______________ |
# | / \ |
# | | | |
# | + $h2.10 $h2.11 + |
# | 192.0.2.2/28 192.0.2.18/28 |
# +-------------------------------------------+
# The topology above uses four interfaces: $h1, $swp1, $swp2 and $h2.
NUM_NETIFS=4
# NOTE(review): presumably makes lib.sh verify tc availability - confirm.
CHECK_TC="yes"
# lib_dir must be set by the script that sources this file.
source $lib_dir/lib.sh
# Print the IPv4 address of host number HOST on VLAN, matching the diagram
# above: VLAN 10 uses 192.0.2.0/28 and VLAN 11 uses 192.0.2.16/28.
ipaddr()
{
	local node=$1 vid=$2
	local last_octet=$((16 * (vid - 10) + node))

	echo 192.0.2.$last_octet
}
# Bring up host interface DEV for host number HOST: initialize it, set its MTU
# to 10000, and create VLAN uppers 10 and 11 with their /28 addresses. The
# egress QoS map sends priority 0 out as 0 on VLAN 10 and as 1 on VLAN 11.
host_create()
{
	local dev=$1; shift
	local host=$1; shift
	local vlan

	simple_if_init $dev
	mtu_set $dev 10000

	for vlan in 10 11; do
		vlan_create $dev $vlan v$dev $(ipaddr $host $vlan)/28
		ip link set dev $dev.$vlan type vlan egress 0:$((vlan - 10))
	done
}
# Undo host_create for DEV: remove VLAN uppers 11 and 10, restore the MTU and
# tear the interface down.
host_destroy()
{
	local dev=$1; shift
	local vlan

	for vlan in 11 10; do
		vlan_destroy $dev $vlan
	done

	mtu_restore $dev
	simple_if_fini $dev
}
# Set up H1 on $h1 as host number 1.
h1_create()
{
host_create $h1 1
}
# Tear down H1.
h1_destroy()
{
host_destroy $h1
}
# Set up H2 on $h2 as host number 2 and install one ingress flower filter per
# VLAN (pref 1010 for VLAN 10, pref 1011 for VLAN 11). The filters only pass
# traffic; their statistics are what tbf_get_counter reads.
h2_create()
{
	local vlan

	host_create $h2 2

	tc qdisc add dev $h2 clsact
	for vlan in 10 11; do
		tc filter add dev $h2 ingress pref 10$vlan prot 802.1q \
			flower $TCFLAGS vlan_id $vlan action pass
	done
}
# Tear down H2: drop the clsact qdisc (and with it the counting filters), then
# undo host_create.
h2_destroy()
{
tc qdisc del dev $h2 clsact
host_destroy $h2
}
# Build the switch side of the topology: bridges br10 and br11, each joining
# the matching VLAN uppers of $swp1 and $swp2.
switch_create()
{
local intf
local vlan
ip link add dev br10 type bridge
ip link add dev br11 type bridge
for intf in $swp1 $swp2; do
ip link set dev $intf up
mtu_set $intf 10000
for vlan in 10 11; do
# Create $intf.$vlan and enslave it to br$vlan.
vlan_create $intf $vlan
ip link set dev $intf.$vlan master br$vlan
ip link set dev $intf.$vlan up
done
done
# Ingress QoS map on both VLAN uppers of $swp1: 0 -> 0, 1 -> 1.
for vlan in 10 11; do
ip link set dev $swp1.$vlan type vlan ingress 0:0 1:1
done
ip link set dev br10 up
ip link set dev br11 up
}
# Undo switch_create, in reverse order.
switch_destroy()
{
local intf
local vlan
# A test may have been interrupted mid-run, with Qdisc installed. Delete
# it here.
# Errors are silenced because there may be no root Qdisc to delete.
tc qdisc del dev $swp2 root 2>/dev/null
ip link set dev br11 down
ip link set dev br10 down
for intf in $swp2 $swp1; do
for vlan in 11 10; do
ip link set dev $intf.$vlan down
ip link set dev $intf.$vlan nomaster
vlan_destroy $intf $vlan
done
mtu_restore $intf
ip link set dev $intf down
done
ip link del dev br11
ip link del dev br10
}
# Resolve the four interfaces of the topology from NETIFS, record H2's MAC
# address (used as the destination of the test traffic), then call
# vrf_prepare and create both hosts and the switch.
setup_prepare()
{
	h1=${NETIFS[p1]}
	swp1=${NETIFS[p2]}

	swp2=${NETIFS[p3]}
	h2=${NETIFS[p4]}

	# Only p1..p4 are requested (NUM_NETIFS=4), so no further interfaces
	# are looked up here.

	h2_mac=$(mac_get $h2)

	vrf_prepare

	h1_create
	h2_create
	switch_create
}
# Tear everything down in the reverse order of setup_prepare. Installed as the
# EXIT trap by the test scripts.
cleanup()
{
pre_cleanup
switch_destroy
h2_destroy
h1_destroy
vrf_cleanup
}
# Check connectivity from H1 to H2 on each of the two VLANs.
ping_ipv4()
{
	local vlan

	for vlan in 10 11; do
		ping_test $h1.$vlan $(ipaddr 2 $vlan) " vlan $vlan"
	done
}
# Print the byte counter of the ingress filter that h2_create installed on $h2
# for VLAN (pref 10<vlan>).
tbf_get_counter()
{
	local vlan=$1; shift
	local pref=10$vlan

	tc_rule_stats_get $h2 $pref ingress .bytes
}
# Measure the rate received on $h2 for VLAN and check that it is within 5% of
# MBIT megabits per second.
#   $1 - VLAN ID (10 or 11)
#   $2 - expected rate in Mbit
# Any further arguments are not read.
do_tbf_test()
{
local vlan=$1; shift
local mbit=$1; shift
start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 2 $vlan) $h2_mac
sleep 5 # Wait for the burst to dwindle
# First sample: taken right after the counter is seen to advance, so that the
# 10-second window below starts on a fresh counter update.
local t2=$(busywait_for_counter 1000 +1 tbf_get_counter $vlan)
sleep 10
# Second sample, after the 10-second measurement window.
local t3=$(tbf_get_counter $vlan)
stop_traffic
RET=0
# Note: TBF uses 10^6 Mbits, not 2^20 ones.
local er=$((mbit * 1000 * 1000))
local nr=$(rate $t2 $t3 10)
# Signed deviation of the measured rate from the expected one, in percent
# (integer arithmetic).
local nr_pct=$((100 * (nr - er) / er))
((-5 <= nr_pct && nr_pct <= 5))
check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-5%."
log_test "TC $((vlan - 10)): TBF rate ${mbit}Mbit"
}
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Runs the shared ETS/PRIO TBF test with an ETS root Qdisc.
# QDISC is spliced into the "tc qdisc add ... root" command line by tbf_test.
QDISC="ets strict"
# Default to the current directory unless the caller already set lib_dir.
: ${lib_dir:=.}
source $lib_dir/sch_tbf_etsprio.sh
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Shared body of the TBF-under-ETS and TBF-under-PRIO tests. The sourcing
# script is expected to have set QDISC and lib_dir.
# NOTE(review): ALL_TESTS is presumably the list consumed by tests_run - confirm.
ALL_TESTS="
ping_ipv4
tbf_test
"
source $lib_dir/sch_tbf_core.sh
# Install a 400Mbit TBF under class 10:3 and an 800Mbit TBF under class 10:2,
# both with the burst size given as $1, then verify the rate seen on VLAN 10
# against 400Mbit and on VLAN 11 against 800Mbit.
tbf_test_one()
{
	local burst=$1; shift
	local common="burst $burst limit 1M"

	tc qdisc replace dev $swp2 parent 10:3 handle 103: tbf \
		rate 400Mbit $common
	tc qdisc replace dev $swp2 parent 10:2 handle 102: tbf \
		rate 800Mbit $common

	do_tbf_test 10 400 $burst
	do_tbf_test 11 800 $burst
}
# Install the $QDISC root Qdisc with three bands on $swp2, run the TBF checks
# with a 128K burst, and remove the root Qdisc again.
tbf_test()
{
# This test is used for both ETS and PRIO. Even though we only need two
# bands, PRIO demands a minimum of three.
tc qdisc add dev $swp2 root handle 10: $QDISC 3 priomap 2 1 0
tbf_test_one 128K
tc qdisc del dev $swp2 root
}
# Make sure the topology is torn down however the script exits.
trap cleanup EXIT
setup_prepare
setup_wait
# NOTE(review): tests_run and EXIT_STATUS come from lib.sh; presumably
# tests_run executes the entries of ALL_TESTS - confirm.
tests_run
exit $EXIT_STATUS
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Runs the shared ETS/PRIO TBF test with a PRIO root Qdisc.
# QDISC is spliced into the "tc qdisc add ... root" command line by tbf_test.
QDISC="prio bands"
# Default to the current directory unless the caller already set lib_dir.
: ${lib_dir:=.}
source $lib_dir/sch_tbf_etsprio.sh
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Tests TBF installed as the root Qdisc of $swp2.
# NOTE(review): ALL_TESTS is presumably the list consumed by tests_run - confirm.
ALL_TESTS="
ping_ipv4
tbf_test
"
# Default to the current directory unless the caller already set lib_dir.
: ${lib_dir:=.}
source $lib_dir/sch_tbf_core.sh
# Install a 400Mbit TBF as the root Qdisc of $swp2 with the burst size given
# as $1, then verify the rate seen on VLAN 10 against 400Mbit.
tbf_test_one()
{
	local burst=$1; shift
	local common="burst $burst limit 1M"

	tc qdisc replace dev $swp2 root handle 108: tbf \
		rate 400Mbit $common

	do_tbf_test 10 400 $burst
}
# Run the root TBF check with a 128K burst, then remove the root Qdisc that
# tbf_test_one installed.
tbf_test()
{
tbf_test_one 128K
tc qdisc del dev $swp2 root
}
# Make sure the topology is torn down however the script exits.
trap cleanup EXIT
setup_prepare
setup_wait
# NOTE(review): tests_run and EXIT_STATUS come from lib.sh; presumably
# tests_run executes the entries of ALL_TESTS - confirm.
tests_run
exit $EXIT_STATUS
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment