Commit 1ce1a745 authored by Jakub Kicinski's avatar Jakub Kicinski

Merge branch 'fix-dropping-of-oversize-preemptible-frames-with-felix-dsa-driver'

Vladimir Oltean says:

====================
Fix dropping of oversize preemptible frames with felix DSA driver

It has been reported that preemptible traffic doesn't completely behave
as expected. Namely, large packets should be able to be squeezed
(through fragmentation) through taprio time slots smaller than the
transmission time of the full frame. That does not happen due to logic
in the driver (for oversize frame dropping with taprio) that was not
updated in order for this use case to work.

I am not sure whether it qualifies as "net" material, because some
structural changes are involved, and it is a "never worked" scenario.
OTOH, this is a complaint coming from users for a v6.4 kernel.
It's up to maintainers to decide whether this series can be considered;
I've submitted it as non-RFC in the optimistic case that it will be :)

Demo script illustrating the issue below.

add_taprio()
{
	local ifname=$1

	echo "Creating root taprio"
	tc qdisc replace dev $ifname handle 8001: parent root stab overhead 24 taprio \
		num_tc 8 \
		map 0 1 2 3 4 5 6 7 \
		queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \
		base-time 0 \
		sched-entry S 01 1216 \
		sched-entry S fe 12368 \
		fp P E E E E E E E \
		flags 0x2
}

remove_taprio()
{
	local ifname=$1

	echo "Removing taprio"
	tc qdisc del dev $ifname root
}

ip netns add ns0
ip link set eno0 netns ns0 && ip -n ns0 link set eno0 up && ip -n ns0 addr add 192.168.100.1/24 dev eno0
ip addr add 192.168.100.2/24 dev swp0 && ip link set swp0 up
ip netns exec ns0 ethtool --set-mm eno0 pmac-enabled on verify-enabled off tx-enabled on
ethtool --set-mm swp0 pmac-enabled on verify-enabled off tx-enabled on
add_taprio swp0

ping 192.168.100.1 -s 1000 -c 5 # sent through TC0
ethtool -I --show-mm swp0 | grep MACMergeFragCountTx # should increase

ip addr flush swp0 && ip link set swp0 down
remove_taprio swp0
ethtool --set-mm swp0 pmac-enabled off verify-enabled off tx-enabled off
ip netns exec ns0 ethtool --set-mm eno0 pmac-enabled off verify-enabled off tx-enabled off
ip netns del ns0
====================

Link: https://lore.kernel.org/r/20230705104422.49025-1-vladimir.oltean@nxp.comSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents ceb20a3c c6efb4ae
......@@ -1786,16 +1786,15 @@ static int felix_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
{
struct ocelot *ocelot = ds->priv;
struct ocelot_port *ocelot_port = ocelot->ports[port];
struct felix *felix = ocelot_to_felix(ocelot);
ocelot_port_set_maxlen(ocelot, port, new_mtu);
mutex_lock(&ocelot->tas_lock);
mutex_lock(&ocelot->fwd_domain_lock);
if (ocelot_port->taprio && felix->info->tas_guard_bands_update)
felix->info->tas_guard_bands_update(ocelot, port);
if (ocelot_port->taprio && ocelot->ops->tas_guard_bands_update)
ocelot->ops->tas_guard_bands_update(ocelot, port);
mutex_unlock(&ocelot->tas_lock);
mutex_unlock(&ocelot->fwd_domain_lock);
return 0;
}
......
......@@ -57,7 +57,6 @@ struct felix_info {
void (*mdio_bus_free)(struct ocelot *ocelot);
int (*port_setup_tc)(struct dsa_switch *ds, int port,
enum tc_setup_type type, void *type_data);
void (*tas_guard_bands_update)(struct ocelot *ocelot, int port);
void (*port_sched_speed_set)(struct ocelot *ocelot, int port,
u32 speed);
void (*phylink_mac_config)(struct ocelot *ocelot, int port,
......
......@@ -1209,15 +1209,17 @@ static u32 vsc9959_tas_tc_max_sdu(struct tc_taprio_qopt_offload *taprio, int tc)
static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port)
{
struct ocelot_port *ocelot_port = ocelot->ports[port];
struct ocelot_mm_state *mm = &ocelot->mm[port];
struct tc_taprio_qopt_offload *taprio;
u64 min_gate_len[OCELOT_NUM_TC];
u32 val, maxlen, add_frag_size;
u64 needed_min_frag_time_ps;
int speed, picos_per_byte;
u64 needed_bit_time_ps;
u32 val, maxlen;
u8 tas_speed;
int tc;
lockdep_assert_held(&ocelot->tas_lock);
lockdep_assert_held(&ocelot->fwd_domain_lock);
taprio = ocelot_port->taprio;
......@@ -1253,14 +1255,21 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port)
*/
needed_bit_time_ps = (u64)(maxlen + 24) * picos_per_byte;
/* Preemptible TCs don't need to pass a full MTU, the port will
* automatically emit a HOLD request when a preemptible TC gate closes
*/
val = ocelot_read_rix(ocelot, QSYS_PREEMPTION_CFG, port);
add_frag_size = QSYS_PREEMPTION_CFG_MM_ADD_FRAG_SIZE_X(val);
needed_min_frag_time_ps = picos_per_byte *
(u64)(24 + 2 * ethtool_mm_frag_size_add_to_min(add_frag_size));
dev_dbg(ocelot->dev,
"port %d: max frame size %d needs %llu ps at speed %d\n",
port, maxlen, needed_bit_time_ps, speed);
"port %d: max frame size %d needs %llu ps, %llu ps for mPackets at speed %d\n",
port, maxlen, needed_bit_time_ps, needed_min_frag_time_ps,
speed);
vsc9959_tas_min_gate_lengths(taprio, min_gate_len);
mutex_lock(&ocelot->fwd_domain_lock);
for (tc = 0; tc < OCELOT_NUM_TC; tc++) {
u32 requested_max_sdu = vsc9959_tas_tc_max_sdu(taprio, tc);
u64 remaining_gate_len_ps;
......@@ -1269,7 +1278,9 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port)
remaining_gate_len_ps =
vsc9959_tas_remaining_gate_len_ps(min_gate_len[tc]);
if (remaining_gate_len_ps > needed_bit_time_ps) {
if ((mm->active_preemptible_tcs & BIT(tc)) ?
remaining_gate_len_ps > needed_min_frag_time_ps :
remaining_gate_len_ps > needed_bit_time_ps) {
/* Setting QMAXSDU_CFG to 0 disables oversized frame
* dropping.
*/
......@@ -1323,8 +1334,6 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port)
ocelot_write_rix(ocelot, maxlen, QSYS_PORT_MAX_SDU, port);
ocelot->ops->cut_through_fwd(ocelot);
mutex_unlock(&ocelot->fwd_domain_lock);
}
static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port,
......@@ -1351,7 +1360,7 @@ static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port,
break;
}
mutex_lock(&ocelot->tas_lock);
mutex_lock(&ocelot->fwd_domain_lock);
ocelot_rmw_rix(ocelot,
QSYS_TAG_CONFIG_LINK_SPEED(tas_speed),
......@@ -1361,7 +1370,7 @@ static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port,
if (ocelot_port->taprio)
vsc9959_tas_guard_bands_update(ocelot, port);
mutex_unlock(&ocelot->tas_lock);
mutex_unlock(&ocelot->fwd_domain_lock);
}
static void vsc9959_new_base_time(struct ocelot *ocelot, ktime_t base_time,
......@@ -1409,7 +1418,7 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
int ret, i;
u32 val;
mutex_lock(&ocelot->tas_lock);
mutex_lock(&ocelot->fwd_domain_lock);
if (taprio->cmd == TAPRIO_CMD_DESTROY) {
ocelot_port_mqprio(ocelot, port, &taprio->mqprio);
......@@ -1421,7 +1430,7 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
vsc9959_tas_guard_bands_update(ocelot, port);
mutex_unlock(&ocelot->tas_lock);
mutex_unlock(&ocelot->fwd_domain_lock);
return 0;
} else if (taprio->cmd != TAPRIO_CMD_REPLACE) {
ret = -EOPNOTSUPP;
......@@ -1504,7 +1513,7 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
ocelot_port->taprio = taprio_offload_get(taprio);
vsc9959_tas_guard_bands_update(ocelot, port);
mutex_unlock(&ocelot->tas_lock);
mutex_unlock(&ocelot->fwd_domain_lock);
return 0;
......@@ -1512,7 +1521,7 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
taprio->mqprio.qopt.num_tc = 0;
ocelot_port_mqprio(ocelot, port, &taprio->mqprio);
err_unlock:
mutex_unlock(&ocelot->tas_lock);
mutex_unlock(&ocelot->fwd_domain_lock);
return ret;
}
......@@ -1525,7 +1534,7 @@ static void vsc9959_tas_clock_adjust(struct ocelot *ocelot)
int port;
u32 val;
mutex_lock(&ocelot->tas_lock);
mutex_lock(&ocelot->fwd_domain_lock);
for (port = 0; port < ocelot->num_phys_ports; port++) {
ocelot_port = ocelot->ports[port];
......@@ -1563,7 +1572,7 @@ static void vsc9959_tas_clock_adjust(struct ocelot *ocelot)
QSYS_TAG_CONFIG_ENABLE,
QSYS_TAG_CONFIG, port);
}
mutex_unlock(&ocelot->tas_lock);
mutex_unlock(&ocelot->fwd_domain_lock);
}
static int vsc9959_qos_port_cbs_set(struct dsa_switch *ds, int port,
......@@ -1634,6 +1643,18 @@ static int vsc9959_qos_query_caps(struct tc_query_caps_base *base)
}
}
static int vsc9959_qos_port_mqprio(struct ocelot *ocelot, int port,
struct tc_mqprio_qopt_offload *mqprio)
{
int ret;
mutex_lock(&ocelot->fwd_domain_lock);
ret = ocelot_port_mqprio(ocelot, port, mqprio);
mutex_unlock(&ocelot->fwd_domain_lock);
return ret;
}
static int vsc9959_port_setup_tc(struct dsa_switch *ds, int port,
enum tc_setup_type type,
void *type_data)
......@@ -1646,7 +1667,7 @@ static int vsc9959_port_setup_tc(struct dsa_switch *ds, int port,
case TC_SETUP_QDISC_TAPRIO:
return vsc9959_qos_port_tas_set(ocelot, port, type_data);
case TC_SETUP_QDISC_MQPRIO:
return ocelot_port_mqprio(ocelot, port, type_data);
return vsc9959_qos_port_mqprio(ocelot, port, type_data);
case TC_SETUP_QDISC_CBS:
return vsc9959_qos_port_cbs_set(ds, port, type_data);
default:
......@@ -2591,6 +2612,7 @@ static const struct ocelot_ops vsc9959_ops = {
.cut_through_fwd = vsc9959_cut_through_fwd,
.tas_clock_adjust = vsc9959_tas_clock_adjust,
.update_stats = vsc9959_update_stats,
.tas_guard_bands_update = vsc9959_tas_guard_bands_update,
};
static const struct felix_info felix_info_vsc9959 = {
......@@ -2616,7 +2638,6 @@ static const struct felix_info felix_info_vsc9959 = {
.port_modes = vsc9959_port_modes,
.port_setup_tc = vsc9959_port_setup_tc,
.port_sched_speed_set = vsc9959_sched_speed_set,
.tas_guard_bands_update = vsc9959_tas_guard_bands_update,
};
/* The INTB interrupt is shared between for PTP TX timestamp availability
......
......@@ -2927,7 +2927,6 @@ int ocelot_init(struct ocelot *ocelot)
mutex_init(&ocelot->mact_lock);
mutex_init(&ocelot->fwd_domain_lock);
mutex_init(&ocelot->tas_lock);
spin_lock_init(&ocelot->ptp_clock_lock);
spin_lock_init(&ocelot->ts_id_lock);
......
......@@ -67,10 +67,13 @@ void ocelot_port_update_active_preemptible_tcs(struct ocelot *ocelot, int port)
val = mm->preemptible_tcs;
/* Cut through switching doesn't work for preemptible priorities,
* so first make sure it is disabled.
* so first make sure it is disabled. Also, changing the preemptible
* TCs affects the oversized frame dropping logic, so that needs to be
* re-triggered. And since tas_guard_bands_update() also implicitly
* calls cut_through_fwd(), we don't need to explicitly call it.
*/
mm->active_preemptible_tcs = val;
ocelot->ops->cut_through_fwd(ocelot);
ocelot->ops->tas_guard_bands_update(ocelot, port);
dev_dbg(ocelot->dev,
"port %d %s/%s, MM TX %s, preemptible TCs 0x%x, active 0x%x\n",
......@@ -89,17 +92,14 @@ void ocelot_port_change_fp(struct ocelot *ocelot, int port,
{
struct ocelot_mm_state *mm = &ocelot->mm[port];
mutex_lock(&ocelot->fwd_domain_lock);
lockdep_assert_held(&ocelot->fwd_domain_lock);
if (mm->preemptible_tcs == preemptible_tcs)
goto out_unlock;
return;
mm->preemptible_tcs = preemptible_tcs;
ocelot_port_update_active_preemptible_tcs(ocelot, port);
out_unlock:
mutex_unlock(&ocelot->fwd_domain_lock);
}
static void ocelot_mm_update_port_status(struct ocelot *ocelot, int port)
......
......@@ -663,6 +663,7 @@ struct ocelot_ops {
struct flow_stats *stats);
void (*cut_through_fwd)(struct ocelot *ocelot);
void (*tas_clock_adjust)(struct ocelot *ocelot);
void (*tas_guard_bands_update)(struct ocelot *ocelot, int port);
void (*update_stats)(struct ocelot *ocelot);
};
......@@ -863,12 +864,12 @@ struct ocelot {
struct mutex stat_view_lock;
/* Lock for serializing access to the MAC table */
struct mutex mact_lock;
/* Lock for serializing forwarding domain changes */
/* Lock for serializing forwarding domain changes, including the
* configuration of the Time-Aware Shaper, MAC Merge layer and
* cut-through forwarding, on which it depends
*/
struct mutex fwd_domain_lock;
/* Lock for serializing Time-Aware Shaper changes */
struct mutex tas_lock;
struct workqueue_struct *owq;
u8 ptp:1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment