Commit 5d498214 authored by David S. Miller's avatar David S. Miller

Merge branch 'mlxsw-Offload-PRIO-qdisc'

Jiri Pirko says:

====================
mlxsw: Offload PRIO qdisc

Nogah says:

Add an offload support for PRIO qdisc for mlxsw driver.
PRIO qdisc is being offloaded by using ndo_setup_tc. It has three
commands, to set or tune the qdisc, to remove it and to get its stats.

Like RED offloading, offloading this qdisc is not enforced on the driver
and determining its offload state is done in the dump action, when the
stats are being updated.
In the driver, offloading of PRIO is supported as root qdisc only. It
supports only priorities 0-7 (the range that is used by the current static
mapping of DSCP to skb prio and by 1:1 PCP values mapping) and up to 8
bands.

Patches 1-2 offload DSCP to priority mapping in the mlxsw_sp driver.
Patch 3 adds offload support for PRIO qdisc.
Patches 4-5 Add PRIO offload support in the mlxsw_sp driver.

---
v1->v2:
- Patch 1/5:
 - Rewrite patch msg
- Patch 3/5:
 - Send all the qstats in the replace command (and not just backlog)
- Patch 5/5:
 - Align with the changes from 3/5
 - Move backlog to the generic qdisc stats struct
 - Delete extra newline
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 1988c795 93d8a4c1
......@@ -42,7 +42,7 @@
struct mlxsw_item {
unsigned short offset; /* bytes in container */
unsigned short step; /* step in bytes for indexed items */
short step; /* step in bytes for indexed items */
unsigned short in_step_offset; /* offset within one step */
unsigned char shift; /* shift in bits */
unsigned char element_size; /* size of element in bit array */
......
......@@ -4827,6 +4827,42 @@ static inline void mlxsw_reg_ratr_counter_pack(char *payload, u64 counter_index,
mlxsw_reg_ratr_counter_set_type_set(payload, set_type);
}
/* RDPM - Router DSCP to Priority Mapping
* --------------------------------------
* Controls the mapping from DSCP field to switch priority on routed packets
*/
#define MLXSW_REG_RDPM_ID 0x8009
#define MLXSW_REG_RDPM_BASE_LEN 0x00
#define MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN 0x01
#define MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT 64
#define MLXSW_REG_RDPM_LEN 0x40
#define MLXSW_REG_RDPM_LAST_ENTRY (MLXSW_REG_RDPM_BASE_LEN + \
MLXSW_REG_RDPM_LEN - \
MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN)
MLXSW_REG_DEFINE(rdpm, MLXSW_REG_RDPM_ID, MLXSW_REG_RDPM_LEN);
/* reg_dscp_entry_e
* Enable update of the specific entry
* Access: Index
*/
MLXSW_ITEM8_INDEXED(reg, rdpm, dscp_entry_e, MLXSW_REG_RDPM_LAST_ENTRY, 7, 1,
-MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN, 0x00, false);
/* reg_dscp_entry_prio
* Switch Priority
* Access: RW
*/
MLXSW_ITEM8_INDEXED(reg, rdpm, dscp_entry_prio, MLXSW_REG_RDPM_LAST_ENTRY, 0, 4,
-MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN, 0x00, false);
static inline void mlxsw_reg_rdpm_pack(char *payload, unsigned short index,
u8 prio)
{
mlxsw_reg_rdpm_dscp_entry_e_set(payload, index, 1);
mlxsw_reg_rdpm_dscp_entry_prio_set(payload, index, prio);
}
/* RICNT - Router Interface Counter Register
* -----------------------------------------
* The RICNT register retrieves per port performance counters
......@@ -7640,6 +7676,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(rtar),
MLXSW_REG(ratr),
MLXSW_REG(rtdp),
MLXSW_REG(rdpm),
MLXSW_REG(ricnt),
MLXSW_REG(rrcr),
MLXSW_REG(ralta),
......
......@@ -1830,6 +1830,8 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type,
return mlxsw_sp_setup_tc_block(mlxsw_sp_port, type_data);
case TC_SETUP_QDISC_RED:
return mlxsw_sp_setup_tc_red(mlxsw_sp_port, type_data);
case TC_SETUP_QDISC_PRIO:
return mlxsw_sp_setup_tc_prio(mlxsw_sp_port, type_data);
default:
return -EOPNOTSUPP;
}
......
......@@ -565,6 +565,8 @@ int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port);
void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port);
int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_red_qopt_offload *p);
int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_prio_qopt_offload *p);
/* spectrum_fid.c */
int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid,
......
......@@ -41,9 +41,12 @@
#include "spectrum.h"
#include "reg.h"
#define MLXSW_SP_PRIO_BAND_TO_TCLASS(band) (IEEE_8021QAZ_MAX_TCS - band - 1)
enum mlxsw_sp_qdisc_type {
MLXSW_SP_QDISC_NO_QDISC,
MLXSW_SP_QDISC_RED,
MLXSW_SP_QDISC_PRIO,
};
struct mlxsw_sp_qdisc_ops {
......@@ -63,6 +66,11 @@ struct mlxsw_sp_qdisc_ops {
void *xstats_ptr);
void (*clean_stats)(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc);
/* unoffload - to be used for a qdisc that stops being offloaded without
* being destroyed.
*/
void (*unoffload)(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params);
};
struct mlxsw_sp_qdisc {
......@@ -76,6 +84,7 @@ struct mlxsw_sp_qdisc {
u64 tx_packets;
u64 drops;
u64 overlimits;
u64 backlog;
} stats_base;
struct mlxsw_sp_qdisc_ops *ops;
......@@ -141,6 +150,9 @@ mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
err_bad_param:
err_config:
if (mlxsw_sp_qdisc->handle == handle && ops->unoffload)
ops->unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, params);
mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
return err;
}
......@@ -403,6 +415,165 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
}
}
static int
mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
{
int i;
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i,
MLXSW_SP_PORT_DEFAULT_TCLASS);
return 0;
}
static int
mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
void *params)
{
struct tc_prio_qopt_offload_params *p = params;
if (p->bands > IEEE_8021QAZ_MAX_TCS)
return -EOPNOTSUPP;
return 0;
}
static int
mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
void *params)
{
struct tc_prio_qopt_offload_params *p = params;
int tclass, i;
int err;
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->priomap[i]);
err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, tclass);
if (err)
return err;
}
return 0;
}
void
mlxsw_sp_qdisc_prio_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
void *params)
{
struct tc_prio_qopt_offload_params *p = params;
u64 backlog;
backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
mlxsw_sp_qdisc->stats_base.backlog);
p->qstats->backlog -= backlog;
}
static int
mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
struct tc_qopt_offload_stats *stats_ptr)
{
u64 tx_bytes, tx_packets, drops = 0, backlog = 0;
struct mlxsw_sp_qdisc_stats *stats_base;
struct mlxsw_sp_port_xstats *xstats;
struct rtnl_link_stats64 *stats;
int i;
xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
stats = &mlxsw_sp_port->periodic_hw_stats.stats;
stats_base = &mlxsw_sp_qdisc->stats_base;
tx_bytes = stats->tx_bytes - stats_base->tx_bytes;
tx_packets = stats->tx_packets - stats_base->tx_packets;
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
drops += xstats->tail_drop[i];
backlog += xstats->backlog[i];
}
drops = drops - stats_base->drops;
_bstats_update(stats_ptr->bstats, tx_bytes, tx_packets);
stats_ptr->qstats->drops += drops;
stats_ptr->qstats->backlog +=
mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
backlog) -
mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
stats_base->backlog);
stats_base->backlog = backlog;
stats_base->drops += drops;
stats_base->tx_bytes += tx_bytes;
stats_base->tx_packets += tx_packets;
return 0;
}
static void
mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
{
struct mlxsw_sp_qdisc_stats *stats_base;
struct mlxsw_sp_port_xstats *xstats;
struct rtnl_link_stats64 *stats;
int i;
xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
stats = &mlxsw_sp_port->periodic_hw_stats.stats;
stats_base = &mlxsw_sp_qdisc->stats_base;
stats_base->tx_packets = stats->tx_packets;
stats_base->tx_bytes = stats->tx_bytes;
stats_base->drops = 0;
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
stats_base->drops += xstats->tail_drop[i];
mlxsw_sp_qdisc->stats_base.backlog = 0;
}
static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = {
.type = MLXSW_SP_QDISC_PRIO,
.check_params = mlxsw_sp_qdisc_prio_check_params,
.replace = mlxsw_sp_qdisc_prio_replace,
.unoffload = mlxsw_sp_qdisc_prio_unoffload,
.destroy = mlxsw_sp_qdisc_prio_destroy,
.get_stats = mlxsw_sp_qdisc_get_prio_stats,
.clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats,
};
int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_prio_qopt_offload *p)
{
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
if (p->parent != TC_H_ROOT)
return -EOPNOTSUPP;
mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc;
if (p->command == TC_PRIO_REPLACE)
return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
mlxsw_sp_qdisc,
&mlxsw_sp_qdisc_ops_prio,
&p->replace_params);
if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle,
MLXSW_SP_QDISC_PRIO))
return -EOPNOTSUPP;
switch (p->command) {
case TC_PRIO_DESTROY:
return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
case TC_PRIO_STATS:
return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
&p->stats);
default:
return -EOPNOTSUPP;
}
}
int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port)
{
mlxsw_sp_port->root_qdisc = kzalloc(sizeof(*mlxsw_sp_port->root_qdisc),
......
......@@ -7011,6 +7011,24 @@ static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
}
#endif
static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
{
char rdpm_pl[MLXSW_REG_RDPM_LEN];
unsigned int i;
MLXSW_REG_ZERO(rdpm, rdpm_pl);
/* HW is determining switch priority based on DSCP-bits, but the
* kernel is still doing that based on the ToS. Since there's a
* mismatch in bits we need to make sure to translate the right
* value ToS would observe, skipping the 2 least-significant ECN bits.
*/
for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
}
static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
char rgcr_pl[MLXSW_REG_RGCR_LEN];
......@@ -7023,6 +7041,7 @@ static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
mlxsw_reg_rgcr_usp_set(rgcr_pl, true);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
if (err)
return err;
......@@ -7098,6 +7117,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
if (err)
goto err_mp_hash_init;
err = mlxsw_sp_dscp_init(mlxsw_sp);
if (err)
goto err_dscp_init;
mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
mlxsw_sp_router_fib_dump_flush);
......@@ -7107,6 +7130,7 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
return 0;
err_register_fib_notifier:
err_dscp_init:
err_mp_hash_init:
unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
err_register_netevent_notifier:
......
......@@ -780,6 +780,7 @@ enum tc_setup_type {
TC_SETUP_BLOCK,
TC_SETUP_QDISC_CBS,
TC_SETUP_QDISC_RED,
TC_SETUP_QDISC_PRIO,
};
/* These structures hold the attributes of bpf state that are being passed
......
......@@ -761,4 +761,29 @@ struct tc_red_qopt_offload {
};
};
enum tc_prio_command {
TC_PRIO_REPLACE,
TC_PRIO_DESTROY,
TC_PRIO_STATS,
};
struct tc_prio_qopt_offload_params {
int bands;
u8 priomap[TC_PRIO_MAX + 1];
/* In case that a prio qdisc is offloaded and now is changed to a
* non-offloadedable config, it needs to update the backlog & qlen
* values to negate the HW backlog & qlen values (and only them).
*/
struct gnet_stats_queue *qstats;
};
struct tc_prio_qopt_offload {
enum tc_prio_command command;
u32 handle;
u32 parent;
union {
struct tc_prio_qopt_offload_params replace_params;
struct tc_qopt_offload_stats stats;
};
};
#endif
......@@ -142,6 +142,31 @@ prio_reset(struct Qdisc *sch)
sch->q.qlen = 0;
}
static int prio_offload(struct Qdisc *sch, bool enable)
{
struct prio_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
struct tc_prio_qopt_offload opt = {
.handle = sch->handle,
.parent = sch->parent,
};
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return -EOPNOTSUPP;
if (enable) {
opt.command = TC_PRIO_REPLACE;
opt.replace_params.bands = q->bands;
memcpy(&opt.replace_params.priomap, q->prio2band,
TC_PRIO_MAX + 1);
opt.replace_params.qstats = &sch->qstats;
} else {
opt.command = TC_PRIO_DESTROY;
}
return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO, &opt);
}
static void
prio_destroy(struct Qdisc *sch)
{
......@@ -149,6 +174,7 @@ prio_destroy(struct Qdisc *sch)
struct prio_sched_data *q = qdisc_priv(sch);
tcf_block_put(q->block);
prio_offload(sch, false);
for (prio = 0; prio < q->bands; prio++)
qdisc_destroy(q->queues[prio]);
}
......@@ -204,6 +230,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
}
sch_tree_unlock(sch);
prio_offload(sch, true);
return 0;
}
......@@ -223,15 +250,47 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt,
return prio_tune(sch, opt, extack);
}
static int prio_dump_offload(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
struct tc_prio_qopt_offload hw_stats = {
.handle = sch->handle,
.parent = sch->parent,
.command = TC_PRIO_STATS,
.stats.bstats = &sch->bstats,
.stats.qstats = &sch->qstats,
};
int err;
sch->flags &= ~TCQ_F_OFFLOADED;
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return 0;
err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO,
&hw_stats);
if (err == -EOPNOTSUPP)
return 0;
if (!err)
sch->flags |= TCQ_F_OFFLOADED;
return err;
}
static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct prio_sched_data *q = qdisc_priv(sch);
unsigned char *b = skb_tail_pointer(skb);
struct tc_prio_qopt opt;
int err;
opt.bands = q->bands;
memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1);
err = prio_dump_offload(sch);
if (err)
goto nla_put_failure;
if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
goto nla_put_failure;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment