Commit 35187642 authored by Jakub Kicinski

Merge branch 'htb-offload'

Maxim Mikityanskiy says:

====================
HTB offload

This series adds support for HTB offload to the HTB qdisc and implements
it in the mlx5 driver.

The previous RFCs are available at [1], [2].

The feature is intended to solve the performance bottleneck caused by
the single lock of the HTB qdisc, which prevents it from scaling well.
The HTB algorithm itself is offloaded to the device, eliminating the
need to take the root lock of HTB on every packet. The classification part
is done in clsact (still in software) to avoid acquiring the lock, which
imposes the limitation that filters can target only leaf classes.
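
For illustration, a minimal configuration sketch (the interface name, rates,
classids and the filter match below are hypothetical, and the "offload" flag
assumes a matching iproute2; this is not taken verbatim from the series):

  tc qdisc replace dev eth0 root handle 1: htb offload
  tc class add dev eth0 parent 1: classid 1:1 htb rate 10Gbit ceil 10Gbit
  tc class add dev eth0 parent 1:1 classid 1:10 htb rate 1Gbit ceil 2Gbit
  tc qdisc add dev eth0 clsact
  tc filter add dev eth0 egress protocol ip flower ip_proto udp dst_port 5001 \
      action skbedit priority 1:10

The clsact filter classifies in software by setting skb->priority to the
classid of a leaf class (1:10 here); the driver maps that classid to the
offloaded queue, so the HTB root lock is never taken on the transmit path.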

The speedup on Mellanox ConnectX-6 Dx was 14.2 times in the UDP
multi-stream test, compared to the software HTB implementation (more
details in the mlx5 patch).

[1]: https://www.spinics.net/lists/netdev/msg628422.html
[2]: https://www.spinics.net/lists/netdev/msg663548.html

v2 changes:

Fixed sparse and smatch warnings. Formatted HTB patches to 80 chars per
line.

v3 changes:

Fixed the CI failure on parisc with 16-bit xchg by replacing it with
WRITE_ONCE. Fixed the capability bits in mlx5_ifc.h and the value of
MLX5E_QOS_MAX_LEAF_NODES.

v4 changes:

Check if HTB is root when offloading. Add extack for hardware errors.
Rephrase explanations of how it works in the commit message. Remove %hu
from format strings. Add resiliency when leaf_del_last fails to create a
new leaf node.
====================

Link: https://lore.kernel.org/r/20210119120815.463334-1-maximmi@mellanox.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 04a88637 214baf22
......@@ -16,7 +16,8 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \
diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o fw_reset.o
diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
fw_reset.o qos.o
#
# Netdev basic
......@@ -25,7 +26,8 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
en_selftest.o en/port.o en/monitor_stats.o en/health.o \
en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \
en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o
en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \
en/qos.o
#
# Netdev extra
......
......@@ -55,6 +55,7 @@
#include "en_stats.h"
#include "en/dcbnl.h"
#include "en/fs.h"
#include "en/qos.h"
#include "lib/hv_vhca.h"
extern const struct net_device_ops mlx5e_netdev_ops;
......@@ -161,6 +162,9 @@ do { \
##__VA_ARGS__); \
} while (0)
#define mlx5e_state_dereference(priv, p) \
rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock))
enum mlx5e_rq_group {
MLX5E_RQ_GROUP_REGULAR,
MLX5E_RQ_GROUP_XSK,
......@@ -663,11 +667,13 @@ struct mlx5e_channel {
struct mlx5e_xdpsq rq_xdpsq;
struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC];
struct mlx5e_icosq icosq; /* internal control operations */
struct mlx5e_txqsq __rcu * __rcu *qos_sqs;
bool xdp;
struct napi_struct napi;
struct device *pdev;
struct net_device *netdev;
__be32 mkey_be;
u16 qos_sqs_size;
u8 num_tc;
u8 lag_port;
......@@ -756,6 +762,8 @@ struct mlx5e_modify_sq_param {
int next_state;
int rl_update;
int rl_index;
bool qos_update;
u16 qos_queue_group_id;
};
#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
......@@ -788,10 +796,20 @@ struct mlx5e_scratchpad {
cpumask_var_t cpumask;
};
struct mlx5e_htb {
DECLARE_HASHTABLE(qos_tc2node, order_base_2(MLX5E_QOS_MAX_LEAF_NODES));
DECLARE_BITMAP(qos_used_qids, MLX5E_QOS_MAX_LEAF_NODES);
struct mlx5e_sq_stats **qos_sq_stats;
u16 max_qos_sqs;
u16 maj_id;
u16 defcls;
};
struct mlx5e_priv {
/* priv data path fields - start */
/* +1 for port ptp ts */
struct mlx5e_txqsq *txq2sq[(MLX5E_MAX_NUM_CHANNELS + 1) * MLX5E_MAX_NUM_TC];
struct mlx5e_txqsq *txq2sq[(MLX5E_MAX_NUM_CHANNELS + 1) * MLX5E_MAX_NUM_TC +
MLX5E_QOS_MAX_LEAF_NODES];
int channel_tc2realtxq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC];
int port_ptp_tc2realtxq[MLX5E_MAX_NUM_TC];
#ifdef CONFIG_MLX5_CORE_EN_DCB
......@@ -859,6 +877,7 @@ struct mlx5e_priv {
struct mlx5e_hv_vhca_stats_agent stats_agent;
#endif
struct mlx5e_scratchpad scratchpad;
struct mlx5e_htb htb;
};
struct mlx5e_rx_handlers {
......@@ -986,6 +1005,7 @@ int mlx5e_safe_switch_channels(struct mlx5e_priv *priv,
struct mlx5e_channels *new_chs,
mlx5e_fp_preactivate preactivate,
void *context);
int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv);
int mlx5e_num_channels_changed(struct mlx5e_priv *priv);
int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context);
void mlx5e_activate_priv_channels(struct mlx5e_priv *priv);
......@@ -1010,6 +1030,9 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq);
int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
struct mlx5e_modify_sq_param *p);
int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix,
struct mlx5e_params *params, struct mlx5e_sq_param *param,
struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, u16 qos_qid);
void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq);
void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq);
void mlx5e_free_txqsq(struct mlx5e_txqsq *sq);
......@@ -1020,8 +1043,10 @@ struct mlx5e_create_sq_param;
int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev,
struct mlx5e_sq_param *param,
struct mlx5e_create_sq_param *csp,
u16 qos_queue_group_id,
u32 *sqn);
void mlx5e_tx_err_cqe_work(struct work_struct *recover_work);
void mlx5e_close_txqsq(struct mlx5e_txqsq *sq);
static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev)
{
......
......@@ -118,6 +118,8 @@ void mlx5e_build_rq_param(struct mlx5e_priv *priv,
struct mlx5e_rq_param *param);
void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
struct mlx5e_sq_param *param);
void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params,
struct mlx5e_sq_param *param);
void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
struct mlx5e_params *params,
struct mlx5e_xsk_param *xsk,
......
......@@ -261,7 +261,7 @@ static int mlx5e_ptp_open_txqsq(struct mlx5e_port_ptp *c, u32 tisn,
csp.min_inline_mode = txqsq->min_inline_mode;
csp.ts_cqe_to_dest_cqn = ptpsq->ts_cq.mcq.cqn;
err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, &txqsq->sqn);
err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, 0, &txqsq->sqn);
if (err)
goto err_free_txqsq;
......
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
#ifndef __MLX5E_EN_QOS_H
#define __MLX5E_EN_QOS_H
#include <linux/mlx5/driver.h>
#define MLX5E_QOS_MAX_LEAF_NODES 256
struct mlx5e_priv;
struct mlx5e_channels;
struct mlx5e_channel;
int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev);
int mlx5e_qos_cur_leaf_nodes(struct mlx5e_priv *priv);
/* TX datapath API */
int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid);
struct mlx5e_txqsq *mlx5e_get_sq(struct mlx5e_priv *priv, int qid);
/* SQ lifecycle */
int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
void mlx5e_qos_activate_queues(struct mlx5e_priv *priv);
void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c);
void mlx5e_qos_close_queues(struct mlx5e_channel *c);
/* HTB API */
int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls,
struct netlink_ext_ack *extack);
int mlx5e_htb_root_del(struct mlx5e_priv *priv);
int mlx5e_htb_leaf_alloc_queue(struct mlx5e_priv *priv, u16 classid,
u32 parent_classid, u64 rate, u64 ceil,
struct netlink_ext_ack *extack);
int mlx5e_htb_leaf_to_inner(struct mlx5e_priv *priv, u16 classid, u16 child_classid,
u64 rate, u64 ceil, struct netlink_ext_ack *extack);
int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid,
u16 *new_qid, struct netlink_ext_ack *extack);
int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force,
struct netlink_ext_ack *extack);
int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil,
struct netlink_ext_ack *extack);
#endif
......@@ -447,6 +447,17 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
goto out;
}
/* Don't allow changing the number of channels if HTB offload is active,
* because the numeration of the QoS SQs will change, while per-queue
* qdiscs are attached.
*/
if (priv->htb.maj_id) {
err = -EINVAL;
netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the number of channels\n",
__func__);
goto out;
}
new_channels.params = priv->channels.params;
new_channels.params.num_channels = count;
......@@ -1966,6 +1977,16 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable)
if (!MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn))
return -EOPNOTSUPP;
/* Don't allow changing the PTP state if HTB offload is active, because
* the numeration of the QoS SQs will change, while per-queue qdiscs are
* attached.
*/
if (priv->htb.maj_id) {
netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the PTP state\n",
__func__);
return -EINVAL;
}
new_channels.params = priv->channels.params;
MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_TX_PORT_TS, enable);
/* No need to verify SQ stop room as
......
......@@ -420,6 +420,25 @@ static void mlx5e_stats_grp_sw_update_stats_ptp(struct mlx5e_priv *priv,
}
}
static void mlx5e_stats_grp_sw_update_stats_qos(struct mlx5e_priv *priv,
struct mlx5e_sw_stats *s)
{
struct mlx5e_sq_stats **stats;
u16 max_qos_sqs;
int i;
/* Pairs with smp_store_release in mlx5e_open_qos_sq. */
max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs);
stats = READ_ONCE(priv->htb.qos_sq_stats);
for (i = 0; i < max_qos_sqs; i++) {
mlx5e_stats_grp_sw_update_stats_sq(s, READ_ONCE(stats[i]));
/* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */
barrier();
}
}
static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw)
{
struct mlx5e_sw_stats *s = &priv->stats.sw;
......@@ -449,6 +468,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw)
}
}
mlx5e_stats_grp_sw_update_stats_ptp(priv, s);
mlx5e_stats_grp_sw_update_stats_qos(priv, s);
}
static const struct counter_desc q_stats_desc[] = {
......@@ -1740,6 +1760,41 @@ static const struct counter_desc ptp_cq_stats_desc[] = {
{ MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) },
};
static const struct counter_desc qos_sq_stats_desc[] = {
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, packets) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, bytes) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_packets) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_bytes) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, nop) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) },
#ifdef CONFIG_MLX5_EN_TLS
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ctx) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ooo) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_resync_bytes) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_skip_no_sync_data) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) },
#endif
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_none) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, stopped) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, dropped) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, xmit_more) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, recover) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, cqes) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, wake) },
{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, cqe_err) },
};
#define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc)
#define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc)
#define NUM_XDPSQ_STATS ARRAY_SIZE(xdpsq_stats_desc)
......@@ -1750,6 +1805,49 @@ static const struct counter_desc ptp_cq_stats_desc[] = {
#define NUM_PTP_SQ_STATS ARRAY_SIZE(ptp_sq_stats_desc)
#define NUM_PTP_CH_STATS ARRAY_SIZE(ptp_ch_stats_desc)
#define NUM_PTP_CQ_STATS ARRAY_SIZE(ptp_cq_stats_desc)
#define NUM_QOS_SQ_STATS ARRAY_SIZE(qos_sq_stats_desc)
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qos)
{
/* Pairs with smp_store_release in mlx5e_open_qos_sq. */
return NUM_QOS_SQ_STATS * smp_load_acquire(&priv->htb.max_qos_sqs);
}
static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(qos)
{
/* Pairs with smp_store_release in mlx5e_open_qos_sq. */
u16 max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs);
int i, qid;
for (qid = 0; qid < max_qos_sqs; qid++)
for (i = 0; i < NUM_QOS_SQ_STATS; i++)
sprintf(data + (idx++) * ETH_GSTRING_LEN,
qos_sq_stats_desc[i].format, qid);
return idx;
}
static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qos)
{
struct mlx5e_sq_stats **stats;
u16 max_qos_sqs;
int i, qid;
/* Pairs with smp_store_release in mlx5e_open_qos_sq. */
max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs);
stats = READ_ONCE(priv->htb.qos_sq_stats);
for (qid = 0; qid < max_qos_sqs; qid++) {
struct mlx5e_sq_stats *s = READ_ONCE(stats[qid]);
for (i = 0; i < NUM_QOS_SQ_STATS; i++)
data[idx++] = MLX5E_READ_CTR64_CPU(s, qos_sq_stats_desc, i);
}
return idx;
}
static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qos) { return; }
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ptp)
{
......@@ -1932,6 +2030,7 @@ MLX5E_DEFINE_STATS_GRP(per_port_buff_congest, 0);
MLX5E_DEFINE_STATS_GRP(eth_ext, 0);
static MLX5E_DEFINE_STATS_GRP(tls, 0);
static MLX5E_DEFINE_STATS_GRP(ptp, 0);
static MLX5E_DEFINE_STATS_GRP(qos, 0);
/* The stats groups order is opposite to the update_stats() order calls */
mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = {
......@@ -1955,6 +2054,7 @@ mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = {
&MLX5E_STATS_GRP(channels),
&MLX5E_STATS_GRP(per_port_buff_congest),
&MLX5E_STATS_GRP(ptp),
&MLX5E_STATS_GRP(qos),
};
unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv)
......
......@@ -55,6 +55,8 @@
#define MLX5E_DECLARE_PTP_CH_STAT(type, fld) "ptp_ch_"#fld, offsetof(type, fld)
#define MLX5E_DECLARE_PTP_CQ_STAT(type, fld) "ptp_cq%d_"#fld, offsetof(type, fld)
#define MLX5E_DECLARE_QOS_TX_STAT(type, fld) "qos_tx%d_"#fld, offsetof(type, fld)
struct counter_desc {
char format[ETH_GSTRING_LEN];
size_t offset; /* Byte offset */
......
......@@ -106,28 +106,53 @@ static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb)
return priv->port_ptp_tc2realtxq[up];
}
static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb,
u16 htb_maj_id)
{
u16 classid;
if ((TC_H_MAJ(skb->priority) >> 16) == htb_maj_id)
classid = TC_H_MIN(skb->priority);
else
classid = READ_ONCE(priv->htb.defcls);
if (!classid)
return 0;
return mlx5e_get_txq_by_classid(priv, classid);
}
u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
int num_tc_x_num_ch;
int txq_ix;
int up = 0;
int ch_ix;
if (unlikely(priv->channels.port_ptp)) {
int num_tc_x_num_ch;
/* Sync with mlx5e_update_num_tc_x_num_ch - avoid refetching. */
num_tc_x_num_ch = READ_ONCE(priv->num_tc_x_num_ch);
if (unlikely(dev->real_num_tx_queues > num_tc_x_num_ch)) {
/* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */
u16 htb_maj_id = smp_load_acquire(&priv->htb.maj_id);
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
mlx5e_use_ptpsq(skb))
return mlx5e_select_ptpsq(dev, skb);
if (unlikely(htb_maj_id)) {
txq_ix = mlx5e_select_htb_queue(priv, skb, htb_maj_id);
if (txq_ix > 0)
return txq_ix;
}
/* Sync with mlx5e_update_num_tc_x_num_ch - avoid refetching. */
num_tc_x_num_ch = READ_ONCE(priv->num_tc_x_num_ch);
if (unlikely(priv->channels.port_ptp))
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
mlx5e_use_ptpsq(skb))
return mlx5e_select_ptpsq(dev, skb);
txq_ix = netdev_pick_tx(dev, skb, NULL);
/* Fix netdev_pick_tx() not to choose ptp_channel txqs.
/* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs.
* If they are selected, switch to regular queues.
* Driver to select these queues only at mlx5e_select_ptpsq().
* Driver to select these queues only at mlx5e_select_ptpsq()
* and mlx5e_select_htb_queue().
*/
if (unlikely(txq_ix >= num_tc_x_num_ch))
txq_ix %= num_tc_x_num_ch;
......@@ -702,6 +727,10 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
u16 pi;
sq = priv->txq2sq[skb_get_queue_mapping(skb)];
if (unlikely(!sq)) {
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}
/* May send SKBs and WQEs. */
if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &accel)))
......
......@@ -115,17 +115,21 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
napi);
struct mlx5e_ch_stats *ch_stats = c->stats;
struct mlx5e_xdpsq *xsksq = &c->xsksq;
struct mlx5e_txqsq __rcu **qos_sqs;
struct mlx5e_rq *xskrq = &c->xskrq;
struct mlx5e_rq *rq = &c->rq;
bool aff_change = false;
bool busy_xsk = false;
bool busy = false;
int work_done = 0;
u16 qos_sqs_size;
bool xsk_open;
int i;
rcu_read_lock();
qos_sqs = rcu_dereference(c->qos_sqs);
xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
ch_stats->poll++;
......@@ -133,6 +137,18 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
for (i = 0; i < c->num_tc; i++)
busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget);
if (unlikely(qos_sqs)) {
smp_rmb(); /* Pairs with mlx5e_qos_alloc_queues. */
qos_sqs_size = READ_ONCE(c->qos_sqs_size);
for (i = 0; i < qos_sqs_size; i++) {
struct mlx5e_txqsq *sq = rcu_dereference(qos_sqs[i]);
if (sq)
busy |= mlx5e_poll_tx_cq(&sq->cq, budget);
}
}
busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq);
if (c->xdp)
......@@ -186,6 +202,16 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
mlx5e_handle_tx_dim(&c->sq[i]);
mlx5e_cq_arm(&c->sq[i].cq);
}
if (unlikely(qos_sqs)) {
for (i = 0; i < qos_sqs_size; i++) {
struct mlx5e_txqsq *sq = rcu_dereference(qos_sqs[i]);
if (sq) {
mlx5e_handle_tx_dim(sq);
mlx5e_cq_arm(&sq->cq);
}
}
}
mlx5e_handle_rx_dim(rq);
......
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
#include "qos.h"
#define MLX5_QOS_DEFAULT_DWRR_UID 0
bool mlx5_qos_is_supported(struct mlx5_core_dev *mdev)
{
if (!MLX5_CAP_GEN(mdev, qos))
return false;
if (!MLX5_CAP_QOS(mdev, nic_sq_scheduling))
return false;
if (!MLX5_CAP_QOS(mdev, nic_bw_share))
return false;
if (!MLX5_CAP_QOS(mdev, nic_rate_limit))
return false;
return true;
}
int mlx5_qos_max_leaf_nodes(struct mlx5_core_dev *mdev)
{
return 1 << MLX5_CAP_QOS(mdev, log_max_qos_nic_queue_group);
}
int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id,
u32 bw_share, u32 max_avg_bw, u32 *id)
{
u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
MLX5_SET(scheduling_context, sched_ctx, element_type,
SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP);
MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw);
return mlx5_create_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC,
sched_ctx, id);
}
int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id,
u32 bw_share, u32 max_avg_bw, u32 *id)
{
u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
void *attr;
MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
MLX5_SET(scheduling_context, sched_ctx, element_type,
SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw);
attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR);
return mlx5_create_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC,
sched_ctx, id);
}
int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id)
{
return mlx5_qos_create_inner_node(mdev, MLX5_QOS_DEFAULT_DWRR_UID, 0, 0, id);
}
int mlx5_qos_update_node(struct mlx5_core_dev *mdev, u32 parent_id,
u32 bw_share, u32 max_avg_bw, u32 id)
{
u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
u32 bitmask = 0;
MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw);
bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
return mlx5_modify_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC,
sched_ctx, id, bitmask);
}
int mlx5_qos_destroy_node(struct mlx5_core_dev *mdev, u32 id)
{
return mlx5_destroy_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, id);
}
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
#ifndef __MLX5_QOS_H
#define __MLX5_QOS_H
#include "mlx5_core.h"
#define MLX5_DEBUG_QOS_MASK BIT(4)
#define qos_err(mdev, fmt, ...) \
mlx5_core_err(mdev, "QoS: " fmt, ##__VA_ARGS__)
#define qos_warn(mdev, fmt, ...) \
mlx5_core_warn(mdev, "QoS: " fmt, ##__VA_ARGS__)
#define qos_dbg(mdev, fmt, ...) \
mlx5_core_dbg_mask(mdev, MLX5_DEBUG_QOS_MASK, "QoS: " fmt, ##__VA_ARGS__)
bool mlx5_qos_is_supported(struct mlx5_core_dev *mdev);
int mlx5_qos_max_leaf_nodes(struct mlx5_core_dev *mdev);
int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id,
u32 bw_share, u32 max_avg_bw, u32 *id);
int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id,
u32 bw_share, u32 max_avg_bw, u32 *id);
int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id);
int mlx5_qos_update_node(struct mlx5_core_dev *mdev, u32 parent_id, u32 bw_share,
u32 max_avg_bw, u32 id);
int mlx5_qos_destroy_node(struct mlx5_core_dev *mdev, u32 id);
#endif
......@@ -842,11 +842,16 @@ struct mlx5_ifc_qos_cap_bits {
u8 reserved_at_4[0x1];
u8 packet_pacing_burst_bound[0x1];
u8 packet_pacing_typical_size[0x1];
u8 reserved_at_7[0x4];
u8 reserved_at_7[0x1];
u8 nic_sq_scheduling[0x1];
u8 nic_bw_share[0x1];
u8 nic_rate_limit[0x1];
u8 packet_pacing_uid[0x1];
u8 reserved_at_c[0x14];
u8 reserved_at_20[0x20];
u8 reserved_at_20[0xb];
u8 log_max_qos_nic_queue_group[0x5];
u8 reserved_at_30[0x10];
u8 packet_pacing_max_rate[0x20];
......@@ -3347,7 +3352,7 @@ struct mlx5_ifc_sqc_bits {
u8 reserved_at_e0[0x10];
u8 packet_pacing_rate_limit_index[0x10];
u8 tis_lst_sz[0x10];
u8 reserved_at_110[0x10];
u8 qos_queue_group_id[0x10];
u8 reserved_at_120[0x40];
......@@ -3362,6 +3367,7 @@ enum {
SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT = 0x1,
SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC = 0x2,
SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3,
SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP = 0x4,
};
enum {
......@@ -4805,6 +4811,7 @@ struct mlx5_ifc_query_scheduling_element_out_bits {
enum {
SCHEDULING_HIERARCHY_E_SWITCH = 0x2,
SCHEDULING_HIERARCHY_NIC = 0x3,
};
struct mlx5_ifc_query_scheduling_element_in_bits {
......
......@@ -858,6 +858,7 @@ enum tc_setup_type {
TC_SETUP_QDISC_ETS,
TC_SETUP_QDISC_TBF,
TC_SETUP_QDISC_FIFO,
TC_SETUP_QDISC_HTB,
};
/* These structures hold the attributes of bpf state that are being passed
......
......@@ -783,6 +783,42 @@ struct tc_mq_qopt_offload {
};
};
enum tc_htb_command {
/* Root */
TC_HTB_CREATE, /* Initialize HTB offload. */
TC_HTB_DESTROY, /* Destroy HTB offload. */
/* Classes */
/* Allocate qid and create leaf. */
TC_HTB_LEAF_ALLOC_QUEUE,
/* Convert leaf to inner, preserve and return qid, create new leaf. */
TC_HTB_LEAF_TO_INNER,
/* Delete leaf, while siblings remain. */
TC_HTB_LEAF_DEL,
/* Delete leaf, convert parent to leaf, preserving qid. */
TC_HTB_LEAF_DEL_LAST,
/* TC_HTB_LEAF_DEL_LAST, but delete driver data on hardware errors. */
TC_HTB_LEAF_DEL_LAST_FORCE,
/* Modify parameters of a node. */
TC_HTB_NODE_MODIFY,
/* Class qdisc */
TC_HTB_LEAF_QUERY_QUEUE, /* Query qid by classid. */
};
struct tc_htb_qopt_offload {
struct netlink_ext_ack *extack;
enum tc_htb_command command;
u16 classid;
u32 parent_classid;
u16 qid;
u16 moved_qid;
u64 rate;
u64 ceil;
};
#define TC_HTB_CLASSID_ROOT U32_MAX
enum tc_red_command {
TC_RED_REPLACE,
TC_RED_DESTROY,
......
......@@ -210,7 +210,8 @@ struct Qdisc_class_ops {
int (*change)(struct Qdisc *, u32, u32,
struct nlattr **, unsigned long *,
struct netlink_ext_ack *);
int (*delete)(struct Qdisc *, unsigned long);
int (*delete)(struct Qdisc *, unsigned long,
struct netlink_ext_ack *);
void (*walk)(struct Qdisc *, struct qdisc_walker * arg);
/* Filter manipulation */
......@@ -552,14 +553,20 @@ static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc)
return qdisc->dev_queue->dev;
}
static inline void sch_tree_lock(const struct Qdisc *q)
static inline void sch_tree_lock(struct Qdisc *q)
{
spin_lock_bh(qdisc_root_sleeping_lock(q));
if (q->flags & TCQ_F_MQROOT)
spin_lock_bh(qdisc_lock(q));
else
spin_lock_bh(qdisc_root_sleeping_lock(q));
}
static inline void sch_tree_unlock(const struct Qdisc *q)
static inline void sch_tree_unlock(struct Qdisc *q)
{
spin_unlock_bh(qdisc_root_sleeping_lock(q));
if (q->flags & TCQ_F_MQROOT)
spin_unlock_bh(qdisc_lock(q));
else
spin_unlock_bh(qdisc_root_sleeping_lock(q));
}
extern struct Qdisc noop_qdisc;
......
......@@ -434,6 +434,7 @@ enum {
TCA_HTB_RATE64,
TCA_HTB_CEIL64,
TCA_HTB_PAD,
TCA_HTB_OFFLOAD,
__TCA_HTB_MAX,
};
......
......@@ -1866,7 +1866,8 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
static int tclass_del_notify(struct net *net,
const struct Qdisc_class_ops *cops,
struct sk_buff *oskb, struct nlmsghdr *n,
struct Qdisc *q, unsigned long cl)
struct Qdisc *q, unsigned long cl,
struct netlink_ext_ack *extack)
{
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
struct sk_buff *skb;
......@@ -1885,7 +1886,7 @@ static int tclass_del_notify(struct net *net,
return -EINVAL;
}
err = cops->delete(q, cl);
err = cops->delete(q, cl, extack);
if (err) {
kfree_skb(skb);
return err;
......@@ -2088,7 +2089,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
goto out;
break;
case RTM_DELTCLASS:
err = tclass_del_notify(net, cops, skb, n, q, cl);
err = tclass_del_notify(net, cops, skb, n, q, cl, extack);
/* Unbind the class with filters with 0 */
tc_bind_tclass(q, portid, clid, 0);
goto out;
......
......@@ -320,7 +320,8 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
return error;
}
static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
static int atm_tc_delete(struct Qdisc *sch, unsigned long arg,
struct netlink_ext_ack *extack)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow = (struct atm_flow_data *)arg;
......
......@@ -1675,7 +1675,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
return err;
}
static int cbq_delete(struct Qdisc *sch, unsigned long arg)
static int cbq_delete(struct Qdisc *sch, unsigned long arg,
struct netlink_ext_ack *extack)
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl = (struct cbq_class *)arg;
......
......@@ -146,7 +146,8 @@ static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl)
kfree(cl);
}
static int drr_delete_class(struct Qdisc *sch, unsigned long arg)
static int drr_delete_class(struct Qdisc *sch, unsigned long arg,
struct netlink_ext_ack *extack)
{
struct drr_sched *q = qdisc_priv(sch);
struct drr_class *cl = (struct drr_class *)arg;
......
......@@ -150,7 +150,8 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
return err;
}
static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
static int dsmark_delete(struct Qdisc *sch, unsigned long arg,
struct netlink_ext_ack *extack)
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
......
......@@ -1090,7 +1090,8 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
}
static int
hfsc_delete_class(struct Qdisc *sch, unsigned long arg)
hfsc_delete_class(struct Qdisc *sch, unsigned long arg,
struct netlink_ext_ack *extack)
{
struct hfsc_sched *q = qdisc_priv(sch);
struct hfsc_class *cl = (struct hfsc_class *)arg;
......
This diff is collapsed.
......@@ -529,7 +529,8 @@ static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
kfree(cl);
}
static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
static int qfq_delete_class(struct Qdisc *sch, unsigned long arg,
struct netlink_ext_ack *extack)
{
struct qfq_sched *q = qdisc_priv(sch);
struct qfq_class *cl = (struct qfq_class *)arg;
......
......@@ -649,7 +649,8 @@ static int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
return -ENOSYS;
}
static int sfb_delete(struct Qdisc *sch, unsigned long cl)
static int sfb_delete(struct Qdisc *sch, unsigned long cl,
struct netlink_ext_ack *extack)
{
return -ENOSYS;
}
......
......@@ -414,6 +414,7 @@ enum {
TCA_HTB_RATE64,
TCA_HTB_CEIL64,
TCA_HTB_PAD,
TCA_HTB_OFFLOAD,
__TCA_HTB_MAX,
};
......