Commit 9b3e446c authored by David S. Miller

Merge tag 'mlx5-updates-2022-02-14' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed says:

====================
mlx5-updates-2022-02-14

mlx5 TX routines improvements

1) From Aya and Tariq (first 3 patches): use the max TX descriptor size
advertised by the device instead of the fixed value of 16 WQEBBs that the
driver has always assumed. This is not a bug fix, since all existing devices
advertise a max value larger than 16, but it future-proofs the driver.
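   As a rough illustration, the limit ends up being derived along the lines of
   the helper this series adds to en.h: the max_wqe_sz_sq capability is reported
   in bytes, so it is converted to 64-byte WQE basic blocks (WQEBBs) and capped
   at the 16-WQEBB ceiling the driver code paths already handle.

	/* Sketch of the capability-derived limit (mirrors mlx5e_get_max_sq_wqebbs
	 * in the diff below): max_wqe_sz_sq is in bytes, MLX5_SEND_WQE_BB is the
	 * 64-byte basic block, and MLX5_SEND_WQE_MAX_WQEBBS (16) remains the
	 * upper bound the driver is willing to use.
	 */
	static inline u16 mlx5e_get_max_sq_wqebbs(struct mlx5_core_dev *mdev)
	{
		return min_t(u16, MLX5_SEND_WQE_MAX_WQEBBS,
			     MLX5_CAP_GEN(mdev, max_wqe_sz_sq) / MLX5_SEND_WQE_BB);
	}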

2) TX synchronization improvements from Maxim, last 12 patches

Maxim Mikityanskiy says:
=======================
mlx5e: Synchronize ndo_select_queue with configuration changes

The kernel can call ndo_select_queue at any time, and there is no direct
way to block it. The implementation of ndo_select_queue in mlx5e expects
the parameters to be consistent and may crash (invalid pointer, division
by zero) if they aren't.

There were attempts to partially fix some of the most frequent crashes,
see commit 846d6da1 ("net/mlx5e: Fix division by 0 in
mlx5e_select_queue") and commit 84c8a874 ("net/mlx5e: Fix division
by 0 in mlx5e_select_queue for representors"). However, they don't
address the issue completely.

This series introduces the proper synchronization mechanism between
mlx5e configuration and TX data path:

1. txq2sq updates are synchronized properly with ndo_start_xmit
   (mlx5e_xmit). The TX queue is stopped while its configuration is being
   updated, and memory barriers ensure the changes are visible before the
   queue is restarted.

2. The set of parameters needed for mlx5e_select_queue is reduced, and
   synchronization using RCU is implemented. This way, changes are
   atomic, and the state seen by mlx5e_select_queue is always consistent
   (see the sketch after this list).

3. A few optimizations are applied to the new implementation of
   mlx5e_select_queue.
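
A condensed sketch of the resulting scheme (simplified from en/selq.c and
mlx5e_safe_switch_params in the diff below; error handling and the dummy
initial parameters are omitted):

	/* Reader: ndo_select_queue runs in BH context, so the active parameter
	 * set is read with rcu_dereference_bh() and stays consistent for the
	 * whole call, even while channels are being reconfigured.
	 */
	selq = rcu_dereference_bh(priv->selq.active);
	if (likely(!selq->is_special_queues)) {
		/* No HTB/PTP queues: pick one of the regular queues and,
		 * if num_tcs > 1, apply the packet's user priority.
		 */
		txq_ix = netdev_pick_tx(dev, skb, NULL);
		...
	}

	/* Writer: configuration changes run under priv->state_lock. A standby
	 * copy is filled in first and swapped in atomically once the new
	 * channels are ready; synchronize_net() inside mlx5e_selq_apply()
	 * guarantees no reader still sees the old copy before it is reused.
	 */
	mlx5e_selq_prepare(&priv->selq, &new_chs.params, !!priv->htb.maj_id);
	err = mlx5e_open_channels(priv, &new_chs);
	...
	mlx5e_selq_apply(&priv->selq);	/* rcu_replace_pointer() + synchronize_net() */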

=======================

====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents d0b78ab1 71753b8e
......@@ -28,7 +28,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en/rqt.o en/tir.o en/rss.o en/rx_res.o \
en_selftest.o en/port.o en/monitor_stats.o en/health.o \
en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \
en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \
en/qos.o en/trap.o en/fs_tt_redirect.o
en/qos.o en/trap.o en/fs_tt_redirect.o en/selq.o
#
# Netdev extra
......
......@@ -59,6 +59,7 @@
#include "lib/hv_vhca.h"
#include "lib/clock.h"
#include "en/rx_res.h"
#include "en/selq.h"
extern const struct net_device_ops mlx5e_netdev_ops;
struct page_pool;
......@@ -172,8 +173,9 @@ struct page_pool;
#define MLX5E_KLM_ENTRIES_PER_WQE(wqe_size)\
ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_ALIGNMENT)
#define MLX5E_MAX_KLM_PER_WQE \
MLX5E_KLM_ENTRIES_PER_WQE(MLX5E_TX_MPW_MAX_NUM_DS << MLX5_MKEY_BSF_OCTO_SIZE)
#define MLX5E_MAX_KLM_PER_WQE(mdev) \
MLX5E_KLM_ENTRIES_PER_WQE(mlx5e_get_sw_max_sq_mpw_wqebbs(mlx5e_get_max_sq_wqebbs(mdev)) \
<< MLX5_MKEY_BSF_OCTO_SIZE)
#define MLX5E_MSG_LEVEL NETIF_MSG_LINK
......@@ -221,6 +223,32 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS);
}
/* The maximum WQE size can be retrieved by max_wqe_sz_sq in
* bytes units. Driver hardens the limitation to 1KB (16
* WQEBBs), unless firmware capability is stricter.
*/
static inline u16 mlx5e_get_max_sq_wqebbs(struct mlx5_core_dev *mdev)
{
return min_t(u16, MLX5_SEND_WQE_MAX_WQEBBS,
MLX5_CAP_GEN(mdev, max_wqe_sz_sq) / MLX5_SEND_WQE_BB);
}
static inline u16 mlx5e_get_sw_max_sq_mpw_wqebbs(u16 max_sq_wqebbs)
{
/* The return value will be multiplied by MLX5_SEND_WQEBB_NUM_DS.
* Since max_sq_wqebbs may be up to MLX5_SEND_WQE_MAX_WQEBBS == 16,
* see mlx5e_get_max_sq_wqebbs(), the multiplication (16 * 4 == 64)
* overflows the 6-bit DS field of Ctrl Segment. Use a bound lower
* than MLX5_SEND_WQE_MAX_WQEBBS to let a full-session WQE be
* cache-aligned.
*/
#if L1_CACHE_BYTES < 128
return min_t(u16, max_sq_wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 1);
#else
return min_t(u16, max_sq_wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 2);
#endif
}
struct mlx5e_tx_wqe {
struct mlx5_wqe_ctrl_seg ctrl;
struct mlx5_wqe_eth_seg eth;
......@@ -427,12 +455,12 @@ struct mlx5e_txqsq {
struct netdev_queue *txq;
u32 sqn;
u16 stop_room;
u16 max_sq_mpw_wqebbs;
u8 min_inline_mode;
struct device *pdev;
__be32 mkey_be;
unsigned long state;
unsigned int hw_mtu;
struct hwtstamp_config *tstamp;
struct mlx5_clock *clock;
struct net_device *netdev;
struct mlx5_core_dev *mdev;
......@@ -446,6 +474,7 @@ struct mlx5e_txqsq {
struct work_struct recover_work;
struct mlx5e_ptpsq *ptpsq;
cqe_ts_to_ns ptp_cyc2time;
u16 max_sq_wqebbs;
} ____cacheline_aligned_in_smp;
struct mlx5e_dma_info {
......@@ -540,6 +569,8 @@ struct mlx5e_xdpsq {
u32 sqn;
struct device *pdev;
__be32 mkey_be;
u16 stop_room;
u16 max_sq_mpw_wqebbs;
u8 min_inline_mode;
unsigned long state;
unsigned int hw_mtu;
......@@ -547,6 +578,7 @@ struct mlx5e_xdpsq {
/* control path */
struct mlx5_wq_ctrl wq_ctrl;
struct mlx5e_channel *channel;
u16 max_sq_wqebbs;
} ____cacheline_aligned_in_smp;
struct mlx5e_ktls_resync_resp;
......@@ -575,6 +607,7 @@ struct mlx5e_icosq {
/* control path */
struct mlx5_wq_ctrl wq_ctrl;
struct mlx5e_channel *channel;
u16 max_sq_wqebbs;
struct work_struct recover_work;
} ____cacheline_aligned_in_smp;
......@@ -876,9 +909,8 @@ struct mlx5e_trap;
struct mlx5e_priv {
/* priv data path fields - start */
struct mlx5e_selq selq;
struct mlx5e_txqsq **txq2sq;
int **channel_tc2realtxq;
int port_ptp_tc2realtxq[MLX5E_MAX_NUM_TC];
#ifdef CONFIG_MLX5_CORE_EN_DCB
struct mlx5e_dcbx_dp dcbx_dp;
#endif
......@@ -921,7 +953,6 @@ struct mlx5e_priv {
u16 drop_rq_q_counter;
struct notifier_block events_nb;
struct notifier_block blocking_events_nb;
int num_tc_x_num_ch;
struct udp_tunnel_nic_info nic_info;
#ifdef CONFIG_MLX5_CORE_EN_DCB
......
......@@ -196,13 +196,13 @@ u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *par
u16 stop_room;
stop_room = mlx5e_tls_get_stop_room(mdev, params);
stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
stop_room += mlx5e_stop_room_for_max_wqe(mdev);
if (is_mpwqe)
/* A MPWQE can take up to the maximum-sized WQE + all the normal
* stop room can be taken if a new packet breaks the active
* MPWQE session and allocates its WQEs right away.
*/
stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
stop_room += mlx5e_stop_room_for_max_wqe(mdev);
return stop_room;
}
......@@ -717,7 +717,7 @@ static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev,
int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
u32 wqebbs;
max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE;
max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE(mdev);
max_hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param);
max_num_of_umr_per_wqe = max_hd_per_wqe / max_klm_per_umr;
rest = max_hd_per_wqe % max_klm_per_umr;
......@@ -774,10 +774,10 @@ static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev,
void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
mlx5e_build_sq_param_common(mdev, param);
param->stop_room = mlx5e_stop_room_for_wqe(1); /* for XSK NOP */
param->stop_room = mlx5e_stop_room_for_wqe(mdev, 1); /* for XSK NOP */
param->is_tls = mlx5e_accel_is_ktls_rx(mdev);
if (param->is_tls)
param->stop_room += mlx5e_stop_room_for_wqe(1); /* for TLS RX resync NOP */
param->stop_room += mlx5e_stop_room_for_wqe(mdev, 1); /* for TLS RX resync NOP */
MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(mdev, reg_umr_sq));
MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
mlx5e_build_ico_cq_param(mdev, log_wq_size, &param->cqp);
......
......@@ -195,7 +195,6 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix,
int node;
sq->pdev = c->pdev;
sq->tstamp = c->tstamp;
sq->clock = &mdev->clock;
sq->mkey_be = c->mkey_be;
sq->netdev = c->netdev;
......@@ -449,7 +448,7 @@ static void mlx5e_ptp_build_sq_param(struct mlx5_core_dev *mdev,
wq = MLX5_ADDR_OF(sqc, sqc, wq);
MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
param->stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
param->stop_room = mlx5e_stop_room_for_max_wqe(mdev);
mlx5e_build_tx_cq_param(mdev, params, &param->cqp);
}
......
......@@ -50,7 +50,6 @@ static int mlx5e_find_unused_qos_qid(struct mlx5e_priv *priv)
struct mlx5e_qos_node {
struct hlist_node hnode;
struct rcu_head rcu;
struct mlx5e_qos_node *parent;
u64 rate;
u32 bw_share;
......@@ -132,7 +131,11 @@ static void mlx5e_sw_node_delete(struct mlx5e_priv *priv, struct mlx5e_qos_node
__clear_bit(node->qid, priv->htb.qos_used_qids);
mlx5e_update_tx_netdev_queues(priv);
}
kfree_rcu(node, rcu);
/* Make sure this qid is no longer selected by mlx5e_select_queue, so
* that mlx5e_reactivate_qos_sq can safely restart the netdev TX queue.
*/
synchronize_net();
kfree(node);
}
/* TX datapath API */
......@@ -273,10 +276,18 @@ static int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs
static void mlx5e_activate_qos_sq(struct mlx5e_priv *priv, struct mlx5e_qos_node *node)
{
struct mlx5e_txqsq *sq;
u16 qid;
sq = mlx5e_get_qos_sq(priv, node->qid);
WRITE_ONCE(priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, node->qid)], sq);
qid = mlx5e_qid_from_qos(&priv->channels, node->qid);
/* If it's a new queue, it will be marked as started at this point.
* Stop it before updating txq2sq.
*/
mlx5e_tx_disable_queue(netdev_get_tx_queue(priv->netdev, qid));
priv->txq2sq[qid] = sq;
/* Make the change to txq2sq visible before the queue is started.
* As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
......@@ -299,8 +310,13 @@ static void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid)
qos_dbg(priv->mdev, "Deactivate QoS SQ qid %u\n", qid);
mlx5e_deactivate_txqsq(sq);
/* The queue is disabled, no synchronization with datapath is needed. */
priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL;
/* Make the change to txq2sq visible before the queue is started again.
* As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
* which pairs with this barrier.
*/
smp_wmb();
}
static void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid)
......@@ -485,9 +501,11 @@ int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls,
opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
if (opened) {
mlx5e_selq_prepare(&priv->selq, &priv->channels.params, true);
err = mlx5e_qos_alloc_queues(priv, &priv->channels);
if (err)
return err;
goto err_cancel_selq;
}
root = mlx5e_sw_node_create_root(priv);
......@@ -508,6 +526,9 @@ int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls,
*/
smp_store_release(&priv->htb.maj_id, htb_maj_id);
if (opened)
mlx5e_selq_apply(&priv->selq);
return 0;
err_sw_node_delete:
......@@ -516,6 +537,8 @@ int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls,
err_free_queues:
if (opened)
mlx5e_qos_close_all_queues(&priv->channels);
err_cancel_selq:
mlx5e_selq_cancel(&priv->selq);
return err;
}
......@@ -526,8 +549,15 @@ int mlx5e_htb_root_del(struct mlx5e_priv *priv)
qos_dbg(priv->mdev, "TC_HTB_DESTROY\n");
/* Wait until real_num_tx_queues is updated for mlx5e_select_queue,
* so that we can safely switch to its non-HTB non-PTP fastpath.
*/
synchronize_net();
mlx5e_selq_prepare(&priv->selq, &priv->channels.params, false);
mlx5e_selq_apply(&priv->selq);
WRITE_ONCE(priv->htb.maj_id, 0);
synchronize_rcu(); /* Sync with mlx5e_select_htb_queue and TX data path. */
root = mlx5e_sw_node_find(priv, MLX5E_HTB_CLASSID_ROOT);
if (!root) {
......
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#include "selq.h"
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/rcupdate.h>
#include "en.h"
#include "en/ptp.h"
struct mlx5e_selq_params {
unsigned int num_regular_queues;
unsigned int num_channels;
unsigned int num_tcs;
union {
u8 is_special_queues;
struct {
bool is_htb : 1;
bool is_ptp : 1;
};
};
};
int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock)
{
struct mlx5e_selq_params *init_params;
selq->state_lock = state_lock;
selq->standby = kvzalloc(sizeof(*selq->standby), GFP_KERNEL);
if (!selq->standby)
return -ENOMEM;
init_params = kvzalloc(sizeof(*selq->active), GFP_KERNEL);
if (!init_params) {
kvfree(selq->standby);
selq->standby = NULL;
return -ENOMEM;
}
/* Assign dummy values, so that mlx5e_select_queue won't crash. */
*init_params = (struct mlx5e_selq_params) {
.num_regular_queues = 1,
.num_channels = 1,
.num_tcs = 1,
.is_htb = false,
.is_ptp = false,
};
rcu_assign_pointer(selq->active, init_params);
return 0;
}
void mlx5e_selq_cleanup(struct mlx5e_selq *selq)
{
WARN_ON_ONCE(selq->is_prepared);
kvfree(selq->standby);
selq->standby = NULL;
selq->is_prepared = true;
mlx5e_selq_apply(selq);
kvfree(selq->standby);
selq->standby = NULL;
}
void mlx5e_selq_prepare(struct mlx5e_selq *selq, struct mlx5e_params *params, bool htb)
{
lockdep_assert_held(selq->state_lock);
WARN_ON_ONCE(selq->is_prepared);
selq->is_prepared = true;
selq->standby->num_channels = params->num_channels;
selq->standby->num_tcs = mlx5e_get_dcb_num_tc(params);
selq->standby->num_regular_queues =
selq->standby->num_channels * selq->standby->num_tcs;
selq->standby->is_htb = htb;
selq->standby->is_ptp = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_TX_PORT_TS);
}
void mlx5e_selq_apply(struct mlx5e_selq *selq)
{
struct mlx5e_selq_params *old_params;
WARN_ON_ONCE(!selq->is_prepared);
selq->is_prepared = false;
old_params = rcu_replace_pointer(selq->active, selq->standby,
lockdep_is_held(selq->state_lock));
synchronize_net(); /* Wait until ndo_select_queue starts emitting correct values. */
selq->standby = old_params;
}
void mlx5e_selq_cancel(struct mlx5e_selq *selq)
{
lockdep_assert_held(selq->state_lock);
WARN_ON_ONCE(!selq->is_prepared);
selq->is_prepared = false;
}
#ifdef CONFIG_MLX5_CORE_EN_DCB
static int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb)
{
int dscp_cp = 0;
if (skb->protocol == htons(ETH_P_IP))
dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
else if (skb->protocol == htons(ETH_P_IPV6))
dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
return priv->dcbx_dp.dscp2prio[dscp_cp];
}
#endif
static int mlx5e_get_up(struct mlx5e_priv *priv, struct sk_buff *skb)
{
#ifdef CONFIG_MLX5_CORE_EN_DCB
if (READ_ONCE(priv->dcbx_dp.trust_state) == MLX5_QPTS_TRUST_DSCP)
return mlx5e_get_dscp_up(priv, skb);
#endif
if (skb_vlan_tag_present(skb))
return skb_vlan_tag_get_prio(skb);
return 0;
}
static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb,
struct mlx5e_selq_params *selq)
{
struct mlx5e_priv *priv = netdev_priv(dev);
int up;
up = selq->num_tcs > 1 ? mlx5e_get_up(priv, skb) : 0;
return selq->num_regular_queues + up;
}
static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb)
{
u16 classid;
/* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */
if ((TC_H_MAJ(skb->priority) >> 16) == smp_load_acquire(&priv->htb.maj_id))
classid = TC_H_MIN(skb->priority);
else
classid = READ_ONCE(priv->htb.defcls);
if (!classid)
return 0;
return mlx5e_get_txq_by_classid(priv, classid);
}
u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_selq_params *selq;
int txq_ix, up;
selq = rcu_dereference_bh(priv->selq.active);
/* This is a workaround needed only for the mlx5e_netdev_change_profile
* flow that zeroes out the whole priv without unregistering the netdev
* and without preventing ndo_select_queue from being called.
*/
if (unlikely(!selq))
return 0;
if (likely(!selq->is_special_queues)) {
/* No special queues, netdev_pick_tx returns one of the regular ones. */
txq_ix = netdev_pick_tx(dev, skb, NULL);
if (selq->num_tcs <= 1)
return txq_ix;
up = mlx5e_get_up(priv, skb);
/* Normalize any picked txq_ix to [0, num_channels),
* So we can return a txq_ix that matches the channel and
* packet UP.
*/
return mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels) +
up * selq->num_channels;
}
if (unlikely(selq->is_htb)) {
/* num_tcs == 1, shortcut for PTP */
txq_ix = mlx5e_select_htb_queue(priv, skb);
if (txq_ix > 0)
return txq_ix;
if (unlikely(selq->is_ptp && mlx5e_use_ptpsq(skb)))
return selq->num_channels;
txq_ix = netdev_pick_tx(dev, skb, NULL);
/* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs.
* If they are selected, switch to regular queues.
* Driver to select these queues only at mlx5e_select_ptpsq()
* and mlx5e_select_htb_queue().
*/
return mlx5e_txq_to_ch_ix_htb(txq_ix, selq->num_channels);
}
/* PTP is enabled */
if (mlx5e_use_ptpsq(skb))
return mlx5e_select_ptpsq(dev, skb, selq);
txq_ix = netdev_pick_tx(dev, skb, NULL);
/* Normalize any picked txq_ix to [0, num_channels). Queues in range
* [0, num_regular_queues) will be mapped to the corresponding channel
* index, so that we can apply the packet's UP (if num_tcs > 1).
* If netdev_pick_tx() picks ptp_channel, switch to a regular queue,
* because driver should select the PTP only at mlx5e_select_ptpsq().
*/
txq_ix = mlx5e_txq_to_ch_ix(txq_ix, selq->num_channels);
if (selq->num_tcs <= 1)
return txq_ix;
up = mlx5e_get_up(priv, skb);
return txq_ix + up * selq->num_channels;
}
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#ifndef __MLX5_EN_SELQ_H__
#define __MLX5_EN_SELQ_H__
#include <linux/kernel.h>
struct mlx5e_selq_params;
struct mlx5e_selq {
struct mlx5e_selq_params __rcu *active;
struct mlx5e_selq_params *standby;
struct mutex *state_lock; /* points to priv->state_lock */
bool is_prepared;
};
struct mlx5e_params;
struct net_device;
struct sk_buff;
int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock);
void mlx5e_selq_cleanup(struct mlx5e_selq *selq);
void mlx5e_selq_prepare(struct mlx5e_selq *selq, struct mlx5e_params *params, bool htb);
void mlx5e_selq_apply(struct mlx5e_selq *selq);
void mlx5e_selq_cancel(struct mlx5e_selq *selq);
static inline u16 mlx5e_txq_to_ch_ix(u16 txq, u16 num_channels)
{
while (unlikely(txq >= num_channels))
txq -= num_channels;
return txq;
}
static inline u16 mlx5e_txq_to_ch_ix_htb(u16 txq, u16 num_channels)
{
if (unlikely(txq >= num_channels)) {
if (unlikely(txq >= num_channels << 3))
txq %= num_channels;
else
do
txq -= num_channels;
while (txq >= num_channels);
}
return txq;
}
u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev);
#endif /* __MLX5_EN_SELQ_H__ */
......@@ -9,19 +9,6 @@
#define MLX5E_TX_WQE_EMPTY_DS_COUNT (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
* (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment.
* We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a
* full-session WQE be cache-aligned.
*/
#if L1_CACHE_BYTES < 128
#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
#else
#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
#endif
#define MLX5E_TX_MPW_MAX_NUM_DS (MLX5E_TX_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS)
#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
#define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)
......@@ -68,8 +55,6 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq);
/* TX */
u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev);
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
......@@ -308,9 +293,9 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more);
void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq);
static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session)
static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs)
{
return session->ds_count == MLX5E_TX_MPW_MAX_NUM_DS;
return session->ds_count == max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS;
}
static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq)
......@@ -431,10 +416,10 @@ mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg,
}
}
static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size)
{
BUILD_BUG_ON(PAGE_SIZE / MLX5_SEND_WQE_BB < MLX5_SEND_WQE_MAX_WQEBBS);
#define MLX5E_STOP_ROOM(wqebbs) ((wqebbs) * 2 - 1)
static inline u16 mlx5e_stop_room_for_wqe(struct mlx5_core_dev *mdev, u16 wqe_size)
{
/* A WQE must not cross the page boundary, hence two conditions:
* 1. Its size must not exceed the page size.
* 2. If the WQE size is X, and the space remaining in a page is less
......@@ -443,18 +428,28 @@ static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size)
* stop room of X-1 + X.
* WQE size is also limited by the hardware limit.
*/
WARN_ONCE(wqe_size > mlx5e_get_max_sq_wqebbs(mdev),
"wqe_size %u is greater than max SQ WQEBBs %u",
wqe_size, mlx5e_get_max_sq_wqebbs(mdev));
if (__builtin_constant_p(wqe_size))
BUILD_BUG_ON(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS);
else
WARN_ON_ONCE(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS);
return wqe_size * 2 - 1;
return MLX5E_STOP_ROOM(wqe_size);
}
static inline u16 mlx5e_stop_room_for_max_wqe(struct mlx5_core_dev *mdev)
{
return MLX5E_STOP_ROOM(mlx5e_get_max_sq_wqebbs(mdev));
}
static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size)
{
u16 room = sq->reserved_room + mlx5e_stop_room_for_wqe(wqe_size);
u16 room = sq->reserved_room;
WARN_ONCE(wqe_size > sq->max_sq_wqebbs,
"wqe_size %u is greater than max SQ WQEBBs %u",
wqe_size, sq->max_sq_wqebbs);
room += MLX5E_STOP_ROOM(wqe_size);
return mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room);
}
......
......@@ -199,7 +199,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
struct mlx5e_tx_wqe *wqe;
u16 pi;
pi = mlx5e_xdpsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS);
pi = mlx5e_xdpsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
wqe = MLX5E_TX_FETCH_WQE(sq, pi);
net_prefetchw(wqe->data);
......@@ -245,10 +245,8 @@ enum {
INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
{
if (unlikely(!sq->mpwqe.wqe)) {
const u16 stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
stop_room))) {
sq->stop_room))) {
/* SQ is full, ring doorbell */
mlx5e_xmit_xdp_doorbell(sq);
sq->stats->full++;
......@@ -288,7 +286,7 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptx
mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);
if (unlikely(mlx5e_xdp_mpqwe_is_full(session)))
if (unlikely(mlx5e_xdp_mpqwe_is_full(session, sq->max_sq_mpw_wqebbs)))
mlx5e_xdp_mpwqe_complete(sq);
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
......
......@@ -123,12 +123,13 @@ static inline bool mlx5e_xdp_get_inline_state(struct mlx5e_xdpsq *sq, bool cur)
return cur;
}
static inline bool mlx5e_xdp_mpqwe_is_full(struct mlx5e_tx_mpwqe *session)
static inline bool mlx5e_xdp_mpqwe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs)
{
if (session->inline_on)
return session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT >
MLX5E_TX_MPW_MAX_NUM_DS;
return mlx5e_tx_mpwqe_is_full(session);
max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS;
return mlx5e_tx_mpwqe_is_full(session, max_sq_mpw_wqebbs);
}
struct mlx5e_xdp_wqe_info {
......
......@@ -32,9 +32,9 @@ u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *pa
num_dumps = mlx5e_ktls_dumps_num_wqes(params, MAX_SKB_FRAGS, TLS_MAX_PAYLOAD_SIZE);
stop_room += mlx5e_stop_room_for_wqe(MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS);
stop_room += mlx5e_stop_room_for_wqe(MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS);
stop_room += num_dumps * mlx5e_stop_room_for_wqe(MLX5E_KTLS_DUMP_WQEBBS);
stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS);
stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS);
stop_room += num_dumps * mlx5e_stop_room_for_wqe(mdev, MLX5E_KTLS_DUMP_WQEBBS);
return stop_room;
}
......
......@@ -386,5 +386,5 @@ u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *par
/* FPGA */
/* Resync SKB. */
return mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
return mlx5e_stop_room_for_max_wqe(mdev);
}
......@@ -1142,7 +1142,7 @@ static int mlx5e_update_trust_state_hw(struct mlx5e_priv *priv, void *context)
err = mlx5_set_trust_state(priv->mdev, *trust_state);
if (err)
return err;
priv->dcbx_dp.trust_state = *trust_state;
WRITE_ONCE(priv->dcbx_dp.trust_state, *trust_state);
return 0;
}
......@@ -1187,16 +1187,18 @@ static int mlx5e_set_dscp2prio(struct mlx5e_priv *priv, u8 dscp, u8 prio)
static int mlx5e_trust_initialize(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
u8 trust_state;
int err;
priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP;
if (!MLX5_DSCP_SUPPORTED(mdev))
if (!MLX5_DSCP_SUPPORTED(mdev)) {
WRITE_ONCE(priv->dcbx_dp.trust_state, MLX5_QPTS_TRUST_PCP);
return 0;
}
err = mlx5_query_trust_state(priv->mdev, &priv->dcbx_dp.trust_state);
err = mlx5_query_trust_state(priv->mdev, &trust_state);
if (err)
return err;
WRITE_ONCE(priv->dcbx_dp.trust_state, trust_state);
mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &priv->channels.params,
priv->dcbx_dp.trust_state);
......
......@@ -72,12 +72,13 @@
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
{
bool striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) &&
MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
MLX5_CAP_ETH(mdev, reg_umr_sq);
u16 max_wqe_sz_cap = MLX5_CAP_GEN(mdev, max_wqe_sz_sq);
bool inline_umr = MLX5E_UMR_WQE_INLINE_SZ <= max_wqe_sz_cap;
bool striding_rq_umr, inline_umr;
u16 max_wqe_sz_cap;
striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) && MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
MLX5_CAP_ETH(mdev, reg_umr_sq);
max_wqe_sz_cap = mlx5e_get_max_sq_wqebbs(mdev) * MLX5_SEND_WQE_BB;
inline_umr = max_wqe_sz_cap >= MLX5E_UMR_WQE_INLINE_SZ;
if (!striding_rq_umr)
return false;
if (!inline_umr) {
......@@ -1164,6 +1165,9 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
is_redirect ?
&c->priv->channel_stats[c->ix]->xdpsq :
&c->priv->channel_stats[c->ix]->rq_xdpsq;
sq->max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev);
sq->stop_room = MLX5E_STOP_ROOM(sq->max_sq_wqebbs);
sq->max_sq_mpw_wqebbs = mlx5e_get_sw_max_sq_mpw_wqebbs(sq->max_sq_wqebbs);
param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
......@@ -1238,6 +1242,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
sq->channel = c;
sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
sq->reserved_room = param->stop_room;
sq->max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev);
param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
......@@ -1313,7 +1318,6 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
int err;
sq->pdev = c->pdev;
sq->tstamp = c->tstamp;
sq->clock = &mdev->clock;
sq->mkey_be = c->mkey_be;
sq->netdev = c->netdev;
......@@ -1324,6 +1328,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
sq->min_inline_mode = params->tx_min_inline_mode;
sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
sq->max_sq_wqebbs = mlx5e_get_max_sq_wqebbs(mdev);
sq->max_sq_mpw_wqebbs = mlx5e_get_sw_max_sq_mpw_wqebbs(sq->max_sq_wqebbs);
INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
......@@ -2677,39 +2683,41 @@ static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
struct mlx5e_txqsq *sq = &c->sq[tc];
priv->txq2sq[sq->txq_ix] = sq;
priv->channel_tc2realtxq[i][tc] = i + tc * ch;
}
}
if (!priv->channels.ptp)
return;
goto out;
if (!test_bit(MLX5E_PTP_STATE_TX, priv->channels.ptp->state))
return;
goto out;
for (tc = 0; tc < num_tc; tc++) {
struct mlx5e_ptp *c = priv->channels.ptp;
struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq;
priv->txq2sq[sq->txq_ix] = sq;
priv->port_ptp_tc2realtxq[tc] = priv->num_tc_x_num_ch + tc;
}
}
static void mlx5e_update_num_tc_x_num_ch(struct mlx5e_priv *priv)
{
/* Sync with mlx5e_select_queue. */
WRITE_ONCE(priv->num_tc_x_num_ch,
mlx5e_get_dcb_num_tc(&priv->channels.params) * priv->channels.num);
out:
/* Make the change to txq2sq visible before the queue is started.
* As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
* which pairs with this barrier.
*/
smp_wmb();
}
void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
{
mlx5e_update_num_tc_x_num_ch(priv);
mlx5e_build_txq_maps(priv);
mlx5e_activate_channels(&priv->channels);
mlx5e_qos_activate_queues(priv);
mlx5e_xdp_tx_enable(priv);
/* dev_watchdog() wants all TX queues to be started when the carrier is
* OK, including the ones in range real_num_tx_queues..num_tx_queues-1.
* Make it happy to avoid TX timeout false alarms.
*/
netif_tx_start_all_queues(priv->netdev);
if (mlx5e_is_vport_rep(priv))
......@@ -2729,11 +2737,13 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
if (mlx5e_is_vport_rep(priv))
mlx5e_remove_sqs_fwd_rules(priv);
/* FIXME: This is a W/A only for tx timeout watch dog false alarm when
* polling for inactive tx queues.
/* The results of ndo_select_queue are unreliable, while netdev config
* is being changed (real_num_tx_queues, num_tc). Stop all queues to
* prevent ndo_start_xmit from being called, so that it can assume that
* the selected queue is always valid.
*/
netif_tx_stop_all_queues(priv->netdev);
netif_tx_disable(priv->netdev);
mlx5e_xdp_tx_disable(priv);
mlx5e_deactivate_channels(&priv->channels);
}
......@@ -2793,6 +2803,7 @@ static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
mlx5e_close_channels(&old_chs);
priv->profile->update_rx(priv);
mlx5e_selq_apply(&priv->selq);
out:
mlx5e_activate_priv_channels(priv);
......@@ -2816,13 +2827,24 @@ int mlx5e_safe_switch_params(struct mlx5e_priv *priv,
return mlx5e_switch_priv_params(priv, params, preactivate, context);
new_chs.params = *params;
mlx5e_selq_prepare(&priv->selq, &new_chs.params, !!priv->htb.maj_id);
err = mlx5e_open_channels(priv, &new_chs);
if (err)
return err;
goto err_cancel_selq;
err = mlx5e_switch_priv_channels(priv, &new_chs, preactivate, context);
if (err)
mlx5e_close_channels(&new_chs);
goto err_close;
return 0;
err_close:
mlx5e_close_channels(&new_chs);
err_cancel_selq:
mlx5e_selq_cancel(&priv->selq);
return err;
}
......@@ -2862,6 +2884,8 @@ int mlx5e_open_locked(struct net_device *netdev)
struct mlx5e_priv *priv = netdev_priv(netdev);
int err;
mlx5e_selq_prepare(&priv->selq, &priv->channels.params, !!priv->htb.maj_id);
set_bit(MLX5E_STATE_OPENED, &priv->state);
err = mlx5e_open_channels(priv, &priv->channels);
......@@ -2869,6 +2893,7 @@ int mlx5e_open_locked(struct net_device *netdev)
goto err_clear_state_opened_flag;
priv->profile->update_rx(priv);
mlx5e_selq_apply(&priv->selq);
mlx5e_activate_priv_channels(priv);
mlx5e_apply_traps(priv, true);
if (priv->profile->update_carrier)
......@@ -2879,6 +2904,7 @@ int mlx5e_open_locked(struct net_device *netdev)
err_clear_state_opened_flag:
clear_bit(MLX5E_STATE_OPENED, &priv->state);
mlx5e_selq_cancel(&priv->selq);
return err;
}
......@@ -4637,11 +4663,6 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
priv->max_nch);
mlx5e_params_mqprio_reset(params);
/* Set an initial non-zero value, so that mlx5e_select_queue won't
* divide by zero if called before first activating channels.
*/
priv->num_tc_x_num_ch = params->num_channels * params->mqprio.num_tc;
/* SQ */
params->log_sq_size = is_kdump_kernel() ?
MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
......@@ -5194,7 +5215,8 @@ int mlx5e_priv_init(struct mlx5e_priv *priv,
struct net_device *netdev,
struct mlx5_core_dev *mdev)
{
int nch, num_txqs, node, i;
int nch, num_txqs, node;
int err;
num_txqs = netdev->num_tx_queues;
nch = mlx5e_calc_max_nch(mdev, netdev, profile);
......@@ -5211,6 +5233,11 @@ int mlx5e_priv_init(struct mlx5e_priv *priv,
return -ENOMEM;
mutex_init(&priv->state_lock);
err = mlx5e_selq_init(&priv->selq, &priv->state_lock);
if (err)
goto err_free_cpumask;
hash_init(priv->htb.qos_tc2node);
INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
......@@ -5219,7 +5246,7 @@ int mlx5e_priv_init(struct mlx5e_priv *priv,
priv->wq = create_singlethread_workqueue("mlx5e");
if (!priv->wq)
goto err_free_cpumask;
goto err_free_selq;
priv->txq2sq = kcalloc_node(num_txqs, sizeof(*priv->txq2sq), GFP_KERNEL, node);
if (!priv->txq2sq)
......@@ -5229,36 +5256,21 @@ int mlx5e_priv_init(struct mlx5e_priv *priv,
if (!priv->tx_rates)
goto err_free_txq2sq;
priv->channel_tc2realtxq =
kcalloc_node(nch, sizeof(*priv->channel_tc2realtxq), GFP_KERNEL, node);
if (!priv->channel_tc2realtxq)
goto err_free_tx_rates;
for (i = 0; i < nch; i++) {
priv->channel_tc2realtxq[i] =
kcalloc_node(profile->max_tc, sizeof(**priv->channel_tc2realtxq),
GFP_KERNEL, node);
if (!priv->channel_tc2realtxq[i])
goto err_free_channel_tc2realtxq;
}
priv->channel_stats =
kcalloc_node(nch, sizeof(*priv->channel_stats), GFP_KERNEL, node);
if (!priv->channel_stats)
goto err_free_channel_tc2realtxq;
goto err_free_tx_rates;
return 0;
err_free_channel_tc2realtxq:
while (--i >= 0)
kfree(priv->channel_tc2realtxq[i]);
kfree(priv->channel_tc2realtxq);
err_free_tx_rates:
kfree(priv->tx_rates);
err_free_txq2sq:
kfree(priv->txq2sq);
err_destroy_workqueue:
destroy_workqueue(priv->wq);
err_free_selq:
mlx5e_selq_cleanup(&priv->selq);
err_free_cpumask:
free_cpumask_var(priv->scratchpad.cpumask);
return -ENOMEM;
......@@ -5275,12 +5287,12 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
for (i = 0; i < priv->stats_nch; i++)
kvfree(priv->channel_stats[i]);
kfree(priv->channel_stats);
for (i = 0; i < priv->max_nch; i++)
kfree(priv->channel_tc2realtxq[i]);
kfree(priv->channel_tc2realtxq);
kfree(priv->tx_rates);
kfree(priv->txq2sq);
destroy_workqueue(priv->wq);
mutex_lock(&priv->state_lock);
mlx5e_selq_cleanup(&priv->selq);
mutex_unlock(&priv->state_lock);
free_cpumask_var(priv->scratchpad.cpumask);
for (i = 0; i < priv->htb.max_qos_sqs; i++)
......@@ -5346,6 +5358,7 @@ mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *prof
}
netif_carrier_off(netdev);
netif_tx_disable(netdev);
dev_net_set(netdev, mlx5_core_net(mdev));
return netdev;
......
......@@ -632,11 +632,6 @@ static void mlx5e_build_rep_params(struct net_device *netdev)
params->mqprio.num_tc = 1;
params->tunneled_offload_en = false;
/* Set an initial non-zero value, so that mlx5e_select_queue won't
* divide by zero if called before first activating channels.
*/
priv->num_tc_x_num_ch = params->num_channels * params->mqprio.num_tc;
mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
}
......
......@@ -620,7 +620,7 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
struct mlx5e_icosq *sq = rq->icosq;
int i, err, max_klm_entries, len;
max_klm_entries = MLX5E_MAX_KLM_PER_WQE;
max_klm_entries = MLX5E_MAX_KLM_PER_WQE(rq->mdev);
klm_entries = bitmap_find_window(shampo->bitmap,
shampo->hd_per_wqe,
shampo->hd_per_wq, shampo->pi);
......
......@@ -53,117 +53,6 @@ static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma)
}
}
#ifdef CONFIG_MLX5_CORE_EN_DCB
static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb)
{
int dscp_cp = 0;
if (skb->protocol == htons(ETH_P_IP))
dscp_cp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
else if (skb->protocol == htons(ETH_P_IPV6))
dscp_cp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
return priv->dcbx_dp.dscp2prio[dscp_cp];
}
#endif
static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb)
{
struct mlx5e_priv *priv = netdev_priv(dev);
int up = 0;
if (!netdev_get_num_tc(dev))
goto return_txq;
#ifdef CONFIG_MLX5_CORE_EN_DCB
if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP)
up = mlx5e_get_dscp_up(priv, skb);
else
#endif
if (skb_vlan_tag_present(skb))
up = skb_vlan_tag_get_prio(skb);
return_txq:
return priv->port_ptp_tc2realtxq[up];
}
static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb,
u16 htb_maj_id)
{
u16 classid;
if ((TC_H_MAJ(skb->priority) >> 16) == htb_maj_id)
classid = TC_H_MIN(skb->priority);
else
classid = READ_ONCE(priv->htb.defcls);
if (!classid)
return 0;
return mlx5e_get_txq_by_classid(priv, classid);
}
u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
int num_tc_x_num_ch;
int txq_ix;
int up = 0;
int ch_ix;
/* Sync with mlx5e_update_num_tc_x_num_ch - avoid refetching. */
num_tc_x_num_ch = READ_ONCE(priv->num_tc_x_num_ch);
if (unlikely(dev->real_num_tx_queues > num_tc_x_num_ch)) {
struct mlx5e_ptp *ptp_channel;
/* Order maj_id before defcls - pairs with mlx5e_htb_root_add. */
u16 htb_maj_id = smp_load_acquire(&priv->htb.maj_id);
if (unlikely(htb_maj_id)) {
txq_ix = mlx5e_select_htb_queue(priv, skb, htb_maj_id);
if (txq_ix > 0)
return txq_ix;
}
ptp_channel = READ_ONCE(priv->channels.ptp);
if (unlikely(ptp_channel &&
test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state) &&
mlx5e_use_ptpsq(skb)))
return mlx5e_select_ptpsq(dev, skb);
txq_ix = netdev_pick_tx(dev, skb, NULL);
/* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs.
* If they are selected, switch to regular queues.
* Driver to select these queues only at mlx5e_select_ptpsq()
* and mlx5e_select_htb_queue().
*/
if (unlikely(txq_ix >= num_tc_x_num_ch))
txq_ix %= num_tc_x_num_ch;
} else {
txq_ix = netdev_pick_tx(dev, skb, NULL);
}
if (!netdev_get_num_tc(dev))
return txq_ix;
#ifdef CONFIG_MLX5_CORE_EN_DCB
if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_DSCP)
up = mlx5e_get_dscp_up(priv, skb);
else
#endif
if (skb_vlan_tag_present(skb))
up = skb_vlan_tag_get_prio(skb);
/* Normalize any picked txq_ix to [0, num_channels),
* So we can return a txq_ix that matches the channel and
* packet UP.
*/
ch_ix = priv->txq2sq[txq_ix]->ch_ix;
return priv->channel_tc2realtxq[ch_ix][up];
}
static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb)
{
#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
......@@ -544,7 +433,7 @@ static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe *wqe;
u16 pi;
pi = mlx5e_txqsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS);
pi = mlx5e_txqsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
wqe = MLX5E_TX_FETCH_WQE(sq, pi);
net_prefetchw(wqe->data);
......@@ -645,7 +534,7 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
mlx5e_tx_skb_update_hwts_flags(skb);
if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe))) {
if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe, sq->max_sq_mpw_wqebbs))) {
/* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */
cseg = mlx5e_tx_mpwqe_session_complete(sq);
......@@ -691,8 +580,21 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
struct mlx5e_txqsq *sq;
u16 pi;
/* All changes to txq2sq are performed in sync with mlx5e_xmit, when the
* queue being changed is disabled, and smp_wmb guarantees that the
* changes are visible before mlx5e_xmit tries to read from txq2sq. It
* guarantees that the value of txq2sq[qid] doesn't change while
* mlx5e_xmit is running on queue number qid. smb_wmb is paired with
* HARD_TX_LOCK around ndo_start_xmit, which serves as an ACQUIRE.
*/
sq = priv->txq2sq[skb_get_queue_mapping(skb)];
if (unlikely(!sq)) {
/* Two cases when sq can be NULL:
* 1. The HTB node is registered, and mlx5e_select_queue
* selected its queue ID, but the SQ itself is not yet created.
* 2. HTB SQ creation failed. Similar to the previous case, but
* the SQ won't be created.
*/
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}
......