Commit 67936e13 authored by Tariq Toukan, committed by Saeed Mahameed

net/mlx5e: Let channels be SD-aware

Distribute the channels between the different SD-devices to achieve
local NUMA node performance on multiple NUMA nodes.

Each channel works against one specific mdev, creating all datapath
queues against it.

We distribute channels to mdevs in a round-robin policy.

Example for 2 mdevs and 6 channels:
+-------+---------+
| ch ix | mdev ix |
+-------+---------+
|   0   |    0    |
|   1   |    1    |
|   2   |    0    |
|   3   |    1    |
|   4   |    0    |
|   5   |    1    |
+-------+---------+

This round-robin distribution policy is preferred over another suggested
intuitive distribution, in which we first distribute one half of the
channels to mdev #0 and then the second half to mdev #1.

We prefer round-robin for a reason: it is less influenced by changes in
the number of channels. The mapping between channel index and mdev is
fixed, no matter how many channels the user configures. As the channel
stats persist across channel closure, changing the mapping every
single time would make the accumulated stats less representative of the
channel's history.

Per-channel objects should stop using the primary mdev (priv->mdev)
directly, and instead move to using their own channel's mdev.
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Gal Pressman <gal@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
parent 846122b1
...@@ -792,6 +792,7 @@ struct mlx5e_channel { ...@@ -792,6 +792,7 @@ struct mlx5e_channel {
struct hwtstamp_config *tstamp; struct hwtstamp_config *tstamp;
DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES);
int ix; int ix;
int vec_ix;
int cpu; int cpu;
/* Sync between icosq recovery and XSK enable/disable. */ /* Sync between icosq recovery and XSK enable/disable. */
struct mutex icosq_recovery_lock; struct mutex icosq_recovery_lock;
......
...@@ -688,7 +688,7 @@ void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e ...@@ -688,7 +688,7 @@ void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e
.napi = &c->napi, .napi = &c->napi,
.ch_stats = c->stats, .ch_stats = c->stats,
.node = cpu_to_node(c->cpu), .node = cpu_to_node(c->cpu),
.ix = c->ix, .ix = c->vec_ix,
}; };
} }
......
...@@ -122,8 +122,8 @@ int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs, ...@@ -122,8 +122,8 @@ int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs,
memset(&param_sq, 0, sizeof(param_sq)); memset(&param_sq, 0, sizeof(param_sq));
memset(&param_cq, 0, sizeof(param_cq)); memset(&param_cq, 0, sizeof(param_cq));
mlx5e_build_sq_param(priv->mdev, params, &param_sq); mlx5e_build_sq_param(c->mdev, params, &param_sq);
mlx5e_build_tx_cq_param(priv->mdev, params, &param_cq); mlx5e_build_tx_cq_param(c->mdev, params, &param_cq);
err = mlx5e_open_cq(c->mdev, params->tx_cq_moderation, &param_cq, &ccp, &sq->cq); err = mlx5e_open_cq(c->mdev, params->tx_cq_moderation, &param_cq, &ccp, &sq->cq);
if (err) if (err)
goto err_free_sq; goto err_free_sq;
...@@ -176,7 +176,7 @@ int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id) ...@@ -176,7 +176,7 @@ int mlx5e_activate_qos_sq(void *data, u16 node_qid, u32 hw_id)
*/ */
smp_wmb(); smp_wmb();
qos_dbg(priv->mdev, "Activate QoS SQ qid %u\n", node_qid); qos_dbg(sq->mdev, "Activate QoS SQ qid %u\n", node_qid);
mlx5e_activate_txqsq(sq); mlx5e_activate_txqsq(sq);
return 0; return 0;
...@@ -190,7 +190,7 @@ void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid) ...@@ -190,7 +190,7 @@ void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid)
if (!sq) /* Handle the case when the SQ failed to open. */ if (!sq) /* Handle the case when the SQ failed to open. */
return; return;
qos_dbg(priv->mdev, "Deactivate QoS SQ qid %u\n", qid); qos_dbg(sq->mdev, "Deactivate QoS SQ qid %u\n", qid);
mlx5e_deactivate_txqsq(sq); mlx5e_deactivate_txqsq(sq);
priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL; priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL;
......
...@@ -294,8 +294,8 @@ static void mlx5e_rx_reporter_diagnose_generic_rq(struct mlx5e_rq *rq, ...@@ -294,8 +294,8 @@ static void mlx5e_rx_reporter_diagnose_generic_rq(struct mlx5e_rq *rq,
params = &priv->channels.params; params = &priv->channels.params;
rq_sz = mlx5e_rqwq_get_size(rq); rq_sz = mlx5e_rqwq_get_size(rq);
real_time = mlx5_is_real_time_rq(priv->mdev); real_time = mlx5_is_real_time_rq(rq->mdev);
rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(priv->mdev, params, NULL)); rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(rq->mdev, params, NULL));
mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ"); mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ");
devlink_fmsg_u8_pair_put(fmsg, "type", params->rq_wq_type); devlink_fmsg_u8_pair_put(fmsg, "type", params->rq_wq_type);
......
...@@ -219,7 +219,6 @@ mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg, ...@@ -219,7 +219,6 @@ mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg,
struct mlx5e_txqsq *sq, int tc) struct mlx5e_txqsq *sq, int tc)
{ {
bool stopped = netif_xmit_stopped(sq->txq); bool stopped = netif_xmit_stopped(sq->txq);
struct mlx5e_priv *priv = sq->priv;
u8 state; u8 state;
int err; int err;
...@@ -227,7 +226,7 @@ mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg, ...@@ -227,7 +226,7 @@ mlx5e_tx_reporter_build_diagnose_output_sq_common(struct devlink_fmsg *fmsg,
devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix); devlink_fmsg_u32_pair_put(fmsg, "txq ix", sq->txq_ix);
devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn); devlink_fmsg_u32_pair_put(fmsg, "sqn", sq->sqn);
err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state); err = mlx5_core_query_sq_state(sq->mdev, sq->sqn, &state);
if (!err) if (!err)
devlink_fmsg_u8_pair_put(fmsg, "HW state", state); devlink_fmsg_u8_pair_put(fmsg, "HW state", state);
......
...@@ -6,10 +6,10 @@ ...@@ -6,10 +6,10 @@
#include "setup.h" #include "setup.h"
#include "en/params.h" #include "en/params.h"
static int mlx5e_xsk_map_pool(struct mlx5e_priv *priv, static int mlx5e_xsk_map_pool(struct mlx5_core_dev *mdev,
struct xsk_buff_pool *pool) struct xsk_buff_pool *pool)
{ {
struct device *dev = mlx5_core_dma_dev(priv->mdev); struct device *dev = mlx5_core_dma_dev(mdev);
return xsk_pool_dma_map(pool, dev, DMA_ATTR_SKIP_CPU_SYNC); return xsk_pool_dma_map(pool, dev, DMA_ATTR_SKIP_CPU_SYNC);
} }
...@@ -89,7 +89,7 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, ...@@ -89,7 +89,7 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
if (unlikely(!mlx5e_xsk_is_pool_sane(pool))) if (unlikely(!mlx5e_xsk_is_pool_sane(pool)))
return -EINVAL; return -EINVAL;
err = mlx5e_xsk_map_pool(priv, pool); err = mlx5e_xsk_map_pool(mlx5_sd_ch_ix_get_dev(priv->mdev, ix), pool);
if (unlikely(err)) if (unlikely(err))
return err; return err;
......
...@@ -267,7 +267,7 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, ...@@ -267,7 +267,7 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq,
goto err_out; goto err_out;
} }
pdev = mlx5_core_dma_dev(sq->channel->priv->mdev); pdev = mlx5_core_dma_dev(sq->channel->mdev);
buf->dma_addr = dma_map_single(pdev, &buf->progress, buf->dma_addr = dma_map_single(pdev, &buf->progress,
PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE);
if (unlikely(dma_mapping_error(pdev, buf->dma_addr))) { if (unlikely(dma_mapping_error(pdev, buf->dma_addr))) {
...@@ -425,14 +425,12 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, ...@@ -425,14 +425,12 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
{ {
struct mlx5e_ktls_rx_resync_buf *buf = wi->tls_get_params.buf; struct mlx5e_ktls_rx_resync_buf *buf = wi->tls_get_params.buf;
struct mlx5e_ktls_offload_context_rx *priv_rx; struct mlx5e_ktls_offload_context_rx *priv_rx;
struct mlx5e_ktls_rx_resync_ctx *resync;
u8 tracker_state, auth_state, *ctx; u8 tracker_state, auth_state, *ctx;
struct device *dev; struct device *dev;
u32 hw_seq; u32 hw_seq;
priv_rx = buf->priv_rx; priv_rx = buf->priv_rx;
resync = &priv_rx->resync; dev = mlx5_core_dma_dev(sq->channel->mdev);
dev = mlx5_core_dma_dev(resync->priv->mdev);
if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags)))
goto out; goto out;
......
...@@ -2529,14 +2529,20 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, ...@@ -2529,14 +2529,20 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
struct xsk_buff_pool *xsk_pool, struct xsk_buff_pool *xsk_pool,
struct mlx5e_channel **cp) struct mlx5e_channel **cp)
{ {
int cpu = mlx5_comp_vector_get_cpu(priv->mdev, ix);
struct net_device *netdev = priv->netdev; struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev;
struct mlx5e_xsk_param xsk; struct mlx5e_xsk_param xsk;
struct mlx5e_channel *c; struct mlx5e_channel *c;
unsigned int irq; unsigned int irq;
int vec_ix;
int cpu;
int err; int err;
err = mlx5_comp_irqn_get(priv->mdev, ix, &irq); mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, ix);
vec_ix = mlx5_sd_ch_ix_get_vec_ix(mdev, ix);
cpu = mlx5_comp_vector_get_cpu(mdev, vec_ix);
err = mlx5_comp_irqn_get(mdev, vec_ix, &irq);
if (err) if (err)
return err; return err;
...@@ -2549,18 +2555,19 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, ...@@ -2549,18 +2555,19 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
return -ENOMEM; return -ENOMEM;
c->priv = priv; c->priv = priv;
c->mdev = priv->mdev; c->mdev = mdev;
c->tstamp = &priv->tstamp; c->tstamp = &priv->tstamp;
c->ix = ix; c->ix = ix;
c->vec_ix = vec_ix;
c->cpu = cpu; c->cpu = cpu;
c->pdev = mlx5_core_dma_dev(priv->mdev); c->pdev = mlx5_core_dma_dev(mdev);
c->netdev = priv->netdev; c->netdev = priv->netdev;
c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey); c->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey);
c->num_tc = mlx5e_get_dcb_num_tc(params); c->num_tc = mlx5e_get_dcb_num_tc(params);
c->xdp = !!params->xdp_prog; c->xdp = !!params->xdp_prog;
c->stats = &priv->channel_stats[ix]->ch; c->stats = &priv->channel_stats[ix]->ch;
c->aff_mask = irq_get_effective_affinity_mask(irq); c->aff_mask = irq_get_effective_affinity_mask(irq);
c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix); c->lag_port = mlx5e_enumerate_lag_port(mdev, ix);
netif_napi_add(netdev, &c->napi, mlx5e_napi_poll); netif_napi_add(netdev, &c->napi, mlx5e_napi_poll);
netif_napi_set_irq(&c->napi, irq); netif_napi_set_irq(&c->napi, irq);
...@@ -2943,15 +2950,18 @@ static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_update_netdev_queues); ...@@ -2943,15 +2950,18 @@ static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_update_netdev_queues);
static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv, static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv,
struct mlx5e_params *params) struct mlx5e_params *params)
{ {
struct mlx5_core_dev *mdev = priv->mdev; int ix;
int num_comp_vectors, ix, irq;
num_comp_vectors = mlx5_comp_vectors_max(mdev);
for (ix = 0; ix < params->num_channels; ix++) { for (ix = 0; ix < params->num_channels; ix++) {
int num_comp_vectors, irq, vec_ix;
struct mlx5_core_dev *mdev;
mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, ix);
num_comp_vectors = mlx5_comp_vectors_max(mdev);
cpumask_clear(priv->scratchpad.cpumask); cpumask_clear(priv->scratchpad.cpumask);
vec_ix = mlx5_sd_ch_ix_get_vec_ix(mdev, ix);
for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) { for (irq = vec_ix; irq < num_comp_vectors; irq += params->num_channels) {
int cpu = mlx5_comp_vector_get_cpu(mdev, irq); int cpu = mlx5_comp_vector_get_cpu(mdev, irq);
cpumask_set_cpu(cpu, priv->scratchpad.cpumask); cpumask_set_cpu(cpu, priv->scratchpad.cpumask);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment