Commit 043dc78e authored by Tariq Toukan's avatar Tariq Toukan Committed by Saeed Mahameed

net/mlx5e: TX, Use actual WQE size for SQ edge fill

We fill SQ edge with NOPs to avoid WQEs wrap.
Here, instead of doing that in advance for the maximum possible
WQE size, we do it on-demand using the actual WQE size.
We re-order some parts in mlx5e_sq_xmit to finish the calculation
of WQE size (ds_cnt) before doing any writes to the WQE buffer.

When SQ work queue is fragmented (introduced in an downstream patch),
dealing with WQE wraps becomes more frequent. This change would drastically
reduce the overhead in this case.

Performance tests:
ConnectX-5 100Gbps, CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz
Packet rate of 64B packets, single transmit ring, size 8K.

Before: 14.9 Mpps
After:  15.8 Mpps

Improvement of 6%.
Signed-off-by: default avatarTariq Toukan <tariqt@mellanox.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@mellanox.com>
parent ddf385e3
...@@ -183,6 +183,7 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) ...@@ -183,6 +183,7 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
struct mlx5e_tx_wqe { struct mlx5e_tx_wqe {
struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_ctrl_seg ctrl;
struct mlx5_wqe_eth_seg eth; struct mlx5_wqe_eth_seg eth;
struct mlx5_wqe_data_seg data[0];
}; };
struct mlx5e_rx_wqe { struct mlx5e_rx_wqe {
...@@ -374,7 +375,6 @@ struct mlx5e_txqsq { ...@@ -374,7 +375,6 @@ struct mlx5e_txqsq {
struct netdev_queue *txq; struct netdev_queue *txq;
u32 sqn; u32 sqn;
u8 min_inline_mode; u8 min_inline_mode;
u16 edge;
struct device *pdev; struct device *pdev;
__be32 mkey_be; __be32 mkey_be;
unsigned long state; unsigned long state;
...@@ -439,7 +439,6 @@ struct mlx5e_icosq { ...@@ -439,7 +439,6 @@ struct mlx5e_icosq {
struct mlx5_wq_cyc wq; struct mlx5_wq_cyc wq;
void __iomem *uar_map; void __iomem *uar_map;
u32 sqn; u32 sqn;
u16 edge;
unsigned long state; unsigned long state;
/* control path */ /* control path */
......
...@@ -959,8 +959,6 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, ...@@ -959,8 +959,6 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
if (err) if (err)
goto err_sq_wq_destroy; goto err_sq_wq_destroy;
sq->edge = mlx5_wq_cyc_get_size(wq) - MLX5E_ICOSQ_MAX_WQEBBS;
return 0; return 0;
err_sq_wq_destroy: err_sq_wq_destroy:
...@@ -1039,8 +1037,6 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, ...@@ -1039,8 +1037,6 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
INIT_WORK(&sq->dim.work, mlx5e_tx_dim_work); INIT_WORK(&sq->dim.work, mlx5e_tx_dim_work);
sq->dim.mode = params->tx_cq_moderation.cq_period_mode; sq->dim.mode = params->tx_cq_moderation.cq_period_mode;
sq->edge = mlx5_wq_cyc_get_size(wq) - MLX5_SEND_WQE_MAX_WQEBBS;
return 0; return 0;
err_sq_wq_destroy: err_sq_wq_destroy:
......
...@@ -383,6 +383,22 @@ static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq) ...@@ -383,6 +383,22 @@ static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq)
return sq->pc >> MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; return sq->pc >> MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
} }
static inline void mlx5e_fill_icosq_edge(struct mlx5e_icosq *sq,
struct mlx5_wq_cyc *wq,
u16 pi)
{
struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi];
u8 nnops = mlx5_wq_cyc_get_size(wq) - pi;
edge_wi = wi + nnops;
/* fill sq edge with nops to avoid wqe wrapping two pages */
for (; wi < edge_wi; wi++) {
wi->opcode = MLX5_OPCODE_NOP;
mlx5e_post_nop(wq, sq->sqn, &sq->pc);
}
}
static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{ {
struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
...@@ -391,14 +407,15 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) ...@@ -391,14 +407,15 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5e_umr_wqe *umr_wqe; struct mlx5e_umr_wqe *umr_wqe;
u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1); u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
int err;
u16 pi; u16 pi;
int err;
int i; int i;
/* fill sq edge with nops to avoid wqe wrap around */ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
while ((pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc)) > sq->edge) {
sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; if (unlikely(pi + MLX5E_UMR_WQEBBS > mlx5_wq_cyc_get_size(wq))) {
mlx5e_post_nop(wq, sq->sqn, &sq->pc); mlx5e_fill_icosq_edge(sq, wq, pi);
pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
} }
umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
......
...@@ -93,6 +93,29 @@ const struct mlx5e_profile *mlx5i_pkey_get_profile(void); ...@@ -93,6 +93,29 @@ const struct mlx5e_profile *mlx5i_pkey_get_profile(void);
/* Extract mlx5e_priv from IPoIB netdev */ /* Extract mlx5e_priv from IPoIB netdev */
#define mlx5i_epriv(netdev) ((void *)(((struct mlx5i_priv *)netdev_priv(netdev))->mlx5e_priv)) #define mlx5i_epriv(netdev) ((void *)(((struct mlx5i_priv *)netdev_priv(netdev))->mlx5e_priv))
struct mlx5_wqe_eth_pad {
u8 rsvd0[16];
};
struct mlx5i_tx_wqe {
struct mlx5_wqe_ctrl_seg ctrl;
struct mlx5_wqe_datagram_seg datagram;
struct mlx5_wqe_eth_pad pad;
struct mlx5_wqe_eth_seg eth;
struct mlx5_wqe_data_seg data[0];
};
static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq,
struct mlx5i_tx_wqe **wqe,
u16 *pi)
{
struct mlx5_wq_cyc *wq = &sq->wq;
*pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
*wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
memset(*wqe, 0, sizeof(**wqe));
}
netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5_av *av, u32 dqpn, u32 dqkey); struct mlx5_av *av, u32 dqpn, u32 dqkey);
void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment