Commit 939de57d authored by Daniel Jurgens, committed by Saeed Mahameed

net/mlx5e: Use CQE padding for Ethernet CQs

Writing 64B CQEs to 128B cache lines results in a read-modify-write
(RMW) of the cache line. Padding the CQEs to 128B, when the device
supports it, avoids this and improves performance on systems with 128B
cache lines, such as PPC.

Testing on PPC showed up to a 24% improvement in small-packet
throughput versus the default behavior, depending on the workload and
system topology.
Signed-off-by: Daniel Jurgens <danielj@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
parent 8c4dc42b
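
A minimal standalone sketch of the gating this commit adds (pick_cqe_stride() is an illustrative name, not driver code; in the driver the check lives in mlx5e_build_common_cq_param(), first hunk below):

#include <stdbool.h>

enum { CQE_STRIDE_64 = 0, CQE_STRIDE_128 = 1, CQE_STRIDE_128_PAD = 2 };

/* Illustrative helper: pick the CQE stride for a new Ethernet CQ.
 * Padding only pays off when the device can always produce 128B CQEs
 * (the cqe_128_always capability) and the CPU cache line is at least
 * 128B; otherwise a 64B stride avoids doubling the CQ's memory. */
static int pick_cqe_stride(bool cqe_128_always, unsigned int cache_line_bytes)
{
	if (cqe_128_always && cache_line_bytes >= 128)
		return CQE_STRIDE_128_PAD;	/* CQE owns a full cache line */
	return CQE_STRIDE_64;			/* 64B write may RMW a shared line */
}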
@@ -2224,6 +2224,8 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
 	void *cqc = param->cqc;
 
 	MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index);
+	if (MLX5_CAP_GEN(priv->mdev, cqe_128_always) && cache_line_size() >= 128)
+		MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD);
 }
 
 static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
...
@@ -155,7 +155,8 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		     void *cqc, struct mlx5_cqwq *wq,
 		     struct mlx5_wq_ctrl *wq_ctrl)
 {
-	u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) + 6;
+	/* CQE_STRIDE_128 and CQE_STRIDE_128_PAD both mean 128B stride */
+	u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) == CQE_STRIDE_64 ? 6 : 7;
 	u8 log_wq_sz     = MLX5_GET(cqc, cqc, log_cq_size);
 	int err;
...
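
Why the formula changed: the old computation log_wq_stride = cqe_sz + 6 only worked while the cqe_sz field was 0 (64B) or 1 (128B). With CQE_STRIDE_128_PAD = 2 it would yield 8, a bogus 256B stride; both 128B formats must map to log2(128) = 7. A standalone check of the two formulas (assuming the CQE_STRIDE_* values from the enum hunk below):

#include <assert.h>

enum { CQE_STRIDE_64 = 0, CQE_STRIDE_128 = 1, CQE_STRIDE_128_PAD = 2 };

int main(void)
{
	/* Old formula: cqe_sz + 6. Correct for the first two values only. */
	assert(CQE_STRIDE_64 + 6 == 6);		/* 2^6 = 64B */
	assert(CQE_STRIDE_128 + 6 == 7);	/* 2^7 = 128B */
	assert(CQE_STRIDE_128_PAD + 6 == 8);	/* 2^8 = 256B: wrong */

	/* New formula: 6 for 64B, 7 for every 128B format. */
	assert((CQE_STRIDE_128_PAD == CQE_STRIDE_64 ? 6 : 7) == 7);
	return 0;
}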
@@ -179,7 +179,12 @@ static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq)
 
 static inline struct mlx5_cqe64 *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix)
 {
-	return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
+	struct mlx5_cqe64 *cqe = mlx5_frag_buf_get_wqe(&wq->fbc, ix);
+
+	/* For 128B CQEs the data is in the last 64B */
+	cqe += wq->fbc.log_stride == 7;
+
+	return cqe;
 }
 
 static inline u32 mlx5_cqwq_get_ctr_wrap_cnt(struct mlx5_cqwq *wq, u32 ctr)
...
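
The pointer bump works because cqe has type struct mlx5_cqe64 *, so cqe += 1 advances by exactly 64B: for a 128B stride the device writes the valid 64B at the end of the entry, and adding the boolean (log_stride == 7) skips the padding half. A self-contained sketch with a flat buffer (the driver actually indexes a fragmented buffer through mlx5_frag_buf_get_wqe(); get_cqe() here is illustrative):

#include <assert.h>
#include <stdint.h>

struct cqe64 { uint8_t data[64]; };	/* stand-in for struct mlx5_cqe64 */

static struct cqe64 *get_cqe(uint8_t *buf, unsigned int ix, unsigned int log_stride)
{
	struct cqe64 *cqe = (struct cqe64 *)(buf + ((size_t)ix << log_stride));

	/* For 128B entries the valid 64B CQE is the second half. */
	cqe += (log_stride == 7);
	return cqe;
}

int main(void)
{
	static uint8_t buf[4 * 128];

	/* 64B stride: entry 1 starts at byte 64 and is used directly. */
	assert((uint8_t *)get_cqe(buf, 1, 6) == buf + 64);
	/* 128B padded stride: entry 1 starts at 128; valid half at 192. */
	assert((uint8_t *)get_cqe(buf, 1, 7) == buf + 192);
	return 0;
}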
@@ -125,9 +125,9 @@ struct mlx5_cq_modify_params {
 };
 
 enum {
-	CQE_SIZE_64 = 0,
-	CQE_SIZE_128 = 1,
-	CQE_SIZE_128_PAD = 2,
+	CQE_STRIDE_64 = 0,
+	CQE_STRIDE_128 = 1,
+	CQE_STRIDE_128_PAD = 2,
 };
 
 #define MLX5_MAX_CQ_PERIOD (BIT(__mlx5_bit_sz(cqc, cq_period)) - 1)
@@ -135,8 +135,8 @@ enum {
 
 static inline int cqe_sz_to_mlx_sz(u8 size, int padding_128_en)
 {
-	return padding_128_en ? CQE_SIZE_128_PAD :
-		size == 64 ? CQE_SIZE_64 : CQE_SIZE_128;
+	return padding_128_en ? CQE_STRIDE_128_PAD :
+		size == 64 ? CQE_STRIDE_64 : CQE_STRIDE_128;
 }
 
 static inline void mlx5_cq_set_ci(struct mlx5_core_cq *cq)
...
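
Expected mapping after the rename: the padding flag takes precedence over the raw size. A usage sketch restating the function above (sz_to_mlx() is a local stand-in so the check compiles on its own):

#include <assert.h>

enum { CQE_STRIDE_64 = 0, CQE_STRIDE_128 = 1, CQE_STRIDE_128_PAD = 2 };

/* Same logic as cqe_sz_to_mlx_sz() in the hunk above. */
static int sz_to_mlx(unsigned int size, int padding_128_en)
{
	return padding_128_en ? CQE_STRIDE_128_PAD :
		size == 64 ? CQE_STRIDE_64 : CQE_STRIDE_128;
}

int main(void)
{
	assert(sz_to_mlx(64, 1) == CQE_STRIDE_128_PAD);	/* padding wins */
	assert(sz_to_mlx(64, 0) == CQE_STRIDE_64);
	assert(sz_to_mlx(128, 0) == CQE_STRIDE_128);
	return 0;
}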