Commit 338c46c6 authored by Maxim Mikityanskiy, committed by Saeed Mahameed

net/mlx5e: Support multiple SKBs in a TX WQE

TX MPWQE support for SKBs is coming in one of the following patches, and
a single MPWQE can send multiple SKBs. This commit prepares the TX path
code to handle such cases:

1. An additional FIFO for SKBs is added, just like the FIFO for DMA
chunks.

2. struct mlx5e_tx_wqe_info will contain num_fifo_pkts. If a given WQE
contains only one packet, num_fifo_pkts will be zero, and the SKB will
be stored in mlx5e_tx_wqe_info, as usual. If num_fifo_pkts > 0, the SKB
pointer will be NULL, and the SKBs will be stored in the FIFO.

This change has no performance impact in TCP single stream test and
XDP_TX single stream test.

When compiled with a recent GCC, this change shows no visible
performance impact on UDP pktgen (burst 32) single stream test either:
  Packet rate: 16.95 Mpps (±0.15 Mpps) -> 16.96 Mpps (±0.12 Mpps)
  Instructions per packet: 429 -> 421
  Cycles per packet: 160 -> 156
  Instructions per cycle: 2.69 -> 2.70

CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz (x86_64)
NIC: Mellanox ConnectX-6 Dx
GCC 10.2.0
Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
parent 56e4da66
...@@ -318,11 +318,13 @@ struct mlx5e_txqsq { ...@@ -318,11 +318,13 @@ struct mlx5e_txqsq {
/* dirtied @completion */ /* dirtied @completion */
u16 cc; u16 cc;
u16 skb_fifo_cc;
u32 dma_fifo_cc; u32 dma_fifo_cc;
struct dim dim; /* Adaptive Moderation */ struct dim dim; /* Adaptive Moderation */
/* dirtied @xmit */ /* dirtied @xmit */
u16 pc ____cacheline_aligned_in_smp; u16 pc ____cacheline_aligned_in_smp;
u16 skb_fifo_pc;
u32 dma_fifo_pc; u32 dma_fifo_pc;
struct mlx5e_cq cq; struct mlx5e_cq cq;
...@@ -330,9 +332,11 @@ struct mlx5e_txqsq { ...@@ -330,9 +332,11 @@ struct mlx5e_txqsq {
/* read only */ /* read only */
struct mlx5_wq_cyc wq; struct mlx5_wq_cyc wq;
u32 dma_fifo_mask; u32 dma_fifo_mask;
u16 skb_fifo_mask;
struct mlx5e_sq_stats *stats; struct mlx5e_sq_stats *stats;
struct { struct {
struct mlx5e_sq_dma *dma_fifo; struct mlx5e_sq_dma *dma_fifo;
struct sk_buff **skb_fifo;
struct mlx5e_tx_wqe_info *wqe_info; struct mlx5e_tx_wqe_info *wqe_info;
} db; } db;
void __iomem *uar_map; void __iomem *uar_map;
......
...@@ -105,6 +105,7 @@ struct mlx5e_tx_wqe_info { ...@@ -105,6 +105,7 @@ struct mlx5e_tx_wqe_info {
u32 num_bytes; u32 num_bytes;
u8 num_wqebbs; u8 num_wqebbs;
u8 num_dma; u8 num_dma;
u8 num_fifo_pkts;
#ifdef CONFIG_MLX5_EN_TLS #ifdef CONFIG_MLX5_EN_TLS
struct page *resync_dump_frag_page; struct page *resync_dump_frag_page;
#endif #endif
...@@ -231,6 +232,23 @@ mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size, ...@@ -231,6 +232,23 @@ mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size,
dma->type = map_type; dma->type = map_type;
} }
/* Return the address of the SKB FIFO slot for counter value @i.
 * The FIFO size is a power of two, so masking with skb_fifo_mask
 * wraps the free-running producer/consumer counters into the ring.
 */
static inline struct sk_buff **mlx5e_skb_fifo_get(struct mlx5e_txqsq *sq, u16 i)
{
return &sq->db.skb_fifo[i & sq->skb_fifo_mask];
}
/* Store @skb at the producer position of the SQ's SKB FIFO and advance
 * the producer counter. Used when a single WQE carries multiple SKBs
 * (wi->num_fifo_pkts > 0), so they cannot all live in mlx5e_tx_wqe_info.
 * No overflow check: the FIFO is sized to the WQ, so the caller's WQE
 * accounting bounds the number of outstanding entries.
 */
static inline void mlx5e_skb_fifo_push(struct mlx5e_txqsq *sq, struct sk_buff *skb)
{
struct sk_buff **skb_item = mlx5e_skb_fifo_get(sq, sq->skb_fifo_pc++);
*skb_item = skb;
}
/* Remove and return the SKB at the consumer position of the SQ's SKB
 * FIFO, advancing the consumer counter. Must be called exactly
 * wi->num_fifo_pkts times per completed multi-packet WQE to keep the
 * counters in sync with the pushes done at transmit time.
 */
static inline struct sk_buff *mlx5e_skb_fifo_pop(struct mlx5e_txqsq *sq)
{
return *mlx5e_skb_fifo_get(sq, sq->skb_fifo_cc++);
}
static inline void static inline void
mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
{ {
......
...@@ -29,20 +29,24 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, ...@@ -29,20 +29,24 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe_info *wi, struct mlx5e_tx_wqe_info *wi,
u32 *dma_fifo_cc); u32 *dma_fifo_cc);
static inline void static inline bool
mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq, mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe_info *wi, struct mlx5e_tx_wqe_info *wi,
u32 *dma_fifo_cc) u32 *dma_fifo_cc)
{ {
if (unlikely(wi->resync_dump_frag_page)) if (unlikely(wi->resync_dump_frag_page)) {
mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma_fifo_cc); mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma_fifo_cc);
return true;
}
return false;
} }
#else #else
static inline void static inline bool
mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq, mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe_info *wi, struct mlx5e_tx_wqe_info *wi,
u32 *dma_fifo_cc) u32 *dma_fifo_cc)
{ {
return false;
} }
#endif /* CONFIG_MLX5_EN_TLS */ #endif /* CONFIG_MLX5_EN_TLS */
......
...@@ -1047,6 +1047,7 @@ static void mlx5e_free_icosq(struct mlx5e_icosq *sq) ...@@ -1047,6 +1047,7 @@ static void mlx5e_free_icosq(struct mlx5e_icosq *sq)
static void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq) static void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq)
{ {
kvfree(sq->db.wqe_info); kvfree(sq->db.wqe_info);
kvfree(sq->db.skb_fifo);
kvfree(sq->db.dma_fifo); kvfree(sq->db.dma_fifo);
} }
...@@ -1058,15 +1059,19 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa) ...@@ -1058,15 +1059,19 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
sq->db.dma_fifo = kvzalloc_node(array_size(df_sz, sq->db.dma_fifo = kvzalloc_node(array_size(df_sz,
sizeof(*sq->db.dma_fifo)), sizeof(*sq->db.dma_fifo)),
GFP_KERNEL, numa); GFP_KERNEL, numa);
sq->db.skb_fifo = kvzalloc_node(array_size(df_sz,
sizeof(*sq->db.skb_fifo)),
GFP_KERNEL, numa);
sq->db.wqe_info = kvzalloc_node(array_size(wq_sz, sq->db.wqe_info = kvzalloc_node(array_size(wq_sz,
sizeof(*sq->db.wqe_info)), sizeof(*sq->db.wqe_info)),
GFP_KERNEL, numa); GFP_KERNEL, numa);
if (!sq->db.dma_fifo || !sq->db.wqe_info) { if (!sq->db.dma_fifo || !sq->db.skb_fifo || !sq->db.wqe_info) {
mlx5e_free_txqsq_db(sq); mlx5e_free_txqsq_db(sq);
return -ENOMEM; return -ENOMEM;
} }
sq->dma_fifo_mask = df_sz - 1; sq->dma_fifo_mask = df_sz - 1;
sq->skb_fifo_mask = df_sz - 1;
return 0; return 0;
} }
......
...@@ -342,6 +342,7 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, ...@@ -342,6 +342,7 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
.num_bytes = attr->num_bytes, .num_bytes = attr->num_bytes,
.num_dma = num_dma, .num_dma = num_dma,
.num_wqebbs = wqe_attr->num_wqebbs, .num_wqebbs = wqe_attr->num_wqebbs,
.num_fifo_pkts = 0,
}; };
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode); cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode);
...@@ -489,6 +490,18 @@ static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb, ...@@ -489,6 +490,18 @@ static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb,
napi_consume_skb(skb, napi_budget); napi_consume_skb(skb, napi_budget);
} }
/* Completion path for a multi-packet WQE: pop wi->num_fifo_pkts SKBs
 * from the SQ's SKB FIFO and hand each to mlx5e_consume_skb() (TX
 * timestamping + napi_consume_skb). Caller must have already unmapped
 * the WQE's DMA chunks via mlx5e_tx_wi_dma_unmap().
 */
static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi,
struct mlx5_cqe64 *cqe, int napi_budget)
{
int i;
for (i = 0; i < wi->num_fifo_pkts; i++) {
struct sk_buff *skb = mlx5e_skb_fifo_pop(sq);
mlx5e_consume_skb(sq, skb, cqe, napi_budget);
}
}
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
{ {
struct mlx5e_sq_stats *stats; struct mlx5e_sq_stats *stats;
...@@ -534,26 +547,33 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) ...@@ -534,26 +547,33 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
wqe_counter = be16_to_cpu(cqe->wqe_counter); wqe_counter = be16_to_cpu(cqe->wqe_counter);
do { do {
struct sk_buff *skb;
last_wqe = (sqcc == wqe_counter); last_wqe = (sqcc == wqe_counter);
ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
wi = &sq->db.wqe_info[ci]; wi = &sq->db.wqe_info[ci];
skb = wi->skb;
sqcc += wi->num_wqebbs; sqcc += wi->num_wqebbs;
if (unlikely(!skb)) { if (likely(wi->skb)) {
mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc); mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget);
npkts++;
nbytes += wi->num_bytes;
continue; continue;
} }
mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc); if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi,
mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget); &dma_fifo_cc)))
continue;
npkts++; if (wi->num_fifo_pkts) {
nbytes += wi->num_bytes; mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
mlx5e_tx_wi_consume_fifo_skbs(sq, wi, cqe, napi_budget);
npkts += wi->num_fifo_pkts;
nbytes += wi->num_bytes;
}
} while (!last_wqe); } while (!last_wqe);
if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) { if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
...@@ -592,12 +612,19 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) ...@@ -592,12 +612,19 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
return (i == MLX5E_TX_CQ_POLL_BUDGET); return (i == MLX5E_TX_CQ_POLL_BUDGET);
} }
/* Error/teardown path counterpart of mlx5e_tx_wi_consume_fifo_skbs():
 * pop and free wi->num_fifo_pkts SKBs from the SQ's SKB FIFO without
 * completion processing. dev_kfree_skb_any() is used because this may
 * run from contexts where the SQ is being torn down (no NAPI budget).
 */
static void mlx5e_tx_wi_kfree_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi)
{
int i;
for (i = 0; i < wi->num_fifo_pkts; i++)
dev_kfree_skb_any(mlx5e_skb_fifo_pop(sq));
}
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq) void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
{ {
struct mlx5e_tx_wqe_info *wi; struct mlx5e_tx_wqe_info *wi;
u32 dma_fifo_cc, nbytes = 0; u32 dma_fifo_cc, nbytes = 0;
u16 ci, sqcc, npkts = 0; u16 ci, sqcc, npkts = 0;
struct sk_buff *skb;
sqcc = sq->cc; sqcc = sq->cc;
dma_fifo_cc = sq->dma_fifo_cc; dma_fifo_cc = sq->dma_fifo_cc;
...@@ -605,20 +632,28 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq) ...@@ -605,20 +632,28 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
while (sqcc != sq->pc) { while (sqcc != sq->pc) {
ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
wi = &sq->db.wqe_info[ci]; wi = &sq->db.wqe_info[ci];
skb = wi->skb;
sqcc += wi->num_wqebbs; sqcc += wi->num_wqebbs;
if (!skb) { if (likely(wi->skb)) {
mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc); mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
dev_kfree_skb_any(wi->skb);
npkts++;
nbytes += wi->num_bytes;
continue; continue;
} }
mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc); if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc)))
dev_kfree_skb_any(skb); continue;
npkts++; if (wi->num_fifo_pkts) {
nbytes += wi->num_bytes; mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
mlx5e_tx_wi_kfree_fifo_skbs(sq, wi);
npkts += wi->num_fifo_pkts;
nbytes += wi->num_bytes;
}
} }
sq->dma_fifo_cc = dma_fifo_cc; sq->dma_fifo_cc = dma_fifo_cc;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment