Commit 338c46c6 authored by Maxim Mikityanskiy's avatar Maxim Mikityanskiy Committed by Saeed Mahameed

net/mlx5e: Support multiple SKBs in a TX WQE

TX MPWQE support for SKBs is coming in one of the following patches, and
a single MPWQE can send multiple SKBs. This commit prepares the TX path
code to handle such cases:

1. An additional FIFO for SKBs is added, just like the FIFO for DMA
chunks.

2. struct mlx5e_tx_wqe_info will contain num_fifo_pkts. If a given WQE
contains only one packet, num_fifo_pkts will be zero, and the SKB will
be stored in mlx5e_tx_wqe_info, as usual. If num_fifo_pkts > 0, the SKB
pointer will be NULL, and the SKBs will be stored in the FIFO.

This change has no performance impact in TCP single stream test and
XDP_TX single stream test.

When compiled with a recent GCC, this change shows no visible
performance impact on UDP pktgen (burst 32) single stream test either:
  Packet rate: 16.95 Mpps (±0.15 Mpps) -> 16.96 Mpps (±0.12 Mpps)
  Instructions per packet: 429 -> 421
  Cycles per packet: 160 -> 156
  Instructions per cycle: 2.69 -> 2.70

CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz (x86_64)
NIC: Mellanox ConnectX-6 Dx
GCC 10.2.0
Signed-off-by: default avatarMaxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: default avatarTariq Toukan <tariqt@mellanox.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@nvidia.com>
parent 56e4da66
......@@ -318,11 +318,13 @@ struct mlx5e_txqsq {
/* dirtied @completion */
u16 cc;
u16 skb_fifo_cc;
u32 dma_fifo_cc;
struct dim dim; /* Adaptive Moderation */
/* dirtied @xmit */
u16 pc ____cacheline_aligned_in_smp;
u16 skb_fifo_pc;
u32 dma_fifo_pc;
struct mlx5e_cq cq;
......@@ -330,9 +332,11 @@ struct mlx5e_txqsq {
/* read only */
struct mlx5_wq_cyc wq;
u32 dma_fifo_mask;
u16 skb_fifo_mask;
struct mlx5e_sq_stats *stats;
struct {
struct mlx5e_sq_dma *dma_fifo;
struct sk_buff **skb_fifo;
struct mlx5e_tx_wqe_info *wqe_info;
} db;
void __iomem *uar_map;
......
......@@ -105,6 +105,7 @@ struct mlx5e_tx_wqe_info {
u32 num_bytes;
u8 num_wqebbs;
u8 num_dma;
u8 num_fifo_pkts;
#ifdef CONFIG_MLX5_EN_TLS
struct page *resync_dump_frag_page;
#endif
......@@ -231,6 +232,23 @@ mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size,
dma->type = map_type;
}
static inline struct sk_buff **mlx5e_skb_fifo_get(struct mlx5e_txqsq *sq, u16 i)
{
return &sq->db.skb_fifo[i & sq->skb_fifo_mask];
}
static inline void mlx5e_skb_fifo_push(struct mlx5e_txqsq *sq, struct sk_buff *skb)
{
struct sk_buff **skb_item = mlx5e_skb_fifo_get(sq, sq->skb_fifo_pc++);
*skb_item = skb;
}
static inline struct sk_buff *mlx5e_skb_fifo_pop(struct mlx5e_txqsq *sq)
{
return *mlx5e_skb_fifo_get(sq, sq->skb_fifo_cc++);
}
static inline void
mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
{
......
......@@ -29,20 +29,24 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe_info *wi,
u32 *dma_fifo_cc);
static inline void
static inline bool
mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe_info *wi,
u32 *dma_fifo_cc)
{
if (unlikely(wi->resync_dump_frag_page))
if (unlikely(wi->resync_dump_frag_page)) {
mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma_fifo_cc);
return true;
}
return false;
}
#else
static inline void
static inline bool
mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe_info *wi,
u32 *dma_fifo_cc)
{
return false;
}
#endif /* CONFIG_MLX5_EN_TLS */
......
......@@ -1047,6 +1047,7 @@ static void mlx5e_free_icosq(struct mlx5e_icosq *sq)
static void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq)
{
kvfree(sq->db.wqe_info);
kvfree(sq->db.skb_fifo);
kvfree(sq->db.dma_fifo);
}
......@@ -1058,15 +1059,19 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
sq->db.dma_fifo = kvzalloc_node(array_size(df_sz,
sizeof(*sq->db.dma_fifo)),
GFP_KERNEL, numa);
sq->db.skb_fifo = kvzalloc_node(array_size(df_sz,
sizeof(*sq->db.skb_fifo)),
GFP_KERNEL, numa);
sq->db.wqe_info = kvzalloc_node(array_size(wq_sz,
sizeof(*sq->db.wqe_info)),
GFP_KERNEL, numa);
if (!sq->db.dma_fifo || !sq->db.wqe_info) {
if (!sq->db.dma_fifo || !sq->db.skb_fifo || !sq->db.wqe_info) {
mlx5e_free_txqsq_db(sq);
return -ENOMEM;
}
sq->dma_fifo_mask = df_sz - 1;
sq->skb_fifo_mask = df_sz - 1;
return 0;
}
......
......@@ -342,6 +342,7 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
.num_bytes = attr->num_bytes,
.num_dma = num_dma,
.num_wqebbs = wqe_attr->num_wqebbs,
.num_fifo_pkts = 0,
};
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode);
......@@ -489,6 +490,18 @@ static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb,
napi_consume_skb(skb, napi_budget);
}
static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi,
struct mlx5_cqe64 *cqe, int napi_budget)
{
int i;
for (i = 0; i < wi->num_fifo_pkts; i++) {
struct sk_buff *skb = mlx5e_skb_fifo_pop(sq);
mlx5e_consume_skb(sq, skb, cqe, napi_budget);
}
}
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
{
struct mlx5e_sq_stats *stats;
......@@ -534,26 +547,33 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
wqe_counter = be16_to_cpu(cqe->wqe_counter);
do {
struct sk_buff *skb;
last_wqe = (sqcc == wqe_counter);
ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
wi = &sq->db.wqe_info[ci];
skb = wi->skb;
sqcc += wi->num_wqebbs;
if (unlikely(!skb)) {
mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
if (likely(wi->skb)) {
mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget);
npkts++;
nbytes += wi->num_bytes;
continue;
}
mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget);
if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi,
&dma_fifo_cc)))
continue;
npkts++;
nbytes += wi->num_bytes;
if (wi->num_fifo_pkts) {
mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
mlx5e_tx_wi_consume_fifo_skbs(sq, wi, cqe, napi_budget);
npkts += wi->num_fifo_pkts;
nbytes += wi->num_bytes;
}
} while (!last_wqe);
if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
......@@ -592,12 +612,19 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
return (i == MLX5E_TX_CQ_POLL_BUDGET);
}
static void mlx5e_tx_wi_kfree_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi)
{
int i;
for (i = 0; i < wi->num_fifo_pkts; i++)
dev_kfree_skb_any(mlx5e_skb_fifo_pop(sq));
}
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
{
struct mlx5e_tx_wqe_info *wi;
u32 dma_fifo_cc, nbytes = 0;
u16 ci, sqcc, npkts = 0;
struct sk_buff *skb;
sqcc = sq->cc;
dma_fifo_cc = sq->dma_fifo_cc;
......@@ -605,20 +632,28 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
while (sqcc != sq->pc) {
ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
wi = &sq->db.wqe_info[ci];
skb = wi->skb;
sqcc += wi->num_wqebbs;
if (!skb) {
mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
if (likely(wi->skb)) {
mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
dev_kfree_skb_any(wi->skb);
npkts++;
nbytes += wi->num_bytes;
continue;
}
mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
dev_kfree_skb_any(skb);
if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc)))
continue;
npkts++;
nbytes += wi->num_bytes;
if (wi->num_fifo_pkts) {
mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
mlx5e_tx_wi_kfree_fifo_skbs(sq, wi);
npkts += wi->num_fifo_pkts;
nbytes += wi->num_bytes;
}
}
sq->dma_fifo_cc = dma_fifo_cc;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment