Commit 39a1665d authored by Maxim Mikityanskiy, committed by Saeed Mahameed

net/mlx5e: Implement sending multi buffer XDP frames

xmit_xdp_frame is extended to support sending fragmented XDP frames. The
next commit will start using this functionality.
Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
parent 9ded70fa
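
As context for review, here is a minimal caller-side sketch of the extended interface. It is illustrative only and not part of this patch: the helper name example_xmit_xdp_frame() is hypothetical, and the use of xdp_frame_has_frags()/xdp_get_shared_info_from_frame() is an assumption about how a later caller could obtain the fragment metadata. The unchanged call sites in the diff below keep passing NULL.

/* Hypothetical caller sketch (not part of this patch): pass the frame's
 * skb_shared_info only when the frame actually carries fragments, and NULL
 * otherwise, mirroring the existing single-buffer call sites.
 */
static bool example_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
				   struct xdp_frame *xdpf,
				   struct mlx5e_xmit_data *xdptxd)
{
	struct skb_shared_info *sinfo = NULL;

	if (xdp_frame_has_frags(xdpf))
		sinfo = xdp_get_shared_info_from_frame(xdpf);

	/* check_result must be 0 when sinfo is set, so the callee can
	 * recompute the required stop room for the multi-fragment WQE.
	 */
	return INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
			       mlx5e_xmit_xdp_frame, sq, xdptxd, sinfo, 0);
}
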
@@ -538,6 +538,7 @@ struct mlx5e_xdpsq;
 typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *);
 typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *,
 					struct mlx5e_xmit_data *,
+					struct skb_shared_info *,
 					int);

 struct mlx5e_xdpsq {
@@ -120,7 +120,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
 	}

 	if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
-				      mlx5e_xmit_xdp_frame, sq, &xdptxd, 0)))
+				      mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0)))
 		return false;

 	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
@@ -263,13 +263,27 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq
 	return MLX5E_XDP_CHECK_OK;
 }

+INDIRECT_CALLABLE_SCOPE bool
+mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
+		     struct skb_shared_info *sinfo, int check_result);
+
 INDIRECT_CALLABLE_SCOPE bool
 mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
-			   int check_result)
+			   struct skb_shared_info *sinfo, int check_result)
 {
 	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
 	struct mlx5e_xdpsq_stats *stats = sq->stats;

+	if (unlikely(sinfo)) {
+		/* MPWQE is enabled, but a multi-buffer packet is queued for
+		 * transmission. MPWQE can't send fragmented packets, so close
+		 * the current session and fall back to a regular WQE.
+		 */
+		if (unlikely(sq->mpwqe.wqe))
+			mlx5e_xdp_mpwqe_complete(sq);
+
+		return mlx5e_xmit_xdp_frame(sq, xdptxd, sinfo, 0);
+	}
+
 	if (unlikely(xdptxd->len > sq->hw_mtu)) {
 		stats->err++;
 		return false;
@@ -297,9 +311,9 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptx
 	return true;
 }

-INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
+static int mlx5e_xmit_xdp_frame_check_stop_room(struct mlx5e_xdpsq *sq, int stop_room)
 {
-	if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) {
+	if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, stop_room))) {
 		/* SQ is full, ring doorbell */
 		mlx5e_xmit_xdp_doorbell(sq);
 		sq->stats->full++;
@@ -309,37 +323,66 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
 	return MLX5E_XDP_CHECK_OK;
 }

+INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
+{
+	return mlx5e_xmit_xdp_frame_check_stop_room(sq, 1);
+}
+
 INDIRECT_CALLABLE_SCOPE bool
 mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
-		     int check_result)
+		     struct skb_shared_info *sinfo, int check_result)
 {
 	struct mlx5_wq_cyc *wq = &sq->wq;
-	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
-
-	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
-	struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
-	struct mlx5_wqe_data_seg *dseg = wqe->data;
+	struct mlx5_wqe_ctrl_seg *cseg;
+	struct mlx5_wqe_data_seg *dseg;
+	struct mlx5_wqe_eth_seg *eseg;
+	struct mlx5e_tx_wqe *wqe;

 	dma_addr_t dma_addr = xdptxd->dma_addr;
 	u32 dma_len = xdptxd->len;
 	u16 ds_cnt, inline_hdr_sz;
+	u8 num_wqebbs = 1;
+	int num_frags = 0;
+	u16 pi;

 	struct mlx5e_xdpsq_stats *stats = sq->stats;

-	net_prefetchw(wqe);
-
 	if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
 		stats->err++;
 		return false;
 	}

-	if (!check_result)
-		check_result = mlx5e_xmit_xdp_frame_check(sq);
+	ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1;
+	if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE)
+		ds_cnt++;
+
+	/* check_result must be 0 if sinfo is passed. */
+	if (!check_result) {
+		int stop_room = 1;
+
+		if (unlikely(sinfo)) {
+			ds_cnt += sinfo->nr_frags;
+			num_frags = sinfo->nr_frags;
+			num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+			/* Assuming MLX5_CAP_GEN(mdev, max_wqe_sz_sq) is big
+			 * enough to hold all fragments.
+			 */
+			stop_room = MLX5E_STOP_ROOM(num_wqebbs);
+		}
+
+		check_result = mlx5e_xmit_xdp_frame_check_stop_room(sq, stop_room);
+	}
 	if (unlikely(check_result < 0))
 		return false;

-	ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1;
+	pi = mlx5e_xdpsq_get_next_pi(sq, num_wqebbs);
+	wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+	net_prefetchw(wqe);
+	cseg = &wqe->ctrl;
+	eseg = &wqe->eth;
+	dseg = wqe->data;

 	inline_hdr_sz = 0;

 	/* copy the inline part if required */
@@ -351,7 +394,6 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
 		dma_addr += MLX5E_XDP_MIN_INLINE;
 		inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
 		dseg++;
-		ds_cnt++;
 	}

 	/* write the dma part */
@@ -361,8 +403,8 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
 	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);

 	if (unlikely(test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state))) {
-		u8 num_pkts = 1;
-		u8 num_wqebbs;
+		u8 num_pkts = 1 + num_frags;
+		int i;

 		memset(&cseg->signature, 0, sizeof(*cseg) -
 		       sizeof(cseg->opmod_idx_opcode) - sizeof(cseg->qpn_ds));
@@ -371,9 +413,21 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
 		eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
 		dseg->lkey = sq->mkey_be;

+		for (i = 0; i < num_frags; i++) {
+			skb_frag_t *frag = &sinfo->frags[i];
+			dma_addr_t addr;
+
+			addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
+				skb_frag_off(frag);
+
+			dseg++;
+			dseg->addr = cpu_to_be64(addr);
+			dseg->byte_count = cpu_to_be32(skb_frag_size(frag));
+			dseg->lkey = sq->mkey_be;
+		}
+
 		cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);

-		num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
 		sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) {
 			.num_wqebbs = num_wqebbs,
 			.num_pkts = num_pkts,
@@ -566,7 +620,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 		xdpi.frame.dma_addr = xdptxd.dma_addr;

 		ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
-				      mlx5e_xmit_xdp_frame, sq, &xdptxd, 0);
+				      mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, 0);
 		if (unlikely(!ret)) {
 			dma_unmap_single(sq->pdev, xdptxd.dma_addr,
 					 xdptxd.len, DMA_TO_DEVICE);
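
The stop-room arithmetic added in mlx5e_xmit_xdp_frame() above is easy to sanity-check in isolation. Below is a standalone sketch of the same computation for a fragmented frame; the EX_* values are assumptions about what MLX5_SEND_WQEBB_NUM_DS, MLX5E_TX_WQE_EMPTY_DS_COUNT and MLX5E_STOP_ROOM() expand to (4 data segments per 64-byte WQEBB, 2 segments for control + Ethernet, and 2*n - 1 WQEBBs of stop room), not something this patch defines.

/* Standalone sketch of the stop-room math used above; illustrative only. */
#define EX_SEND_WQEBB_NUM_DS		4	/* assumed: 64-byte WQEBB / 16-byte data segment */
#define EX_TX_WQE_EMPTY_DS_COUNT	2	/* assumed: control + Ethernet segments */
#define EX_STOP_ROOM(wqebbs)		((wqebbs) * 2 - 1)	/* assumed expansion */
#define EX_DIV_ROUND_UP(n, d)		(((n) + (d) - 1) / (d))

static int ex_xdp_frame_stop_room(unsigned int nr_frags, int min_inline)
{
	/* Control/Ethernet segments + linear data segment, plus an inline
	 * data segment when the minimum inline mode requires one, plus one
	 * data segment per fragment of the multi-buffer frame.
	 */
	unsigned int ds_cnt = EX_TX_WQE_EMPTY_DS_COUNT + 1 +
			      (min_inline ? 1 : 0) + nr_frags;
	unsigned int num_wqebbs = EX_DIV_ROUND_UP(ds_cnt, EX_SEND_WQEBB_NUM_DS);

	return EX_STOP_ROOM(num_wqebbs);
}

/* Example: 3 fragments with inlining enabled gives ds_cnt = 2 + 1 + 1 + 3 = 7,
 * num_wqebbs = 2, so the SQ must have room for 3 WQEBBs before posting.
 */
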
@@ -59,9 +59,11 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,

 INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
 							   struct mlx5e_xmit_data *xdptxd,
+							   struct skb_shared_info *sinfo,
 							   int check_result));
 INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
 						     struct mlx5e_xmit_data *xdptxd,
+						     struct skb_shared_info *sinfo,
 						     int check_result));
 INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq));
 INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq));
@@ -103,7 +103,8 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
 		xsk_buff_raw_dma_sync_for_device(pool, xdptxd.dma_addr, xdptxd.len);

 		ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
-				      mlx5e_xmit_xdp_frame, sq, &xdptxd, check_result);
+				      mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL,
+				      check_result);
 		if (unlikely(!ret)) {
 			if (sq->mpwqe.wqe)
 				mlx5e_xdp_mpwqe_complete(sq);