Commit 8d4b475e authored by Maxim Mikityanskiy, committed by Saeed Mahameed

net/mlx5e: Fix usage of DMA sync API

DMA sync functions should use the same direction that was used when the
DMA mapping was created. Use DMA_BIDIRECTIONAL for XDP_TX from the
regular RQ, which reuses the mapping that was set up for RX, and
DMA_TO_DEVICE for XDP_TX from the XSK RQ and for XDP_REDIRECT, which
establish a new mapping in this direction. On the RX side, use the same
direction that was used when setting up the mapping (DMA_BIDIRECTIONAL
for XDP, DMA_FROM_DEVICE otherwise).
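
As an illustration of that rule, here is a minimal, self-contained sketch
(hypothetical helpers, not mlx5 code; demo_rx_map() and demo_xdp_tx_sync()
are made-up names) showing the sync direction kept in lockstep with the
mapping direction:

#include <linux/dma-mapping.h>

/* Hypothetical helper: map an RX page; XDP-capable queues map bidirectionally. */
static dma_addr_t demo_rx_map(struct device *dev, struct page *page, bool xdp)
{
	enum dma_data_direction dir = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;

	return dma_map_page(dev, page, 0, PAGE_SIZE, dir);
}

/* Hypothetical helper: sync before XDP_TX, matching how the page was mapped. */
static void demo_xdp_tx_sync(struct device *dev, dma_addr_t addr, size_t len,
			     bool reuses_rx_mapping)
{
	/* A reused RX mapping was created DMA_BIDIRECTIONAL; a fresh TX-only
	 * mapping (XSK RQ, XDP_REDIRECT) was created DMA_TO_DEVICE.
	 */
	enum dma_data_direction dir = reuses_rx_mapping ? DMA_BIDIRECTIONAL
							 : DMA_TO_DEVICE;

	dma_sync_single_for_device(dev, addr, len, dir);
}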

Also don't skip the CPU sync for the device when establishing a
DMA_FROM_DEVICE mapping for RX, as some architectures (e.g. ARM) may
need to invalidate caches before the device can use the mapping. This
doesn't undo the fix made in
commit 0b7cfa40 ("net/mlx5e: Fix page DMA map/unmap attributes"),
since that bug happened on unmap.
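
For context, a hedged sketch (generic device and page, hypothetical demo_*
names, not the driver's actual RX path) of the difference between the two
mapping calls touched below: plain dma_map_page() performs the CPU-to-device
cache maintenance at map time, while DMA_ATTR_SKIP_CPU_SYNC leaves it to the
caller.

#include <linux/dma-mapping.h>

/* Hypothetical: mapping with the implicit sync (what the patch switches to). */
static dma_addr_t demo_map_rx_page(struct device *dev, struct page *page)
{
	return dma_map_page(dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
}

/* Hypothetical: mapping that skips the CPU sync; the caller must sync before
 * the device touches the page, which e.g. ARM needs for cache invalidation.
 */
static dma_addr_t demo_map_rx_page_no_sync(struct device *dev, struct page *page)
{
	return dma_map_page_attrs(dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE,
				  DMA_ATTR_SKIP_CPU_SYNC);
}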

Fixes: 0b7cfa40 ("net/mlx5e: Fix page DMA map/unmap attributes")
Fixes: b5503b99 ("net/mlx5e: XDP TX forwarding support")
Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
parent f9c955b4
@@ -117,7 +117,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
 		xdpi.page.rq = rq;
 		dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf);
-		dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, DMA_TO_DEVICE);
+		dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, DMA_BIDIRECTIONAL);
 		if (unlikely(xdp_frame_has_frags(xdpf))) {
 			sinfo = xdp_get_shared_info_from_frame(xdpf);
@@ -131,7 +131,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
 					skb_frag_off(frag);
 			len = skb_frag_size(frag);
 			dma_sync_single_for_device(sq->pdev, addr, len,
-						   DMA_TO_DEVICE);
+						   DMA_BIDIRECTIONAL);
 		}
 	}
...
@@ -266,7 +266,7 @@ static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, union mlx5e_alloc_uni
 	addr = page_pool_get_dma_addr(au->page);
 	/* Non-XSK always uses PAGE_SIZE. */
-	dma_sync_single_for_device(rq->pdev, addr, PAGE_SIZE, DMA_FROM_DEVICE);
+	dma_sync_single_for_device(rq->pdev, addr, PAGE_SIZE, rq->buff.map_dir);
 	return true;
 }
@@ -282,8 +282,7 @@ static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, union mlx5e_alloc_u
 		return -ENOMEM;
 	/* Non-XSK always uses PAGE_SIZE. */
-	addr = dma_map_page_attrs(rq->pdev, au->page, 0, PAGE_SIZE,
-				  rq->buff.map_dir, DMA_ATTR_SKIP_CPU_SYNC);
+	addr = dma_map_page(rq->pdev, au->page, 0, PAGE_SIZE, rq->buff.map_dir);
 	if (unlikely(dma_mapping_error(rq->pdev, addr))) {
 		page_pool_recycle_direct(rq->page_pool, au->page);
 		au->page = NULL;
@@ -427,14 +426,15 @@ mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb,
 {
 	dma_addr_t addr = page_pool_get_dma_addr(au->page);
-	dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, DMA_FROM_DEVICE);
+	dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len,
+				rq->buff.map_dir);
 	page_ref_inc(au->page);
 	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
 			au->page, frag_offset, len, truesize);
 }
 static inline void
-mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb,
+mlx5e_copy_skb_header(struct mlx5e_rq *rq, struct sk_buff *skb,
 		      struct page *page, dma_addr_t addr,
 		      int offset_from, int dma_offset, u32 headlen)
 {
@@ -442,7 +442,8 @@ mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb,
 	/* Aligning len to sizeof(long) optimizes memcpy performance */
 	unsigned int len = ALIGN(headlen, sizeof(long));
-	dma_sync_single_for_cpu(pdev, addr + dma_offset, len, DMA_FROM_DEVICE);
+	dma_sync_single_for_cpu(rq->pdev, addr + dma_offset, len,
+				rq->buff.map_dir);
 	skb_copy_to_linear_data(skb, from, len);
 }
@@ -1538,7 +1539,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
 	addr = page_pool_get_dma_addr(au->page);
 	dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset,
-				      frag_size, DMA_FROM_DEVICE);
+				      frag_size, rq->buff.map_dir);
 	net_prefetch(data);
 	prog = rcu_dereference(rq->xdp_prog);
@@ -1587,7 +1588,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
 	addr = page_pool_get_dma_addr(au->page);
 	dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset,
-				      rq->buff.frame0_sz, DMA_FROM_DEVICE);
+				      rq->buff.frame0_sz, rq->buff.map_dir);
 	net_prefetchw(va); /* xdp_frame data area */
 	net_prefetch(va + rx_headroom);
@@ -1608,7 +1609,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
 		addr = page_pool_get_dma_addr(au->page);
 		dma_sync_single_for_cpu(rq->pdev, addr + wi->offset,
-					frag_consumed_bytes, DMA_FROM_DEVICE);
+					frag_consumed_bytes, rq->buff.map_dir);
 		if (!xdp_buff_has_frags(&xdp)) {
 			/* Init on the first fragment to avoid cold cache access
@@ -1905,7 +1906,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 		mlx5e_fill_skb_data(skb, rq, au, byte_cnt, frag_offset);
 	/* copy header */
 	addr = page_pool_get_dma_addr(head_au->page);
-	mlx5e_copy_skb_header(rq->pdev, skb, head_au->page, addr,
+	mlx5e_copy_skb_header(rq, skb, head_au->page, addr,
 			      head_offset, head_offset, headlen);
 	/* skb linear part was allocated with headlen and aligned to long */
 	skb->tail += headlen;
@@ -1939,7 +1940,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 	addr = page_pool_get_dma_addr(au->page);
 	dma_sync_single_range_for_cpu(rq->pdev, addr, head_offset,
-				      frag_size, DMA_FROM_DEVICE);
+				      frag_size, rq->buff.map_dir);
 	net_prefetch(data);
 	prog = rcu_dereference(rq->xdp_prog);
@@ -1987,7 +1988,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 	if (likely(frag_size <= BIT(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE))) {
 		/* build SKB around header */
-		dma_sync_single_range_for_cpu(rq->pdev, head->addr, 0, frag_size, DMA_FROM_DEVICE);
+		dma_sync_single_range_for_cpu(rq->pdev, head->addr, 0, frag_size, rq->buff.map_dir);
 		prefetchw(hdr);
 		prefetch(data);
 		skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size, 0);
@@ -2009,7 +2010,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 	}
 	prefetchw(skb->data);
-	mlx5e_copy_skb_header(rq->pdev, skb, head->page, head->addr,
+	mlx5e_copy_skb_header(rq, skb, head->page, head->addr,
 			      head_offset + rx_headroom,
 			      rx_headroom, head_size);
 	/* skb linear part was allocated with headlen and aligned to long */
...