Commit 930bc4cc authored by David S. Miller

Merge branch 'rework-mvneta-napi_poll-loop-for-XDP-multi-buffers'

Lorenzo Bianconi says:

====================
rework mvneta napi_poll loop for XDP multi-buffers

Rework the mvneta_rx_swbm routine in order to process all rx descriptors before
building the skb or running the XDP program attached to the interface.
Introduce xdp_get_shared_info_from_{buff,frame} utility routines to get the
skb_shared_info pointer from xdp_buff or xdp_frame.
This is a preliminary series to enable multi-buffers and jumbo frames for XDP
according to [1]

[1] https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org

Changes since v1:
- rely on skb_frag_* utility routines to access page/offset/len of the xdp multi-buffer
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents bed1ce78 c7a3a8cd
...@@ -698,10 +698,6 @@ struct mvneta_rx_queue { ...@@ -698,10 +698,6 @@ struct mvneta_rx_queue {
/* Index of first RX DMA descriptor to refill */ /* Index of first RX DMA descriptor to refill */
int first_to_refill; int first_to_refill;
u32 refill_num; u32 refill_num;
/* pointer to uncomplete skb buffer */
struct sk_buff *skb;
int left_size;
}; };
static enum cpuhp_state online_hpstate; static enum cpuhp_state online_hpstate;
...@@ -2026,6 +2022,20 @@ int mvneta_rx_refill_queue(struct mvneta_port *pp, struct mvneta_rx_queue *rxq) ...@@ -2026,6 +2022,20 @@ int mvneta_rx_refill_queue(struct mvneta_port *pp, struct mvneta_rx_queue *rxq)
return i; return i;
} }
/* Hand every page backing an xdp_buff back to the rx queue's page_pool.
 *
 * @pp:       port that owns @rxq (unused here, kept for the callers' signature)
 * @rxq:      rx queue whose page_pool receives the pages
 * @xdp:      buffer to release; its skb_shared_info lists the fragment pages
 * @sync_len: forwarded to page_pool_put_page() for the head page
 * @napi:     forwarded unchanged to the page_pool put helpers
 *
 * The skb_shared_info returned by xdp_get_shared_info_from_buff() sits at the
 * tail of the head buffer (data_hard_start + frame_sz - aligned shinfo size).
 * The fragment pages are therefore released first and the head page last, so
 * that nr_frags/frags[] are never read after the buffer holding them has been
 * given back to the pool (which may free it instead of recycling it).
 */
static void
mvneta_xdp_put_buff(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
		    struct xdp_buff *xdp, int sync_len, bool napi)
{
	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
	int i;

	/* Fragments first: sinfo is still backed by the head page here. */
	for (i = 0; i < sinfo->nr_frags; i++)
		page_pool_put_full_page(rxq->page_pool,
					skb_frag_page(&sinfo->frags[i]), napi);

	/* Head page last; sinfo must not be dereferenced after this call. */
	page_pool_put_page(rxq->page_pool, virt_to_head_page(xdp->data),
			   sync_len, napi);
}
static int static int
mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq, mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq,
struct xdp_frame *xdpf, bool dma_map) struct xdp_frame *xdpf, bool dma_map)
...@@ -2158,13 +2168,13 @@ mvneta_xdp_xmit(struct net_device *dev, int num_frame, ...@@ -2158,13 +2168,13 @@ mvneta_xdp_xmit(struct net_device *dev, int num_frame,
static int static int
mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
struct bpf_prog *prog, struct xdp_buff *xdp, struct bpf_prog *prog, struct xdp_buff *xdp,
struct mvneta_stats *stats) u32 frame_sz, struct mvneta_stats *stats)
{ {
unsigned int len, sync; unsigned int len, data_len, sync;
struct page *page;
u32 ret, act; u32 ret, act;
len = xdp->data_end - xdp->data_hard_start - pp->rx_offset_correction; len = xdp->data_end - xdp->data_hard_start - pp->rx_offset_correction;
data_len = xdp->data_end - xdp->data;
act = bpf_prog_run_xdp(prog, xdp); act = bpf_prog_run_xdp(prog, xdp);
/* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */ /* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */
...@@ -2180,9 +2190,8 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, ...@@ -2180,9 +2190,8 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
err = xdp_do_redirect(pp->dev, xdp, prog); err = xdp_do_redirect(pp->dev, xdp, prog);
if (unlikely(err)) { if (unlikely(err)) {
mvneta_xdp_put_buff(pp, rxq, xdp, sync, true);
ret = MVNETA_XDP_DROPPED; ret = MVNETA_XDP_DROPPED;
page = virt_to_head_page(xdp->data);
page_pool_put_page(rxq->page_pool, page, sync, true);
} else { } else {
ret = MVNETA_XDP_REDIR; ret = MVNETA_XDP_REDIR;
stats->xdp_redirect++; stats->xdp_redirect++;
...@@ -2191,10 +2200,8 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, ...@@ -2191,10 +2200,8 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
} }
case XDP_TX: case XDP_TX:
ret = mvneta_xdp_xmit_back(pp, xdp); ret = mvneta_xdp_xmit_back(pp, xdp);
if (ret != MVNETA_XDP_TX) { if (ret != MVNETA_XDP_TX)
page = virt_to_head_page(xdp->data); mvneta_xdp_put_buff(pp, rxq, xdp, sync, true);
page_pool_put_page(rxq->page_pool, page, sync, true);
}
break; break;
default: default:
bpf_warn_invalid_xdp_action(act); bpf_warn_invalid_xdp_action(act);
...@@ -2203,25 +2210,23 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, ...@@ -2203,25 +2210,23 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
trace_xdp_exception(pp->dev, prog, act); trace_xdp_exception(pp->dev, prog, act);
/* fall through */ /* fall through */
case XDP_DROP: case XDP_DROP:
page = virt_to_head_page(xdp->data); mvneta_xdp_put_buff(pp, rxq, xdp, sync, true);
page_pool_put_page(rxq->page_pool, page, sync, true);
ret = MVNETA_XDP_DROPPED; ret = MVNETA_XDP_DROPPED;
stats->xdp_drop++; stats->xdp_drop++;
break; break;
} }
stats->rx_bytes += xdp->data_end - xdp->data; stats->rx_bytes += frame_sz + xdp->data_end - xdp->data - data_len;
stats->rx_packets++; stats->rx_packets++;
return ret; return ret;
} }
static int static void
mvneta_swbm_rx_frame(struct mvneta_port *pp, mvneta_swbm_rx_frame(struct mvneta_port *pp,
struct mvneta_rx_desc *rx_desc, struct mvneta_rx_desc *rx_desc,
struct mvneta_rx_queue *rxq, struct mvneta_rx_queue *rxq,
struct xdp_buff *xdp, struct xdp_buff *xdp, int *size,
struct bpf_prog *xdp_prog,
struct page *page, struct page *page,
struct mvneta_stats *stats) struct mvneta_stats *stats)
{ {
...@@ -2229,7 +2234,7 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp, ...@@ -2229,7 +2234,7 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp,
int data_len = -MVNETA_MH_SIZE, len; int data_len = -MVNETA_MH_SIZE, len;
struct net_device *dev = pp->dev; struct net_device *dev = pp->dev;
enum dma_data_direction dma_dir; enum dma_data_direction dma_dir;
int ret = 0; struct skb_shared_info *sinfo;
if (MVNETA_SKB_SIZE(rx_desc->data_size) > PAGE_SIZE) { if (MVNETA_SKB_SIZE(rx_desc->data_size) > PAGE_SIZE) {
len = MVNETA_MAX_RX_BUF_SIZE; len = MVNETA_MAX_RX_BUF_SIZE;
...@@ -2252,71 +2257,81 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp, ...@@ -2252,71 +2257,81 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp,
xdp->data_end = xdp->data + data_len; xdp->data_end = xdp->data + data_len;
xdp_set_data_meta_invalid(xdp); xdp_set_data_meta_invalid(xdp);
if (xdp_prog) { sinfo = xdp_get_shared_info_from_buff(xdp);
ret = mvneta_run_xdp(pp, rxq, xdp_prog, xdp, stats); sinfo->nr_frags = 0;
if (ret)
goto out;
}
rxq->skb = build_skb(xdp->data_hard_start, PAGE_SIZE);
if (unlikely(!rxq->skb)) {
struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
netdev_err(dev, "Can't allocate skb on queue %d\n", rxq->id);
u64_stats_update_begin(&stats->syncp); *size = rx_desc->data_size - len;
stats->es.skb_alloc_error++;
stats->rx_dropped++;
u64_stats_update_end(&stats->syncp);
return -ENOMEM;
}
page_pool_release_page(rxq->page_pool, page);
skb_reserve(rxq->skb,
xdp->data - xdp->data_hard_start);
skb_put(rxq->skb, xdp->data_end - xdp->data);
mvneta_rx_csum(pp, rx_desc->status, rxq->skb);
rxq->left_size = rx_desc->data_size - len;
out:
rx_desc->buf_phys_addr = 0; rx_desc->buf_phys_addr = 0;
return ret;
} }
static void static void
mvneta_swbm_add_rx_fragment(struct mvneta_port *pp, mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
struct mvneta_rx_desc *rx_desc, struct mvneta_rx_desc *rx_desc,
struct mvneta_rx_queue *rxq, struct mvneta_rx_queue *rxq,
struct xdp_buff *xdp, int *size,
struct page *page) struct page *page)
{ {
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
struct net_device *dev = pp->dev; struct net_device *dev = pp->dev;
enum dma_data_direction dma_dir; enum dma_data_direction dma_dir;
int data_len, len; int data_len, len;
if (rxq->left_size > MVNETA_MAX_RX_BUF_SIZE) { if (*size > MVNETA_MAX_RX_BUF_SIZE) {
len = MVNETA_MAX_RX_BUF_SIZE; len = MVNETA_MAX_RX_BUF_SIZE;
data_len = len; data_len = len;
} else { } else {
len = rxq->left_size; len = *size;
data_len = len - ETH_FCS_LEN; data_len = len - ETH_FCS_LEN;
} }
dma_dir = page_pool_get_dma_dir(rxq->page_pool); dma_dir = page_pool_get_dma_dir(rxq->page_pool);
dma_sync_single_for_cpu(dev->dev.parent, dma_sync_single_for_cpu(dev->dev.parent,
rx_desc->buf_phys_addr, rx_desc->buf_phys_addr,
len, dma_dir); len, dma_dir);
if (data_len > 0) {
/* refill descriptor with new buffer later */ if (data_len > 0 && sinfo->nr_frags < MAX_SKB_FRAGS) {
skb_add_rx_frag(rxq->skb, skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags];
skb_shinfo(rxq->skb)->nr_frags,
page, pp->rx_offset_correction, data_len, skb_frag_off_set(frag, pp->rx_offset_correction);
PAGE_SIZE); skb_frag_size_set(frag, data_len);
__skb_frag_set_page(frag, page);
sinfo->nr_frags++;
rx_desc->buf_phys_addr = 0;
} }
*size -= len;
}
static struct sk_buff *
mvneta_swbm_build_skb(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
struct xdp_buff *xdp, u32 desc_status)
{
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
int i, num_frags = sinfo->nr_frags;
skb_frag_t frags[MAX_SKB_FRAGS];
struct sk_buff *skb;
memcpy(frags, sinfo->frags, sizeof(skb_frag_t) * num_frags);
skb = build_skb(xdp->data_hard_start, PAGE_SIZE);
if (!skb)
return ERR_PTR(-ENOMEM);
page_pool_release_page(rxq->page_pool, virt_to_page(xdp->data));
skb_reserve(skb, xdp->data - xdp->data_hard_start);
skb_put(skb, xdp->data_end - xdp->data);
mvneta_rx_csum(pp, desc_status, skb);
for (i = 0; i < num_frags; i++) {
struct page *page = skb_frag_page(&frags[i]);
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
page, skb_frag_off(&frags[i]),
skb_frag_size(&frags[i]), PAGE_SIZE);
page_pool_release_page(rxq->page_pool, page); page_pool_release_page(rxq->page_pool, page);
rx_desc->buf_phys_addr = 0; }
rxq->left_size -= len;
return skb;
} }
/* Main rx processing when using software buffer management */ /* Main rx processing when using software buffer management */
...@@ -2324,24 +2339,27 @@ static int mvneta_rx_swbm(struct napi_struct *napi, ...@@ -2324,24 +2339,27 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
struct mvneta_port *pp, int budget, struct mvneta_port *pp, int budget,
struct mvneta_rx_queue *rxq) struct mvneta_rx_queue *rxq)
{ {
int rx_proc = 0, rx_todo, refill; int rx_proc = 0, rx_todo, refill, size = 0;
struct net_device *dev = pp->dev; struct net_device *dev = pp->dev;
struct xdp_buff xdp_buf = {
.frame_sz = PAGE_SIZE,
.rxq = &rxq->xdp_rxq,
};
struct mvneta_stats ps = {}; struct mvneta_stats ps = {};
struct bpf_prog *xdp_prog; struct bpf_prog *xdp_prog;
struct xdp_buff xdp_buf; u32 desc_status, frame_sz;
/* Get number of received packets */ /* Get number of received packets */
rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq); rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq);
rcu_read_lock(); rcu_read_lock();
xdp_prog = READ_ONCE(pp->xdp_prog); xdp_prog = READ_ONCE(pp->xdp_prog);
xdp_buf.rxq = &rxq->xdp_rxq;
xdp_buf.frame_sz = PAGE_SIZE;
/* Fairness NAPI loop */ /* Fairness NAPI loop */
while (rx_proc < budget && rx_proc < rx_todo) { while (rx_proc < budget && rx_proc < rx_todo) {
struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq); struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq);
u32 rx_status, index; u32 rx_status, index;
struct sk_buff *skb;
struct page *page; struct page *page;
index = rx_desc - rxq->descs; index = rx_desc - rxq->descs;
...@@ -2352,54 +2370,66 @@ static int mvneta_rx_swbm(struct napi_struct *napi, ...@@ -2352,54 +2370,66 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
rxq->refill_num++; rxq->refill_num++;
if (rx_status & MVNETA_RXD_FIRST_DESC) { if (rx_status & MVNETA_RXD_FIRST_DESC) {
int err;
/* Check errors only for FIRST descriptor */ /* Check errors only for FIRST descriptor */
if (rx_status & MVNETA_RXD_ERR_SUMMARY) { if (rx_status & MVNETA_RXD_ERR_SUMMARY) {
mvneta_rx_error(pp, rx_desc); mvneta_rx_error(pp, rx_desc);
/* leave the descriptor untouched */ goto next;
continue;
} }
err = mvneta_swbm_rx_frame(pp, rx_desc, rxq, &xdp_buf, size = rx_desc->data_size;
xdp_prog, page, &ps); frame_sz = size - ETH_FCS_LEN;
if (err) desc_status = rx_desc->status;
continue;
mvneta_swbm_rx_frame(pp, rx_desc, rxq, &xdp_buf,
&size, page, &ps);
} else { } else {
if (unlikely(!rxq->skb)) { if (unlikely(!xdp_buf.data_hard_start))
pr_debug("no skb for rx_status 0x%x\n",
rx_status);
continue; continue;
}
mvneta_swbm_add_rx_fragment(pp, rx_desc, rxq, page); mvneta_swbm_add_rx_fragment(pp, rx_desc, rxq, &xdp_buf,
&size, page);
} /* Middle or Last descriptor */ } /* Middle or Last descriptor */
if (!(rx_status & MVNETA_RXD_LAST_DESC)) if (!(rx_status & MVNETA_RXD_LAST_DESC))
/* no last descriptor this time */ /* no last descriptor this time */
continue; continue;
if (rxq->left_size) { if (size) {
pr_err("get last desc, but left_size (%d) != 0\n", mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1, true);
rxq->left_size); goto next;
dev_kfree_skb_any(rxq->skb);
rxq->left_size = 0;
rxq->skb = NULL;
continue;
} }
ps.rx_bytes += rxq->skb->len; if (xdp_prog &&
ps.rx_packets++; mvneta_run_xdp(pp, rxq, xdp_prog, &xdp_buf, frame_sz, &ps))
goto next;
/* Linux processing */ skb = mvneta_swbm_build_skb(pp, rxq, &xdp_buf, desc_status);
rxq->skb->protocol = eth_type_trans(rxq->skb, dev); if (IS_ERR(skb)) {
struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1, true);
u64_stats_update_begin(&stats->syncp);
stats->es.skb_alloc_error++;
stats->rx_dropped++;
u64_stats_update_end(&stats->syncp);
goto next;
}
napi_gro_receive(napi, rxq->skb); ps.rx_bytes += skb->len;
ps.rx_packets++;
/* clean uncomplete skb pointer in queue */ skb->protocol = eth_type_trans(skb, dev);
rxq->skb = NULL; napi_gro_receive(napi, skb);
next:
xdp_buf.data_hard_start = NULL;
} }
rcu_read_unlock(); rcu_read_unlock();
if (xdp_buf.data_hard_start)
mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1, true);
if (ps.xdp_redirect) if (ps.xdp_redirect)
xdp_do_flush_map(); xdp_do_flush_map();
...@@ -3328,9 +3358,6 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp, ...@@ -3328,9 +3358,6 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp,
{ {
mvneta_rxq_drop_pkts(pp, rxq); mvneta_rxq_drop_pkts(pp, rxq);
if (rxq->skb)
dev_kfree_skb_any(rxq->skb);
if (rxq->descs) if (rxq->descs)
dma_free_coherent(pp->dev->dev.parent, dma_free_coherent(pp->dev->dev.parent,
rxq->size * MVNETA_DESC_ALIGNED_SIZE, rxq->size * MVNETA_DESC_ALIGNED_SIZE,
...@@ -3343,8 +3370,6 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp, ...@@ -3343,8 +3370,6 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp,
rxq->descs_phys = 0; rxq->descs_phys = 0;
rxq->first_to_refill = 0; rxq->first_to_refill = 0;
rxq->refill_num = 0; rxq->refill_num = 0;
rxq->skb = NULL;
rxq->left_size = 0;
} }
static int mvneta_txq_sw_init(struct mvneta_port *pp, static int mvneta_txq_sw_init(struct mvneta_port *pp,
......
...@@ -85,6 +85,12 @@ struct xdp_buff { ...@@ -85,6 +85,12 @@ struct xdp_buff {
((xdp)->data_hard_start + (xdp)->frame_sz - \ ((xdp)->data_hard_start + (xdp)->frame_sz - \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
/* Return the skb_shared_info area of an xdp_buff.
 *
 * The area starts at xdp_data_hard_end(xdp); this helper only reinterprets
 * that address as a struct skb_shared_info pointer and does not initialise it.
 */
static inline struct skb_shared_info *
xdp_get_shared_info_from_buff(struct xdp_buff *xdp)
{
	struct skb_shared_info *shinfo;

	shinfo = (struct skb_shared_info *)xdp_data_hard_end(xdp);

	return shinfo;
}
struct xdp_frame { struct xdp_frame {
void *data; void *data;
u16 len; u16 len;
...@@ -98,6 +104,15 @@ struct xdp_frame { ...@@ -98,6 +104,15 @@ struct xdp_frame {
struct net_device *dev_rx; /* used by cpumap */ struct net_device *dev_rx; /* used by cpumap */
}; };
/* Return the skb_shared_info area that belongs to an xdp_frame.
 *
 * The start of the buffer is recovered by stepping back from frame->data over
 * the headroom and over the xdp_frame structure itself; the shared info then
 * occupies the last SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) bytes of
 * the frame_sz-sized buffer.
 */
static inline struct skb_shared_info *
xdp_get_shared_info_from_frame(struct xdp_frame *frame)
{
	void *hard_start = frame->data - frame->headroom - sizeof(*frame);
	void *hard_end = hard_start + frame->frame_sz;

	return (struct skb_shared_info *)
		(hard_end - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
}
/* Clear kernel pointers in xdp_frame */ /* Clear kernel pointers in xdp_frame */
static inline void xdp_scrub_frame(struct xdp_frame *frame) static inline void xdp_scrub_frame(struct xdp_frame *frame)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment