Commit 505e315b authored by Jakub Kicinski

Merge branch 'virtio_net-refactor-xdp-codes'

Xuan Zhuo says:

====================
virtio_net: refactor xdp codes

For historical reasons, the implementation of XDP in virtio-net
is relatively chaotic. For example, the processing of XDP actions
exists as two copies of similar code. Other examples are the handling of page and xdp_page.

The purpose of this patch set is to refactor this code and reduce the difficulty
of subsequent maintenance, so that subsequent developers will not introduce new bugs
because of complex logical relationships.

In addition, the AF_XDP support that I want to submit later will
also need to reuse the XDP logic, such as the processing of actions.
I don't want to introduce new, similar code; this way, I can reuse
this code in the future.
====================

Link: https://lore.kernel.org/r/20230508061417.65297-1-xuanzhuo@linux.alibaba.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 559ae55c 21e26a71
......@@ -443,6 +443,22 @@ static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
}
/* Wrap the receive buffer @buf (of @buflen bytes) in an skb via build_skb(),
 * reserve @headroom bytes in front of the data and mark @len bytes as
 * packet data.
 *
 * Returns the new skb, or NULL when build_skb() fails.
 */
static struct sk_buff *virtnet_build_skb(void *buf, unsigned int buflen,
					 unsigned int headroom,
					 unsigned int len)
{
	struct sk_buff *new_skb = build_skb(buf, buflen);

	if (unlikely(new_skb == NULL))
		return NULL;

	/* Skip the headroom first, then account for the payload. */
	skb_reserve(new_skb, headroom);
	skb_put(new_skb, len);

	return new_skb;
}
/* Called from bottom half context */
static struct sk_buff *page_to_skb(struct virtnet_info *vi,
struct receive_queue *rq,
......@@ -476,13 +492,10 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
/* copy small packet so we can reuse these pages */
if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
skb = build_skb(buf, truesize);
skb = virtnet_build_skb(buf, truesize, p - buf, len);
if (unlikely(!skb))
return NULL;
skb_reserve(skb, p - buf);
skb_put(skb, len);
page = (struct page *)page->private;
if (page)
give_pages(rq, page);
......@@ -789,6 +802,75 @@ static int virtnet_xdp_xmit(struct net_device *dev,
return ret;
}
/* Drop one page reference for every fragment attached to @xdp.
 * Does nothing when the buff carries no fragments.
 */
static void put_xdp_frags(struct xdp_buff *xdp)
{
	struct skb_shared_info *sinfo;
	int idx;

	if (!xdp_buff_has_frags(xdp))
		return;

	sinfo = xdp_get_shared_info_from_buff(xdp);
	for (idx = 0; idx < sinfo->nr_frags; idx++)
		put_page(skb_frag_page(&sinfo->frags[idx]));
}
/* Run @xdp_prog on @xdp and carry out the verdict it returns.
 *
 * Every call counts one packet in stats->xdp_packets. Then:
 *  - XDP_PASS: returns immediately; nothing else is done here.
 *  - XDP_TX: counts stats->xdp_tx, converts the buff to a frame, hands it
 *    to virtnet_xdp_xmit() and sets VIRTIO_XDP_TX in *xdp_xmit.
 *  - XDP_REDIRECT: counts stats->xdp_redirects, calls xdp_do_redirect()
 *    and sets VIRTIO_XDP_REDIR in *xdp_xmit.
 *  - XDP_ABORTED and unknown actions: an XDP exception is traced (unknown
 *    actions are first reported via bpf_warn_invalid_xdp_action()).
 *
 * Returns XDP_PASS, XDP_TX or XDP_REDIRECT when that action was carried
 * out, and XDP_DROP for a drop verdict and for every failure path. On the
 * XDP_DROP returns this function neither updates a drop counter nor
 * releases the buffer.
 */
static int virtnet_xdp_handler(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
struct net_device *dev,
unsigned int *xdp_xmit,
struct virtnet_rq_stats *stats)
{
struct xdp_frame *xdpf;
int err;
u32 act;
act = bpf_prog_run_xdp(xdp_prog, xdp);
stats->xdp_packets++;
switch (act) {
case XDP_PASS:
return act;
case XDP_TX:
stats->xdp_tx++;
xdpf = xdp_convert_buff_to_frame(xdp);
if (unlikely(!xdpf)) {
netdev_dbg(dev, "convert buff to frame failed for xdp\n");
return XDP_DROP;
}
err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
/* A return of 0 hands the frame back through xdp_return_frame_rx_napi()
 * and the verdict reported to the caller is still XDP_TX; only a
 * negative return is turned into XDP_DROP.
 * NOTE(review): 0 presumably means no frame was queued - confirm
 * against virtnet_xdp_xmit().
 */
if (unlikely(!err)) {
xdp_return_frame_rx_napi(xdpf);
} else if (unlikely(err < 0)) {
trace_xdp_exception(dev, xdp_prog, act);
return XDP_DROP;
}
*xdp_xmit |= VIRTIO_XDP_TX;
return act;
case XDP_REDIRECT:
stats->xdp_redirects++;
err = xdp_do_redirect(dev, xdp, xdp_prog);
if (err)
return XDP_DROP;
*xdp_xmit |= VIRTIO_XDP_REDIR;
return act;
default:
/* Unknown action: warn, then handle it exactly like XDP_ABORTED. */
bpf_warn_invalid_xdp_action(dev, xdp_prog, act);
fallthrough;
case XDP_ABORTED:
trace_xdp_exception(dev, xdp_prog, act);
fallthrough;
case XDP_DROP:
return XDP_DROP;
}
}
static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
{
return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0;
......@@ -862,54 +944,58 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
return NULL;
}
static struct sk_buff *receive_small(struct net_device *dev,
/* Build an skb around a small receive buffer without running XDP.
 *
 * The packet data sits VIRTNET_RX_PAD + @xdp_headroom + vi->hdr_len bytes
 * into @buf; the virtio-net header that precedes it is copied into the
 * skb's vnet header area.
 *
 * Returns the skb, or NULL when it could not be built.
 */
static struct sk_buff *receive_small_build_skb(struct virtnet_info *vi,
					       unsigned int xdp_headroom,
					       void *buf,
					       unsigned int len)
{
	unsigned int hdr_off = VIRTNET_RX_PAD + xdp_headroom;
	unsigned int reserve = vi->hdr_len + hdr_off;
	unsigned int buf_sz = SKB_DATA_ALIGN(GOOD_PACKET_LEN + reserve) +
			      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	struct sk_buff *skb;

	skb = virtnet_build_skb(buf, buf_sz, reserve, len);
	if (unlikely(!skb))
		return NULL;

	/* The virtio-net header starts hdr_off bytes into the buffer. */
	memcpy(skb_vnet_hdr(skb), buf + hdr_off, vi->hdr_len);

	return skb;
}
static struct sk_buff *receive_small_xdp(struct net_device *dev,
struct virtnet_info *vi,
struct receive_queue *rq,
void *buf, void *ctx,
struct bpf_prog *xdp_prog,
void *buf,
unsigned int xdp_headroom,
unsigned int len,
unsigned int *xdp_xmit,
struct virtnet_rq_stats *stats)
{
struct sk_buff *skb;
struct bpf_prog *xdp_prog;
unsigned int xdp_headroom = (unsigned long)ctx;
unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
unsigned int headroom = vi->hdr_len + header_offset;
unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
struct page *page = virt_to_head_page(buf);
unsigned int delta = 0;
struct page *xdp_page;
int err;
unsigned int metasize = 0;
len -= vi->hdr_len;
stats->bytes += len;
if (unlikely(len > GOOD_PACKET_LEN)) {
pr_debug("%s: rx error: len %u exceeds max size %d\n",
dev->name, len, GOOD_PACKET_LEN);
dev->stats.rx_length_errors++;
goto err;
}
if (likely(!vi->xdp_enabled)) {
xdp_prog = NULL;
goto skip_xdp;
}
rcu_read_lock();
xdp_prog = rcu_dereference(rq->xdp_prog);
if (xdp_prog) {
struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
struct xdp_frame *xdpf;
unsigned int buflen;
struct xdp_buff xdp;
void *orig_data;
struct sk_buff *skb;
unsigned int metasize = 0;
u32 act;
if (unlikely(hdr->hdr.gso_type))
goto err_xdp;
buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
int offset = buf - page_address(page) + header_offset;
unsigned int tlen = len + vi->hdr_len;
......@@ -934,62 +1020,27 @@ static struct sk_buff *receive_small(struct net_device *dev,
xdp_init_buff(&xdp, buflen, &rq->xdp_rxq);
xdp_prepare_buff(&xdp, buf + VIRTNET_RX_PAD + vi->hdr_len,
xdp_headroom, len, true);
orig_data = xdp.data;
act = bpf_prog_run_xdp(xdp_prog, &xdp);
stats->xdp_packets++;
act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);
switch (act) {
case XDP_PASS:
/* Recalculate length in case bpf program changed it */
delta = orig_data - xdp.data;
len = xdp.data_end - xdp.data;
metasize = xdp.data - xdp.data_meta;
break;
case XDP_TX:
stats->xdp_tx++;
xdpf = xdp_convert_buff_to_frame(&xdp);
if (unlikely(!xdpf))
goto err_xdp;
err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
if (unlikely(!err)) {
xdp_return_frame_rx_napi(xdpf);
} else if (unlikely(err < 0)) {
trace_xdp_exception(vi->dev, xdp_prog, act);
goto err_xdp;
}
*xdp_xmit |= VIRTIO_XDP_TX;
rcu_read_unlock();
goto xdp_xmit;
case XDP_REDIRECT:
stats->xdp_redirects++;
err = xdp_do_redirect(dev, &xdp, xdp_prog);
if (err)
goto err_xdp;
*xdp_xmit |= VIRTIO_XDP_REDIR;
rcu_read_unlock();
goto xdp_xmit;
default:
bpf_warn_invalid_xdp_action(vi->dev, xdp_prog, act);
fallthrough;
case XDP_ABORTED:
trace_xdp_exception(vi->dev, xdp_prog, act);
goto err_xdp;
case XDP_DROP:
goto err_xdp;
}
}
rcu_read_unlock();
skip_xdp:
skb = build_skb(buf, buflen);
if (!skb)
skb = virtnet_build_skb(buf, buflen, xdp.data - buf, len);
if (unlikely(!skb))
goto err;
skb_reserve(skb, headroom - delta);
skb_put(skb, len);
if (!xdp_prog) {
buf += header_offset;
memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
} /* keep zeroed vnet hdr since XDP is loaded */
if (metasize)
skb_metadata_set(skb, metasize);
......@@ -997,7 +1048,6 @@ static struct sk_buff *receive_small(struct net_device *dev,
return skb;
err_xdp:
rcu_read_unlock();
stats->xdp_drops++;
err:
stats->drops++;
......@@ -1006,6 +1056,53 @@ static struct sk_buff *receive_small(struct net_device *dev,
return NULL;
}
/* Turn one received small buffer into an skb.
 *
 * @ctx carries the XDP headroom of the buffer as an integer stored in a
 * pointer. @len arrives including the virtio-net header; the header length
 * is subtracted before it is added to stats->bytes.
 *
 * When XDP is enabled and a program is attached to @rq, the buffer is handed
 * to receive_small_xdp() under rcu_read_lock() and its result is returned
 * unchanged. Otherwise the skb is built by receive_small_build_skb().
 *
 * Returns the skb, or NULL. On an over-long packet or an skb build failure
 * in the non-XDP path, stats->drops is bumped and the buffer's page
 * reference is dropped here.
 */
static struct sk_buff *receive_small(struct net_device *dev,
				     struct virtnet_info *vi,
				     struct receive_queue *rq,
				     void *buf, void *ctx,
				     unsigned int len,
				     unsigned int *xdp_xmit,
				     struct virtnet_rq_stats *stats)
{
	struct page *buf_page = virt_to_head_page(buf);
	unsigned int xdp_headroom = (unsigned long)ctx;
	struct sk_buff *skb;

	/* From here on len is the length without the virtio-net header. */
	len -= vi->hdr_len;
	stats->bytes += len;

	if (unlikely(len > GOOD_PACKET_LEN)) {
		pr_debug("%s: rx error: len %u exceeds max size %d\n",
			 dev->name, len, GOOD_PACKET_LEN);
		dev->stats.rx_length_errors++;
		goto drop;
	}

	if (unlikely(vi->xdp_enabled)) {
		struct bpf_prog *prog;

		rcu_read_lock();
		prog = rcu_dereference(rq->xdp_prog);
		if (prog) {
			skb = receive_small_xdp(dev, vi, rq, prog, buf,
						xdp_headroom, len, xdp_xmit,
						stats);
			rcu_read_unlock();
			return skb;
		}
		rcu_read_unlock();
	}

	skb = receive_small_build_skb(vi, xdp_headroom, buf, len);
	if (unlikely(!skb))
		goto drop;

	return skb;

drop:
	stats->drops++;
	put_page(buf_page);
	return NULL;
}
static struct sk_buff *receive_big(struct net_device *dev,
struct virtnet_info *vi,
struct receive_queue *rq,
......@@ -1029,6 +1126,28 @@ static struct sk_buff *receive_big(struct net_device *dev,
return NULL;
}
/* Pull the remaining buffers of a mergeable packet off the receive
 * virtqueue and release them.
 *
 * @num_buf is the buffer count of the packet; the loop fetches num_buf - 1
 * buffers (NOTE(review): presumably the first buffer is already held and
 * released by the caller - confirm at the call sites). For each fetched
 * buffer its length is added to stats->bytes and one reference on its head
 * page is dropped. If the virtqueue runs out of buffers early, the shortfall
 * is logged, dev->stats.rx_length_errors is bumped and the loop stops.
 */
static void mergeable_buf_free(struct receive_queue *rq, int num_buf,
			       struct net_device *dev,
			       struct virtnet_rq_stats *stats)
{
	/* virtqueue_get_buf() reports the used length through an
	 * unsigned int pointer, so keep the local unsigned as well.
	 */
	unsigned int len;
	struct page *page;
	void *buf;

	while (num_buf-- > 1) {
		buf = virtqueue_get_buf(rq->vq, &len);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers missing\n",
				 dev->name, num_buf);
			dev->stats.rx_length_errors++;
			break;
		}
		stats->bytes += len;
		page = virt_to_head_page(buf);
		put_page(page);
	}
}
/* Why not use xdp_build_skb_from_frame() ?
* XDP core assumes that xdp frags are PAGE_SIZE in length, while in
* virtio-net there are 2 points that do not match its requirements:
......@@ -1130,7 +1249,7 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
dev->name, *num_buf,
virtio16_to_cpu(vi->vdev, hdr->num_buffers));
dev->stats.rx_length_errors++;
return -EINVAL;
goto err;
}
stats->bytes += len;
......@@ -1149,7 +1268,7 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
dev->name, len, (unsigned long)(truesize - room));
dev->stats.rx_length_errors++;
return -EINVAL;
goto err;
}
frag = &shinfo->frags[shinfo->nr_frags++];
......@@ -1164,70 +1283,46 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
*xdp_frags_truesize = xdp_frags_truesz;
return 0;
err:
put_xdp_frags(xdp);
return -EINVAL;
}
static struct sk_buff *receive_mergeable(struct net_device *dev,
struct virtnet_info *vi,
static void *mergeable_xdp_get_buf(struct virtnet_info *vi,
struct receive_queue *rq,
void *buf,
struct bpf_prog *xdp_prog,
void *ctx,
unsigned int len,
unsigned int *xdp_xmit,
struct virtnet_rq_stats *stats)
unsigned int *frame_sz,
int *num_buf,
struct page **page,
int offset,
unsigned int *len,
struct virtio_net_hdr_mrg_rxbuf *hdr)
{
struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
struct page *page = virt_to_head_page(buf);
int offset = buf - page_address(page);
struct sk_buff *head_skb, *curr_skb;
struct bpf_prog *xdp_prog;
unsigned int truesize = mergeable_ctx_to_truesize(ctx);
unsigned int headroom = mergeable_ctx_to_headroom(ctx);
unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
unsigned int frame_sz, xdp_room;
int err;
head_skb = NULL;
stats->bytes += len - vi->hdr_len;
if (unlikely(len > truesize - room)) {
pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
dev->name, len, (unsigned long)(truesize - room));
dev->stats.rx_length_errors++;
goto err_skb;
}
if (likely(!vi->xdp_enabled)) {
xdp_prog = NULL;
goto skip_xdp;
}
rcu_read_lock();
xdp_prog = rcu_dereference(rq->xdp_prog);
if (xdp_prog) {
unsigned int xdp_frags_truesz = 0;
struct skb_shared_info *shinfo;
struct xdp_frame *xdpf;
struct page *xdp_page;
struct xdp_buff xdp;
void *data;
u32 act;
int i;
unsigned int xdp_room;
/* Transient failure which in theory could occur if
* in-flight packets from before XDP was enabled reach
* the receive path after XDP is loaded.
*/
if (unlikely(hdr->hdr.gso_type))
goto err_xdp;
return NULL;
/* Now XDP core assumes frag size is PAGE_SIZE, but buffers
* with headroom may add hole in truesize, which
* make their length exceed PAGE_SIZE. So we disabled the
* hole mechanism for xdp. See add_recvbuf_mergeable().
*/
frame_sz = truesize;
*frame_sz = truesize;
if (likely(headroom >= virtnet_get_headroom(vi) &&
(*num_buf == 1 || xdp_prog->aux->xdp_has_frags))) {
return page_address(*page) + offset;
}
/* This happens when headroom is not enough because
* of the buffer was prefilled before XDP is set.
......@@ -1237,110 +1332,140 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
* support it, and we don't want to bother users who are
* using xdp normally.
*/
if (!xdp_prog->aux->xdp_has_frags &&
(num_buf > 1 || headroom < virtnet_get_headroom(vi))) {
if (!xdp_prog->aux->xdp_has_frags) {
/* linearize data for XDP */
xdp_page = xdp_linearize_page(rq, &num_buf,
page, offset,
xdp_page = xdp_linearize_page(rq, num_buf,
*page, offset,
VIRTIO_XDP_HEADROOM,
&len);
frame_sz = PAGE_SIZE;
len);
if (!xdp_page)
goto err_xdp;
offset = VIRTIO_XDP_HEADROOM;
} else if (unlikely(headroom < virtnet_get_headroom(vi))) {
return NULL;
} else {
xdp_room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM +
sizeof(struct skb_shared_info));
if (len + xdp_room > PAGE_SIZE)
goto err_xdp;
if (*len + xdp_room > PAGE_SIZE)
return NULL;
xdp_page = alloc_page(GFP_ATOMIC);
if (!xdp_page)
goto err_xdp;
return NULL;
memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM,
page_address(page) + offset, len);
frame_sz = PAGE_SIZE;
offset = VIRTIO_XDP_HEADROOM;
} else {
xdp_page = page;
page_address(*page) + offset, *len);
}
data = page_address(xdp_page) + offset;
*frame_sz = PAGE_SIZE;
put_page(*page);
*page = xdp_page;
return page_address(*page) + VIRTIO_XDP_HEADROOM;
}
static struct sk_buff *receive_mergeable_xdp(struct net_device *dev,
struct virtnet_info *vi,
struct receive_queue *rq,
struct bpf_prog *xdp_prog,
void *buf,
void *ctx,
unsigned int len,
unsigned int *xdp_xmit,
struct virtnet_rq_stats *stats)
{
struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
struct page *page = virt_to_head_page(buf);
int offset = buf - page_address(page);
unsigned int xdp_frags_truesz = 0;
struct sk_buff *head_skb;
unsigned int frame_sz;
struct xdp_buff xdp;
void *data;
u32 act;
int err;
data = mergeable_xdp_get_buf(vi, rq, xdp_prog, ctx, &frame_sz, &num_buf, &page,
offset, &len, hdr);
if (unlikely(!data))
goto err_xdp;
err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz,
&num_buf, &xdp_frags_truesz, stats);
if (unlikely(err))
goto err_xdp_frags;
goto err_xdp;
act = bpf_prog_run_xdp(xdp_prog, &xdp);
stats->xdp_packets++;
act = virtnet_xdp_handler(xdp_prog, &xdp, dev, xdp_xmit, stats);
switch (act) {
case XDP_PASS:
head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz);
if (unlikely(!head_skb))
goto err_xdp_frags;
if (unlikely(xdp_page != page))
put_page(page);
rcu_read_unlock();
break;
return head_skb;
case XDP_TX:
stats->xdp_tx++;
xdpf = xdp_convert_buff_to_frame(&xdp);
if (unlikely(!xdpf)) {
netdev_dbg(dev, "convert buff to frame failed for xdp\n");
goto err_xdp_frags;
}
err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
if (unlikely(!err)) {
xdp_return_frame_rx_napi(xdpf);
} else if (unlikely(err < 0)) {
trace_xdp_exception(vi->dev, xdp_prog, act);
goto err_xdp_frags;
}
*xdp_xmit |= VIRTIO_XDP_TX;
if (unlikely(xdp_page != page))
put_page(page);
rcu_read_unlock();
goto xdp_xmit;
case XDP_REDIRECT:
stats->xdp_redirects++;
err = xdp_do_redirect(dev, &xdp, xdp_prog);
if (err)
goto err_xdp_frags;
*xdp_xmit |= VIRTIO_XDP_REDIR;
if (unlikely(xdp_page != page))
put_page(page);
rcu_read_unlock();
goto xdp_xmit;
return NULL;
default:
bpf_warn_invalid_xdp_action(vi->dev, xdp_prog, act);
fallthrough;
case XDP_ABORTED:
trace_xdp_exception(vi->dev, xdp_prog, act);
fallthrough;
case XDP_DROP:
goto err_xdp_frags;
break;
}
err_xdp_frags:
if (unlikely(xdp_page != page))
__free_pages(xdp_page, 0);
if (xdp_buff_has_frags(&xdp)) {
shinfo = xdp_get_shared_info_from_buff(&xdp);
for (i = 0; i < shinfo->nr_frags; i++) {
xdp_page = skb_frag_page(&shinfo->frags[i]);
put_page(xdp_page);
}
put_xdp_frags(&xdp);
err_xdp:
put_page(page);
mergeable_buf_free(rq, num_buf, dev, stats);
stats->xdp_drops++;
stats->drops++;
return NULL;
}
static struct sk_buff *receive_mergeable(struct net_device *dev,
struct virtnet_info *vi,
struct receive_queue *rq,
void *buf,
void *ctx,
unsigned int len,
unsigned int *xdp_xmit,
struct virtnet_rq_stats *stats)
{
struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
int num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
struct page *page = virt_to_head_page(buf);
int offset = buf - page_address(page);
struct sk_buff *head_skb, *curr_skb;
unsigned int truesize = mergeable_ctx_to_truesize(ctx);
unsigned int headroom = mergeable_ctx_to_headroom(ctx);
unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
head_skb = NULL;
stats->bytes += len - vi->hdr_len;
if (unlikely(len > truesize - room)) {
pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
dev->name, len, (unsigned long)(truesize - room));
dev->stats.rx_length_errors++;
goto err_skb;
}
goto err_xdp;
if (unlikely(vi->xdp_enabled)) {
struct bpf_prog *xdp_prog;
rcu_read_lock();
xdp_prog = rcu_dereference(rq->xdp_prog);
if (xdp_prog) {
head_skb = receive_mergeable_xdp(dev, vi, rq, xdp_prog, buf, ctx,
len, xdp_xmit, stats);
rcu_read_unlock();
return head_skb;
}
rcu_read_unlock();
}
skip_xdp:
head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom);
curr_skb = head_skb;
......@@ -1406,27 +1531,13 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
return head_skb;
err_xdp:
rcu_read_unlock();
stats->xdp_drops++;
err_skb:
put_page(page);
while (num_buf-- > 1) {
buf = virtqueue_get_buf(rq->vq, &len);
if (unlikely(!buf)) {
pr_debug("%s: rx error: %d buffers missing\n",
dev->name, num_buf);
dev->stats.rx_length_errors++;
break;
}
stats->bytes += len;
page = virt_to_head_page(buf);
put_page(page);
}
mergeable_buf_free(rq, num_buf, dev, stats);
err_buf:
stats->drops++;
dev_kfree_skb(head_skb);
xdp_xmit:
return NULL;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment