Commit 3cc81a9a authored by Jason Wang, committed by David S. Miller

virtio-net: re enable XDP_REDIRECT for mergeable buffer

XDP_REDIRECT support for mergeable buffers was removed by commit
7324f539 ("virtio_net: disable XDP_REDIRECT in receive_mergeable()
case") because we did not reserve enough tailroom for struct
skb_shared_info, which breaks XDP's assumptions. This patch fixes that
by reserving enough tailroom and using a fixed rx buffer size.
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 77f840e3
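
For readers who want to see the sizing arithmetic in isolation before reading the diff: the sketch below mimics what the patch does when an XDP program is attached, i.e. reserve tailroom for struct skb_shared_info next to the headroom and fall back to a fixed per-page rx buffer length. The constants here (4096-byte pages, 64-byte cachelines for SKB_DATA_ALIGN, 256 bytes of XDP headroom, and an approximate sizeof(struct skb_shared_info)) are illustrative assumptions only; the real values come from the kernel headers of the target build.

#include <stdio.h>

/* Illustrative stand-ins for kernel constants; actual values depend on
 * the architecture and kernel configuration. */
#define PAGE_SIZE            4096u
#define L1_CACHE_BYTES       64u
#define VIRTIO_XDP_HEADROOM  256u
#define SKB_SHINFO_SIZE      320u   /* assumed sizeof(struct skb_shared_info) */

/* SKB_DATA_ALIGN rounds a length up to the cacheline size. */
static unsigned int skb_data_align(unsigned int len)
{
	return (len + L1_CACHE_BYTES - 1) & ~(L1_CACHE_BYTES - 1);
}

int main(void)
{
	/* With an XDP program attached, headroom is non-zero, so the patch
	 * also reserves tailroom for the shared info structure. */
	unsigned int headroom = VIRTIO_XDP_HEADROOM;
	unsigned int tailroom = headroom ? SKB_SHINFO_SIZE : 0;
	unsigned int room = skb_data_align(headroom + tailroom);

	/* get_mergeable_buf_len() short-circuits to a fixed size when room
	 * is non-zero, so every rx buffer plus its head- and tailroom fits
	 * within a single page. */
	unsigned int len = PAGE_SIZE - room;

	printf("room=%u len=%u total=%u\n", room, len, len + room);
	return 0;
}

With these assumed values the example prints room=576 len=3520 total=4096, matching the patch's invariant that one rx buffer plus its reserved head- and tailroom fits in a single page.
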
@@ -504,6 +504,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
 	page_off += *len;
 
 	while (--*num_buf) {
+		int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 		unsigned int buflen;
 		void *buf;
 		int off;
@@ -518,7 +519,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
 		/* guard against a misconfigured or uncooperative backend that
 		 * is sending packet larger than the MTU.
 		 */
-		if ((page_off + buflen) > PAGE_SIZE) {
+		if ((page_off + buflen + tailroom) > PAGE_SIZE) {
 			put_page(p);
 			goto err_buf;
 		}
@@ -690,6 +691,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	unsigned int truesize;
 	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
 	bool sent;
+	int err;
 
 	head_skb = NULL;
@@ -701,7 +703,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		void *data;
 		u32 act;
 
-		/* This happens when rx buffer size is underestimated */
+		/* This happens when rx buffer size is underestimated
+		 * or headroom is not enough because of the buffer
+		 * was refilled before XDP is set. This should only
+		 * happen for the first several packets, so we don't
+		 * care much about its performance.
+		 */
 		if (unlikely(num_buf > 1 ||
 			     headroom < virtnet_get_headroom(vi))) {
 			/* linearize data for XDP */
@@ -736,9 +743,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
 
-		if (act != XDP_PASS)
-			ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
-
 		switch (act) {
 		case XDP_PASS:
 			/* recalculate offset to account for any header
@@ -770,6 +774,18 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 				goto err_xdp;
 			rcu_read_unlock();
 			goto xdp_xmit;
+		case XDP_REDIRECT:
+			err = xdp_do_redirect(dev, &xdp, xdp_prog);
+			if (err) {
+				if (unlikely(xdp_page != page))
+					put_page(xdp_page);
+				goto err_xdp;
+			}
+			*xdp_xmit = true;
+			if (unlikely(xdp_page != page))
+				goto err_xdp;
+			rcu_read_unlock();
+			goto xdp_xmit;
 		default:
 			bpf_warn_invalid_xdp_action(act);
 		case XDP_ABORTED:
@@ -1013,13 +1029,18 @@ static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
 }
 
 static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
-					  struct ewma_pkt_len *avg_pkt_len)
+					  struct ewma_pkt_len *avg_pkt_len,
+					  unsigned int room)
 {
 	const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
 	unsigned int len;
 
+	if (room)
+		return PAGE_SIZE - room;
+
 	len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
 				rq->min_buf_len, PAGE_SIZE - hdr_len);
+
 	return ALIGN(len, L1_CACHE_BYTES);
 }
@@ -1028,21 +1049,27 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
 {
 	struct page_frag *alloc_frag = &rq->alloc_frag;
 	unsigned int headroom = virtnet_get_headroom(vi);
+	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
+	unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
 	char *buf;
 	void *ctx;
 	int err;
 	unsigned int len, hole;
 
-	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len);
-	if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
+	/* Extra tailroom is needed to satisfy XDP's assumption. This
+	 * means rx frags coalescing won't work, but consider we've
+	 * disabled GSO for XDP, it won't be a big issue.
+	 */
+	len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len, room);
+	if (unlikely(!skb_page_frag_refill(len + room, alloc_frag, gfp)))
 		return -ENOMEM;
 
 	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
 	buf += headroom; /* advance address leaving hole at front of pkt */
 	get_page(alloc_frag->page);
-	alloc_frag->offset += len + headroom;
+	alloc_frag->offset += len + room;
 	hole = alloc_frag->size - alloc_frag->offset;
-	if (hole < len + headroom) {
+	if (hole < len + room) {
 		/* To avoid internal fragmentation, if there is very likely not
 		 * enough space for another buffer, add the remaining space to
 		 * the current buffer.
@@ -2578,12 +2605,15 @@ static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
 {
 	struct virtnet_info *vi = netdev_priv(queue->dev);
 	unsigned int queue_index = get_netdev_rx_queue_index(queue);
+	unsigned int headroom = virtnet_get_headroom(vi);
+	unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
 	struct ewma_pkt_len *avg;
 
 	BUG_ON(queue_index >= vi->max_queue_pairs);
 	avg = &vi->rq[queue_index].mrg_avg_pkt_len;
 	return sprintf(buf, "%u\n",
-		       get_mergeable_buf_len(&vi->rq[queue_index], avg));
+		       get_mergeable_buf_len(&vi->rq[queue_index], avg,
+					     SKB_DATA_ALIGN(headroom + tailroom)));
 }
 
 static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =