Commit 0ebab78c authored by Lorenzo Bianconi, committed by Jakub Kicinski

net: veth: add page_pool for page recycling

Introduce page_pool support in the veth driver in order to recycle the pages used to rebuild the skb in the veth_convert_skb_to_xdp_buff() routine, instead of allocating and freeing them through the page allocator for every packet.

The patch has been tested by sending TCP traffic to a veth pair where the remote peer runs a simple XDP program that just returns XDP_PASS:

veth upstream codebase:
MTU 1500B: ~ 8Gbps
MTU 8000B: ~ 13.9Gbps

veth upstream codebase + page_pool support:
MTU 1500B: ~ 9.2Gbps
MTU 8000B: ~ 16.2Gbps

Tested-by: Maryam Tahhan <mtahhan@redhat.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent ffcddcae
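For reference, the "simple XDP program that just returns XDP_PASS" used for the benchmark can be as small as the sketch below. This is an illustration, not part of the commit; the file name, program name, and build/attach commands are assumptions.

// SPDX-License-Identifier: GPL-2.0
/* xdp_pass.c: accept every frame unchanged.
 * Build (assumed toolchain): clang -O2 -g -target bpf -c xdp_pass.c -o xdp_pass.o
 * Attach (iproute2):         ip link set dev <peer> xdp obj xdp_pass.o sec xdp
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_pass_prog(struct xdp_md *ctx)
{
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";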
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -402,6 +402,7 @@ config TUN_VNET_CROSS_LE
 config VETH
 	tristate "Virtual ethernet pair device"
+	select PAGE_POOL
 	help
 	  This device is a local ethernet tunnel. Devices are created in pairs.
 	  When one end receives the packet it appears on its pair and vice
...
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -26,6 +26,7 @@
 #include <linux/ptr_ring.h>
 #include <linux/bpf_trace.h>
 #include <linux/net_tstamp.h>
+#include <net/page_pool.h>

 #define DRV_NAME	"veth"
 #define DRV_VERSION	"1.0"
@@ -65,6 +66,7 @@ struct veth_rq {
 	bool			rx_notify_masked;
 	struct ptr_ring		xdp_ring;
 	struct xdp_rxq_info	xdp_rxq;
+	struct page_pool	*page_pool;
 };

 struct veth_priv {
@@ -727,17 +729,20 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 			goto drop;

 		/* Allocate skb head */
-		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+		page = page_pool_dev_alloc_pages(rq->page_pool);
 		if (!page)
 			goto drop;

 		nskb = build_skb(page_address(page), PAGE_SIZE);
 		if (!nskb) {
-			put_page(page);
+			page_pool_put_full_page(rq->page_pool, page, true);
 			goto drop;
 		}

 		skb_reserve(nskb, VETH_XDP_HEADROOM);
+		skb_copy_header(nskb, skb);
+		skb_mark_for_recycle(nskb);
+
 		size = min_t(u32, skb->len, max_head_size);
 		if (skb_copy_bits(skb, 0, nskb->data, size)) {
 			consume_skb(nskb);
@@ -745,7 +750,6 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 		}

 		skb_put(nskb, size);
-		skb_copy_header(nskb, skb);

 		head_off = skb_headroom(nskb) - skb_headroom(skb);
 		skb_headers_offset_update(nskb, head_off);
@@ -754,7 +758,7 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 		len = skb->len - off;

 		for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
-			page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+			page = page_pool_dev_alloc_pages(rq->page_pool);
 			if (!page) {
 				consume_skb(nskb);
 				goto drop;
@@ -1002,11 +1006,37 @@ static int veth_poll(struct napi_struct *napi, int budget)
 	return done;
 }

+static int veth_create_page_pool(struct veth_rq *rq)
+{
+	struct page_pool_params pp_params = {
+		.order = 0,
+		.pool_size = VETH_RING_SIZE,
+		.nid = NUMA_NO_NODE,
+		.dev = &rq->dev->dev,
+	};
+
+	rq->page_pool = page_pool_create(&pp_params);
+	if (IS_ERR(rq->page_pool)) {
+		int err = PTR_ERR(rq->page_pool);
+
+		rq->page_pool = NULL;
+		return err;
+	}
+
+	return 0;
+}
+
 static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
 {
 	struct veth_priv *priv = netdev_priv(dev);
 	int err, i;

+	for (i = start; i < end; i++) {
+		err = veth_create_page_pool(&priv->rq[i]);
+		if (err)
+			goto err_page_pool;
+	}
+
 	for (i = start; i < end; i++) {
 		struct veth_rq *rq = &priv->rq[i];

@@ -1027,6 +1057,11 @@ static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
 err_xdp_ring:
 	for (i--; i >= start; i--)
 		ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);
+err_page_pool:
+	for (i = start; i < end; i++) {
+		page_pool_destroy(priv->rq[i].page_pool);
+		priv->rq[i].page_pool = NULL;
+	}

 	return err;
 }
@@ -1056,6 +1091,11 @@ static void veth_napi_del_range(struct net_device *dev, int start, int end)
 		rq->rx_notify_masked = false;
 		ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
 	}
+
+	for (i = start; i < end; i++) {
+		page_pool_destroy(priv->rq[i].page_pool);
+		priv->rq[i].page_pool = NULL;
+	}
 }

 static void veth_napi_del(struct net_device *dev)
...
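Taken together, the hunks above follow the usual page_pool lifecycle: __veth_napi_enable_range() creates one pool per RX queue, veth_convert_skb_to_xdp_buff() allocates head and fragment pages from that pool, the rebuilt skb is marked for recycling so its pages return to the pool when the skb is freed, and veth_napi_del_range() destroys the pools on teardown. The condensed sketch below restates the allocation path outside the diff; the page_pool helpers are the ones the patch uses, while alloc_recyclable_skb() itself is an illustrative wrapper, not a function from the driver.

#include <net/page_pool.h>
#include <linux/skbuff.h>

/* Illustrative only: rebuild an skb head from a pool-backed page, the
 * way veth_convert_skb_to_xdp_buff() does in the diff above. */
static struct sk_buff *alloc_recyclable_skb(struct page_pool *pool)
{
	struct page *page;
	struct sk_buff *skb;

	/* Pages come from the pool instead of alloc_page()... */
	page = page_pool_dev_alloc_pages(pool);
	if (!page)
		return NULL;

	skb = build_skb(page_address(page), PAGE_SIZE);
	if (!skb) {
		/* ...and go back to the pool on the error path... */
		page_pool_put_full_page(pool, page, true);
		return NULL;
	}

	/* ...or when the skb is later freed, thanks to this mark. */
	skb_mark_for_recycle(skb);
	return skb;
}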