Commit 0ebab78c authored by Lorenzo Bianconi, committed by Jakub Kicinski

net: veth: add page_pool for page recycling

Introduce page_pool support in the veth driver in order to recycle the pages used to rebuild the skb in the veth_convert_skb_to_xdp_buff() routine, instead of allocating and freeing them through the page allocator for every packet.

The patch has been tested by sending TCP traffic to a veth pair where the remote peer runs a simple XDP program that just returns XDP_PASS:

veth upstream codebase:
MTU 1500B: ~ 8Gbps
MTU 8000B: ~ 13.9Gbps

veth upstream codebase + page_pool support:
MTU 1500B: ~ 9.2Gbps
MTU 8000B: ~ 16.2Gbps

Tested-by: Maryam Tahhan <mtahhan@redhat.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent ffcddcae
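For reference, the "simple XDP program that just returns XDP_PASS" used for the benchmark can be as small as the sketch below. This is an illustration, not part of the commit; the file name, program name, and build/attach commands are assumptions.

// SPDX-License-Identifier: GPL-2.0
/* xdp_pass.c: accept every frame unchanged.
 * Build (assumed toolchain): clang -O2 -g -target bpf -c xdp_pass.c -o xdp_pass.o
 * Attach (iproute2):         ip link set dev <peer> xdp obj xdp_pass.o sec xdp
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_pass_prog(struct xdp_md *ctx)
{
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";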
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -402,6 +402,7 @@ config TUN_VNET_CROSS_LE
 config VETH
 	tristate "Virtual ethernet pair device"
+	select PAGE_POOL
 	help
 	  This device is a local ethernet tunnel. Devices are created in pairs.
 	  When one end receives the packet it appears on its pair and vice
...
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -26,6 +26,7 @@
 #include <linux/ptr_ring.h>
 #include <linux/bpf_trace.h>
 #include <linux/net_tstamp.h>
+#include <net/page_pool.h>

 #define DRV_NAME	"veth"
 #define DRV_VERSION	"1.0"
@@ -65,6 +66,7 @@ struct veth_rq {
 	bool			rx_notify_masked;
 	struct ptr_ring		xdp_ring;
 	struct xdp_rxq_info	xdp_rxq;
+	struct page_pool	*page_pool;
 };

 struct veth_priv {
@@ -727,17 +729,20 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 			goto drop;

 		/* Allocate skb head */
-		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+		page = page_pool_dev_alloc_pages(rq->page_pool);
 		if (!page)
 			goto drop;

 		nskb = build_skb(page_address(page), PAGE_SIZE);
 		if (!nskb) {
-			put_page(page);
+			page_pool_put_full_page(rq->page_pool, page, true);
 			goto drop;
 		}

 		skb_reserve(nskb, VETH_XDP_HEADROOM);
+		skb_copy_header(nskb, skb);
+		skb_mark_for_recycle(nskb);
+
 		size = min_t(u32, skb->len, max_head_size);
 		if (skb_copy_bits(skb, 0, nskb->data, size)) {
 			consume_skb(nskb);
@@ -745,7 +750,6 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 		}

 		skb_put(nskb, size);
-		skb_copy_header(nskb, skb);

 		head_off = skb_headroom(nskb) - skb_headroom(skb);
 		skb_headers_offset_update(nskb, head_off);
@@ -754,7 +758,7 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 		len = skb->len - off;

 		for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
-			page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+			page = page_pool_dev_alloc_pages(rq->page_pool);
 			if (!page) {
 				consume_skb(nskb);
 				goto drop;
@@ -1002,11 +1006,37 @@ static int veth_poll(struct napi_struct *napi, int budget)
 	return done;
 }

+static int veth_create_page_pool(struct veth_rq *rq)
+{
+	struct page_pool_params pp_params = {
+		.order = 0,
+		.pool_size = VETH_RING_SIZE,
+		.nid = NUMA_NO_NODE,
+		.dev = &rq->dev->dev,
+	};
+
+	rq->page_pool = page_pool_create(&pp_params);
+	if (IS_ERR(rq->page_pool)) {
+		int err = PTR_ERR(rq->page_pool);
+
+		rq->page_pool = NULL;
+		return err;
+	}
+
+	return 0;
+}
+
 static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
 {
 	struct veth_priv *priv = netdev_priv(dev);
 	int err, i;

+	for (i = start; i < end; i++) {
+		err = veth_create_page_pool(&priv->rq[i]);
+		if (err)
+			goto err_page_pool;
+	}
+
 	for (i = start; i < end; i++) {
 		struct veth_rq *rq = &priv->rq[i];

@@ -1027,6 +1057,11 @@ static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
 err_xdp_ring:
 	for (i--; i >= start; i--)
 		ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);
+err_page_pool:
+	for (i = start; i < end; i++) {
+		page_pool_destroy(priv->rq[i].page_pool);
+		priv->rq[i].page_pool = NULL;
+	}

 	return err;
 }
@@ -1056,6 +1091,11 @@ static void veth_napi_del_range(struct net_device *dev, int start, int end)
 		rq->rx_notify_masked = false;
 		ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
 	}
+
+	for (i = start; i < end; i++) {
+		page_pool_destroy(priv->rq[i].page_pool);
+		priv->rq[i].page_pool = NULL;
+	}
 }

 static void veth_napi_del(struct net_device *dev)
...
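Taken together, the hunks above follow the usual page_pool lifecycle: __veth_napi_enable_range() creates one pool per RX queue, veth_convert_skb_to_xdp_buff() allocates head and fragment pages from that pool, the rebuilt skb is marked for recycling so its pages return to the pool when the skb is freed, and veth_napi_del_range() destroys the pools on teardown. The condensed sketch below restates the allocation path outside the diff; the page_pool helpers are the ones the patch uses, while alloc_recyclable_skb() itself is an illustrative wrapper, not a function from the driver.

#include <net/page_pool.h>
#include <linux/skbuff.h>

/* Illustrative only: rebuild an skb head from a pool-backed page, the
 * way veth_convert_skb_to_xdp_buff() does in the diff above. */
static struct sk_buff *alloc_recyclable_skb(struct page_pool *pool)
{
	struct page *page;
	struct sk_buff *skb;

	/* Pages come from the pool instead of alloc_page()... */
	page = page_pool_dev_alloc_pages(pool);
	if (!page)
		return NULL;

	skb = build_skb(page_address(page), PAGE_SIZE);
	if (!skb) {
		/* ...and go back to the pool on the error path... */
		page_pool_put_full_page(pool, page, true);
		return NULL;
	}

	/* ...or when the skb is later freed, thanks to this mark. */
	skb_mark_for_recycle(skb);
	return skb;
}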