Commit 718a18a0 authored by Lorenzo Bianconi, committed by Daniel Borkmann

veth: Rework veth_xdp_rcv_skb in order to accept non-linear skb

Introduce the veth_convert_skb_to_xdp_buff routine in order to
convert a non-linear skb into an xdp buffer. If the received skb
is cloned or shared, veth_convert_skb_to_xdp_buff copies it into
a new skb composed of order-0 pages for both the linear and the
fragmented areas. Moreover, veth_convert_skb_to_xdp_buff guarantees
there is enough headroom for XDP.
This is a preliminary patch to allow attaching XDP programs with
frags support to veth devices.
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/8d228b106bc1903571afd1d77e797bffe9a5ea7c.1646989407.git.lorenzo@kernel.org
parent 5142239a
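For context on the "frags support" the commit message refers to, here is a minimal sketch (not part of this patch) of the kind of multi-buffer XDP program this series is preparing veth to accept. It assumes libbpf's "xdp.frags" section handling, which loads the program with BPF_F_XDP_HAS_FRAGS, and the bpf_xdp_get_buff_len() helper introduced by the same XDP multi-buffer work; the program name is illustrative only.

/* Hypothetical frags-aware XDP program (illustrative, not from this
 * patch). The "xdp.frags" section tells libbpf to load it with
 * BPF_F_XDP_HAS_FRAGS, so it may run on multi-buffer xdp_buffs such
 * as the ones veth can now build from non-linear skbs.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp.frags")
int xdp_frags_pass(struct xdp_md *ctx)
{
	/* Total frame length, linear area plus fragments. */
	__u64 len = bpf_xdp_get_buff_len(ctx);

	/* ctx->data..ctx->data_end still cover only the linear area;
	 * data in the fragments must be reached via helpers such as
	 * bpf_xdp_load_bytes().
	 */
	if (len > 4096)
		return XDP_DROP;

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";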
drivers/net/veth.c
@@ -433,21 +433,6 @@ static void veth_set_multicast_list(struct net_device *dev)
 {
 }
 
-static struct sk_buff *veth_build_skb(void *head, int headroom, int len,
-				      int buflen)
-{
-	struct sk_buff *skb;
-
-	skb = build_skb(head, buflen);
-	if (!skb)
-		return NULL;
-
-	skb_reserve(skb, headroom);
-	skb_put(skb, len);
-
-	return skb;
-}
-
 static int veth_select_rxq(struct net_device *dev)
 {
 	return smp_processor_id() % dev->real_num_rx_queues;
@@ -695,72 +680,143 @@ static void veth_xdp_rcv_bulk_skb(struct veth_rq *rq, void **frames,
 	}
 }
 
-static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
-					struct sk_buff *skb,
-					struct veth_xdp_tx_bq *bq,
-					struct veth_stats *stats)
+static void veth_xdp_get(struct xdp_buff *xdp)
 {
-	u32 pktlen, headroom, act, metalen, frame_sz;
-	void *orig_data, *orig_data_end;
-	struct bpf_prog *xdp_prog;
-	int mac_len, delta, off;
-	struct xdp_buff xdp;
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+	int i;
 
-	skb_prepare_for_gro(skb);
+	get_page(virt_to_page(xdp->data));
+	if (likely(!xdp_buff_has_frags(xdp)))
+		return;
 
-	rcu_read_lock();
-	xdp_prog = rcu_dereference(rq->xdp_prog);
-	if (unlikely(!xdp_prog)) {
-		rcu_read_unlock();
-		goto out;
-	}
+	for (i = 0; i < sinfo->nr_frags; i++)
+		__skb_frag_ref(&sinfo->frags[i]);
+}
 
-	mac_len = skb->data - skb_mac_header(skb);
-	pktlen = skb->len + mac_len;
-	headroom = skb_headroom(skb) - mac_len;
+static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
+					struct xdp_buff *xdp,
+					struct sk_buff **pskb)
+{
+	struct sk_buff *skb = *pskb;
+	u32 frame_sz;
 
 	if (skb_shared(skb) || skb_head_is_locked(skb) ||
-	    skb_is_nonlinear(skb) || headroom < XDP_PACKET_HEADROOM) {
+	    skb_shinfo(skb)->nr_frags) {
+		u32 size, len, max_head_size, off;
 		struct sk_buff *nskb;
-		int size, head_off;
-		void *head, *start;
 		struct page *page;
+		int i, head_off;
 
-		size = SKB_DATA_ALIGN(VETH_XDP_HEADROOM + pktlen) +
-		       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-		if (size > PAGE_SIZE)
+		/* We need a private copy of the skb and data buffers since
+		 * the ebpf program can modify it. We segment the original skb
+		 * into order-0 pages without linearize it.
+		 *
+		 * Make sure we have enough space for linear and paged area
+		 */
+		max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE -
+						  VETH_XDP_HEADROOM);
+		if (skb->len > PAGE_SIZE * MAX_SKB_FRAGS + max_head_size)
 			goto drop;
 
+		/* Allocate skb head */
 		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
 		if (!page)
 			goto drop;
 
-		head = page_address(page);
-		start = head + VETH_XDP_HEADROOM;
-		if (skb_copy_bits(skb, -mac_len, start, pktlen)) {
-			page_frag_free(head);
+		nskb = build_skb(page_address(page), PAGE_SIZE);
+		if (!nskb) {
+			put_page(page);
 			goto drop;
 		}
 
-		nskb = veth_build_skb(head, VETH_XDP_HEADROOM + mac_len,
-				      skb->len, PAGE_SIZE);
-		if (!nskb) {
-			page_frag_free(head);
+		skb_reserve(nskb, VETH_XDP_HEADROOM);
+		size = min_t(u32, skb->len, max_head_size);
+		if (skb_copy_bits(skb, 0, nskb->data, size)) {
+			consume_skb(nskb);
 			goto drop;
 		}
+		skb_put(nskb, size);
 
 		skb_copy_header(nskb, skb);
 		head_off = skb_headroom(nskb) - skb_headroom(skb);
 		skb_headers_offset_update(nskb, head_off);
+
+		/* Allocate paged area of new skb */
+		off = size;
+		len = skb->len - off;
+
+		for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
+			page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+			if (!page) {
+				consume_skb(nskb);
+				goto drop;
+			}
+
+			size = min_t(u32, len, PAGE_SIZE);
+			skb_add_rx_frag(nskb, i, page, 0, size, PAGE_SIZE);
+			if (skb_copy_bits(skb, off, page_address(page),
+					  size)) {
+				consume_skb(nskb);
+				goto drop;
+			}
+
+			len -= size;
+			off += size;
+		}
+
 		consume_skb(skb);
 		skb = nskb;
+	} else if (skb_headroom(skb) < XDP_PACKET_HEADROOM &&
+		   pskb_expand_head(skb, VETH_XDP_HEADROOM, 0, GFP_ATOMIC)) {
+		goto drop;
 	}
 
 	/* SKB "head" area always have tailroom for skb_shared_info */
 	frame_sz = skb_end_pointer(skb) - skb->head;
 	frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	xdp_init_buff(&xdp, frame_sz, &rq->xdp_rxq);
-	xdp_prepare_buff(&xdp, skb->head, skb->mac_header, pktlen, true);
+	xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
+	xdp_prepare_buff(xdp, skb->head, skb_headroom(skb),
+			 skb_headlen(skb), true);
+
+	if (skb_is_nonlinear(skb)) {
+		skb_shinfo(skb)->xdp_frags_size = skb->data_len;
+		xdp_buff_set_frags_flag(xdp);
+	} else {
+		xdp_buff_clear_frags_flag(xdp);
+	}
+	*pskb = skb;
+
+	return 0;
+drop:
+	consume_skb(skb);
+	*pskb = NULL;
+
+	return -ENOMEM;
+}
+
+static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
+					struct sk_buff *skb,
+					struct veth_xdp_tx_bq *bq,
+					struct veth_stats *stats)
+{
+	void *orig_data, *orig_data_end;
+	struct bpf_prog *xdp_prog;
+	struct xdp_buff xdp;
+	u32 act, metalen;
+	int off;
+
+	skb_prepare_for_gro(skb);
+
+	rcu_read_lock();
+	xdp_prog = rcu_dereference(rq->xdp_prog);
+	if (unlikely(!xdp_prog)) {
+		rcu_read_unlock();
+		goto out;
+	}
+
+	__skb_push(skb, skb->data - skb_mac_header(skb));
+	if (veth_convert_skb_to_xdp_buff(rq, &xdp, &skb))
+		goto drop;
 
 	orig_data = xdp.data;
 	orig_data_end = xdp.data_end;
@@ -771,7 +827,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 	case XDP_PASS:
 		break;
 	case XDP_TX:
-		get_page(virt_to_page(xdp.data));
+		veth_xdp_get(&xdp);
 		consume_skb(skb);
 		xdp.rxq->mem = rq->xdp_mem;
 		if (unlikely(veth_xdp_tx(rq, &xdp, bq) < 0)) {
@@ -783,7 +839,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 		rcu_read_unlock();
 		goto xdp_xmit;
 	case XDP_REDIRECT:
-		get_page(virt_to_page(xdp.data));
+		veth_xdp_get(&xdp);
 		consume_skb(skb);
 		xdp.rxq->mem = rq->xdp_mem;
 		if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
@@ -806,18 +862,27 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 	rcu_read_unlock();
 
 	/* check if bpf_xdp_adjust_head was used */
-	delta = orig_data - xdp.data;
-	off = mac_len + delta;
+	off = orig_data - xdp.data;
 	if (off > 0)
 		__skb_push(skb, off);
 	else if (off < 0)
 		__skb_pull(skb, -off);
-	skb->mac_header -= delta;
+
+	skb_reset_mac_header(skb);
 
 	/* check if bpf_xdp_adjust_tail was used */
 	off = xdp.data_end - orig_data_end;
 	if (off != 0)
 		__skb_put(skb, off); /* positive on grow, negative on shrink */
+
+	/* XDP frag metadata (e.g. nr_frags) are updated in eBPF helpers
+	 * (e.g. bpf_xdp_adjust_tail), we need to update data_len here.
+	 */
+	if (xdp_buff_has_frags(&xdp))
+		skb->data_len = skb_shinfo(skb)->xdp_frags_size;
+	else
+		skb->data_len = 0;
+
 	skb->protocol = eth_type_trans(skb, rq->dev);
 
 	metalen = xdp.data - xdp.data_meta;
@@ -833,7 +898,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 	return NULL;
 err_xdp:
 	rcu_read_unlock();
-	page_frag_free(xdp.data);
+	xdp_return_buff(&xdp);
 xdp_xmit:
 	return NULL;
 }
net/core/xdp.c
@@ -528,6 +528,7 @@ void xdp_return_buff(struct xdp_buff *xdp)
 out:
 	__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp);
 }
+EXPORT_SYMBOL_GPL(xdp_return_buff);
 
 /* Only called for MEM_TYPE_PAGE_POOL see xdp.h */
 void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
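A possible userspace counterpart to the program sketched above (again illustrative, not part of this patch): load the object and attach it to one end of a veth pair in native mode, which is what veth frags support enables once the series lands. It assumes libbpf v1.0+ error conventions and the bpf_xdp_attach() API.

/* Hypothetical loader sketch. Assumes libbpf v1.0+ (NULL-on-error
 * conventions) and an object file built from xdp_frags_pass above.
 */
#include <net/if.h>
#include <linux/if_link.h>
#include <bpf/libbpf.h>

int attach_frags_prog(const char *ifname, const char *obj_path)
{
	struct bpf_object *obj;
	struct bpf_program *prog;
	int ifindex = if_nametoindex(ifname);

	if (!ifindex)
		return -1;

	obj = bpf_object__open_file(obj_path, NULL);
	if (!obj)
		return -1;

	/* libbpf sets BPF_F_XDP_HAS_FRAGS for the "xdp.frags" section. */
	if (bpf_object__load(obj))
		goto err;

	prog = bpf_object__find_program_by_name(obj, "xdp_frags_pass");
	if (!prog)
		goto err;

	/* Native (driver) mode attach on e.g. one end of a veth pair. */
	return bpf_xdp_attach(ifindex, bpf_program__fd(prog),
			      XDP_FLAGS_DRV_MODE, NULL);

err:
	bpf_object__close(obj);
	return -1;
}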