Commit 0c8493d9 authored by Björn Töpel, committed by Jeff Kirsher

i40e: add XDP support for pass and drop actions

This commit adds basic XDP support for i40e derived NICs. All
unsupported XDP actions will end up in XDP_DROP.
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
parent f5c30647
...@@ -645,6 +645,8 @@ struct i40e_vsi { ...@@ -645,6 +645,8 @@ struct i40e_vsi {
u16 max_frame; u16 max_frame;
u16 rx_buf_len; u16 rx_buf_len;
struct bpf_prog *xdp_prog;
/* List of q_vectors allocated to this VSI */ /* List of q_vectors allocated to this VSI */
struct i40e_q_vector **q_vectors; struct i40e_q_vector **q_vectors;
int num_q_vectors; int num_q_vectors;
...@@ -972,4 +974,9 @@ i40e_status i40e_get_npar_bw_setting(struct i40e_pf *pf); ...@@ -972,4 +974,9 @@ i40e_status i40e_get_npar_bw_setting(struct i40e_pf *pf);
i40e_status i40e_set_npar_bw_setting(struct i40e_pf *pf); i40e_status i40e_set_npar_bw_setting(struct i40e_pf *pf);
i40e_status i40e_commit_npar_bw_setting(struct i40e_pf *pf); i40e_status i40e_commit_npar_bw_setting(struct i40e_pf *pf);
void i40e_print_link_message(struct i40e_vsi *vsi, bool isup); void i40e_print_link_message(struct i40e_vsi *vsi, bool isup);
/* Report whether an XDP program is currently attached to @vsi. */
static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi)
{
	return vsi->xdp_prog != NULL;
}
#endif /* _I40E_H_ */ #endif /* _I40E_H_ */
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include <linux/etherdevice.h> #include <linux/etherdevice.h>
#include <linux/of_net.h> #include <linux/of_net.h>
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/bpf.h>
/* Local includes */ /* Local includes */
#include "i40e.h" #include "i40e.h"
...@@ -2395,6 +2396,18 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf) ...@@ -2395,6 +2396,18 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf)
} }
} }
/**
 * i40e_max_xdp_frame_size - largest frame size accepted while XDP is in use
 * @vsi: the vsi
 *
 * Returns I40E_RXBUFFER_3072 when PAGE_SIZE is below 8192 and
 * I40E_FLAG_LEGACY_RX is not set in vsi->back->flags; otherwise returns
 * I40E_RXBUFFER_2048.
 **/
static int i40e_max_xdp_frame_size(struct i40e_vsi *vsi)
{
	/* The larger buffer is only available with small pages and non-legacy Rx */
	if (PAGE_SIZE < 8192 && !(vsi->back->flags & I40E_FLAG_LEGACY_RX))
		return I40E_RXBUFFER_3072;

	return I40E_RXBUFFER_2048;
}
/** /**
* i40e_change_mtu - NDO callback to change the Maximum Transfer Unit * i40e_change_mtu - NDO callback to change the Maximum Transfer Unit
* @netdev: network interface device structure * @netdev: network interface device structure
...@@ -2408,6 +2421,13 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu) ...@@ -2408,6 +2421,13 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu)
struct i40e_vsi *vsi = np->vsi; struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back; struct i40e_pf *pf = vsi->back;
if (i40e_enabled_xdp_vsi(vsi)) {
int frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
if (frame_size > i40e_max_xdp_frame_size(vsi))
return -EINVAL;
}
netdev_info(netdev, "changing MTU from %d to %d\n", netdev_info(netdev, "changing MTU from %d to %d\n",
netdev->mtu, new_mtu); netdev->mtu, new_mtu);
netdev->mtu = new_mtu; netdev->mtu = new_mtu;
...@@ -9311,6 +9331,72 @@ static netdev_features_t i40e_features_check(struct sk_buff *skb, ...@@ -9311,6 +9331,72 @@ static netdev_features_t i40e_features_check(struct sk_buff *skb,
return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
} }
/**
* i40e_xdp_setup - add/remove an XDP program
* @vsi: VSI to changed
* @prog: XDP program
**/
static int i40e_xdp_setup(struct i40e_vsi *vsi,
struct bpf_prog *prog)
{
int frame_size = vsi->netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
struct i40e_pf *pf = vsi->back;
struct bpf_prog *old_prog;
bool need_reset;
int i;
/* Don't allow frames that span over multiple buffers */
if (frame_size > vsi->rx_buf_len)
return -EINVAL;
if (!i40e_enabled_xdp_vsi(vsi) && !prog)
return 0;
/* When turning XDP on->off/off->on we reset and rebuild the rings. */
need_reset = (i40e_enabled_xdp_vsi(vsi) != !!prog);
if (need_reset)
i40e_prep_for_reset(pf, true);
old_prog = xchg(&vsi->xdp_prog, prog);
if (need_reset)
i40e_reset_and_rebuild(pf, true, true);
for (i = 0; i < vsi->num_queue_pairs; i++)
WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
if (old_prog)
bpf_prog_put(old_prog);
return 0;
}
/**
 * i40e_xdp - ndo_xdp callback for i40e
 * @dev: netdevice
 * @xdp: XDP command
 *
 * Handles XDP_SETUP_PROG and XDP_QUERY_PROG. Returns -EINVAL when the
 * VSI is not of type I40E_VSI_MAIN or the command is not one of those
 * two; otherwise returns the result of the setup call, or 0 for a query.
 **/
static int i40e_xdp(struct net_device *dev,
		    struct netdev_xdp *xdp)
{
	struct i40e_netdev_priv *priv = netdev_priv(dev);
	struct i40e_vsi *vsi = priv->vsi;

	/* Only the main VSI is accepted */
	if (vsi->type != I40E_VSI_MAIN)
		return -EINVAL;

	if (xdp->command == XDP_SETUP_PROG)
		return i40e_xdp_setup(vsi, xdp->prog);

	if (xdp->command == XDP_QUERY_PROG) {
		xdp->prog_attached = i40e_enabled_xdp_vsi(vsi);
		return 0;
	}

	return -EINVAL;
}
static const struct net_device_ops i40e_netdev_ops = { static const struct net_device_ops i40e_netdev_ops = {
.ndo_open = i40e_open, .ndo_open = i40e_open,
.ndo_stop = i40e_close, .ndo_stop = i40e_close,
...@@ -9343,6 +9429,7 @@ static const struct net_device_ops i40e_netdev_ops = { ...@@ -9343,6 +9429,7 @@ static const struct net_device_ops i40e_netdev_ops = {
.ndo_features_check = i40e_features_check, .ndo_features_check = i40e_features_check,
.ndo_bridge_getlink = i40e_ndo_bridge_getlink, .ndo_bridge_getlink = i40e_ndo_bridge_getlink,
.ndo_bridge_setlink = i40e_ndo_bridge_setlink, .ndo_bridge_setlink = i40e_ndo_bridge_setlink,
.ndo_xdp = i40e_xdp,
}; };
/** /**
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include <linux/prefetch.h> #include <linux/prefetch.h>
#include <net/busy_poll.h> #include <net/busy_poll.h>
#include <linux/bpf_trace.h>
#include "i40e.h" #include "i40e.h"
#include "i40e_trace.h" #include "i40e_trace.h"
#include "i40e_prototype.h" #include "i40e_prototype.h"
...@@ -1195,6 +1196,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring) ...@@ -1195,6 +1196,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
void i40e_free_rx_resources(struct i40e_ring *rx_ring) void i40e_free_rx_resources(struct i40e_ring *rx_ring)
{ {
i40e_clean_rx_ring(rx_ring); i40e_clean_rx_ring(rx_ring);
rx_ring->xdp_prog = NULL;
kfree(rx_ring->rx_bi); kfree(rx_ring->rx_bi);
rx_ring->rx_bi = NULL; rx_ring->rx_bi = NULL;
...@@ -1241,6 +1243,8 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring) ...@@ -1241,6 +1243,8 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
rx_ring->next_to_clean = 0; rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0; rx_ring->next_to_use = 0;
rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
return 0; return 0;
err: err:
kfree(rx_ring->rx_bi); kfree(rx_ring->rx_bi);
...@@ -1593,6 +1597,7 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring, ...@@ -1593,6 +1597,7 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring,
* i40e_cleanup_headers - Correct empty headers * i40e_cleanup_headers - Correct empty headers
* @rx_ring: rx descriptor ring packet is being transacted on * @rx_ring: rx descriptor ring packet is being transacted on
* @skb: pointer to current skb being fixed * @skb: pointer to current skb being fixed
* @rx_desc: pointer to the EOP Rx descriptor
* *
* Also address the case where we are pulling data in on pages only * Also address the case where we are pulling data in on pages only
* and as such no data is present in the skb header. * and as such no data is present in the skb header.
...@@ -1602,8 +1607,25 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring, ...@@ -1602,8 +1607,25 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring,
* *
* Returns true if an error was encountered and skb was freed. * Returns true if an error was encountered and skb was freed.
**/ **/
static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb) static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb,
union i40e_rx_desc *rx_desc)
{ {
/* XDP packets use error pointer so abort at this point */
if (IS_ERR(skb))
return true;
/* ERR_MASK will only have valid bits if EOP set, and
* what we are doing here is actually checking
* I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
* the error field
*/
if (unlikely(i40e_test_staterr(rx_desc,
BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {
dev_kfree_skb_any(skb);
return true;
}
/* if eth_skb_pad returns an error the skb was freed */ /* if eth_skb_pad returns an error the skb was freed */
if (eth_skb_pad(skb)) if (eth_skb_pad(skb))
return true; return true;
...@@ -1776,7 +1798,7 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring, ...@@ -1776,7 +1798,7 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
* i40e_construct_skb - Allocate skb and populate it * i40e_construct_skb - Allocate skb and populate it
* @rx_ring: rx descriptor ring to transact packets on * @rx_ring: rx descriptor ring to transact packets on
* @rx_buffer: rx buffer to pull data from * @rx_buffer: rx buffer to pull data from
* @size: size of buffer to add to skb * @xdp: xdp_buff pointing to the data
* *
* This function allocates an skb. It then populates it with the page * This function allocates an skb. It then populates it with the page
* data from the current receive descriptor, taking care to set up the * data from the current receive descriptor, taking care to set up the
...@@ -1784,9 +1806,9 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring, ...@@ -1784,9 +1806,9 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
*/ */
static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer, struct i40e_rx_buffer *rx_buffer,
unsigned int size) struct xdp_buff *xdp)
{ {
void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; unsigned int size = xdp->data_end - xdp->data;
#if (PAGE_SIZE < 8192) #if (PAGE_SIZE < 8192)
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else #else
...@@ -1796,9 +1818,9 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, ...@@ -1796,9 +1818,9 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
struct sk_buff *skb; struct sk_buff *skb;
/* prefetch first cache line of first page */ /* prefetch first cache line of first page */
prefetch(va); prefetch(xdp->data);
#if L1_CACHE_BYTES < 128 #if L1_CACHE_BYTES < 128
prefetch(va + L1_CACHE_BYTES); prefetch(xdp->data + L1_CACHE_BYTES);
#endif #endif
/* allocate a skb to store the frags */ /* allocate a skb to store the frags */
...@@ -1811,10 +1833,11 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, ...@@ -1811,10 +1833,11 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
/* Determine available headroom for copy */ /* Determine available headroom for copy */
headlen = size; headlen = size;
if (headlen > I40E_RX_HDR_SIZE) if (headlen > I40E_RX_HDR_SIZE)
headlen = eth_get_headlen(va, I40E_RX_HDR_SIZE); headlen = eth_get_headlen(xdp->data, I40E_RX_HDR_SIZE);
/* align pull length to size of long to optimize memcpy performance */ /* align pull length to size of long to optimize memcpy performance */
memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); memcpy(__skb_put(skb, headlen), xdp->data,
ALIGN(headlen, sizeof(long)));
/* update all of the pointers */ /* update all of the pointers */
size -= headlen; size -= headlen;
...@@ -1841,16 +1864,16 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, ...@@ -1841,16 +1864,16 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
* i40e_build_skb - Build skb around an existing buffer * i40e_build_skb - Build skb around an existing buffer
* @rx_ring: Rx descriptor ring to transact packets on * @rx_ring: Rx descriptor ring to transact packets on
* @rx_buffer: Rx buffer to pull data from * @rx_buffer: Rx buffer to pull data from
* @size: size of buffer to add to skb * @xdp: xdp_buff pointing to the data
* *
* This function builds an skb around an existing Rx buffer, taking care * This function builds an skb around an existing Rx buffer, taking care
* to set up the skb correctly and avoid any memcpy overhead. * to set up the skb correctly and avoid any memcpy overhead.
*/ */
static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer, struct i40e_rx_buffer *rx_buffer,
unsigned int size) struct xdp_buff *xdp)
{ {
void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; unsigned int size = xdp->data_end - xdp->data;
#if (PAGE_SIZE < 8192) #if (PAGE_SIZE < 8192)
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else #else
...@@ -1860,12 +1883,12 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, ...@@ -1860,12 +1883,12 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
struct sk_buff *skb; struct sk_buff *skb;
/* prefetch first cache line of first page */ /* prefetch first cache line of first page */
prefetch(va); prefetch(xdp->data);
#if L1_CACHE_BYTES < 128 #if L1_CACHE_BYTES < 128
prefetch(va + L1_CACHE_BYTES); prefetch(xdp->data + L1_CACHE_BYTES);
#endif #endif
/* build an skb around the page buffer */ /* build an skb around the page buffer */
skb = build_skb(va - I40E_SKB_PAD, truesize); skb = build_skb(xdp->data_hard_start, truesize);
if (unlikely(!skb)) if (unlikely(!skb))
return NULL; return NULL;
...@@ -1944,6 +1967,46 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, ...@@ -1944,6 +1967,46 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring,
return true; return true;
} }
#define I40E_XDP_PASS 0
#define I40E_XDP_CONSUMED 1
/**
* i40e_run_xdp - run an XDP program
* @rx_ring: Rx ring being processed
* @xdp: XDP buffer containing the frame
**/
static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
struct xdp_buff *xdp)
{
int result = I40E_XDP_PASS;
struct bpf_prog *xdp_prog;
u32 act;
rcu_read_lock();
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
if (!xdp_prog)
goto xdp_out;
act = bpf_prog_run_xdp(xdp_prog, xdp);
switch (act) {
case XDP_PASS:
break;
default:
bpf_warn_invalid_xdp_action(act);
case XDP_TX:
case XDP_ABORTED:
trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
/* fallthrough -- handle aborts by dropping packet */
case XDP_DROP:
result = I40E_XDP_CONSUMED;
break;
}
xdp_out:
rcu_read_unlock();
return ERR_PTR(-result);
}
/** /**
* i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
* @rx_ring: rx descriptor ring to transact packets on * @rx_ring: rx descriptor ring to transact packets on
...@@ -1966,6 +2029,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) ...@@ -1966,6 +2029,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
while (likely(total_rx_packets < budget)) { while (likely(total_rx_packets < budget)) {
struct i40e_rx_buffer *rx_buffer; struct i40e_rx_buffer *rx_buffer;
union i40e_rx_desc *rx_desc; union i40e_rx_desc *rx_desc;
struct xdp_buff xdp;
unsigned int size; unsigned int size;
u16 vlan_tag; u16 vlan_tag;
u8 rx_ptype; u8 rx_ptype;
...@@ -2006,12 +2070,27 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) ...@@ -2006,12 +2070,27 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
rx_buffer = i40e_get_rx_buffer(rx_ring, size); rx_buffer = i40e_get_rx_buffer(rx_ring, size);
/* retrieve a buffer from the ring */ /* retrieve a buffer from the ring */
if (skb) if (!skb) {
xdp.data = page_address(rx_buffer->page) +
rx_buffer->page_offset;
xdp.data_hard_start = xdp.data -
i40e_rx_offset(rx_ring);
xdp.data_end = xdp.data + size;
skb = i40e_run_xdp(rx_ring, &xdp);
}
if (IS_ERR(skb)) {
total_rx_bytes += size;
total_rx_packets++;
rx_buffer->pagecnt_bias++;
} else if (skb) {
i40e_add_rx_frag(rx_ring, rx_buffer, skb, size); i40e_add_rx_frag(rx_ring, rx_buffer, skb, size);
else if (ring_uses_build_skb(rx_ring)) } else if (ring_uses_build_skb(rx_ring)) {
skb = i40e_build_skb(rx_ring, rx_buffer, size); skb = i40e_build_skb(rx_ring, rx_buffer, &xdp);
else } else {
skb = i40e_construct_skb(rx_ring, rx_buffer, size); skb = i40e_construct_skb(rx_ring, rx_buffer, &xdp);
}
/* exit if we failed to retrieve a buffer */ /* exit if we failed to retrieve a buffer */
if (!skb) { if (!skb) {
...@@ -2026,18 +2105,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) ...@@ -2026,18 +2105,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
if (i40e_is_non_eop(rx_ring, rx_desc, skb)) if (i40e_is_non_eop(rx_ring, rx_desc, skb))
continue; continue;
/* ERR_MASK will only have valid bits if EOP set, and if (i40e_cleanup_headers(rx_ring, skb, rx_desc)) {
* what we are doing here is actually checking
* I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
* the error field
*/
if (unlikely(i40e_test_staterr(rx_desc, BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {
dev_kfree_skb_any(skb);
skb = NULL;
continue;
}
if (i40e_cleanup_headers(rx_ring, skb)) {
skb = NULL; skb = NULL;
continue; continue;
} }
......
...@@ -360,6 +360,7 @@ struct i40e_ring { ...@@ -360,6 +360,7 @@ struct i40e_ring {
void *desc; /* Descriptor ring memory */ void *desc; /* Descriptor ring memory */
struct device *dev; /* Used for DMA mapping */ struct device *dev; /* Used for DMA mapping */
struct net_device *netdev; /* netdev ring maps to */ struct net_device *netdev; /* netdev ring maps to */
struct bpf_prog *xdp_prog;
union { union {
struct i40e_tx_buffer *tx_bi; struct i40e_tx_buffer *tx_bi;
struct i40e_rx_buffer *rx_bi; struct i40e_rx_buffer *rx_bi;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment