Commit b39212d5 authored by Jakub Kicinski's avatar Jakub Kicinski

Merge branch '40GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue

Tony Nguyen says:

====================
i40e: support XDP multi-buffer

Tirthendu Sarkar says:

This patchset adds multi-buffer support for XDP. Tx side already has
support for multi-buffer. This patchset focuses on Rx side. The last
patch contains actual multi-buffer changes while the previous ones are
preparatory patches.

On receiving the first buffer of a packet, xdp_buff is built and its
subsequent buffers are added to it as frags. While 'next_to_clean' keeps
pointing to the first descriptor, the newly introduced 'next_to_process'
keeps track of every descriptor for the packet.

On receiving EOP buffer the XDP program is called and appropriate action
is taken (building skb for XDP_PASS, reusing page for XDP_DROP, adjusting
page offsets for XDP_{REDIRECT,TX}).

The patchset also streamlines page offset adjustments for buffer reuse
to make it easier to post process the rx_buffers after running XDP prog.

With this patchset there does not seem to be any performance degradation
for XDP_PASS and some improvement (~1% for XDP_TX, ~5% for XDP_DROP) when
measured using xdp_rxq_info program from samples/bpf/ for 64B packets.

v1: https://lore.kernel.org/netdev/20230306210822.3381942-1-anthony.l.nguyen@intel.com/

* '40GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue:
  i40e: add support for XDP multi-buffer Rx
  i40e: add xdp_buff to i40e_ring struct
  i40e: introduce next_to_process to i40e_ring
  i40e: use frame_sz instead of recalculating truesize for building skb
  i40e: Change size to truesize when using i40e_rx_buffer_flip()
  i40e: add pre-xdp page_count in rx_buffer
  i40e: change Rx buffer size for legacy-rx to support XDP multi-buffer
  i40e: consolidate maximum frame size calculation for vsi
====================

Link: https://lore.kernel.org/r/20230309212819.1198218-1-anthony.l.nguyen@intel.comSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents c66b2111 e213ced1
......@@ -5402,6 +5402,13 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
return -EOPNOTSUPP;
}
if ((changed_flags & I40E_FLAG_LEGACY_RX) &&
I40E_2K_TOO_SMALL_WITH_PADDING) {
dev_warn(&pf->pdev->dev,
"2k Rx buffer is too small to fit standard MTU and skb_shared_info\n");
return -EOPNOTSUPP;
}
if ((changed_flags & new_flags &
I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) &&
(new_flags & I40E_FLAG_MFP_ENABLED))
......
......@@ -2896,15 +2896,35 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf)
}
/**
* i40e_max_xdp_frame_size - returns the maximum allowed frame size for XDP
* i40e_calculate_vsi_rx_buf_len - Calculates buffer length
*
* @vsi: VSI to calculate rx_buf_len from
*/
static u16 i40e_calculate_vsi_rx_buf_len(struct i40e_vsi *vsi)
{
if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX))
return SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048);
return PAGE_SIZE < 8192 ? I40E_RXBUFFER_3072 : I40E_RXBUFFER_2048;
}
/**
* i40e_max_vsi_frame_size - returns the maximum allowed frame size for VSI
* @vsi: the vsi
* @xdp_prog: XDP program
**/
static int i40e_max_xdp_frame_size(struct i40e_vsi *vsi)
static int i40e_max_vsi_frame_size(struct i40e_vsi *vsi,
struct bpf_prog *xdp_prog)
{
if (PAGE_SIZE >= 8192 || (vsi->back->flags & I40E_FLAG_LEGACY_RX))
return I40E_RXBUFFER_2048;
u16 rx_buf_len = i40e_calculate_vsi_rx_buf_len(vsi);
u16 chain_len;
if (xdp_prog && !xdp_prog->aux->xdp_has_frags)
chain_len = 1;
else
return I40E_RXBUFFER_3072;
chain_len = I40E_MAX_CHAINED_RX_BUFFERS;
return min_t(u16, rx_buf_len * chain_len, I40E_MAX_RXBUFFER);
}
/**
......@@ -2919,11 +2939,12 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu)
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
int frame_size;
if (i40e_enabled_xdp_vsi(vsi)) {
int frame_size = new_mtu + I40E_PACKET_HDR_PAD;
if (frame_size > i40e_max_xdp_frame_size(vsi))
frame_size = i40e_max_vsi_frame_size(vsi, vsi->xdp_prog);
if (new_mtu > frame_size - I40E_PACKET_HDR_PAD) {
netdev_err(netdev, "Error changing mtu to %d, Max is %d\n",
new_mtu, frame_size - I40E_PACKET_HDR_PAD);
return -EINVAL;
}
......@@ -3595,6 +3616,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
}
}
xdp_init_buff(&ring->xdp, i40e_rx_pg_size(ring) / 2, &ring->xdp_rxq);
rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len,
BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT));
......@@ -3640,10 +3663,16 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
}
/* configure Rx buffer alignment */
if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX))
if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) {
if (I40E_2K_TOO_SMALL_WITH_PADDING) {
dev_info(&vsi->back->pdev->dev,
"2k Rx buffer is too small to fit standard MTU and skb_shared_info\n");
return -EOPNOTSUPP;
}
clear_ring_build_skb_enabled(ring);
else
} else {
set_ring_build_skb_enabled(ring);
}
ring->rx_offset = i40e_rx_offset(ring);
......@@ -3693,24 +3722,6 @@ static int i40e_vsi_configure_tx(struct i40e_vsi *vsi)
return err;
}
/**
* i40e_calculate_vsi_rx_buf_len - Calculates buffer length
*
* @vsi: VSI to calculate rx_buf_len from
*/
static u16 i40e_calculate_vsi_rx_buf_len(struct i40e_vsi *vsi)
{
if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX))
return I40E_RXBUFFER_2048;
#if (PAGE_SIZE < 8192)
if (!I40E_2K_TOO_SMALL_WITH_PADDING && vsi->netdev->mtu <= ETH_DATA_LEN)
return I40E_RXBUFFER_1536 - NET_IP_ALIGN;
#endif
return PAGE_SIZE < 8192 ? I40E_RXBUFFER_3072 : I40E_RXBUFFER_2048;
}
/**
* i40e_vsi_configure_rx - Configure the VSI for Rx
* @vsi: the VSI being configured
......@@ -3722,13 +3733,15 @@ static int i40e_vsi_configure_rx(struct i40e_vsi *vsi)
int err = 0;
u16 i;
vsi->max_frame = I40E_MAX_RXBUFFER;
vsi->max_frame = i40e_max_vsi_frame_size(vsi, vsi->xdp_prog);
vsi->rx_buf_len = i40e_calculate_vsi_rx_buf_len(vsi);
#if (PAGE_SIZE < 8192)
if (vsi->netdev && !I40E_2K_TOO_SMALL_WITH_PADDING &&
vsi->netdev->mtu <= ETH_DATA_LEN)
vsi->max_frame = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
vsi->netdev->mtu <= ETH_DATA_LEN) {
vsi->rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
vsi->max_frame = vsi->rx_buf_len;
}
#endif
/* set up individual rings */
......@@ -13316,15 +13329,15 @@ static netdev_features_t i40e_features_check(struct sk_buff *skb,
static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
{
int frame_size = vsi->netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
int frame_size = i40e_max_vsi_frame_size(vsi, prog);
struct i40e_pf *pf = vsi->back;
struct bpf_prog *old_prog;
bool need_reset;
int i;
/* Don't allow frames that span over multiple buffers */
if (frame_size > i40e_calculate_vsi_rx_buf_len(vsi)) {
NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP");
if (vsi->netdev->mtu > frame_size - I40E_PACKET_HDR_PAD) {
NL_SET_ERR_MSG_MOD(extack, "MTU too large for linear frames and XDP prog does not support frags");
return -EINVAL;
}
......@@ -13810,7 +13823,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
netdev->xdp_features = NETDEV_XDP_ACT_BASIC |
NETDEV_XDP_ACT_REDIRECT |
NETDEV_XDP_ACT_XSK_ZEROCOPY;
NETDEV_XDP_ACT_XSK_ZEROCOPY |
NETDEV_XDP_ACT_RX_SG;
} else {
/* Relate the VSI_VMDQ name to the VSI_MAIN name. Note that we
* are still limited by IFNAMSIZ, but we're adding 'v%d\0' to
......
......@@ -162,45 +162,45 @@ DECLARE_EVENT_CLASS(
TP_PROTO(struct i40e_ring *ring,
union i40e_16byte_rx_desc *desc,
struct sk_buff *skb),
struct xdp_buff *xdp),
TP_ARGS(ring, desc, skb),
TP_ARGS(ring, desc, xdp),
TP_STRUCT__entry(
__field(void*, ring)
__field(void*, desc)
__field(void*, skb)
__field(void*, xdp)
__string(devname, ring->netdev->name)
),
TP_fast_assign(
__entry->ring = ring;
__entry->desc = desc;
__entry->skb = skb;
__entry->xdp = xdp;
__assign_str(devname, ring->netdev->name);
),
TP_printk(
"netdev: %s ring: %p desc: %p skb %p",
"netdev: %s ring: %p desc: %p xdp %p",
__get_str(devname), __entry->ring,
__entry->desc, __entry->skb)
__entry->desc, __entry->xdp)
);
DEFINE_EVENT(
i40e_rx_template, i40e_clean_rx_irq,
TP_PROTO(struct i40e_ring *ring,
union i40e_16byte_rx_desc *desc,
struct sk_buff *skb),
struct xdp_buff *xdp),
TP_ARGS(ring, desc, skb));
TP_ARGS(ring, desc, xdp));
DEFINE_EVENT(
i40e_rx_template, i40e_clean_rx_irq_rx,
TP_PROTO(struct i40e_ring *ring,
union i40e_16byte_rx_desc *desc,
struct sk_buff *skb),
struct xdp_buff *xdp),
TP_ARGS(ring, desc, skb));
TP_ARGS(ring, desc, xdp));
DECLARE_EVENT_CLASS(
i40e_xmit_template,
......
......@@ -1477,9 +1477,6 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
if (!rx_ring->rx_bi)
return;
dev_kfree_skb(rx_ring->skb);
rx_ring->skb = NULL;
if (rx_ring->xsk_pool) {
i40e_xsk_clean_rx_ring(rx_ring);
goto skip_free;
......@@ -1524,6 +1521,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
rx_ring->next_to_alloc = 0;
rx_ring->next_to_clean = 0;
rx_ring->next_to_process = 0;
rx_ring->next_to_use = 0;
}
......@@ -1576,6 +1574,7 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
rx_ring->next_to_alloc = 0;
rx_ring->next_to_clean = 0;
rx_ring->next_to_process = 0;
rx_ring->next_to_use = 0;
/* XDP RX-queue info only needed for RX rings exposed to XDP */
......@@ -1617,21 +1616,19 @@ void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
writel(val, rx_ring->tail);
}
#if (PAGE_SIZE >= 8192)
static unsigned int i40e_rx_frame_truesize(struct i40e_ring *rx_ring,
unsigned int size)
{
unsigned int truesize;
#if (PAGE_SIZE < 8192)
truesize = i40e_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
#else
truesize = rx_ring->rx_offset ?
SKB_DATA_ALIGN(size + rx_ring->rx_offset) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
SKB_DATA_ALIGN(size);
#endif
return truesize;
}
#endif
/**
* i40e_alloc_mapped_page - recycle or make a new page
......@@ -1970,7 +1967,6 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb,
* i40e_can_reuse_rx_page - Determine if page can be reused for another Rx
* @rx_buffer: buffer containing the page
* @rx_stats: rx stats structure for the rx ring
* @rx_buffer_pgcnt: buffer page refcount pre xdp_do_redirect() call
*
* If page is reusable, we have a green light for calling i40e_reuse_rx_page,
* which will assign the current buffer to the buffer that next_to_alloc is
......@@ -1981,8 +1977,7 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb,
* or busy if it could not be reused.
*/
static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
struct i40e_rx_queue_stats *rx_stats,
int rx_buffer_pgcnt)
struct i40e_rx_queue_stats *rx_stats)
{
unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
struct page *page = rx_buffer->page;
......@@ -1995,7 +1990,7 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
#if (PAGE_SIZE < 8192)
/* if we are only owner of page we can reuse it */
if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) {
if (unlikely((rx_buffer->page_count - pagecnt_bias) > 1)) {
rx_stats->page_busy_count++;
return false;
}
......@@ -2021,32 +2016,13 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
}
/**
* i40e_add_rx_frag - Add contents of Rx buffer to sk_buff
* @rx_ring: rx descriptor ring to transact packets on
* @rx_buffer: buffer containing page to add
* @skb: sk_buff to place the data into
* @size: packet length from rx_desc
*
* This function will add the data contained in rx_buffer->page to the skb.
* It will just attach the page as a frag to the skb.
*
* The function will then update the page offset.
* i40e_rx_buffer_flip - adjusted rx_buffer to point to an unused region
* @rx_buffer: Rx buffer to adjust
* @truesize: Size of adjustment
**/
static void i40e_add_rx_frag(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer,
struct sk_buff *skb,
unsigned int size)
static void i40e_rx_buffer_flip(struct i40e_rx_buffer *rx_buffer,
unsigned int truesize)
{
#if (PAGE_SIZE < 8192)
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else
unsigned int truesize = SKB_DATA_ALIGN(size + rx_ring->rx_offset);
#endif
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
rx_buffer->page_offset, size, truesize);
/* page is being used so we must update the page offset */
#if (PAGE_SIZE < 8192)
rx_buffer->page_offset ^= truesize;
#else
......@@ -2058,19 +2034,17 @@ static void i40e_add_rx_frag(struct i40e_ring *rx_ring,
* i40e_get_rx_buffer - Fetch Rx buffer and synchronize data for use
* @rx_ring: rx descriptor ring to transact packets on
* @size: size of buffer to add to skb
* @rx_buffer_pgcnt: buffer page refcount
*
* This function will pull an Rx buffer from the ring and synchronize it
* for use by the CPU.
*/
static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
const unsigned int size,
int *rx_buffer_pgcnt)
const unsigned int size)
{
struct i40e_rx_buffer *rx_buffer;
rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
*rx_buffer_pgcnt =
rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_process);
rx_buffer->page_count =
#if (PAGE_SIZE < 8192)
page_count(rx_buffer->page);
#else
......@@ -2092,25 +2066,82 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
}
/**
* i40e_construct_skb - Allocate skb and populate it
* i40e_put_rx_buffer - Clean up used buffer and either recycle or free
* @rx_ring: rx descriptor ring to transact packets on
* @rx_buffer: rx buffer to pull data from
*
* This function will clean up the contents of the rx_buffer. It will
* either recycle the buffer or unmap it and free the associated resources.
*/
static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer)
{
if (i40e_can_reuse_rx_page(rx_buffer, &rx_ring->rx_stats)) {
/* hand second half of page back to the ring */
i40e_reuse_rx_page(rx_ring, rx_buffer);
} else {
/* we are not reusing the buffer so unmap it */
dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
i40e_rx_pg_size(rx_ring),
DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
__page_frag_cache_drain(rx_buffer->page,
rx_buffer->pagecnt_bias);
/* clear contents of buffer_info */
rx_buffer->page = NULL;
}
}
/**
* i40e_process_rx_buffs- Processing of buffers post XDP prog or on error
* @rx_ring: Rx descriptor ring to transact packets on
* @xdp_res: Result of the XDP program
* @xdp: xdp_buff pointing to the data
**/
static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res,
struct xdp_buff *xdp)
{
u32 next = rx_ring->next_to_clean;
struct i40e_rx_buffer *rx_buffer;
xdp->flags = 0;
while (1) {
rx_buffer = i40e_rx_bi(rx_ring, next);
if (++next == rx_ring->count)
next = 0;
if (!rx_buffer->page)
continue;
if (xdp_res == I40E_XDP_CONSUMED)
rx_buffer->pagecnt_bias++;
else
i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz);
/* EOP buffer will be put in i40e_clean_rx_irq() */
if (next == rx_ring->next_to_process)
return;
i40e_put_rx_buffer(rx_ring, rx_buffer);
}
}
/**
* i40e_construct_skb - Allocate skb and populate it
* @rx_ring: rx descriptor ring to transact packets on
* @xdp: xdp_buff pointing to the data
* @nr_frags: number of buffers for the packet
*
* This function allocates an skb. It then populates it with the page
* data from the current receive descriptor, taking care to set up the
* skb correctly.
*/
static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer,
struct xdp_buff *xdp)
struct xdp_buff *xdp,
u32 nr_frags)
{
unsigned int size = xdp->data_end - xdp->data;
#if (PAGE_SIZE < 8192)
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else
unsigned int truesize = SKB_DATA_ALIGN(size);
#endif
struct i40e_rx_buffer *rx_buffer;
unsigned int headlen;
struct sk_buff *skb;
......@@ -2150,48 +2181,60 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
memcpy(__skb_put(skb, headlen), xdp->data,
ALIGN(headlen, sizeof(long)));
rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
/* update all of the pointers */
size -= headlen;
if (size) {
if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
dev_kfree_skb(skb);
return NULL;
}
skb_add_rx_frag(skb, 0, rx_buffer->page,
rx_buffer->page_offset + headlen,
size, truesize);
size, xdp->frame_sz);
/* buffer is used by skb, update page_offset */
#if (PAGE_SIZE < 8192)
rx_buffer->page_offset ^= truesize;
#else
rx_buffer->page_offset += truesize;
#endif
i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz);
} else {
/* buffer is unused, reset bias back to rx_buffer */
rx_buffer->pagecnt_bias++;
}
if (unlikely(xdp_buff_has_frags(xdp))) {
struct skb_shared_info *sinfo, *skinfo = skb_shinfo(skb);
sinfo = xdp_get_shared_info_from_buff(xdp);
memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0],
sizeof(skb_frag_t) * nr_frags);
xdp_update_skb_shared_info(skb, skinfo->nr_frags + nr_frags,
sinfo->xdp_frags_size,
nr_frags * xdp->frame_sz,
xdp_buff_is_frag_pfmemalloc(xdp));
/* First buffer has already been processed, so bump ntc */
if (++rx_ring->next_to_clean == rx_ring->count)
rx_ring->next_to_clean = 0;
i40e_process_rx_buffs(rx_ring, I40E_XDP_PASS, xdp);
}
return skb;
}
/**
* i40e_build_skb - Build skb around an existing buffer
* @rx_ring: Rx descriptor ring to transact packets on
* @rx_buffer: Rx buffer to pull data from
* @xdp: xdp_buff pointing to the data
* @nr_frags: number of buffers for the packet
*
* This function builds an skb around an existing Rx buffer, taking care
* to set up the skb correctly and avoid any memcpy overhead.
*/
static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer,
struct xdp_buff *xdp)
struct xdp_buff *xdp,
u32 nr_frags)
{
unsigned int metasize = xdp->data - xdp->data_meta;
#if (PAGE_SIZE < 8192)
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
#else
unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
SKB_DATA_ALIGN(xdp->data_end -
xdp->data_hard_start);
#endif
struct sk_buff *skb;
/* Prefetch first cache line of first page. If xdp->data_meta
......@@ -2202,7 +2245,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
net_prefetch(xdp->data_meta);
/* build an skb around the page buffer */
skb = napi_build_skb(xdp->data_hard_start, truesize);
skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz);
if (unlikely(!skb))
return NULL;
......@@ -2212,42 +2255,25 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
if (metasize)
skb_metadata_set(skb, metasize);
/* buffer is used by skb, update page_offset */
#if (PAGE_SIZE < 8192)
rx_buffer->page_offset ^= truesize;
#else
rx_buffer->page_offset += truesize;
#endif
if (unlikely(xdp_buff_has_frags(xdp))) {
struct skb_shared_info *sinfo;
return skb;
}
sinfo = xdp_get_shared_info_from_buff(xdp);
xdp_update_skb_shared_info(skb, nr_frags,
sinfo->xdp_frags_size,
nr_frags * xdp->frame_sz,
xdp_buff_is_frag_pfmemalloc(xdp));
/**
* i40e_put_rx_buffer - Clean up used buffer and either recycle or free
* @rx_ring: rx descriptor ring to transact packets on
* @rx_buffer: rx buffer to pull data from
* @rx_buffer_pgcnt: rx buffer page refcount pre xdp_do_redirect() call
*
* This function will clean up the contents of the rx_buffer. It will
* either recycle the buffer or unmap it and free the associated resources.
*/
static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer,
int rx_buffer_pgcnt)
{
if (i40e_can_reuse_rx_page(rx_buffer, &rx_ring->rx_stats, rx_buffer_pgcnt)) {
/* hand second half of page back to the ring */
i40e_reuse_rx_page(rx_ring, rx_buffer);
i40e_process_rx_buffs(rx_ring, I40E_XDP_PASS, xdp);
} else {
/* we are not reusing the buffer so unmap it */
dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
i40e_rx_pg_size(rx_ring),
DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
__page_frag_cache_drain(rx_buffer->page,
rx_buffer->pagecnt_bias);
/* clear contents of buffer_info */
rx_buffer->page = NULL;
struct i40e_rx_buffer *rx_buffer;
rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
/* buffer is used by skb, update page_offset */
i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz);
}
return skb;
}
/**
......@@ -2332,25 +2358,6 @@ static int i40e_run_xdp(struct i40e_ring *rx_ring, struct xdp_buff *xdp, struct
return result;
}
/**
* i40e_rx_buffer_flip - adjusted rx_buffer to point to an unused region
* @rx_ring: Rx ring
* @rx_buffer: Rx buffer to adjust
* @size: Size of adjustment
**/
static void i40e_rx_buffer_flip(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer,
unsigned int size)
{
unsigned int truesize = i40e_rx_frame_truesize(rx_ring, size);
#if (PAGE_SIZE < 8192)
rx_buffer->page_offset ^= truesize;
#else
rx_buffer->page_offset += truesize;
#endif
}
/**
* i40e_xdp_ring_update_tail - Updates the XDP Tx ring tail register
* @xdp_ring: XDP Tx ring
......@@ -2409,16 +2416,65 @@ void i40e_finalize_xdp_rx(struct i40e_ring *rx_ring, unsigned int xdp_res)
}
/**
* i40e_inc_ntc: Advance the next_to_clean index
* i40e_inc_ntp: Advance the next_to_process index
* @rx_ring: Rx ring
**/
static void i40e_inc_ntc(struct i40e_ring *rx_ring)
static void i40e_inc_ntp(struct i40e_ring *rx_ring)
{
u32 ntp = rx_ring->next_to_process + 1;
ntp = (ntp < rx_ring->count) ? ntp : 0;
rx_ring->next_to_process = ntp;
prefetch(I40E_RX_DESC(rx_ring, ntp));
}
/**
* i40e_add_xdp_frag: Add a frag to xdp_buff
* @xdp: xdp_buff pointing to the data
* @nr_frags: return number of buffers for the packet
* @rx_buffer: rx_buffer holding data of the current frag
* @size: size of data of current frag
*/
static int i40e_add_xdp_frag(struct xdp_buff *xdp, u32 *nr_frags,
struct i40e_rx_buffer *rx_buffer, u32 size)
{
u32 ntc = rx_ring->next_to_clean + 1;
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
if (!xdp_buff_has_frags(xdp)) {
sinfo->nr_frags = 0;
sinfo->xdp_frags_size = 0;
xdp_buff_set_frags_flag(xdp);
} else if (unlikely(sinfo->nr_frags >= MAX_SKB_FRAGS)) {
/* Overflowing packet: All frags need to be dropped */
return -ENOMEM;
}
__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buffer->page,
rx_buffer->page_offset, size);
ntc = (ntc < rx_ring->count) ? ntc : 0;
rx_ring->next_to_clean = ntc;
prefetch(I40E_RX_DESC(rx_ring, ntc));
sinfo->xdp_frags_size += size;
if (page_is_pfmemalloc(rx_buffer->page))
xdp_buff_set_frag_pfmemalloc(xdp);
*nr_frags = sinfo->nr_frags;
return 0;
}
/**
* i40e_consume_xdp_buff - Consume all the buffers of the packet and update ntc
* @rx_ring: rx descriptor ring to transact packets on
* @xdp: xdp_buff pointing to the data
* @rx_buffer: rx_buffer of eop desc
*/
static void i40e_consume_xdp_buff(struct i40e_ring *rx_ring,
struct xdp_buff *xdp,
struct i40e_rx_buffer *rx_buffer)
{
i40e_process_rx_buffs(rx_ring, I40E_XDP_CONSUMED, xdp);
i40e_put_rx_buffer(rx_ring, rx_buffer);
rx_ring->next_to_clean = rx_ring->next_to_process;
xdp->data = NULL;
}
/**
......@@ -2437,38 +2493,36 @@ static void i40e_inc_ntc(struct i40e_ring *rx_ring)
static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
unsigned int *rx_cleaned)
{
unsigned int total_rx_bytes = 0, total_rx_packets = 0, frame_sz = 0;
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
u16 clean_threshold = rx_ring->count / 2;
unsigned int offset = rx_ring->rx_offset;
struct sk_buff *skb = rx_ring->skb;
struct xdp_buff *xdp = &rx_ring->xdp;
unsigned int xdp_xmit = 0;
struct bpf_prog *xdp_prog;
bool failure = false;
struct xdp_buff xdp;
int xdp_res = 0;
#if (PAGE_SIZE < 8192)
frame_sz = i40e_rx_frame_truesize(rx_ring, 0);
#endif
xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq);
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
while (likely(total_rx_packets < (unsigned int)budget)) {
u16 ntp = rx_ring->next_to_process;
struct i40e_rx_buffer *rx_buffer;
union i40e_rx_desc *rx_desc;
int rx_buffer_pgcnt;
struct sk_buff *skb;
unsigned int size;
u32 nfrags = 0;
bool neop;
u64 qword;
/* return some buffers to hardware, one at a time is too slow */
if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
if (cleaned_count >= clean_threshold) {
failure = failure ||
i40e_alloc_rx_buffers(rx_ring, cleaned_count);
cleaned_count = 0;
}
rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean);
rx_desc = I40E_RX_DESC(rx_ring, ntp);
/* status_error_len will always be zero for unused descriptors
* because it's cleared in cleanup, and overlaps with hdr_addr
......@@ -2487,8 +2541,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
i40e_clean_programming_status(rx_ring,
rx_desc->raw.qword[0],
qword);
rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
i40e_inc_ntc(rx_ring);
rx_buffer = i40e_rx_bi(rx_ring, ntp);
i40e_inc_ntp(rx_ring);
i40e_reuse_rx_page(rx_ring, rx_buffer);
cleaned_count++;
continue;
......@@ -2499,59 +2553,62 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
if (!size)
break;
i40e_trace(clean_rx_irq, rx_ring, rx_desc, skb);
rx_buffer = i40e_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt);
i40e_trace(clean_rx_irq, rx_ring, rx_desc, xdp);
/* retrieve a buffer from the ring */
if (!skb) {
rx_buffer = i40e_get_rx_buffer(rx_ring, size);
neop = i40e_is_non_eop(rx_ring, rx_desc);
i40e_inc_ntp(rx_ring);
if (!xdp->data) {
unsigned char *hard_start;
hard_start = page_address(rx_buffer->page) +
rx_buffer->page_offset - offset;
xdp_prepare_buff(&xdp, hard_start, offset, size, true);
xdp_buff_clear_frags_flag(&xdp);
xdp_prepare_buff(xdp, hard_start, offset, size, true);
#if (PAGE_SIZE > 4096)
/* At larger PAGE_SIZE, frame_sz depend on len size */
xdp.frame_sz = i40e_rx_frame_truesize(rx_ring, size);
xdp->frame_sz = i40e_rx_frame_truesize(rx_ring, size);
#endif
xdp_res = i40e_run_xdp(rx_ring, &xdp, xdp_prog);
} else if (i40e_add_xdp_frag(xdp, &nfrags, rx_buffer, size) &&
!neop) {
/* Overflowing packet: Drop all frags on EOP */
i40e_consume_xdp_buff(rx_ring, xdp, rx_buffer);
break;
}
if (neop)
continue;
xdp_res = i40e_run_xdp(rx_ring, xdp, xdp_prog);
if (xdp_res) {
if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) {
xdp_xmit |= xdp_res;
i40e_rx_buffer_flip(rx_ring, rx_buffer, size);
xdp_xmit |= xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR);
if (unlikely(xdp_buff_has_frags(xdp))) {
i40e_process_rx_buffs(rx_ring, xdp_res, xdp);
size = xdp_get_buff_len(xdp);
} else if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) {
i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz);
} else {
rx_buffer->pagecnt_bias++;
}
total_rx_bytes += size;
total_rx_packets++;
} else if (skb) {
i40e_add_rx_frag(rx_ring, rx_buffer, skb, size);
} else if (ring_uses_build_skb(rx_ring)) {
skb = i40e_build_skb(rx_ring, rx_buffer, &xdp);
} else {
skb = i40e_construct_skb(rx_ring, rx_buffer, &xdp);
}
if (ring_uses_build_skb(rx_ring))
skb = i40e_build_skb(rx_ring, xdp, nfrags);
else
skb = i40e_construct_skb(rx_ring, xdp, nfrags);
/* exit if we failed to retrieve a buffer */
if (!xdp_res && !skb) {
/* drop if we failed to retrieve a buffer */
if (!skb) {
rx_ring->rx_stats.alloc_buff_failed++;
rx_buffer->pagecnt_bias++;
i40e_consume_xdp_buff(rx_ring, xdp, rx_buffer);
break;
}
i40e_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt);
cleaned_count++;
i40e_inc_ntc(rx_ring);
if (i40e_is_non_eop(rx_ring, rx_desc))
continue;
if (xdp_res || i40e_cleanup_headers(rx_ring, skb, rx_desc)) {
skb = NULL;
continue;
}
if (i40e_cleanup_headers(rx_ring, skb, rx_desc))
goto process_next;
/* probably a little skewed due to removing CRC */
total_rx_bytes += skb->len;
......@@ -2559,16 +2616,21 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
/* populate checksum, VLAN, and protocol */
i40e_process_skb_fields(rx_ring, rx_desc, skb);
i40e_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb);
i40e_trace(clean_rx_irq_rx, rx_ring, rx_desc, xdp);
napi_gro_receive(&rx_ring->q_vector->napi, skb);
skb = NULL;
}
/* update budget accounting */
total_rx_packets++;
process_next:
cleaned_count += nfrags + 1;
i40e_put_rx_buffer(rx_ring, rx_buffer);
rx_ring->next_to_clean = rx_ring->next_to_process;
xdp->data = NULL;
}
i40e_finalize_xdp_rx(rx_ring, xdp_xmit);
rx_ring->skb = skb;
i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets);
......
......@@ -277,6 +277,7 @@ struct i40e_rx_buffer {
struct page *page;
__u32 page_offset;
__u16 pagecnt_bias;
__u32 page_count;
};
struct i40e_queue_stats {
......@@ -336,6 +337,17 @@ struct i40e_ring {
u8 dcb_tc; /* Traffic class of ring */
u8 __iomem *tail;
/* Storing xdp_buff on ring helps in saving the state of partially built
* packet when i40e_clean_rx_ring_irq() must return before it sees EOP
* and to resume packet building for this ring in the next call to
* i40e_clean_rx_ring_irq().
*/
struct xdp_buff xdp;
/* Next descriptor to be processed; next_to_clean is updated only on
* processing EOP descriptor
*/
u16 next_to_process;
/* high bit set means dynamic, use accessor routines to read/write.
* hardware only supports 2us resolution for the ITR registers.
* these values always store the USER setting, and must be converted
......@@ -380,14 +392,6 @@ struct i40e_ring {
struct rcu_head rcu; /* to avoid race on free */
u16 next_to_alloc;
struct sk_buff *skb; /* When i40e_clean_rx_ring_irq() must
* return before it sees the EOP for
* the current packet, we save that skb
* here and resume receiving this
* packet the next time
* i40e_clean_rx_ring_irq() is called
* for this ring.
*/
struct i40e_channel *ch;
u16 rx_offset;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment