Commit 10d1b0e4 authored by Daniel Borkmann

Merge branch 'xdp-ice-mbuf'

Maciej Fijalkowski says:

====================
Although this work started as an effort to add multi-buffer XDP support
to the ice driver, as usual it turned out that some other side issues needed
to be addressed along the way, so let me give you an overview.

The first patch adjusts legacy-rx so that it becomes possible to refer to
the skb_shared_info placed at the end of the buffer when gathering up
frame fragments within the xdp_buff.
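
Roughly, the arithmetic behind the new constants (a sketch, assuming the
usual ~320 byte SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) on x86_64
and the 128-byte Rx buffer length granularity of the HW) is:

    2048 (half page) - 320 (tail room for skb_shared_info) = 1728
    1728 rounded down to a 128-byte multiple -> ICE_RXBUF_1664
    ICE_MAX_CHAINED_RX_BUFS * ICE_RXBUF_1664 = 5 * 1664 = 8320
                                             -> ICE_MAX_FRAME_LEGACY_RX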

Then, patches 2-9 prepare the ice driver so that the actual multi-buffer
patches are easier to swallow.

Patches 10 and 11 are the meat. What is worth mentioning is that this set
actually *fixes* things, as patch 11 removes the logic based on
next_dd/rs, which we had previously stepped away from for ice_xmit_zc().
Currently, the AF_XDP ZC XDP_TX workload is off, as there are two cleaning
sides that can be triggered and they work on different internal logic. This
set unifies that and allows us to improve the performance by 2x with a
trick in the last (13th) patch.
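
In short, the unified scheme boils down to the following sketch (a
paraphrase of ice_set_rs_bit() and the reworked ice_clean_xdp_irq()
further down, not a drop-in snippet):

    /* producer, once per NAPI batch (ice_finalize_xdp_rx()): */
    rs_idx = ring->next_to_use ? ring->next_to_use - 1 : ring->count - 1;
    ICE_TX_DESC(ring, rs_idx)->cmd_type_offset_bsz |=
            cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
    ring->tx_buf[first_idx].rs_idx = rs_idx;

    /* consumer (ice_clean_xdp_irq()): DD set on that descriptor means
     * everything from next_to_clean up to rs_idx can be freed
     */
    idx = ring->tx_buf[ring->next_to_clean].rs_idx;
    if (ICE_TX_DESC(ring, idx)->cmd_type_offset_bsz &
        cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))
            /* clean [next_to_clean, idx], minding the ring wrap */;

One RS/DD pair per NAPI batch replaces the per-quarter next_rs/next_dd
bookkeeping.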

Patch 12 is a simple cleanup of no-longer-needed fields from the Tx ring.

I might be wrong, but I have not seen anyone report the performance impact
of the patches that add XDP multi-buffer support to a particular driver.
Numbers below were gathered via xdp_rxq_info and xdp_redirect_map on
1500 MTU:

XDP_DROP      +1%
XDP_PASS      -1.2%
XDP_TX        -0.5%
XDP_REDIRECT  -3.3%
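
Typical invocations look roughly like the ones below; interface names are
placeholders and the exact options depend on the samples/bpf version in
the tree:

    ./xdp_rxq_info --dev <iface> --action XDP_DROP
    ./xdp_redirect_map <iface_in> <iface_out>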

Cherry on top, which is not directly related to mbuf support (last
patch):

XDP_TX ZC +126%

The target we agreed on was not to degrade performance for any action by
more than 5%, so our goal was met. Basically, this set keeps the
performance where it was. Redirect is slower due to more frequent tail
bumps.
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Alexander Lobakin <alexandr.lobakin@intel.com>
parents c1a3daf7 a24b4c6e
@@ -355,9 +355,6 @@ static unsigned int ice_rx_offset(struct ice_rx_ring *rx_ring)
{
if (ice_ring_uses_build_skb(rx_ring))
return ICE_SKB_PAD;
else if (ice_is_xdp_ena_vsi(rx_ring->vsi))
return XDP_PACKET_HEADROOM;
return 0;
}
@@ -495,7 +492,7 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
{
struct device *dev = ice_pf_to_dev(ring->vsi->back);
u16 num_bufs = ICE_DESC_UNUSED(ring);
u32 num_bufs = ICE_RX_DESC_UNUSED(ring);
int err;
ring->rx_buf_len = ring->vsi->rx_buf_len;
@@ -503,8 +500,10 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
if (ring->vsi->type == ICE_VSI_PF) {
if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
/* coverity[check_return] */
xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
ring->q_index, ring->q_vector->napi.napi_id);
__xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
ring->q_index,
ring->q_vector->napi.napi_id,
ring->vsi->rx_buf_len);
ring->xsk_pool = ice_xsk_pool(ring);
if (ring->xsk_pool) {
@@ -524,9 +523,11 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
} else {
if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
/* coverity[check_return] */
xdp_rxq_info_reg(&ring->xdp_rxq,
ring->netdev,
ring->q_index, ring->q_vector->napi.napi_id);
__xdp_rxq_info_reg(&ring->xdp_rxq,
ring->netdev,
ring->q_index,
ring->q_vector->napi.napi_id,
ring->vsi->rx_buf_len);
err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_PAGE_SHARED,
@@ -536,6 +537,8 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
}
}
xdp_init_buff(&ring->xdp, ice_rx_pg_size(ring) / 2, &ring->xdp_rxq);
ring->xdp.data = NULL;
err = ice_setup_rx_ctx(ring);
if (err) {
dev_err(dev, "ice_setup_rx_ctx failed for RxQ %d, err %d\n",
......
@@ -3046,8 +3046,6 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
/* clone ring and setup updated count */
xdp_rings[i] = *vsi->xdp_rings[i];
xdp_rings[i].count = new_tx_cnt;
xdp_rings[i].next_dd = ICE_RING_QUARTER(&xdp_rings[i]) - 1;
xdp_rings[i].next_rs = ICE_RING_QUARTER(&xdp_rings[i]) - 1;
xdp_rings[i].desc = NULL;
xdp_rings[i].tx_buf = NULL;
err = ice_setup_tx_ring(&xdp_rings[i]);
@@ -3092,7 +3090,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
/* allocate Rx buffers */
err = ice_alloc_rx_bufs(&rx_rings[i],
ICE_DESC_UNUSED(&rx_rings[i]));
ICE_RX_DESC_UNUSED(&rx_rings[i]));
rx_unwind:
if (err) {
while (i) {
......
@@ -1992,8 +1992,8 @@ void ice_update_eth_stats(struct ice_vsi *vsi)
void ice_vsi_cfg_frame_size(struct ice_vsi *vsi)
{
if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) {
vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
vsi->rx_buf_len = ICE_RXBUF_2048;
vsi->max_frame = ICE_MAX_FRAME_LEGACY_RX;
vsi->rx_buf_len = ICE_RXBUF_1664;
#if (PAGE_SIZE < 8192)
} else if (!ICE_2K_TOO_SMALL_WITH_PADDING &&
(vsi->netdev->mtu <= ETH_DATA_LEN)) {
@@ -2002,11 +2002,7 @@ void ice_vsi_cfg_frame_size(struct ice_vsi *vsi)
#endif
} else {
vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
#if (PAGE_SIZE < 8192)
vsi->rx_buf_len = ICE_RXBUF_3072;
#else
vsi->rx_buf_len = ICE_RXBUF_2048;
#endif
}
}
......
@@ -2570,8 +2570,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
xdp_ring->netdev = NULL;
xdp_ring->dev = dev;
xdp_ring->count = vsi->num_tx_desc;
xdp_ring->next_dd = ICE_RING_QUARTER(xdp_ring) - 1;
xdp_ring->next_rs = ICE_RING_QUARTER(xdp_ring) - 1;
WRITE_ONCE(vsi->xdp_rings[i], xdp_ring);
if (ice_setup_tx_ring(xdp_ring))
goto free_xdp_rings;
@@ -2862,6 +2860,18 @@ int ice_vsi_determine_xdp_res(struct ice_vsi *vsi)
return 0;
}
/**
* ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
* @vsi: Pointer to VSI structure
*/
static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
{
if (test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
return ICE_RXBUF_1664;
else
return ICE_RXBUF_3072;
}
/**
* ice_xdp_setup_prog - Add or remove XDP eBPF program
* @vsi: VSI to setup XDP for
@@ -2872,13 +2882,16 @@ static int
ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
{
int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
unsigned int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
bool if_running = netif_running(vsi->netdev);
int ret = 0, xdp_ring_err = 0;
if (frame_size > vsi->rx_buf_len) {
NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP");
return -EOPNOTSUPP;
if (prog && !prog->aux->xdp_has_frags) {
if (frame_size > ice_max_xdp_frame_size(vsi)) {
NL_SET_ERR_MSG_MOD(extack,
"MTU is too large for linear frames and XDP prog does not support frags");
return -EOPNOTSUPP;
}
}
/* need to stop netdev while setting up the program for Rx rings */
@@ -7330,18 +7343,6 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
dev_err(dev, "Rebuild failed, unload and reload driver\n");
}
/**
* ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
* @vsi: Pointer to VSI structure
*/
static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
{
if (PAGE_SIZE >= 8192 || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
return ICE_RXBUF_2048 - XDP_PACKET_HEADROOM;
else
return ICE_RXBUF_3072;
}
/**
* ice_change_mtu - NDO callback to change the MTU
* @netdev: network interface device structure
@@ -7354,6 +7355,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *vsi = np->vsi;
struct ice_pf *pf = vsi->back;
struct bpf_prog *prog;
u8 count = 0;
int err = 0;
@@ -7362,7 +7364,8 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
return 0;
}
if (ice_is_xdp_ena_vsi(vsi)) {
prog = vsi->xdp_prog;
if (prog && !prog->aux->xdp_has_frags) {
int frame_size = ice_max_xdp_frame_size(vsi);
if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) {
@@ -7370,6 +7373,12 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
frame_size - ICE_ETH_PKT_HDR_PAD);
return -EINVAL;
}
} else if (test_bit(ICE_FLAG_LEGACY_RX, pf->flags)) {
if (new_mtu + ICE_ETH_PKT_HDR_PAD > ICE_MAX_FRAME_LEGACY_RX) {
netdev_err(netdev, "Too big MTU for legacy-rx; Max is %d\n",
ICE_MAX_FRAME_LEGACY_RX - ICE_ETH_PKT_HDR_PAD);
return -EINVAL;
}
}
/* if a reset is in progress, wait for some time for it to complete */
......
@@ -9,10 +9,12 @@
#define ICE_DFLT_IRQ_WORK 256
#define ICE_RXBUF_3072 3072
#define ICE_RXBUF_2048 2048
#define ICE_RXBUF_1664 1664
#define ICE_RXBUF_1536 1536
#define ICE_MAX_CHAINED_RX_BUFS 5
#define ICE_MAX_BUF_TXD 8
#define ICE_MIN_TX_LEN 17
#define ICE_MAX_FRAME_LEGACY_RX 8320
/* The size limit for a transmit buffer in a descriptor is (16K - 1).
* In order to align with the read requests we will align the value to
@@ -110,6 +112,10 @@ static inline int ice_skb_pad(void)
(u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
(R)->next_to_clean - (R)->next_to_use - 1)
#define ICE_RX_DESC_UNUSED(R) \
((((R)->first_desc > (R)->next_to_use) ? 0 : (R)->count) + \
(R)->first_desc - (R)->next_to_use - 1)
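/* Illustration (not part of the patch): ICE_RX_DESC_UNUSED() counts the free
* descriptors between next_to_use and first_desc (the first descriptor of the
* frame currently being gathered), keeping the usual one-slot gap, so that
* buffer allocation never runs into a frame that is still being assembled.
* E.g. count = 512, first_desc = 5, next_to_use = 10 gives 512 + 5 - 10 - 1 =
* 506 free slots; first_desc = 10, next_to_use = 5 gives 10 - 5 - 1 = 4.
*/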
#define ICE_RING_QUARTER(R) ((R)->count >> 2)
#define ICE_TX_FLAGS_TSO BIT(0)
@@ -134,6 +140,7 @@ static inline int ice_skb_pad(void)
#define ICE_XDP_TX BIT(1)
#define ICE_XDP_REDIR BIT(2)
#define ICE_XDP_EXIT BIT(3)
#define ICE_SKB_CONSUMED ICE_XDP_CONSUMED
#define ICE_RX_DMA_ATTR \
(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
@@ -143,13 +150,20 @@ static inline int ice_skb_pad(void)
#define ICE_TXD_LAST_DESC_CMD (ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)
struct ice_tx_buf {
struct ice_tx_desc *next_to_watch;
union {
struct ice_tx_desc *next_to_watch;
u32 rs_idx;
};
union {
struct sk_buff *skb;
void *raw_buf; /* used for XDP */
struct xdp_buff *xdp; /* used for XDP_TX ZC */
};
unsigned int bytecount;
unsigned short gso_segs;
union {
unsigned int gso_segs;
unsigned int nr_frags; /* used for mbuf XDP */
};
u32 tx_flags;
DEFINE_DMA_UNMAP_LEN(len);
DEFINE_DMA_UNMAP_ADDR(dma);
@@ -170,7 +184,9 @@ struct ice_rx_buf {
dma_addr_t dma;
struct page *page;
unsigned int page_offset;
u16 pagecnt_bias;
unsigned int pgcnt;
unsigned int act;
unsigned int pagecnt_bias;
};
struct ice_q_stats {
......@@ -273,42 +289,44 @@ struct ice_rx_ring {
struct ice_vsi *vsi; /* Backreference to associated VSI */
struct ice_q_vector *q_vector; /* Backreference to associated vector */
u8 __iomem *tail;
u16 q_index; /* Queue number of ring */
u16 count; /* Number of descriptors */
u16 reg_idx; /* HW register index of the ring */
u16 next_to_alloc;
/* CL2 - 2nd cacheline starts here */
union {
struct ice_rx_buf *rx_buf;
struct xdp_buff **xdp_buf;
};
/* CL2 - 2nd cacheline starts here */
struct xdp_rxq_info xdp_rxq;
struct xdp_buff xdp;
/* CL3 - 3rd cacheline starts here */
u16 q_index; /* Queue number of ring */
u16 count; /* Number of descriptors */
u16 reg_idx; /* HW register index of the ring */
struct bpf_prog *xdp_prog;
u16 rx_offset;
/* used in interrupt processing */
u16 next_to_use;
u16 next_to_clean;
u16 next_to_alloc;
u16 rx_offset;
u16 rx_buf_len;
u16 first_desc;
/* stats structs */
struct ice_ring_stats *ring_stats;
struct rcu_head rcu; /* to avoid race on free */
/* CL4 - 3rd cacheline starts here */
/* CL4 - 4th cacheline starts here */
struct ice_channel *ch;
struct bpf_prog *xdp_prog;
struct ice_tx_ring *xdp_ring;
struct xsk_buff_pool *xsk_pool;
struct sk_buff *skb;
dma_addr_t dma; /* physical address of ring */
u64 cached_phctime;
u16 rx_buf_len;
u8 dcb_tc; /* Traffic class of ring */
u8 ptp_rx;
#define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1)
#define ICE_RX_FLAGS_CRC_STRIP_DIS BIT(2)
u8 flags;
/* CL5 - 5th cacheline starts here */
struct xdp_rxq_info xdp_rxq;
} ____cacheline_internodealigned_in_smp;
struct ice_tx_ring {
@@ -326,12 +344,11 @@ struct ice_tx_ring {
struct xsk_buff_pool *xsk_pool;
u16 next_to_use;
u16 next_to_clean;
u16 next_rs;
u16 next_dd;
u16 q_handle; /* Queue handle per TC */
u16 reg_idx; /* HW register index of the ring */
u16 count; /* Number of descriptors */
u16 q_index; /* Queue number of ring */
u16 xdp_tx_active;
/* stats structs */
struct ice_ring_stats *ring_stats;
/* CL3 - 3rd cacheline starts here */
@@ -342,7 +359,6 @@ struct ice_tx_ring {
spinlock_t tx_lock;
u32 txq_teid; /* Added Tx queue TEID */
/* CL4 - 4th cacheline starts here */
u16 xdp_tx_active;
#define ICE_TX_FLAGS_RING_XDP BIT(0)
#define ICE_TX_FLAGS_RING_VLAN_L2TAG1 BIT(1)
#define ICE_TX_FLAGS_RING_VLAN_L2TAG2 BIT(2)
@@ -431,7 +447,7 @@ static inline unsigned int ice_rx_pg_order(struct ice_rx_ring *ring)
union ice_32b_rx_flex_desc;
bool ice_alloc_rx_bufs(struct ice_rx_ring *rxr, u16 cleaned_count);
bool ice_alloc_rx_bufs(struct ice_rx_ring *rxr, unsigned int cleaned_count);
netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev);
u16
ice_select_queue(struct net_device *dev, struct sk_buff *skb,
......
@@ -220,129 +220,194 @@ ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
napi_gro_receive(&rx_ring->q_vector->napi, skb);
}
/**
* ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
* @xdp_ring: XDP Tx ring
* @tx_buf: Tx buffer to clean
*/
static void
ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf)
{
dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
dma_unmap_len_set(tx_buf, len, 0);
xdp_ring->xdp_tx_active--;
page_frag_free(tx_buf->raw_buf);
tx_buf->raw_buf = NULL;
}
/**
* ice_clean_xdp_irq - Reclaim resources after transmit completes on XDP ring
* @xdp_ring: XDP ring to clean
*/
static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring)
static u32 ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring)
{
unsigned int total_bytes = 0, total_pkts = 0;
u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
u16 ntc = xdp_ring->next_to_clean;
struct ice_tx_desc *next_dd_desc;
u16 next_dd = xdp_ring->next_dd;
struct ice_tx_buf *tx_buf;
int i;
int total_bytes = 0, total_pkts = 0;
u32 ntc = xdp_ring->next_to_clean;
struct ice_tx_desc *tx_desc;
u32 cnt = xdp_ring->count;
u32 ready_frames = 0;
u32 frags;
u32 idx;
u32 ret;
idx = xdp_ring->tx_buf[ntc].rs_idx;
tx_desc = ICE_TX_DESC(xdp_ring, idx);
if (tx_desc->cmd_type_offset_bsz &
cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)) {
if (idx >= ntc)
ready_frames = idx - ntc + 1;
else
ready_frames = idx + cnt - ntc + 1;
}
next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd);
if (!(next_dd_desc->cmd_type_offset_bsz &
cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
return;
if (!ready_frames)
return 0;
ret = ready_frames;
for (i = 0; i < tx_thresh; i++) {
tx_buf = &xdp_ring->tx_buf[ntc];
while (ready_frames) {
struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc];
/* bytecount holds size of head + frags */
total_bytes += tx_buf->bytecount;
/* normally tx_buf->gso_segs was taken but at this point
* it's always 1 for us
*/
frags = tx_buf->nr_frags;
total_pkts++;
/* count head + frags */
ready_frames -= frags + 1;
page_frag_free(tx_buf->raw_buf);
dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
dma_unmap_len_set(tx_buf, len, 0);
tx_buf->raw_buf = NULL;
if (xdp_ring->xsk_pool)
xsk_buff_free(tx_buf->xdp);
else
ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
ntc++;
if (ntc >= xdp_ring->count)
if (ntc == cnt)
ntc = 0;
for (int i = 0; i < frags; i++) {
tx_buf = &xdp_ring->tx_buf[ntc];
ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
ntc++;
if (ntc == cnt)
ntc = 0;
}
}
next_dd_desc->cmd_type_offset_bsz = 0;
xdp_ring->next_dd = xdp_ring->next_dd + tx_thresh;
if (xdp_ring->next_dd > xdp_ring->count)
xdp_ring->next_dd = tx_thresh - 1;
tx_desc->cmd_type_offset_bsz = 0;
xdp_ring->next_to_clean = ntc;
ice_update_tx_ring_stats(xdp_ring, total_pkts, total_bytes);
return ret;
}
/**
* ice_xmit_xdp_ring - submit single packet to XDP ring for transmission
* @data: packet data pointer
* @size: packet data size
* __ice_xmit_xdp_ring - submit frame to XDP ring for transmission
* @xdp: XDP buffer to be placed onto Tx descriptors
* @xdp_ring: XDP ring for transmission
*/
int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring)
int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring)
{
u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
u16 i = xdp_ring->next_to_use;
struct skb_shared_info *sinfo = NULL;
u32 size = xdp->data_end - xdp->data;
struct device *dev = xdp_ring->dev;
u32 ntu = xdp_ring->next_to_use;
struct ice_tx_desc *tx_desc;
struct ice_tx_buf *tx_head;
struct ice_tx_buf *tx_buf;
dma_addr_t dma;
u32 cnt = xdp_ring->count;
void *data = xdp->data;
u32 nr_frags = 0;
u32 free_space;
u32 frag = 0;
free_space = ICE_DESC_UNUSED(xdp_ring);
if (ICE_DESC_UNUSED(xdp_ring) < ICE_RING_QUARTER(xdp_ring))
free_space += ice_clean_xdp_irq(xdp_ring);
if (unlikely(xdp_buff_has_frags(xdp))) {
sinfo = xdp_get_shared_info_from_buff(xdp);
nr_frags = sinfo->nr_frags;
if (free_space < nr_frags + 1) {
xdp_ring->ring_stats->tx_stats.tx_busy++;
return ICE_XDP_CONSUMED;
}
}
if (ICE_DESC_UNUSED(xdp_ring) < tx_thresh)
ice_clean_xdp_irq(xdp_ring);
tx_desc = ICE_TX_DESC(xdp_ring, ntu);
tx_head = &xdp_ring->tx_buf[ntu];
tx_buf = tx_head;
if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) {
xdp_ring->ring_stats->tx_stats.tx_busy++;
return ICE_XDP_CONSUMED;
}
for (;;) {
dma_addr_t dma;
dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE);
if (dma_mapping_error(xdp_ring->dev, dma))
return ICE_XDP_CONSUMED;
dma = dma_map_single(dev, data, size, DMA_TO_DEVICE);
if (dma_mapping_error(dev, dma))
goto dma_unmap;
tx_buf = &xdp_ring->tx_buf[i];
tx_buf->bytecount = size;
tx_buf->gso_segs = 1;
tx_buf->raw_buf = data;
/* record length, and DMA address */
dma_unmap_len_set(tx_buf, len, size);
dma_unmap_addr_set(tx_buf, dma, dma);
/* record length, and DMA address */
dma_unmap_len_set(tx_buf, len, size);
dma_unmap_addr_set(tx_buf, dma, dma);
tx_desc->buf_addr = cpu_to_le64(dma);
tx_desc->cmd_type_offset_bsz = ice_build_ctob(0, 0, size, 0);
tx_buf->raw_buf = data;
tx_desc = ICE_TX_DESC(xdp_ring, i);
tx_desc->buf_addr = cpu_to_le64(dma);
tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, 0,
size, 0);
ntu++;
if (ntu == cnt)
ntu = 0;
xdp_ring->xdp_tx_active++;
i++;
if (i == xdp_ring->count) {
i = 0;
tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
tx_desc->cmd_type_offset_bsz |=
cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
xdp_ring->next_rs = tx_thresh - 1;
}
xdp_ring->next_to_use = i;
if (frag == nr_frags)
break;
if (i > xdp_ring->next_rs) {
tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
tx_desc->cmd_type_offset_bsz |=
cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
xdp_ring->next_rs += tx_thresh;
tx_desc = ICE_TX_DESC(xdp_ring, ntu);
tx_buf = &xdp_ring->tx_buf[ntu];
data = skb_frag_address(&sinfo->frags[frag]);
size = skb_frag_size(&sinfo->frags[frag]);
frag++;
}
/* store info about bytecount and frag count in first desc */
tx_head->bytecount = xdp_get_buff_len(xdp);
tx_head->nr_frags = nr_frags;
/* update last descriptor from a frame with EOP */
tx_desc->cmd_type_offset_bsz |=
cpu_to_le64(ICE_TX_DESC_CMD_EOP << ICE_TXD_QW1_CMD_S);
xdp_ring->xdp_tx_active++;
xdp_ring->next_to_use = ntu;
return ICE_XDP_TX;
dma_unmap:
for (;;) {
tx_buf = &xdp_ring->tx_buf[ntu];
dma_unmap_page(dev, dma_unmap_addr(tx_buf, dma),
dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
dma_unmap_len_set(tx_buf, len, 0);
if (tx_buf == tx_head)
break;
if (!ntu)
ntu += cnt;
ntu--;
}
return ICE_XDP_CONSUMED;
}
/**
* ice_xmit_xdp_buff - convert an XDP buffer to an XDP frame and send it
* @xdp: XDP buffer
* @xdp_ring: XDP Tx ring
*
* Returns negative on failure, 0 on success.
* ice_xmit_xdp_ring - submit frame to XDP ring for transmission
* @xdpf: XDP frame that will be converted to XDP buff
* @xdp_ring: XDP ring for transmission
*/
int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring)
int ice_xmit_xdp_ring(struct xdp_frame *xdpf, struct ice_tx_ring *xdp_ring)
{
struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
if (unlikely(!xdpf))
return ICE_XDP_CONSUMED;
struct xdp_buff xdp;
return ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
xdp_convert_frame_to_buff(xdpf, &xdp);
return __ice_xmit_xdp_ring(&xdp, xdp_ring);
}
/**
@@ -354,14 +419,21 @@ int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring)
* should be called when a batch of packets has been processed in the
* napi loop.
*/
void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res)
void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res,
u32 first_idx)
{
struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[first_idx];
if (xdp_res & ICE_XDP_REDIR)
xdp_do_flush_map();
if (xdp_res & ICE_XDP_TX) {
if (static_branch_unlikely(&ice_xdp_locking_key))
spin_lock(&xdp_ring->tx_lock);
/* store index of descriptor with RS bit set in the first
* ice_tx_buf of given NAPI batch
*/
tx_buf->rs_idx = ice_set_rs_bit(xdp_ring);
ice_xdp_ring_update_tail(xdp_ring);
if (static_branch_unlikely(&ice_xdp_locking_key))
spin_unlock(&xdp_ring->tx_lock);
......
@@ -5,6 +5,36 @@
#define _ICE_TXRX_LIB_H_
#include "ice.h"
/**
* ice_set_rx_bufs_act - propagate Rx buffer action to frags
* @xdp: XDP buffer representing frame (linear and frags part)
* @rx_ring: Rx ring struct
* @act: action to store onto Rx buffers related to XDP buffer parts
*
* Set the action that should be taken before putting the Rx buffers back,
* from the first frag up to the one before last. The last one is handled by
* the caller of this function, as it is the EOP frag that is currently being
* processed. This function is supposed to be called only when the XDP buffer
* contains frags.
*/
static inline void
ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring,
const unsigned int act)
{
const struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
u32 first = rx_ring->first_desc;
u32 nr_frags = sinfo->nr_frags;
u32 cnt = rx_ring->count;
struct ice_rx_buf *buf;
for (int i = 0; i < nr_frags; i++) {
buf = &rx_ring->rx_buf[first];
buf->act = act;
if (++first == cnt)
first = 0;
}
}
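/* Example of intended use (an illustration, not part of the patch): when an
* XDP verdict applies to a frame built from multiple buffers, e.g.
*
*	ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED);
*
* every buffer from first_desc up to (but not including) the EOP buffer gets
* the same verdict stored in rx_buf->act, and the Rx cleaning path later
* releases or recycles those buffers accordingly.
*/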
/**
* ice_test_staterr - tests bits in Rx descriptor status and error fields
* @status_err_n: Rx descriptor status_error0 or status_error1 bits
@@ -21,6 +51,28 @@ ice_test_staterr(__le16 status_err_n, const u16 stat_err_bits)
return !!(status_err_n & cpu_to_le16(stat_err_bits));
}
/**
* ice_is_non_eop - process handling of non-EOP buffers
* @rx_ring: Rx ring being processed
* @rx_desc: Rx descriptor for current buffer
*
* If the buffer is an EOP buffer, this function exits returning false,
* otherwise return true indicating that this is in fact a non-EOP buffer.
*/
static inline bool
ice_is_non_eop(const struct ice_rx_ring *rx_ring,
const union ice_32b_rx_flex_desc *rx_desc)
{
/* if we are the last buffer then there is nothing else to do */
#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
if (likely(ice_test_staterr(rx_desc->wb.status_error0, ICE_RXD_EOF)))
return false;
rx_ring->ring_stats->rx_stats.non_eop_descs++;
return true;
}
static inline __le64
ice_build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
{
@@ -70,9 +122,28 @@ static inline void ice_xdp_ring_update_tail(struct ice_tx_ring *xdp_ring)
writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
}
void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res);
/**
* ice_set_rs_bit - set RS bit on last produced descriptor (one behind current NTU)
* @xdp_ring: XDP ring to produce the HW Tx descriptors on
*
* Return: index of the descriptor on which the RS bit was set
*/
static inline u32 ice_set_rs_bit(const struct ice_tx_ring *xdp_ring)
{
u32 rs_idx = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : xdp_ring->count - 1;
struct ice_tx_desc *tx_desc;
tx_desc = ICE_TX_DESC(xdp_ring, rs_idx);
tx_desc->cmd_type_offset_bsz |=
cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
return rs_idx;
}
void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res, u32 first_idx);
int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring);
int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring);
int ice_xmit_xdp_ring(struct xdp_frame *xdpf, struct ice_tx_ring *xdp_ring);
int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring);
void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val);
void
ice_process_skb_fields(struct ice_rx_ring *rx_ring,
......
@@ -597,6 +597,107 @@ ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
return skb;
}
/**
* ice_clean_xdp_irq_zc - AF_XDP ZC specific Tx cleaning routine
* @xdp_ring: XDP Tx ring
*/
static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring)
{
u16 ntc = xdp_ring->next_to_clean;
struct ice_tx_desc *tx_desc;
u16 cnt = xdp_ring->count;
struct ice_tx_buf *tx_buf;
u16 xsk_frames = 0;
u16 last_rs;
int i;
last_rs = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : cnt - 1;
tx_desc = ICE_TX_DESC(xdp_ring, last_rs);
if (tx_desc->cmd_type_offset_bsz &
cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)) {
if (last_rs >= ntc)
xsk_frames = last_rs - ntc + 1;
else
xsk_frames = last_rs + cnt - ntc + 1;
}
if (!xsk_frames)
return;
if (likely(!xdp_ring->xdp_tx_active))
goto skip;
ntc = xdp_ring->next_to_clean;
for (i = 0; i < xsk_frames; i++) {
tx_buf = &xdp_ring->tx_buf[ntc];
if (tx_buf->xdp) {
xsk_buff_free(tx_buf->xdp);
xdp_ring->xdp_tx_active--;
} else {
xsk_frames++;
}
ntc++;
if (ntc == cnt)
ntc = 0;
}
skip:
tx_desc->cmd_type_offset_bsz = 0;
xdp_ring->next_to_clean += xsk_frames;
if (xdp_ring->next_to_clean >= cnt)
xdp_ring->next_to_clean -= cnt;
if (xsk_frames)
xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
}
/**
* ice_xmit_xdp_tx_zc - AF_XDP ZC handler for XDP_TX
* @xdp: XDP buffer to xmit
* @xdp_ring: XDP ring to produce descriptor onto
*
* Note that this function works directly on the xdp_buff; there is no need to
* convert it to an xdp_frame. The xdp_buff pointer is stored in the ice_tx_buf
* so that the cleaning side will be able to xsk_buff_free() it.
*
* Returns ICE_XDP_TX for successfully produced desc, ICE_XDP_CONSUMED if there
* was not enough space on XDP ring
*/
static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp,
struct ice_tx_ring *xdp_ring)
{
u32 size = xdp->data_end - xdp->data;
u32 ntu = xdp_ring->next_to_use;
struct ice_tx_desc *tx_desc;
struct ice_tx_buf *tx_buf;
dma_addr_t dma;
if (ICE_DESC_UNUSED(xdp_ring) < ICE_RING_QUARTER(xdp_ring)) {
ice_clean_xdp_irq_zc(xdp_ring);
if (!ICE_DESC_UNUSED(xdp_ring)) {
xdp_ring->ring_stats->tx_stats.tx_busy++;
return ICE_XDP_CONSUMED;
}
}
dma = xsk_buff_xdp_get_dma(xdp);
xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, size);
tx_buf = &xdp_ring->tx_buf[ntu];
tx_buf->xdp = xdp;
tx_desc = ICE_TX_DESC(xdp_ring, ntu);
tx_desc->buf_addr = cpu_to_le64(dma);
tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
0, size, 0);
xdp_ring->xdp_tx_active++;
if (++ntu == xdp_ring->count)
ntu = 0;
xdp_ring->next_to_use = ntu;
return ICE_XDP_TX;
}
/**
* ice_run_xdp_zc - Executes an XDP program in zero-copy path
* @rx_ring: Rx ring
@@ -630,7 +731,7 @@ ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
case XDP_PASS:
break;
case XDP_TX:
result = ice_xmit_xdp_buff(xdp, xdp_ring);
result = ice_xmit_xdp_tx_zc(xdp, xdp_ring);
if (result == ICE_XDP_CONSUMED)
goto out_failure;
break;
@@ -760,7 +861,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
if (entries_to_alloc > ICE_RING_QUARTER(rx_ring))
failure |= !ice_alloc_rx_bufs_zc(rx_ring, entries_to_alloc);
ice_finalize_xdp_rx(xdp_ring, xdp_xmit);
ice_finalize_xdp_rx(xdp_ring, xdp_xmit, 0);
ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes);
if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) {
@@ -775,75 +876,6 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
return failure ? budget : (int)total_rx_packets;
}
/**
* ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
* @xdp_ring: XDP Tx ring
* @tx_buf: Tx buffer to clean
*/
static void
ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf)
{
page_frag_free(tx_buf->raw_buf);
xdp_ring->xdp_tx_active--;
dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
dma_unmap_len_set(tx_buf, len, 0);
}
/**
* ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ
* @xdp_ring: XDP Tx ring
*/
static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring)
{
u16 ntc = xdp_ring->next_to_clean;
struct ice_tx_desc *tx_desc;
u16 cnt = xdp_ring->count;
struct ice_tx_buf *tx_buf;
u16 xsk_frames = 0;
u16 last_rs;
int i;
last_rs = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : cnt - 1;
tx_desc = ICE_TX_DESC(xdp_ring, last_rs);
if ((tx_desc->cmd_type_offset_bsz &
cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) {
if (last_rs >= ntc)
xsk_frames = last_rs - ntc + 1;
else
xsk_frames = last_rs + cnt - ntc + 1;
}
if (!xsk_frames)
return;
if (likely(!xdp_ring->xdp_tx_active))
goto skip;
ntc = xdp_ring->next_to_clean;
for (i = 0; i < xsk_frames; i++) {
tx_buf = &xdp_ring->tx_buf[ntc];
if (tx_buf->raw_buf) {
ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
tx_buf->raw_buf = NULL;
} else {
xsk_frames++;
}
ntc++;
if (ntc >= xdp_ring->count)
ntc = 0;
}
skip:
tx_desc->cmd_type_offset_bsz = 0;
xdp_ring->next_to_clean += xsk_frames;
if (xdp_ring->next_to_clean >= cnt)
xdp_ring->next_to_clean -= cnt;
if (xsk_frames)
xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
}
/**
* ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor
* @xdp_ring: XDP ring to produce the HW Tx descriptor on
@@ -917,20 +949,6 @@ static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *d
ice_xmit_pkt(xdp_ring, &descs[i], total_bytes);
}
/**
* ice_set_rs_bit - set RS bit on last produced descriptor (one behind current NTU)
* @xdp_ring: XDP ring to produce the HW Tx descriptors on
*/
static void ice_set_rs_bit(struct ice_tx_ring *xdp_ring)
{
u16 ntu = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : xdp_ring->count - 1;
struct ice_tx_desc *tx_desc;
tx_desc = ICE_TX_DESC(xdp_ring, ntu);
tx_desc->cmd_type_offset_bsz |=
cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
}
/**
* ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring
* @xdp_ring: XDP ring to produce the HW Tx descriptors on
@@ -1065,8 +1083,8 @@ void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring)
while (ntc != ntu) {
struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc];
if (tx_buf->raw_buf)
ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
if (tx_buf->xdp)
xsk_buff_free(tx_buf->xdp);
else
xsk_frames++;
......