Commit 9acf59a7 authored by Andre Guedes's avatar Andre Guedes Committed by Tony Nguyen

igc: Enable TX via AF_XDP zero-copy

Add support for transmitting packets via AF_XDP zero-copy mechanism.

The packet transmission itself is implemented by igc_xdp_xmit_zc() which
is called from igc_clean_tx_irq() when the ring has AF_XDP zero-copy
enabled. Likewise i40e and ice drivers, the transmission budget used is
the number of descriptors available on the ring.

A new tx buffer type is introduced to 'enum igc_tx_buffer_type' to
indicate the tx buffer uses memory from xsk pool so it can be properly
cleaned after transmission or when the ring is cleaned.

The I225 controller has only 4 Tx hardware queues so the main difference
between igc and other Intel drivers that support AF_XDP zero-copy is
that there is no tx ring dedicated exclusively to XDP. Instead, tx
rings are shared between the network stack and XDP, and netdev queue
lock is used to ensure mutual exclusion. This is the same approach
implemented to support XDP_TX and XDP_REDIRECT actions.
Signed-off-by: default avatarAndre Guedes <andre.guedes@intel.com>
Signed-off-by: default avatarVedang Patel <vedang.patel@intel.com>
Signed-off-by: default avatarJithu Joseph <jithu.joseph@intel.com>
Reviewed-by: default avatarMaciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: default avatarDvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: default avatarTony Nguyen <anthony.l.nguyen@intel.com>
parent fc9df2a0
...@@ -258,6 +258,8 @@ int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx); ...@@ -258,6 +258,8 @@ int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx);
void igc_update_stats(struct igc_adapter *adapter); void igc_update_stats(struct igc_adapter *adapter);
void igc_disable_rx_ring(struct igc_ring *ring); void igc_disable_rx_ring(struct igc_ring *ring);
void igc_enable_rx_ring(struct igc_ring *ring); void igc_enable_rx_ring(struct igc_ring *ring);
void igc_disable_tx_ring(struct igc_ring *ring);
void igc_enable_tx_ring(struct igc_ring *ring);
int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags); int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
/* igc_dump declarations */ /* igc_dump declarations */
...@@ -413,6 +415,7 @@ enum igc_boards { ...@@ -413,6 +415,7 @@ enum igc_boards {
enum igc_tx_buffer_type { enum igc_tx_buffer_type {
IGC_TX_BUFFER_TYPE_SKB, IGC_TX_BUFFER_TYPE_SKB,
IGC_TX_BUFFER_TYPE_XDP, IGC_TX_BUFFER_TYPE_XDP,
IGC_TX_BUFFER_TYPE_XSK,
}; };
/* wrapper around a pointer to a socket buffer, /* wrapper around a pointer to a socket buffer,
......
...@@ -78,6 +78,7 @@ union igc_adv_rx_desc { ...@@ -78,6 +78,7 @@ union igc_adv_rx_desc {
/* Additional Transmit Descriptor Control definitions */ /* Additional Transmit Descriptor Control definitions */
#define IGC_TXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Tx Queue */ #define IGC_TXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Tx Queue */
#define IGC_TXDCTL_SWFLUSH 0x04000000 /* Transmit Software Flush */
/* Additional Receive Descriptor Control definitions */ /* Additional Receive Descriptor Control definitions */
#define IGC_RXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Rx Queue */ #define IGC_RXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Rx Queue */
......
...@@ -187,24 +187,28 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring) ...@@ -187,24 +187,28 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring)
{ {
u16 i = tx_ring->next_to_clean; u16 i = tx_ring->next_to_clean;
struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i]; struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
u32 xsk_frames = 0;
while (i != tx_ring->next_to_use) { while (i != tx_ring->next_to_use) {
union igc_adv_tx_desc *eop_desc, *tx_desc; union igc_adv_tx_desc *eop_desc, *tx_desc;
switch (tx_buffer->type) { switch (tx_buffer->type) {
case IGC_TX_BUFFER_TYPE_XSK:
xsk_frames++;
break;
case IGC_TX_BUFFER_TYPE_XDP: case IGC_TX_BUFFER_TYPE_XDP:
xdp_return_frame(tx_buffer->xdpf); xdp_return_frame(tx_buffer->xdpf);
igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
break; break;
case IGC_TX_BUFFER_TYPE_SKB: case IGC_TX_BUFFER_TYPE_SKB:
dev_kfree_skb_any(tx_buffer->skb); dev_kfree_skb_any(tx_buffer->skb);
igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
break; break;
default: default:
netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n"); netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n");
break; break;
} }
igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
/* check for eop_desc to determine the end of the packet */ /* check for eop_desc to determine the end of the packet */
eop_desc = tx_buffer->next_to_watch; eop_desc = tx_buffer->next_to_watch;
tx_desc = IGC_TX_DESC(tx_ring, i); tx_desc = IGC_TX_DESC(tx_ring, i);
...@@ -234,6 +238,9 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring) ...@@ -234,6 +238,9 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring)
} }
} }
if (tx_ring->xsk_pool && xsk_frames)
xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
/* reset BQL for queue */ /* reset BQL for queue */
netdev_tx_reset_queue(txring_txq(tx_ring)); netdev_tx_reset_queue(txring_txq(tx_ring));
...@@ -676,6 +683,8 @@ static void igc_configure_tx_ring(struct igc_adapter *adapter, ...@@ -676,6 +683,8 @@ static void igc_configure_tx_ring(struct igc_adapter *adapter,
u64 tdba = ring->dma; u64 tdba = ring->dma;
u32 txdctl = 0; u32 txdctl = 0;
ring->xsk_pool = igc_get_xsk_pool(adapter, ring);
/* disable the queue */ /* disable the queue */
wr32(IGC_TXDCTL(reg_idx), 0); wr32(IGC_TXDCTL(reg_idx), 0);
wrfl(); wrfl();
...@@ -2509,6 +2518,65 @@ static void igc_update_tx_stats(struct igc_q_vector *q_vector, ...@@ -2509,6 +2518,65 @@ static void igc_update_tx_stats(struct igc_q_vector *q_vector,
q_vector->tx.total_packets += packets; q_vector->tx.total_packets += packets;
} }
static void igc_xdp_xmit_zc(struct igc_ring *ring)
{
struct xsk_buff_pool *pool = ring->xsk_pool;
struct netdev_queue *nq = txring_txq(ring);
union igc_adv_tx_desc *tx_desc = NULL;
int cpu = smp_processor_id();
u16 ntu = ring->next_to_use;
struct xdp_desc xdp_desc;
u16 budget;
if (!netif_carrier_ok(ring->netdev))
return;
__netif_tx_lock(nq, cpu);
budget = igc_desc_unused(ring);
while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) {
u32 cmd_type, olinfo_status;
struct igc_tx_buffer *bi;
dma_addr_t dma;
cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
xdp_desc.len;
olinfo_status = xdp_desc.len << IGC_ADVTXD_PAYLEN_SHIFT;
dma = xsk_buff_raw_get_dma(pool, xdp_desc.addr);
xsk_buff_raw_dma_sync_for_device(pool, dma, xdp_desc.len);
tx_desc = IGC_TX_DESC(ring, ntu);
tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
tx_desc->read.buffer_addr = cpu_to_le64(dma);
bi = &ring->tx_buffer_info[ntu];
bi->type = IGC_TX_BUFFER_TYPE_XSK;
bi->protocol = 0;
bi->bytecount = xdp_desc.len;
bi->gso_segs = 1;
bi->time_stamp = jiffies;
bi->next_to_watch = tx_desc;
netdev_tx_sent_queue(txring_txq(ring), xdp_desc.len);
ntu++;
if (ntu == ring->count)
ntu = 0;
}
ring->next_to_use = ntu;
if (tx_desc) {
igc_flush_tx_descriptors(ring);
xsk_tx_release(pool);
}
__netif_tx_unlock(nq);
}
/** /**
* igc_clean_tx_irq - Reclaim resources after transmit completes * igc_clean_tx_irq - Reclaim resources after transmit completes
* @q_vector: pointer to q_vector containing needed info * @q_vector: pointer to q_vector containing needed info
...@@ -2525,6 +2593,7 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) ...@@ -2525,6 +2593,7 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
unsigned int i = tx_ring->next_to_clean; unsigned int i = tx_ring->next_to_clean;
struct igc_tx_buffer *tx_buffer; struct igc_tx_buffer *tx_buffer;
union igc_adv_tx_desc *tx_desc; union igc_adv_tx_desc *tx_desc;
u32 xsk_frames = 0;
if (test_bit(__IGC_DOWN, &adapter->state)) if (test_bit(__IGC_DOWN, &adapter->state))
return true; return true;
...@@ -2555,19 +2624,22 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) ...@@ -2555,19 +2624,22 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
total_packets += tx_buffer->gso_segs; total_packets += tx_buffer->gso_segs;
switch (tx_buffer->type) { switch (tx_buffer->type) {
case IGC_TX_BUFFER_TYPE_XSK:
xsk_frames++;
break;
case IGC_TX_BUFFER_TYPE_XDP: case IGC_TX_BUFFER_TYPE_XDP:
xdp_return_frame(tx_buffer->xdpf); xdp_return_frame(tx_buffer->xdpf);
igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
break; break;
case IGC_TX_BUFFER_TYPE_SKB: case IGC_TX_BUFFER_TYPE_SKB:
napi_consume_skb(tx_buffer->skb, napi_budget); napi_consume_skb(tx_buffer->skb, napi_budget);
igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
break; break;
default: default:
netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n"); netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n");
break; break;
} }
igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
/* clear last DMA location and unmap remaining buffers */ /* clear last DMA location and unmap remaining buffers */
while (tx_desc != eop_desc) { while (tx_desc != eop_desc) {
tx_buffer++; tx_buffer++;
...@@ -2609,6 +2681,14 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) ...@@ -2609,6 +2681,14 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
igc_update_tx_stats(q_vector, total_packets, total_bytes); igc_update_tx_stats(q_vector, total_packets, total_bytes);
if (tx_ring->xsk_pool) {
if (xsk_frames)
xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
if (xsk_uses_need_wakeup(tx_ring->xsk_pool))
xsk_set_tx_need_wakeup(tx_ring->xsk_pool);
igc_xdp_xmit_zc(tx_ring);
}
if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
struct igc_hw *hw = &adapter->hw; struct igc_hw *hw = &adapter->hw;
...@@ -6336,6 +6416,31 @@ void igc_enable_rx_ring(struct igc_ring *ring) ...@@ -6336,6 +6416,31 @@ void igc_enable_rx_ring(struct igc_ring *ring)
igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
} }
static void igc_disable_tx_ring_hw(struct igc_ring *ring)
{
struct igc_hw *hw = &ring->q_vector->adapter->hw;
u8 idx = ring->reg_idx;
u32 txdctl;
txdctl = rd32(IGC_TXDCTL(idx));
txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE;
txdctl |= IGC_TXDCTL_SWFLUSH;
wr32(IGC_TXDCTL(idx), txdctl);
}
void igc_disable_tx_ring(struct igc_ring *ring)
{
igc_disable_tx_ring_hw(ring);
igc_clean_tx_ring(ring);
}
void igc_enable_tx_ring(struct igc_ring *ring)
{
struct igc_adapter *adapter = ring->q_vector->adapter;
igc_configure_tx_ring(adapter, ring);
}
/** /**
* igc_init_module - Driver Registration Routine * igc_init_module - Driver Registration Routine
* *
......
...@@ -39,13 +39,14 @@ static int igc_xdp_enable_pool(struct igc_adapter *adapter, ...@@ -39,13 +39,14 @@ static int igc_xdp_enable_pool(struct igc_adapter *adapter,
{ {
struct net_device *ndev = adapter->netdev; struct net_device *ndev = adapter->netdev;
struct device *dev = &adapter->pdev->dev; struct device *dev = &adapter->pdev->dev;
struct igc_ring *rx_ring; struct igc_ring *rx_ring, *tx_ring;
struct napi_struct *napi; struct napi_struct *napi;
bool needs_reset; bool needs_reset;
u32 frame_size; u32 frame_size;
int err; int err;
if (queue_id >= adapter->num_rx_queues) if (queue_id >= adapter->num_rx_queues ||
queue_id >= adapter->num_tx_queues)
return -EINVAL; return -EINVAL;
frame_size = xsk_pool_get_rx_frame_size(pool); frame_size = xsk_pool_get_rx_frame_size(pool);
...@@ -67,18 +68,23 @@ static int igc_xdp_enable_pool(struct igc_adapter *adapter, ...@@ -67,18 +68,23 @@ static int igc_xdp_enable_pool(struct igc_adapter *adapter,
needs_reset = netif_running(adapter->netdev) && igc_xdp_is_enabled(adapter); needs_reset = netif_running(adapter->netdev) && igc_xdp_is_enabled(adapter);
rx_ring = adapter->rx_ring[queue_id]; rx_ring = adapter->rx_ring[queue_id];
tx_ring = adapter->tx_ring[queue_id];
/* Rx and Tx rings share the same napi context. */
napi = &rx_ring->q_vector->napi; napi = &rx_ring->q_vector->napi;
if (needs_reset) { if (needs_reset) {
igc_disable_rx_ring(rx_ring); igc_disable_rx_ring(rx_ring);
igc_disable_tx_ring(tx_ring);
napi_disable(napi); napi_disable(napi);
} }
set_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags); set_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags);
set_bit(IGC_RING_FLAG_AF_XDP_ZC, &tx_ring->flags);
if (needs_reset) { if (needs_reset) {
napi_enable(napi); napi_enable(napi);
igc_enable_rx_ring(rx_ring); igc_enable_rx_ring(rx_ring);
igc_enable_tx_ring(tx_ring);
err = igc_xsk_wakeup(ndev, queue_id, XDP_WAKEUP_RX); err = igc_xsk_wakeup(ndev, queue_id, XDP_WAKEUP_RX);
if (err) { if (err) {
...@@ -92,12 +98,13 @@ static int igc_xdp_enable_pool(struct igc_adapter *adapter, ...@@ -92,12 +98,13 @@ static int igc_xdp_enable_pool(struct igc_adapter *adapter,
static int igc_xdp_disable_pool(struct igc_adapter *adapter, u16 queue_id) static int igc_xdp_disable_pool(struct igc_adapter *adapter, u16 queue_id)
{ {
struct igc_ring *rx_ring, *tx_ring;
struct xsk_buff_pool *pool; struct xsk_buff_pool *pool;
struct igc_ring *rx_ring;
struct napi_struct *napi; struct napi_struct *napi;
bool needs_reset; bool needs_reset;
if (queue_id >= adapter->num_rx_queues) if (queue_id >= adapter->num_rx_queues ||
queue_id >= adapter->num_tx_queues)
return -EINVAL; return -EINVAL;
pool = xsk_get_pool_from_qid(adapter->netdev, queue_id); pool = xsk_get_pool_from_qid(adapter->netdev, queue_id);
...@@ -107,19 +114,24 @@ static int igc_xdp_disable_pool(struct igc_adapter *adapter, u16 queue_id) ...@@ -107,19 +114,24 @@ static int igc_xdp_disable_pool(struct igc_adapter *adapter, u16 queue_id)
needs_reset = netif_running(adapter->netdev) && igc_xdp_is_enabled(adapter); needs_reset = netif_running(adapter->netdev) && igc_xdp_is_enabled(adapter);
rx_ring = adapter->rx_ring[queue_id]; rx_ring = adapter->rx_ring[queue_id];
tx_ring = adapter->tx_ring[queue_id];
/* Rx and Tx rings share the same napi context. */
napi = &rx_ring->q_vector->napi; napi = &rx_ring->q_vector->napi;
if (needs_reset) { if (needs_reset) {
igc_disable_rx_ring(rx_ring); igc_disable_rx_ring(rx_ring);
igc_disable_tx_ring(tx_ring);
napi_disable(napi); napi_disable(napi);
} }
xsk_pool_dma_unmap(pool, IGC_RX_DMA_ATTR); xsk_pool_dma_unmap(pool, IGC_RX_DMA_ATTR);
clear_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags); clear_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags);
clear_bit(IGC_RING_FLAG_AF_XDP_ZC, &tx_ring->flags);
if (needs_reset) { if (needs_reset) {
napi_enable(napi); napi_enable(napi);
igc_enable_rx_ring(rx_ring); igc_enable_rx_ring(rx_ring);
igc_enable_tx_ring(tx_ring);
} }
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment