Commit 470f3669 authored by David S. Miller

Merge branch 'octeon_ep-transmit-cleanups-and-optimizations'

Shinas Rasheed says:

====================
Cleanup and optimizations to transmit code

Pad small packets to ETH_ZLEN before transmit, clean up the dma sync calls,
add xmit_more functionality, and then remove the atomic variable usage
left over in the preceding patches.

Changes:
V3:
  - Stop returning NETDEV_TX_BUSY when the ring is full, in the xmit patch.
    Instead, check early whether the next packet can fit in the ring
    (rather than the current one) and stop the queue if it cannot.
  - Add smp_mb between stopping the tx queue and re-checking whether it has
    free entries, in the queue full check function, so that IQ completions
    processed on other CPUs are reflected (a minimal C model of this
    pattern is sketched after the commit metadata below).
  - Update small packet padding patch changelog to give more info.
V2: https://lore.kernel.org/all/20231024145119.2366588-1-srasheed@marvell.com/
  - Added a patch to pad small packets to ETH_ZLEN, part of the transmit
    optimization patches that was missed out in V1
  - Updated the changelog to provide more details for the dma_sync removal
    patch
  - Updated the changelog to use an imperative tone in the add xmit_more
    patch
V1: https://lore.kernel.org/all/20231023114449.2362147-1-srasheed@marvell.com/
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 56eddc3c dc9c02b7
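
Before the per-file hunks, a minimal self-contained C11 model of the stop/barrier/recheck pattern described in the V3 notes above: the transmit side stops its queue when ring space runs low, issues a full memory barrier, then re-checks space so that completions finished on other CPUs are not missed. Every name here (RING_SIZE, WAKE_THRESHOLD, ring_space(), ring_full_check(), queue_stopped) is an illustrative stand-in, not the driver's identifier; in octep_iq_full_check() the same roles are played by IQ_INSTR_SPACE(), netif_stop_subqueue(), smp_mb() and netif_start_subqueue().

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define RING_SIZE      1024u   /* power of 2, like OCTEP_IQ_MAX_DESCRIPTORS */
#define WAKE_THRESHOLD 32u     /* stand-in for OCTEP_WAKE_QUEUE_THRESHOLD */

static _Atomic unsigned int write_index;  /* advanced by the transmit path */
static _Atomic unsigned int read_index;   /* advanced by completion processing */
static atomic_bool queue_stopped;

static unsigned int ring_space(void)
{
	unsigned int pending = (atomic_load(&write_index) -
				atomic_load(&read_index)) & (RING_SIZE - 1);
	return RING_SIZE - pending;
}

/* Returns true if the queue had to stay stopped. */
static bool ring_full_check(void)
{
	if (ring_space() > WAKE_THRESHOLD)
		return false;

	atomic_store(&queue_stopped, true);

	/* Full barrier between publishing the stop and re-reading the
	 * indices; this is the role smp_mb() plays in octep_iq_full_check(). */
	atomic_thread_fence(memory_order_seq_cst);

	if (ring_space() > WAKE_THRESHOLD) {
		atomic_store(&queue_stopped, false);  /* entries were freed, restart */
		return false;
	}
	return true;
}

int main(void)
{
	atomic_store(&write_index, 1000);
	atomic_store(&read_index, 0);
	printf("space=%u stopped=%d\n", ring_space(), ring_full_check());
	return 0;
}

The fence mirrors the smp_mb() added in V3: it orders the "queue stopped" store against the index re-reads, so a completion that freed entries on another CPU is seen by the re-check instead of leaving the queue stopped.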
@@ -13,9 +13,10 @@
#define OCTEP_64BYTE_INSTR 64
/* Tx Queue: maximum descriptors per ring */
+/* This needs to be a power of 2 */
#define OCTEP_IQ_MAX_DESCRIPTORS 1024
/* Minimum input (Tx) requests to be enqueued to ring doorbell */
-#define OCTEP_DB_MIN 1
+#define OCTEP_DB_MIN 8
/* Packet threshold for Tx queue interrupt */
#define OCTEP_IQ_INTR_THRESHOLD 0x0
......
@@ -777,17 +777,24 @@ static int octep_stop(struct net_device *netdev)
*/
static inline int octep_iq_full_check(struct octep_iq *iq)
{
-if (likely((iq->max_count - atomic_read(&iq->instr_pending)) >=
+if (likely((IQ_INSTR_SPACE(iq)) >
OCTEP_WAKE_QUEUE_THRESHOLD))
return 0;
/* Stop the queue if unable to send */
netif_stop_subqueue(iq->netdev, iq->q_no);
+/* Allow pending index updates from octep_iq_process_completions()
+ * on other CPUs to become visible before re-checking, in case the
+ * queue has regained free entries.
+ */
+smp_mb();
/* check again and restart the queue, in case NAPI has just freed
* enough Tx ring entries.
*/
-if (unlikely((iq->max_count - atomic_read(&iq->instr_pending)) >=
+if (unlikely(IQ_INSTR_SPACE(iq) >
OCTEP_WAKE_QUEUE_THRESHOLD)) {
netif_start_subqueue(iq->netdev, iq->q_no);
iq->stats.restart_cnt++;
@@ -818,8 +825,12 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
struct octep_iq *iq;
skb_frag_t *frag;
u16 nr_frags, si;
+int xmit_more;
u16 q_no, wi;
+if (skb_put_padto(skb, ETH_ZLEN))
+return NETDEV_TX_OK;
q_no = skb_get_queue_mapping(skb);
if (q_no >= oct->num_iqs) {
netdev_err(netdev, "Invalid Tx skb->queue_mapping=%d\n", q_no);
@@ -827,10 +838,6 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
}
iq = oct->iq[q_no];
-if (octep_iq_full_check(iq)) {
-iq->stats.tx_busy++;
-return NETDEV_TX_BUSY;
-}
shinfo = skb_shinfo(skb);
nr_frags = shinfo->nr_frags;
@@ -869,9 +876,6 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
if (dma_mapping_error(iq->dev, dma))
goto dma_map_err;
-dma_sync_single_for_cpu(iq->dev, tx_buffer->sglist_dma,
-OCTEP_SGLIST_SIZE_PER_PKT,
-DMA_TO_DEVICE);
memset(sglist, 0, OCTEP_SGLIST_SIZE_PER_PKT);
sglist[0].len[3] = len;
sglist[0].dma_ptr[0] = dma;
@@ -891,26 +895,33 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
frag++;
si++;
}
-dma_sync_single_for_device(iq->dev, tx_buffer->sglist_dma,
-OCTEP_SGLIST_SIZE_PER_PKT,
-DMA_TO_DEVICE);
hw_desc->dptr = tx_buffer->sglist_dma;
}
-netdev_tx_sent_queue(iq->netdev_q, skb->len);
+xmit_more = netdev_xmit_more();
+__netdev_tx_sent_queue(iq->netdev_q, skb->len, xmit_more);
skb_tx_timestamp(skb);
-atomic_inc(&iq->instr_pending);
iq->fill_cnt++;
wi++;
if (wi == iq->max_count)
wi = 0;
-iq->host_write_index = wi;
+iq->host_write_index = wi & iq->ring_size_mask;
+/* octep_iq_full_check() stops the queue and returns true if the
+ * queue has become full by inserting the current packet. In that
+ * case, go ahead and ring the doorbell instead of deferring it.
+ */
+if (!octep_iq_full_check(iq) && xmit_more &&
+iq->fill_cnt < iq->fill_threshold)
+return NETDEV_TX_OK;
/* Flush the hw descriptor before writing to doorbell */
wmb();
-/* Ring Doorbell to notify the NIC there is a new packet */
-writel(1, iq->doorbell_reg);
-iq->stats.instr_posted++;
+/* Ring Doorbell to notify the NIC of new packets */
+writel(iq->fill_cnt, iq->doorbell_reg);
+iq->stats.instr_posted += iq->fill_cnt;
iq->fill_cnt = 0;
return NETDEV_TX_OK;
dma_map_sg_err:
......
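
A compact userspace sketch of the doorbell batching introduced in the octep_start_xmit() hunks above: descriptors are counted and the doorbell write is deferred while the stack signals xmit_more and the batch is still below the fill threshold, unless the queue has just filled up. The names here (struct txq, doorbell_write(), queue_one_packet(), FILL_THRESHOLD) are made up for illustration; in the driver the corresponding pieces are iq->fill_cnt, the writel() to iq->doorbell_reg, octep_start_xmit() and OCTEP_DB_MIN / iq->fill_threshold.

#include <stdbool.h>
#include <stdio.h>

#define FILL_THRESHOLD 8	/* mirrors the new OCTEP_DB_MIN value */

struct txq {
	unsigned int fill_cnt;	/* descriptors queued since the last doorbell */
	unsigned int posted;	/* descriptors the NIC has been told about */
};

static void doorbell_write(struct txq *q)
{
	/* In the driver this is a writel() of fill_cnt to the doorbell register. */
	q->posted += q->fill_cnt;
	printf("doorbell: %u new descriptors\n", q->fill_cnt);
	q->fill_cnt = 0;
}

static void queue_one_packet(struct txq *q, bool xmit_more, bool queue_full)
{
	q->fill_cnt++;

	/* Defer the doorbell only if the stack promises more packets, the
	 * batch is still small and the queue did not just become full. */
	if (!queue_full && xmit_more && q->fill_cnt < FILL_THRESHOLD)
		return;

	doorbell_write(q);
}

int main(void)
{
	struct txq q = { 0 };
	int i;

	for (i = 0; i < 10; i++)
		queue_one_packet(&q, i < 9, false);	/* xmit_more on all but the last */
	printf("total posted: %u\n", q.posted);
	return 0;
}

Queuing ten packets with xmit_more set on all but the last rings the doorbell twice: once when the batch reaches eight descriptors and once more when the stack stops promising further packets.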
@@ -40,6 +40,15 @@
#define OCTEP_OQ_INTR_RESEND_BIT 59
#define OCTEP_MMIO_REGIONS 3
+#define IQ_INSTR_PENDING(iq) ({ typeof(iq) iq__ = (iq); \
+((iq__)->host_write_index - (iq__)->flush_index) & \
+(iq__)->ring_size_mask; \
+})
+#define IQ_INSTR_SPACE(iq) ({ typeof(iq) iq_ = (iq); \
+(iq_)->max_count - IQ_INSTR_PENDING(iq_); \
+})
/* PCI address space mapping information.
* Each of the 3 address spaces given by BAR0, BAR2 and BAR4 of
* Octeon gets mapped to different physical address spaces in
......
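
The IQ_INSTR_PENDING()/IQ_INSTR_SPACE() macros added above replace the atomic instr_pending counter with masked index arithmetic. A small standalone check of that arithmetic, with values chosen purely for illustration: because the ring size is a power of 2, (write - read) & (size - 1) gives the number of outstanding descriptors even when the write index has wrapped around below the read index, which is why OCTEP_IQ_MAX_DESCRIPTORS must stay a power of 2.

#include <stdio.h>

#define MAX_COUNT 1024u			/* OCTEP_IQ_MAX_DESCRIPTORS */
#define RING_MASK (MAX_COUNT - 1)	/* ring_size_mask */

static unsigned int pending(unsigned int write, unsigned int read)
{
	return (write - read) & RING_MASK;
}

int main(void)
{
	/* No wrap: 100 descriptors outstanding. */
	printf("pending=%u space=%u\n", pending(100, 0),
	       MAX_COUNT - pending(100, 0));

	/* Write index wrapped to 5 while the flush index is still at 1020:
	 * (5 - 1020) & 1023 == 9 descriptors outstanding. */
	printf("pending=%u space=%u\n", pending(5, 1020),
	       MAX_COUNT - pending(5, 1020));
	return 0;
}

The second case prints pending=9 space=1015: the write index has wrapped past the flush index and the mask still yields the correct occupancy.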
@@ -21,7 +21,6 @@ static void octep_iq_reset_indices(struct octep_iq *iq)
iq->flush_index = 0;
iq->pkts_processed = 0;
iq->pkt_in_done = 0;
-atomic_set(&iq->instr_pending, 0);
}
/**
@@ -82,7 +81,6 @@ int octep_iq_process_completions(struct octep_iq *iq, u16 budget)
}
iq->pkts_processed += compl_pkts;
-atomic_sub(compl_pkts, &iq->instr_pending);
iq->stats.instr_completed += compl_pkts;
iq->stats.bytes_sent += compl_bytes;
iq->stats.sgentry_sent += compl_sg;
@@ -91,7 +89,7 @@ int octep_iq_process_completions(struct octep_iq *iq, u16 budget)
netdev_tx_completed_queue(iq->netdev_q, compl_pkts, compl_bytes);
if (unlikely(__netif_subqueue_stopped(iq->netdev, iq->q_no)) &&
-((iq->max_count - atomic_read(&iq->instr_pending)) >
+(IQ_INSTR_SPACE(iq) >
OCTEP_WAKE_QUEUE_THRESHOLD))
netif_wake_subqueue(iq->netdev, iq->q_no);
return !budget;
@@ -144,7 +142,6 @@ static void octep_iq_free_pending(struct octep_iq *iq)
dev_kfree_skb_any(skb);
}
-atomic_set(&iq->instr_pending, 0);
iq->flush_index = fi;
netdev_tx_reset_queue(netdev_get_tx_queue(iq->netdev, iq->q_no));
}
......
@@ -172,9 +172,6 @@ struct octep_iq {
/* Statistics for this input queue. */
struct octep_iq_stats stats;
-/* This field keeps track of the instructions pending in this queue. */
-atomic_t instr_pending;
/* Pointer to the Virtual Base addr of the input ring. */
struct octep_tx_desc_hw *desc_ring;
......