Commit dcf02bac authored by Paolo Abeni's avatar Paolo Abeni

Merge branch 'net-stmmac-improve-tx-timer-logic'

Christian Marangi says:

====================
net: stmmac: improve tx timer logic

This series comes with the intention of restoring original performance
of stmmac on some router/device that used the stmmac driver to handle
gigabit traffic.

More info are present in patch 3. This cover letter is to show results
and improvements of the following change.

The move to hr_timer for tx timer and commit 8fce3331 ("net: stmmac:
Rework coalesce timer and fix multi-queue races") caused big performance
regression on these kind of device.

This was observed on ipq806x that after kernel 4.19 couldn't handle
gigabit speed anymore.

The following series is currently applied and tested in OpenWrt SNAPSHOT
and have great performance increase. (the scenario is qca8k switch +
stmmac dwmac1000) Some good comparison can be found here [1].

The difference is from a swconfig scenario (where dsa tagging is not
used so very low CPU impact in handling traffic) and DSA scenario where
tagging is used and there is a minimal impact in the CPU. As can be
notice even with DSA in place we have better perf.

It was observed by other user that also SQM scenario with cake scheduler
were improved in the order of 100mbps (this scenario is CPU limited and
any increase of perf is caused by removing load on the CPU)

Been at least 15 days that this is in use without any complain or bug
reported about queue timeout. (was the case with v1 before the
additional patch was added, only appear on real world tests and not on
iperf tests)

[1] https://forum.openwrt.org/t/netgear-r7800-exploration-ipq8065-qca9984/285/3427?u=ansuel
====================

Link: https://lore.kernel.org/r/20231018123550.27110-1-ansuelsmth@gmail.comSigned-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
parents a0e6323d 03955096
......@@ -2501,14 +2501,6 @@ static int napi_rx_handler(struct napi_struct *napi, int budget)
return work_done;
}
/*
* Returns true if the device is already scheduled for polling.
*/
static inline int napi_is_scheduled(struct napi_struct *napi)
{
return test_bit(NAPI_STATE_SCHED, &napi->state);
}
/**
* process_pure_responses - process pure responses from a response queue
* @adap: the adapter
......
......@@ -293,7 +293,7 @@ struct stmmac_safety_stats {
#define MIN_DMA_RIWT 0x10
#define DEF_DMA_RIWT 0xa0
/* Tx coalesce parameters */
#define STMMAC_COAL_TX_TIMER 1000
#define STMMAC_COAL_TX_TIMER 5000
#define STMMAC_MAX_COAL_TX_TICK 100000
#define STMMAC_TX_MAX_FRAMES 256
#define STMMAC_TX_FRAMES 25
......
......@@ -2543,9 +2543,13 @@ static void stmmac_bump_dma_threshold(struct stmmac_priv *priv, u32 chan)
* @priv: driver private structure
* @budget: napi budget limiting this functions packet handling
* @queue: TX queue index
* @pending_packets: signal to arm the TX coal timer
* Description: it reclaims the transmit resources after transmission completes.
* If some packets still needs to be handled, due to TX coalesce, set
* pending_packets to true to make NAPI arm the TX coal timer.
*/
static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue,
bool *pending_packets)
{
struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue];
struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[queue];
......@@ -2706,7 +2710,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
/* We still have pending packets, let's call for a new scheduling */
if (tx_q->dirty_tx != tx_q->cur_tx)
stmmac_tx_timer_arm(priv, queue);
*pending_packets = true;
flags = u64_stats_update_begin_irqsave(&txq_stats->syncp);
txq_stats->tx_packets += tx_packets;
......@@ -2996,13 +3000,25 @@ static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue)
{
struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue];
u32 tx_coal_timer = priv->tx_coal_timer[queue];
struct stmmac_channel *ch;
struct napi_struct *napi;
if (!tx_coal_timer)
return;
ch = &priv->channel[tx_q->queue_index];
napi = tx_q->xsk_pool ? &ch->rxtx_napi : &ch->tx_napi;
/* Arm timer only if napi is not already scheduled.
* Try to cancel any timer if napi is scheduled, timer will be armed
* again in the next scheduled napi.
*/
if (unlikely(!napi_is_scheduled(napi)))
hrtimer_start(&tx_q->txtimer,
STMMAC_COAL_TIMER(tx_coal_timer),
HRTIMER_MODE_REL);
else
hrtimer_try_to_cancel(&tx_q->txtimer);
}
/**
......@@ -5560,6 +5576,7 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
container_of(napi, struct stmmac_channel, tx_napi);
struct stmmac_priv *priv = ch->priv_data;
struct stmmac_txq_stats *txq_stats;
bool pending_packets = false;
u32 chan = ch->index;
unsigned long flags;
int work_done;
......@@ -5569,7 +5586,7 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
txq_stats->napi_poll++;
u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
work_done = stmmac_tx_clean(priv, budget, chan);
work_done = stmmac_tx_clean(priv, budget, chan, &pending_packets);
work_done = min(work_done, budget);
if (work_done < budget && napi_complete_done(napi, work_done)) {
......@@ -5580,6 +5597,10 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
spin_unlock_irqrestore(&ch->lock, flags);
}
/* TX still have packet to handle, check if we need to arm tx timer */
if (pending_packets)
stmmac_tx_timer_arm(priv, chan);
return work_done;
}
......@@ -5588,6 +5609,7 @@ static int stmmac_napi_poll_rxtx(struct napi_struct *napi, int budget)
struct stmmac_channel *ch =
container_of(napi, struct stmmac_channel, rxtx_napi);
struct stmmac_priv *priv = ch->priv_data;
bool tx_pending_packets = false;
int rx_done, tx_done, rxtx_done;
struct stmmac_rxq_stats *rxq_stats;
struct stmmac_txq_stats *txq_stats;
......@@ -5604,7 +5626,7 @@ static int stmmac_napi_poll_rxtx(struct napi_struct *napi, int budget)
txq_stats->napi_poll++;
u64_stats_update_end_irqrestore(&txq_stats->syncp, flags);
tx_done = stmmac_tx_clean(priv, budget, chan);
tx_done = stmmac_tx_clean(priv, budget, chan, &tx_pending_packets);
tx_done = min(tx_done, budget);
rx_done = stmmac_rx_zc(priv, budget, chan);
......@@ -5629,6 +5651,10 @@ static int stmmac_napi_poll_rxtx(struct napi_struct *napi, int budget)
spin_unlock_irqrestore(&ch->lock, flags);
}
/* TX still have packet to handle, check if we need to arm tx timer */
if (tx_pending_packets)
stmmac_tx_timer_arm(priv, chan);
return min(rxtx_done, budget - 1);
}
......
......@@ -2005,7 +2005,7 @@ static void rtw89_core_rx_to_mac80211(struct rtw89_dev *rtwdev,
struct napi_struct *napi = &rtwdev->napi;
/* In low power mode, napi isn't scheduled. Receive it to netif. */
if (unlikely(!test_bit(NAPI_STATE_SCHED, &napi->state)))
if (unlikely(!napi_is_scheduled(napi)))
napi = NULL;
rtw89_core_hw_to_sband_rate(rx_status);
......
......@@ -482,6 +482,29 @@ static inline bool napi_prefer_busy_poll(struct napi_struct *n)
return test_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state);
}
/**
* napi_is_scheduled - test if NAPI is scheduled
* @n: NAPI context
*
* This check is "best-effort". With no locking implemented,
* a NAPI can be scheduled or terminate right after this check
* and produce not precise results.
*
* NAPI_STATE_SCHED is an internal state, napi_is_scheduled
* should not be used normally and napi_schedule should be
* used instead.
*
* Use only if the driver really needs to check if a NAPI
* is scheduled for example in the context of delayed timer
* that can be skipped if a NAPI is already scheduled.
*
* Return True if NAPI is scheduled, False otherwise.
*/
static inline bool napi_is_scheduled(struct napi_struct *n)
{
return test_bit(NAPI_STATE_SCHED, &n->state);
}
bool napi_schedule_prep(struct napi_struct *n);
/**
......
......@@ -6532,7 +6532,7 @@ static int __napi_poll(struct napi_struct *n, bool *repoll)
* accidentally calling ->poll() when NAPI is not scheduled.
*/
work = 0;
if (test_bit(NAPI_STATE_SCHED, &n->state)) {
if (napi_is_scheduled(n)) {
work = n->poll(n, weight);
trace_napi_poll(n, work, weight);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment