Commit 38cc3c6d authored by Petr Tesarik, committed by David S. Miller

net: stmmac: protect updates of 64-bit statistics counters

As explained by a comment in <linux/u64_stats_sync.h>, the write side of struct
u64_stats_sync must ensure mutual exclusion, or one seqcount update could
be lost on 32-bit platforms, thus blocking readers forever. Such lockups
have been observed in the real world after stmmac_xmit() on one CPU raced
with stmmac_napi_poll_tx() on another CPU.
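
For reference, the contract looks roughly like this (a minimal sketch using
the real <linux/u64_stats_sync.h> helpers on a hypothetical example_stats
struct; on 32-bit SMP the sync object is a seqcount, so two racing writers
can leave the sequence permanently odd and every reader then spins in its
retry loop):

#include <linux/u64_stats_sync.h>

struct example_stats {                  /* hypothetical, for illustration */
        struct u64_stats_sync syncp;
        u64_stats_t counter;
};

/* Write side: callers must provide mutual exclusion among themselves. */
static void example_inc(struct example_stats *stats)
{
        u64_stats_update_begin(&stats->syncp);  /* seqcount++, now odd */
        u64_stats_inc(&stats->counter);
        u64_stats_update_end(&stats->syncp);    /* seqcount++, even again */
}

/* Read side: lockless, retries while an update is in flight. */
static u64 example_read(struct example_stats *stats)
{
        unsigned int start;
        u64 val;

        do {
                start = u64_stats_fetch_begin(&stats->syncp);
                val = u64_stats_read(&stats->counter);
        } while (u64_stats_fetch_retry(&stats->syncp, start));
        return val;
}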

To fix the issue without introducing a new lock, split the statistics into
three parts:

1. fields updated only under the tx queue lock,
2. fields updated only during NAPI poll,
3. fields updated only from interrupt context.

Updates to fields in the first two groups are already serialized through
other locks. It is sufficient to split the existing struct u64_stats_sync
so that each group has its own.

Note that tx_set_ic_bit is updated from both contexts. Split this counter
so that each context gets its own, and calculate their sum to get the total
value in stmmac_get_ethtool_stats().
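
Summing the two halves is a simple addition on the read side; the relevant
lines of the stmmac_get_ethtool_stats() hunk below read:

        /* Total tx_set_ic_bit = queue-lock half + NAPI half. */
        data[j++] += u64_stats_read(&q_snapshot.tx_set_ic_bit) +
                     u64_stats_read(&napi_snapshot.tx_set_ic_bit);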

For the third group, multiple interrupts may be processed by different CPUs
at the same time, but interrupts on the same CPU will not nest. Move fields
from this group to a newly created per-cpu struct stmmac_pcpu_stats.
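
The per-cpu structure must be allocated, and each embedded syncp initialized,
before the first interrupt can fire. A hedged sketch of the probe-time
allocation; it assumes the devm_netdev_alloc_pcpu_stats() helper, and the
corresponding stmmac_main.c hunk is not part of the excerpt below:

        /* Sketch only: one struct stmmac_pcpu_stats per possible CPU.
         * devm_netdev_alloc_pcpu_stats() zeroes the counters and calls
         * u64_stats_init() on the syncp member of every per-cpu copy.
         */
        priv->xstats.pcpu_stats =
                devm_netdev_alloc_pcpu_stats(device, struct stmmac_pcpu_stats);
        if (!priv->xstats.pcpu_stats)
                return -ENOMEM;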

Fixes: 133466c3 ("net: stmmac: use per-queue 64 bit statistics where necessary")
Link: https://lore.kernel.org/netdev/Za173PhviYg-1qIn@torres.zugschlus.de/t/
Cc: stable@vger.kernel.org
Signed-off-by: Petr Tesarik <petr@tesarici.cz>
Reviewed-by: Jisheng Zhang <jszhang@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent cb88cb53
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -59,28 +59,51 @@
 #undef FRAME_FILTER_DEBUG
 /* #define FRAME_FILTER_DEBUG */
 
+struct stmmac_q_tx_stats {
+        u64_stats_t tx_bytes;
+        u64_stats_t tx_set_ic_bit;
+        u64_stats_t tx_tso_frames;
+        u64_stats_t tx_tso_nfrags;
+};
+
+struct stmmac_napi_tx_stats {
+        u64_stats_t tx_packets;
+        u64_stats_t tx_pkt_n;
+        u64_stats_t poll;
+        u64_stats_t tx_clean;
+        u64_stats_t tx_set_ic_bit;
+};
+
 struct stmmac_txq_stats {
-        u64 tx_bytes;
-        u64 tx_packets;
-        u64 tx_pkt_n;
-        u64 tx_normal_irq_n;
-        u64 napi_poll;
-        u64 tx_clean;
-        u64 tx_set_ic_bit;
-        u64 tx_tso_frames;
-        u64 tx_tso_nfrags;
-        struct u64_stats_sync syncp;
+        /* Updates protected by tx queue lock. */
+        struct u64_stats_sync q_syncp;
+        struct stmmac_q_tx_stats q;
+
+        /* Updates protected by NAPI poll logic. */
+        struct u64_stats_sync napi_syncp;
+        struct stmmac_napi_tx_stats napi;
 } ____cacheline_aligned_in_smp;
 
+struct stmmac_napi_rx_stats {
+        u64_stats_t rx_bytes;
+        u64_stats_t rx_packets;
+        u64_stats_t rx_pkt_n;
+        u64_stats_t poll;
+};
+
 struct stmmac_rxq_stats {
-        u64 rx_bytes;
-        u64 rx_packets;
-        u64 rx_pkt_n;
-        u64 rx_normal_irq_n;
-        u64 napi_poll;
-        struct u64_stats_sync syncp;
+        /* Updates protected by NAPI poll logic. */
+        struct u64_stats_sync napi_syncp;
+        struct stmmac_napi_rx_stats napi;
 } ____cacheline_aligned_in_smp;
 
+/* Updates on each CPU protected by not allowing nested irqs. */
+struct stmmac_pcpu_stats {
+        struct u64_stats_sync syncp;
+        u64_stats_t rx_normal_irq_n[MTL_MAX_TX_QUEUES];
+        u64_stats_t tx_normal_irq_n[MTL_MAX_RX_QUEUES];
+};
+
 /* Extra statistic and debug information exposed by ethtool */
 struct stmmac_extra_stats {
         /* Transmit errors */
@@ -205,6 +228,7 @@ struct stmmac_extra_stats {
         /* per queue statistics */
         struct stmmac_txq_stats txq_stats[MTL_MAX_TX_QUEUES];
         struct stmmac_rxq_stats rxq_stats[MTL_MAX_RX_QUEUES];
+        struct stmmac_pcpu_stats __percpu *pcpu_stats;
         unsigned long rx_dropped;
         unsigned long rx_errors;
         unsigned long tx_dropped;
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -441,8 +441,7 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,
                                      struct stmmac_extra_stats *x, u32 chan,
                                      u32 dir)
 {
-        struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan];
-        struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan];
+        struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats);
         int ret = 0;
         u32 v;
 
@@ -455,9 +454,9 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,
 
         if (v & EMAC_TX_INT) {
                 ret |= handle_tx;
-                u64_stats_update_begin(&txq_stats->syncp);
-                txq_stats->tx_normal_irq_n++;
-                u64_stats_update_end(&txq_stats->syncp);
+                u64_stats_update_begin(&stats->syncp);
+                u64_stats_inc(&stats->tx_normal_irq_n[chan]);
+                u64_stats_update_end(&stats->syncp);
         }
 
         if (v & EMAC_TX_DMA_STOP_INT)
@@ -479,9 +478,9 @@ static int sun8i_dwmac_dma_interrupt(struct stmmac_priv *priv,
 
         if (v & EMAC_RX_INT) {
                 ret |= handle_rx;
-                u64_stats_update_begin(&rxq_stats->syncp);
-                rxq_stats->rx_normal_irq_n++;
-                u64_stats_update_end(&rxq_stats->syncp);
+                u64_stats_update_begin(&stats->syncp);
+                u64_stats_inc(&stats->rx_normal_irq_n[chan]);
+                u64_stats_update_end(&stats->syncp);
         }
 
         if (v & EMAC_RX_BUF_UA_INT)
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
@@ -171,8 +171,7 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
         const struct dwmac4_addrs *dwmac4_addrs = priv->plat->dwmac4_addrs;
         u32 intr_status = readl(ioaddr + DMA_CHAN_STATUS(dwmac4_addrs, chan));
         u32 intr_en = readl(ioaddr + DMA_CHAN_INTR_ENA(dwmac4_addrs, chan));
-        struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan];
-        struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan];
+        struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats);
         int ret = 0;
 
         if (dir == DMA_DIR_RX)
@@ -201,15 +200,15 @@ int dwmac4_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
         }
 
         /* TX/RX NORMAL interrupts */
         if (likely(intr_status & DMA_CHAN_STATUS_RI)) {
-                u64_stats_update_begin(&rxq_stats->syncp);
-                rxq_stats->rx_normal_irq_n++;
-                u64_stats_update_end(&rxq_stats->syncp);
+                u64_stats_update_begin(&stats->syncp);
+                u64_stats_inc(&stats->rx_normal_irq_n[chan]);
+                u64_stats_update_end(&stats->syncp);
                 ret |= handle_rx;
         }
         if (likely(intr_status & DMA_CHAN_STATUS_TI)) {
-                u64_stats_update_begin(&txq_stats->syncp);
-                txq_stats->tx_normal_irq_n++;
-                u64_stats_update_end(&txq_stats->syncp);
+                u64_stats_update_begin(&stats->syncp);
+                u64_stats_inc(&stats->tx_normal_irq_n[chan]);
+                u64_stats_update_end(&stats->syncp);
                 ret |= handle_tx;
         }
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
@@ -162,8 +162,7 @@ static void show_rx_process_state(unsigned int status)
 int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
                         struct stmmac_extra_stats *x, u32 chan, u32 dir)
 {
-        struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan];
-        struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan];
+        struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats);
         int ret = 0;
         /* read the status register (CSR5) */
         u32 intr_status = readl(ioaddr + DMA_STATUS);
@@ -215,16 +214,16 @@ int dwmac_dma_interrupt(struct stmmac_priv *priv, void __iomem *ioaddr,
                 u32 value = readl(ioaddr + DMA_INTR_ENA);
                 /* to schedule NAPI on real RIE event. */
                 if (likely(value & DMA_INTR_ENA_RIE)) {
-                        u64_stats_update_begin(&rxq_stats->syncp);
-                        rxq_stats->rx_normal_irq_n++;
-                        u64_stats_update_end(&rxq_stats->syncp);
+                        u64_stats_update_begin(&stats->syncp);
+                        u64_stats_inc(&stats->rx_normal_irq_n[chan]);
+                        u64_stats_update_end(&stats->syncp);
                         ret |= handle_rx;
                 }
         }
         if (likely(intr_status & DMA_STATUS_TI)) {
-                u64_stats_update_begin(&txq_stats->syncp);
-                txq_stats->tx_normal_irq_n++;
-                u64_stats_update_end(&txq_stats->syncp);
+                u64_stats_update_begin(&stats->syncp);
+                u64_stats_inc(&stats->tx_normal_irq_n[chan]);
+                u64_stats_update_end(&stats->syncp);
                 ret |= handle_tx;
         }
         if (unlikely(intr_status & DMA_STATUS_ERI))
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -337,8 +337,7 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv,
                                   struct stmmac_extra_stats *x, u32 chan,
                                   u32 dir)
 {
-        struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[chan];
-        struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[chan];
+        struct stmmac_pcpu_stats *stats = this_cpu_ptr(priv->xstats.pcpu_stats);
         u32 intr_status = readl(ioaddr + XGMAC_DMA_CH_STATUS(chan));
         u32 intr_en = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan));
         int ret = 0;
@@ -367,15 +366,15 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv,
         /* TX/RX NORMAL interrupts */
         if (likely(intr_status & XGMAC_NIS)) {
                 if (likely(intr_status & XGMAC_RI)) {
-                        u64_stats_update_begin(&rxq_stats->syncp);
-                        rxq_stats->rx_normal_irq_n++;
-                        u64_stats_update_end(&rxq_stats->syncp);
+                        u64_stats_update_begin(&stats->syncp);
+                        u64_stats_inc(&stats->rx_normal_irq_n[chan]);
+                        u64_stats_update_end(&stats->syncp);
                         ret |= handle_rx;
                 }
                 if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) {
-                        u64_stats_update_begin(&txq_stats->syncp);
-                        txq_stats->tx_normal_irq_n++;
-                        u64_stats_update_end(&txq_stats->syncp);
+                        u64_stats_update_begin(&stats->syncp);
+                        u64_stats_inc(&stats->tx_normal_irq_n[chan]);
+                        u64_stats_update_end(&stats->syncp);
                         ret |= handle_tx;
                 }
         }
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -549,44 +549,79 @@ stmmac_set_pauseparam(struct net_device *netdev,
         }
 }
 
+static u64 stmmac_get_rx_normal_irq_n(struct stmmac_priv *priv, int q)
+{
+        u64 total;
+        int cpu;
+
+        total = 0;
+        for_each_possible_cpu(cpu) {
+                struct stmmac_pcpu_stats *pcpu;
+                unsigned int start;
+                u64 irq_n;
+
+                pcpu = per_cpu_ptr(priv->xstats.pcpu_stats, cpu);
+                do {
+                        start = u64_stats_fetch_begin(&pcpu->syncp);
+                        irq_n = u64_stats_read(&pcpu->rx_normal_irq_n[q]);
+                } while (u64_stats_fetch_retry(&pcpu->syncp, start));
+                total += irq_n;
+        }
+        return total;
+}
+
+static u64 stmmac_get_tx_normal_irq_n(struct stmmac_priv *priv, int q)
+{
+        u64 total;
+        int cpu;
+
+        total = 0;
+        for_each_possible_cpu(cpu) {
+                struct stmmac_pcpu_stats *pcpu;
+                unsigned int start;
+                u64 irq_n;
+
+                pcpu = per_cpu_ptr(priv->xstats.pcpu_stats, cpu);
+                do {
+                        start = u64_stats_fetch_begin(&pcpu->syncp);
+                        irq_n = u64_stats_read(&pcpu->tx_normal_irq_n[q]);
+                } while (u64_stats_fetch_retry(&pcpu->syncp, start));
+                total += irq_n;
+        }
+        return total;
+}
+
 static void stmmac_get_per_qstats(struct stmmac_priv *priv, u64 *data)
 {
         u32 tx_cnt = priv->plat->tx_queues_to_use;
         u32 rx_cnt = priv->plat->rx_queues_to_use;
         unsigned int start;
-        int q, stat;
-        char *p;
+        int q;
 
         for (q = 0; q < tx_cnt; q++) {
                 struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[q];
-                struct stmmac_txq_stats snapshot;
+                u64 pkt_n;
 
                 do {
-                        start = u64_stats_fetch_begin(&txq_stats->syncp);
-                        snapshot = *txq_stats;
-                } while (u64_stats_fetch_retry(&txq_stats->syncp, start));
+                        start = u64_stats_fetch_begin(&txq_stats->napi_syncp);
+                        pkt_n = u64_stats_read(&txq_stats->napi.tx_pkt_n);
+                } while (u64_stats_fetch_retry(&txq_stats->napi_syncp, start));
 
-                p = (char *)&snapshot + offsetof(struct stmmac_txq_stats, tx_pkt_n);
-                for (stat = 0; stat < STMMAC_TXQ_STATS; stat++) {
-                        *data++ = (*(u64 *)p);
-                        p += sizeof(u64);
-                }
+                *data++ = pkt_n;
+                *data++ = stmmac_get_tx_normal_irq_n(priv, q);
         }
 
         for (q = 0; q < rx_cnt; q++) {
                 struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[q];
-                struct stmmac_rxq_stats snapshot;
+                u64 pkt_n;
 
                 do {
-                        start = u64_stats_fetch_begin(&rxq_stats->syncp);
-                        snapshot = *rxq_stats;
-                } while (u64_stats_fetch_retry(&rxq_stats->syncp, start));
+                        start = u64_stats_fetch_begin(&rxq_stats->napi_syncp);
+                        pkt_n = u64_stats_read(&rxq_stats->napi.rx_pkt_n);
+                } while (u64_stats_fetch_retry(&rxq_stats->napi_syncp, start));
 
-                p = (char *)&snapshot + offsetof(struct stmmac_rxq_stats, rx_pkt_n);
-                for (stat = 0; stat < STMMAC_RXQ_STATS; stat++) {
-                        *data++ = (*(u64 *)p);
-                        p += sizeof(u64);
-                }
+                *data++ = pkt_n;
+                *data++ = stmmac_get_rx_normal_irq_n(priv, q);
         }
 }
@@ -645,39 +680,49 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
         pos = j;
         for (i = 0; i < rx_queues_count; i++) {
                 struct stmmac_rxq_stats *rxq_stats = &priv->xstats.rxq_stats[i];
-                struct stmmac_rxq_stats snapshot;
+                struct stmmac_napi_rx_stats snapshot;
+                u64 n_irq;
 
                 j = pos;
                 do {
-                        start = u64_stats_fetch_begin(&rxq_stats->syncp);
-                        snapshot = *rxq_stats;
-                } while (u64_stats_fetch_retry(&rxq_stats->syncp, start));
+                        start = u64_stats_fetch_begin(&rxq_stats->napi_syncp);
+                        snapshot = rxq_stats->napi;
+                } while (u64_stats_fetch_retry(&rxq_stats->napi_syncp, start));
 
-                data[j++] += snapshot.rx_pkt_n;
-                data[j++] += snapshot.rx_normal_irq_n;
-                normal_irq_n += snapshot.rx_normal_irq_n;
-                napi_poll += snapshot.napi_poll;
+                data[j++] += u64_stats_read(&snapshot.rx_pkt_n);
+                n_irq = stmmac_get_rx_normal_irq_n(priv, i);
+                data[j++] += n_irq;
+                normal_irq_n += n_irq;
+                napi_poll += u64_stats_read(&snapshot.poll);
         }
 
         pos = j;
         for (i = 0; i < tx_queues_count; i++) {
                 struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[i];
-                struct stmmac_txq_stats snapshot;
+                struct stmmac_napi_tx_stats napi_snapshot;
+                struct stmmac_q_tx_stats q_snapshot;
+                u64 n_irq;
 
                 j = pos;
                 do {
-                        start = u64_stats_fetch_begin(&txq_stats->syncp);
-                        snapshot = *txq_stats;
-                } while (u64_stats_fetch_retry(&txq_stats->syncp, start));
+                        start = u64_stats_fetch_begin(&txq_stats->q_syncp);
+                        q_snapshot = txq_stats->q;
+                } while (u64_stats_fetch_retry(&txq_stats->q_syncp, start));
+                do {
+                        start = u64_stats_fetch_begin(&txq_stats->napi_syncp);
+                        napi_snapshot = txq_stats->napi;
+                } while (u64_stats_fetch_retry(&txq_stats->napi_syncp, start));
 
-                data[j++] += snapshot.tx_pkt_n;
-                data[j++] += snapshot.tx_normal_irq_n;
-                normal_irq_n += snapshot.tx_normal_irq_n;
-                data[j++] += snapshot.tx_clean;
-                data[j++] += snapshot.tx_set_ic_bit;
-                data[j++] += snapshot.tx_tso_frames;
-                data[j++] += snapshot.tx_tso_nfrags;
-                napi_poll += snapshot.napi_poll;
+                data[j++] += u64_stats_read(&napi_snapshot.tx_pkt_n);
+                n_irq = stmmac_get_tx_normal_irq_n(priv, i);
+                data[j++] += n_irq;
+                normal_irq_n += n_irq;
+                data[j++] += u64_stats_read(&napi_snapshot.tx_clean);
+                data[j++] += u64_stats_read(&q_snapshot.tx_set_ic_bit) +
+                        u64_stats_read(&napi_snapshot.tx_set_ic_bit);
+                data[j++] += u64_stats_read(&q_snapshot.tx_tso_frames);
+                data[j++] += u64_stats_read(&q_snapshot.tx_tso_nfrags);
+                napi_poll += u64_stats_read(&napi_snapshot.poll);
         }
 
         normal_irq_n += priv->xstats.rx_early_irq;
         data[j++] = normal_irq_n;
...