Commit 747056a9 authored by Jakub Kicinski's avatar Jakub Kicinski

Merge branch 'net-ipa-simplify-tx-power-handling'

Alex Elder says:

====================
net: ipa: simplify TX power handling

In order to deliver a packet to the IPA hardware, we must ensure
it is powered.  We request power by calling pm_runtime_get(), and
its return value tells us the power state.  We can't block in
ipa_start_xmit(), so if power isn't enabled we prevent further
transmit attempts by calling netif_stop_queue().  Power will
eventually become enabled, at which point we call netif_wake_queue()
to allow the transmit to be retried.  When it does, the power should
be enabled, so the packet delivery can proceed.

The logic that handles this is convoluted, and was put in place
to address a race condition pointed out by Jakub Kicinski during
review.  The fix addressed that race, as well as another one that
was found while investigating it:
  b8e36e13 ("net: ipa: fix TX queue race")
I have wanted to simplify this code ever since, and I'm pleased to
report that this series implements a much better solution that
avoids both race conditions.

The first race occurs between the ->ndo_start_xmit thread and the
runtime resume thread.  If we find power is not enabled when
requested in ipa_start_xmit(), we stop queueing.  But there's a
chance the runtime resume will enable queuing just before that,
leaving queueing stopped forever.  A flag is used to ensure that
does not occur.

A second flag is used to prevent NETDEV_TX_BUSY from being returned
repeatedly during the small window between enabling queueing and
finishing power resume.  This can happen if resume was underway when
pm_runtime_get() was called and completes immediately afterward.
This condition only exists because of the use of the first flag.

The fix is to disable transmit for *every* call to ipa_start_xmit(),
disabling it *before* calling pm_runtime_get().  This leaves three
cases:
  - If the return value indicates power is not active (or is in
    transition), queueing remains disabled--thereby avoiding
    the race between disabling it and a concurrent power thread
    enabling it.
  - If pm_runtime_get() returns an error, we drop the packet and
    re-enable queueing.
  - Finally, if the hardware is powered, we re-enable queueing
    before delivering the packet to the hardware.

So the first race is avoided.  And as a result, the second condition
will not occur.

The first patch adds pointers to the TX and RX IPA endpoints in the
netdev private data.  The second patch has netif_stop_queue() be
called for all packets; if pm_runtime_get() indicates power is
enabled (or an error), netif_wake_queue() is called to enable it
again.  The third and fourth patches get rid of the STARTED and
STOPPED IPA power flags, as well as the power spinlock, because they
are no longer needed.  The last three patches just eliminate some
trivial functions, open-coding them instead.
====================

Link: https://lore.kernel.org/r/20240130192305.250915-1-elder@linaro.orgSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents cf244463 e01bbdc9
...@@ -39,10 +39,14 @@ enum ipa_modem_state { ...@@ -39,10 +39,14 @@ enum ipa_modem_state {
/** /**
* struct ipa_priv - IPA network device private data * struct ipa_priv - IPA network device private data
* @ipa: IPA pointer * @ipa: IPA pointer
* @tx: Transmit endpoint pointer
* @rx: Receive endpoint pointer
* @work: Work structure used to wake the modem netdev TX queue * @work: Work structure used to wake the modem netdev TX queue
*/ */
struct ipa_priv { struct ipa_priv {
struct ipa *ipa; struct ipa *ipa;
struct ipa_endpoint *tx;
struct ipa_endpoint *rx;
struct work_struct work; struct work_struct work;
}; };
...@@ -59,11 +63,11 @@ static int ipa_open(struct net_device *netdev) ...@@ -59,11 +63,11 @@ static int ipa_open(struct net_device *netdev)
if (ret < 0) if (ret < 0)
goto err_power_put; goto err_power_put;
ret = ipa_endpoint_enable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]); ret = ipa_endpoint_enable_one(priv->tx);
if (ret) if (ret)
goto err_power_put; goto err_power_put;
ret = ipa_endpoint_enable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]); ret = ipa_endpoint_enable_one(priv->rx);
if (ret) if (ret)
goto err_disable_tx; goto err_disable_tx;
...@@ -75,7 +79,7 @@ static int ipa_open(struct net_device *netdev) ...@@ -75,7 +79,7 @@ static int ipa_open(struct net_device *netdev)
return 0; return 0;
err_disable_tx: err_disable_tx:
ipa_endpoint_disable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]); ipa_endpoint_disable_one(priv->tx);
err_power_put: err_power_put:
pm_runtime_put_noidle(dev); pm_runtime_put_noidle(dev);
...@@ -97,8 +101,8 @@ static int ipa_stop(struct net_device *netdev) ...@@ -97,8 +101,8 @@ static int ipa_stop(struct net_device *netdev)
netif_stop_queue(netdev); netif_stop_queue(netdev);
ipa_endpoint_disable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]); ipa_endpoint_disable_one(priv->rx);
ipa_endpoint_disable_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]); ipa_endpoint_disable_one(priv->tx);
out_power_put: out_power_put:
pm_runtime_mark_last_busy(dev); pm_runtime_mark_last_busy(dev);
(void)pm_runtime_put_autosuspend(dev); (void)pm_runtime_put_autosuspend(dev);
...@@ -106,13 +110,16 @@ static int ipa_stop(struct net_device *netdev) ...@@ -106,13 +110,16 @@ static int ipa_stop(struct net_device *netdev)
return 0; return 0;
} }
/** ipa_start_xmit() - Transmits an skb. /** ipa_start_xmit() - Transmit an skb
* @skb: skb to be transmitted * @skb: Socket buffer to be transmitted
* @dev: network device * @netdev: Network device
* *
* Return codes: * Return: NETDEV_TX_OK if successful (or dropped), NETDEV_TX_BUSY otherwise
* NETDEV_TX_OK: Success
* NETDEV_TX_BUSY: Error while transmitting the skb. Try again later * Normally NETDEV_TX_OK indicates the buffer was successfully transmitted.
* If the buffer has an unexpected protocol or its size is out of range it
* is quietly dropped, returning NETDEV_TX_OK. NETDEV_TX_BUSY indicates
* the buffer cannot be sent at this time and should retried later.
*/ */
static netdev_tx_t static netdev_tx_t
ipa_start_xmit(struct sk_buff *skb, struct net_device *netdev) ipa_start_xmit(struct sk_buff *skb, struct net_device *netdev)
...@@ -132,29 +139,41 @@ ipa_start_xmit(struct sk_buff *skb, struct net_device *netdev) ...@@ -132,29 +139,41 @@ ipa_start_xmit(struct sk_buff *skb, struct net_device *netdev)
if (endpoint->config.qmap && skb->protocol != htons(ETH_P_MAP)) if (endpoint->config.qmap && skb->protocol != htons(ETH_P_MAP))
goto err_drop_skb; goto err_drop_skb;
/* The hardware must be powered for us to transmit */ /* The hardware must be powered for us to transmit, so if we're not
* ready we want the network stack to stop queueing until power is
* ACTIVE. Once runtime resume has completed, we inform the network
* stack it's OK to try transmitting again.
*
* We learn from pm_runtime_get() whether the hardware is powered.
* If it was not, powering up is either started or already underway.
* And in that case we want to disable queueing, expecting it to be
* re-enabled once power is ACTIVE. But runtime PM and network
* transmit run concurrently, and if we're not careful the requests
* to stop and start queueing could occur in the wrong order.
*
* For that reason we *always* stop queueing here, *before* the call
* to pm_runtime_get(). If we determine here that power is ACTIVE,
* we restart queueing before transmitting the SKB. Otherwise
* queueing will eventually be enabled after resume completes.
*/
netif_stop_queue(netdev);
dev = &ipa->pdev->dev; dev = &ipa->pdev->dev;
ret = pm_runtime_get(dev); ret = pm_runtime_get(dev);
if (ret < 1) { if (ret < 1) {
/* If a resume won't happen, just drop the packet */ /* If a resume won't happen, just drop the packet */
if (ret < 0 && ret != -EINPROGRESS) { if (ret < 0 && ret != -EINPROGRESS) {
ipa_power_modem_queue_active(ipa); netif_wake_queue(netdev);
pm_runtime_put_noidle(dev); pm_runtime_put_noidle(dev);
goto err_drop_skb; goto err_drop_skb;
} }
/* No power (yet). Stop the network stack from transmitting
* until we're resumed; ipa_modem_resume() arranges for the
* TX queue to be started again.
*/
ipa_power_modem_queue_stop(ipa);
pm_runtime_put_noidle(dev); pm_runtime_put_noidle(dev);
return NETDEV_TX_BUSY; return NETDEV_TX_BUSY;
} }
ipa_power_modem_queue_active(ipa); netif_wake_queue(netdev);
ret = ipa_endpoint_skb_tx(endpoint, skb); ret = ipa_endpoint_skb_tx(endpoint, skb);
...@@ -233,14 +252,14 @@ static void ipa_modem_netdev_setup(struct net_device *netdev) ...@@ -233,14 +252,14 @@ static void ipa_modem_netdev_setup(struct net_device *netdev)
*/ */
void ipa_modem_suspend(struct net_device *netdev) void ipa_modem_suspend(struct net_device *netdev)
{ {
struct ipa_priv *priv = netdev_priv(netdev); struct ipa_priv *priv;
struct ipa *ipa = priv->ipa;
if (!(netdev->flags & IFF_UP)) if (!(netdev->flags & IFF_UP))
return; return;
ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]); priv = netdev_priv(netdev);
ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]); ipa_endpoint_suspend_one(priv->rx);
ipa_endpoint_suspend_one(priv->tx);
} }
/** /**
...@@ -258,7 +277,7 @@ static void ipa_modem_wake_queue_work(struct work_struct *work) ...@@ -258,7 +277,7 @@ static void ipa_modem_wake_queue_work(struct work_struct *work)
{ {
struct ipa_priv *priv = container_of(work, struct ipa_priv, work); struct ipa_priv *priv = container_of(work, struct ipa_priv, work);
ipa_power_modem_queue_wake(priv->ipa); netif_wake_queue(priv->tx->netdev);
} }
/** ipa_modem_resume() - resume callback for runtime_pm /** ipa_modem_resume() - resume callback for runtime_pm
...@@ -268,14 +287,14 @@ static void ipa_modem_wake_queue_work(struct work_struct *work) ...@@ -268,14 +287,14 @@ static void ipa_modem_wake_queue_work(struct work_struct *work)
*/ */
void ipa_modem_resume(struct net_device *netdev) void ipa_modem_resume(struct net_device *netdev)
{ {
struct ipa_priv *priv = netdev_priv(netdev); struct ipa_priv *priv;
struct ipa *ipa = priv->ipa;
if (!(netdev->flags & IFF_UP)) if (!(netdev->flags & IFF_UP))
return; return;
ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]); priv = netdev_priv(netdev);
ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]); ipa_endpoint_resume_one(priv->tx);
ipa_endpoint_resume_one(priv->rx);
/* Arrange for the TX queue to be restarted */ /* Arrange for the TX queue to be restarted */
(void)queue_pm_work(&priv->work); (void)queue_pm_work(&priv->work);
...@@ -306,16 +325,21 @@ int ipa_modem_start(struct ipa *ipa) ...@@ -306,16 +325,21 @@ int ipa_modem_start(struct ipa *ipa)
SET_NETDEV_DEV(netdev, &ipa->pdev->dev); SET_NETDEV_DEV(netdev, &ipa->pdev->dev);
priv = netdev_priv(netdev); priv = netdev_priv(netdev);
priv->ipa = ipa; priv->ipa = ipa;
priv->tx = ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX];
priv->rx = ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX];
INIT_WORK(&priv->work, ipa_modem_wake_queue_work); INIT_WORK(&priv->work, ipa_modem_wake_queue_work);
ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev;
ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev; priv->tx->netdev = netdev;
priv->rx->netdev = netdev;
ipa->modem_netdev = netdev; ipa->modem_netdev = netdev;
ret = register_netdev(netdev); ret = register_netdev(netdev);
if (ret) { if (ret) {
ipa->modem_netdev = NULL; ipa->modem_netdev = NULL;
ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = NULL; priv->rx->netdev = NULL;
ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = NULL; priv->tx->netdev = NULL;
free_netdev(netdev); free_netdev(netdev);
} }
...@@ -355,9 +379,11 @@ int ipa_modem_stop(struct ipa *ipa) ...@@ -355,9 +379,11 @@ int ipa_modem_stop(struct ipa *ipa)
if (netdev->flags & IFF_UP) if (netdev->flags & IFF_UP)
(void)ipa_stop(netdev); (void)ipa_stop(netdev);
unregister_netdev(netdev); unregister_netdev(netdev);
ipa->modem_netdev = NULL; ipa->modem_netdev = NULL;
ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = NULL; priv->rx->netdev = NULL;
ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = NULL; priv->tx->netdev = NULL;
free_netdev(netdev); free_netdev(netdev);
} }
......
...@@ -38,15 +38,11 @@ ...@@ -38,15 +38,11 @@
* enum ipa_power_flag - IPA power flags * enum ipa_power_flag - IPA power flags
* @IPA_POWER_FLAG_RESUMED: Whether resume from suspend has been signaled * @IPA_POWER_FLAG_RESUMED: Whether resume from suspend has been signaled
* @IPA_POWER_FLAG_SYSTEM: Hardware is system (not runtime) suspended * @IPA_POWER_FLAG_SYSTEM: Hardware is system (not runtime) suspended
* @IPA_POWER_FLAG_STOPPED: Modem TX is disabled by ipa_start_xmit()
* @IPA_POWER_FLAG_STARTED: Modem TX was enabled by ipa_runtime_resume()
* @IPA_POWER_FLAG_COUNT: Number of defined power flags * @IPA_POWER_FLAG_COUNT: Number of defined power flags
*/ */
enum ipa_power_flag { enum ipa_power_flag {
IPA_POWER_FLAG_RESUMED, IPA_POWER_FLAG_RESUMED,
IPA_POWER_FLAG_SYSTEM, IPA_POWER_FLAG_SYSTEM,
IPA_POWER_FLAG_STOPPED,
IPA_POWER_FLAG_STARTED,
IPA_POWER_FLAG_COUNT, /* Last; not a flag */ IPA_POWER_FLAG_COUNT, /* Last; not a flag */
}; };
...@@ -55,7 +51,6 @@ enum ipa_power_flag { ...@@ -55,7 +51,6 @@ enum ipa_power_flag {
* @dev: IPA device pointer * @dev: IPA device pointer
* @core: IPA core clock * @core: IPA core clock
* @qmp: QMP handle for AOSS communication * @qmp: QMP handle for AOSS communication
* @spinlock: Protects modem TX queue enable/disable
* @flags: Boolean state flags * @flags: Boolean state flags
* @interconnect_count: Number of elements in interconnect[] * @interconnect_count: Number of elements in interconnect[]
* @interconnect: Interconnect array * @interconnect: Interconnect array
...@@ -64,7 +59,6 @@ struct ipa_power { ...@@ -64,7 +59,6 @@ struct ipa_power {
struct device *dev; struct device *dev;
struct clk *core; struct clk *core;
struct qmp *qmp; struct qmp *qmp;
spinlock_t spinlock; /* used with STOPPED/STARTED power flags */
DECLARE_BITMAP(flags, IPA_POWER_FLAG_COUNT); DECLARE_BITMAP(flags, IPA_POWER_FLAG_COUNT);
u32 interconnect_count; u32 interconnect_count;
struct icc_bulk_data interconnect[] __counted_by(interconnect_count); struct icc_bulk_data interconnect[] __counted_by(interconnect_count);
...@@ -233,70 +227,6 @@ void ipa_power_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id) ...@@ -233,70 +227,6 @@ void ipa_power_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id)
ipa_interrupt_suspend_clear_all(ipa->interrupt); ipa_interrupt_suspend_clear_all(ipa->interrupt);
} }
/* The next few functions coordinate stopping and starting the modem
* network device transmit queue.
*
* Transmit can be running concurrent with power resume, and there's a
* chance the resume completes before the transmit path stops the queue,
* leaving the queue in a stopped state. The next two functions are used
* to avoid this: ipa_power_modem_queue_stop() is used by ipa_start_xmit()
* to conditionally stop the TX queue; and ipa_power_modem_queue_start()
* is used by ipa_runtime_resume() to conditionally restart it.
*
* Two flags and a spinlock are used. If the queue is stopped, the STOPPED
* power flag is set. And if the queue is started, the STARTED flag is set.
* The queue is only started on resume if the STOPPED flag is set. And the
* queue is only started in ipa_start_xmit() if the STARTED flag is *not*
* set. As a result, the queue remains operational if the two activites
* happen concurrently regardless of the order they complete. The spinlock
* ensures the flag and TX queue operations are done atomically.
*
* The first function stops the modem netdev transmit queue, but only if
* the STARTED flag is *not* set. That flag is cleared if it was set.
* If the queue is stopped, the STOPPED flag is set. This is called only
* from the power ->runtime_resume operation.
*/
void ipa_power_modem_queue_stop(struct ipa *ipa)
{
struct ipa_power *power = ipa->power;
unsigned long flags;
spin_lock_irqsave(&power->spinlock, flags);
if (!__test_and_clear_bit(IPA_POWER_FLAG_STARTED, power->flags)) {
netif_stop_queue(ipa->modem_netdev);
__set_bit(IPA_POWER_FLAG_STOPPED, power->flags);
}
spin_unlock_irqrestore(&power->spinlock, flags);
}
/* This function starts the modem netdev transmit queue, but only if the
* STOPPED flag is set. That flag is cleared if it was set. If the queue
* was restarted, the STARTED flag is set; this allows ipa_start_xmit()
* to skip stopping the queue in the event of a race.
*/
void ipa_power_modem_queue_wake(struct ipa *ipa)
{
struct ipa_power *power = ipa->power;
unsigned long flags;
spin_lock_irqsave(&power->spinlock, flags);
if (__test_and_clear_bit(IPA_POWER_FLAG_STOPPED, power->flags)) {
__set_bit(IPA_POWER_FLAG_STARTED, power->flags);
netif_wake_queue(ipa->modem_netdev);
}
spin_unlock_irqrestore(&power->spinlock, flags);
}
/* This function clears the STARTED flag once the TX queue is operating */
void ipa_power_modem_queue_active(struct ipa *ipa)
{
clear_bit(IPA_POWER_FLAG_STARTED, ipa->power->flags);
}
static int ipa_power_retention_init(struct ipa_power *power) static int ipa_power_retention_init(struct ipa_power *power)
{ {
struct qmp *qmp = qmp_get(power->dev); struct qmp *qmp = qmp_get(power->dev);
...@@ -385,7 +315,6 @@ ipa_power_init(struct device *dev, const struct ipa_power_data *data) ...@@ -385,7 +315,6 @@ ipa_power_init(struct device *dev, const struct ipa_power_data *data)
} }
power->dev = dev; power->dev = dev;
power->core = clk; power->core = clk;
spin_lock_init(&power->spinlock);
power->interconnect_count = data->interconnect_count; power->interconnect_count = data->interconnect_count;
ret = ipa_interconnect_init(power, data->interconnect_data); ret = ipa_interconnect_init(power, data->interconnect_data);
......
...@@ -23,24 +23,6 @@ extern const struct dev_pm_ops ipa_pm_ops; ...@@ -23,24 +23,6 @@ extern const struct dev_pm_ops ipa_pm_ops;
*/ */
u32 ipa_core_clock_rate(struct ipa *ipa); u32 ipa_core_clock_rate(struct ipa *ipa);
/**
* ipa_power_modem_queue_stop() - Possibly stop the modem netdev TX queue
* @ipa: IPA pointer
*/
void ipa_power_modem_queue_stop(struct ipa *ipa);
/**
* ipa_power_modem_queue_wake() - Possibly wake the modem netdev TX queue
* @ipa: IPA pointer
*/
void ipa_power_modem_queue_wake(struct ipa *ipa);
/**
* ipa_power_modem_queue_active() - Report modem netdev TX queue active
* @ipa: IPA pointer
*/
void ipa_power_modem_queue_active(struct ipa *ipa);
/** /**
* ipa_power_retention() - Control register retention on power collapse * ipa_power_retention() - Control register retention on power collapse
* @ipa: IPA pointer * @ipa: IPA pointer
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment