Commit 1e560864 authored by Johan Hovold, committed by Bjorn Helgaas

PCI/ASPM: Fix deadlock when enabling ASPM

A last minute revert in 6.7-final introduced a potential deadlock when
enabling ASPM during probe of Qualcomm PCIe controllers as reported by
lockdep:

  ============================================
  WARNING: possible recursive locking detected
  6.7.0 #40 Not tainted
  --------------------------------------------
  kworker/u16:5/90 is trying to acquire lock:
  ffffacfa78ced000 (pci_bus_sem){++++}-{3:3}, at: pcie_aspm_pm_state_change+0x58/0xdc

              but task is already holding lock:
  ffffacfa78ced000 (pci_bus_sem){++++}-{3:3}, at: pci_walk_bus+0x34/0xbc

              other info that might help us debug this:
   Possible unsafe locking scenario:

         CPU0
         ----
    lock(pci_bus_sem);
    lock(pci_bus_sem);

               *** DEADLOCK ***

  Call trace:
   print_deadlock_bug+0x25c/0x348
   __lock_acquire+0x10a4/0x2064
   lock_acquire+0x1e8/0x318
   down_read+0x60/0x184
   pcie_aspm_pm_state_change+0x58/0xdc
   pci_set_full_power_state+0xa8/0x114
   pci_set_power_state+0xc4/0x120
   qcom_pcie_enable_aspm+0x1c/0x3c [pcie_qcom]
   pci_walk_bus+0x64/0xbc
   qcom_pcie_host_post_init_2_7_0+0x28/0x34 [pcie_qcom]

The deadlock can easily be reproduced on machines like the Lenovo ThinkPad
X13s by adding a delay to increase the race window during asynchronous
probe where another thread can take a write lock.

Add a new pci_set_power_state_locked() and associated helper functions that
can be called with the PCI bus semaphore held to avoid taking the read lock
twice.

Link: https://lore.kernel.org/r/ZZu0qx2cmn7IwTyQ@hovoldconsulting.com
Link: https://lore.kernel.org/r/20240130100243.11011-1-johan+linaro@kernel.org
Fixes: f93e71ae ("Revert "PCI/ASPM: Remove pcie_aspm_pm_state_change()"")
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: <stable@vger.kernel.org>	# 6.7
parent 6613476e
...@@ -386,21 +386,8 @@ void pci_bus_add_devices(const struct pci_bus *bus) ...@@ -386,21 +386,8 @@ void pci_bus_add_devices(const struct pci_bus *bus)
} }
EXPORT_SYMBOL(pci_bus_add_devices); EXPORT_SYMBOL(pci_bus_add_devices);
/** pci_walk_bus - walk devices on/under bus, calling callback. static void __pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
* @top bus whose devices should be walked void *userdata, bool locked)
* @cb callback to be called for each device found
* @userdata arbitrary pointer to be passed to callback.
*
* Walk the given bus, including any bridged devices
* on buses under this bus. Call the provided callback
* on each device found.
*
* We check the return of @cb each time. If it returns anything
* other than 0, we break out.
*
*/
void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
void *userdata)
{ {
struct pci_dev *dev; struct pci_dev *dev;
struct pci_bus *bus; struct pci_bus *bus;
...@@ -408,7 +395,8 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), ...@@ -408,7 +395,8 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
int retval; int retval;
bus = top; bus = top;
down_read(&pci_bus_sem); if (!locked)
down_read(&pci_bus_sem);
next = top->devices.next; next = top->devices.next;
for (;;) { for (;;) {
if (next == &bus->devices) { if (next == &bus->devices) {
...@@ -431,10 +419,37 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), ...@@ -431,10 +419,37 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
if (retval) if (retval)
break; break;
} }
up_read(&pci_bus_sem); if (!locked)
up_read(&pci_bus_sem);
}
/**
* pci_walk_bus - walk devices on/under bus, calling callback.
* @top: bus whose devices should be walked
* @cb: callback to be called for each device found
* @userdata: arbitrary pointer to be passed to callback
*
* Walk the given bus, including any bridged devices
* on buses under this bus. Call the provided callback
* on each device found.
*
* We check the return of @cb each time. If it returns anything
* other than 0, we break out.
*/
void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata)
{
__pci_walk_bus(top, cb, userdata, false);
} }
EXPORT_SYMBOL_GPL(pci_walk_bus); EXPORT_SYMBOL_GPL(pci_walk_bus);
/**
 * pci_walk_bus_locked - walk devices on/under bus with pci_bus_sem held
 * @top: bus whose devices should be walked
 * @cb: callback to be called for each device found
 * @userdata: arbitrary pointer to be passed to callback
 *
 * Counterpart of pci_walk_bus() for callers that already hold pci_bus_sem.
 * The walk is delegated to __pci_walk_bus() with its "locked" argument set,
 * so the semaphore is neither acquired nor released by the walk itself.
 */
void pci_walk_bus_locked(struct pci_bus *top,
			 int (*cb)(struct pci_dev *, void *),
			 void *userdata)
{
	/* Let lockdep complain if the caller does not hold the semaphore. */
	lockdep_assert_held(&pci_bus_sem);

	__pci_walk_bus(top, cb, userdata, true);
}
EXPORT_SYMBOL_GPL(pci_walk_bus_locked);
struct pci_bus *pci_bus_get(struct pci_bus *bus) struct pci_bus *pci_bus_get(struct pci_bus *bus)
{ {
if (bus) if (bus)
......
...@@ -972,7 +972,7 @@ static int qcom_pcie_enable_aspm(struct pci_dev *pdev, void *userdata) ...@@ -972,7 +972,7 @@ static int qcom_pcie_enable_aspm(struct pci_dev *pdev, void *userdata)
* Downstream devices need to be in D0 state before enabling PCI PM * Downstream devices need to be in D0 state before enabling PCI PM
* substates. * substates.
*/ */
pci_set_power_state(pdev, PCI_D0); pci_set_power_state_locked(pdev, PCI_D0);
pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL); pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL);
return 0; return 0;
......
...@@ -1354,6 +1354,7 @@ int pci_power_up(struct pci_dev *dev) ...@@ -1354,6 +1354,7 @@ int pci_power_up(struct pci_dev *dev)
/** /**
* pci_set_full_power_state - Put a PCI device into D0 and update its state * pci_set_full_power_state - Put a PCI device into D0 and update its state
* @dev: PCI device to power up * @dev: PCI device to power up
* @locked: whether pci_bus_sem is held
* *
* Call pci_power_up() to put @dev into D0, read from its PCI_PM_CTRL register * Call pci_power_up() to put @dev into D0, read from its PCI_PM_CTRL register
* to confirm the state change, restore its BARs if they might be lost and * to confirm the state change, restore its BARs if they might be lost and
...@@ -1363,7 +1364,7 @@ int pci_power_up(struct pci_dev *dev) ...@@ -1363,7 +1364,7 @@ int pci_power_up(struct pci_dev *dev)
* to D0, it is more efficient to use pci_power_up() directly instead of this * to D0, it is more efficient to use pci_power_up() directly instead of this
* function. * function.
*/ */
static int pci_set_full_power_state(struct pci_dev *dev) static int pci_set_full_power_state(struct pci_dev *dev, bool locked)
{ {
u16 pmcsr; u16 pmcsr;
int ret; int ret;
...@@ -1399,7 +1400,7 @@ static int pci_set_full_power_state(struct pci_dev *dev) ...@@ -1399,7 +1400,7 @@ static int pci_set_full_power_state(struct pci_dev *dev)
} }
if (dev->bus->self) if (dev->bus->self)
pcie_aspm_pm_state_change(dev->bus->self); pcie_aspm_pm_state_change(dev->bus->self, locked);
return 0; return 0;
} }
...@@ -1428,10 +1429,22 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state) ...@@ -1428,10 +1429,22 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state)
pci_walk_bus(bus, __pci_dev_set_current_state, &state); pci_walk_bus(bus, __pci_dev_set_current_state, &state);
} }
/*
 * Run __pci_dev_set_current_state() with @state over every device on and
 * below @bus.  A NULL @bus is silently ignored.
 *
 * @locked tells whether the caller already holds pci_bus_sem; it selects
 * between pci_walk_bus_locked() and pci_walk_bus() for the walk.
 */
static void __pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state,
					bool locked)
{
	if (bus) {
		if (!locked)
			pci_walk_bus(bus, __pci_dev_set_current_state, &state);
		else
			pci_walk_bus_locked(bus, __pci_dev_set_current_state, &state);
	}
}
/** /**
* pci_set_low_power_state - Put a PCI device into a low-power state. * pci_set_low_power_state - Put a PCI device into a low-power state.
* @dev: PCI device to handle. * @dev: PCI device to handle.
* @state: PCI power state (D1, D2, D3hot) to put the device into. * @state: PCI power state (D1, D2, D3hot) to put the device into.
* @locked: whether pci_bus_sem is held
* *
* Use the device's PCI_PM_CTRL register to put it into a low-power state. * Use the device's PCI_PM_CTRL register to put it into a low-power state.
* *
...@@ -1442,7 +1455,7 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state) ...@@ -1442,7 +1455,7 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state)
* 0 if device already is in the requested state. * 0 if device already is in the requested state.
* 0 if device's power state has been successfully changed. * 0 if device's power state has been successfully changed.
*/ */
static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state) static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state, bool locked)
{ {
u16 pmcsr; u16 pmcsr;
...@@ -1496,29 +1509,12 @@ static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state) ...@@ -1496,29 +1509,12 @@ static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state)
pci_power_name(state)); pci_power_name(state));
if (dev->bus->self) if (dev->bus->self)
pcie_aspm_pm_state_change(dev->bus->self); pcie_aspm_pm_state_change(dev->bus->self, locked);
return 0; return 0;
} }
/** static int __pci_set_power_state(struct pci_dev *dev, pci_power_t state, bool locked)
* pci_set_power_state - Set the power state of a PCI device
* @dev: PCI device to handle.
* @state: PCI power state (D0, D1, D2, D3hot) to put the device into.
*
* Transition a device to a new power state, using the platform firmware and/or
* the device's PCI PM registers.
*
* RETURN VALUE:
* -EINVAL if the requested state is invalid.
* -EIO if device does not support PCI PM or its PM capabilities register has a
* wrong version, or device doesn't support the requested state.
* 0 if the transition is to D1 or D2 but D1 and D2 are not supported.
* 0 if device already is in the requested state.
* 0 if the transition is to D3 but D3 is not supported.
* 0 if device's power state has been successfully changed.
*/
int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
{ {
int error; int error;
...@@ -1542,7 +1538,7 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) ...@@ -1542,7 +1538,7 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
return 0; return 0;
if (state == PCI_D0) if (state == PCI_D0)
return pci_set_full_power_state(dev); return pci_set_full_power_state(dev, locked);
/* /*
* This device is quirked not to be put into D3, so don't put it in * This device is quirked not to be put into D3, so don't put it in
...@@ -1556,16 +1552,16 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) ...@@ -1556,16 +1552,16 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
* To put the device in D3cold, put it into D3hot in the native * To put the device in D3cold, put it into D3hot in the native
* way, then put it into D3cold using platform ops. * way, then put it into D3cold using platform ops.
*/ */
error = pci_set_low_power_state(dev, PCI_D3hot); error = pci_set_low_power_state(dev, PCI_D3hot, locked);
if (pci_platform_power_transition(dev, PCI_D3cold)) if (pci_platform_power_transition(dev, PCI_D3cold))
return error; return error;
/* Powering off a bridge may power off the whole hierarchy */ /* Powering off a bridge may power off the whole hierarchy */
if (dev->current_state == PCI_D3cold) if (dev->current_state == PCI_D3cold)
pci_bus_set_current_state(dev->subordinate, PCI_D3cold); __pci_bus_set_current_state(dev->subordinate, PCI_D3cold, locked);
} else { } else {
error = pci_set_low_power_state(dev, state); error = pci_set_low_power_state(dev, state, locked);
if (pci_platform_power_transition(dev, state)) if (pci_platform_power_transition(dev, state))
return error; return error;
...@@ -1573,8 +1569,38 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) ...@@ -1573,8 +1569,38 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
return 0; return 0;
} }
/**
 * pci_set_power_state - Set the power state of a PCI device
 * @dev: PCI device to handle.
 * @state: PCI power state (D0, D1, D2, D3hot) to put the device into.
 *
 * Move @dev to the requested power state by means of the platform firmware
 * and/or the device's own PCI PM registers.
 *
 * This is the entry point for callers that do not hold pci_bus_sem: it
 * forwards to __pci_set_power_state() with its "locked" argument cleared.
 *
 * RETURN VALUE:
 * -EINVAL if the requested state is invalid.
 * -EIO if device does not support PCI PM or its PM capabilities register has a
 * wrong version, or device doesn't support the requested state.
 * 0 if the transition is to D1 or D2 but D1 and D2 are not supported.
 * 0 if device already is in the requested state.
 * 0 if the transition is to D3 but D3 is not supported.
 * 0 if device's power state has been successfully changed.
 */
int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
{
	/* false: pci_bus_sem is not held by this caller. */
	return __pci_set_power_state(dev, state, false);
}
EXPORT_SYMBOL(pci_set_power_state); EXPORT_SYMBOL(pci_set_power_state);
/**
 * pci_set_power_state_locked - Set the power state of a PCI device while
 *				holding pci_bus_sem
 * @dev: PCI device to handle.
 * @state: PCI power state to put the device into.
 *
 * Variant of pci_set_power_state() for callers that already hold
 * pci_bus_sem.  It forwards to __pci_set_power_state() with its "locked"
 * argument set and returns whatever that helper returns.
 */
int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state)
{
	/* Let lockdep complain if the caller does not hold the semaphore. */
	lockdep_assert_held(&pci_bus_sem);

	return __pci_set_power_state(dev, state, true);
}
EXPORT_SYMBOL(pci_set_power_state_locked);
#define PCI_EXP_SAVE_REGS 7 #define PCI_EXP_SAVE_REGS 7
static struct pci_cap_saved_state *_pci_find_saved_cap(struct pci_dev *pci_dev, static struct pci_cap_saved_state *_pci_find_saved_cap(struct pci_dev *pci_dev,
......
...@@ -571,12 +571,12 @@ int pcie_retrain_link(struct pci_dev *pdev, bool use_lt); ...@@ -571,12 +571,12 @@ int pcie_retrain_link(struct pci_dev *pdev, bool use_lt);
#ifdef CONFIG_PCIEASPM #ifdef CONFIG_PCIEASPM
void pcie_aspm_init_link_state(struct pci_dev *pdev); void pcie_aspm_init_link_state(struct pci_dev *pdev);
void pcie_aspm_exit_link_state(struct pci_dev *pdev); void pcie_aspm_exit_link_state(struct pci_dev *pdev);
void pcie_aspm_pm_state_change(struct pci_dev *pdev); void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked);
void pcie_aspm_powersave_config_link(struct pci_dev *pdev); void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
#else #else
static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { } static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { } static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev) { } static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked) { }
static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { } static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { }
#endif #endif
......
...@@ -1003,8 +1003,11 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev) ...@@ -1003,8 +1003,11 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev)
up_read(&pci_bus_sem); up_read(&pci_bus_sem);
} }
/* @pdev: the root port or switch downstream port */ /*
void pcie_aspm_pm_state_change(struct pci_dev *pdev) * @pdev: the root port or switch downstream port
* @locked: whether pci_bus_sem is held
*/
void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked)
{ {
struct pcie_link_state *link = pdev->link_state; struct pcie_link_state *link = pdev->link_state;
...@@ -1014,12 +1017,14 @@ void pcie_aspm_pm_state_change(struct pci_dev *pdev) ...@@ -1014,12 +1017,14 @@ void pcie_aspm_pm_state_change(struct pci_dev *pdev)
* Devices changed PM state, we should recheck if latency * Devices changed PM state, we should recheck if latency
* meets all functions' requirement * meets all functions' requirement
*/ */
down_read(&pci_bus_sem); if (!locked)
down_read(&pci_bus_sem);
mutex_lock(&aspm_lock); mutex_lock(&aspm_lock);
pcie_update_aspm_capable(link->root); pcie_update_aspm_capable(link->root);
pcie_config_aspm_path(link); pcie_config_aspm_path(link);
mutex_unlock(&aspm_lock); mutex_unlock(&aspm_lock);
up_read(&pci_bus_sem); if (!locked)
up_read(&pci_bus_sem);
} }
void pcie_aspm_powersave_config_link(struct pci_dev *pdev) void pcie_aspm_powersave_config_link(struct pci_dev *pdev)
......
...@@ -1422,6 +1422,7 @@ int pci_load_and_free_saved_state(struct pci_dev *dev, ...@@ -1422,6 +1422,7 @@ int pci_load_and_free_saved_state(struct pci_dev *dev,
struct pci_saved_state **state); struct pci_saved_state **state);
int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state); int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state);
int pci_set_power_state(struct pci_dev *dev, pci_power_t state); int pci_set_power_state(struct pci_dev *dev, pci_power_t state);
int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state);
pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state); pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state);
bool pci_pme_capable(struct pci_dev *dev, pci_power_t state); bool pci_pme_capable(struct pci_dev *dev, pci_power_t state);
void pci_pme_active(struct pci_dev *dev, bool enable); void pci_pme_active(struct pci_dev *dev, bool enable);
...@@ -1625,6 +1626,8 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max, ...@@ -1625,6 +1626,8 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
void *userdata); void *userdata);
void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
void *userdata);
int pci_cfg_space_size(struct pci_dev *dev); int pci_cfg_space_size(struct pci_dev *dev);
unsigned char pci_bus_max_busnr(struct pci_bus *bus); unsigned char pci_bus_max_busnr(struct pci_bus *bus);
void pci_setup_bridge(struct pci_bus *bus); void pci_setup_bridge(struct pci_bus *bus);
...@@ -2025,6 +2028,8 @@ static inline int pci_save_state(struct pci_dev *dev) { return 0; } ...@@ -2025,6 +2028,8 @@ static inline int pci_save_state(struct pci_dev *dev) { return 0; }
static inline void pci_restore_state(struct pci_dev *dev) { } static inline void pci_restore_state(struct pci_dev *dev) { }
static inline int pci_set_power_state(struct pci_dev *dev, pci_power_t state) static inline int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
{ return 0; } { return 0; }
static inline int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state)
{ return 0; }
static inline int pci_wake_from_d3(struct pci_dev *dev, bool enable) static inline int pci_wake_from_d3(struct pci_dev *dev, bool enable)
{ return 0; } { return 0; }
static inline pci_power_t pci_choose_state(struct pci_dev *dev, static inline pci_power_t pci_choose_state(struct pci_dev *dev,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment