Commit bcb5d6c7 authored by Gerd Bayer's avatar Gerd Bayer Committed by Heiko Carstens

s390/pci: introduce lock to synchronize state of zpci_dev's

There's a number of tasks that need the state of a zpci device
to be stable. Other tasks need to be synchronized as they change the state.

State changes could be generated by the system as availability or error
events, or be requested by the user through manipulations in sysfs.
Some other actions accessible through sysfs - like device resets - need the
state to be stable.

Unsynchronized state handling could lead to unusable devices. This has
been observed in cases of concurrent state changes through systemd udev
rules and DPM boot control. Some breakage can be provoked by artificial
tests, e.g. through repetitively injecting "recover" on a PCI function
through sysfs while running a "hotplug remove/add" in a loop through a
PCI slot's "power" attribute in sysfs. After a few iterations this could
result in a kernel oops.

So introduce a new mutex "state_lock" to guard the state property of the
struct zpci_dev. Acquire this lock in all task that modify the state:

- hotplug add and remove, through the PCI hotplug slot entry,
- avaiability events, as reported by the platform,
- error events, as reported by the platform,
- during device resets, explicit through sysfs requests or
  implict through the common PCI layer.

Break out an inner _do_recover() routine out of recover_store() to
separte the necessary synchronizations from the actual manipulations of
the zpci_dev required for the reset.

With the following changes I was able to run the inject loops for hours
without hitting an error.
Signed-off-by: default avatarGerd Bayer <gbayer@linux.ibm.com>
Reviewed-by: default avatarNiklas Schnelle <schnelle@linux.ibm.com>
Signed-off-by: default avatarHeiko Carstens <hca@linux.ibm.com>
parent 0d48566d
...@@ -122,6 +122,7 @@ struct zpci_dev { ...@@ -122,6 +122,7 @@ struct zpci_dev {
struct rcu_head rcu; struct rcu_head rcu;
struct hotplug_slot hotplug_slot; struct hotplug_slot hotplug_slot;
struct mutex state_lock; /* protect state changes */
enum zpci_state state; enum zpci_state state;
u32 fid; /* function ID, used by sclp */ u32 fid; /* function ID, used by sclp */
u32 fh; /* function handle, used by insn's */ u32 fh; /* function handle, used by insn's */
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include <linux/jump_label.h> #include <linux/jump_label.h>
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/printk.h> #include <linux/printk.h>
#include <linux/lockdep.h>
#include <asm/isc.h> #include <asm/isc.h>
#include <asm/airq.h> #include <asm/airq.h>
...@@ -730,12 +731,12 @@ EXPORT_SYMBOL_GPL(zpci_disable_device); ...@@ -730,12 +731,12 @@ EXPORT_SYMBOL_GPL(zpci_disable_device);
* equivalent to its state during boot when first probing a driver. * equivalent to its state during boot when first probing a driver.
* Consequently after reset the PCI function requires re-initialization via the * Consequently after reset the PCI function requires re-initialization via the
* common PCI code including re-enabling IRQs via pci_alloc_irq_vectors() * common PCI code including re-enabling IRQs via pci_alloc_irq_vectors()
* and enabling the function via e.g.pci_enablde_device_flags().The caller * and enabling the function via e.g. pci_enable_device_flags(). The caller
* must guard against concurrent reset attempts. * must guard against concurrent reset attempts.
* *
* In most cases this function should not be called directly but through * In most cases this function should not be called directly but through
* pci_reset_function() or pci_reset_bus() which handle the save/restore and * pci_reset_function() or pci_reset_bus() which handle the save/restore and
* locking. * locking - asserted by lockdep.
* *
* Return: 0 on success and an error value otherwise * Return: 0 on success and an error value otherwise
*/ */
...@@ -744,6 +745,7 @@ int zpci_hot_reset_device(struct zpci_dev *zdev) ...@@ -744,6 +745,7 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
u8 status; u8 status;
int rc; int rc;
lockdep_assert_held(&zdev->state_lock);
zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh); zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh);
if (zdev_enabled(zdev)) { if (zdev_enabled(zdev)) {
/* Disables device access, DMAs and IRQs (reset state) */ /* Disables device access, DMAs and IRQs (reset state) */
...@@ -806,6 +808,7 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state) ...@@ -806,6 +808,7 @@ struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
zdev->state = state; zdev->state = state;
kref_init(&zdev->kref); kref_init(&zdev->kref);
mutex_init(&zdev->state_lock);
mutex_init(&zdev->fmb_lock); mutex_init(&zdev->fmb_lock);
mutex_init(&zdev->kzdev_lock); mutex_init(&zdev->kzdev_lock);
...@@ -870,6 +873,10 @@ int zpci_deconfigure_device(struct zpci_dev *zdev) ...@@ -870,6 +873,10 @@ int zpci_deconfigure_device(struct zpci_dev *zdev)
{ {
int rc; int rc;
lockdep_assert_held(&zdev->state_lock);
if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
return 0;
if (zdev->zbus->bus) if (zdev->zbus->bus)
zpci_bus_remove_device(zdev, false); zpci_bus_remove_device(zdev, false);
......
...@@ -267,6 +267,7 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) ...@@ -267,6 +267,7 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
zpci_err_hex(ccdf, sizeof(*ccdf)); zpci_err_hex(ccdf, sizeof(*ccdf));
if (zdev) { if (zdev) {
mutex_lock(&zdev->state_lock);
zpci_update_fh(zdev, ccdf->fh); zpci_update_fh(zdev, ccdf->fh);
if (zdev->zbus->bus) if (zdev->zbus->bus)
pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
...@@ -294,6 +295,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) ...@@ -294,6 +295,8 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
} }
pci_dev_put(pdev); pci_dev_put(pdev);
no_pdev: no_pdev:
if (zdev)
mutex_unlock(&zdev->state_lock);
zpci_zdev_put(zdev); zpci_zdev_put(zdev);
} }
...@@ -326,6 +329,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) ...@@ -326,6 +329,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n", zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n",
ccdf->fid, ccdf->fh, ccdf->pec); ccdf->fid, ccdf->fh, ccdf->pec);
if (existing_zdev)
mutex_lock(&zdev->state_lock);
switch (ccdf->pec) { switch (ccdf->pec) {
case 0x0301: /* Reserved|Standby -> Configured */ case 0x0301: /* Reserved|Standby -> Configured */
if (!zdev) { if (!zdev) {
...@@ -383,8 +390,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) ...@@ -383,8 +390,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
default: default:
break; break;
} }
if (existing_zdev) if (existing_zdev) {
mutex_unlock(&zdev->state_lock);
zpci_zdev_put(zdev); zpci_zdev_put(zdev);
}
} }
void zpci_event_availability(void *data) void zpci_event_availability(void *data)
......
...@@ -49,6 +49,39 @@ static ssize_t mio_enabled_show(struct device *dev, ...@@ -49,6 +49,39 @@ static ssize_t mio_enabled_show(struct device *dev,
} }
static DEVICE_ATTR_RO(mio_enabled); static DEVICE_ATTR_RO(mio_enabled);
static int _do_recover(struct pci_dev *pdev, struct zpci_dev *zdev)
{
u8 status;
int ret;
pci_stop_and_remove_bus_device(pdev);
if (zdev_enabled(zdev)) {
ret = zpci_disable_device(zdev);
/*
* Due to a z/VM vs LPAR inconsistency in the error
* state the FH may indicate an enabled device but
* disable says the device is already disabled don't
* treat it as an error here.
*/
if (ret == -EINVAL)
ret = 0;
if (ret)
return ret;
}
ret = zpci_enable_device(zdev);
if (ret)
return ret;
if (zdev->dma_table) {
ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
virt_to_phys(zdev->dma_table), &status);
if (ret)
zpci_disable_device(zdev);
}
return ret;
}
static ssize_t recover_store(struct device *dev, struct device_attribute *attr, static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count) const char *buf, size_t count)
{ {
...@@ -56,7 +89,6 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, ...@@ -56,7 +89,6 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
struct pci_dev *pdev = to_pci_dev(dev); struct pci_dev *pdev = to_pci_dev(dev);
struct zpci_dev *zdev = to_zpci(pdev); struct zpci_dev *zdev = to_zpci(pdev);
int ret = 0; int ret = 0;
u8 status;
/* Can't use device_remove_self() here as that would lead us to lock /* Can't use device_remove_self() here as that would lead us to lock
* the pci_rescan_remove_lock while holding the device' kernfs lock. * the pci_rescan_remove_lock while holding the device' kernfs lock.
...@@ -70,6 +102,12 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, ...@@ -70,6 +102,12 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
*/ */
kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
WARN_ON_ONCE(!kn); WARN_ON_ONCE(!kn);
/* Device needs to be configured and state must not change */
mutex_lock(&zdev->state_lock);
if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
goto out;
/* device_remove_file() serializes concurrent calls ignoring all but /* device_remove_file() serializes concurrent calls ignoring all but
* the first * the first
*/ */
...@@ -82,35 +120,13 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, ...@@ -82,35 +120,13 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
*/ */
pci_lock_rescan_remove(); pci_lock_rescan_remove();
if (pci_dev_is_added(pdev)) { if (pci_dev_is_added(pdev)) {
pci_stop_and_remove_bus_device(pdev); ret = _do_recover(pdev, zdev);
if (zdev_enabled(zdev)) {
ret = zpci_disable_device(zdev);
/*
* Due to a z/VM vs LPAR inconsistency in the error
* state the FH may indicate an enabled device but
* disable says the device is already disabled don't
* treat it as an error here.
*/
if (ret == -EINVAL)
ret = 0;
if (ret)
goto out;
} }
ret = zpci_enable_device(zdev);
if (ret)
goto out;
if (zdev->dma_table) {
ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
virt_to_phys(zdev->dma_table), &status);
if (ret)
zpci_disable_device(zdev);
}
}
out:
pci_rescan_bus(zdev->zbus->bus); pci_rescan_bus(zdev->zbus->bus);
pci_unlock_rescan_remove(); pci_unlock_rescan_remove();
out:
mutex_unlock(&zdev->state_lock);
if (kn) if (kn)
sysfs_unbreak_active_protection(kn); sysfs_unbreak_active_protection(kn);
return ret ? ret : count; return ret ? ret : count;
......
...@@ -26,58 +26,79 @@ static int enable_slot(struct hotplug_slot *hotplug_slot) ...@@ -26,58 +26,79 @@ static int enable_slot(struct hotplug_slot *hotplug_slot)
hotplug_slot); hotplug_slot);
int rc; int rc;
if (zdev->state != ZPCI_FN_STATE_STANDBY) mutex_lock(&zdev->state_lock);
return -EIO; if (zdev->state != ZPCI_FN_STATE_STANDBY) {
rc = -EIO;
goto out;
}
rc = sclp_pci_configure(zdev->fid); rc = sclp_pci_configure(zdev->fid);
zpci_dbg(3, "conf fid:%x, rc:%d\n", zdev->fid, rc); zpci_dbg(3, "conf fid:%x, rc:%d\n", zdev->fid, rc);
if (rc) if (rc)
return rc; goto out;
zdev->state = ZPCI_FN_STATE_CONFIGURED; zdev->state = ZPCI_FN_STATE_CONFIGURED;
return zpci_scan_configured_device(zdev, zdev->fh); rc = zpci_scan_configured_device(zdev, zdev->fh);
out:
mutex_unlock(&zdev->state_lock);
return rc;
} }
static int disable_slot(struct hotplug_slot *hotplug_slot) static int disable_slot(struct hotplug_slot *hotplug_slot)
{ {
struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev, struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev,
hotplug_slot); hotplug_slot);
struct pci_dev *pdev; struct pci_dev *pdev = NULL;
int rc;
if (zdev->state != ZPCI_FN_STATE_CONFIGURED) mutex_lock(&zdev->state_lock);
return -EIO; if (zdev->state != ZPCI_FN_STATE_CONFIGURED) {
rc = -EIO;
goto out;
}
pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
if (pdev && pci_num_vf(pdev)) { if (pdev && pci_num_vf(pdev)) {
pci_dev_put(pdev); pci_dev_put(pdev);
return -EBUSY; rc = -EBUSY;
goto out;
} }
pci_dev_put(pdev);
return zpci_deconfigure_device(zdev); rc = zpci_deconfigure_device(zdev);
out:
mutex_unlock(&zdev->state_lock);
if (pdev)
pci_dev_put(pdev);
return rc;
} }
static int reset_slot(struct hotplug_slot *hotplug_slot, bool probe) static int reset_slot(struct hotplug_slot *hotplug_slot, bool probe)
{ {
struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev, struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev,
hotplug_slot); hotplug_slot);
int rc = -EIO;
if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
return -EIO;
/* /*
* We can't take the zdev->lock as reset_slot may be called during * If we can't get the zdev->state_lock the device state is
* probing and/or device removal which already happens under the * currently undergoing a transition and we bail out - just
* zdev->lock. Instead the user should use the higher level * the same as if the device's state is not configured at all.
* pci_reset_function() or pci_bus_reset() which hold the PCI device
* lock preventing concurrent removal. If not using these functions
* holding the PCI device lock is required.
*/ */
if (!mutex_trylock(&zdev->state_lock))
return rc;
/* As long as the function is configured we can reset */ /* We can reset only if the function is configured */
if (probe) if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
return 0; goto out;
if (probe) {
rc = 0;
goto out;
}
return zpci_hot_reset_device(zdev); rc = zpci_hot_reset_device(zdev);
out:
mutex_unlock(&zdev->state_lock);
return rc;
} }
static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value) static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment