Commit 1061c90f authored by Mohamad Haj Yahia's avatar Mohamad Haj Yahia Committed by David S. Miller

net/mlx5: Fix pci error recovery flow

When PCI error is detected we should save the state of the pci prior to
disabling it.

Also when receiving pci slot reset call we need to verify that the
device is responsive.

Fixes: 89d44f0a ('net/mlx5_core: Add pci error handlers to mlx5_core
driver')
Signed-off-by: default avatarMohamad Haj Yahia <mohamad@mellanox.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@mellanox.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 506753b0
...@@ -1420,36 +1420,12 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, ...@@ -1420,36 +1420,12 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
dev_info(&pdev->dev, "%s was called\n", __func__); dev_info(&pdev->dev, "%s was called\n", __func__);
mlx5_enter_error_state(dev); mlx5_enter_error_state(dev);
mlx5_unload_one(dev, priv); mlx5_unload_one(dev, priv);
pci_save_state(pdev);
mlx5_pci_disable_device(dev); mlx5_pci_disable_device(dev);
return state == pci_channel_io_perm_failure ? return state == pci_channel_io_perm_failure ?
PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
} }
static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
int err = 0;
dev_info(&pdev->dev, "%s was called\n", __func__);
err = mlx5_pci_enable_device(dev);
if (err) {
dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n"
, __func__, err);
return PCI_ERS_RESULT_DISCONNECT;
}
pci_set_master(pdev);
pci_set_power_state(pdev, PCI_D0);
pci_restore_state(pdev);
return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}
void mlx5_disable_device(struct mlx5_core_dev *dev)
{
mlx5_pci_err_detected(dev->pdev, 0);
}
/* wait for the device to show vital signs by waiting /* wait for the device to show vital signs by waiting
* for the health counter to start counting. * for the health counter to start counting.
*/ */
...@@ -1477,21 +1453,44 @@ static int wait_vital(struct pci_dev *pdev) ...@@ -1477,21 +1453,44 @@ static int wait_vital(struct pci_dev *pdev)
return -ETIMEDOUT; return -ETIMEDOUT;
} }
static void mlx5_pci_resume(struct pci_dev *pdev) static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
{ {
struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
struct mlx5_priv *priv = &dev->priv;
int err; int err;
dev_info(&pdev->dev, "%s was called\n", __func__); dev_info(&pdev->dev, "%s was called\n", __func__);
pci_save_state(pdev); err = mlx5_pci_enable_device(dev);
err = wait_vital(pdev);
if (err) { if (err) {
dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n"
, __func__, err);
return PCI_ERS_RESULT_DISCONNECT;
}
pci_set_master(pdev);
pci_restore_state(pdev);
if (wait_vital(pdev)) {
dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__); dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
return; return PCI_ERS_RESULT_DISCONNECT;
} }
return PCI_ERS_RESULT_RECOVERED;
}
void mlx5_disable_device(struct mlx5_core_dev *dev)
{
mlx5_pci_err_detected(dev->pdev, 0);
}
static void mlx5_pci_resume(struct pci_dev *pdev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
struct mlx5_priv *priv = &dev->priv;
int err;
dev_info(&pdev->dev, "%s was called\n", __func__);
err = mlx5_load_one(dev, priv); err = mlx5_load_one(dev, priv);
if (err) if (err)
dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n" dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment