Commit b3cb5388 authored by Huy Nguyen's avatar Huy Nguyen Committed by Saeed Mahameed

net/mlx5: Skip mlx5_unload_one if mlx5_load_one fails

There is an issue where the firmware fails during mlx5_load_one,
the health_care timer detects the issue and schedules a health_care call.
Then the mlx5_load_one detects the issue, cleans up and quits. Then
the health_care starts and calls mlx5_unload_one to clean up the resources
that no longer exist and causes kernel panic.

The root cause is that the bit MLX5_INTERFACE_STATE_DOWN is not set
after mlx5_load_one fails. The solution is removing the bit
MLX5_INTERFACE_STATE_DOWN and quit mlx5_unload_one if the
bit MLX5_INTERFACE_STATE_UP is not set. The bit MLX5_INTERFACE_STATE_DOWN
is redundant and we can use MLX5_INTERFACE_STATE_UP instead.

Fixes: 5fc7197d ("net/mlx5: Add pci shutdown callback")
Signed-off-by: default avatarHuy Nguyen <huyn@mellanox.com>
Reviewed-by: default avatarDaniel Jurgens <danielj@mellanox.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@mellanox.com>
parent 672d0880
...@@ -1186,7 +1186,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, ...@@ -1186,7 +1186,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
} }
} }
clear_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state);
set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
out: out:
mutex_unlock(&dev->intf_state_mutex); mutex_unlock(&dev->intf_state_mutex);
...@@ -1261,7 +1260,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, ...@@ -1261,7 +1260,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
mlx5_drain_health_recovery(dev); mlx5_drain_health_recovery(dev);
mutex_lock(&dev->intf_state_mutex); mutex_lock(&dev->intf_state_mutex);
if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) { if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n",
__func__); __func__);
if (cleanup) if (cleanup)
...@@ -1270,7 +1269,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, ...@@ -1270,7 +1269,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
} }
clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
set_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state);
if (mlx5_device_registered(dev)) if (mlx5_device_registered(dev))
mlx5_detach_device(dev); mlx5_detach_device(dev);
......
...@@ -673,9 +673,8 @@ enum mlx5_device_state { ...@@ -673,9 +673,8 @@ enum mlx5_device_state {
}; };
enum mlx5_interface_state { enum mlx5_interface_state {
MLX5_INTERFACE_STATE_DOWN = BIT(0), MLX5_INTERFACE_STATE_UP = BIT(0),
MLX5_INTERFACE_STATE_UP = BIT(1), MLX5_INTERFACE_STATE_SHUTDOWN = BIT(1),
MLX5_INTERFACE_STATE_SHUTDOWN = BIT(2),
}; };
enum mlx5_pci_status { enum mlx5_pci_status {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment