Commit fc3d3db0 authored by Moshe Shemesh, committed by Saeed Mahameed

net/mlx5: Avoid double clear or set of sync reset requested

Clearing the reset requested state twice can lead to a NULL pointer
dereference, as the second clear will try to delete the timer again. This
can happen, for example, on a race between an abort from FW and a PCI error
or reset. Avoid this by using test_and_clear_bit(), so that the reset
requested clear flow runs only once. Similarly, use test_and_set_bit() so
that the reset requested set flow runs only once.

Fixes: 7dd6df32 ("net/mlx5: Handle sync reset abort event")
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Maher Sanalla <msanalla@nvidia.com>
Reviewed-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
parent cb7786a7
......@@ -162,14 +162,19 @@ static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev)
del_timer_sync(&fw_reset->timer);
}
/*
 * mlx5_sync_reset_clear_reset_requested() as shown by this diff hunk.
 *
 * NOTE(review): the +/- diff markers are missing from this listing, so the
 * pre-patch and post-patch lines appear interleaved. The notes below mark
 * which is which, inferred from the commit message and the hunk line
 * counts (-162,14 +162,19) -- confirm against the actual commit.
 *
 * Post-patch behavior: atomically test and clear the
 * MLX5_FW_RESET_FLAGS_RESET_REQUESTED bit in fw_reset->reset_flags.
 * If the bit was already clear, warn and return -EALREADY without running
 * the rest of the flow. Otherwise stop the sync reset poll, start health
 * polling when poll_health is true, and return 0.
 */
/* pre-patch signature (removed): returned void */
static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
/* post-patch signature (added): returns int so callers can detect a repeated clear */
static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
/*
 * added: only the caller that actually flips the bit from set to clear
 * continues; per the commit message, running the clear flow twice would
 * try to delete the timer twice.
 */
if (!test_and_clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
mlx5_core_warn(dev, "Reset request was already cleared\n");
return -EALREADY;
}
mlx5_stop_sync_reset_poll(dev);
/* pre-patch line (removed): the bit is now cleared by test_and_clear_bit() above */
clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
if (poll_health)
mlx5_start_health_poll(dev);
/* added: success, this call performed the clear flow */
return 0;
}
static void mlx5_sync_reset_reload_work(struct work_struct *work)
......@@ -229,13 +234,17 @@ static int mlx5_fw_reset_set_reset_sync_nack(struct mlx5_core_dev *dev)
return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 2, false);
}
/*
 * mlx5_sync_reset_set_reset_requested() as shown by this diff hunk.
 *
 * NOTE(review): the +/- diff markers are missing from this listing, so the
 * pre-patch and post-patch lines appear interleaved. The notes below mark
 * which is which, inferred from the commit message and the hunk line
 * counts (-229,13 +234,17) -- confirm against the actual commit.
 *
 * Post-patch behavior: atomically test and set the
 * MLX5_FW_RESET_FLAGS_RESET_REQUESTED bit in fw_reset->reset_flags.
 * If the bit was already set, warn and return -EALREADY without running
 * the rest of the flow. Otherwise stop health polling, start the sync
 * reset poll, and return 0.
 */
/* pre-patch signature (removed): returned void */
static void mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
/* post-patch signature (added): returns int so callers can detect a repeated set */
static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
/* added: only the caller that actually flips the bit from clear to set continues */
if (test_and_set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
mlx5_core_warn(dev, "Reset request was already set\n");
return -EALREADY;
}
mlx5_stop_health_poll(dev, true);
/* pre-patch line (removed): the bit is now set by test_and_set_bit() above */
set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
mlx5_start_sync_reset_poll(dev);
/* added: success, this call performed the set flow */
return 0;
}
static void mlx5_fw_live_patch_event(struct work_struct *work)
......@@ -264,7 +273,9 @@ static void mlx5_sync_reset_request_event(struct work_struct *work)
err ? "Failed" : "Sent");
return;
}
mlx5_sync_reset_set_reset_requested(dev);
if (mlx5_sync_reset_set_reset_requested(dev))
return;
err = mlx5_fw_reset_set_reset_sync_ack(dev);
if (err)
mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err);
......@@ -362,7 +373,8 @@ static void mlx5_sync_reset_now_event(struct work_struct *work)
struct mlx5_core_dev *dev = fw_reset->dev;
int err;
mlx5_sync_reset_clear_reset_requested(dev, false);
if (mlx5_sync_reset_clear_reset_requested(dev, false))
return;
mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n");
......@@ -391,10 +403,8 @@ static void mlx5_sync_reset_abort_event(struct work_struct *work)
reset_abort_work);
struct mlx5_core_dev *dev = fw_reset->dev;
if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags))
if (mlx5_sync_reset_clear_reset_requested(dev, true))
return;
mlx5_sync_reset_clear_reset_requested(dev, true);
mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n");
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment