Commit 45fee8ed authored by Moshe Shemesh, committed by Saeed Mahameed

net/mlx5: Add clarification on sync reset failure

If the devlink reload action fw_activate fails in the sync reset stage,
use the new MFRL field reset_state to find out why it failed and report
the reason to the user.
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
parent 72fb3b60
...@@ -100,15 +100,11 @@ static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netli ...@@ -100,15 +100,11 @@ static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netli
} }
net_port_alive = !!(reset_type & MLX5_MFRL_REG_RESET_TYPE_NET_PORT_ALIVE); net_port_alive = !!(reset_type & MLX5_MFRL_REG_RESET_TYPE_NET_PORT_ALIVE);
err = mlx5_fw_reset_set_reset_sync(dev, net_port_alive); err = mlx5_fw_reset_set_reset_sync(dev, net_port_alive, extack);
if (err) if (err)
goto out; return err;
err = mlx5_fw_reset_wait_reset_done(dev); return mlx5_fw_reset_wait_reset_done(dev);
out:
if (err)
NL_SET_ERR_MSG_MOD(extack, "FW activate command failed");
return err;
} }
static int mlx5_devlink_trigger_fw_live_patch(struct devlink *devlink, static int mlx5_devlink_trigger_fw_live_patch(struct devlink *devlink,
......
...@@ -57,7 +57,8 @@ static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level, ...@@ -57,7 +57,8 @@ static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level,
return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MFRL, 0, 1); return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MFRL, 0, 1);
} }
static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type) static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level,
u8 *reset_type, u8 *reset_state)
{ {
u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {}; u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {};
u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {}; u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {};
...@@ -71,25 +72,67 @@ static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *r ...@@ -71,25 +72,67 @@ static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *r
*reset_level = MLX5_GET(mfrl_reg, out, reset_level); *reset_level = MLX5_GET(mfrl_reg, out, reset_level);
if (reset_type) if (reset_type)
*reset_type = MLX5_GET(mfrl_reg, out, reset_type); *reset_type = MLX5_GET(mfrl_reg, out, reset_type);
if (reset_state)
*reset_state = MLX5_GET(mfrl_reg, out, reset_state);
return 0; return 0;
} }
int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type) int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type)
{ {
return mlx5_reg_mfrl_query(dev, reset_level, reset_type); return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL);
} }
int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel) static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev,
struct netlink_ext_ack *extack)
{
u8 reset_state;
if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state))
goto out;
switch (reset_state) {
case MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION:
case MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS:
NL_SET_ERR_MSG_MOD(extack, "Sync reset was already triggered");
return -EBUSY;
case MLX5_MFRL_REG_RESET_STATE_TIMEOUT:
NL_SET_ERR_MSG_MOD(extack, "Sync reset got timeout");
return -ETIMEDOUT;
case MLX5_MFRL_REG_RESET_STATE_NACK:
NL_SET_ERR_MSG_MOD(extack, "One of the hosts disabled reset");
return -EPERM;
}
out:
NL_SET_ERR_MSG_MOD(extack, "Sync reset failed");
return -EIO;
}
int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel,
struct netlink_ext_ack *extack)
{ {
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {};
u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {};
int err; int err;
set_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); set_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
err = mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, reset_type_sel, 0, true);
if (err) MLX5_SET(mfrl_reg, in, reset_level, MLX5_MFRL_REG_RESET_LEVEL3);
clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); MLX5_SET(mfrl_reg, in, rst_type_sel, reset_type_sel);
return err; MLX5_SET(mfrl_reg, in, pci_sync_for_fw_update_start, 1);
err = mlx5_access_reg(dev, in, sizeof(in), out, sizeof(out),
MLX5_REG_MFRL, 0, 1, false);
if (!err)
return 0;
clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
if (err == -EREMOTEIO && MLX5_CAP_MCAM_FEATURE(dev, reset_state))
return mlx5_fw_reset_get_reset_state_err(dev, extack);
NL_SET_ERR_MSG_MOD(extack, "Sync reset command failed");
return mlx5_cmd_check(dev, err, in, out);
} }
int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev) int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev)
......
...@@ -9,7 +9,8 @@ ...@@ -9,7 +9,8 @@
void mlx5_fw_reset_enable_remote_dev_reset_set(struct mlx5_core_dev *dev, bool enable); void mlx5_fw_reset_enable_remote_dev_reset_set(struct mlx5_core_dev *dev, bool enable);
bool mlx5_fw_reset_enable_remote_dev_reset_get(struct mlx5_core_dev *dev); bool mlx5_fw_reset_enable_remote_dev_reset_get(struct mlx5_core_dev *dev);
int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type); int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type);
int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel); int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel,
struct netlink_ext_ack *extack);
int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev); int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev);
int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev); int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev);
......
...@@ -33,9 +33,10 @@ ...@@ -33,9 +33,10 @@
#include <linux/mlx5/port.h> #include <linux/mlx5/port.h>
#include "mlx5_core.h" #include "mlx5_core.h"
int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, /* calling with verbose false will not print error to log */
int size_in, void *data_out, int size_out, int mlx5_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in,
u16 reg_id, int arg, int write) void *data_out, int size_out, u16 reg_id, int arg,
int write, bool verbose)
{ {
int outlen = MLX5_ST_SZ_BYTES(access_register_out) + size_out; int outlen = MLX5_ST_SZ_BYTES(access_register_out) + size_out;
int inlen = MLX5_ST_SZ_BYTES(access_register_in) + size_in; int inlen = MLX5_ST_SZ_BYTES(access_register_in) + size_in;
...@@ -57,7 +58,9 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, ...@@ -57,7 +58,9 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
MLX5_SET(access_register_in, in, argument, arg); MLX5_SET(access_register_in, in, argument, arg);
MLX5_SET(access_register_in, in, register_id, reg_id); MLX5_SET(access_register_in, in, register_id, reg_id);
err = mlx5_cmd_exec(dev, in, inlen, out, outlen); err = mlx5_cmd_do(dev, in, inlen, out, outlen);
if (verbose)
err = mlx5_cmd_check(dev, err, in, out);
if (err) if (err)
goto out; goto out;
...@@ -69,6 +72,15 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, ...@@ -69,6 +72,15 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
kvfree(in); kvfree(in);
return err; return err;
} }
EXPORT_SYMBOL_GPL(mlx5_access_reg);
int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
int size_in, void *data_out, int size_out,
u16 reg_id, int arg, int write)
{
return mlx5_access_reg(dev, data_in, size_in, data_out, size_out,
reg_id, arg, write, true);
}
EXPORT_SYMBOL_GPL(mlx5_core_access_reg); EXPORT_SYMBOL_GPL(mlx5_core_access_reg);
int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group, int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group,
......
...@@ -1031,6 +1031,9 @@ int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn) ...@@ -1031,6 +1031,9 @@ int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn)
void mlx5_qp_debugfs_init(struct mlx5_core_dev *dev); void mlx5_qp_debugfs_init(struct mlx5_core_dev *dev);
void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev); void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev);
int mlx5_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in,
void *data_out, int size_out, u16 reg_id, int arg,
int write, bool verbose);
int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
int size_in, void *data_out, int size_out, int size_in, void *data_out, int size_out,
u16 reg_num, int arg, int write); u16 reg_num, int arg, int write);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment