Commit ac4cd478 authored by David S. Miller

Merge branch 'devlink-health'

Moshe Shemesh says:

====================
Add devlink-health support for devlink ports

Implement support for devlink health reporters on per-port basis.

This patchset fixes a design issue: some health reporters report errors
and run recovery at the device level while the actual functionality is at
the port level. Among the currently implemented devlink health reporters,
this is relevant only to the Tx and Rx reporters of mlx5, which has only one
port, so there is no real effect on functionality, but this should be fixed
before more drivers start using devlink health reporters.

The first part of the series prepares common function parts for the health
reporter implementation. The second part introduces the required API to
devlink-health, and the mlx5e patches demonstrate its usage and implement
the feature for the mlx5 driver.

The per-port reporter functionality is achieved by adding a list of
devlink_health_reporters to devlink_port struct in a manner similar to
existing device infrastructure. This is the only major difference, and
it makes it possible to fully reuse device reporter operations.
The effect will be seen in conjunction with the iproute2 additions and
will affect all devlink health commands. Users can distinguish between
device and port reporters by looking at the devlink handle. Port reporters
have a port index at the end of the address, and such addresses can be
provided as a parameter wherever devlink-health accepts a device handle.
The port handles can be obtained from the devlink port show command.
For example:
$ devlink health show
pci/0000:00:0a.0:
  reporter fw
    state healthy error 0 recover 0 auto_dump true
pci/0000:00:0a.0/1:
  reporter tx
    state healthy error 0 recover 0 grace_period 500 auto_recover true auto_dump true
$ devlink health set pci/0000:00:0a.0/1 reporter tx grace_period 1000 \
auto_recover false auto_dump false
$ devlink health show pci/0000:00:0a.0/1 reporter tx
pci/0000:00:0a.0/1:
  reporter tx
    state healthy error 0 recover 0 grace_period 1000 auto_recover false auto_dump false

Note: The same devlink health uAPI commands can now be used to access either
a port health reporter or a device health reporter.
For example, the recover command:
Before this patchset: devlink health recover DEV reporter REPORTER_NAME
After this patchset: devlink health recover { DEV | DEV/PORT_INDEX } reporter REPORTER_NAME

Changes v1 -> v2:
Fixed functions comment to match parameters list.

Changes v2 -> v3:
Added motivation to cover letter and note on uAPI.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents d6c7fc0c b7e93bb6
...@@ -611,13 +611,10 @@ static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = { ...@@ -611,13 +611,10 @@ static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = {
void mlx5e_reporter_rx_create(struct mlx5e_priv *priv) void mlx5e_reporter_rx_create(struct mlx5e_priv *priv)
{ {
struct devlink *devlink = priv_to_devlink(priv->mdev);
struct devlink_health_reporter *reporter; struct devlink_health_reporter *reporter;
reporter = devlink_health_reporter_create(devlink, reporter = devlink_port_health_reporter_create(&priv->dl_port, &mlx5_rx_reporter_ops,
&mlx5_rx_reporter_ops, MLX5E_REPORTER_RX_GRACEFUL_PERIOD, priv);
MLX5E_REPORTER_RX_GRACEFUL_PERIOD,
priv);
if (IS_ERR(reporter)) { if (IS_ERR(reporter)) {
netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n", netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n",
PTR_ERR(reporter)); PTR_ERR(reporter));
...@@ -631,5 +628,5 @@ void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv) ...@@ -631,5 +628,5 @@ void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv)
if (!priv->rx_reporter) if (!priv->rx_reporter)
return; return;
devlink_health_reporter_destroy(priv->rx_reporter); devlink_port_health_reporter_destroy(priv->rx_reporter);
} }
...@@ -440,14 +440,9 @@ static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { ...@@ -440,14 +440,9 @@ static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
void mlx5e_reporter_tx_create(struct mlx5e_priv *priv) void mlx5e_reporter_tx_create(struct mlx5e_priv *priv)
{ {
struct devlink_health_reporter *reporter; struct devlink_health_reporter *reporter;
struct mlx5_core_dev *mdev = priv->mdev;
struct devlink *devlink; reporter = devlink_port_health_reporter_create(&priv->dl_port, &mlx5_tx_reporter_ops,
MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv);
devlink = priv_to_devlink(mdev);
reporter =
devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops,
MLX5_REPORTER_TX_GRACEFUL_PERIOD,
priv);
if (IS_ERR(reporter)) { if (IS_ERR(reporter)) {
netdev_warn(priv->netdev, netdev_warn(priv->netdev,
"Failed to create tx reporter, err = %ld\n", "Failed to create tx reporter, err = %ld\n",
...@@ -462,5 +457,5 @@ void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv) ...@@ -462,5 +457,5 @@ void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv)
if (!priv->tx_reporter) if (!priv->tx_reporter)
return; return;
devlink_health_reporter_destroy(priv->tx_reporter); devlink_port_health_reporter_destroy(priv->tx_reporter);
} }
...@@ -5086,6 +5086,9 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, ...@@ -5086,6 +5086,9 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
if (err) if (err)
mlx5_core_err(mdev, "TLS initialization failed, %d\n", err); mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
mlx5e_build_nic_netdev(netdev); mlx5e_build_nic_netdev(netdev);
err = mlx5e_devlink_port_register(priv);
if (err)
mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err);
mlx5e_health_create_reporters(priv); mlx5e_health_create_reporters(priv);
return 0; return 0;
...@@ -5094,6 +5097,7 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, ...@@ -5094,6 +5097,7 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
{ {
mlx5e_health_destroy_reporters(priv); mlx5e_health_destroy_reporters(priv);
mlx5e_devlink_port_unregister(priv);
mlx5e_tls_cleanup(priv); mlx5e_tls_cleanup(priv);
mlx5e_ipsec_cleanup(priv); mlx5e_ipsec_cleanup(priv);
mlx5e_netdev_cleanup(priv->netdev, priv); mlx5e_netdev_cleanup(priv->netdev, priv);
...@@ -5526,16 +5530,10 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) ...@@ -5526,16 +5530,10 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev)
goto err_destroy_netdev; goto err_destroy_netdev;
} }
err = mlx5e_devlink_port_register(priv);
if (err) {
mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err);
goto err_detach;
}
err = register_netdev(netdev); err = register_netdev(netdev);
if (err) { if (err) {
mlx5_core_err(mdev, "register_netdev failed, %d\n", err); mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
goto err_devlink_port_unregister; goto err_detach;
} }
mlx5e_devlink_port_type_eth_set(priv); mlx5e_devlink_port_type_eth_set(priv);
...@@ -5543,8 +5541,6 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) ...@@ -5543,8 +5541,6 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev)
mlx5e_dcbnl_init_app(priv); mlx5e_dcbnl_init_app(priv);
return priv; return priv;
err_devlink_port_unregister:
mlx5e_devlink_port_unregister(priv);
err_detach: err_detach:
mlx5e_detach(mdev, priv); mlx5e_detach(mdev, priv);
err_destroy_netdev: err_destroy_netdev:
...@@ -5565,7 +5561,6 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) ...@@ -5565,7 +5561,6 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
priv = vpriv; priv = vpriv;
mlx5e_dcbnl_delete_app(priv); mlx5e_dcbnl_delete_app(priv);
unregister_netdev(priv->netdev); unregister_netdev(priv->netdev);
mlx5e_devlink_port_unregister(priv);
mlx5e_detach(mdev, vpriv); mlx5e_detach(mdev, vpriv);
mlx5e_destroy_netdev(priv); mlx5e_destroy_netdev(priv);
} }
......
...@@ -101,6 +101,8 @@ struct devlink_port { ...@@ -101,6 +101,8 @@ struct devlink_port {
u8 attrs_set:1, u8 attrs_set:1,
switch_port:1; switch_port:1;
struct delayed_work type_warn_dw; struct delayed_work type_warn_dw;
struct list_head reporter_list;
struct mutex reporters_lock; /* Protects reporter_list */
}; };
struct devlink_sb_pool_info { struct devlink_sb_pool_info {
...@@ -1336,9 +1338,18 @@ struct devlink_health_reporter * ...@@ -1336,9 +1338,18 @@ struct devlink_health_reporter *
devlink_health_reporter_create(struct devlink *devlink, devlink_health_reporter_create(struct devlink *devlink,
const struct devlink_health_reporter_ops *ops, const struct devlink_health_reporter_ops *ops,
u64 graceful_period, void *priv); u64 graceful_period, void *priv);
struct devlink_health_reporter *
devlink_port_health_reporter_create(struct devlink_port *port,
const struct devlink_health_reporter_ops *ops,
u64 graceful_period, void *priv);
void void
devlink_health_reporter_destroy(struct devlink_health_reporter *reporter); devlink_health_reporter_destroy(struct devlink_health_reporter *reporter);
void
devlink_port_health_reporter_destroy(struct devlink_health_reporter *reporter);
void * void *
devlink_health_reporter_priv(struct devlink_health_reporter *reporter); devlink_health_reporter_priv(struct devlink_health_reporter *reporter);
int devlink_health_report(struct devlink_health_reporter *reporter, int devlink_health_report(struct devlink_health_reporter *reporter,
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment