Commit 7f46a0b7 authored by Mark Bloch, committed by Saeed Mahameed

net/mlx5: Lag, add debugfs to query hardware lag state

Lag state has become very complicated, with many modes, flags, types and
port selection methods, and future work will add additional features.

Add debugfs entries to query the current lag state. A new directory named
"lag" will be created under the mlx5 debugfs directory. Because the driver
has a debugfs directory per PCI function, the location will be:
<debugfs>/mlx5/<BDF>/lag

For example:
/sys/kernel/debug/mlx5/0000:08:00.0/lag

The following files are exposed:

- state: Returns "active" or "disabled". If "active" it means hardware
         lag is active.

- members: Returns the BDFs of all the members of the lag object.

- type: Returns the type of the lag currently configured. Valid only
	if hardware lag is active.
	* "roce" - Members are bare metal PFs.
	* "switchdev" - Members are in switchdev mode.
	* "multipath" - ECMP offloads.

- port_sel_mode: Returns the egress port selection method, valid
		 only if hardware lag is active.
		 * "queue_affinity" - Egress port is selected by
		   the QP/SQ affinity.
		 * "hash" - Egress port is selected by hash done on
		   each packet. Controlled by: xmit_hash_policy of the
		   bond device.
- flags: Returns flags that are specific per lag @type. Valid only if
	 hardware lag is active.
	 * "shared_fdb" - "on" or "off", if "on" single FDB is used.

- mapping: Returns the mapping which is used to select egress port.
	   Valid only if hardware lag is active.
	   If @port_sel_mode is "hash" returns the active egress ports.
	   The hash result will select only active ports.
	   If @port_sel_mode is "queue_affinity" returns the mapping
	   between the configured port affinity of the QP/SQ and actual
	   egress port. For example:
	   * 1:1 - Mapping means if the configured affinity is port 1
	           traffic will egress via port 1.
	   * 1:2 - Mapping means if the configured affinity is port 1
		   traffic will egress via port 2. This can happen
		   if port 1 is down or in active/backup mode and port 1
		   is backup.

Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
parent 352899f3
...@@ -14,7 +14,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o ...@@ -14,7 +14,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o alloc.o port.o mr.o pd.o \ health.o mcg.o cq.o alloc.o port.o mr.o pd.o \
transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
fs_counters.o fs_ft_pool.o rl.o lag/lag.o dev.o events.o wq.o lib/gid.o \ fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \
lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \ diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
fw_reset.o qos.o lib/tout.o fw_reset.o qos.o lib/tout.o
......
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#include "lag.h"
/*
 * Translate the lag type bits in @ldev->flags into a printable name.
 *
 * The flags are tested in a fixed order (RoCE, SRIOV, multipath) and the
 * first one that is set wins. Returns NULL when none of them is set.
 */
static char *get_str_mode_type(struct mlx5_lag *ldev)
{
	char *name = NULL;

	if (ldev->flags & MLX5_LAG_FLAG_ROCE)
		name = "roce";
	else if (ldev->flags & MLX5_LAG_FLAG_SRIOV)
		name = "switchdev";
	else if (ldev->flags & MLX5_LAG_FLAG_MULTIPATH)
		name = "multipath";

	return name;
}
/*
 * seq_file show handler for the "type" debugfs file.
 *
 * Prints the lag type name followed by a newline. Returns -EINVAL when
 * lag is not active or when no type flag is set, 0 otherwise.
 */
static int type_show(struct seq_file *file, void *priv)
{
	struct mlx5_core_dev *dev = file->private;
	struct mlx5_lag *ldev = dev->priv.lag;
	char *type_name;

	/* Sample the state under the lag lock; print after dropping it. */
	mutex_lock(&ldev->lock);
	type_name = __mlx5_lag_is_active(ldev) ? get_str_mode_type(ldev) : NULL;
	mutex_unlock(&ldev->lock);

	if (type_name) {
		seq_printf(file, "%s\n", type_name);
		return 0;
	}

	return -EINVAL;
}
static int port_sel_mode_show(struct seq_file *file, void *priv)
{
struct mlx5_core_dev *dev = file->private;
struct mlx5_lag *ldev;
int ret = 0;
char *mode;
ldev = dev->priv.lag;
mutex_lock(&ldev->lock);
if (__mlx5_lag_is_active(ldev))
mode = get_str_port_sel_mode(ldev->flags);
else
ret = -EINVAL;
mutex_unlock(&ldev->lock);
if (ret || !mode)
return ret;
seq_printf(file, "%s\n", mode);
return 0;
}
/*
 * seq_file show handler for the "state" debugfs file.
 *
 * Prints "active" when __mlx5_lag_is_active() reports true for this
 * device's lag object and "disabled" otherwise. Always returns 0.
 */
static int state_show(struct seq_file *file, void *priv)
{
	struct mlx5_core_dev *dev = file->private;
	struct mlx5_lag *ldev = dev->priv.lag;
	const char *state;

	/* Only the activity check needs the lock; the strings are constants. */
	mutex_lock(&ldev->lock);
	state = __mlx5_lag_is_active(ldev) ? "active" : "disabled";
	mutex_unlock(&ldev->lock);

	seq_printf(file, "%s\n", state);
	return 0;
}
/*
 * seq_file show handler for the "flags" debugfs file.
 *
 * Prints "shared_fdb:on" or "shared_fdb:off" according to
 * @ldev->shared_fdb. Returns -EINVAL when lag is not active, 0 otherwise.
 */
static int flags_show(struct seq_file *file, void *priv)
{
	struct mlx5_core_dev *dev = file->private;
	struct mlx5_lag *ldev = dev->priv.lag;
	bool fdb_shared;

	mutex_lock(&ldev->lock);
	if (!__mlx5_lag_is_active(ldev)) {
		mutex_unlock(&ldev->lock);
		return -EINVAL;
	}
	/* Take a snapshot so the value can be printed without the lock. */
	fdb_shared = ldev->shared_fdb;
	mutex_unlock(&ldev->lock);

	seq_printf(file, "%s:%s\n", "shared_fdb", fdb_shared ? "on" : "off");
	return 0;
}
/*
 * seq_file show handler for the "mapping" debugfs file.
 *
 * With MLX5_LAG_FLAG_HASH_BASED set, prints one line per entry filled in
 * by mlx5_infer_tx_enabled(), each value incremented by one. Otherwise
 * prints one "<index + 1>:<v2p_map[index]>" line per lag port.
 *
 * Returns -EINVAL when lag is not active, 0 otherwise.
 */
static int mapping_show(struct seq_file *file, void *priv)
{
	struct mlx5_core_dev *dev = file->private;
	struct mlx5_lag *ldev = dev->priv.lag;
	u8 ports[MLX5_MAX_PORTS] = {};
	bool hash_based;
	int count;
	int idx;

	mutex_lock(&ldev->lock);
	if (!__mlx5_lag_is_active(ldev)) {
		mutex_unlock(&ldev->lock);
		return -EINVAL;
	}

	/* Copy what will be printed into @ports while the lock is held. */
	hash_based = !!(ldev->flags & MLX5_LAG_FLAG_HASH_BASED);
	if (hash_based) {
		mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, ports,
				      &count);
	} else {
		for (idx = 0; idx < ldev->ports; idx++)
			ports[idx] = ldev->v2p_map[idx];
		count = ldev->ports;
	}
	mutex_unlock(&ldev->lock);

	for (idx = 0; idx < count; idx++) {
		if (hash_based)
			seq_printf(file, "%d\n", ports[idx] + 1);
		else
			seq_printf(file, "%d:%d\n", idx + 1, ports[idx]);
	}

	return 0;
}
/*
 * seq_file show handler for the "members" debugfs file.
 *
 * Walks the lag's pf[] slots and prints dev_name() of every slot that has
 * a device attached, one per line. Always returns 0.
 */
static int members_show(struct seq_file *file, void *priv)
{
	struct mlx5_core_dev *dev = file->private;
	struct mlx5_lag *ldev = dev->priv.lag;
	int slot;

	/* The lock is held across the walk, printing included. */
	mutex_lock(&ldev->lock);
	for (slot = 0; slot < ldev->ports; slot++) {
		if (ldev->pf[slot].dev)
			seq_printf(file, "%s\n",
				   dev_name(ldev->pf[slot].dev->device));
	}
	mutex_unlock(&ldev->lock);

	return 0;
}
/*
 * Generate the <name>_fops objects that mlx5_ldev_add_debugfs() passes to
 * debugfs_create_file(), one for each <name>_show handler above.
 */
DEFINE_SHOW_ATTRIBUTE(type);
DEFINE_SHOW_ATTRIBUTE(port_sel_mode);
DEFINE_SHOW_ATTRIBUTE(state);
DEFINE_SHOW_ATTRIBUTE(flags);
DEFINE_SHOW_ATTRIBUTE(mapping);
DEFINE_SHOW_ATTRIBUTE(members);
/*
 * Create the "lag" debugfs directory under this device's debugfs root and
 * populate it with the read-only lag files.
 *
 * The directory dentry is stored in @dev->priv.dbg.lag_debugfs. Every file
 * receives @dev as its private data.
 */
void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev)
{
	/* Files are created in the order listed here. */
	static const struct {
		const char *name;
		const struct file_operations *fops;
	} lag_files[] = {
		{ "type",          &type_fops },
		{ "port_sel_mode", &port_sel_mode_fops },
		{ "state",         &state_fops },
		{ "flags",         &flags_fops },
		{ "mapping",       &mapping_fops },
		{ "members",       &members_fops },
	};
	struct dentry *lag_dir;
	unsigned int i;

	lag_dir = debugfs_create_dir("lag", mlx5_debugfs_get_dev_root(dev));
	dev->priv.dbg.lag_debugfs = lag_dir;

	for (i = 0; i < ARRAY_SIZE(lag_files); i++)
		debugfs_create_file(lag_files[i].name, 0444, lag_dir, dev,
				    lag_files[i].fops);
}
/*
 * Remove the debugfs entry @dbg together with everything beneath it.
 */
void mlx5_ldev_remove_debugfs(struct dentry *dbg)
{
	debugfs_remove_recursive(dbg);
}
...@@ -120,8 +120,8 @@ static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports, ...@@ -120,8 +120,8 @@ static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports,
} }
} }
static void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
u8 *ports, int *num_enabled) u8 *ports, int *num_enabled)
{ {
int i; int i;
...@@ -454,7 +454,7 @@ static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev, ...@@ -454,7 +454,7 @@ static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
return mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, flags); return mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, flags);
} }
static char *get_str_port_sel_mode(u8 flags) char *get_str_port_sel_mode(u8 flags)
{ {
if (flags & MLX5_LAG_FLAG_HASH_BASED) if (flags & MLX5_LAG_FLAG_HASH_BASED)
return "hash"; return "hash";
...@@ -1106,6 +1106,10 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev) ...@@ -1106,6 +1106,10 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
if (!ldev) if (!ldev)
return; return;
/* mdev is being removed, might as well remove debugfs
* as early as possible.
*/
mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
recheck: recheck:
mutex_lock(&ldev->lock); mutex_lock(&ldev->lock);
if (ldev->mode_changes_in_progress) { if (ldev->mode_changes_in_progress) {
...@@ -1137,6 +1141,7 @@ void mlx5_lag_add_mdev(struct mlx5_core_dev *dev) ...@@ -1137,6 +1141,7 @@ void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
msleep(100); msleep(100);
goto recheck; goto recheck;
} }
mlx5_ldev_add_debugfs(dev);
} }
void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
......
...@@ -4,6 +4,8 @@ ...@@ -4,6 +4,8 @@
#ifndef __MLX5_LAG_H__ #ifndef __MLX5_LAG_H__
#define __MLX5_LAG_H__ #define __MLX5_LAG_H__
#include <linux/debugfs.h>
#define MLX5_LAG_MAX_HASH_BUCKETS 16 #define MLX5_LAG_MAX_HASH_BUCKETS 16
#include "mlx5_core.h" #include "mlx5_core.h"
#include "mp.h" #include "mp.h"
...@@ -90,4 +92,11 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, ...@@ -90,4 +92,11 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
struct net_device *ndev); struct net_device *ndev);
char *get_str_port_sel_mode(u8 flags);
void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
u8 *ports, int *num_enabled);
void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev);
void mlx5_ldev_remove_debugfs(struct dentry *dbg);
#endif /* __MLX5_LAG_H__ */ #endif /* __MLX5_LAG_H__ */
...@@ -558,6 +558,7 @@ struct mlx5_debugfs_entries { ...@@ -558,6 +558,7 @@ struct mlx5_debugfs_entries {
struct dentry *cq_debugfs; struct dentry *cq_debugfs;
struct dentry *cmdif_debugfs; struct dentry *cmdif_debugfs;
struct dentry *pages_debugfs; struct dentry *pages_debugfs;
struct dentry *lag_debugfs;
}; };
struct mlx5_ft_pool; struct mlx5_ft_pool;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment