Commit 606e6a72 authored by Michael Guralnik's avatar Michael Guralnik Committed by Saeed Mahameed

net/mlx5: Expose vnic diagnostic counters for eswitch managed vports

Expose debug counters on vport group managers for their managed vports.

Counters are exposed through debugfs, the directory will be present only
for functions that are eswitch managers and only counters that are
supported on their specific HW/FW will be exposed.

Example:
$ ls /sys/kernel/debug/mlx5/0000:08:00.0/esw/
pf sf_8  vf_0  vf_1

$ ls -l /sys/kernel/debug/mlx5/0000:08:00.0/esw/vf_0/vnic_diag/
cq_overrun
quota_exceeded_command
total_q_under_processor_handle
invalid_command
send_queue_priority_update_flow

List of all counters added:
total_q_under_processor_handle - number of queues in error state due to an
async error or errored command.
send_queue_priority_update_flow - number of QP/SQ priority/SL update
events.
cq_overrun - number of times CQ entered an error state due to an
overflow.
async_eq_overrun - number of times an EQ mapped to async events was
overrun.
comp_eq_overrun - number of times an EQ mapped to completion events was
overrun.
quota_exceeded_command - number of commands issued and failed due to quota
exceeded.
invalid_command - number of commands issued and failed due to any reason
other than quota exceeded.
Signed-off-by: default avatarMichael Guralnik <michaelgur@nvidia.com>
Reviewed-by: default avatarMark Bloch <mbloch@nvidia.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@nvidia.com>
parent dc402ccc
...@@ -68,7 +68,7 @@ mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o ...@@ -68,7 +68,7 @@ mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o
# #
mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
ecpf.o rdma.o esw/legacy.o \ ecpf.o rdma.o esw/legacy.o \
esw/devlink_port.o esw/vporttbl.o esw/qos.o esw/debugfs.o esw/devlink_port.o esw/vporttbl.o esw/qos.o
mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \ mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \
esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \ esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \
......
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#include <linux/debugfs.h>
#include "eswitch.h"
/* Identifiers for the vnic_env diagnostic counters queried through the
 * QUERY_VNIC_ENV command; each value selects one field of the
 * vnic_diagnostic_statistics section of the command output (see
 * mlx5_esw_query_vnic_diag()).
 */
enum vnic_diag_counter {
	MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE,
	MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW,
	MLX5_VNIC_DIAG_COMP_EQ_OVERRUN,
	MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN,
	MLX5_VNIC_DIAG_CQ_OVERRUN,
	MLX5_VNIC_DIAG_INVALID_COMMAND,
	/* NOTE(review): "QOUTA" is a typo for "QUOTA"; kept as-is because the
	 * name is referenced elsewhere in this file.
	 */
	MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND,
};
/* Query a single vnic_env diagnostic counter for @vport.
 *
 * Issues a QUERY_VNIC_ENV firmware command; when @vport is not the eswitch
 * manager vport itself, the other_vport bit is set so the managed
 * function's counters are returned instead of the caller's.
 *
 * Return: 0 on success with the counter value stored in *val, a negative
 * errno if the firmware command fails, or -EOPNOTSUPP for an unknown
 * counter id.
 */
static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_counter counter,
				    u32 *val)
{
	u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {};
	u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
	struct mlx5_core_dev *dev = vport->dev;
	u16 vport_num = vport->vport;
	void *vnic_diag_out;
	int err;

	MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV);
	MLX5_SET(query_vnic_env_in, in, vport_number, vport_num);
	if (!mlx5_esw_is_manager_vport(dev->priv.eswitch, vport_num))
		MLX5_SET(query_vnic_env_in, in, other_vport, 1);

	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	vnic_diag_out = MLX5_ADDR_OF(query_vnic_env_out, out, vport_env);
	switch (counter) {
	case MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE:
		*val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, total_error_queues);
		break;
	case MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW:
		*val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out,
				send_queue_priority_update_flow);
		break;
	case MLX5_VNIC_DIAG_COMP_EQ_OVERRUN:
		*val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, comp_eq_overrun);
		break;
	case MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN:
		*val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, async_eq_overrun);
		break;
	case MLX5_VNIC_DIAG_CQ_OVERRUN:
		*val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, cq_overrun);
		break;
	case MLX5_VNIC_DIAG_INVALID_COMMAND:
		*val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, invalid_command);
		break;
	case MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND:
		*val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, quota_exceeded_command);
		break;
	default:
		/* Previously an unknown id silently returned 0 with *val
		 * untouched; report it explicitly instead.
		 */
		return -EOPNOTSUPP;
	}
	return 0;
}
/* Common seq_file body for all vnic_diag debugfs files: query counter
 * @type for @vport and print its value on a line of its own.
 *
 * Return: 0 on success, or the error from mlx5_esw_query_vnic_diag().
 */
static int __show_vnic_diag(struct seq_file *file, struct mlx5_vport *vport,
			    enum vnic_diag_counter type)
{
	u32 val = 0;
	int ret;

	ret = mlx5_esw_query_vnic_diag(vport, type, &val);
	if (ret)
		return ret;

	/* val is u32; "%d" would print counter values above INT_MAX as
	 * negative numbers, so use the unsigned conversion.
	 */
	seq_printf(file, "%u\n", val);
	return 0;
}
/* One seq_file "show" handler per counter; file->private is the
 * struct mlx5_vport * passed to debugfs_create_file() in
 * mlx5_esw_vport_debugfs_create(). The DEFINE_SHOW_ATTRIBUTE() invocations
 * below generate the matching single-open <name>_fops for each handler, so
 * the function names must keep the "<name>_show" form.
 */
static int total_q_under_processor_handle_show(struct seq_file *file, void *priv)
{
	return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE);
}

static int send_queue_priority_update_flow_show(struct seq_file *file, void *priv)
{
	return __show_vnic_diag(file, file->private,
				MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW);
}

static int comp_eq_overrun_show(struct seq_file *file, void *priv)
{
	return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_COMP_EQ_OVERRUN);
}

static int async_eq_overrun_show(struct seq_file *file, void *priv)
{
	return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN);
}

static int cq_overrun_show(struct seq_file *file, void *priv)
{
	return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_CQ_OVERRUN);
}

static int invalid_command_show(struct seq_file *file, void *priv)
{
	return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_INVALID_COMMAND);
}

static int quota_exceeded_command_show(struct seq_file *file, void *priv)
{
	return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND);
}

DEFINE_SHOW_ATTRIBUTE(total_q_under_processor_handle);
DEFINE_SHOW_ATTRIBUTE(send_queue_priority_update_flow);
DEFINE_SHOW_ATTRIBUTE(comp_eq_overrun);
DEFINE_SHOW_ATTRIBUTE(async_eq_overrun);
DEFINE_SHOW_ATTRIBUTE(cq_overrun);
DEFINE_SHOW_ATTRIBUTE(invalid_command);
DEFINE_SHOW_ATTRIBUTE(quota_exceeded_command);
/* Remove the per-vport debugfs directory (and the vnic_diag files under it)
 * created by mlx5_esw_vport_debugfs_create(), and clear the cached dentry.
 * debugfs_remove_recursive(NULL) is a no-op, so this is safe to call even
 * when no directory was created.
 */
void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num)
{
	struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);

	debugfs_remove_recursive(vport->dbgfs);
	vport->dbgfs = NULL;
}
/* vnic diag dir name is "pf", "ecpf" or "{vf/sf}_xxxx". The numeric index
 * is a u16 and can be up to 5 digits, so the longest name ("sf_65535")
 * needs 9 bytes including the NUL terminator; 8 was too small and could
 * silently truncate.
 */
#define VNIC_DIAG_DIR_NAME_MAX_LEN 10

/* Create the per-vport debugfs directory under <dev>/esw/ and populate its
 * vnic_diag/ subdirectory with one file per diagnostic counter, gated on
 * the HW/FW capability bits that advertise each counter. Only functions
 * that are vport group managers get the directory at all. The directory
 * dentry is cached in vport->dbgfs for mlx5_esw_vport_debugfs_destroy().
 */
void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num)
{
	struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
	struct dentry *vnic_diag;
	char dir_name[VNIC_DIAG_DIR_NAME_MAX_LEN];
	int err;

	if (!MLX5_CAP_GEN(esw->dev, vport_group_manager))
		return;

	if (vport_num == MLX5_VPORT_PF) {
		strcpy(dir_name, "pf");
	} else if (vport_num == MLX5_VPORT_ECPF) {
		strcpy(dir_name, "ecpf");
	} else {
		err = snprintf(dir_name, VNIC_DIAG_DIR_NAME_MAX_LEN, "%s_%d", is_sf ? "sf" : "vf",
			       is_sf ? sf_num : vport_num - MLX5_VPORT_FIRST_VF);
		/* snprintf() signals truncation by returning >= the buffer
		 * size, not a negative value; treat both as errors so a
		 * truncated (ambiguous) directory name is never created.
		 */
		if (WARN_ON(err < 0 || err >= VNIC_DIAG_DIR_NAME_MAX_LEN))
			return;
	}

	vport->dbgfs = debugfs_create_dir(dir_name, esw->dbgfs);
	vnic_diag = debugfs_create_dir("vnic_diag", vport->dbgfs);

	if (MLX5_CAP_GEN(esw->dev, vnic_env_queue_counters)) {
		debugfs_create_file("total_q_under_processor_handle", 0444, vnic_diag, vport,
				    &total_q_under_processor_handle_fops);
		debugfs_create_file("send_queue_priority_update_flow", 0444, vnic_diag, vport,
				    &send_queue_priority_update_flow_fops);
	}

	if (MLX5_CAP_GEN(esw->dev, eq_overrun_count)) {
		debugfs_create_file("comp_eq_overrun", 0444, vnic_diag, vport,
				    &comp_eq_overrun_fops);
		debugfs_create_file("async_eq_overrun", 0444, vnic_diag, vport,
				    &async_eq_overrun_fops);
	}

	if (MLX5_CAP_GEN(esw->dev, vnic_env_cq_overrun))
		debugfs_create_file("cq_overrun", 0444, vnic_diag, vport, &cq_overrun_fops);

	if (MLX5_CAP_GEN(esw->dev, invalid_command_count))
		debugfs_create_file("invalid_command", 0444, vnic_diag, vport,
				    &invalid_command_fops);

	if (MLX5_CAP_GEN(esw->dev, quota_exceeded_count))
		debugfs_create_file("quota_exceeded_command", 0444, vnic_diag, vport,
				    &quota_exceeded_command_fops);
}
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
#include <linux/mlx5/vport.h> #include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h> #include <linux/mlx5/fs.h>
#include <linux/mlx5/mpfs.h> #include <linux/mlx5/mpfs.h>
#include <linux/debugfs.h>
#include "esw/acl/lgcy.h" #include "esw/acl/lgcy.h"
#include "esw/legacy.h" #include "esw/legacy.h"
#include "esw/qos.h" #include "esw/qos.h"
...@@ -1002,6 +1003,7 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, ...@@ -1002,6 +1003,7 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num,
if (err) if (err)
return err; return err;
mlx5_esw_vport_debugfs_create(esw, vport_num, false, 0);
err = esw_offloads_load_rep(esw, vport_num); err = esw_offloads_load_rep(esw, vport_num);
if (err) if (err)
goto err_rep; goto err_rep;
...@@ -1009,6 +1011,7 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, ...@@ -1009,6 +1011,7 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num,
return err; return err;
err_rep: err_rep:
mlx5_esw_vport_debugfs_destroy(esw, vport_num);
mlx5_esw_vport_disable(esw, vport_num); mlx5_esw_vport_disable(esw, vport_num);
return err; return err;
} }
...@@ -1016,6 +1019,7 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, ...@@ -1016,6 +1019,7 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num,
void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num) void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num)
{ {
esw_offloads_unload_rep(esw, vport_num); esw_offloads_unload_rep(esw, vport_num);
mlx5_esw_vport_debugfs_destroy(esw, vport_num);
mlx5_esw_vport_disable(esw, vport_num); mlx5_esw_vport_disable(esw, vport_num);
} }
...@@ -1622,6 +1626,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) ...@@ -1622,6 +1626,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
dev->priv.eswitch = esw; dev->priv.eswitch = esw;
BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head); BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head);
esw->dbgfs = debugfs_create_dir("esw", mlx5_debugfs_get_dev_root(esw->dev));
esw_info(dev, esw_info(dev,
"Total vports %d, per vport: max uc(%d) max mc(%d)\n", "Total vports %d, per vport: max uc(%d) max mc(%d)\n",
esw->total_vports, esw->total_vports,
...@@ -1645,6 +1650,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) ...@@ -1645,6 +1650,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
esw_info(esw->dev, "cleanup\n"); esw_info(esw->dev, "cleanup\n");
debugfs_remove_recursive(esw->dbgfs);
esw->dev->priv.eswitch = NULL; esw->dev->priv.eswitch = NULL;
destroy_workqueue(esw->work_queue); destroy_workqueue(esw->work_queue);
WARN_ON(refcount_read(&esw->qos.refcnt)); WARN_ON(refcount_read(&esw->qos.refcnt));
......
...@@ -191,6 +191,7 @@ struct mlx5_vport { ...@@ -191,6 +191,7 @@ struct mlx5_vport {
enum mlx5_eswitch_vport_event enabled_events; enum mlx5_eswitch_vport_event enabled_events;
int index; int index;
struct devlink_port *dl_port; struct devlink_port *dl_port;
struct dentry *dbgfs;
}; };
struct mlx5_esw_indir_table; struct mlx5_esw_indir_table;
...@@ -336,6 +337,7 @@ struct mlx5_eswitch { ...@@ -336,6 +337,7 @@ struct mlx5_eswitch {
u32 large_group_num; u32 large_group_num;
} params; } params;
struct blocking_notifier_head n_head; struct blocking_notifier_head n_head;
struct dentry *dbgfs;
}; };
void esw_offloads_disable(struct mlx5_eswitch *esw); void esw_offloads_disable(struct mlx5_eswitch *esw);
...@@ -684,6 +686,9 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_ ...@@ -684,6 +686,9 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_
void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num); void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num);
struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num); struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num);
void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num);
void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num);
int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port, int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
u16 vport_num, u32 controller, u32 sfnum); u16 vport_num, u32 controller, u32 sfnum);
void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num); void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num);
......
...@@ -3704,12 +3704,14 @@ int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_p ...@@ -3704,12 +3704,14 @@ int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_p
if (err) if (err)
goto devlink_err; goto devlink_err;
mlx5_esw_vport_debugfs_create(esw, vport_num, true, sfnum);
err = mlx5_esw_offloads_rep_load(esw, vport_num); err = mlx5_esw_offloads_rep_load(esw, vport_num);
if (err) if (err)
goto rep_err; goto rep_err;
return 0; return 0;
rep_err: rep_err:
mlx5_esw_vport_debugfs_destroy(esw, vport_num);
mlx5_esw_devlink_sf_port_unregister(esw, vport_num); mlx5_esw_devlink_sf_port_unregister(esw, vport_num);
devlink_err: devlink_err:
mlx5_esw_vport_disable(esw, vport_num); mlx5_esw_vport_disable(esw, vport_num);
...@@ -3719,6 +3721,7 @@ int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_p ...@@ -3719,6 +3721,7 @@ int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_p
void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num)
{ {
mlx5_esw_offloads_rep_unload(esw, vport_num); mlx5_esw_offloads_rep_unload(esw, vport_num);
mlx5_esw_vport_debugfs_destroy(esw, vport_num);
mlx5_esw_devlink_sf_port_unregister(esw, vport_num); mlx5_esw_devlink_sf_port_unregister(esw, vport_num);
mlx5_esw_vport_disable(esw, vport_num); mlx5_esw_vport_disable(esw, vport_num);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment