Commit 48bb52c8 authored by Eran Ben Elisha, committed by David S. Miller

devlink: Add auto dump flag to health reporter

On low-memory systems, run-time dumps can consume too much memory. Add
the ability for an administrator to disable auto dumps per reporter as
part of the error flow handling routine.

This attribute is not relevant while executing
DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET.

By default, auto dump is activated for any reporter that has a dump method,
as part of the reporter registration to devlink.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent ba7d16c7
...@@ -429,6 +429,8 @@ enum devlink_attr { ...@@ -429,6 +429,8 @@ enum devlink_attr {
DEVLINK_ATTR_NETNS_FD, /* u32 */ DEVLINK_ATTR_NETNS_FD, /* u32 */
DEVLINK_ATTR_NETNS_PID, /* u32 */ DEVLINK_ATTR_NETNS_PID, /* u32 */
DEVLINK_ATTR_NETNS_ID, /* u32 */ DEVLINK_ATTR_NETNS_ID, /* u32 */
DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP, /* u8 */
/* add new attributes above here, update the policy in devlink.c */ /* add new attributes above here, update the policy in devlink.c */
__DEVLINK_ATTR_MAX, __DEVLINK_ATTR_MAX,
......
...@@ -5089,6 +5089,7 @@ struct devlink_health_reporter { ...@@ -5089,6 +5089,7 @@ struct devlink_health_reporter {
struct mutex dump_lock; /* lock parallel read/write from dump buffers */ struct mutex dump_lock; /* lock parallel read/write from dump buffers */
u64 graceful_period; u64 graceful_period;
bool auto_recover; bool auto_recover;
bool auto_dump;
u8 health_state; u8 health_state;
u64 dump_ts; u64 dump_ts;
u64 dump_real_ts; u64 dump_real_ts;
...@@ -5155,6 +5156,7 @@ devlink_health_reporter_create(struct devlink *devlink, ...@@ -5155,6 +5156,7 @@ devlink_health_reporter_create(struct devlink *devlink,
reporter->devlink = devlink; reporter->devlink = devlink;
reporter->graceful_period = graceful_period; reporter->graceful_period = graceful_period;
reporter->auto_recover = !!ops->recover; reporter->auto_recover = !!ops->recover;
reporter->auto_dump = !!ops->dump;
mutex_init(&reporter->dump_lock); mutex_init(&reporter->dump_lock);
refcount_set(&reporter->refcount, 1); refcount_set(&reporter->refcount, 1);
list_add_tail(&reporter->list, &devlink->reporter_list); list_add_tail(&reporter->list, &devlink->reporter_list);
...@@ -5235,6 +5237,10 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg, ...@@ -5235,6 +5237,10 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg,
nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS, nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,
reporter->dump_real_ts, DEVLINK_ATTR_PAD)) reporter->dump_real_ts, DEVLINK_ATTR_PAD))
goto reporter_nest_cancel; goto reporter_nest_cancel;
if (reporter->ops->dump &&
nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP,
reporter->auto_dump))
goto reporter_nest_cancel;
nla_nest_end(msg, reporter_attr); nla_nest_end(msg, reporter_attr);
genlmsg_end(msg, hdr); genlmsg_end(msg, hdr);
...@@ -5381,10 +5387,12 @@ int devlink_health_report(struct devlink_health_reporter *reporter, ...@@ -5381,10 +5387,12 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR; reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
mutex_lock(&reporter->dump_lock); if (reporter->auto_dump) {
/* store current dump of current error, for later analysis */ mutex_lock(&reporter->dump_lock);
devlink_health_do_dump(reporter, priv_ctx, NULL); /* store current dump of current error, for later analysis */
mutex_unlock(&reporter->dump_lock); devlink_health_do_dump(reporter, priv_ctx, NULL);
mutex_unlock(&reporter->dump_lock);
}
if (reporter->auto_recover) if (reporter->auto_recover)
return devlink_health_reporter_recover(reporter, return devlink_health_reporter_recover(reporter,
...@@ -5558,6 +5566,11 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb, ...@@ -5558,6 +5566,11 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
err = -EOPNOTSUPP; err = -EOPNOTSUPP;
goto out; goto out;
} }
if (!reporter->ops->dump &&
info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP]) {
err = -EOPNOTSUPP;
goto out;
}
if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]) if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD])
reporter->graceful_period = reporter->graceful_period =
...@@ -5567,6 +5580,10 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb, ...@@ -5567,6 +5580,10 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
reporter->auto_recover = reporter->auto_recover =
nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]); nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]);
if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP])
reporter->auto_dump =
nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP]);
devlink_health_reporter_put(reporter); devlink_health_reporter_put(reporter);
return 0; return 0;
out: out:
...@@ -6313,6 +6330,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { ...@@ -6313,6 +6330,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 }, [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 },
[DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 }, [DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 },
[DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 }, [DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 },
[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP] = { .type = NLA_U8 },
}; };
static const struct genl_ops devlink_nl_ops[] = { static const struct genl_ops devlink_nl_ops[] = {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment