Commit e307b302 authored by Yuri Nudelman, committed by Oded Gabbay

habanalabs: added open_stats info ioctl

In a system with multiple ASICs, there is a need to provide monitoring
tools with information on how long a device was opened and how many
times a device was opened.

Therefore, we add a new opcode to the INFO ioctl to provide that
information.
Signed-off-by: Yuri Nudelman <ynudelman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
parent 1f7ef4bf
...@@ -132,6 +132,9 @@ static int hl_device_release(struct inode *inode, struct file *filp) ...@@ -132,6 +132,9 @@ static int hl_device_release(struct inode *inode, struct file *filp)
dev_warn(hdev->dev, dev_warn(hdev->dev,
"Device is still in use because there are live CS and/or memory mappings\n"); "Device is still in use because there are live CS and/or memory mappings\n");
hdev->last_open_session_duration_jif =
jiffies - hdev->last_successful_open_jif;
return 0; return 0;
} }
......
...@@ -2137,6 +2137,11 @@ struct hl_mmu_funcs { ...@@ -2137,6 +2137,11 @@ struct hl_mmu_funcs {
* the error will be ignored by the driver during * the error will be ignored by the driver during
* device initialization. Mainly used to debug and * device initialization. Mainly used to debug and
* workaround firmware bugs * workaround firmware bugs
* @last_successful_open_jif: timestamp (jiffies) of the last successful
* device open.
* @last_open_session_duration_jif: duration (jiffies) of the last device open
* session.
* @open_counter: number of successful device open operations.
* @in_reset: is device in reset flow. * @in_reset: is device in reset flow.
* @curr_pll_profile: current PLL profile. * @curr_pll_profile: current PLL profile.
* @card_type: Various ASICs have several card types. This indicates the card * @card_type: Various ASICs have several card types. This indicates the card
...@@ -2259,6 +2264,9 @@ struct hl_device { ...@@ -2259,6 +2264,9 @@ struct hl_device {
u64 max_power; u64 max_power;
u64 clock_gating_mask; u64 clock_gating_mask;
u64 boot_error_status_mask; u64 boot_error_status_mask;
u64 last_successful_open_jif;
u64 last_open_session_duration_jif;
u64 open_counter;
atomic_t in_reset; atomic_t in_reset;
enum hl_pll_frequency curr_pll_profile; enum hl_pll_frequency curr_pll_profile;
enum cpucp_card_types card_type; enum cpucp_card_types card_type;
......
...@@ -187,6 +187,9 @@ int hl_device_open(struct inode *inode, struct file *filp) ...@@ -187,6 +187,9 @@ int hl_device_open(struct inode *inode, struct file *filp)
hl_debugfs_add_file(hpriv); hl_debugfs_add_file(hpriv);
hdev->open_counter++;
hdev->last_successful_open_jif = jiffies;
return 0; return 0;
out_err: out_err:
......
...@@ -460,6 +460,24 @@ static int power_info(struct hl_fpriv *hpriv, struct hl_info_args *args) ...@@ -460,6 +460,24 @@ static int power_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
min((size_t) max_size, sizeof(power_info))) ? -EFAULT : 0; min((size_t) max_size, sizeof(power_info))) ? -EFAULT : 0;
} }
static int open_stats_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
struct hl_device *hdev = hpriv->hdev;
u32 max_size = args->return_size;
struct hl_open_stats_info open_stats_info = {0};
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
if ((!max_size) || (!out))
return -EINVAL;
open_stats_info.last_open_period_ms = jiffies64_to_msecs(
hdev->last_open_session_duration_jif);
open_stats_info.open_counter = hdev->open_counter;
return copy_to_user(out, &open_stats_info,
min((size_t) max_size, sizeof(open_stats_info))) ? -EFAULT : 0;
}
static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
struct device *dev) struct device *dev)
{ {
...@@ -543,6 +561,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, ...@@ -543,6 +561,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_POWER: case HL_INFO_POWER:
return power_info(hpriv, args); return power_info(hpriv, args);
case HL_INFO_OPEN_STATS:
return open_stats_info(hpriv, args);
default: default:
dev_err(dev, "Invalid request %d\n", args->op); dev_err(dev, "Invalid request %d\n", args->op);
rc = -ENOTTY; rc = -ENOTTY;
......
...@@ -313,6 +313,7 @@ enum hl_device_status { ...@@ -313,6 +313,7 @@ enum hl_device_status {
* HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore * HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore
* HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption * HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption
* HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency * HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency
* HL_INFO_OPEN_STATS - Retrieve info regarding recent device open calls
*/ */
#define HL_INFO_HW_IP_INFO 0 #define HL_INFO_HW_IP_INFO 0
#define HL_INFO_HW_EVENTS 1 #define HL_INFO_HW_EVENTS 1
...@@ -331,6 +332,7 @@ enum hl_device_status { ...@@ -331,6 +332,7 @@ enum hl_device_status {
#define HL_INFO_TOTAL_ENERGY 15 #define HL_INFO_TOTAL_ENERGY 15
#define HL_INFO_PLL_FREQUENCY 16 #define HL_INFO_PLL_FREQUENCY 16
#define HL_INFO_POWER 17 #define HL_INFO_POWER 17
#define HL_INFO_OPEN_STATS 18
#define HL_INFO_VERSION_MAX_LEN 128 #define HL_INFO_VERSION_MAX_LEN 128
#define HL_INFO_CARD_NAME_MAX_LEN 16 #define HL_INFO_CARD_NAME_MAX_LEN 16
...@@ -444,6 +446,16 @@ struct hl_pll_frequency_info { ...@@ -444,6 +446,16 @@ struct hl_pll_frequency_info {
__u16 output[HL_PLL_NUM_OUTPUTS]; __u16 output[HL_PLL_NUM_OUTPUTS];
}; };
/**
 * struct hl_open_stats_info - device open statistics information
 * @open_counter: ever growing counter, increased on each successful dev open
 * @last_open_period_ms: duration (ms) device was open last time; the driver
 *                       records it in jiffies when the device is released and
 *                       converts it to milliseconds when it is queried
 *
 * Returned to user-space by the INFO ioctl for the HL_INFO_OPEN_STATS opcode.
 */
struct hl_open_stats_info {
__u64 open_counter;
__u64 last_open_period_ms;
};
/** /**
* struct hl_power_info - power information * struct hl_power_info - power information
* @power: power consumption * @power: power consumption
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment