habanalabs: added open_stats info ioctl

In a system with multiple ASICs, there is a need to provide monitoring
tools with information on how long a device was opened and how many
times a device was opened.

Therefore, we add a new opcode to the INFO ioctl to provide that
information.

Signed-off-by: Yuri Nudelman <ynudelman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
Yuri Nudelman 2021-05-24 11:25:21 +03:00 committed by Oded Gabbay
parent 1f7ef4bf41
commit e307b302be
5 changed files with 47 additions and 0 deletions

View File

@ -132,6 +132,9 @@ static int hl_device_release(struct inode *inode, struct file *filp)
dev_warn(hdev->dev,
"Device is still in use because there are live CS and/or memory mappings\n");
hdev->last_open_session_duration_jif =
jiffies - hdev->last_successful_open_jif;
return 0;
}

View File

@ -2137,6 +2137,11 @@ struct hl_mmu_funcs {
* the error will be ignored by the driver during
* device initialization. Mainly used to debug and
* workaround firmware bugs
* @last_successful_open_jif: timestamp (jiffies) of the last successful
* device open.
* @last_open_session_duration_jif: duration (jiffies) of the last device open
* session.
* @open_counter: number of successful device open operations.
* @in_reset: is device in reset flow.
* @curr_pll_profile: current PLL profile.
* @card_type: Various ASICs have several card types. This indicates the card
@ -2259,6 +2264,9 @@ struct hl_device {
u64 max_power;
u64 clock_gating_mask;
u64 boot_error_status_mask;
u64 last_successful_open_jif;
u64 last_open_session_duration_jif;
u64 open_counter;
atomic_t in_reset;
enum hl_pll_frequency curr_pll_profile;
enum cpucp_card_types card_type;

View File

@ -187,6 +187,9 @@ int hl_device_open(struct inode *inode, struct file *filp)
hl_debugfs_add_file(hpriv);
hdev->open_counter++;
hdev->last_successful_open_jif = jiffies;
return 0;
out_err:

View File

@ -460,6 +460,24 @@ static int power_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
min((size_t) max_size, sizeof(power_info))) ? -EFAULT : 0;
}
static int open_stats_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
struct hl_device *hdev = hpriv->hdev;
u32 max_size = args->return_size;
struct hl_open_stats_info open_stats_info = {0};
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
if ((!max_size) || (!out))
return -EINVAL;
open_stats_info.last_open_period_ms = jiffies64_to_msecs(
hdev->last_open_session_duration_jif);
open_stats_info.open_counter = hdev->open_counter;
return copy_to_user(out, &open_stats_info,
min((size_t) max_size, sizeof(open_stats_info))) ? -EFAULT : 0;
}
static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
struct device *dev)
{
@ -543,6 +561,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_POWER:
return power_info(hpriv, args);
case HL_INFO_OPEN_STATS:
return open_stats_info(hpriv, args);
default:
dev_err(dev, "Invalid request %d\n", args->op);
rc = -ENOTTY;

View File

@ -313,6 +313,7 @@ enum hl_device_status {
* HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore
* HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption
* HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency
* HL_INFO_OPEN_STATS - Retrieve info regarding recent device open calls
*/
#define HL_INFO_HW_IP_INFO 0
#define HL_INFO_HW_EVENTS 1
@ -331,6 +332,7 @@ enum hl_device_status {
#define HL_INFO_TOTAL_ENERGY 15
#define HL_INFO_PLL_FREQUENCY 16
#define HL_INFO_POWER 17
#define HL_INFO_OPEN_STATS 18
#define HL_INFO_VERSION_MAX_LEN 128
#define HL_INFO_CARD_NAME_MAX_LEN 16
@ -444,6 +446,16 @@ struct hl_pll_frequency_info {
__u16 output[HL_PLL_NUM_OUTPUTS];
};
/**
* struct hl_open_stats_info - device open statistics information
* @open_counter: ever growing counter, increased on each successful dev open
* @last_open_period_ms: duration (ms) device was open last time
*/
struct hl_open_stats_info {
__u64 open_counter;
__u64 last_open_period_ms;
};
/**
* struct hl_power_info - power information
* @power: power consumption