Commit f7f0085e authored by Koby Elbaz, committed by Oded Gabbay

accel/habanalabs: add uapi to stall/resume engine

The user might want to stall/resume engines to perform power testing
for various scenarios. Because our current
HL_CS_FLAGS_ENGINE_CORE_COMMAND command only handles the engines' cores,
we need to add another opcode for handling an entire engine and not just
its core.

The user supplies an array, where each entry holds the engine's ID and
the command to send to the engine. The size of the array is limited
by the number of engines in the ASIC (only Gaudi2 is currently
supported).

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
parent 28fbc058
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
#define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \ #define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \ HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \
HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND | \ HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND | \
HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES) HL_CS_FLAGS_ENGINES_COMMAND | HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)
#define MAX_TS_ITER_NUM 100 #define MAX_TS_ITER_NUM 100
...@@ -1319,6 +1319,8 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags) ...@@ -1319,6 +1319,8 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
return CS_UNRESERVE_SIGNALS; return CS_UNRESERVE_SIGNALS;
else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND) else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND)
return CS_TYPE_ENGINE_CORE; return CS_TYPE_ENGINE_CORE;
else if (cs_type_flags & HL_CS_FLAGS_ENGINES_COMMAND)
return CS_TYPE_ENGINES;
else if (cs_type_flags & HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES) else if (cs_type_flags & HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)
return CS_TYPE_FLUSH_PCI_HBW_WRITES; return CS_TYPE_FLUSH_PCI_HBW_WRITES;
else else
...@@ -2444,10 +2446,13 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, ...@@ -2444,10 +2446,13 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores, static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
u32 num_engine_cores, u32 core_command) u32 num_engine_cores, u32 core_command)
{ {
int rc;
struct hl_device *hdev = hpriv->hdev; struct hl_device *hdev = hpriv->hdev;
void __user *engine_cores_arr; void __user *engine_cores_arr;
u32 *cores; u32 *cores;
int rc;
if (!hdev->asic_prop.supports_engine_modes)
return -EPERM;
if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) { if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) {
dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores); dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores);
...@@ -2476,6 +2481,48 @@ static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores, ...@@ -2476,6 +2481,48 @@ static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
return rc; return rc;
} }
/*
 * cs_ioctl_engines - send a command to a user-supplied list of engines.
 *
 * @hpriv: private data of the calling process; provides the device.
 * @engines_arr_user_addr: user-space address of an array of u32 engine IDs.
 * @num_engines: number of entries in the user array.
 * @command: command to send to every engine in the array.
 *
 * The array length is limited by the ASIC's max_num_of_engines property, or
 * by num_engine_cores when the command is HL_ENGINE_CORE_RUN/HALT. The IDs are
 * copied into a temporary kernel buffer and passed, unvalidated, to the
 * ASIC-specific set_engines() callback.
 *
 * Return: the value returned by set_engines() on the normal path,
 *         -EPERM if the ASIC does not support changing engine modes,
 *         -EINVAL if the command or the number of engines is out of range,
 *         -ENOMEM if the temporary buffer cannot be allocated,
 *         -EFAULT if the array cannot be copied from user space.
 */
static int cs_ioctl_engines(struct hl_fpriv *hpriv, u64 engines_arr_user_addr,
				u32 num_engines, enum hl_engine_command command)
{
	struct hl_device *hdev = hpriv->hdev;
	u32 *engines, max_num_of_engines;
	void __user *engines_arr;
	int rc;

	if (!hdev->asic_prop.supports_engine_modes)
		return -EPERM;

	/*
	 * The command originates from user space, so check both ends of the
	 * range: the first defined command is HL_ENGINE_CORE_HALT (0 is not a
	 * command) and HL_ENGINE_COMMAND_MAX is an exclusive upper bound.
	 */
	if (command < HL_ENGINE_CORE_HALT || command >= HL_ENGINE_COMMAND_MAX) {
		dev_err(hdev->dev, "Engine command is invalid\n");
		return -EINVAL;
	}

	/* Core run/halt commands address engine cores, which have their own limit */
	max_num_of_engines = hdev->asic_prop.max_num_of_engines;
	if (command == HL_ENGINE_CORE_RUN || command == HL_ENGINE_CORE_HALT)
		max_num_of_engines = hdev->asic_prop.num_engine_cores;

	if (!num_engines || num_engines > max_num_of_engines) {
		/* num_engines is unsigned, so print it with %u */
		dev_err(hdev->dev, "Number of engines %u is invalid\n", num_engines);
		return -EINVAL;
	}

	engines_arr = (void __user *) (uintptr_t) engines_arr_user_addr;
	engines = kmalloc_array(num_engines, sizeof(*engines), GFP_KERNEL);
	if (!engines)
		return -ENOMEM;

	/* num_engines was bounded above, so the byte count cannot overflow */
	if (copy_from_user(engines, engines_arr, num_engines * sizeof(*engines))) {
		dev_err(hdev->dev, "Failed to copy engine-ids array from user\n");
		rc = -EFAULT;
		goto free_engines;
	}

	rc = hdev->asic_funcs->set_engines(hdev, engines, num_engines, command);

free_engines:
	kfree(engines);

	return rc;
}
static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv) static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv)
{ {
struct hl_device *hdev = hpriv->hdev; struct hl_device *hdev = hpriv->hdev;
...@@ -2547,6 +2594,10 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) ...@@ -2547,6 +2594,10 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores, rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores,
args->in.num_engine_cores, args->in.core_command); args->in.num_engine_cores, args->in.core_command);
break; break;
case CS_TYPE_ENGINES:
rc = cs_ioctl_engines(hpriv, args->in.engines,
args->in.num_engines, args->in.engine_command);
break;
case CS_TYPE_FLUSH_PCI_HBW_WRITES: case CS_TYPE_FLUSH_PCI_HBW_WRITES:
rc = cs_ioctl_flush_pci_hbw_writes(hpriv); rc = cs_ioctl_flush_pci_hbw_writes(hpriv);
break; break;
......
...@@ -372,6 +372,7 @@ enum hl_cs_type { ...@@ -372,6 +372,7 @@ enum hl_cs_type {
CS_RESERVE_SIGNALS, CS_RESERVE_SIGNALS,
CS_UNRESERVE_SIGNALS, CS_UNRESERVE_SIGNALS,
CS_TYPE_ENGINE_CORE, CS_TYPE_ENGINE_CORE,
CS_TYPE_ENGINES,
CS_TYPE_FLUSH_PCI_HBW_WRITES, CS_TYPE_FLUSH_PCI_HBW_WRITES,
}; };
...@@ -644,7 +645,8 @@ struct hl_hints_range { ...@@ -644,7 +645,8 @@ struct hl_hints_range {
* which the property supports_user_set_page_size is true * which the property supports_user_set_page_size is true
* (i.e. the DRAM supports multiple page sizes), otherwise * (i.e. the DRAM supports multiple page sizes), otherwise
* it will shall be equal to dram_page_size. * it will shall be equal to dram_page_size.
* @num_engine_cores: number of engine cpu cores * @num_engine_cores: number of engine cpu cores.
* @max_num_of_engines: maximum number of all engines in the ASIC.
* @num_of_special_blocks: special_blocks array size. * @num_of_special_blocks: special_blocks array size.
* @glbl_err_cause_num: global err cause number. * @glbl_err_cause_num: global err cause number.
* @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is * @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is
...@@ -695,6 +697,7 @@ struct hl_hints_range { ...@@ -695,6 +697,7 @@ struct hl_hints_range {
* @supports_user_set_page_size: true if user can set the allocation page size. * @supports_user_set_page_size: true if user can set the allocation page size.
* @dma_mask: the dma mask to be set for this device * @dma_mask: the dma mask to be set for this device
* @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported. * @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported.
* @supports_engine_modes: true if changing engines/engine_cores modes is supported.
*/ */
struct asic_fixed_properties { struct asic_fixed_properties {
struct hw_queue_properties *hw_queues_props; struct hw_queue_properties *hw_queues_props;
...@@ -773,6 +776,7 @@ struct asic_fixed_properties { ...@@ -773,6 +776,7 @@ struct asic_fixed_properties {
u32 xbar_edge_enabled_mask; u32 xbar_edge_enabled_mask;
u32 device_mem_alloc_default_page_size; u32 device_mem_alloc_default_page_size;
u32 num_engine_cores; u32 num_engine_cores;
u32 max_num_of_engines;
u32 num_of_special_blocks; u32 num_of_special_blocks;
u32 glbl_err_cause_num; u32 glbl_err_cause_num;
u32 hbw_flush_reg; u32 hbw_flush_reg;
...@@ -810,6 +814,7 @@ struct asic_fixed_properties { ...@@ -810,6 +814,7 @@ struct asic_fixed_properties {
u8 supports_user_set_page_size; u8 supports_user_set_page_size;
u8 dma_mask; u8 dma_mask;
u8 supports_advanced_cpucp_rc; u8 supports_advanced_cpucp_rc;
u8 supports_engine_modes;
}; };
/** /**
...@@ -1564,6 +1569,7 @@ struct engines_data { ...@@ -1564,6 +1569,7 @@ struct engines_data {
* @access_dev_mem: access device memory * @access_dev_mem: access device memory
* @set_dram_bar_base: set the base of the DRAM BAR * @set_dram_bar_base: set the base of the DRAM BAR
* @set_engine_cores: set a config command to engine cores * @set_engine_cores: set a config command to engine cores
* @set_engines: set a config command to user engines
* @send_device_activity: indication to FW about device availability * @send_device_activity: indication to FW about device availability
* @set_dram_properties: set DRAM related properties. * @set_dram_properties: set DRAM related properties.
* @set_binning_masks: set binning/enable masks for all relevant components. * @set_binning_masks: set binning/enable masks for all relevant components.
...@@ -1703,6 +1709,8 @@ struct hl_asic_funcs { ...@@ -1703,6 +1709,8 @@ struct hl_asic_funcs {
u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr); u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
int (*set_engine_cores)(struct hl_device *hdev, u32 *core_ids, int (*set_engine_cores)(struct hl_device *hdev, u32 *core_ids,
u32 num_cores, u32 core_command); u32 num_cores, u32 core_command);
int (*set_engines)(struct hl_device *hdev, u32 *engine_ids,
u32 num_engines, u32 engine_command);
int (*send_device_activity)(struct hl_device *hdev, bool open); int (*send_device_activity)(struct hl_device *hdev, bool open);
int (*set_dram_properties)(struct hl_device *hdev); int (*set_dram_properties)(struct hl_device *hdev);
int (*set_binning_masks)(struct hl_device *hdev); int (*set_binning_masks)(struct hl_device *hdev);
...@@ -1826,7 +1834,7 @@ struct hl_cs_outcome_store { ...@@ -1826,7 +1834,7 @@ struct hl_cs_outcome_store {
* @hpriv: pointer to the private (Kernel Driver) data of the process (fd). * @hpriv: pointer to the private (Kernel Driver) data of the process (fd).
* @hdev: pointer to the device structure. * @hdev: pointer to the device structure.
* @refcount: reference counter for the context. Context is released only when * @refcount: reference counter for the context. Context is released only when
* this hits 0l. It is incremented on CS and CS_WAIT. * this hits 0. It is incremented on CS and CS_WAIT.
* @cs_pending: array of hl fence objects representing pending CS. * @cs_pending: array of hl fence objects representing pending CS.
* @outcome_store: storage data structure used to remember outcomes of completed * @outcome_store: storage data structure used to remember outcomes of completed
* command submissions for a long time after CS id wraparound. * command submissions for a long time after CS id wraparound.
......
...@@ -656,6 +656,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) ...@@ -656,6 +656,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->cfg_size = CFG_SIZE; prop->cfg_size = CFG_SIZE;
prop->max_asid = MAX_ASID; prop->max_asid = MAX_ASID;
prop->num_of_events = GAUDI_EVENT_SIZE; prop->num_of_events = GAUDI_EVENT_SIZE;
prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
prop->tpc_enabled_mask = TPC_ENABLED_MASK; prop->tpc_enabled_mask = TPC_ENABLED_MASK;
set_default_power_values(hdev); set_default_power_values(hdev);
......
This diff is collapsed.
...@@ -86,6 +86,8 @@ ...@@ -86,6 +86,8 @@
#define DCORE0_TPC0_QM_CGM_STS_AGENT_IDLE_MASK 0x100 #define DCORE0_TPC0_QM_CGM_STS_AGENT_IDLE_MASK 0x100
/* NOTE(review): presumably the DBG_EXIT bit of the TPC EML_CFG DBG_CNT register - confirm against the register spec */
#define DCORE0_TPC0_EML_CFG_DBG_CNT_DBG_EXIT_MASK 0x40
/* CGM_IDLE_MASK is valid for all engines CGM idle check */ /* CGM_IDLE_MASK is valid for all engines CGM idle check */
#define CGM_IDLE_MASK DCORE0_TPC0_QM_CGM_STS_AGENT_IDLE_MASK #define CGM_IDLE_MASK DCORE0_TPC0_QM_CGM_STS_AGENT_IDLE_MASK
......
...@@ -164,6 +164,8 @@ ...@@ -164,6 +164,8 @@
#define mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR 0x4800040 #define mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR 0x4800040
/* DCORE0 TPC0 EML_CFG DBG_CNT register; TPC_EML_CFG_DBG_CNT_OFFSET is derived from this value */
#define mmDCORE0_TPC0_EML_CFG_DBG_CNT 0x40000
#define SM_OBJS_PROT_BITS_OFFS 0x14000 #define SM_OBJS_PROT_BITS_OFFS 0x14000
#define DCORE_OFFSET (mmDCORE1_TPC0_QM_BASE - mmDCORE0_TPC0_QM_BASE) #define DCORE_OFFSET (mmDCORE1_TPC0_QM_BASE - mmDCORE0_TPC0_QM_BASE)
...@@ -185,7 +187,10 @@ ...@@ -185,7 +187,10 @@
#define TPC_CFG_STALL_ON_ERR_OFFSET (mmDCORE0_TPC0_CFG_STALL_ON_ERR - mmDCORE0_TPC0_CFG_BASE) #define TPC_CFG_STALL_ON_ERR_OFFSET (mmDCORE0_TPC0_CFG_STALL_ON_ERR - mmDCORE0_TPC0_CFG_BASE)
#define TPC_CFG_TPC_INTR_MASK_OFFSET (mmDCORE0_TPC0_CFG_TPC_INTR_MASK - mmDCORE0_TPC0_CFG_BASE) #define TPC_CFG_TPC_INTR_MASK_OFFSET (mmDCORE0_TPC0_CFG_TPC_INTR_MASK - mmDCORE0_TPC0_CFG_BASE)
#define TPC_CFG_MSS_CONFIG_OFFSET (mmDCORE0_TPC0_CFG_MSS_CONFIG - mmDCORE0_TPC0_CFG_BASE) #define TPC_CFG_MSS_CONFIG_OFFSET (mmDCORE0_TPC0_CFG_MSS_CONFIG - mmDCORE0_TPC0_CFG_BASE)
/*
 * Offsets of the TPC EML_CFG DBG_CNT, EDMA core CFG_1 and MME CTRL_LO QM_STALL
 * registers from the base of their respective blocks, computed from the
 * DCORE0 register addresses.
 * NOTE(review): presumably used by the engine stall/resume flow - confirm.
 */
#define TPC_EML_CFG_DBG_CNT_OFFSET (mmDCORE0_TPC0_EML_CFG_DBG_CNT - mmDCORE0_TPC0_EML_CFG_BASE)
#define EDMA_CORE_CFG_STALL_OFFSET (mmDCORE0_EDMA0_CORE_CFG_1 - mmDCORE0_EDMA0_CORE_BASE)
#define MME_CTRL_LO_QM_STALL_OFFSET (mmDCORE0_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_BASE)
#define MME_ACC_INTR_MASK_OFFSET (mmDCORE0_MME_ACC_INTR_MASK - mmDCORE0_MME_ACC_BASE) #define MME_ACC_INTR_MASK_OFFSET (mmDCORE0_MME_ACC_INTR_MASK - mmDCORE0_MME_ACC_BASE)
#define MME_ACC_WR_AXI_AGG_COUT0_OFFSET (mmDCORE0_MME_ACC_WR_AXI_AGG_COUT0 - mmDCORE0_MME_ACC_BASE) #define MME_ACC_WR_AXI_AGG_COUT0_OFFSET (mmDCORE0_MME_ACC_WR_AXI_AGG_COUT0 - mmDCORE0_MME_ACC_BASE)
#define MME_ACC_WR_AXI_AGG_COUT1_OFFSET (mmDCORE0_MME_ACC_WR_AXI_AGG_COUT1 - mmDCORE0_MME_ACC_BASE) #define MME_ACC_WR_AXI_AGG_COUT1_OFFSET (mmDCORE0_MME_ACC_WR_AXI_AGG_COUT1 - mmDCORE0_MME_ACC_BASE)
......
...@@ -1535,17 +1535,31 @@ struct hl_cs_chunk { ...@@ -1535,17 +1535,31 @@ struct hl_cs_chunk {
*/ */
#define HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES 0x8000 #define HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES 0x8000
/*
 * The engines CS is merged into the existing CS ioctls.
 * Set this flag to send a command (see enum hl_engine_command) to a
 * user-supplied array of engine IDs, e.g. to stall or resume them.
 */
#define HL_CS_FLAGS_ENGINES_COMMAND 0x10000
#define HL_CS_STATUS_SUCCESS 0 #define HL_CS_STATUS_SUCCESS 0
#define HL_MAX_JOBS_PER_CS 512 #define HL_MAX_JOBS_PER_CS 512
/* HL_ENGINE_CORE_ values /*
* enum hl_engine_command - engine command
* *
* HL_ENGINE_CORE_HALT: engine core halt * @HL_ENGINE_CORE_HALT: engine core halt
* HL_ENGINE_CORE_RUN: engine core run * @HL_ENGINE_CORE_RUN: engine core run
*/ * @HL_ENGINE_STALL: user engine/s stall
#define HL_ENGINE_CORE_HALT (1 << 0) * @HL_ENGINE_RESUME: user engine/s resume
#define HL_ENGINE_CORE_RUN (1 << 1) */
enum hl_engine_command {
	/* Halt an engine core */
	HL_ENGINE_CORE_HALT	= 1,
	/* Run an engine core */
	HL_ENGINE_CORE_RUN	= 2,
	/* Stall user engine/s */
	HL_ENGINE_STALL		= 3,
	/* Resume user engine/s */
	HL_ENGINE_RESUME	= 4,
	/* Not a command: one past the last valid value, usable as an exclusive bound */
	HL_ENGINE_COMMAND_MAX
};
struct hl_cs_in { struct hl_cs_in {
...@@ -1569,6 +1583,18 @@ struct hl_cs_in { ...@@ -1569,6 +1583,18 @@ struct hl_cs_in {
/* the core command to be sent towards engine cores */ /* the core command to be sent towards engine cores */
__u32 core_command; __u32 core_command;
}; };
/* Valid only when HL_CS_FLAGS_ENGINES_COMMAND is set */
struct {
/* this holds address of array of uint32 for engines */
__u64 engines;
/* number of engines in engines array */
__u32 num_engines;
/* the engine command to be sent towards engines */
__u32 engine_command;
};
}; };
union { union {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment