Commit e855869b authored by Tomer Tayar, committed by Oded Gabbay

accel/habanalabs: fix glbl error cause handling

The glbl error cause handling has a wrong assumption that all error
bits are consecutive.
Fix the handling to check all relevant error bits per ASIC.
Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: Carl Vanderlip <quic_carlv@quicinc.com>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
parent c1e89ae4
...@@ -647,7 +647,7 @@ struct hl_hints_range { ...@@ -647,7 +647,7 @@ struct hl_hints_range {
* @num_engine_cores: number of engine cpu cores. * @num_engine_cores: number of engine cpu cores.
* @max_num_of_engines: maximum number of all engines in the ASIC. * @max_num_of_engines: maximum number of all engines in the ASIC.
* @num_of_special_blocks: special_blocks array size. * @num_of_special_blocks: special_blocks array size.
* @glbl_err_cause_num: global err cause number. * @glbl_err_max_cause_num: global err max cause number.
* @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is * @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is
* not supported. * not supported.
* @reserved_fw_mem_size: size in MB of dram memory reserved for FW. * @reserved_fw_mem_size: size in MB of dram memory reserved for FW.
...@@ -779,7 +779,7 @@ struct asic_fixed_properties { ...@@ -779,7 +779,7 @@ struct asic_fixed_properties {
u32 num_engine_cores; u32 num_engine_cores;
u32 max_num_of_engines; u32 max_num_of_engines;
u32 num_of_special_blocks; u32 num_of_special_blocks;
u32 glbl_err_cause_num; u32 glbl_err_max_cause_num;
u32 hbw_flush_reg; u32 hbw_flush_reg;
u32 reserved_fw_mem_size; u32 reserved_fw_mem_size;
u16 collective_first_sob; u16 collective_first_sob;
......
...@@ -7,15 +7,31 @@ ...@@ -7,15 +7,31 @@
#include "habanalabs.h" #include "habanalabs.h"
static const char * const hl_glbl_error_cause[HL_MAX_NUM_OF_GLBL_ERR_CAUSE] = { static const char * const hl_glbl_error_cause[] = {
"Error due to un-priv read", "Error due to un-priv read",
"Error due to un-secure read", "Error due to un-secure read",
"Error due to read from unmapped reg", "Error due to read from unmapped reg",
"Error due to un-priv write", "Error due to un-priv write",
"Error due to un-secure write", "Error due to un-secure write",
"Error due to write to unmapped reg", "Error due to write to unmapped reg",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"External I/F write sec violation", "External I/F write sec violation",
"External I/F write to un-mapped reg", "External I/F write to un-mapped reg",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"N/A",
"Read to write only", "Read to write only",
"Write to read only" "Write to read only"
}; };
...@@ -671,10 +687,11 @@ static bool hl_check_block_range_exclusion(struct hl_device *hdev, ...@@ -671,10 +687,11 @@ static bool hl_check_block_range_exclusion(struct hl_device *hdev,
static int hl_read_glbl_errors(struct hl_device *hdev, static int hl_read_glbl_errors(struct hl_device *hdev,
u32 blk_idx, u32 major, u32 minor, u32 sub_minor, void *data) u32 blk_idx, u32 major, u32 minor, u32 sub_minor, void *data)
{ {
struct hl_special_block_info *special_blocks = hdev->asic_prop.special_blocks; struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_special_block_info *special_blocks = prop->special_blocks;
struct hl_special_block_info *current_block = &special_blocks[blk_idx]; struct hl_special_block_info *current_block = &special_blocks[blk_idx];
u32 glbl_err_addr, glbl_err_cause, addr_val, cause_val, block_base, u32 glbl_err_addr, glbl_err_cause, addr_val, cause_val, block_base,
base = current_block->base_addr - lower_32_bits(hdev->asic_prop.cfg_base_address); base = current_block->base_addr - lower_32_bits(prop->cfg_base_address);
int i; int i;
block_base = base + major * current_block->major_offset + block_base = base + major * current_block->major_offset +
...@@ -689,12 +706,12 @@ static int hl_read_glbl_errors(struct hl_device *hdev, ...@@ -689,12 +706,12 @@ static int hl_read_glbl_errors(struct hl_device *hdev,
glbl_err_addr = block_base + HL_GLBL_ERR_ADDR_OFFSET; glbl_err_addr = block_base + HL_GLBL_ERR_ADDR_OFFSET;
addr_val = RREG32(glbl_err_addr); addr_val = RREG32(glbl_err_addr);
for (i = 0 ; i < hdev->asic_prop.glbl_err_cause_num ; i++) { for (i = 0 ; i <= prop->glbl_err_max_cause_num ; i++) {
if (cause_val & BIT(i)) if (cause_val & BIT(i))
dev_err_ratelimited(hdev->dev, dev_err_ratelimited(hdev->dev,
"%s, addr %#llx\n", "%s, addr %#llx\n",
hl_glbl_error_cause[i], hl_glbl_error_cause[i],
hdev->asic_prop.cfg_base_address + block_base + prop->cfg_base_address + block_base +
FIELD_GET(HL_GLBL_ERR_ADDRESS_MASK, addr_val)); FIELD_GET(HL_GLBL_ERR_ADDRESS_MASK, addr_val));
} }
......
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
struct hl_device; struct hl_device;
/* special blocks */ /* special blocks */
#define HL_MAX_NUM_OF_GLBL_ERR_CAUSE 10
#define HL_GLBL_ERR_ADDRESS_MASK GENMASK(11, 0) #define HL_GLBL_ERR_ADDRESS_MASK GENMASK(11, 0)
/* GLBL_ERR_ADDR register offset from the start of the block */ /* GLBL_ERR_ADDR register offset from the start of the block */
#define HL_GLBL_ERR_ADDR_OFFSET 0xF44 #define HL_GLBL_ERR_ADDR_OFFSET 0xF44
......
...@@ -164,6 +164,8 @@ ...@@ -164,6 +164,8 @@
/* HW scrambles only bits 0-25 */ /* HW scrambles only bits 0-25 */
#define HW_UNSCRAMBLED_BITS_MASK GENMASK_ULL(63, 26) #define HW_UNSCRAMBLED_BITS_MASK GENMASK_ULL(63, 26)
#define GAUDI2_GLBL_ERR_MAX_CAUSE_NUM 17
struct gaudi2_razwi_info { struct gaudi2_razwi_info {
u32 axuser_xy; u32 axuser_xy;
u32 rtr_ctrl; u32 rtr_ctrl;
...@@ -3587,7 +3589,7 @@ static int gaudi2_special_blocks_config(struct hl_device *hdev) ...@@ -3587,7 +3589,7 @@ static int gaudi2_special_blocks_config(struct hl_device *hdev)
int i, rc; int i, rc;
/* Configure Special blocks */ /* Configure Special blocks */
prop->glbl_err_cause_num = GAUDI2_NUM_OF_GLBL_ERR_CAUSE; prop->glbl_err_max_cause_num = GAUDI2_GLBL_ERR_MAX_CAUSE_NUM;
prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks); prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks);
prop->special_blocks = kmalloc_array(prop->num_of_special_blocks, prop->special_blocks = kmalloc_array(prop->num_of_special_blocks,
sizeof(*prop->special_blocks), GFP_KERNEL); sizeof(*prop->special_blocks), GFP_KERNEL);
......
...@@ -239,7 +239,6 @@ ...@@ -239,7 +239,6 @@
#define GAUDI2_NUM_TESTED_QS (GAUDI2_QUEUE_ID_CPU_PQ - GAUDI2_QUEUE_ID_PDMA_0_0) #define GAUDI2_NUM_TESTED_QS (GAUDI2_QUEUE_ID_CPU_PQ - GAUDI2_QUEUE_ID_PDMA_0_0)
#define GAUDI2_NUM_OF_GLBL_ERR_CAUSE 8
enum gaudi2_reserved_sob_id { enum gaudi2_reserved_sob_id {
GAUDI2_RESERVED_SOB_CS_COMPLETION_FIRST, GAUDI2_RESERVED_SOB_CS_COMPLETION_FIRST,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment