Commit d7bb1ac8 authored by Oded Gabbay's avatar Oded Gabbay

habanalabs: add gaudi2 asic-specific code

Add the ASIC-specific code for Gaudi2. Supply (almost) all of the
function callbacks that the driver's common code needs in order to
initialize, finalize and submit workloads to the Gaudi2 ASIC.

It also contains the code to initialize the F/W of the Gaudi2 ASIC
and to receive events from the F/W.

It also adds a new debugfs entry to dump razwi events. A razwi is a case
where the device's engines create a transaction that reaches an
invalid destination.
Signed-off-by: default avatarOded Gabbay <ogabbay@kernel.org>
parent 97c6d22f
......@@ -101,6 +101,15 @@ Description: Specify the size of the DMA transaction when using DMA to read
When the write is finished, the user can read the "data_dma"
blob
What: /sys/kernel/debug/habanalabs/hl<n>/dump_razwi_events
Date: Aug 2022
KernelVersion: 5.20
Contact: fkassabri@habana.ai
Description: Dumps all razwi events to dmesg, if any exist.
After reading the status register of an existing event,
the routine clears that status register.
Usage: cat dump_razwi_events
What: /sys/kernel/debug/habanalabs/hl<n>/dump_security_violations
Date: Jan 2021
KernelVersion: 5.12
......@@ -278,7 +287,7 @@ Description: Displays a list with information about the currently user
to DMA addresses
What: /sys/kernel/debug/habanalabs/hl<n>/userptr_lookup
Date: Aug 2021
Date: Oct 2021
KernelVersion: 5.15
Contact: ogabbay@kernel.org
Description: Allows to search for specific user pointers (user virtual
......
......@@ -14,4 +14,7 @@ habanalabs-y += $(HL_GOYA_FILES)
include $(src)/gaudi/Makefile
habanalabs-y += $(HL_GAUDI_FILES)
include $(src)/gaudi2/Makefile
habanalabs-y += $(HL_GAUDI2_FILES)
habanalabs-$(CONFIG_DEBUG_FS) += common/debugfs.o
......@@ -11,4 +11,5 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
common/command_buffer.o common/hw_queue.o common/irq.o \
common/sysfs.o common/hwmon.o common/memory.o \
common/command_submission.o common/firmware_if.o \
common/state_dump.o common/memory_mgr.o
common/state_dump.o common/memory_mgr.o \
common/decoder.o
......@@ -3334,9 +3334,8 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
interrupt_id = FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags);
first_interrupt = prop->first_available_user_msix_interrupt;
last_interrupt = prop->first_available_user_msix_interrupt +
prop->user_interrupt_count - 1;
first_interrupt = prop->first_available_user_interrupt;
last_interrupt = prop->first_available_user_interrupt + prop->user_interrupt_count - 1;
if ((interrupt_id < first_interrupt || interrupt_id > last_interrupt) &&
interrupt_id != HL_COMMON_USER_INTERRUPT_ID) {
......
......@@ -102,6 +102,9 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
hl_device_set_debug_mode(hdev, ctx, false);
hdev->asic_funcs->ctx_fini(ctx);
hl_dec_ctx_fini(ctx);
hl_cb_va_pool_fini(ctx);
hl_vm_ctx_fini(ctx);
hl_asid_free(hdev, ctx->asid);
......
......@@ -1348,6 +1348,17 @@ static ssize_t hl_timeout_locked_write(struct file *f, const char __user *buf,
return count;
}
/*
 * hl_check_razwi_happened - debugfs read handler of "dump_razwi_events"
 *
 * Invokes the ASIC-specific check_if_razwi_happened() callback for the device
 * attached to the debugfs entry. Nothing is copied to the user buffer; the
 * handler always returns 0 (no bytes read).
 */
static ssize_t hl_check_razwi_happened(struct file *f, char __user *buf,
					size_t count, loff_t *ppos)
{
	struct hl_dbg_device_entry *dbg_entry = file_inode(f)->i_private;

	dbg_entry->hdev->asic_funcs->check_if_razwi_happened(dbg_entry->hdev);

	return 0;
}
static const struct file_operations hl_mem_scrub_fops = {
.owner = THIS_MODULE,
.write = hl_memory_scrub,
......@@ -1437,6 +1448,11 @@ static const struct file_operations hl_timeout_locked_fops = {
.write = hl_timeout_locked_write
};
/*
 * File operations of the "dump_razwi_events" debugfs node. Only a read handler
 * is provided; reading the node runs hl_check_razwi_happened().
 */
static const struct file_operations hl_razwi_check_fops = {
.owner = THIS_MODULE,
.read = hl_check_razwi_happened
};
static const struct hl_info_list hl_debugfs_list[] = {
{"command_buffers", command_buffers_show, NULL},
{"command_submission", command_submission_show, NULL},
......@@ -1614,6 +1630,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
dev_entry,
&hl_security_violations_fops);
debugfs_create_file("dump_razwi_events",
0644,
dev_entry->root,
dev_entry,
&hl_razwi_check_fops);
debugfs_create_file("dma_size",
0200,
dev_entry->root,
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2022 HabanaLabs, Ltd.
* All Rights Reserved.
*/
#include "habanalabs.h"
#define VCMD_CONTROL_OFFSET 0x40 /* SWREG16 */
#define VCMD_IRQ_STATUS_OFFSET 0x44 /* SWREG17 */
#define VCMD_IRQ_STATUS_ENDCMD_MASK 0x1
#define VCMD_IRQ_STATUS_BUSERR_MASK 0x2
#define VCMD_IRQ_STATUS_TIMEOUT_MASK 0x4
#define VCMD_IRQ_STATUS_CMDERR_MASK 0x8
#define VCMD_IRQ_STATUS_ABORT_MASK 0x10
#define VCMD_IRQ_STATUS_RESET_MASK 0x20
/*
 * dec_print_abnrm_intr_source - print the names of the set IRQ status bits
 * @hdev: pointer to the habanalabs device structure
 * @irq_status: value read from the decoder's IRQ status register
 *
 * Nothing is printed when @irq_status is 0. When @irq_status is non-zero but
 * none of the known bits is set, the source is reported as "Unknown".
 */
static void dec_print_abnrm_intr_source(struct hl_device *hdev, u32 irq_status)
{
	/* Known status bits, in the order in which they are reported */
	static const struct {
		u32 mask;
		const char *name;
	} known_sources[] = {
		{ VCMD_IRQ_STATUS_ENDCMD_MASK,	" ENDCMD" },
		{ VCMD_IRQ_STATUS_BUSERR_MASK,	" BUSERR" },
		{ VCMD_IRQ_STATUS_TIMEOUT_MASK,	" TIMEOUT" },
		{ VCMD_IRQ_STATUS_CMDERR_MASK,	" CMDERR" },
		{ VCMD_IRQ_STATUS_ABORT_MASK,	" ABORT" },
		{ VCMD_IRQ_STATUS_RESET_MASK,	" RESET" },
	};
	/* Slot 0 keeps "Unknown" unless at least one known bit overwrites it */
	const char *names[6] = {"Unknown", "", "", "", "", ""};
	unsigned int src, used = 0;

	if (irq_status == 0)
		return;

	for (src = 0 ; src < sizeof(known_sources) / sizeof(known_sources[0]) ; src++)
		if (irq_status & known_sources[src].mask)
			names[used++] = known_sources[src].name;

	dev_err(hdev->dev, "abnormal interrupt source:%s%s%s%s%s%s\n",
		names[0], names[1], names[2], names[3], names[4], names[5]);
}
/*
 * dec_error_intr_work - handle an abnormal interrupt of a single decoder
 * @hdev: pointer to the habanalabs device structure
 * @base_addr: base address of the decoder's register block
 * @core_id: ID of the decoder, used for logging only
 *
 * Reads the decoder's IRQ status register, logs its value and the decoded
 * sources, and writes the value back to the same register. A hard reset of
 * the device is requested only if the TIMEOUT bit was set.
 */
static void dec_error_intr_work(struct hl_device *hdev, u32 base_addr, u32 core_id)
{
	bool reset_required = false;
	u32 irq_status;

	irq_status = RREG32(base_addr + VCMD_IRQ_STATUS_OFFSET);

	/* core_id is unsigned, so it is printed with %u */
	dev_err(hdev->dev, "Decoder abnormal interrupt %#x, core %u\n", irq_status, core_id);

	dec_print_abnrm_intr_source(hdev, irq_status);

	if (irq_status & VCMD_IRQ_STATUS_TIMEOUT_MASK)
		reset_required = true;

	/* Clear the interrupt */
	WREG32(base_addr + VCMD_IRQ_STATUS_OFFSET, irq_status);

	/* Flush the interrupt clear */
	RREG32(base_addr + VCMD_IRQ_STATUS_OFFSET);

	/* The reset is issued only after the status was written back and flushed */
	if (reset_required)
		hl_device_reset(hdev, HL_DRV_RESET_HARD);
}
/*
 * dec_completion_abnrm - work handler of a decoder's abnormal interrupt
 * @work: the completion_abnrm_work member embedded in a struct hl_dec
 *
 * Recovers the owning decoder instance and hands its device, base address and
 * core ID to dec_error_intr_work().
 */
static void dec_completion_abnrm(struct work_struct *work)
{
	struct hl_dec *dec;

	dec = container_of(work, struct hl_dec, completion_abnrm_work);

	dec_error_intr_work(dec->hdev, dec->base_addr, dec->core_id);
}
void hl_dec_fini(struct hl_device *hdev)
{
kfree(hdev->dec);
}
int hl_dec_init(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_dec *dec;
int rc, j;
/* if max core is 0, nothing to do*/
if (!prop->max_dec)
return 0;
hdev->dec = kcalloc(prop->max_dec, sizeof(struct hl_dec), GFP_KERNEL);
if (!hdev->dec)
return -ENOMEM;
for (j = 0 ; j < prop->max_dec ; j++) {
dec = hdev->dec + j;
dec->hdev = hdev;
INIT_WORK(&dec->completion_abnrm_work, dec_completion_abnrm);
dec->core_id = j;
dec->base_addr = hdev->asic_funcs->get_dec_base_addr(hdev, j);
if (!dec->base_addr) {
dev_err(hdev->dev, "Invalid base address of decoder %d\n", j);
rc = -EINVAL;
goto err_dec_fini;
}
}
return 0;
err_dec_fini:
hl_dec_fini(hdev);
return rc;
}
/**
 * hl_dec_ctx_fini - stop the enabled decoders when a context is torn down
 * @ctx: pointer to the context structure
 *
 * For every decoder index below prop->max_dec whose bit is set in
 * prop->decoder_enabled_mask, writes 0 to the decoder's control register.
 */
void hl_dec_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int core;

	for (core = 0 ; core < prop->max_dec ; core++) {
		/* Decoders that are not enabled are left untouched */
		if (!(prop->decoder_enabled_mask & BIT(core)))
			continue;

		/* Stop the decoder */
		WREG32(hdev->dec[core].base_addr + VCMD_CONTROL_OFFSET, 0);
	}
}
......@@ -1822,6 +1822,12 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
goto release_ctx;
}
rc = hl_dec_init(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to initialize the decoder module\n");
goto cb_pool_fini;
}
/*
* From this point, override rc (=0) in case of an error to allow
* debugging (by adding char devices and create sysfs nodes as part of
......@@ -1915,6 +1921,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
return 0;
cb_pool_fini:
hl_cb_pool_fini(hdev);
release_ctx:
if (hl_ctx_put(hdev->kernel_ctx) != 1)
dev_err(hdev->dev,
......@@ -2065,6 +2073,8 @@ void hl_device_fini(struct hl_device *hdev)
hl_debugfs_remove_device(hdev);
hl_dec_fini(hdev);
hl_vm_fini(hdev);
hl_mmu_fini(hdev);
......
......@@ -15,6 +15,14 @@
#define FW_FILE_MAX_SIZE 0x1400000 /* maximum size of 20MB */
/**
 * struct fw_binning_conf - binning configuration passed to the F/W in a
 *                          HL_COMMS_BINNING_CONF_TYPE COMMS message.
 * @tpc_binning: value placed in the message's tpc_binning_conf field.
 * @dec_binning: value placed in the message's dec_binning_conf field.
 * @hbm_binning: value placed in the message's hbm_binning_conf field.
 * @edma_binning: value placed in the message's edma_binning_conf field.
 * @mme_redundancy: value placed in the message's mme_redundancy_conf field.
 *
 * The values are kept in CPU byte order here; hl_fw_dynamic_send_msg()
 * converts each of them to little-endian when it builds the message.
 */
struct fw_binning_conf {
u64 tpc_binning;
u32 dec_binning;
u32 hbm_binning;
u32 edma_binning;
u32 mme_redundancy;
};
static char *extract_fw_ver_from_str(const char *fw_str)
{
char *str, *fw_ver, *whitespace;
......@@ -523,6 +531,11 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
err_val &= ~CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL;
}
if (err_val & CPU_BOOT_ERR0_BINNING_FAIL) {
dev_err(hdev->dev, "Device boot error - binning failure\n");
err_exists = true;
}
if (sts_val & CPU_BOOT_DEV_STS0_ENABLED)
dev_dbg(hdev->dev, "Device status0 %#x\n", sts_val);
......@@ -2359,6 +2372,19 @@ static int hl_fw_dynamic_send_msg(struct hl_device *hdev,
case HL_COMMS_RESET_CAUSE_TYPE:
msg.reset_cause = *(__u8 *) data;
break;
case HL_COMMS_BINNING_CONF_TYPE:
{
struct fw_binning_conf *binning_conf = (struct fw_binning_conf *) data;
msg.tpc_binning_conf = cpu_to_le64(binning_conf->tpc_binning);
msg.dec_binning_conf = cpu_to_le32(binning_conf->dec_binning);
msg.hbm_binning_conf = cpu_to_le32(binning_conf->hbm_binning);
msg.edma_binning_conf = cpu_to_le32(binning_conf->edma_binning);
msg.mme_redundancy_conf = cpu_to_le32(binning_conf->mme_redundancy);
break;
}
default:
dev_err(hdev->dev,
"Send COMMS message - invalid message type %u\n",
......
......@@ -31,6 +31,9 @@
#define HL_NAME "habanalabs"
struct hl_device;
struct hl_fpriv;
/* Use upper bits of mmap offset to store habana driver specific information.
* bits[63:59] - Encode mmap type
* bits[45:0] - mmap offset value
......@@ -69,7 +72,9 @@
#define HL_PCI_ELBI_TIMEOUT_MSEC 10 /* 10ms */
#define HL_SIM_MAX_TIMEOUT_US 10000000 /* 10s */
#define HL_SIM_MAX_TIMEOUT_US 100000000 /* 100s */
#define HL_INVALID_QUEUE UINT_MAX
#define HL_COMMON_USER_INTERRUPT_ID 0xFFF
......@@ -118,7 +123,12 @@ enum hl_mmu_page_table_location {
#define HL_PCI_NUM_BARS 6
#define HL_MAX_DCORES 4
/* Completion queue entry relates to completed job */
#define HL_COMPLETION_MODE_JOB 0
/* Completion queue entry relates to completed command submission */
#define HL_COMPLETION_MODE_CS 1
#define HL_MAX_DCORES 8
/*
* Reset Flags
......@@ -159,6 +169,31 @@ enum hl_mmu_page_table_location {
#define HL_DRV_RESET_FW_FATAL_ERR (1 << 6)
#define HL_DRV_RESET_DELAY (1 << 7)
/*
* Security
*/
#define HL_BLOCK_SIZE 0x1000
/**
 * struct iterate_module_ctx - HW module iterator
 * @fn: function to apply to each HW module instance
 * @data: optional data that is private to the iterator function
 */
struct iterate_module_ctx {
/*
 * callback for the HW module iterator
 * @hdev: pointer to the habanalabs device structure
 * @block: block (ASIC specific definition can be dcore/hdcore)
 * @inst: HW module instance within the block
 * @offset: offset of the current HW module instance from the first HW module
 *          instance in the first block
 * @data: function specific data
 */
void (*fn)(struct hl_device *hdev, int block, int inst, u32 offset, void *data);
void *data;
};
#define HL_MAX_SOBS_PER_MONITOR 8
/**
......@@ -202,9 +237,6 @@ struct pgt_info {
int num_of_ptes;
};
struct hl_device;
struct hl_fpriv;
/**
* enum hl_pci_match_mode - pci match mode per region
* @PCI_ADDRESS_MATCH_MODE: address match mode
......@@ -337,13 +369,14 @@ enum hl_collective_mode {
/**
* struct hw_queue_properties - queue information.
* @type: queue type.
* @queue_cb_alloc_flags: bitmap which indicates if the hw queue supports CB
* @cb_alloc_flags: bitmap which indicates if the hw queue supports CB
* that allocated by the Kernel driver and therefore,
* a CB handle can be provided for jobs on this queue.
* Otherwise, a CB address must be provided.
* @collective_mode: collective mode of current queue
* @driver_only: true if only the driver is allowed to send a job to this queue,
* false otherwise.
* @binned: True if the queue is binned out and should not be used
* @supports_sync_stream: True if queue supports sync stream
*/
struct hw_queue_properties {
......@@ -351,6 +384,7 @@ struct hw_queue_properties {
enum queue_cb_alloc_flags cb_alloc_flags;
enum hl_collective_mode collective_mode;
u8 driver_only;
u8 binned;
u8 supports_sync_stream;
};
......@@ -458,7 +492,7 @@ struct hl_hints_range {
* @dram_user_base_address: DRAM physical start address for user access.
* @dram_size: DRAM total size.
* @dram_pci_bar_size: size of PCI bar towards DRAM.
* @max_power_default: max power of the device after reset
* @max_power_default: max power of the device after reset.
* @dc_power_default: power consumed by the device in mode idle.
* @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page
* fault.
......@@ -466,12 +500,19 @@ struct hl_hints_range {
* @pcie_aux_dbi_reg_addr: Address of the PCIE_AUX DBI register.
* @mmu_pgt_addr: base physical address in DRAM of MMU page tables.
* @mmu_dram_default_page_addr: DRAM default page physical address.
* @tpc_enabled_mask: which TPCs are enabled.
* @tpc_binning_mask: which TPCs are binned. 0 means usable and 1 means binned.
* @dram_enabled_mask: which DRAMs are enabled.
* @dram_binning_mask: which DRAMs are binned. 0 means usable, 1 means binned.
* @cb_va_start_addr: virtual start address of command buffers which are mapped
* to the device's MMU.
* @cb_va_end_addr: virtual end address of command buffers which are mapped to
* the device's MMU.
* @dram_hints_align_mask: dram va hint addresses alignment mask which is used
* for hints validity check.
* @cfg_base_address: config space base address.
* @mmu_cache_mng_addr: address of the MMU cache.
* @mmu_cache_mng_size: size of the MMU cache.
* @device_dma_offset_for_host_access: the offset to add to host DMA addresses
* to enable the device to access them.
* @host_base_address: host physical start address for host DMA from device
......@@ -496,6 +537,12 @@ struct hl_hints_range {
* @high_pll: high PLL frequency used by the device.
* @cb_pool_cb_cnt: number of CBs in the CB pool.
* @cb_pool_cb_size: size of each CB in the CB pool.
* @decoder_enabled_mask: which decoders are enabled.
* @decoder_binning_mask: which decoders are binned, 0 means usable and 1
* means binned (at most one binned decoder per dcore).
* @edma_enabled_mask: which EDMAs are enabled.
* @edma_binning_mask: which EDMAs are binned, 0 means usable and 1 means
* binned (at most one binned DMA).
* @max_pending_cs: maximum of concurrent pending command submissions
* @max_queues: maximum amount of queues in the system
* @fw_preboot_cpu_boot_dev_sts0: bitmap representation of preboot cpu
......@@ -516,6 +563,13 @@ struct hl_hints_range {
* @fw_app_cpu_boot_dev_sts1: bitmap representation of application security
* status reported by FW, bit description can be
* found in CPU_BOOT_DEV_STS1
* @max_dec: maximum number of decoders
* @hmmu_hif_enabled_mask: mask of HMMUs/HIFs that are not isolated (enabled)
* 1- enabled, 0- isolated.
* @faulty_dram_cluster_map: mask of faulty DRAM cluster.
* 1- faulty cluster, 0- good cluster.
* @xbar_edge_enabled_mask: mask of XBAR_EDGEs that are not isolated (enabled)
* 1- enabled, 0- isolated.
* @device_mem_alloc_default_page_size: may be different than dram_page_size only for ASICs for
* which the property supports_user_set_page_size is true
* (i.e. the DRAM supports multiple page sizes), otherwise
......@@ -526,14 +580,17 @@ struct hl_hints_range {
* @sync_stream_first_mon: first monitor available for sync stream use
* @first_available_user_sob: first sob available for the user
* @first_available_user_mon: first monitor available for the user
* @first_available_user_msix_interrupt: first available msix interrupt
* reserved for the user
* @first_available_user_interrupt: first available interrupt reserved for the user
* @first_available_cq: first available CQ for the user.
* @user_interrupt_count: number of user interrupts.
* @user_dec_intr_count: number of decoder interrupts exposed to user.
* @cache_line_size: device cache line size.
* @server_type: Server type that the ASIC is currently installed in.
* The value is according to enum hl_server_type in uapi file.
* @tpc_enabled_mask: which TPCs are enabled.
* @completion_queues_count: number of completion queues.
* @completion_mode: 0 - job based completion, 1 - cs based completion
* @mme_master_slave_mode: 0 - Each MME works independently, 1 - MME works
* in Master/Slave mode
* @fw_security_enabled: true if security measures are enabled in firmware,
* false otherwise
* @fw_cpu_boot_dev_sts0_valid: status bits are valid and can be fetched from
......@@ -588,9 +645,16 @@ struct asic_fixed_properties {
u64 pcie_aux_dbi_reg_addr;
u64 mmu_pgt_addr;
u64 mmu_dram_default_page_addr;
u64 tpc_enabled_mask;
u64 tpc_binning_mask;
u64 dram_enabled_mask;
u64 dram_binning_mask;
u64 cb_va_start_addr;
u64 cb_va_end_addr;
u64 dram_hints_align_mask;
u64 cfg_base_address;
u64 mmu_cache_mng_addr;
u64 mmu_cache_mng_size;
u64 device_dma_offset_for_host_access;
u64 host_base_address;
u64 host_end_address;
......@@ -613,6 +677,10 @@ struct asic_fixed_properties {
u32 high_pll;
u32 cb_pool_cb_cnt;
u32 cb_pool_cb_size;
u32 decoder_enabled_mask;
u32 decoder_binning_mask;
u32 edma_enabled_mask;
u32 edma_binning_mask;
u32 max_pending_cs;
u32 max_queues;
u32 fw_preboot_cpu_boot_dev_sts0;
......@@ -621,6 +689,10 @@ struct asic_fixed_properties {
u32 fw_bootfit_cpu_boot_dev_sts1;
u32 fw_app_cpu_boot_dev_sts0;
u32 fw_app_cpu_boot_dev_sts1;
u32 max_dec;
u32 hmmu_hif_enabled_mask;
u32 faulty_dram_cluster_map;
u32 xbar_edge_enabled_mask;
u32 device_mem_alloc_default_page_size;
u16 collective_first_sob;
u16 collective_first_mon;
......@@ -628,12 +700,15 @@ struct asic_fixed_properties {
u16 sync_stream_first_mon;
u16 first_available_user_sob[HL_MAX_DCORES];
u16 first_available_user_mon[HL_MAX_DCORES];
u16 first_available_user_msix_interrupt;
u16 first_available_user_interrupt;
u16 first_available_cq[HL_MAX_DCORES];
u16 user_interrupt_count;
u16 user_dec_intr_count;
u16 cache_line_size;
u16 server_type;
u8 tpc_enabled_mask;
u8 completion_queues_count;
u8 completion_mode;
u8 mme_master_slave_mode;
u8 fw_security_enabled;
u8 fw_cpu_boot_dev_sts0_valid;
u8 fw_cpu_boot_dev_sts1_valid;
......@@ -814,7 +889,6 @@ struct hl_cb {
* QUEUES
*/
struct hl_cs;
struct hl_cs_job;
/* Queue length of external and HW queues */
......@@ -937,12 +1011,14 @@ struct hl_cq {
* @wait_list_head: head to the list of user threads pending on this interrupt
* @wait_list_lock: protects wait_list_head
* @interrupt_id: msix interrupt id
* @is_decoder: whether this entry represents a decoder interrupt
*/
struct hl_user_interrupt {
struct hl_device *hdev;
struct list_head wait_list_head;
spinlock_t wait_list_lock;
u32 interrupt_id;
bool is_decoder;
};
/**
......@@ -1028,23 +1104,36 @@ struct hl_eq {
bool check_eqe_index;
};
/*
* ASICs
/**
 * struct hl_dec - describes a decoder sw instance.
 * @hdev: pointer to the device structure.
 * @completion_abnrm_work: work item whose handler processes the decoder's
 *                         error (abnormal) interrupt.
 * @core_id: ID of the decoder.
 * @base_addr: base address of the decoder.
 */
struct hl_dec {
struct hl_device *hdev;
struct work_struct completion_abnrm_work;
u32 core_id;
u32 base_addr;
};
/**
* enum hl_asic_type - supported ASIC types.
* @ASIC_INVALID: Invalid ASIC type.
* @ASIC_GOYA: Goya device.
* @ASIC_GAUDI: Gaudi device.
* @ASIC_GOYA: Goya device (HL-1000).
* @ASIC_GAUDI: Gaudi device (HL-2000).
* @ASIC_GAUDI_SEC: Gaudi secured device (HL-2000).
* @ASIC_GAUDI2: Gaudi2 device.
* @ASIC_GAUDI2_SEC: Gaudi2 secured device.
*/
enum hl_asic_type {
ASIC_INVALID,
ASIC_GOYA,
ASIC_GAUDI,
ASIC_GAUDI_SEC
ASIC_GAUDI_SEC,
ASIC_GAUDI2,
ASIC_GAUDI2_SEC,
};
struct hl_cs_parser;
......@@ -1220,6 +1309,8 @@ struct fw_load_mgr {
u8 fw_comp_loaded;
};
struct hl_cs;
/**
 * struct hl_asic_funcs - ASIC specific functions that can be called from
 * common code.
......@@ -1260,6 +1351,8 @@ struct fw_load_mgr {
* @asic_dma_pool_free: free small DMA allocation from pool.
* @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
* @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
* @asic_dma_unmap_single: unmap a single DMA buffer
* @asic_dma_map_single: map a single buffer to a DMA
* @hl_dma_unmap_sgtable: DMA unmap scatter-gather table.
* @cs_parser: parse Command Submission.
* @asic_dma_map_sgtable: DMA map scatter-gather table.
......@@ -1284,6 +1377,8 @@ struct fw_load_mgr {
* @non_hard_reset_late_init: perform certain actions needed after a reset which is not hard-reset
* @hw_queues_lock: acquire H/W queues lock.
* @hw_queues_unlock: release H/W queues lock.
 * @kdma_lock: acquire the KDMA lock of the given dcore. Relevant from GRECO ASIC
 * @kdma_unlock: release the KDMA lock of the given dcore. Relevant from GRECO ASIC
* @get_pci_id: retrieve PCI ID.
* @get_eeprom_data: retrieve EEPROM data from F/W.
* @get_monitor_dump: retrieve monitor registers dump from F/W.
......@@ -1300,6 +1395,7 @@ struct fw_load_mgr {
* @halt_coresight: stop the ETF and ETR traces.
* @ctx_init: context dependent initialization.
* @ctx_fini: context dependent cleanup.
* @pre_schedule_cs: Perform pre-CS-scheduling operations.
* @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index.
* @load_firmware_to_device: load the firmware to the device's memory
* @load_boot_fit_to_device: load boot fit to device's memory
......@@ -1310,9 +1406,11 @@ struct fw_load_mgr {
* @reset_sob: Reset a SOB.
* @reset_sob_group: Reset SOB group
* @get_device_time: Get the device time.
* @pb_print_security_errors: print security errors according block and cause
* @collective_wait_init_cs: Generate collective master/slave packets
* and place them in the relevant cs jobs
* @collective_wait_create_jobs: allocate collective wait cs jobs
* @get_dec_base_addr: get the base address of a given decoder.
* @scramble_addr: Routine to scramble the address prior of mapping it
* in the MMU.
* @descramble_addr: Routine to de-scramble the address prior of
......@@ -1326,6 +1424,7 @@ struct fw_load_mgr {
* driver is ready to receive asynchronous events. This
* function should be called during the first init and
* after every hard-reset of the device
* @ack_mmu_errors: check and ack mmu errors, page fault, access violation.
* @get_msi_info: Retrieve asic-specific MSI ID of the f/w async event
* @map_pll_idx_to_fw_idx: convert driver specific per asic PLL index to
* generic f/w compatible PLL Indexes
......@@ -1335,6 +1434,7 @@ struct fw_load_mgr {
* @get_sob_addr: get SOB base address offset.
* @set_pci_memory_regions: setting properties of PCI memory regions
* @get_stream_master_qid_arr: get pointer to stream masters QID array
* @check_if_razwi_happened: check if there was a razwi due to RR violation.
* @access_dev_mem: access device memory
* @set_dram_bar_base: set the base of the DRAM BAR
*/
......@@ -1372,6 +1472,12 @@ struct hl_asic_funcs {
size_t size, dma_addr_t *dma_handle);
void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
size_t size, void *vaddr);
void (*asic_dma_unmap_single)(struct hl_device *hdev,
dma_addr_t dma_addr, int len,
enum dma_data_direction dir);
dma_addr_t (*asic_dma_map_single)(struct hl_device *hdev,
void *addr, int len,
enum dma_data_direction dir);
void (*hl_dma_unmap_sgtable)(struct hl_device *hdev,
struct sg_table *sgt,
enum dma_data_direction dir);
......@@ -1408,6 +1514,8 @@ struct hl_asic_funcs {
int (*non_hard_reset_late_init)(struct hl_device *hdev);
void (*hw_queues_lock)(struct hl_device *hdev);
void (*hw_queues_unlock)(struct hl_device *hdev);
void (*kdma_lock)(struct hl_device *hdev, int dcore_id);
void (*kdma_unlock)(struct hl_device *hdev, int dcore_id);
u32 (*get_pci_id)(struct hl_device *hdev);
int (*get_eeprom_data)(struct hl_device *hdev, void *data, size_t max_size);
int (*get_monitor_dump)(struct hl_device *hdev, void *data);
......@@ -1420,6 +1528,7 @@ struct hl_asic_funcs {
void (*halt_coresight)(struct hl_device *hdev, struct hl_ctx *ctx);
int (*ctx_init)(struct hl_ctx *ctx);
void (*ctx_fini)(struct hl_ctx *ctx);
int (*pre_schedule_cs)(struct hl_cs *cs);
u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
int (*load_firmware_to_device)(struct hl_device *hdev);
int (*load_boot_fit_to_device)(struct hl_device *hdev);
......@@ -1432,11 +1541,14 @@ struct hl_asic_funcs {
void (*reset_sob)(struct hl_device *hdev, void *data);
void (*reset_sob_group)(struct hl_device *hdev, u16 sob_group);
u64 (*get_device_time)(struct hl_device *hdev);
void (*pb_print_security_errors)(struct hl_device *hdev,
u32 block_addr, u32 cause, u32 offended_addr);
int (*collective_wait_init_cs)(struct hl_cs *cs);
int (*collective_wait_create_jobs)(struct hl_device *hdev,
struct hl_ctx *ctx, struct hl_cs *cs,
u32 wait_queue_id, u32 collective_engine_id,
u32 encaps_signal_offset);
u32 (*get_dec_base_addr)(struct hl_device *hdev, u32 core_id);
u64 (*scramble_addr)(struct hl_device *hdev, u64 addr);
u64 (*descramble_addr)(struct hl_device *hdev, u64 addr);
void (*ack_protection_bits_errors)(struct hl_device *hdev);
......@@ -1445,6 +1557,7 @@ struct hl_asic_funcs {
int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
u32 block_id, u32 block_size);
void (*enable_events_from_fw)(struct hl_device *hdev);
int (*ack_mmu_errors)(struct hl_device *hdev, u64 mmu_cap_mask);
void (*get_msi_info)(__le32 *table);
int (*map_pll_idx_to_fw_idx)(u32 pll_idx);
void (*init_firmware_loader)(struct hl_device *hdev);
......@@ -1453,6 +1566,7 @@ struct hl_asic_funcs {
u32 (*get_sob_addr)(struct hl_device *hdev, u32 sob_id);
void (*set_pci_memory_regions)(struct hl_device *hdev);
u32* (*get_stream_master_qid_arr)(void);
void (*check_if_razwi_happened)(struct hl_device *hdev);
int (*mmu_get_real_page_size)(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
u32 page_size, u32 *real_page_size, bool is_dram_addr);
int (*access_dev_mem)(struct hl_device *hdev, enum pci_region region_type,
......@@ -1698,6 +1812,7 @@ struct hl_userptr {
* @timeout_jiffies: cs timeout in jiffies.
* @submission_time_jiffies: submission time of the cs
* @type: CS_TYPE_*.
* @jobs_cnt: counter of submitted jobs on all queues.
* @encaps_sig_hdl_id: encaps signals handle id, set for the first staged cs.
* @sob_addr_offset: sob offset from the configuration base address.
* @initial_sob_count: count of completed signals in SOB before current submission of signal or
......@@ -1736,6 +1851,7 @@ struct hl_cs {
u64 timeout_jiffies;
u64 submission_time_jiffies;
enum hl_cs_type type;
u32 jobs_cnt;
u32 encaps_sig_hdl_id;
u32 sob_addr_offset;
u16 initial_sob_count;
......@@ -2333,7 +2449,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
__timeout = ktime_add_us(ktime_get(), timeout_us); \
else \
__timeout = ktime_add_us(ktime_get(),\
min((u64)(timeout_us * 10), \
min((u64)(timeout_us * 100), \
(u64) HL_SIM_MAX_TIMEOUT_US)); \
might_sleep_if(sleep_us); \
for (;;) { \
......@@ -2381,6 +2497,23 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
(cond) ? 0 : -ETIMEDOUT; \
})
/*
 * HL_USR_MAPPED_BLK_INIT - fill a struct user_mapped_block
 * @blk: pointer to the struct user_mapped_block to initialize
 * @base: value stored in the block's address field
 * @sz: value stored in the block's size field
 *
 * Every argument is evaluated exactly once. The arguments are parenthesized
 * and the internal pointer uses a reserved-style name, so that an argument
 * expression which mentions a caller variable (e.g. one named "p") is not
 * captured by the macro's own local.
 */
#define HL_USR_MAPPED_BLK_INIT(blk, base, sz) \
({ \
	struct user_mapped_block *__blk = (blk); \
\
	__blk->address = (base); \
	__blk->size = (sz); \
})
/*
 * HL_USR_INTR_STRUCT_INIT - initialize a struct hl_user_interrupt
 * @usr_intr: the structure itself (an lvalue, not a pointer); it is expanded
 *            several times, so it must be free of side effects
 * @_hdev: pointer to the habanalabs device structure
 * @intr_id: value stored in the interrupt_id field
 * @decoder: value stored in the is_decoder field
 *
 * The device parameter is deliberately not named "hdev": a macro parameter
 * with that name would also be substituted into the ".hdev" member access,
 * which makes the macro work only for callers whose argument is literally
 * spelled "hdev".
 */
#define HL_USR_INTR_STRUCT_INIT(usr_intr, _hdev, intr_id, decoder) \
({ \
	(usr_intr).hdev = (_hdev); \
	(usr_intr).interrupt_id = (intr_id); \
	(usr_intr).is_decoder = (decoder); \
	INIT_LIST_HEAD(&(usr_intr).wait_list_head); \
	spin_lock_init(&(usr_intr).wait_list_lock); \
})
struct hwmon_chip_info;
/**
......@@ -2397,28 +2530,16 @@ struct hl_device_reset_work {
u32 flags;
};
/**
* struct hr_mmu_hop_addrs - used for holding per-device host-resident mmu hop
* information.
* @virt_addr: the virtual address of the hop.
* @phys-addr: the physical address of the hop (used by the device-mmu).
* @shadow_addr: The shadow of the hop used by the driver for walking the hops.
*/
struct hr_mmu_hop_addrs {
u64 virt_addr;
u64 phys_addr;
u64 shadow_addr;
};
/**
* struct hl_mmu_hr_pgt_priv - used for holding per-device mmu host-resident
* page-table internal information.
* @mmu_pgt_pool: pool of page tables used by MMU for allocating hops.
* @mmu_shadow_hop0: shadow array of hop0 tables.
* @mmu_pgt_pool: pool of page tables used by a host-resident MMU for
* allocating hops.
* @mmu_asid_hop0: per-ASID array of host-resident hop0 tables.
*/
struct hl_mmu_hr_priv {
struct gen_pool *mmu_pgt_pool;
struct hr_mmu_hop_addrs *mmu_shadow_hop0;
struct pgt_info *mmu_asid_hop0;
};
/**
......@@ -2601,6 +2722,16 @@ struct hl_clk_throttle {
u32 aggregated_reason;
};
/**
 * struct user_mapped_block - describes a hw block allowed to be mmapped by user
 * @address: physical HW block address
 * @size: allowed size for mmap
 *
 * Both fields can be filled in one step with HL_USR_MAPPED_BLK_INIT().
 */
struct user_mapped_block {
u32 address;
u32 size;
};
/**
* struct cs_timeout_info - info of last CS timeout occurred.
* @timestamp: CS timeout timestamp.
......@@ -2784,6 +2915,7 @@ struct hl_reset_info {
* @aggregated_cs_counters: aggregated cs counters among all contexts
* @mmu_priv: device-specific MMU data.
* @mmu_func: device-related MMU functions.
* @dec: list of decoder sw instance
* @fw_loader: FW loader manager.
* @pci_mem_region: array of memory regions in the PCI
* @state_dump_specs: constants and dictionaries needed to dump system state.
......@@ -2816,10 +2948,18 @@ struct hl_reset_info {
* used for CPU boot status
* @fw_comms_poll_interval_usec: FW comms/protocol poll interval in usec.
* used for COMMs protocols cmds(COMMS_STS_*)
* @dram_binning: contains mask of drams that is received from the f/w which indicates which
* drams are binned-out
* @tpc_binning: contains mask of tpc engines that is received from the f/w which indicates which
* tpc engines are binned-out
* @card_type: Various ASICs have several card types. This indicates the card
* type of the current device.
* @major: habanalabs kernel driver major.
* @high_pll: high PLL profile frequency.
* @decoder_binning: contains mask of decoder engines that is received from the f/w which
* indicates which decoder engines are binned-out
* @edma_binning: contains mask of edma engines that is received from the f/w which
* indicates which edma engines are binned-out
* @id: device minor.
* @id_control: minor of the control device
* @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit
......@@ -2924,6 +3064,8 @@ struct hl_device {
struct hl_mmu_priv mmu_priv;
struct hl_mmu_funcs mmu_func[MMU_NUM_PGT_LOCATIONS];
struct hl_dec *dec;
struct fw_load_mgr fw_loader;
struct pci_mem_region pci_mem_region[PCI_REGION_NUMBER];
......@@ -2951,10 +3093,14 @@ struct hl_device {
u64 fw_poll_interval_usec;
ktime_t last_successful_open_ktime;
u64 fw_comms_poll_interval_usec;
u64 dram_binning;
u64 tpc_binning;
enum cpucp_card_types card_type;
u32 major;
u32 high_pll;
u32 decoder_binning;
u32 edma_binning;
u16 id;
u16 id_control;
u16 cpu_pci_msb_addr;
......@@ -2995,12 +3141,10 @@ struct hl_device {
u8 reset_pcilink;
u8 cpu_queues_enable;
u8 pldm;
u8 axi_drain;
u8 sram_scrambler_enable;
u8 dram_scrambler_enable;
u8 hard_reset_on_fw_events;
u8 bmc_enable;
u8 rl_enable;
u8 reset_on_preboot_fail;
u8 reset_upon_device_release;
u8 reset_if_device_not_idle;
......@@ -3164,7 +3308,8 @@ void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q);
void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q);
irqreturn_t hl_irq_handler_cq(int irq, void *arg);
irqreturn_t hl_irq_handler_eq(int irq, void *arg);
irqreturn_t hl_irq_handler_user_cq(int irq, void *arg);
irqreturn_t hl_irq_handler_dec_abnrm(int irq, void *arg);
irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg);
irqreturn_t hl_irq_handler_default(int irq, void *arg);
u32 hl_cq_inc_ptr(u32 ptr);
......@@ -3237,6 +3382,7 @@ void hl_multi_cs_completion_init(struct hl_device *hdev);
void goya_set_asic_funcs(struct hl_device *hdev);
void gaudi_set_asic_funcs(struct hl_device *hdev);
void gaudi2_set_asic_funcs(struct hl_device *hdev);
int hl_vm_ctx_init(struct hl_ctx *ctx);
void hl_vm_ctx_fini(struct hl_ctx *ctx);
......@@ -3377,6 +3523,11 @@ void hl_encaps_handle_do_release(struct kref *ref);
void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
struct hl_cs *cs, struct hl_cs_job *job,
struct hl_cs_compl *cs_cmpl);
int hl_dec_init(struct hl_device *hdev);
void hl_dec_fini(struct hl_device *hdev);
void hl_dec_ctx_fini(struct hl_ctx *ctx);
void hl_release_pending_user_interrupts(struct hl_device *hdev);
int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig);
......
......@@ -293,7 +293,6 @@ static void set_driver_behavior_per_device(struct hl_device *hdev)
hdev->reset_if_device_not_idle = 1;
hdev->reset_pcilink = 0;
hdev->axi_drain = 0;
}
static void copy_kernel_module_params_to_device(struct hl_device *hdev)
......
......@@ -93,7 +93,7 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od;
hw_ip.psoc_pci_pll_div_factor = prop->psoc_pci_pll_div_factor;
hw_ip.first_available_interrupt_id = prop->first_available_user_msix_interrupt;
hw_ip.first_available_interrupt_id = prop->first_available_user_interrupt;
hw_ip.number_of_user_interrupts = prop->user_interrupt_count;
hw_ip.server_type = prop->server_type;
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2019 HabanaLabs, Ltd.
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*/
......@@ -217,8 +217,7 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi
return 0;
}
static void handle_user_cq(struct hl_device *hdev,
struct hl_user_interrupt *user_cq)
static void handle_user_cq(struct hl_device *hdev, struct hl_user_interrupt *user_cq)
{
struct hl_user_pending_interrupt *pend, *temp_pend;
struct list_head *ts_reg_free_list_head = NULL;
......@@ -271,22 +270,27 @@ static void handle_user_cq(struct hl_device *hdev,
}
/**
* hl_irq_handler_user_cq - irq handler for user completion queues
* hl_irq_handler_user_interrupt - irq handler for user interrupts
*
* @irq: irq number
* @arg: pointer to user interrupt structure
*
*/
irqreturn_t hl_irq_handler_user_cq(int irq, void *arg)
irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg)
{
struct hl_user_interrupt *user_cq = arg;
struct hl_device *hdev = user_cq->hdev;
struct hl_user_interrupt *user_int = arg;
struct hl_device *hdev = user_int->hdev;
/* If the interrupt is not a decoder interrupt, it means the interrupt
* belongs to a user cq. In that case, before handling it, we need to handle the common
* user cq
*/
if (!user_int->is_decoder)
/* Handle user cq interrupts registered on all interrupts */
handle_user_cq(hdev, &hdev->common_user_interrupt);
/* Handle user cq interrupts registered on this specific interrupt */
handle_user_cq(hdev, user_cq);
/* Handle user cq or decoder interrupts registered on this specific irq */
handle_user_cq(hdev, user_int);
return IRQ_HANDLED;
}
......@@ -304,9 +308,7 @@ irqreturn_t hl_irq_handler_default(int irq, void *arg)
struct hl_device *hdev = user_interrupt->hdev;
u32 interrupt_id = user_interrupt->interrupt_id;
dev_err(hdev->dev,
"got invalid user interrupt %u",
interrupt_id);
dev_err(hdev->dev, "got invalid user interrupt %u", interrupt_id);
return IRQ_HANDLED;
}
......@@ -389,12 +391,27 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg)
return IRQ_HANDLED;
}
/**
 * hl_irq_handler_dec_abnrm - irq handler for a decoder error interrupt
 *
 * @irq: irq number
 * @arg: pointer to the decoder structure the interrupt belongs to
 *
 * Queues the decoder's completion_abnrm_work item and reports the interrupt
 * as handled.
 */
irqreturn_t hl_irq_handler_dec_abnrm(int irq, void *arg)
{
	struct hl_dec *decoder = arg;
	struct work_struct *abnrm_work = &decoder->completion_abnrm_work;

	schedule_work(abnrm_work);

	return IRQ_HANDLED;
}
/**
* hl_cq_init - main initialization function for an cq object
*
* @hdev: pointer to device structure
* @q: pointer to cq structure
* @hw_queue_id: The H/W queue ID this completion queue belongs to
* HL_INVALID_QUEUE if cq is not attached to any specific queue
*
* Allocate dma-able memory for the completion queue and initialize fields
* Returns 0 on success
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright 2016-2021 HabanaLabs, Ltd.
* Copyright 2016-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*/
......@@ -2476,7 +2476,7 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
/**
* va_range_init() - initialize virtual addresses range.
* @hdev: pointer to the habanalabs device structure.
* @va_ranges: pointer to va_ranges array.
* @va_range: pointer to va_range structure.
* @start: range start address.
* @end: range end address.
* @page_size: page size for this va_range.
......
......@@ -280,21 +280,19 @@ int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
}
/* Point to the specified address */
rc |= hl_pci_iatu_write(hdev, offset + 0x14,
lower_32_bits(pci_region->addr));
rc |= hl_pci_iatu_write(hdev, offset + 0x18,
upper_32_bits(pci_region->addr));
rc |= hl_pci_iatu_write(hdev, offset + 0x14, lower_32_bits(pci_region->addr));
rc |= hl_pci_iatu_write(hdev, offset + 0x18, upper_32_bits(pci_region->addr));
/* Set bar type as memory */
rc |= hl_pci_iatu_write(hdev, offset + 0x0, 0);
/* Enable + bar/address match + match enable + bar number */
ctrl_reg_val = FIELD_PREP(IATU_REGION_CTRL_REGION_EN_MASK, 1);
ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_MATCH_MODE_MASK,
pci_region->mode);
ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_MATCH_MODE_MASK, pci_region->mode);
ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_NUM_MATCH_EN_MASK, 1);
if (pci_region->mode == PCI_BAR_MATCH_MODE)
ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_BAR_NUM_MASK,
pci_region->bar);
ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_BAR_NUM_MASK, pci_region->bar);
rc |= hl_pci_iatu_write(hdev, offset + 0x4, ctrl_reg_val);
......
......@@ -679,7 +679,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->sync_stream_first_mon +
(num_sync_stream_queues * HL_RSVD_MONS);
prop->first_available_user_msix_interrupt = USHRT_MAX;
prop->first_available_user_interrupt = USHRT_MAX;
for (i = 0 ; i < HL_MAX_DCORES ; i++)
prop->first_available_cq[i] = USHRT_MAX;
......
# SPDX-License-Identifier: GPL-2.0-only
HL_GAUDI2_FILES := gaudi2/gaudi2.o
This source diff could not be displayed because it is too large. You can view the blob instead.
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2020-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
#ifndef GAUDI2P_H_
#define GAUDI2P_H_
#include <uapi/misc/habanalabs.h>
#include "../common/habanalabs.h"
#include "../include/common/hl_boot_if.h"
#include "../include/gaudi2/gaudi2.h"
#include "../include/gaudi2/gaudi2_packets.h"
#include "../include/gaudi2/gaudi2_fw_if.h"
#include "../include/gaudi2/gaudi2_async_events.h"
#include "../include/gaudi2/gaudi2_async_virt_events.h"
#define GAUDI2_LINUX_FW_FILE "habanalabs/gaudi2/gaudi2-fit.itb"
#define GAUDI2_BOOT_FIT_FILE "habanalabs/gaudi2/gaudi2-boot-fit.itb"
#define MMU_PAGE_TABLES_INITIAL_SIZE 0x10000000 /* 256MB */
#define GAUDI2_CPU_TIMEOUT_USEC 30000000 /* 30s */
#define GAUDI2_FPGA_CPU_TIMEOUT 100000000 /* 100s */
#define NUMBER_OF_PDMA_QUEUES 2
#define NUMBER_OF_EDMA_QUEUES 8
#define NUMBER_OF_MME_QUEUES 4
#define NUMBER_OF_TPC_QUEUES 25
#define NUMBER_OF_NIC_QUEUES 24
#define NUMBER_OF_ROT_QUEUES 2
#define NUMBER_OF_CPU_QUEUES 1
#define NUMBER_OF_HW_QUEUES ((NUMBER_OF_PDMA_QUEUES + \
NUMBER_OF_EDMA_QUEUES + \
NUMBER_OF_MME_QUEUES + \
NUMBER_OF_TPC_QUEUES + \
NUMBER_OF_NIC_QUEUES + \
NUMBER_OF_ROT_QUEUES + \
NUMBER_OF_CPU_QUEUES) * \
NUM_OF_PQ_PER_QMAN)
#define NUMBER_OF_QUEUES (NUMBER_OF_CPU_QUEUES + NUMBER_OF_HW_QUEUES)
#define DCORE_NUM_OF_SOB \
(((mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_8191 - \
mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0) + 4) >> 2)
#define DCORE_NUM_OF_MONITORS \
(((mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_2047 - \
mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0) + 4) >> 2)
#define NUMBER_OF_DEC ((NUM_OF_DEC_PER_DCORE * NUM_OF_DCORES) + NUMBER_OF_PCIE_DEC)
/* Map all arcs dccm + arc schedulers acp blocks */
#define NUM_OF_USER_ACP_BLOCKS (NUM_OF_SCHEDULER_ARC + 2)
#define NUM_OF_USER_NIC_UMR_BLOCKS 15
#define NUM_OF_EXPOSED_SM_BLOCKS ((NUM_OF_DCORES - 1) * 2)
#define NUM_USER_MAPPED_BLOCKS \
(NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS + NUMBER_OF_DEC + \
NUM_OF_EXPOSED_SM_BLOCKS + \
(NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS))
/* Within the user mapped array, decoder entries start post all the ARC related
* entries
*/
#define USR_MAPPED_BLK_DEC_START_IDX \
(NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS + \
(NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS))
#define USR_MAPPED_BLK_SM_START_IDX \
(NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS + NUMBER_OF_DEC + \
(NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS))
#define SM_OBJS_BLOCK_SIZE (mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - \
mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0)
#define GAUDI2_MAX_PENDING_CS 64
/* Sob/Mon per CS + Sob/Mon for KDMA completion */
#define GAUDI2_RESERVED_SOBS (GAUDI2_MAX_PENDING_CS + 1)
#define GAUDI2_RESERVED_MONITORS (GAUDI2_MAX_PENDING_CS + 1)
#define GAUDI2_RESERVED_SOB_KDMA_COMP (GAUDI2_MAX_PENDING_CS)
#define GAUDI2_RESERVED_MON_KDMA_COMP (GAUDI2_MAX_PENDING_CS)
#if !IS_MAX_PENDING_CS_VALID(GAUDI2_MAX_PENDING_CS)
#error "GAUDI2_MAX_PENDING_CS must be power of 2 and greater than 1"
#endif
#define CORESIGHT_TIMEOUT_USEC 100000 /* 100 ms */
#define GAUDI2_PREBOOT_REQ_TIMEOUT_USEC 25000000 /* 25s */
#define GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC 10000000 /* 10s */
#define GAUDI2_NIC_CLK_FREQ 450000000ull /* 450 MHz */
#define DC_POWER_DEFAULT 60000 /* 60W */
#define GAUDI2_HBM_NUM 6
#define DMA_MAX_TRANSFER_SIZE U32_MAX
#define GAUDI2_DEFAULT_CARD_NAME "HL225"
#define QMAN_STREAMS 4
#define PQ_FETCHER_CACHE_SIZE 8
#define NUM_OF_MME_SBTE_PORTS 5
#define NUM_OF_MME_WB_PORTS 2
#define GAUDI2_ENGINE_ID_DCORE_OFFSET \
(GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)
/* DRAM Memory Map */
#define CPU_FW_IMAGE_SIZE 0x10000000 /* 256MB */
/* This define should be used only when working in a debug mode without dram.
* When working with dram, the driver size will be calculated dynamically.
*/
#define NIC_DEFAULT_DRV_SIZE 0x20000000 /* 512MB */
#define CPU_FW_IMAGE_ADDR DRAM_PHYS_BASE
#define NIC_NUMBER_OF_PORTS NIC_NUMBER_OF_ENGINES
#define NUMBER_OF_PCIE_DEC 2
#define PCIE_DEC_SHIFT 8
#define SRAM_USER_BASE_OFFSET 0
/* cluster binning */
#define MAX_FAULTY_HBMS 1
#define GAUDI2_XBAR_EDGE_FULL_MASK 0xF
#define GAUDI2_EDMA_FULL_MASK 0xFF
#define GAUDI2_DRAM_FULL_MASK 0x3F
/* Host virtual address space. */
#define VA_HOST_SPACE_PAGE_START 0xFFF0000000000000ull
#define VA_HOST_SPACE_PAGE_END 0xFFF0800000000000ull /* 140TB */
#define VA_HOST_SPACE_HPAGE_START 0xFFF0800000000000ull
#define VA_HOST_SPACE_HPAGE_END 0xFFF1000000000000ull /* 140TB */
#define VA_HOST_SPACE_USER_MAPPED_CB_START 0xFFF1000000000000ull
#define VA_HOST_SPACE_USER_MAPPED_CB_END 0xFFF1000100000000ull /* 4GB */
/* 140TB */
#define VA_HOST_SPACE_PAGE_SIZE (VA_HOST_SPACE_PAGE_END - VA_HOST_SPACE_PAGE_START)
/* 140TB */
#define VA_HOST_SPACE_HPAGE_SIZE (VA_HOST_SPACE_HPAGE_END - VA_HOST_SPACE_HPAGE_START)
#define VA_HOST_SPACE_SIZE (VA_HOST_SPACE_PAGE_SIZE + VA_HOST_SPACE_HPAGE_SIZE)
#define HOST_SPACE_INTERNAL_CB_SZ SZ_2M
/*
* HBM virtual address space
* Gaudi2 has 6 HBM devices, each supporting 16GB, for a total of 96GB at most.
* No core separation is supported so we can have one chunk of virtual address
* space just above the physical ones.
* The virtual address space starts immediately after the end of the physical
* address space which is determined at run-time.
*/
#define VA_HBM_SPACE_END 0x1002000000000000ull
#define HW_CAP_PLL BIT_ULL(0)
#define HW_CAP_DRAM BIT_ULL(1)
#define HW_CAP_PMMU BIT_ULL(2)
#define HW_CAP_CPU BIT_ULL(3)
#define HW_CAP_MSIX BIT_ULL(4)
#define HW_CAP_CPU_Q BIT_ULL(5)
#define HW_CAP_CPU_Q_SHIFT 5
#define HW_CAP_CLK_GATE BIT_ULL(6)
#define HW_CAP_KDMA BIT_ULL(7)
#define HW_CAP_SRAM_SCRAMBLER BIT_ULL(8)
#define HW_CAP_DCORE0_DMMU0 BIT_ULL(9)
#define HW_CAP_DCORE0_DMMU1 BIT_ULL(10)
#define HW_CAP_DCORE0_DMMU2 BIT_ULL(11)
#define HW_CAP_DCORE0_DMMU3 BIT_ULL(12)
#define HW_CAP_DCORE1_DMMU0 BIT_ULL(13)
#define HW_CAP_DCORE1_DMMU1 BIT_ULL(14)
#define HW_CAP_DCORE1_DMMU2 BIT_ULL(15)
#define HW_CAP_DCORE1_DMMU3 BIT_ULL(16)
#define HW_CAP_DCORE2_DMMU0 BIT_ULL(17)
#define HW_CAP_DCORE2_DMMU1 BIT_ULL(18)
#define HW_CAP_DCORE2_DMMU2 BIT_ULL(19)
#define HW_CAP_DCORE2_DMMU3 BIT_ULL(20)
#define HW_CAP_DCORE3_DMMU0 BIT_ULL(21)
#define HW_CAP_DCORE3_DMMU1 BIT_ULL(22)
#define HW_CAP_DCORE3_DMMU2 BIT_ULL(23)
#define HW_CAP_DCORE3_DMMU3 BIT_ULL(24)
#define HW_CAP_DMMU_MASK GENMASK_ULL(24, 9)
#define HW_CAP_DMMU_SHIFT 9
#define HW_CAP_PDMA_MASK BIT_ULL(26)
#define HW_CAP_EDMA_MASK GENMASK_ULL(34, 27)
#define HW_CAP_EDMA_SHIFT 27
#define HW_CAP_MME_MASK GENMASK_ULL(38, 35)
#define HW_CAP_MME_SHIFT 35
#define HW_CAP_ROT_MASK GENMASK_ULL(40, 39)
#define HW_CAP_ROT_SHIFT 39
#define HW_CAP_HBM_SCRAMBLER_HW_RESET BIT_ULL(41)
#define HW_CAP_HBM_SCRAMBLER_SW_RESET BIT_ULL(42)
#define HW_CAP_HBM_SCRAMBLER_MASK (HW_CAP_HBM_SCRAMBLER_HW_RESET | \
HW_CAP_HBM_SCRAMBLER_SW_RESET)
#define HW_CAP_HBM_SCRAMBLER_SHIFT 41
/*
 * Must be a 64-bit constant like the other HW_CAP_* bits of the capability
 * mask: BIT() yields an unsigned long, which cannot hold bit 43 where
 * unsigned long is 32 bits wide.
 */
#define HW_CAP_RESERVED			BIT_ULL(43)
#define HW_CAP_MMU_MASK (HW_CAP_PMMU | HW_CAP_DMMU_MASK)
/* Range Registers */
#define RR_TYPE_SHORT 0
#define RR_TYPE_LONG 1
#define RR_TYPE_SHORT_PRIV 2
#define RR_TYPE_LONG_PRIV 3
#define NUM_SHORT_LBW_RR 14
#define NUM_LONG_LBW_RR 4
#define NUM_SHORT_HBW_RR 6
#define NUM_LONG_HBW_RR 4
/*
 * RAZWI initiator coordinates: X takes the 5 low bits, Y the 4 bits above it.
 * RTR_ID_X_Y() packs an (x, y) pair into that layout; out-of-range bits of
 * either coordinate are masked off.
 */
#define RAZWI_INITIATOR_X_SHIFT		0
#define RAZWI_INITIATOR_X_MASK		0x1F
#define RAZWI_INITIATOR_Y_SHIFT		5
#define RAZWI_INITIATOR_Y_MASK		0xF

#define RTR_ID_X_Y(x, y) \
	((((x) & RAZWI_INITIATOR_X_MASK) << RAZWI_INITIATOR_X_SHIFT) | \
	 (((y) & RAZWI_INITIATOR_Y_MASK) << RAZWI_INITIATOR_Y_SHIFT))
/* decoders have separate mask */
#define HW_CAP_DEC_SHIFT 0
#define HW_CAP_DEC_MASK GENMASK_ULL(9, 0)
/* TPCs have separate mask */
#define HW_CAP_TPC_SHIFT 0
#define HW_CAP_TPC_MASK GENMASK_ULL(24, 0)
/* nics have separate mask */
#define HW_CAP_NIC_SHIFT 0
#define HW_CAP_NIC_MASK GENMASK_ULL(NIC_NUMBER_OF_ENGINES - 1, 0)
#define GAUDI2_ARC_PCI_MSB_ADDR(addr) (((addr) & GENMASK_ULL(49, 28)) >> 28)
/* Indices of the completion queues reserved by the driver */
enum gaudi2_reserved_cq_id {
	GAUDI2_RESERVED_CQ_COMPLETION = 0,
	GAUDI2_RESERVED_CQ_KDMA_COMPLETION,
	/* count of the entries above; must stay last */
	GAUDI2_RESERVED_CQ_NUMBER,
};
/*
 * Gaudi2 substitute TPCs numbering.
 * At most two faulty TPCs are allowed: the first faulty TPC is replaced by
 * TPC24 and the second one by TPC23.
 */
enum substitude_tpc {
	FAULTY_TPC_SUBTS_1_TPC_24 = 0,
	FAULTY_TPC_SUBTS_2_TPC_23,
	/* count of the entries above; must stay last */
	MAX_FAULTY_TPCS
};
/* DMA core indices, grouped by the dcore each engine belongs to */
enum gaudi2_dma_core_id {
	/* Dcore 0 */
	DMA_CORE_ID_PDMA0 = 0,
	DMA_CORE_ID_PDMA1,
	DMA_CORE_ID_EDMA0,
	DMA_CORE_ID_EDMA1,
	/* Dcore 1 */
	DMA_CORE_ID_EDMA2,
	DMA_CORE_ID_EDMA3,
	/* Dcore 2 */
	DMA_CORE_ID_EDMA4,
	DMA_CORE_ID_EDMA5,
	/* Dcore 3 */
	DMA_CORE_ID_EDMA6,
	DMA_CORE_ID_EDMA7,
	/* Dcore 0 */
	DMA_CORE_ID_KDMA,
	/* count of the entries above; must stay last */
	DMA_CORE_ID_SIZE
};
/* Rotator engine indices */
enum gaudi2_rotator_id {
	ROTATOR_ID_0 = 0,
	ROTATOR_ID_1,
	/* count of the entries above; must stay last */
	ROTATOR_ID_SIZE,
};
/* MME engine indices, one per dcore */
enum gaudi2_mme_id {
	MME_ID_DCORE0 = 0,
	MME_ID_DCORE1,
	MME_ID_DCORE2,
	MME_ID_DCORE3,
	/* count of the entries above; must stay last */
	MME_ID_SIZE,
};
/* TPC engine indices: six per dcore, followed by the extra dcore 0 TPC */
enum gaudi2_tpc_id {
	/* Dcore 0 */
	TPC_ID_DCORE0_TPC0 = 0,
	TPC_ID_DCORE0_TPC1,
	TPC_ID_DCORE0_TPC2,
	TPC_ID_DCORE0_TPC3,
	TPC_ID_DCORE0_TPC4,
	TPC_ID_DCORE0_TPC5,
	/* Dcore 1 */
	TPC_ID_DCORE1_TPC0 = 6,
	TPC_ID_DCORE1_TPC1,
	TPC_ID_DCORE1_TPC2,
	TPC_ID_DCORE1_TPC3,
	TPC_ID_DCORE1_TPC4,
	TPC_ID_DCORE1_TPC5,
	/* Dcore 2 */
	TPC_ID_DCORE2_TPC0 = 12,
	TPC_ID_DCORE2_TPC1,
	TPC_ID_DCORE2_TPC2,
	TPC_ID_DCORE2_TPC3,
	TPC_ID_DCORE2_TPC4,
	TPC_ID_DCORE2_TPC5,
	/* Dcore 3 */
	TPC_ID_DCORE3_TPC0 = 18,
	TPC_ID_DCORE3_TPC1,
	TPC_ID_DCORE3_TPC2,
	TPC_ID_DCORE3_TPC3,
	TPC_ID_DCORE3_TPC4,
	TPC_ID_DCORE3_TPC5,
	/* the PCI TPC is placed last (mapped like HW) */
	TPC_ID_DCORE0_TPC6 = 24,
	/* count of the entries above; must stay last */
	TPC_ID_SIZE,
};
/* Decoder indices: two per dcore, followed by the two PCIE decoders */
enum gaudi2_dec_id {
	/* Dcore 0 */
	DEC_ID_DCORE0_DEC0 = 0,
	DEC_ID_DCORE0_DEC1,
	/* Dcore 1 */
	DEC_ID_DCORE1_DEC0 = 2,
	DEC_ID_DCORE1_DEC1,
	/* Dcore 2 */
	DEC_ID_DCORE2_DEC0 = 4,
	DEC_ID_DCORE2_DEC1,
	/* Dcore 3 */
	DEC_ID_DCORE3_DEC0 = 6,
	DEC_ID_DCORE3_DEC1,
	/* PCIE */
	DEC_ID_PCIE_VDEC0 = 8,
	DEC_ID_PCIE_VDEC1,
	/* count of the entries above; must stay last */
	DEC_ID_SIZE,
};
/* HBM device indices */
enum gaudi2_hbm_id {
	HBM_ID0 = 0,
	HBM_ID1,
	HBM_ID2,
	HBM_ID3,
	HBM_ID4,
	HBM_ID5,
	/* count of the entries above; must stay last */
	HBM_ID_SIZE,
};
/* specific EDMA enumeration: two instances per dcore */
enum gaudi2_edma_id {
	/* Dcore 0 */
	EDMA_ID_DCORE0_INSTANCE0 = 0,
	EDMA_ID_DCORE0_INSTANCE1,
	/* Dcore 1 */
	EDMA_ID_DCORE1_INSTANCE0 = 2,
	EDMA_ID_DCORE1_INSTANCE1,
	/* Dcore 2 */
	EDMA_ID_DCORE2_INSTANCE0 = 4,
	EDMA_ID_DCORE2_INSTANCE1,
	/* Dcore 3 */
	EDMA_ID_DCORE3_INSTANCE0 = 6,
	EDMA_ID_DCORE3_INSTANCE1,
	/* count of the entries above; must stay last */
	EDMA_ID_SIZE,
};
/* User interrupt count is aligned with HW CQ count.
* We have 64 CQ's per dcore, CQ0 in dcore 0 is reserved for legacy mode
*/
#define GAUDI2_NUM_USER_INTERRUPTS 255
/*
 * Interrupt numbering. The enumeration starts at GAUDI2_EVENT_QUEUE_MSIX_IDX
 * and continues sequentially with an NRM/ABNRM pair per decoder, the
 * completion interrupt, the NIC ports range and a reserved range.
 * The user interrupts occupy the last GAUDI2_NUM_USER_INTERRUPTS entries, so
 * GAUDI2_IRQ_NUM_USER_LAST equals GAUDI2_IRQ_NUM_LAST (GAUDI2_MSIX_ENTRIES - 1).
 */
enum gaudi2_irq_num {
GAUDI2_IRQ_NUM_EVENT_QUEUE = GAUDI2_EVENT_QUEUE_MSIX_IDX,
/* decoders: one NRM and one ABNRM entry per decoder */
GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM,
GAUDI2_IRQ_NUM_DCORE0_DEC0_ABNRM,
GAUDI2_IRQ_NUM_DCORE0_DEC1_NRM,
GAUDI2_IRQ_NUM_DCORE0_DEC1_ABNRM,
GAUDI2_IRQ_NUM_DCORE1_DEC0_NRM,
GAUDI2_IRQ_NUM_DCORE1_DEC0_ABNRM,
GAUDI2_IRQ_NUM_DCORE1_DEC1_NRM,
GAUDI2_IRQ_NUM_DCORE1_DEC1_ABNRM,
GAUDI2_IRQ_NUM_DCORE2_DEC0_NRM,
GAUDI2_IRQ_NUM_DCORE2_DEC0_ABNRM,
GAUDI2_IRQ_NUM_DCORE2_DEC1_NRM,
GAUDI2_IRQ_NUM_DCORE2_DEC1_ABNRM,
GAUDI2_IRQ_NUM_DCORE3_DEC0_NRM,
GAUDI2_IRQ_NUM_DCORE3_DEC0_ABNRM,
GAUDI2_IRQ_NUM_DCORE3_DEC1_NRM,
GAUDI2_IRQ_NUM_DCORE3_DEC1_ABNRM,
GAUDI2_IRQ_NUM_SHARED_DEC0_NRM,
GAUDI2_IRQ_NUM_SHARED_DEC0_ABNRM,
GAUDI2_IRQ_NUM_SHARED_DEC1_NRM,
GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM,
GAUDI2_IRQ_NUM_COMPLETION,
/* NIC ports: NIC_NUMBER_OF_PORTS consecutive entries */
GAUDI2_IRQ_NUM_NIC_PORT_FIRST,
GAUDI2_IRQ_NUM_NIC_PORT_LAST = (GAUDI2_IRQ_NUM_NIC_PORT_FIRST + NIC_NUMBER_OF_PORTS - 1),
/* reserved range: everything between the NIC ports and the user range */
GAUDI2_IRQ_NUM_RESERVED_FIRST,
GAUDI2_IRQ_NUM_RESERVED_LAST = (GAUDI2_MSIX_ENTRIES - GAUDI2_NUM_USER_INTERRUPTS - 1),
/* user range: starts right after the reserved range */
GAUDI2_IRQ_NUM_USER_FIRST,
GAUDI2_IRQ_NUM_USER_LAST = (GAUDI2_IRQ_NUM_USER_FIRST + GAUDI2_NUM_USER_INTERRUPTS - 1),
GAUDI2_IRQ_NUM_LAST = (GAUDI2_MSIX_ENTRIES - 1)
};
/*
 * The start of the user range is derived from GAUDI2_MSIX_ENTRIES rather than
 * from the preceding entries, so verify at build time that it still lies
 * beyond the last decoder interrupt.
 */
static_assert(GAUDI2_IRQ_NUM_USER_FIRST > GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM);
/**
 * struct dup_block_ctx - context to initialize unit instances across multiple
 *                        blocks, where a block can be either a dcore or a
 *                        duplicated common module. This code relies on constant
 *                        offsets of blocks and unit instances in a block.
 * @instance_cfg_fn: instance specific configuration function. It receives the
 *                   device, a base address and the private @data pointer.
 * @data: private configuration data.
 * @base: base address of the first instance in the first block.
 * @block_off: subsequent blocks address spacing.
 * @instance_off: subsequent block's instances address spacing.
 * @enabled_mask: mask of enabled instances (1- enabled, 0- disabled).
 * @blocks: number of blocks.
 * @instances: unit instances per block.
 */
struct dup_block_ctx {
void (*instance_cfg_fn)(struct hl_device *hdev, u64 base, void *data);
void *data;
u64 base;
u64 block_off;
u64 instance_off;
u64 enabled_mask;
unsigned int blocks;
unsigned int instances;
};
/**
 * struct gaudi2_device - ASIC specific manage structure.
 * @cpucp_info_get: get information on device from CPU-CP
 * @mapped_blocks: array that holds the base address and size of all blocks
 *                 the user can map.
 * @lfsr_rand_seeds: array of MME ACC random seeds to set.
 * @hw_queues_lock: protects the H/W queues from concurrent access.
 * @kdma_lock: protects the KDMA engine from concurrent access.
 * @scratchpad_kernel_address: general purpose PAGE_SIZE contiguous memory,
 *                             this memory region should be write-only.
 *                             currently used for HBW QMAN writes which is
 *                             redundant.
 * @scratchpad_bus_address: scratchpad bus address
 * @dram_bar_cur_addr: current address of DRAM PCI bar.
 * @hw_cap_initialized: This field contains a bit per H/W engine. When that
 *                      engine is initialized, that bit is set by the driver to
 *                      signal we can use this engine in later code paths.
 *                      Each bit is cleared upon reset of its corresponding H/W
 *                      engine.
 * @active_hw_arc: This field contains a bit per ARC of an H/W engine with
 *                 exception of TPC and NIC engines. Once an engine arc is
 *                 initialized, its respective bit is set. Driver can uniquely
 *                 identify each initialized ARC and use this information in
 *                 later code paths. Each respective bit is cleared upon reset
 *                 of its corresponding ARC of the H/W engine.
 * @dec_hw_cap_initialized: This field contains a bit per decoder H/W engine.
 *                      When that engine is initialized, that bit is set by
 *                      the driver to signal we can use this engine in later
 *                      code paths.
 *                      Each bit is cleared upon reset of its corresponding H/W
 *                      engine.
 * @tpc_hw_cap_initialized: This field contains a bit per TPC H/W engine.
 *                      When that engine is initialized, that bit is set by
 *                      the driver to signal we can use this engine in later
 *                      code paths.
 *                      Each bit is cleared upon reset of its corresponding H/W
 *                      engine.
 * @active_tpc_arc: This field contains a bit per ARC of the TPC engines.
 *                  Once an engine arc is initialized, its respective bit is
 *                  set. Each respective bit is cleared upon reset of its
 *                  corresponding ARC of the TPC engine.
 * @nic_hw_cap_initialized: This field contains a bit per nic H/W engine.
 * @active_nic_arc: This field contains a bit per ARC of the NIC engines.
 *                  Once an engine arc is initialized, its respective bit is
 *                  set. Each respective bit is cleared upon reset of its
 *                  corresponding ARC of the NIC engine.
 * @hw_events: array that holds all H/W events that are defined valid.
 * @events_stat: array that holds histogram of all received events.
 * @events_stat_aggregate: same as events_stat but doesn't get cleared on reset.
 * @num_of_valid_hw_events: used to hold the number of valid H/W events.
 */
struct gaudi2_device {
int (*cpucp_info_get)(struct hl_device *hdev);
struct user_mapped_block mapped_blocks[NUM_USER_MAPPED_BLOCKS];
int lfsr_rand_seeds[MME_NUM_OF_LFSR_SEEDS];
spinlock_t hw_queues_lock;
spinlock_t kdma_lock;
void *scratchpad_kernel_address;
dma_addr_t scratchpad_bus_address;
u64 dram_bar_cur_addr;
u64 hw_cap_initialized;
u64 active_hw_arc;
u64 dec_hw_cap_initialized;
u64 tpc_hw_cap_initialized;
u64 active_tpc_arc;
u64 nic_hw_cap_initialized;
u64 active_nic_arc;
u32 hw_events[GAUDI2_EVENT_SIZE];
u32 events_stat[GAUDI2_EVENT_SIZE];
u32 events_stat_aggregate[GAUDI2_EVENT_SIZE];
u32 num_of_valid_hw_events;
};
extern const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE];
extern const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE];
extern const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE];
extern const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE];
extern const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES];
extern const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE];
void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx);
int gaudi2_coresight_init(struct hl_device *hdev);
int gaudi2_debug_coresight(struct hl_device *hdev, struct hl_ctx *ctx, void *data);
void gaudi2_halt_coresight(struct hl_device *hdev, struct hl_ctx *ctx);
void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx);
bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id);
void gaudi2_write_rr_to_all_lbw_rtrs(struct hl_device *hdev, u8 rr_type, u32 rr_index, u64 min_val,
u64 max_val);
void gaudi2_pb_print_security_errors(struct hl_device *hdev, u32 block_addr, u32 cause,
u32 offended_addr);
#endif /* GAUDI2P_H_ */
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2020-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
#ifndef GAUDI2_MASKS_H_
#define GAUDI2_MASKS_H_
#include "../include/gaudi2/asic_reg/gaudi2_regs.h"
/* Useful masks for bits in various registers */
#define QMAN_GLBL_ERR_CFG_MSG_EN_MASK \
((0xF << PDMA0_QM_GLBL_ERR_CFG_PQF_ERR_MSG_EN_SHIFT) | \
(0x1F << PDMA0_QM_GLBL_ERR_CFG_CQF_ERR_MSG_EN_SHIFT) | \
(0x1F << PDMA0_QM_GLBL_ERR_CFG_CP_ERR_MSG_EN_SHIFT))
#define QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK \
((0xF << PDMA0_QM_GLBL_ERR_CFG_PQF_STOP_ON_ERR_SHIFT) | \
(0x1F << PDMA0_QM_GLBL_ERR_CFG_CQF_STOP_ON_ERR_SHIFT) | \
(0x1F << PDMA0_QM_GLBL_ERR_CFG_CP_STOP_ON_ERR_SHIFT) | \
(0x1 << PDMA0_QM_GLBL_ERR_CFG_ARB_STOP_ON_ERR_SHIFT))
#define QMAN_GLBL_ERR_CFG1_MSG_EN_MASK \
(0x1 << PDMA0_QM_GLBL_ERR_CFG1_CQF_ERR_MSG_EN_SHIFT)
#define QMAN_GLBL_ERR_CFG1_STOP_ON_ERR_EN_MASK \
((0x1 << PDMA0_QM_GLBL_ERR_CFG1_CQF_STOP_ON_ERR_SHIFT) | \
(0x1 << PDMA0_QM_GLBL_ERR_CFG1_ARC_STOP_ON_ERR_SHIFT))
#define QM_PQC_LBW_WDATA \
((1 << DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_SHIFT) | \
(1 << DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_SHIFT))
#define QMAN_MAKE_TRUSTED \
((0xF << PDMA0_QM_GLBL_PROT_PQF_SHIFT) | \
(0x1 << PDMA0_QM_GLBL_PROT_ERR_SHIFT) | \
(0x1 << PDMA0_QM_GLBL_PROT_PQC_SHIFT))
#define QMAN_MAKE_TRUSTED_TEST_MODE \
((0xF << PDMA0_QM_GLBL_PROT_PQF_SHIFT) | \
(0xF << PDMA0_QM_GLBL_PROT_CQF_SHIFT) | \
(0xF << PDMA0_QM_GLBL_PROT_CP_SHIFT) | \
(0x1 << PDMA0_QM_GLBL_PROT_ERR_SHIFT) | \
(0x1 << PDMA0_QM_GLBL_PROT_PQC_SHIFT))
#define QMAN_ENABLE \
((0xF << PDMA0_QM_GLBL_CFG0_PQF_EN_SHIFT) | \
(0x1F << PDMA0_QM_GLBL_CFG0_CQF_EN_SHIFT) | \
(0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT) | \
(0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT))
#define PDMA1_QMAN_ENABLE \
((0x3 << PDMA0_QM_GLBL_CFG0_PQF_EN_SHIFT) | \
(0x1F << PDMA0_QM_GLBL_CFG0_CQF_EN_SHIFT) | \
(0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT) | \
(0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT))
/* QM_IDLE_MASK is valid for all engines QM idle check */
#define QM_IDLE_MASK (DCORE0_EDMA0_QM_GLBL_STS0_PQF_IDLE_MASK | \
DCORE0_EDMA0_QM_GLBL_STS0_CQF_IDLE_MASK | \
DCORE0_EDMA0_QM_GLBL_STS0_CP_IDLE_MASK)
#define QM_ARC_IDLE_MASK DCORE0_EDMA0_QM_GLBL_STS1_ARC_CQF_IDLE_MASK
#define MME_ARCH_IDLE_MASK \
(DCORE0_MME_CTRL_LO_ARCH_STATUS_SB_IN_EMPTY_MASK | \
DCORE0_MME_CTRL_LO_ARCH_STATUS_AGU_COUT_SM_IDLE_MASK | \
DCORE0_MME_CTRL_LO_ARCH_STATUS_WBC_AXI_IDLE_MASK | \
DCORE0_MME_CTRL_LO_ARCH_STATUS_SB_IN_AXI_IDLE_MASK | \
DCORE0_MME_CTRL_LO_ARCH_STATUS_QM_IDLE_MASK | \
DCORE0_MME_CTRL_LO_ARCH_STATUS_QM_RDY_MASK)
#define TPC_IDLE_MASK (DCORE0_TPC0_CFG_STATUS_SCALAR_PIPE_EMPTY_MASK | \
DCORE0_TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK | \
DCORE0_TPC0_CFG_STATUS_IQ_EMPTY_MASK | \
DCORE0_TPC0_CFG_STATUS_SB_EMPTY_MASK | \
DCORE0_TPC0_CFG_STATUS_QM_IDLE_MASK | \
DCORE0_TPC0_CFG_STATUS_QM_RDY_MASK)
#define DCORE0_TPC0_QM_CGM_STS_AGENT_IDLE_MASK 0x100
/* CGM_IDLE_MASK is valid for all engines CGM idle check */
#define CGM_IDLE_MASK DCORE0_TPC0_QM_CGM_STS_AGENT_IDLE_MASK
#define QM_GLBL_CFG1_PQF_STOP PDMA0_QM_GLBL_CFG1_PQF_STOP_MASK
#define QM_GLBL_CFG1_CQF_STOP PDMA0_QM_GLBL_CFG1_CQF_STOP_MASK
#define QM_GLBL_CFG1_CP_STOP PDMA0_QM_GLBL_CFG1_CP_STOP_MASK
#define QM_GLBL_CFG1_PQF_FLUSH PDMA0_QM_GLBL_CFG1_PQF_FLUSH_MASK
#define QM_GLBL_CFG1_CQF_FLUSH PDMA0_QM_GLBL_CFG1_CQF_FLUSH_MASK
#define QM_GLBL_CFG1_CP_FLUSH PDMA0_QM_GLBL_CFG1_CP_FLUSH_MASK
#define QM_GLBL_CFG2_ARC_CQF_STOP PDMA0_QM_GLBL_CFG2_ARC_CQF_STOP_MASK
#define QM_GLBL_CFG2_ARC_CQF_FLUSH PDMA0_QM_GLBL_CFG2_ARC_CQF_FLUSH_MASK
/* Individual QM arbiter error message enable bits */
#define QM_ARB_ERR_MSG_EN_CHOISE_OVF_MASK	0x1
#define QM_ARB_ERR_MSG_EN_CHOISE_WDT_MASK	0x2
#define QM_ARB_ERR_MSG_EN_AXI_LBW_ERR_MASK	0x4

/* All of the QM arbiter error message enable bits combined */
#define QM_ARB_ERR_MSG_EN_MASK \
	(QM_ARB_ERR_MSG_EN_CHOISE_OVF_MASK | \
	 QM_ARB_ERR_MSG_EN_CHOISE_WDT_MASK | \
	 QM_ARB_ERR_MSG_EN_AXI_LBW_ERR_MASK)
#define PCIE_AUX_FLR_CTRL_HW_CTRL_MASK 0x1
#define PCIE_AUX_FLR_CTRL_INT_MASK_MASK 0x2
#define MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK GENMASK(1, 0)
#define MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK BIT(2)
#define MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK BIT(3)
#define MME_ACC_INTR_MASK_AP_SRC_NAN_MASK BIT(4)
#define MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK BIT(5)
#define MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK BIT(6)
#define SM_CQ_L2H_MASK_VAL 0xFFFFFFFFFC000000ull
#define SM_CQ_L2H_CMPR_VAL 0x1000007FFC000000ull
#define SM_CQ_L2H_LOW_MASK GENMASK(31, 20)
#define SM_CQ_L2H_LOW_SHIFT 20
#define MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK \
REG_FIELD_MASK(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE, HOP4_PAGE_SIZE)
#define STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK \
REG_FIELD_MASK(DCORE0_HMMU0_STLB_HOP_CONFIGURATION, ONLY_LARGE_PAGE)
#define AXUSER_HB_SEC_ASID_MASK 0x3FF
#define AXUSER_HB_SEC_MMBP_MASK 0x400
#define MMUBP_ASID_MASK (AXUSER_HB_SEC_ASID_MASK | AXUSER_HB_SEC_MMBP_MASK)
#define ROT_MSS_HALT_WBC_MASK BIT(0)
#define ROT_MSS_HALT_RSB_MASK BIT(1)
#define ROT_MSS_HALT_MRSB_MASK BIT(2)
#endif /* GAUDI2_MASKS_H_ */
......@@ -470,7 +470,7 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->max_pending_cs = GOYA_MAX_PENDING_CS;
prop->first_available_user_msix_interrupt = USHRT_MAX;
prop->first_available_user_interrupt = USHRT_MAX;
for (i = 0 ; i < HL_MAX_DCORES ; i++)
prop->first_available_cq[i] = USHRT_MAX;
......
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2020-2021 HabanaLabs, Ltd.
* Copyright 2020-2022 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
......@@ -99,27 +99,265 @@ struct hl_eq_fw_alive {
__u8 pad[7];
};
enum hl_pcie_addr_dec_cause {
PCIE_ADDR_DEC_HBW_ERR_RESP,
PCIE_ADDR_DEC_LBW_ERR_RESP,
PCIE_ADDR_DEC_TLP_BLOCKED_BY_RR
struct hl_eq_intr_cause {
__le64 intr_cause_data;
};
struct hl_eq_pcie_addr_dec_data {
/* enum hl_pcie_addr_dec_cause */
__u8 addr_dec_cause;
__u8 pad[7];
struct hl_eq_pcie_drain_ind_data {
struct hl_eq_intr_cause intr_cause;
__le64 drain_wr_addr_lbw;
__le64 drain_rd_addr_lbw;
__le64 drain_wr_addr_hbw;
__le64 drain_rd_addr_hbw;
};
struct hl_eq_razwi_lbw_info_regs {
__le32 rr_aw_razwi_reg;
__le32 rr_aw_razwi_id_reg;
__le32 rr_ar_razwi_reg;
__le32 rr_ar_razwi_id_reg;
};
struct hl_eq_razwi_hbw_info_regs {
__le32 rr_aw_razwi_hi_reg;
__le32 rr_aw_razwi_lo_reg;
__le32 rr_aw_razwi_id_reg;
__le32 rr_ar_razwi_hi_reg;
__le32 rr_ar_razwi_lo_reg;
__le32 rr_ar_razwi_id_reg;
};
/*
 * razwi_happened masks
 * NOTE(review): presumably these are the bits carried in
 * hl_eq_razwi_info.razwi_happened_mask - confirm against the F/W interface.
 */
#define RAZWI_HAPPENED_HBW 0x1
#define RAZWI_HAPPENED_LBW 0x2
#define RAZWI_HAPPENED_AW 0x4
#define RAZWI_HAPPENED_AR 0x8
/*
 * RAZWI information: a mask word, followed by either the LBW or the HBW
 * register values (the two share storage through the anonymous union) and a
 * trailing pad word.
 */
struct hl_eq_razwi_info {
__le32 razwi_happened_mask;
union {
/* LBW variant: 4 x __le32 */
struct hl_eq_razwi_lbw_info_regs lbw;
/* HBW variant: 6 x __le32 */
struct hl_eq_razwi_hbw_info_regs hbw;
};
__le32 pad;
};
struct hl_eq_razwi_with_intr_cause {
struct hl_eq_razwi_info razwi_info;
struct hl_eq_intr_cause intr_cause;
};
#define HBM_CA_ERR_CMD_LIFO_LEN 8
#define HBM_RD_ERR_DATA_LIFO_LEN 8
#define HBM_WR_PAR_CMD_LIFO_LEN 11
/* HBM SEI causes; carried in hl_hbm_sei_header.sei_cause */
enum hl_hbm_sei_cause {
	/*
	 * The command/address parity error is reported as two separate events
	 * due to a size limitation: the EVEN suffix is for even HBM CK_t
	 * cycles and the ODD suffix is for odd HBM CK_t cycles.
	 */
	HBM_SEI_CMD_PARITY_EVEN = 0,
	HBM_SEI_CMD_PARITY_ODD = 1,
	/*
	 * A read error can be reflected as a combination of SERR/DERR/parity
	 * errors, therefore a single event covers all read error types.
	 * LKD will perform further processing.
	 */
	HBM_SEI_READ_ERR = 2,
	HBM_SEI_WRITE_DATA_PARITY_ERR = 3,
	HBM_SEI_CATTRIP = 4,
	HBM_SEI_MEM_BIST_FAIL = 5,
	HBM_SEI_DFI = 6,
	HBM_SEI_INV_TEMP_READ_OUT = 7,
	HBM_SEI_BIST_FAIL = 8,
};
/* Masks for parsing hl_hbm_sei_header fields */
#define HBM_ECC_SERR_CNTR_MASK 0xFF
#define HBM_ECC_DERR_CNTR_MASK 0xFF00
#define HBM_RD_PARITY_CNTR_MASK 0xFF0000
/* HBM index and MC index are known by the event_id */
struct hl_hbm_sei_header {
union {
/* relevant only in case of HBM read error */
struct {
__u8 ecc_serr_cnt;
__u8 ecc_derr_cnt;
__u8 read_par_cnt;
__u8 reserved;
};
/* All other cases */
__le32 cnt;
};
__u8 sei_cause; /* enum hl_hbm_sei_cause */
__u8 mc_channel; /* range: 0-3 */
__u8 mc_pseudo_channel; /* range: 0-7 */
__u8 pad[1];
};
#define HBM_RD_ADDR_SID_SHIFT 0
#define HBM_RD_ADDR_SID_MASK 0x1
#define HBM_RD_ADDR_BG_SHIFT 1
#define HBM_RD_ADDR_BG_MASK 0x6
#define HBM_RD_ADDR_BA_SHIFT 3
#define HBM_RD_ADDR_BA_MASK 0x18
#define HBM_RD_ADDR_COL_SHIFT 5
#define HBM_RD_ADDR_COL_MASK 0x7E0
#define HBM_RD_ADDR_ROW_SHIFT 11
#define HBM_RD_ADDR_ROW_MASK 0x3FFF800
struct hbm_rd_addr {
union {
/* bit fields are only for FW use */
struct {
u32 dbg_rd_err_addr_sid:1;
u32 dbg_rd_err_addr_bg:2;
u32 dbg_rd_err_addr_ba:2;
u32 dbg_rd_err_addr_col:6;
u32 dbg_rd_err_addr_row:15;
u32 reserved:6;
};
__le32 rd_addr_val;
};
};
#define HBM_RD_ERR_BEAT_SHIFT			2
/*
 * dbg_rd_err_misc fields.
 * Every *_BEAT0_MASK selects the field of beat 0 and every *_BEAT0_SHIFT is
 * the bit position of that field, so MASK >> SHIFT yields the field value.
 */
/* Read parity is calculated per DW on every beat */
#define HBM_RD_ERR_PAR_ERR_BEAT0_SHIFT		0
#define HBM_RD_ERR_PAR_ERR_BEAT0_MASK		0x3
#define HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT		8
#define HBM_RD_ERR_PAR_DATA_BEAT0_MASK		0x300
/* ECC is calculated per PC on every beat */
#define HBM_RD_ERR_SERR_BEAT0_SHIFT		16
#define HBM_RD_ERR_SERR_BEAT0_MASK		0x10000
/*
 * DERR starts at bit 20: it follows the 4 SERR bits (16-19), matching both
 * the 0x100000 mask and the dbg_rd_err_derr bit field of
 * struct hl_eq_hbm_sei_read_err_intr_info. A shift of 24 would always
 * extract 0 from a value masked with HBM_RD_ERR_DERR_BEAT0_MASK.
 */
#define HBM_RD_ERR_DERR_BEAT0_SHIFT		20
#define HBM_RD_ERR_DERR_BEAT0_MASK		0x100000
/*
 * Extra data attached to an HBM read error event. Each member is labelled
 * with the DFI_RD_ERR_REP_* name it corresponds to.
 */
struct hl_eq_hbm_sei_read_err_intr_info {
	/* DFI_RD_ERR_REP_ADDR */
	struct hbm_rd_addr dbg_rd_err_addr;

	/* DFI_RD_ERR_REP_ERR */
	union {
		/* bit fields are only for FW use */
		struct {
			u32 dbg_rd_err_par:8;
			u32 dbg_rd_err_par_data:8;
			u32 dbg_rd_err_serr:4;
			u32 dbg_rd_err_derr:4;
			u32 reserved:8;
		};
		/* Raw value, to be parsed with the HBM_RD_ERR_* macros */
		__le32 dbg_rd_err_misc;
	};

	/* DFI_RD_ERR_REP_DM */
	__le32 dbg_rd_err_dm;

	/* DFI_RD_ERR_REP_SYNDROME */
	__le32 dbg_rd_err_syndrome;

	/* DFI_RD_ERR_REP_DATA */
	__le32 dbg_rd_err_data[HBM_RD_ERR_DATA_LIFO_LEN];
};
/*
 * Extra data attached to a command/address parity error event:
 * HBM_CA_ERR_CMD_LIFO_LEN row entries and as many column entries.
 */
struct hl_eq_hbm_sei_ca_par_intr_info {
	/* 14 LSBs */
	__le16 dbg_row[HBM_CA_ERR_CMD_LIFO_LEN];

	/* 18 LSBs */
	__le32 dbg_col[HBM_CA_ERR_CMD_LIFO_LEN];
};
/*
 * Shift/mask pairs for a latched write command address word. The field
 * widths mirror the bit fields of struct hbm_sei_wr_cmd_address:
 * COL 6 bits, BG 2 bits, BA 2 bits, SID 1 bit.
 */
#define WR_PAR_LAST_CMD_COL_SHIFT	0
#define WR_PAR_LAST_CMD_COL_MASK	0x03F
#define WR_PAR_LAST_CMD_BG_SHIFT	6
#define WR_PAR_LAST_CMD_BG_MASK		0x0C0
#define WR_PAR_LAST_CMD_BA_SHIFT	8
#define WR_PAR_LAST_CMD_BA_MASK		0x300
#define WR_PAR_LAST_CMD_SID_SHIFT	10
#define WR_PAR_LAST_CMD_SID_MASK	0x400
/*
 * Address of a single write command, as one 32-bit word.
 * Row address isn't latched.
 */
struct hbm_sei_wr_cmd_address {
	/* DFI_DERR_LAST_CMD */
	union {
		/* bit fields are only for FW use */
		struct {
			u32 col:6;
			u32 bg:2;
			u32 ba:2;
			u32 sid:1;
			u32 reserved:21;
		};
		/* Raw value of the same word */
		__le32 dbg_wr_cmd_addr;
	};
};
/* Extra data attached to an HBM write parity error event */
struct hl_eq_hbm_sei_wr_par_intr_info {
	/*
	 * History of WR command addresses preceding the error:
	 * entry 0: WR command address from the 1st cycle prior to the error
	 * entry 1: WR command address from the 2nd cycle prior to the error
	 * and so on...
	 */
	struct hbm_sei_wr_cmd_address dbg_last_wr_cmds[HBM_WR_PAR_CMD_LIFO_LEN];

	/*
	 * derr[0:1] - 1st HBM cycle DERR output
	 * derr[2:3] - 2nd HBM cycle DERR output
	 */
	__u8 dbg_derr;

	/* extend to reach 8B */
	__u8 pad[3];
};
/*
 * Payload of an HBM SEI event. It represents the following sei causes:
 * command parity, ECC double error, ECC single error, dfi error, cattrip,
 * temperature read-out, read parity error and write parity error.
 * Some causes only use the header, while others carry extra data in the
 * union that follows it.
 */
struct hl_eq_hbm_sei_data {
	struct hl_hbm_sei_header hdr;

	/* Cause-specific extra data */
	union {
		struct hl_eq_hbm_sei_ca_par_intr_info ca_parity_even_info;
		struct hl_eq_hbm_sei_ca_par_intr_info ca_parity_odd_info;
		struct hl_eq_hbm_sei_read_err_intr_info read_err_info;
		struct hl_eq_hbm_sei_wr_par_intr_info wr_parity_info;
	};
};
/* Type of an engine/farm ARC interrupt */
enum hl_engine_arc_interrupt_type {
	/* Qman/farm ARC DCCM QUEUE FULL interrupt */
	ENGINE_ARC_DCCM_QUEUE_FULL_IRQ = 1,
};
/* Payload details of a DCCM QUEUE FULL interrupt */
struct hl_engine_arc_dccm_queue_full_irq {
	/* Index of the queue which caused DCCM QUEUE FULL */
	__le32 queue_index;
	__le32 pad;
};
/* Details of a QM/FARM ARC interrupt */
struct hl_eq_engine_arc_intr_data {
	/* ARC engine id, e.g. DCORE0_TPC0_QM_ARC, DCORE0_TPC1_QM_ARC */
	__le32 engine_id;

	/* One of enum hl_engine_arc_interrupt_type */
	__le32 intr_type;

	/*
	 * More info related to the interrupt, e.g. the queue index in case
	 * of a DCCM_QUEUE_FULL interrupt.
	 */
	__le64 payload;

	__le64 pad[5];
};
/*
 * A single event queue entry: a common header followed by event-specific
 * data. The union holds one view per event family; 'data' exposes the same
 * bytes as raw 64-bit words.
 */
struct hl_eq_entry {
	struct hl_eq_header hdr;
	union {
		struct hl_eq_ecc_data ecc_data;
		/*
		 * Declared exactly once: a second 'hbm_ecc_data' member in the
		 * same union is a duplicate-member error and breaks the build.
		 */
		struct hl_eq_hbm_ecc_data hbm_ecc_data;	/* Gaudi1 HBM */
		struct hl_eq_sm_sei_data sm_sei_data;
		struct cpucp_pkt_sync_err pkt_sync_err;
		struct hl_eq_fw_alive fw_alive;
		struct hl_eq_pcie_addr_dec_data pcie_addr_dec_data;
		struct hl_eq_intr_cause intr_cause;
		struct hl_eq_pcie_drain_ind_data pcie_drain_ind_data;
		struct hl_eq_razwi_info razwi_info;
		struct hl_eq_razwi_with_intr_cause razwi_with_intr_cause;
		struct hl_eq_hbm_sei_data sei_data;	/* Gaudi2 HBM */
		struct hl_eq_engine_arc_intr_data arc_data;
		/* Raw view of the event data */
		__le64 data[7];
	};
};
......@@ -793,10 +1031,23 @@ struct cpucp_security_info {
* @infineon_second_stage_version: Infineon 2nd stage DC-DC version.
* @dram_size: available DRAM size.
* @card_name: card name that will be displayed in HWMON subsystem on the host
* @tpc_binning_mask: TPC binning mask, 1 bit per TPC instance
* (0 = functional, 1 = binned)
* @decoder_binning_mask: Decoder binning mask, 1 bit per decoder instance
* (0 = functional, 1 = binned), maximum 1 per dcore
* @sram_binning: Categorize SRAM functionality
* (0 = fully functional, 1 = lower-half is not functional,
* 2 = upper-half is not functional)
* @sec_info: security information
* @pll_map: Bit map of supported PLLs for current ASIC version.
* @mme_binning_mask: MME binning mask,
* (0 = functional, 1 = binned)
* bits [0:6] <==> dcore0 mme fma
* bits [7:13] <==> dcore1 mme fma
* bits [14:20] <==> dcore0 mme ima
* bits [21:27] <==> dcore1 mme ima
* For each group, if the 6th bit is set then first 5 bits
* represent the col's idx [0-31], otherwise these bits are
* ignored, and col idx 32 is binned. 7th bit is don't care.
* @dram_binning_mask: DRAM binning mask, 1 bit per dram instance
* (0 = functional 1 = binned)
* @memory_repair_flag: eFuse flag indicating memory repair
......@@ -804,6 +1055,8 @@ struct cpucp_security_info {
* (0 = functional 1 = binned)
* @xbar_binning_mask: Xbar binning mask, 1 bit per Xbar instance
* (0 = functional 1 = binned)
* @interposer_version: Interposer version programmed in eFuse
* @substrate_version: Substrate version programmed in eFuse
* @fw_os_version: Firmware OS Version
*/
struct cpucp_info {
......@@ -820,16 +1073,18 @@ struct cpucp_info {
__le32 infineon_second_stage_version;
__le64 dram_size;
char card_name[CARD_NAME_MAX_LEN];
__le64 reserved3;
__le64 reserved4;
__u8 reserved5;
__le64 tpc_binning_mask;
__le64 decoder_binning_mask;
__u8 sram_binning;
__u8 dram_binning_mask;
__u8 memory_repair_flag;
__u8 edma_binning_mask;
__u8 xbar_binning_mask;
__u8 pad[3];
__u8 interposer_version;
__u8 substrate_version;
__u8 reserved2;
struct cpucp_security_info sec_info;
__le32 reserved6;
__le32 reserved3;
__u8 pll_map[PLL_MAP_LEN];
__le64 mme_binning_mask;
__u8 fw_os_version[VERSION_MAX_LEN];
......@@ -933,6 +1188,11 @@ struct cpucp_hbm_row_replaced_rows_info {
struct cpucp_hbm_row_info replaced_rows[CPUCP_HBM_ROW_REPLACE_MAX];
};
/* CPU reset status values; the numeric values are fixed explicitly */
enum cpu_reset_status {
	CPU_RST_STATUS_NA		= 0,
	CPU_RST_STATUS_SOFT_RST_DONE	= 1,
};
/*
* struct dcore_monitor_regs_data - DCORE monitor regs data.
* the structure follows sync manager block layout. relevant only to Gaudi.
......
......@@ -525,6 +525,13 @@ struct lkd_fw_comms_msg {
struct {
__u8 fw_cfg_skip; /* 1 - skip, 0 - don't skip */
};
struct {
__le64 tpc_binning_conf;
__le32 dec_binning_conf;
__le32 hbm_binning_conf;
__le32 edma_binning_conf;
__le32 mme_redundancy_conf; /* use MME_REDUNDANT_COLUMN */
};
};
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment