Commit d3222595 authored by Greg Kroah-Hartman

Merge tag 'misc-habanalabs-next-2022-09-21' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-next

Oded writes:

  "This tag contains habanalabs driver changes for v6.1:

   - Support new notifier event for device state change through eventfd (see the
     user-space sketch right after this list).
   - Add uAPI to retrieve device attestation information for Gaudi2.
   - Add uAPI to retrieve the h/w status of all h/w blocks.
   - Add uAPI to control the running mode of the engine cores in Gaudi2.
   - Expose whether the device runs with secured firmware through the INFO ioctl
     and sysfs.
   - Support trace events in DMA allocations and MMU map/unmap operations.
   - Notify firmware when the device is acquired by a user process and when it
     is released. This is done as part of the RAS handling that the f/w performs.
   - Multiple bug fixes, refactors and renames.
   - Cleanup of error messages, moving some to debug level.
   - Enhance log prints in case of h/w error events for Gaudi2."
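A minimal user-space sketch of the eventfd flow mentioned in the first bullet
(hypothetical snippet; the exact name of the hl_info_args member that carries
the file descriptor should be checked against include/uapi/misc/habanalabs.h):

/* Hypothetical sketch: register an eventfd for device-state notifications
 * (e.g. HL_NOTIFIER_EVENT_DEVICE_RESET) and block until one arrives.
 * The 'eventfd' member name is an assumption, as is the <misc/habanalabs.h>
 * include path (uapi header location after headers_install).
 */
#include <fcntl.h>
#include <poll.h>
#include <stdint.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>

int wait_for_device_event(void)
{
    struct hl_info_args info = {0};
    struct pollfd pfd;
    int fd = open("/dev/hl0", O_RDWR);
    int efd = eventfd(0, 0);

    if (fd < 0 || efd < 0)
        return -1;

    info.op = HL_INFO_REGISTER_EVENTFD;
    info.eventfd = efd;                     /* assumed member name */
    if (ioctl(fd, HL_IOCTL_INFO, &info))
        return -1;

    pfd.fd = efd;
    pfd.events = POLLIN;
    return poll(&pfd, 1, -1);               /* wakes on a notifier event */
}

The sketches further down in this page reuse these headers and an already-open
device fd.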

* tag 'misc-habanalabs-next-2022-09-21' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux: (68 commits)
  habanalabs: eliminate aggregate use warning
  habanalabs/gaudi: use 8KB aligned address for TPC kernels
  habanalabs: remove some f/w descriptor validations
  habanalabs: build ASICs from new to old
  habanalabs/gaudi2: allow user to flush PCIE by read
  habanalabs: failure to open device due to reset is debug level
  habanalabs/gaudi2: Remove unnecessary (void*) conversions
  habanalabs/gaudi2: add secured attestation info uapi
  habanalabs/gaudi2: add handling to pmmu events in eqe handler
  habanalabs/gaudi: change TPC Assert to use TPC DEC instead of QMAN err
  habanalabs: rename error info structure
  habanalabs/gaudi2: get f/w reset status register dynamically
  habanalabs/gaudi2: increase hard-reset sleep time to 2 sec
  habanalabs/gaudi2: print RAZWI info upon PCIe access error
  habanalabs: MMU invalidation h/w is per device
  habanalabs: new notifier events for device state
  habanalabs/gaudi2: free event irq if init fails
  habanalabs: fix resetting the DRAM BAR
  habanalabs: add support for new cpucp return codes
  habanalabs/gaudi2: read F/W security indication after hard reset
  ...
parents 8be7dfc6 259cee1c
@@ -16,7 +16,7 @@ Description: Version of the application running on the device's CPU

What:           /sys/class/habanalabs/hl<n>/clk_max_freq_mhz
Date:           Jun 2019
-KernelVersion:  not yet upstreamed
+KernelVersion:  5.7
Contact:        ogabbay@kernel.org
Description:    Allows the user to set the maximum clock frequency, in MHz.
                The device clock might be set to lower value than the maximum.

@@ -26,7 +26,7 @@ Description: Allows the user to set the maximum clock frequency, in MHz.

What:           /sys/class/habanalabs/hl<n>/clk_cur_freq_mhz
Date:           Jun 2019
-KernelVersion:  not yet upstreamed
+KernelVersion:  5.7
Contact:        ogabbay@kernel.org
Description:    Displays the current frequency, in MHz, of the device clock.
                This property is valid only for the Gaudi ASIC family

@@ -176,6 +176,12 @@ KernelVersion: 5.1
Contact:        ogabbay@kernel.org
Description:    Version of the device's preboot F/W code

+What:           /sys/class/habanalabs/hl<n>/security_enabled
+Date:           Oct 2022
+KernelVersion:  6.1
+Contact:        obitton@habana.ai
+Description:    Displays the device's security status
+
What:           /sys/class/habanalabs/hl<n>/soft_reset
Date:           Jan 2019
KernelVersion:  5.1

@@ -230,6 +236,6 @@ Description: Version of the u-boot running on the device's CPU

What:           /sys/class/habanalabs/hl<n>/vrm_ver
Date:           Jan 2022
-KernelVersion:  not yet upstreamed
+KernelVersion:  5.17
Contact:        ogabbay@kernel.org
Description:    Version of the Device's Voltage Regulator Monitor F/W code. N/A to GOYA and GAUDI
@@ -8878,6 +8878,7 @@ T: git https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git
F:      Documentation/ABI/testing/debugfs-driver-habanalabs
F:      Documentation/ABI/testing/sysfs-driver-habanalabs
F:      drivers/misc/habanalabs/
+F:     include/trace/events/habanalabs.h
F:      include/uapi/misc/habanalabs.h

HACKRF MEDIA DRIVER
...
@@ -10,6 +10,7 @@ config HABANA_AI
    select HWMON
    select DMA_SHARED_BUFFER
    select CRC32
+   select FW_LOADER
    help
      Enables PCIe card driver for Habana's AI Processors (AIP) that are
      designed to accelerate Deep Learning inference and training workloads.
...
@@ -8,13 +8,13 @@ obj-$(CONFIG_HABANA_AI) := habanalabs.o
include $(src)/common/Makefile
habanalabs-y += $(HL_COMMON_FILES)

-include $(src)/goya/Makefile
-habanalabs-y += $(HL_GOYA_FILES)
+include $(src)/gaudi2/Makefile
+habanalabs-y += $(HL_GAUDI2_FILES)

include $(src)/gaudi/Makefile
habanalabs-y += $(HL_GAUDI_FILES)

-include $(src)/gaudi2/Makefile
-habanalabs-y += $(HL_GAUDI2_FILES)
+include $(src)/goya/Makefile
+habanalabs-y += $(HL_GOYA_FILES)

habanalabs-$(CONFIG_DEBUG_FS) += common/debugfs.o
@@ -12,20 +12,18 @@
#include <linux/slab.h>
#include <linux/uaccess.h>

+#define CB_VA_POOL_SIZE (4UL * SZ_1G)
+
static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
    struct hl_device *hdev = ctx->hdev;
    struct asic_fixed_properties *prop = &hdev->asic_prop;
-   struct hl_vm_va_block *va_block, *tmp;
-   dma_addr_t bus_addr;
-   u64 virt_addr;
    u32 page_size = prop->pmmu.page_size;
-   s32 offset;
    int rc;

    if (!hdev->supports_cb_mapping) {
        dev_err_ratelimited(hdev->dev,
-           "Cannot map CB because no VA range is allocated for CB mapping\n");
+           "Mapping a CB to the device's MMU is not supported\n");
        return -EINVAL;
    }

@@ -35,106 +33,45 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
        return -EINVAL;
    }

-   INIT_LIST_HEAD(&cb->va_block_list);
-
-   for (bus_addr = cb->bus_address;
-           bus_addr < cb->bus_address + cb->size;
-           bus_addr += page_size) {
-
-       virt_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, page_size);
-       if (!virt_addr) {
-           dev_err(hdev->dev,
-               "Failed to allocate device virtual address for CB\n");
-           rc = -ENOMEM;
-           goto err_va_pool_free;
-       }
-
-       va_block = kzalloc(sizeof(*va_block), GFP_KERNEL);
-       if (!va_block) {
-           rc = -ENOMEM;
-           gen_pool_free(ctx->cb_va_pool, virt_addr, page_size);
-           goto err_va_pool_free;
-       }
-
-       va_block->start = virt_addr;
-       va_block->end = virt_addr + page_size - 1;
-       va_block->size = page_size;
-       list_add_tail(&va_block->node, &cb->va_block_list);
-   }
-
-   mutex_lock(&ctx->mmu_lock);
-
-   bus_addr = cb->bus_address;
-   offset = 0;
-   list_for_each_entry(va_block, &cb->va_block_list, node) {
-       rc = hl_mmu_map_page(ctx, va_block->start, bus_addr,
-               va_block->size, list_is_last(&va_block->node,
-                       &cb->va_block_list));
-       if (rc) {
-           dev_err(hdev->dev, "Failed to map VA %#llx to CB\n",
-               va_block->start);
-           goto err_va_umap;
-       }
-
-       bus_addr += va_block->size;
-       offset += va_block->size;
-   }
+   if (cb->is_mmu_mapped)
+       return 0;
+
+   cb->roundup_size = roundup(cb->size, page_size);
+
+   cb->virtual_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, cb->roundup_size);
+   if (!cb->virtual_addr) {
+       dev_err(hdev->dev, "Failed to allocate device virtual address for CB\n");
+       return -ENOMEM;
+   }
+
+   mutex_lock(&hdev->mmu_lock);
+   rc = hl_mmu_map_contiguous(ctx, cb->virtual_addr, cb->bus_address, cb->roundup_size);
+   if (rc) {
+       dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", cb->virtual_addr);
+       goto err_va_umap;
+   }

    rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV);
-
-   mutex_unlock(&ctx->mmu_lock);
+   mutex_unlock(&hdev->mmu_lock);

    cb->is_mmu_mapped = true;
    return rc;

err_va_umap:
-   list_for_each_entry(va_block, &cb->va_block_list, node) {
-       if (offset <= 0)
-           break;
-       hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
-               offset <= va_block->size);
-       offset -= va_block->size;
-   }
-
-   rc = hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
-
-   mutex_unlock(&ctx->mmu_lock);
-
-err_va_pool_free:
-   list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
-       gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
-       list_del(&va_block->node);
-       kfree(va_block);
-   }
-
+   mutex_unlock(&hdev->mmu_lock);
+   gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
    return rc;
}

static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
    struct hl_device *hdev = ctx->hdev;
-   struct hl_vm_va_block *va_block, *tmp;
-
-   mutex_lock(&ctx->mmu_lock);
-
-   list_for_each_entry(va_block, &cb->va_block_list, node)
-       if (hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
-               list_is_last(&va_block->node,
-                       &cb->va_block_list)))
-           dev_warn_ratelimited(hdev->dev,
-               "Failed to unmap CB's va 0x%llx\n",
-               va_block->start);

+   mutex_lock(&hdev->mmu_lock);
+   hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size);
    hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
+   mutex_unlock(&hdev->mmu_lock);

-   mutex_unlock(&ctx->mmu_lock);
-
-   list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
-       gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
-       list_del(&va_block->node);
-       kfree(va_block);
-   }
+   gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
}

static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)

@@ -376,7 +313,6 @@ int hl_cb_destroy(struct hl_mem_mgr *mmg, u64 cb_handle)
static int hl_cb_info(struct hl_mem_mgr *mmg,
            u64 handle, u32 flags, u32 *usage_cnt, u64 *device_va)
{
-   struct hl_vm_va_block *va_block;
    struct hl_cb *cb;
    int rc = 0;

@@ -388,9 +324,8 @@ static int hl_cb_info(struct hl_mem_mgr *mmg,
    }

    if (flags & HL_CB_FLAGS_GET_DEVICE_VA) {
-       va_block = list_first_entry(&cb->va_block_list, struct hl_vm_va_block, node);
-       if (va_block) {
-           *device_va = va_block->start;
+       if (cb->is_mmu_mapped) {
+           *device_va = cb->virtual_addr;
        } else {
            dev_err(mmg->dev, "CB is not mapped to the device's MMU\n");
            rc = -EINVAL;

@@ -566,16 +501,23 @@ int hl_cb_va_pool_init(struct hl_ctx *ctx)
        return -ENOMEM;
    }

-   rc = gen_pool_add(ctx->cb_va_pool, prop->cb_va_start_addr,
-           prop->cb_va_end_addr - prop->cb_va_start_addr, -1);
+   ctx->cb_va_pool_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
+           CB_VA_POOL_SIZE, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
+   if (!ctx->cb_va_pool_base) {
+       rc = -ENOMEM;
+       goto err_pool_destroy;
+   }
+
+   rc = gen_pool_add(ctx->cb_va_pool, ctx->cb_va_pool_base, CB_VA_POOL_SIZE, -1);
    if (rc) {
        dev_err(hdev->dev,
            "Failed to add memory to VA gen pool for CB mapping\n");
-       goto err_pool_destroy;
+       goto err_unreserve_va_block;
    }

    return 0;

+err_unreserve_va_block:
+   hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
err_pool_destroy:
    gen_pool_destroy(ctx->cb_va_pool);

@@ -590,4 +532,5 @@ void hl_cb_va_pool_fini(struct hl_ctx *ctx)
        return;

    gen_pool_destroy(ctx->cb_va_pool);
+   hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
}
@@ -12,7 +12,9 @@
#include <linux/slab.h>

#define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
-           HL_CS_FLAGS_COLLECTIVE_WAIT)
+           HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \
+           HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND)

#define MAX_TS_ITER_NUM 10

@@ -824,10 +826,10 @@ static void cs_timedout(struct work_struct *work)
    }

    /* Save only the first CS timeout parameters */
-   rc = atomic_cmpxchg(&hdev->last_error.cs_timeout.write_enable, 1, 0);
+   rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0);
    if (rc) {
-       hdev->last_error.cs_timeout.timestamp = ktime_get();
-       hdev->last_error.cs_timeout.seq = cs->sequence;
+       hdev->captured_err_info.cs_timeout.timestamp = ktime_get();
+       hdev->captured_err_info.cs_timeout.seq = cs->sequence;

        event_mask = device_reset ? (HL_NOTIFIER_EVENT_CS_TIMEOUT |
                HL_NOTIFIER_EVENT_DEVICE_RESET) : HL_NOTIFIER_EVENT_CS_TIMEOUT;

@@ -1242,6 +1244,8 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
        return CS_RESERVE_SIGNALS;
    else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY)
        return CS_UNRESERVE_SIGNALS;
+   else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND)
+       return CS_TYPE_ENGINE_CORE;
    else
        return CS_TYPE_DEFAULT;
}

@@ -1253,6 +1257,7 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
    u32 cs_type_flags, num_chunks;
    enum hl_device_status status;
    enum hl_cs_type cs_type;
+   bool is_sync_stream;

    if (!hl_device_operational(hdev, &status)) {
        return -EBUSY;

@@ -1276,9 +1281,10 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
    cs_type = hl_cs_get_cs_type(cs_type_flags);
    num_chunks = args->in.num_chunks_execute;

-   if (unlikely((cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT ||
-           cs_type == CS_TYPE_COLLECTIVE_WAIT) &&
-           !hdev->supports_sync_stream)) {
+   is_sync_stream = (cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT ||
+           cs_type == CS_TYPE_COLLECTIVE_WAIT);
+
+   if (unlikely(is_sync_stream && !hdev->supports_sync_stream)) {
        dev_err(hdev->dev, "Sync stream CS is not supported\n");
        return -EINVAL;
    }

@@ -1288,7 +1294,7 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
            dev_err(hdev->dev, "Got execute CS with 0 chunks, context %d\n", ctx->asid);
            return -EINVAL;
        }
-   } else if (num_chunks != 1) {
+   } else if (is_sync_stream && num_chunks != 1) {
        dev_err(hdev->dev,
            "Sync stream CS mandates one chunk only, context %d\n",
            ctx->asid);

@@ -1584,13 +1590,14 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
    struct hl_device *hdev = hpriv->hdev;
    struct hl_ctx *ctx = hpriv->ctx;
    bool need_soft_reset = false;
-   int rc = 0, do_ctx_switch;
+   int rc = 0, do_ctx_switch = 0;
    void __user *chunks;
    u32 num_chunks, tmp;
    u16 sob_count;
    int ret;

-   do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
+   if (hdev->supports_ctx_switch)
+       do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);

    if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
        mutex_lock(&hpriv->restore_phase_mutex);

@@ -1661,9 +1668,10 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
            }
        }

-       ctx->thread_ctx_switch_wait_token = 1;
+       if (hdev->supports_ctx_switch)
+           ctx->thread_ctx_switch_wait_token = 1;

-   } else if (!ctx->thread_ctx_switch_wait_token) {
+   } else if (hdev->supports_ctx_switch && !ctx->thread_ctx_switch_wait_token) {
        rc = hl_poll_timeout_memory(hdev,
            &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
            100, jiffies_to_usecs(hdev->timeout_jiffies), false);

@@ -2351,6 +2359,41 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
    return rc;
}
+static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
+           u32 num_engine_cores, u32 core_command)
+{
+   int rc;
+   struct hl_device *hdev = hpriv->hdev;
+   void __user *engine_cores_arr;
+   u32 *cores;
+
+   if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) {
+       dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores);
+       return -EINVAL;
+   }
+
+   if (core_command != HL_ENGINE_CORE_RUN && core_command != HL_ENGINE_CORE_HALT) {
+       dev_err(hdev->dev, "Engine core command is invalid\n");
+       return -EINVAL;
+   }
+
+   engine_cores_arr = (void __user *) (uintptr_t) engine_cores;
+   cores = kmalloc_array(num_engine_cores, sizeof(u32), GFP_KERNEL);
+   if (!cores)
+       return -ENOMEM;
+
+   if (copy_from_user(cores, engine_cores_arr, num_engine_cores * sizeof(u32))) {
+       dev_err(hdev->dev, "Failed to copy core-ids array from user\n");
+       kfree(cores);
+       return -EFAULT;
+   }
+
+   rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command);
+   kfree(cores);
+
+   return rc;
+}
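For context, driving this new CS type from user space might look like the
following hypothetical sketch (same headers as the eventfd sketch near the top
of this page; the field names follow the uAPI usage visible in this diff):

/* Hypothetical sketch: halt two engine cores through the CS ioctl. */
int halt_two_cores(int fd)
{
    uint32_t cores[2] = { 0, 1 };           /* core ids are ASIC specific */
    union hl_cs_args args = {0};

    args.in.cs_flags = HL_CS_FLAGS_ENGINE_CORE_COMMAND;
    args.in.engine_cores = (uint64_t) (uintptr_t) cores;
    args.in.num_engine_cores = 2;
    args.in.core_command = HL_ENGINE_CORE_HALT;

    return ioctl(fd, HL_IOCTL_CS, &args);
}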
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
{
    union hl_cs_args *args = data;

@@ -2403,6 +2446,10 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
        rc = cs_ioctl_unreserve_signals(hpriv,
                args->in.encaps_sig_handle_id);
        break;
+   case CS_TYPE_ENGINE_CORE:
+       rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores,
+               args->in.num_engine_cores, args->in.core_command);
+       break;
    default:
        rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
                args->in.cs_flags,

@@ -2524,7 +2571,7 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data, struct multi_cs_completion *mcs_compl)
    ktime_t max_ktime, first_cs_time;
    enum hl_cs_wait_status status;

-   memset(fence_ptr, 0, arr_len * sizeof(*fence_ptr));
+   memset(fence_ptr, 0, arr_len * sizeof(struct hl_fence *));

    /* get all fences under the same lock */
    rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len);

@@ -2826,7 +2873,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
    }

    /* allocate array for the fences */
-   fence_arr = kmalloc_array(seq_arr_len, sizeof(*fence_arr), GFP_KERNEL);
+   fence_arr = kmalloc_array(seq_arr_len, sizeof(struct hl_fence *), GFP_KERNEL);
    if (!fence_arr) {
        rc = -ENOMEM;
        goto free_seq_arr;
...
@@ -291,14 +291,16 @@ static int vm_show(struct seq_file *s, void *data)
    if (ctx->asid != HL_KERNEL_ASID_ID &&
        !list_empty(&ctx->hw_block_mem_list)) {
        seq_puts(s, "\nhw_block mappings:\n\n");
-       seq_puts(s, "    virtual address    size    HW block id\n");
-       seq_puts(s, "-------------------------------------------\n");
+       seq_puts(s,
+           "    virtual address    block size    mapped size    HW block id\n");
+       seq_puts(s,
+           "---------------------------------------------------------------\n");
        mutex_lock(&ctx->hw_block_list_lock);
-       list_for_each_entry(lnode, &ctx->hw_block_mem_list,
-               node) {
+       list_for_each_entry(lnode, &ctx->hw_block_mem_list, node) {
            seq_printf(s,
-               "    0x%-14lx %-6u %-9u\n",
-               lnode->vaddr, lnode->size, lnode->id);
+               "    0x%-14lx %-6u %-6u %-9u\n",
+               lnode->vaddr, lnode->block_size, lnode->mapped_size,
+               lnode->id);
        }
        mutex_unlock(&ctx->hw_block_list_lock);
    }

@@ -591,6 +593,7 @@ static int engines_show(struct seq_file *s, void *data)
    struct hl_debugfs_entry *entry = s->private;
    struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
    struct hl_device *hdev = dev_entry->hdev;
+   struct engines_data eng_data;

    if (hdev->reset_info.in_reset) {
        dev_warn_ratelimited(hdev->dev,

@@ -598,7 +601,25 @@ static int engines_show(struct seq_file *s, void *data)
        return 0;
    }

-   hdev->asic_funcs->is_device_idle(hdev, NULL, 0, s);
+   eng_data.actual_size = 0;
+   eng_data.allocated_buf_size = HL_ENGINES_DATA_MAX_SIZE;
+   eng_data.buf = vmalloc(eng_data.allocated_buf_size);
+   if (!eng_data.buf)
+       return -ENOMEM;
+
+   hdev->asic_funcs->is_device_idle(hdev, NULL, 0, &eng_data);
+
+   if (eng_data.actual_size > eng_data.allocated_buf_size) {
+       dev_err(hdev->dev,
+           "Engines data size (%d Bytes) is bigger than allocated size (%u Bytes)\n",
+           eng_data.actual_size, eng_data.allocated_buf_size);
+       vfree(eng_data.buf);
+       return -ENOMEM;
+   }
+
+   seq_write(s, eng_data.buf, eng_data.actual_size);
+
+   vfree(eng_data.buf);

    return 0;
}
...
This diff is collapsed.
This diff is collapsed.
@@ -14,6 +14,9 @@
#include <linux/aer.h>
#include <linux/module.h>

+#define CREATE_TRACE_POINTS
+#include <trace/events/habanalabs.h>
+
#define HL_DRIVER_AUTHOR    "HabanaLabs Kernel Driver Team"

#define HL_DRIVER_DESC      "Driver for HabanaLabs's AI Accelerators"

@@ -27,7 +30,10 @@ static struct class *hl_class;
static DEFINE_IDR(hl_devs_idr);
static DEFINE_MUTEX(hl_devs_idr_lock);

-static int timeout_locked = 30;
+#define HL_DEFAULT_TIMEOUT_LOCKED      30  /* 30 seconds */
+#define GAUDI_DEFAULT_TIMEOUT_LOCKED   600 /* 10 minutes */
+
+static int timeout_locked = HL_DEFAULT_TIMEOUT_LOCKED;
static int reset_on_lockup = 1;
static int memory_scrub;
static ulong boot_error_status_mask = ULONG_MAX;

@@ -55,14 +61,12 @@ MODULE_PARM_DESC(boot_error_status_mask,
#define PCI_IDS_GAUDI_SEC   0x1010

#define PCI_IDS_GAUDI2      0x1020
-#define PCI_IDS_GAUDI2_SEC  0x1030

static const struct pci_device_id ids[] = {
    { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
    { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
    { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI_SEC), },
    { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2), },
-   { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2_SEC), },
    { 0, }
};
MODULE_DEVICE_TABLE(pci, ids);

@@ -92,9 +96,6 @@ static enum hl_asic_type get_asic_type(u16 device)
    case PCI_IDS_GAUDI2:
        asic_type = ASIC_GAUDI2;
        break;
-   case PCI_IDS_GAUDI2_SEC:
-       asic_type = ASIC_GAUDI2_SEC;
-       break;
    default:
        asic_type = ASIC_INVALID;
        break;

@@ -107,7 +108,6 @@ static bool is_asic_secured(enum hl_asic_type asic_type)
{
    switch (asic_type) {
    case ASIC_GAUDI_SEC:
-   case ASIC_GAUDI2_SEC:
        return true;
    default:
        return false;

@@ -161,7 +161,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
    mutex_lock(&hdev->fpriv_list_lock);

    if (!hl_device_operational(hdev, &status)) {
-       dev_err_ratelimited(hdev->dev,
+       dev_dbg_ratelimited(hdev->dev,
            "Can't open %s because it is %s\n",
            dev_name(hdev->dev), hdev->status[status]);

@@ -207,11 +207,13 @@ int hl_device_open(struct inode *inode, struct file *filp)
    list_add(&hpriv->dev_node, &hdev->fpriv_list);
    mutex_unlock(&hdev->fpriv_list_lock);

+   hdev->asic_funcs->send_device_activity(hdev, true);
+
    hl_debugfs_add_file(hpriv);

-   atomic_set(&hdev->last_error.cs_timeout.write_enable, 1);
-   atomic_set(&hdev->last_error.razwi.write_enable, 1);
-   hdev->last_error.undef_opcode.write_enable = true;
+   atomic_set(&hdev->captured_err_info.cs_timeout.write_enable, 1);
+   atomic_set(&hdev->captured_err_info.razwi.write_enable, 1);
+   hdev->captured_err_info.undef_opcode.write_enable = true;

    hdev->open_counter++;
    hdev->last_successful_open_jif = jiffies;

@@ -269,7 +271,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
    mutex_lock(&hdev->fpriv_ctrl_list_lock);

    if (!hl_device_operational(hdev, NULL)) {
-       dev_err_ratelimited(hdev->dev_ctrl,
+       dev_dbg_ratelimited(hdev->dev_ctrl,
            "Can't open %s because it is disabled or in reset\n",
            dev_name(hdev->dev_ctrl));
        rc = -EPERM;

@@ -314,12 +316,22 @@ static void copy_kernel_module_params_to_device(struct hl_device *hdev)
    hdev->boot_error_status_mask = boot_error_status_mask;
}

-static void fixup_device_params_per_asic(struct hl_device *hdev)
+static void fixup_device_params_per_asic(struct hl_device *hdev, int timeout)
{
    switch (hdev->asic_type) {
-   case ASIC_GOYA:
    case ASIC_GAUDI:
    case ASIC_GAUDI_SEC:
+       /* If user didn't request a different timeout than the default one, we have
+        * a different default timeout for Gaudi
+        */
+       if (timeout == HL_DEFAULT_TIMEOUT_LOCKED)
+           hdev->timeout_jiffies = msecs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED *
+                               MSEC_PER_SEC);
+
+       hdev->reset_upon_device_release = 0;
+       break;
+
+   case ASIC_GOYA:
        hdev->reset_upon_device_release = 0;
        break;

@@ -339,7 +351,7 @@ static int fixup_device_params(struct hl_device *hdev)
    hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC;

    if (tmp_timeout)
-       hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * 1000);
+       hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * MSEC_PER_SEC);
    else
        hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;

@@ -360,7 +372,7 @@ static int fixup_device_params(struct hl_device *hdev)
    if (!hdev->cpu_queues_enable)
        hdev->heartbeat = 0;

-   fixup_device_params_per_asic(hdev);
+   fixup_device_params_per_asic(hdev, tmp_timeout);

    return 0;
}
...
@@ -14,6 +14,7 @@
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
+#include <linux/vmalloc.h>

static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
    [HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr),

@@ -103,6 +104,7 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
    hw_ip.edma_enabled_mask = prop->edma_enabled_mask;
    hw_ip.server_type = prop->server_type;
+   hw_ip.security_enabled = prop->fw_security_enabled;

    return copy_to_user(out, &hw_ip,
        min((size_t) size, sizeof(hw_ip))) ? -EFAULT : 0;
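A user-space check of the new field, as a hypothetical sketch (reusing the
headers and device fd from the sketches above):

/* Hypothetical sketch: query hw_ip info and read the new security flag. */
int is_fw_secured(int fd)
{
    struct hl_info_hw_ip_info hw_ip = {0};
    struct hl_info_args info = {0};

    info.op = HL_INFO_HW_IP_INFO;
    info.return_pointer = (uint64_t) (uintptr_t) &hw_ip;
    info.return_size = sizeof(hw_ip);

    if (ioctl(fd, HL_IOCTL_INFO, &info))
        return -1;

    return hw_ip.security_enabled;
}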
@@ -591,8 +593,8 @@ static int cs_timeout_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
    if ((!max_size) || (!out))
        return -EINVAL;

-   info.seq = hdev->last_error.cs_timeout.seq;
-   info.timestamp = ktime_to_ns(hdev->last_error.cs_timeout.timestamp);
+   info.seq = hdev->captured_err_info.cs_timeout.seq;
+   info.timestamp = ktime_to_ns(hdev->captured_err_info.cs_timeout.timestamp);

    return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
}

@@ -607,12 +609,12 @@ static int razwi_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
    if ((!max_size) || (!out))
        return -EINVAL;

-   info.timestamp = ktime_to_ns(hdev->last_error.razwi.timestamp);
-   info.addr = hdev->last_error.razwi.addr;
-   info.engine_id_1 = hdev->last_error.razwi.engine_id_1;
-   info.engine_id_2 = hdev->last_error.razwi.engine_id_2;
-   info.no_engine_id = hdev->last_error.razwi.non_engine_initiator;
-   info.error_type = hdev->last_error.razwi.type;
+   info.timestamp = ktime_to_ns(hdev->captured_err_info.razwi.timestamp);
+   info.addr = hdev->captured_err_info.razwi.addr;
+   info.engine_id_1 = hdev->captured_err_info.razwi.engine_id_1;
+   info.engine_id_2 = hdev->captured_err_info.razwi.engine_id_2;
+   info.no_engine_id = hdev->captured_err_info.razwi.non_engine_initiator;
+   info.error_type = hdev->captured_err_info.razwi.type;

    return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
}

@@ -627,13 +629,13 @@ static int undefined_opcode_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
    if ((!max_size) || (!out))
        return -EINVAL;

-   info.timestamp = ktime_to_ns(hdev->last_error.undef_opcode.timestamp);
-   info.engine_id = hdev->last_error.undef_opcode.engine_id;
-   info.cq_addr = hdev->last_error.undef_opcode.cq_addr;
-   info.cq_size = hdev->last_error.undef_opcode.cq_size;
-   info.stream_id = hdev->last_error.undef_opcode.stream_id;
-   info.cb_addr_streams_len = hdev->last_error.undef_opcode.cb_addr_streams_len;
-   memcpy(info.cb_addr_streams, hdev->last_error.undef_opcode.cb_addr_streams,
+   info.timestamp = ktime_to_ns(hdev->captured_err_info.undef_opcode.timestamp);
+   info.engine_id = hdev->captured_err_info.undef_opcode.engine_id;
+   info.cq_addr = hdev->captured_err_info.undef_opcode.cq_addr;
+   info.cq_size = hdev->captured_err_info.undef_opcode.cq_size;
+   info.stream_id = hdev->captured_err_info.undef_opcode.stream_id;
+   info.cb_addr_streams_len = hdev->captured_err_info.undef_opcode.cb_addr_streams_len;
+   memcpy(info.cb_addr_streams, hdev->captured_err_info.undef_opcode.cb_addr_streams,
        sizeof(info.cb_addr_streams));

    return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;

@@ -660,6 +662,55 @@ static int dev_mem_alloc_page_sizes_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
    return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
}

+static int sec_attest_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+   void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+   struct cpucp_sec_attest_info *sec_attest_info;
+   struct hl_info_sec_attest *info;
+   u32 max_size = args->return_size;
+   int rc;
+
+   if ((!max_size) || (!out))
+       return -EINVAL;
+
+   sec_attest_info = kmalloc(sizeof(*sec_attest_info), GFP_KERNEL);
+   if (!sec_attest_info)
+       return -ENOMEM;
+
+   info = kmalloc(sizeof(*info), GFP_KERNEL);
+   if (!info) {
+       rc = -ENOMEM;
+       goto free_sec_attest_info;
+   }
+
+   rc = hl_fw_get_sec_attest_info(hpriv->hdev, sec_attest_info, args->sec_attest_nonce);
+   if (rc)
+       goto free_info;
+
+   info->nonce = le32_to_cpu(sec_attest_info->nonce);
+   info->pcr_quote_len = le16_to_cpu(sec_attest_info->pcr_quote_len);
+   info->pub_data_len = le16_to_cpu(sec_attest_info->pub_data_len);
+   info->certificate_len = le16_to_cpu(sec_attest_info->certificate_len);
+   info->pcr_num_reg = sec_attest_info->pcr_num_reg;
+   info->pcr_reg_len = sec_attest_info->pcr_reg_len;
+   info->quote_sig_len = sec_attest_info->quote_sig_len;
+   memcpy(&info->pcr_data, &sec_attest_info->pcr_data, sizeof(info->pcr_data));
+   memcpy(&info->pcr_quote, &sec_attest_info->pcr_quote, sizeof(info->pcr_quote));
+   memcpy(&info->public_data, &sec_attest_info->public_data, sizeof(info->public_data));
+   memcpy(&info->certificate, &sec_attest_info->certificate, sizeof(info->certificate));
+   memcpy(&info->quote_sig, &sec_attest_info->quote_sig, sizeof(info->quote_sig));
+
+   rc = copy_to_user(out, info,
+           min_t(size_t, max_size, sizeof(*info))) ? -EFAULT : 0;
+
+free_info:
+   kfree(info);
+free_sec_attest_info:
+   kfree(sec_attest_info);
+
+   return rc;
+}
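Retrieving the attestation blob from user space could look like this
hypothetical sketch (struct and field names as used by the handler above):

/* Hypothetical sketch: fetch secured attestation info with a caller nonce. */
int get_attestation(int fd)
{
    struct hl_info_sec_attest attest = {0};
    struct hl_info_args info = {0};

    info.op = HL_INFO_SECURED_ATTESTATION;
    info.sec_attest_nonce = 0x12345678;     /* arbitrary example nonce */
    info.return_pointer = (uint64_t) (uintptr_t) &attest;
    info.return_size = sizeof(attest);

    if (ioctl(fd, HL_IOCTL_INFO, &info))
        return -1;

    /* attest.pcr_quote, attest.quote_sig etc. can now be verified */
    return attest.pcr_quote_len;
}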
static int eventfd_register(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
    int rc;

@@ -697,6 +748,42 @@ static int eventfd_unregister(struct hl_fpriv *hpriv, struct hl_info_args *args)
    return 0;
}

+static int engine_status_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+   void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+   u32 status_buf_size = args->return_size;
+   struct hl_device *hdev = hpriv->hdev;
+   struct engines_data eng_data;
+   int rc;
+
+   if ((status_buf_size < SZ_1K) || (status_buf_size > HL_ENGINES_DATA_MAX_SIZE) || (!out))
+       return -EINVAL;
+
+   eng_data.actual_size = 0;
+   eng_data.allocated_buf_size = status_buf_size;
+   eng_data.buf = vmalloc(status_buf_size);
+   if (!eng_data.buf)
+       return -ENOMEM;
+
+   hdev->asic_funcs->is_device_idle(hdev, NULL, 0, &eng_data);
+
+   if (eng_data.actual_size > eng_data.allocated_buf_size) {
+       dev_err(hdev->dev,
+           "Engines data size (%d Bytes) is bigger than allocated size (%u Bytes)\n",
+           eng_data.actual_size, status_buf_size);
+       vfree(eng_data.buf);
+       return -ENOMEM;
+   }
+
+   args->user_buffer_actual_size = eng_data.actual_size;
+   rc = copy_to_user(out, eng_data.buf, min_t(size_t, status_buf_size, eng_data.actual_size)) ?
+           -EFAULT : 0;
+
+   vfree(eng_data.buf);
+
+   return rc;
+}
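A matching user-space call, as a hypothetical sketch; note the buffer size must
be between 1 KiB and HL_ENGINES_DATA_MAX_SIZE per the check above:

/* Hypothetical sketch: dump the textual engines status to stdout. */
int dump_engines_status(int fd)
{
    static char buf[64 * 1024];             /* within the allowed range */
    struct hl_info_args info = {0};

    info.op = HL_INFO_ENGINE_STATUS;
    info.return_pointer = (uint64_t) (uintptr_t) buf;
    info.return_size = sizeof(buf);

    if (ioctl(fd, HL_IOCTL_INFO, &info))
        return -1;

    fwrite(buf, 1, info.user_buffer_actual_size, stdout);   /* needs <stdio.h> */
    return 0;
}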
static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
                struct device *dev)
{

@@ -806,12 +893,18 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
    case HL_INFO_DRAM_PENDING_ROWS:
        return dram_pending_rows_info(hpriv, args);

+   case HL_INFO_SECURED_ATTESTATION:
+       return sec_attest_info(hpriv, args);
+
    case HL_INFO_REGISTER_EVENTFD:
        return eventfd_register(hpriv, args);

    case HL_INFO_UNREGISTER_EVENTFD:
        return eventfd_unregister(hpriv, args);

+   case HL_INFO_ENGINE_STATUS:
+       return engine_status_info(hpriv, args);
+
    default:
        dev_err(dev, "Invalid request %d\n", args->op);
        rc = -EINVAL;
...
@@ -826,9 +826,7 @@ static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
    q->kernel_address = p;

-   q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH,
-                   sizeof(*q->shadow_queue),
-                   GFP_KERNEL);
+   q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH, sizeof(struct hl_cs_job *), GFP_KERNEL);
    if (!q->shadow_queue) {
        dev_err(hdev->dev,
            "Failed to allocate shadow queue for H/W queue %d\n",
...
@@ -194,7 +194,8 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev, struct cpucp_sensor *sensors_arr)
        curr_arr[sensors_by_type_next_index[type]++] = flags;
    }

-   channels_info = kcalloc(num_active_sensor_types + 1, sizeof(*channels_info), GFP_KERNEL);
+   channels_info = kcalloc(num_active_sensor_types + 1, sizeof(struct hwmon_channel_info *),
+           GFP_KERNEL);
    if (!channels_info) {
        rc = -ENOMEM;
        goto channels_info_array_err;

@@ -910,3 +911,24 @@ void hl_hwmon_fini(struct hl_device *hdev)
    hwmon_device_unregister(hdev->hwmon_dev);
}

+void hl_hwmon_release_resources(struct hl_device *hdev)
+{
+   const struct hwmon_channel_info **channel_info_arr;
+   int i = 0;
+
+   if (!hdev->hl_chip_info->info)
+       return;
+
+   channel_info_arr = hdev->hl_chip_info->info;
+
+   while (channel_info_arr[i]) {
+       kfree(channel_info_arr[i]->config);
+       kfree(channel_info_arr[i]);
+       i++;
+   }
+
+   kfree(channel_info_arr);
+
+   hdev->hl_chip_info->info = NULL;
+}
@@ -457,7 +457,7 @@ static void merge_va_blocks_locked(struct hl_device *hdev,
    prev = list_prev_entry(va_block, node);
    if (&prev->node != va_list && prev->end + 1 == va_block->start) {
        prev->end = va_block->end;
-       prev->size = prev->end - prev->start;
+       prev->size = prev->end - prev->start + 1;
        list_del(&va_block->node);
        kfree(va_block);
        va_block = prev;

@@ -466,7 +466,7 @@ static void merge_va_blocks_locked(struct hl_device *hdev,
    next = list_next_entry(va_block, node);
    if (&next->node != va_list && va_block->end + 1 == next->start) {
        next->start = va_block->start;
-       next->size = next->end - next->start;
+       next->size = next->end - next->start + 1;
        list_del(&va_block->node);
        kfree(va_block);
    }

@@ -755,7 +755,7 @@ static u64 get_va_block(struct hl_device *hdev,
 * - Return the start address of the virtual block.
 */
u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
-       enum hl_va_range_type type, u32 size, u32 alignment)
+       enum hl_va_range_type type, u64 size, u32 alignment)
{
    return get_va_block(hdev, ctx->va_range[type], size, 0,
            max(alignment, ctx->va_range[type]->page_size),

@@ -1210,18 +1210,18 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device_addr)
        goto va_block_err;
    }

-   mutex_lock(&ctx->mmu_lock);
+   mutex_lock(&hdev->mmu_lock);

    rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
    if (rc) {
        dev_err(hdev->dev, "mapping page pack failed for handle %u\n", handle);
-       mutex_unlock(&ctx->mmu_lock);
+       mutex_unlock(&hdev->mmu_lock);
        goto map_err;
    }

    rc = hl_mmu_invalidate_cache_range(hdev, false, *vm_type | MMU_OP_SKIP_LOW_CACHE_INV,
                ctx->asid, ret_vaddr, phys_pg_pack->total_size);
-   mutex_unlock(&ctx->mmu_lock);
+   mutex_unlock(&hdev->mmu_lock);

    if (rc)
        goto map_err;

@@ -1362,7 +1362,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
    else
        vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);

-   mutex_lock(&ctx->mmu_lock);
+   mutex_lock(&hdev->mmu_lock);

    unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack);

@@ -1375,7 +1375,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
    rc = hl_mmu_invalidate_cache_range(hdev, true, *vm_type, ctx->asid, vaddr,
                phys_pg_pack->total_size);

-   mutex_unlock(&ctx->mmu_lock);
+   mutex_unlock(&hdev->mmu_lock);

    /*
     * If the context is closing we don't need to check for the MMU cache

@@ -1418,18 +1418,23 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
    return rc;
}

-static int map_block(struct hl_device *hdev, u64 address, u64 *handle,
-           u32 *size)
+static int map_block(struct hl_device *hdev, u64 address, u64 *handle, u32 *size)
{
-   u32 block_id = 0;
+   u32 block_id;
    int rc;

+   *handle = 0;
+   if (size)
+       *size = 0;
+
    rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id);
+   if (rc)
+       return rc;

    *handle = block_id | HL_MMAP_TYPE_BLOCK;
    *handle <<= PAGE_SHIFT;

-   return rc;
+   return 0;
}

static void hw_block_vm_close(struct vm_area_struct *vma)

@@ -1437,6 +1442,13 @@ static void hw_block_vm_close(struct vm_area_struct *vma)
    struct hl_vm_hw_block_list_node *lnode =
        (struct hl_vm_hw_block_list_node *) vma->vm_private_data;
    struct hl_ctx *ctx = lnode->ctx;
+   long new_mmap_size;
+
+   new_mmap_size = lnode->mapped_size - (vma->vm_end - vma->vm_start);
+   if (new_mmap_size > 0) {
+       lnode->mapped_size = new_mmap_size;
+       return;
+   }

    mutex_lock(&ctx->hw_block_list_lock);
    list_del(&lnode->node);

@@ -1487,23 +1499,23 @@ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
    if (!lnode)
        return -ENOMEM;

-   vma->vm_ops = &hw_block_vm_ops;
-   vma->vm_private_data = lnode;
-
-   hl_ctx_get(ctx);
-
    rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size);
    if (rc) {
-       hl_ctx_put(ctx);
        kfree(lnode);
        return rc;
    }

+   hl_ctx_get(ctx);
+
    lnode->ctx = ctx;
    lnode->vaddr = vma->vm_start;
-   lnode->size = block_size;
+   lnode->block_size = block_size;
+   lnode->mapped_size = lnode->block_size;
    lnode->id = block_id;

+   vma->vm_private_data = lnode;
+   vma->vm_ops = &hw_block_vm_ops;
+
    mutex_lock(&ctx->hw_block_list_lock);
    list_add_tail(&lnode->node, &ctx->hw_block_mem_list);
    mutex_unlock(&ctx->hw_block_list_lock);

@@ -2296,8 +2308,7 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
        return -EFAULT;
    }

-   userptr->pages = kvmalloc_array(npages, sizeof(*userptr->pages),
-                   GFP_KERNEL);
+   userptr->pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
    if (!userptr->pages)
        return -ENOMEM;

@@ -2759,13 +2770,13 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
        unmap_device_va(ctx, &args, true);
    }

-   mutex_lock(&ctx->mmu_lock);
+   mutex_lock(&hdev->mmu_lock);

    /* invalidate the cache once after the unmapping loop */
    hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
    hl_mmu_invalidate_cache(hdev, true, MMU_OP_PHYS_PACK);

-   mutex_unlock(&ctx->mmu_lock);
+   mutex_unlock(&hdev->mmu_lock);

    INIT_LIST_HEAD(&free_list);
...
@@ -11,7 +11,7 @@
 * hl_mmap_mem_buf_get - increase the buffer refcount and return a pointer to
 * the buffer descriptor.
 *
- * @mmg: parent unifed memory manager
+ * @mmg: parent unified memory manager
 * @handle: requested buffer handle
 *
 * Find the buffer in the store and return a pointer to its descriptor.

@@ -104,7 +104,7 @@ int hl_mmap_mem_buf_put(struct hl_mmap_mem_buf *buf)
 * hl_mmap_mem_buf_put_handle - decrease the reference to the buffer with the
 * given handle.
 *
- * @mmg: parent unifed memory manager
+ * @mmg: parent unified memory manager
 * @handle: requested buffer handle
 *
 * Decrease the reference to the buffer, and release it if it was the last one.

@@ -137,7 +137,7 @@ int hl_mmap_mem_buf_put_handle(struct hl_mem_mgr *mmg, u64 handle)
/**
 * hl_mmap_mem_buf_alloc - allocate a new mappable buffer
 *
- * @mmg: parent unifed memory manager
+ * @mmg: parent unified memory manager
 * @behavior: behavior object describing this buffer polymorphic behavior
 * @gfp: gfp flags to use for the memory allocations
 * @args: additional args passed to behavior->alloc

@@ -222,7 +222,7 @@ static const struct vm_operations_struct hl_mmap_mem_buf_vm_ops = {
/**
 * hl_mem_mgr_mmap - map the given buffer to the user
 *
- * @mmg: unifed memory manager
+ * @mmg: unified memory manager
 * @vma: the vma object for which mmap was closed.
 * @args: additional args passed to behavior->mmap
 *

@@ -322,7 +322,7 @@ void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg)
/**
 * hl_mem_mgr_fini - release unified memory manager
 *
- * @mmg: parent unifed memory manager
+ * @mmg: parent unified memory manager
 *
 * Release the unified memory manager. Shall be called from an interrupt context.
 */
...
...@@ -9,6 +9,8 @@ ...@@ -9,6 +9,8 @@
#include "../habanalabs.h" #include "../habanalabs.h"
#include <trace/events/habanalabs.h>
/** /**
* hl_mmu_get_funcs() - get MMU functions structure * hl_mmu_get_funcs() - get MMU functions structure
* @hdev: habanalabs device structure. * @hdev: habanalabs device structure.
...@@ -45,6 +47,8 @@ int hl_mmu_init(struct hl_device *hdev) ...@@ -45,6 +47,8 @@ int hl_mmu_init(struct hl_device *hdev)
if (!hdev->mmu_enable) if (!hdev->mmu_enable)
return 0; return 0;
mutex_init(&hdev->mmu_lock);
if (hdev->mmu_func[MMU_DR_PGT].init != NULL) { if (hdev->mmu_func[MMU_DR_PGT].init != NULL) {
rc = hdev->mmu_func[MMU_DR_PGT].init(hdev); rc = hdev->mmu_func[MMU_DR_PGT].init(hdev);
if (rc) if (rc)
...@@ -86,6 +90,8 @@ void hl_mmu_fini(struct hl_device *hdev) ...@@ -86,6 +90,8 @@ void hl_mmu_fini(struct hl_device *hdev)
if (hdev->mmu_func[MMU_HR_PGT].fini != NULL) if (hdev->mmu_func[MMU_HR_PGT].fini != NULL)
hdev->mmu_func[MMU_HR_PGT].fini(hdev); hdev->mmu_func[MMU_HR_PGT].fini(hdev);
mutex_destroy(&hdev->mmu_lock);
} }
/** /**
...@@ -104,8 +110,6 @@ int hl_mmu_ctx_init(struct hl_ctx *ctx) ...@@ -104,8 +110,6 @@ int hl_mmu_ctx_init(struct hl_ctx *ctx)
if (!hdev->mmu_enable) if (!hdev->mmu_enable)
return 0; return 0;
mutex_init(&ctx->mmu_lock);
if (hdev->mmu_func[MMU_DR_PGT].ctx_init != NULL) { if (hdev->mmu_func[MMU_DR_PGT].ctx_init != NULL) {
rc = hdev->mmu_func[MMU_DR_PGT].ctx_init(ctx); rc = hdev->mmu_func[MMU_DR_PGT].ctx_init(ctx);
if (rc) if (rc)
...@@ -149,8 +153,6 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx) ...@@ -149,8 +153,6 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
if (hdev->mmu_func[MMU_HR_PGT].ctx_fini != NULL) if (hdev->mmu_func[MMU_HR_PGT].ctx_fini != NULL)
hdev->mmu_func[MMU_HR_PGT].ctx_fini(ctx); hdev->mmu_func[MMU_HR_PGT].ctx_fini(ctx);
mutex_destroy(&ctx->mmu_lock);
} }
/* /*
...@@ -259,6 +261,9 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, bool flu ...@@ -259,6 +261,9 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, bool flu
if (flush_pte) if (flush_pte)
mmu_funcs->flush(ctx); mmu_funcs->flush(ctx);
if (trace_habanalabs_mmu_unmap_enabled() && !rc)
trace_habanalabs_mmu_unmap(hdev->dev, virt_addr, 0, page_size, flush_pte);
return rc; return rc;
} }
...@@ -344,6 +349,8 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_s ...@@ -344,6 +349,8 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_s
if (flush_pte) if (flush_pte)
mmu_funcs->flush(ctx); mmu_funcs->flush(ctx);
trace_habanalabs_mmu_map(hdev->dev, virt_addr, phys_addr, page_size, flush_pte);
return 0; return 0;
err: err:
...@@ -403,6 +410,8 @@ int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr, ...@@ -403,6 +410,8 @@ int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr,
dev_err(hdev->dev, dev_err(hdev->dev,
"Map failed for va 0x%llx to pa 0x%llx\n", "Map failed for va 0x%llx to pa 0x%llx\n",
curr_va, curr_pa); curr_va, curr_pa);
/* last mapping failed so don't try to unmap it - reduce off by page_size */
off -= page_size;
goto unmap; goto unmap;
} }
} }
...@@ -600,9 +609,9 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, ...@@ -600,9 +609,9 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT; pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr); mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr);
mutex_lock(&ctx->mmu_lock); mutex_lock(&hdev->mmu_lock);
rc = mmu_funcs->get_tlb_info(ctx, virt_addr, hops); rc = mmu_funcs->get_tlb_info(ctx, virt_addr, hops);
mutex_unlock(&ctx->mmu_lock); mutex_unlock(&hdev->mmu_lock);
if (rc) if (rc)
return rc; return rc;
...@@ -692,16 +701,16 @@ static void hl_mmu_prefetch_work_function(struct work_struct *work) ...@@ -692,16 +701,16 @@ static void hl_mmu_prefetch_work_function(struct work_struct *work)
{ {
struct hl_prefetch_work *pfw = container_of(work, struct hl_prefetch_work, pf_work); struct hl_prefetch_work *pfw = container_of(work, struct hl_prefetch_work, pf_work);
struct hl_ctx *ctx = pfw->ctx; struct hl_ctx *ctx = pfw->ctx;
struct hl_device *hdev = ctx->hdev;
if (!hl_device_operational(ctx->hdev, NULL)) if (!hl_device_operational(hdev, NULL))
goto put_ctx; goto put_ctx;
mutex_lock(&ctx->mmu_lock); mutex_lock(&hdev->mmu_lock);
ctx->hdev->asic_funcs->mmu_prefetch_cache_range(ctx, pfw->flags, pfw->asid, hdev->asic_funcs->mmu_prefetch_cache_range(ctx, pfw->flags, pfw->asid, pfw->va, pfw->size);
pfw->va, pfw->size);
mutex_unlock(&ctx->mmu_lock); mutex_unlock(&hdev->mmu_lock);
put_ctx: put_ctx:
/* /*
......
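The hunks above move mmu_lock from hl_ctx to hl_device: since the MMU invalidation hardware is per device, map/unmap and TLB-info queries issued by different contexts must serialize against one another, not only within a single context. A condensed sketch of the resulting pattern, with both structures reduced to the relevant fields and the helper name hypothetical:

    struct hl_device {
        struct mutex mmu_lock;  /* serializes MMU h/w access across all contexts */
        /* other fields omitted */
    };

    static int hl_mmu_touch_mapping(struct hl_ctx *ctx, u64 virt_addr)
    {
        struct hl_device *hdev = ctx->hdev;

        mutex_lock(&hdev->mmu_lock);    /* previously &ctx->mmu_lock */
        /* walk/update page tables, then flush the shared invalidation h/w */
        mutex_unlock(&hdev->mmu_lock);

        return 0;
    }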
...@@ -375,6 +375,14 @@ static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj, ...@@ -375,6 +375,14 @@ static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj,
return max_size; return max_size;
} }
static ssize_t security_enabled_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
return sprintf(buf, "%d\n", hdev->asic_prop.fw_security_enabled);
}
static DEVICE_ATTR_RO(armcp_kernel_ver); static DEVICE_ATTR_RO(armcp_kernel_ver);
static DEVICE_ATTR_RO(armcp_ver); static DEVICE_ATTR_RO(armcp_ver);
static DEVICE_ATTR_RO(cpld_ver); static DEVICE_ATTR_RO(cpld_ver);
...@@ -393,6 +401,7 @@ static DEVICE_ATTR_RO(status); ...@@ -393,6 +401,7 @@ static DEVICE_ATTR_RO(status);
static DEVICE_ATTR_RO(thermal_ver); static DEVICE_ATTR_RO(thermal_ver);
static DEVICE_ATTR_RO(uboot_ver); static DEVICE_ATTR_RO(uboot_ver);
static DEVICE_ATTR_RO(fw_os_ver); static DEVICE_ATTR_RO(fw_os_ver);
static DEVICE_ATTR_RO(security_enabled);
static struct bin_attribute bin_attr_eeprom = { static struct bin_attribute bin_attr_eeprom = {
.attr = {.name = "eeprom", .mode = (0444)}, .attr = {.name = "eeprom", .mode = (0444)},
...@@ -417,6 +426,7 @@ static struct attribute *hl_dev_attrs[] = { ...@@ -417,6 +426,7 @@ static struct attribute *hl_dev_attrs[] = {
&dev_attr_thermal_ver.attr, &dev_attr_thermal_ver.attr,
&dev_attr_uboot_ver.attr, &dev_attr_uboot_ver.attr,
&dev_attr_fw_os_ver.attr, &dev_attr_fw_os_ver.attr,
&dev_attr_security_enabled.attr,
NULL, NULL,
}; };
......
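With the new attribute in place, userspace can check whether the device runs secured firmware by reading the sysfs file. A minimal userspace sketch (hl0 is just an example instance name):

    #include <stdio.h>

    int main(void)
    {
        char buf[8] = "";
        FILE *f = fopen("/sys/class/habanalabs/hl0/security_enabled", "r");

        if (!f)
            return 1;

        if (fgets(buf, sizeof(buf), f))
            printf("fw security %s\n", buf[0] == '1' ? "enabled" : "disabled");

        fclose(f);
        return 0;
    }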
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
#include "../include/gaudi2/gaudi2_packets.h" #include "../include/gaudi2/gaudi2_packets.h"
#include "../include/gaudi2/gaudi2_fw_if.h" #include "../include/gaudi2/gaudi2_fw_if.h"
#include "../include/gaudi2/gaudi2_async_events.h" #include "../include/gaudi2/gaudi2_async_events.h"
#include "../include/gaudi2/gaudi2_async_virt_events.h"
#define GAUDI2_LINUX_FW_FILE "habanalabs/gaudi2/gaudi2-fit.itb" #define GAUDI2_LINUX_FW_FILE "habanalabs/gaudi2/gaudi2-fit.itb"
#define GAUDI2_BOOT_FIT_FILE "habanalabs/gaudi2/gaudi2-boot-fit.itb" #define GAUDI2_BOOT_FIT_FILE "habanalabs/gaudi2/gaudi2-boot-fit.itb"
...@@ -140,9 +139,6 @@ ...@@ -140,9 +139,6 @@
#define VA_HOST_SPACE_HPAGE_START 0xFFF0800000000000ull #define VA_HOST_SPACE_HPAGE_START 0xFFF0800000000000ull
#define VA_HOST_SPACE_HPAGE_END 0xFFF1000000000000ull /* 140TB */ #define VA_HOST_SPACE_HPAGE_END 0xFFF1000000000000ull /* 140TB */
#define VA_HOST_SPACE_USER_MAPPED_CB_START 0xFFF1000000000000ull
#define VA_HOST_SPACE_USER_MAPPED_CB_END 0xFFF1000100000000ull /* 4GB */
/* 140TB */ /* 140TB */
#define VA_HOST_SPACE_PAGE_SIZE (VA_HOST_SPACE_PAGE_END - VA_HOST_SPACE_PAGE_START) #define VA_HOST_SPACE_PAGE_SIZE (VA_HOST_SPACE_PAGE_END - VA_HOST_SPACE_PAGE_START)
...@@ -458,7 +454,6 @@ struct dup_block_ctx { ...@@ -458,7 +454,6 @@ struct dup_block_ctx {
* the user can map. * the user can map.
* @lfsr_rand_seeds: array of MME ACC random seeds to set. * @lfsr_rand_seeds: array of MME ACC random seeds to set.
* @hw_queues_lock: protects the H/W queues from concurrent access. * @hw_queues_lock: protects the H/W queues from concurrent access.
* @kdma_lock: protects the KDMA engine from concurrent access.
* @scratchpad_kernel_address: general purpose PAGE_SIZE contiguous memory, * @scratchpad_kernel_address: general purpose PAGE_SIZE contiguous memory,
* this memory region should be write-only. * this memory region should be write-only.
* currently used for HBW QMAN writes which is * currently used for HBW QMAN writes which is
...@@ -510,9 +505,6 @@ struct dup_block_ctx { ...@@ -510,9 +505,6 @@ struct dup_block_ctx {
* @flush_db_fifo: flag to force flush DB FIFO after a write. * @flush_db_fifo: flag to force flush DB FIFO after a write.
* @hbm_cfg: HBM subsystem settings * @hbm_cfg: HBM subsystem settings
* @hw_queues_lock_mutex: used by simulator instead of hw_queues_lock. * @hw_queues_lock_mutex: used by simulator instead of hw_queues_lock.
* @kdma_lock_mutex: used by simulator instead of kdma_lock.
* @use_deprecated_event_mappings: use old event mappings which are about to be
* deprecated
*/ */
struct gaudi2_device { struct gaudi2_device {
int (*cpucp_info_get)(struct hl_device *hdev); int (*cpucp_info_get)(struct hl_device *hdev);
...@@ -521,7 +513,6 @@ struct gaudi2_device { ...@@ -521,7 +513,6 @@ struct gaudi2_device {
int lfsr_rand_seeds[MME_NUM_OF_LFSR_SEEDS]; int lfsr_rand_seeds[MME_NUM_OF_LFSR_SEEDS];
spinlock_t hw_queues_lock; spinlock_t hw_queues_lock;
spinlock_t kdma_lock;
void *scratchpad_kernel_address; void *scratchpad_kernel_address;
dma_addr_t scratchpad_bus_address; dma_addr_t scratchpad_bus_address;
...@@ -562,5 +553,6 @@ void gaudi2_pb_print_security_errors(struct hl_device *hdev, u32 block_addr, u32 ...@@ -562,5 +553,6 @@ void gaudi2_pb_print_security_errors(struct hl_device *hdev, u32 block_addr, u32
u32 offended_addr); u32 offended_addr);
int gaudi2_init_security(struct hl_device *hdev); int gaudi2_init_security(struct hl_device *hdev);
void gaudi2_ack_protection_bits_errors(struct hl_device *hdev); void gaudi2_ack_protection_bits_errors(struct hl_device *hdev);
int gaudi2_send_device_activity(struct hl_device *hdev, bool open);
#endif /* GAUDI2P_H_ */ #endif /* GAUDI2P_H_ */
...@@ -51,12 +51,18 @@ ...@@ -51,12 +51,18 @@
(0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT) | \ (0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT) | \
(0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT)) (0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT))
#define PDMA1_QMAN_ENABLE \ #define PDMA0_QMAN_ENABLE \
((0x3 << PDMA0_QM_GLBL_CFG0_PQF_EN_SHIFT) | \ ((0x3 << PDMA0_QM_GLBL_CFG0_PQF_EN_SHIFT) | \
(0x1F << PDMA0_QM_GLBL_CFG0_CQF_EN_SHIFT) | \ (0x1F << PDMA0_QM_GLBL_CFG0_CQF_EN_SHIFT) | \
(0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT) | \ (0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT) | \
(0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT)) (0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT))
#define PDMA1_QMAN_ENABLE \
((0x1 << PDMA0_QM_GLBL_CFG0_PQF_EN_SHIFT) | \
(0x1F << PDMA0_QM_GLBL_CFG0_CQF_EN_SHIFT) | \
(0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT) | \
(0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT))
/* QM_IDLE_MASK is valid for all engines QM idle check */ /* QM_IDLE_MASK is valid for all engines QM idle check */
#define QM_IDLE_MASK (DCORE0_EDMA0_QM_GLBL_STS0_PQF_IDLE_MASK | \ #define QM_IDLE_MASK (DCORE0_EDMA0_QM_GLBL_STS0_PQF_IDLE_MASK | \
DCORE0_EDMA0_QM_GLBL_STS0_CQF_IDLE_MASK | \ DCORE0_EDMA0_QM_GLBL_STS0_CQF_IDLE_MASK | \
...@@ -138,4 +144,17 @@ ...@@ -138,4 +144,17 @@
#define DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_SHIFT 15 #define DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_SHIFT 15
#define DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK 0x8000 #define DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK 0x8000
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_SHIFT 0
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK 0x1
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_SHIFT 1
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK 0x2
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_SHIFT 2
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK 0x4
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK_SHIFT 3
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK_MASK 0x8
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK_SHIFT 4
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK_MASK 0x10
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK_SHIFT 5
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK_MASK 0x20
#endif /* GAUDI2_MASKS_H_ */ #endif /* GAUDI2_MASKS_H_ */
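The new PCIE_WRAP_PCIE_IC_SEI_INTR_IND_* definitions let the error path name the exact PCIe SEI cause (see the "print RAZWI info upon PCIe access error" commit in this tag). A hedged sketch of decoding a raw cause word with these masks; how the value is read from the hardware is left abstract:

    static void decode_pcie_sei_cause(struct hl_device *hdev, u32 cause)
    {
        if (cause & PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK)
            dev_err(hdev->dev, "PCIe SEI: AXI error interrupt\n");

        if (cause & PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK)
            dev_err(hdev->dev, "PCIe SEI: AXI LBW error interrupt\n");

        if (cause & PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK)
            dev_err(hdev->dev, "PCIe SEI: bad access interrupt\n");
    }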
...@@ -2559,6 +2559,10 @@ static const u32 gaudi2_pb_pcie[] = { ...@@ -2559,6 +2559,10 @@ static const u32 gaudi2_pb_pcie[] = {
mmPCIE_WRAP_BASE, mmPCIE_WRAP_BASE,
}; };
static const u32 gaudi2_pb_pcie_unsecured_regs[] = {
mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0,
};
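Leaving mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0 unsecured lines up with the "allow user to flush PCIE by read" commit in this tag: a read of the register can be used purely for its ordering side effect. A hedged kernel-side sketch (function name hypothetical, flush semantics assumed):

    static void gaudi2_flush_pcie_by_read(struct hl_device *hdev)
    {
        /* Assumption: a read through the PCIe wrap block completes only
         * after previously posted writes have been flushed.
         */
        RREG32(mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0); /* value discarded */
    }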
static const u32 gaudi2_pb_thermal_sensor0[] = { static const u32 gaudi2_pb_thermal_sensor0[] = {
mmDCORE0_XFT_BASE, mmDCORE0_XFT_BASE,
mmDCORE0_TSTDVS_BASE, mmDCORE0_TSTDVS_BASE,
...@@ -2583,9 +2587,9 @@ struct gaudi2_tpc_pb_data { ...@@ -2583,9 +2587,9 @@ struct gaudi2_tpc_pb_data {
}; };
static void gaudi2_config_tpcs_glbl_sec(struct hl_device *hdev, int dcore, int inst, u32 offset, static void gaudi2_config_tpcs_glbl_sec(struct hl_device *hdev, int dcore, int inst, u32 offset,
void *data) struct iterate_module_ctx *ctx)
{ {
struct gaudi2_tpc_pb_data *pb_data = (struct gaudi2_tpc_pb_data *)data; struct gaudi2_tpc_pb_data *pb_data = ctx->data;
hl_config_glbl_sec(hdev, gaudi2_pb_dcr0_tpc0, pb_data->glbl_sec, hl_config_glbl_sec(hdev, gaudi2_pb_dcr0_tpc0, pb_data->glbl_sec,
offset, pb_data->block_array_size); offset, pb_data->block_array_size);
...@@ -2660,15 +2664,14 @@ static int gaudi2_init_pb_tpc(struct hl_device *hdev) ...@@ -2660,15 +2664,14 @@ static int gaudi2_init_pb_tpc(struct hl_device *hdev)
struct gaudi2_tpc_arc_pb_data { struct gaudi2_tpc_arc_pb_data {
u32 unsecured_regs_arr_size; u32 unsecured_regs_arr_size;
u32 arc_regs_arr_size; u32 arc_regs_arr_size;
int rc;
}; };
static void gaudi2_config_tpcs_pb_ranges(struct hl_device *hdev, int dcore, int inst, u32 offset, static void gaudi2_config_tpcs_pb_ranges(struct hl_device *hdev, int dcore, int inst, u32 offset,
void *data) struct iterate_module_ctx *ctx)
{ {
struct gaudi2_tpc_arc_pb_data *pb_data = (struct gaudi2_tpc_arc_pb_data *)data; struct gaudi2_tpc_arc_pb_data *pb_data = ctx->data;
pb_data->rc |= hl_init_pb_ranges(hdev, HL_PB_SHARED, HL_PB_NA, 1, ctx->rc = hl_init_pb_ranges(hdev, HL_PB_SHARED, HL_PB_NA, 1,
offset, gaudi2_pb_dcr0_tpc0_arc, offset, gaudi2_pb_dcr0_tpc0_arc,
pb_data->arc_regs_arr_size, pb_data->arc_regs_arr_size,
gaudi2_pb_dcr0_tpc0_arc_unsecured_regs, gaudi2_pb_dcr0_tpc0_arc_unsecured_regs,
...@@ -2683,12 +2686,12 @@ static int gaudi2_init_pb_tpc_arc(struct hl_device *hdev) ...@@ -2683,12 +2686,12 @@ static int gaudi2_init_pb_tpc_arc(struct hl_device *hdev)
tpc_arc_pb_data.arc_regs_arr_size = ARRAY_SIZE(gaudi2_pb_dcr0_tpc0_arc); tpc_arc_pb_data.arc_regs_arr_size = ARRAY_SIZE(gaudi2_pb_dcr0_tpc0_arc);
tpc_arc_pb_data.unsecured_regs_arr_size = tpc_arc_pb_data.unsecured_regs_arr_size =
ARRAY_SIZE(gaudi2_pb_dcr0_tpc0_arc_unsecured_regs); ARRAY_SIZE(gaudi2_pb_dcr0_tpc0_arc_unsecured_regs);
tpc_arc_pb_data.rc = 0;
tpc_iter.fn = &gaudi2_config_tpcs_pb_ranges; tpc_iter.fn = &gaudi2_config_tpcs_pb_ranges;
tpc_iter.data = &tpc_arc_pb_data; tpc_iter.data = &tpc_arc_pb_data;
gaudi2_iterate_tpcs(hdev, &tpc_iter); gaudi2_iterate_tpcs(hdev, &tpc_iter);
return tpc_arc_pb_data.rc; return tpc_iter.rc;
} }
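The callback signature change above replaces the bare void *data with the iterate_module_ctx itself, so a return code can travel in ctx->rc alongside the private payload instead of being smuggled through each payload struct. A reduced sketch of the context, assuming only the fields these hunks actually use:

    struct iterate_module_ctx {
        void (*fn)(struct hl_device *hdev, int dcore, int inst, u32 offset,
                   struct iterate_module_ctx *ctx);
        void *data; /* callback-private payload, e.g. gaudi2_tpc_arc_pb_data */
        int rc;     /* error reported by a callback, checked after iteration */
    };

The caller then checks tpc_iter.rc once after gaudi2_iterate_tpcs() returns, as gaudi2_init_pb_tpc_arc() does above.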
static int gaudi2_init_pb_sm_objs(struct hl_device *hdev) static int gaudi2_init_pb_sm_objs(struct hl_device *hdev)
...@@ -3419,7 +3422,8 @@ static int gaudi2_init_protection_bits(struct hl_device *hdev) ...@@ -3419,7 +3422,8 @@ static int gaudi2_init_protection_bits(struct hl_device *hdev)
rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA, rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA,
HL_PB_SINGLE_INSTANCE, HL_PB_NA, HL_PB_SINGLE_INSTANCE, HL_PB_NA,
gaudi2_pb_pcie, ARRAY_SIZE(gaudi2_pb_pcie), gaudi2_pb_pcie, ARRAY_SIZE(gaudi2_pb_pcie),
NULL, HL_PB_NA); gaudi2_pb_pcie_unsecured_regs,
ARRAY_SIZE(gaudi2_pb_pcie_unsecured_regs));
/* Thermal Sensor. /* Thermal Sensor.
* Skip when security is enabled in F/W, because the blocks are protected by privileged RR. * Skip when security is enabled in F/W, because the blocks are protected by privileged RR.
...@@ -3547,9 +3551,9 @@ struct gaudi2_ack_pb_tpc_data { ...@@ -3547,9 +3551,9 @@ struct gaudi2_ack_pb_tpc_data {
}; };
static void gaudi2_ack_pb_tpc_config(struct hl_device *hdev, int dcore, int inst, u32 offset, static void gaudi2_ack_pb_tpc_config(struct hl_device *hdev, int dcore, int inst, u32 offset,
void *data) struct iterate_module_ctx *ctx)
{ {
struct gaudi2_ack_pb_tpc_data *pb_data = (struct gaudi2_ack_pb_tpc_data *)data; struct gaudi2_ack_pb_tpc_data *pb_data = ctx->data;
hl_ack_pb_single_dcore(hdev, offset, HL_PB_SINGLE_INSTANCE, HL_PB_NA, hl_ack_pb_single_dcore(hdev, offset, HL_PB_SINGLE_INSTANCE, HL_PB_NA,
gaudi2_pb_dcr0_tpc0, pb_data->tpc_regs_array_size); gaudi2_pb_dcr0_tpc0, pb_data->tpc_regs_array_size);
......
...@@ -916,26 +916,11 @@ int goya_late_init(struct hl_device *hdev) ...@@ -916,26 +916,11 @@ int goya_late_init(struct hl_device *hdev)
*/ */
void goya_late_fini(struct hl_device *hdev) void goya_late_fini(struct hl_device *hdev)
{ {
const struct hwmon_channel_info **channel_info_arr;
struct goya_device *goya = hdev->asic_specific; struct goya_device *goya = hdev->asic_specific;
int i = 0;
cancel_delayed_work_sync(&goya->goya_work->work_freq); cancel_delayed_work_sync(&goya->goya_work->work_freq);
if (!hdev->hl_chip_info->info) hl_hwmon_release_resources(hdev);
return;
channel_info_arr = hdev->hl_chip_info->info;
while (channel_info_arr[i]) {
kfree(channel_info_arr[i]->config);
kfree(channel_info_arr[i]);
i++;
}
kfree(channel_info_arr);
hdev->hl_chip_info->info = NULL;
} }
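goya_late_fini() now delegates to hl_hwmon_release_resources(); judging from the open-coded loop it replaces, the helper frees each channel-info entry, its config array, and finally the top-level array. A sketch reconstructed from the removed lines (the real helper lives in common code and may do more):

    void hl_hwmon_release_resources(struct hl_device *hdev)
    {
        const struct hwmon_channel_info **channel_info_arr;
        int i = 0;

        if (!hdev->hl_chip_info->info)
            return;

        channel_info_arr = hdev->hl_chip_info->info;

        while (channel_info_arr[i]) {
            kfree(channel_info_arr[i]->config);
            kfree(channel_info_arr[i]);
            i++;
        }

        kfree(channel_info_arr);
        hdev->hl_chip_info->info = NULL;
    }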
static void goya_set_pci_memory_regions(struct hl_device *hdev) static void goya_set_pci_memory_regions(struct hl_device *hdev)
...@@ -1040,6 +1025,7 @@ static int goya_sw_init(struct hl_device *hdev) ...@@ -1040,6 +1025,7 @@ static int goya_sw_init(struct hl_device *hdev)
hdev->asic_prop.supports_compute_reset = true; hdev->asic_prop.supports_compute_reset = true;
hdev->asic_prop.allow_inference_soft_reset = true; hdev->asic_prop.allow_inference_soft_reset = true;
hdev->supports_wait_for_multi_cs = false; hdev->supports_wait_for_multi_cs = false;
hdev->supports_ctx_switch = true;
hdev->asic_funcs->set_pci_memory_regions(hdev); hdev->asic_funcs->set_pci_memory_regions(hdev);
...@@ -4559,7 +4545,7 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr, ...@@ -4559,7 +4545,7 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
return rc; return rc;
} }
static int goya_non_hard_reset_late_init(struct hl_device *hdev) static int goya_compute_reset_late_init(struct hl_device *hdev)
{ {
/* /*
* Unmask all IRQs since some could have been received * Unmask all IRQs since some could have been received
...@@ -5137,8 +5123,8 @@ int goya_cpucp_info_get(struct hl_device *hdev) ...@@ -5137,8 +5123,8 @@ int goya_cpucp_info_get(struct hl_device *hdev)
return 0; return 0;
} }
static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
u8 mask_len, struct seq_file *s) struct engines_data *e)
{ {
const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n"; const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
const char *dma_fmt = "%-5d%-9s%#-14x%#x\n"; const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
...@@ -5149,9 +5135,9 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, ...@@ -5149,9 +5135,9 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
u64 offset; u64 offset;
int i; int i;
if (s) if (e)
seq_puts(s, "\nDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0\n" hl_engine_data_sprintf(e, "\nDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0\n"
"--- ------- ------------ -------------\n"); "--- ------- ------------ -------------\n");
offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0; offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;
...@@ -5164,13 +5150,13 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, ...@@ -5164,13 +5150,13 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
if (mask && !is_eng_idle) if (mask && !is_eng_idle)
set_bit(GOYA_ENGINE_ID_DMA_0 + i, mask); set_bit(GOYA_ENGINE_ID_DMA_0 + i, mask);
if (s) if (e)
seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N", hl_engine_data_sprintf(e, dma_fmt, i, is_eng_idle ? "Y" : "N",
qm_glbl_sts0, dma_core_sts0); qm_glbl_sts0, dma_core_sts0);
} }
if (s) if (e)
seq_puts(s, hl_engine_data_sprintf(e,
"\nTPC is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 CFG_STATUS\n" "\nTPC is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 CFG_STATUS\n"
"--- ------- ------------ -------------- ----------\n"); "--- ------- ------------ -------------- ----------\n");
...@@ -5187,13 +5173,13 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, ...@@ -5187,13 +5173,13 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
if (mask && !is_eng_idle) if (mask && !is_eng_idle)
set_bit(GOYA_ENGINE_ID_TPC_0 + i, mask); set_bit(GOYA_ENGINE_ID_TPC_0 + i, mask);
if (s) if (e)
seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N", hl_engine_data_sprintf(e, fmt, i, is_eng_idle ? "Y" : "N",
qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts); qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
} }
if (s) if (e)
seq_puts(s, hl_engine_data_sprintf(e,
"\nMME is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 ARCH_STATUS\n" "\nMME is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 ARCH_STATUS\n"
"--- ------- ------------ -------------- -----------\n"); "--- ------- ------------ -------------- -----------\n");
...@@ -5207,10 +5193,10 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, ...@@ -5207,10 +5193,10 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
if (mask && !is_eng_idle) if (mask && !is_eng_idle)
set_bit(GOYA_ENGINE_ID_MME_0, mask); set_bit(GOYA_ENGINE_ID_MME_0, mask);
if (s) { if (e) {
seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0, hl_engine_data_sprintf(e, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
cmdq_glbl_sts0, mme_arch_sts); cmdq_glbl_sts0, mme_arch_sts);
seq_puts(s, "\n"); hl_engine_data_sprintf(e, "\n");
} }
return is_idle; return is_idle;
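The idle dump no longer writes straight into a seq_file; it appends to a driver-owned engines_data buffer through hl_engine_data_sprintf(), which lets the same formatting code back both debugfs and the new h/w-block-status uAPI. A hedged sketch of the append helper (field names are assumptions inferred from its use here):

    struct engines_data {
        char *buf;              /* output buffer */
        int actual_size;        /* bytes written so far */
        u32 allocated_buf_size; /* total capacity */
    };

    void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...)
    {
        va_list args;

        va_start(args, fmt);
        e->actual_size += vscnprintf(e->buf + e->actual_size,
                                     e->allocated_buf_size - e->actual_size,
                                     fmt, args);
        va_end(args);
    }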
...@@ -5434,6 +5420,11 @@ static int goya_scrub_device_dram(struct hl_device *hdev, u64 val) ...@@ -5434,6 +5420,11 @@ static int goya_scrub_device_dram(struct hl_device *hdev, u64 val)
return -EOPNOTSUPP; return -EOPNOTSUPP;
} }
static int goya_send_device_activity(struct hl_device *hdev, bool open)
{
return 0;
}
static const struct hl_asic_funcs goya_funcs = { static const struct hl_asic_funcs goya_funcs = {
.early_init = goya_early_init, .early_init = goya_early_init,
.early_fini = goya_early_fini, .early_fini = goya_early_fini,
...@@ -5478,11 +5469,9 @@ static const struct hl_asic_funcs goya_funcs = { ...@@ -5478,11 +5469,9 @@ static const struct hl_asic_funcs goya_funcs = {
.send_heartbeat = goya_send_heartbeat, .send_heartbeat = goya_send_heartbeat,
.debug_coresight = goya_debug_coresight, .debug_coresight = goya_debug_coresight,
.is_device_idle = goya_is_device_idle, .is_device_idle = goya_is_device_idle,
.non_hard_reset_late_init = goya_non_hard_reset_late_init, .compute_reset_late_init = goya_compute_reset_late_init,
.hw_queues_lock = goya_hw_queues_lock, .hw_queues_lock = goya_hw_queues_lock,
.hw_queues_unlock = goya_hw_queues_unlock, .hw_queues_unlock = goya_hw_queues_unlock,
.kdma_lock = NULL,
.kdma_unlock = NULL,
.get_pci_id = goya_get_pci_id, .get_pci_id = goya_get_pci_id,
.get_eeprom_data = goya_get_eeprom_data, .get_eeprom_data = goya_get_eeprom_data,
.get_monitor_dump = goya_get_monitor_dump, .get_monitor_dump = goya_get_monitor_dump,
...@@ -5528,6 +5517,7 @@ static const struct hl_asic_funcs goya_funcs = { ...@@ -5528,6 +5517,7 @@ static const struct hl_asic_funcs goya_funcs = {
.mmu_get_real_page_size = hl_mmu_get_real_page_size, .mmu_get_real_page_size = hl_mmu_get_real_page_size,
.access_dev_mem = hl_access_dev_mem, .access_dev_mem = hl_access_dev_mem,
.set_dram_bar_base = goya_set_ddr_bar_base, .set_dram_bar_base = goya_set_ddr_bar_base,
.send_device_activity = goya_send_device_activity,
}; };
/* /*
......
...@@ -629,6 +629,12 @@ enum pq_init_status { ...@@ -629,6 +629,12 @@ enum pq_init_status {
* CPUCP_PACKET_ENGINE_CORE_ASID_SET - * CPUCP_PACKET_ENGINE_CORE_ASID_SET -
* Packet to perform engine core ASID configuration * Packet to perform engine core ASID configuration
* *
* CPUCP_PACKET_SEC_ATTEST_GET -
* Get the attestation data that is collected during various stages of the
* boot sequence. The attestation data is also hashed with a unique
* number (nonce) provided by the host to prevent replay attacks.
* The public key and certificate are also provided as part of the FW response.
*
* CPUCP_PACKET_MONITOR_DUMP_GET - * CPUCP_PACKET_MONITOR_DUMP_GET -
* Get monitors registers dump from the CpuCP kernel. * Get monitors registers dump from the CpuCP kernel.
* The CPU will put the registers dump in a buffer allocated by the driver * The CPU will put the registers dump in a buffer allocated by the driver
...@@ -636,6 +642,10 @@ enum pq_init_status { ...@@ -636,6 +642,10 @@ enum pq_init_status {
* passes the max size it allows the CpuCP to write to the structure, to prevent * passes the max size it allows the CpuCP to write to the structure, to prevent
* data corruption in case of mismatched driver/FW versions. * data corruption in case of mismatched driver/FW versions.
* Relevant only to Gaudi. * Relevant only to Gaudi.
*
* CPUCP_PACKET_ACTIVE_STATUS_SET -
* LKD sends the FW an indication of whether the device is free or in use. This
* indication is also reported to the BMC.
*/ */
enum cpucp_packet_id { enum cpucp_packet_id {
...@@ -687,10 +697,17 @@ enum cpucp_packet_id { ...@@ -687,10 +697,17 @@ enum cpucp_packet_id {
CPUCP_PACKET_RESERVED, /* not used */ CPUCP_PACKET_RESERVED, /* not used */
CPUCP_PACKET_ENGINE_CORE_ASID_SET, /* internal */ CPUCP_PACKET_ENGINE_CORE_ASID_SET, /* internal */
CPUCP_PACKET_RESERVED2, /* not used */ CPUCP_PACKET_RESERVED2, /* not used */
CPUCP_PACKET_SEC_ATTEST_GET, /* internal */
CPUCP_PACKET_RESERVED3, /* not used */ CPUCP_PACKET_RESERVED3, /* not used */
CPUCP_PACKET_RESERVED4, /* not used */ CPUCP_PACKET_RESERVED4, /* not used */
CPUCP_PACKET_RESERVED5, /* not used */
CPUCP_PACKET_MONITOR_DUMP_GET, /* debugfs */ CPUCP_PACKET_MONITOR_DUMP_GET, /* debugfs */
CPUCP_PACKET_RESERVED5, /* not used */
CPUCP_PACKET_RESERVED6, /* not used */
CPUCP_PACKET_RESERVED7, /* not used */
CPUCP_PACKET_RESERVED8, /* not used */
CPUCP_PACKET_RESERVED9, /* not used */
CPUCP_PACKET_ACTIVE_STATUS_SET, /* internal */
CPUCP_PACKET_ID_MAX /* must be last */
}; };
#define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5 #define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5
...@@ -783,6 +800,9 @@ struct cpucp_packet { ...@@ -783,6 +800,9 @@ struct cpucp_packet {
* result cannot be used to hold general purpose data. * result cannot be used to hold general purpose data.
*/ */
__le32 status_mask; __le32 status_mask;
/* random, used once number, for security packets */
__le32 nonce;
}; };
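Because nonce shares the union with result/status_mask, a security request carries its anti-replay value inline in the packet. A hedged sketch of preparing a CPUCP_PACKET_SEC_ATTEST_GET request (the DMA address variable is hypothetical; the opcode shift follows the existing cpucp convention):

    struct cpucp_packet pkt = {0};

    pkt.ctl = cpu_to_le32(CPUCP_PACKET_SEC_ATTEST_GET << CPUCP_PKT_CTL_OPCODE_SHIFT);
    pkt.addr = cpu_to_le64(sec_attest_dma_addr); /* hypothetical: buffer for the report */
    pkt.data_max_size = cpu_to_le32(sizeof(struct cpucp_sec_attest_info));
    pkt.nonce = cpu_to_le32(nonce); /* host-chosen, used once */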
/* For NIC requests */ /* For NIC requests */
...@@ -813,10 +833,25 @@ enum cpucp_led_index { ...@@ -813,10 +833,25 @@ enum cpucp_led_index {
CPUCP_LED2_INDEX CPUCP_LED2_INDEX
}; };
/*
* enum cpucp_packet_rc - Error return codes
* @cpucp_packet_success -> returned on success.
* @cpucp_packet_invalid -> kept to support the Goya and Gaudi platforms.
* @cpucp_packet_fault -> returned on a processing error, e.g. failure to
* get a device binding or semaphore.
* @cpucp_packet_invalid_pkt -> returned when the cpucp packet is unsupported.
* Supported from Greco onwards.
* @cpucp_packet_invalid_params -> returned when a parameter check fails, e.g.
* buffer length or attribute value. Supported from Greco onwards.
* @cpucp_packet_rc_max -> indicates the size of the enum; must be last.
*/
enum cpucp_packet_rc { enum cpucp_packet_rc {
cpucp_packet_success, cpucp_packet_success,
cpucp_packet_invalid, cpucp_packet_invalid,
cpucp_packet_fault cpucp_packet_fault,
cpucp_packet_invalid_pkt,
cpucp_packet_invalid_params,
cpucp_packet_rc_max
}; };
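With distinct invalid_pkt and invalid_params codes (Greco onwards), a driver can map firmware results onto standard errnos rather than collapsing everything into one failure. A hedged sketch of such a mapping (the exact policy is a design choice, not taken from this diff):

    static int cpucp_rc_to_errno(u32 rc)
    {
        switch (rc) {
        case cpucp_packet_success:
            return 0;
        case cpucp_packet_invalid_pkt:
            return -EOPNOTSUPP; /* packet unknown to this FW */
        case cpucp_packet_invalid_params:
            return -EINVAL;     /* bad length/attribute */
        default:
            return -EIO;        /* fault or legacy 'invalid' */
        }
    }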
/* /*
...@@ -1193,6 +1228,70 @@ enum cpu_reset_status { ...@@ -1193,6 +1228,70 @@ enum cpu_reset_status {
CPU_RST_STATUS_SOFT_RST_DONE = 1, CPU_RST_STATUS_SOFT_RST_DONE = 1,
}; };
#define SEC_PCR_DATA_BUF_SZ 256
#define SEC_PCR_QUOTE_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */
#define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */
#define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */
#define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */
/*
* struct cpucp_sec_attest_info - attestation report of the boot
* @pcr_data: raw values of the PCR registers
* @pcr_num_reg: number of PCR registers in the pcr_data array
* @pcr_reg_len: length of each PCR register in the pcr_data array (bytes)
* @nonce: number used only once; a random number provided by the host. It is
* also passed to the quote command as qualifying data.
* @pcr_quote_len: length of the attestation quote data (bytes)
* @pcr_quote: attestation report data structure
* @quote_sig_len: length of the attestation report signature (bytes)
* @quote_sig: signature structure of the attestation report
* @pub_data_len: length of the public data (bytes)
* @public_data: public key for the signed attestation
* (outPublic + name + qualifiedName)
* @certificate_len: length of the certificate (bytes)
* @certificate: certificate for the attestation signing key
*/
struct cpucp_sec_attest_info {
__u8 pcr_data[SEC_PCR_DATA_BUF_SZ];
__u8 pcr_num_reg;
__u8 pcr_reg_len;
__le16 pad0;
__le32 nonce;
__le16 pcr_quote_len;
__u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ];
__u8 quote_sig_len;
__u8 quote_sig[SEC_SIGNATURE_BUF_SZ];
__le16 pub_data_len;
__u8 public_data[SEC_PUB_DATA_BUF_SZ];
__le16 certificate_len;
__u8 certificate[SEC_CERTIFICATE_BUF_SZ];
};
/*
* struct cpucp_dev_info_signed - device information signed by a secured device
* @info: device information structure as defined above
* @nonce: number used only once; a random number provided by the host. This
* number is hashed and signed along with the device information.
* @info_sig_len: length of the attestation signature (bytes)
* @info_sig: signature of the info + nonce data.
* @pub_data_len: length of the public data (bytes)
* @public_data: public key for the signed info data
* (outPublic + name + qualifiedName)
* @certificate_len: length of the certificate (bytes)
* @certificate: certificate for the signing key
*/
struct cpucp_dev_info_signed {
struct cpucp_info info; /* assumed to be 64bit aligned */
__le32 nonce;
__le32 pad0;
__u8 info_sig_len;
__u8 info_sig[SEC_SIGNATURE_BUF_SZ];
__le16 pub_data_len;
__u8 public_data[SEC_PUB_DATA_BUF_SZ];
__le16 certificate_len;
__u8 certificate[SEC_CERTIFICATE_BUF_SZ];
};
/* /*
* struct dcore_monitor_regs_data - DCORE monitor regs data. * struct dcore_monitor_regs_data - DCORE monitor regs data.
* the structure follows sync manager block layout. relevant only to Gaudi. * the structure follows sync manager block layout. relevant only to Gaudi.
......
...@@ -34,6 +34,7 @@ enum cpu_boot_err { ...@@ -34,6 +34,7 @@ enum cpu_boot_err {
CPU_BOOT_ERR_BINNING_FAIL = 19, CPU_BOOT_ERR_BINNING_FAIL = 19,
CPU_BOOT_ERR_TPM_FAIL = 20, CPU_BOOT_ERR_TPM_FAIL = 20,
CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21, CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21,
CPU_BOOT_ERR_EEPROM_FAIL = 22,
CPU_BOOT_ERR_ENABLED = 31, CPU_BOOT_ERR_ENABLED = 31,
CPU_BOOT_ERR_SCND_EN = 63, CPU_BOOT_ERR_SCND_EN = 63,
CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */ CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */
...@@ -115,6 +116,9 @@ enum cpu_boot_err { ...@@ -115,6 +116,9 @@ enum cpu_boot_err {
* CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL Failed to set threshold for temperature * CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL Failed to set threshold for temperature
* sensor. * sensor.
* *
* CPU_BOOT_ERR_EEPROM_FAIL Failed reading EEPROM data. Defaults
* are used.
*
* CPU_BOOT_ERR0_ENABLED Error registers enabled. * CPU_BOOT_ERR0_ENABLED Error registers enabled.
* This is a main indication that the * This is a main indication that the
* running FW populates the error * running FW populates the error
...@@ -139,6 +143,7 @@ enum cpu_boot_err { ...@@ -139,6 +143,7 @@ enum cpu_boot_err {
#define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL) #define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL)
#define CPU_BOOT_ERR0_TPM_FAIL (1 << CPU_BOOT_ERR_TPM_FAIL) #define CPU_BOOT_ERR0_TPM_FAIL (1 << CPU_BOOT_ERR_TPM_FAIL)
#define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL (1 << CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL) #define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL (1 << CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL)
#define CPU_BOOT_ERR0_EEPROM_FAIL (1 << CPU_BOOT_ERR_EEPROM_FAIL)
#define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED) #define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED)
#define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED) #define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED)
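Since the comment above says defaults are used after an EEPROM read failure, the condition is plausibly recoverable; a hedged sketch of testing the new bit while scanning the boot-error register (warning-vs-error severity is an assumption, not taken from this diff):

    /* err0 holds the value read from the first 32-bit boot-error register */
    if (err0 & CPU_BOOT_ERR0_EEPROM_FAIL)
        dev_warn(hdev->dev,
                 "Device boot warning - EEPROM read failed, default values are used\n");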
...@@ -426,7 +431,9 @@ struct cpu_dyn_regs { ...@@ -426,7 +431,9 @@ struct cpu_dyn_regs {
__le32 gic_host_ints_irq; __le32 gic_host_ints_irq;
__le32 gic_host_soft_rst_irq; __le32 gic_host_soft_rst_irq;
__le32 gic_rot_qm_irq_ctrl; __le32 gic_rot_qm_irq_ctrl;
__le32 reserved1[22]; /* reserve for future use */ __le32 cpu_rst_status;
__le32 eng_arc_irq_ctrl;
__le32 reserved1[20]; /* reserve for future use */
}; };
/* TODO: remove the desc magic after the code is updated to use message */ /* TODO: remove the desc magic after the code is updated to use message */
...@@ -465,6 +472,26 @@ enum comms_msg_type { ...@@ -465,6 +472,26 @@ enum comms_msg_type {
HL_COMMS_BINNING_CONF_TYPE = 3, HL_COMMS_BINNING_CONF_TYPE = 3,
}; };
/*
* Binning information shared between LKD and FW
* @tpc_mask - TPC binning information
* @dec_mask - Decoder binning information
* @hbm_mask - HBM binning information
* @edma_mask - EDMA binning information
* @mme_mask_l - MME binning information lower 32
* @mme_mask_h - MME binning information upper 32
* @reserved - reserved field for 64 bit alignment
*/
struct lkd_fw_binning_info {
__le64 tpc_mask;
__le32 dec_mask;
__le32 hbm_mask;
__le32 edma_mask;
__le32 mme_mask_l;
__le32 mme_mask_h;
__le32 reserved;
};
/* TODO: remove this struct after the code is updated to use message */ /* TODO: remove this struct after the code is updated to use message */
/* this is the comms descriptor header - meta data */ /* this is the comms descriptor header - meta data */
struct comms_desc_header { struct comms_desc_header {
...@@ -525,13 +552,7 @@ struct lkd_fw_comms_msg { ...@@ -525,13 +552,7 @@ struct lkd_fw_comms_msg {
struct { struct {
__u8 fw_cfg_skip; /* 1 - skip, 0 - don't skip */ __u8 fw_cfg_skip; /* 1 - skip, 0 - don't skip */
}; };
struct { struct lkd_fw_binning_info binning_info;
__le64 tpc_binning_conf;
__le32 dec_binning_conf;
__le32 hbm_binning_conf;
__le32 edma_binning_conf;
__le32 mme_redundancy_conf; /* use MME_REDUNDANT_COLUMN */
};
}; };
}; };
......
...@@ -132,6 +132,7 @@ ...@@ -132,6 +132,7 @@
#include "dcore0_mme_ctrl_lo_arch_tensor_a_regs.h" #include "dcore0_mme_ctrl_lo_arch_tensor_a_regs.h"
#include "dcore0_mme_ctrl_lo_arch_tensor_b_regs.h" #include "dcore0_mme_ctrl_lo_arch_tensor_b_regs.h"
#include "dcore0_mme_ctrl_lo_arch_tensor_cout_regs.h" #include "dcore0_mme_ctrl_lo_arch_tensor_cout_regs.h"
#include "pcie_wrap_special_regs.h"
#include "pdma0_qm_masks.h" #include "pdma0_qm_masks.h"
#include "pdma0_core_masks.h" #include "pdma0_core_masks.h"
...@@ -239,6 +240,7 @@ ...@@ -239,6 +240,7 @@
#define SFT_IF_RTR_OFFSET (mmSFT0_HBW_RTR_IF1_RTR_H3_BASE - mmSFT0_HBW_RTR_IF0_RTR_H3_BASE) #define SFT_IF_RTR_OFFSET (mmSFT0_HBW_RTR_IF1_RTR_H3_BASE - mmSFT0_HBW_RTR_IF0_RTR_H3_BASE)
#define ARC_HALT_REQ_OFFSET (mmARC_FARM_ARC0_AUX_RUN_HALT_REQ - mmARC_FARM_ARC0_AUX_BASE) #define ARC_HALT_REQ_OFFSET (mmARC_FARM_ARC0_AUX_RUN_HALT_REQ - mmARC_FARM_ARC0_AUX_BASE)
#define ARC_HALT_ACK_OFFSET (mmARC_FARM_ARC0_AUX_RUN_HALT_ACK - mmARC_FARM_ARC0_AUX_BASE)
#define ARC_REGION_CFG_OFFSET(region) \ #define ARC_REGION_CFG_OFFSET(region) \
(mmARC_FARM_ARC0_AUX_ARC_REGION_CFG_0 + (region * 4) - mmARC_FARM_ARC0_AUX_BASE) (mmARC_FARM_ARC0_AUX_ARC_REGION_CFG_0 + (region * 4) - mmARC_FARM_ARC0_AUX_BASE)
......
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2016-2020 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
/************************************
** This is an auto-generated file **
** DO NOT EDIT BELOW **
************************************/
#ifndef ASIC_REG_PCIE_WRAP_SPECIAL_REGS_H_
#define ASIC_REG_PCIE_WRAP_SPECIAL_REGS_H_
/*
*****************************************
* PCIE_WRAP_SPECIAL
* (Prototype: SPECIAL_REGS)
*****************************************
*/
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_0 0x4C01E80
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_1 0x4C01E84
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_2 0x4C01E88
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_3 0x4C01E8C
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_4 0x4C01E90
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_5 0x4C01E94
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_6 0x4C01E98
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_7 0x4C01E9C
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_8 0x4C01EA0
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_9 0x4C01EA4
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_10 0x4C01EA8
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_11 0x4C01EAC
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_12 0x4C01EB0
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_13 0x4C01EB4
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_14 0x4C01EB8
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_15 0x4C01EBC
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_16 0x4C01EC0
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_17 0x4C01EC4
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_18 0x4C01EC8
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_19 0x4C01ECC
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_20 0x4C01ED0
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_21 0x4C01ED4
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_22 0x4C01ED8
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_23 0x4C01EDC
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_24 0x4C01EE0
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_25 0x4C01EE4
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_26 0x4C01EE8
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_27 0x4C01EEC
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_28 0x4C01EF0
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_29 0x4C01EF4
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_30 0x4C01EF8
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_31 0x4C01EFC
#define mmPCIE_WRAP_SPECIAL_MEM_GW_DATA 0x4C01F00
#define mmPCIE_WRAP_SPECIAL_MEM_GW_REQ 0x4C01F04
#define mmPCIE_WRAP_SPECIAL_MEM_NUMOF 0x4C01F0C
#define mmPCIE_WRAP_SPECIAL_MEM_ECC_SEL 0x4C01F10
#define mmPCIE_WRAP_SPECIAL_MEM_ECC_CTL 0x4C01F14
#define mmPCIE_WRAP_SPECIAL_MEM_ECC_ERR_MASK 0x4C01F18
#define mmPCIE_WRAP_SPECIAL_MEM_ECC_GLBL_ERR_MASK 0x4C01F1C
#define mmPCIE_WRAP_SPECIAL_MEM_ECC_ERR_STS 0x4C01F20
#define mmPCIE_WRAP_SPECIAL_MEM_ECC_ERR_ADDR 0x4C01F24
#define mmPCIE_WRAP_SPECIAL_MEM_RM 0x4C01F28
#define mmPCIE_WRAP_SPECIAL_GLBL_ERR_MASK 0x4C01F40
#define mmPCIE_WRAP_SPECIAL_GLBL_ERR_ADDR 0x4C01F44
#define mmPCIE_WRAP_SPECIAL_GLBL_ERR_CAUSE 0x4C01F48
#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0 0x4C01F60
#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_1 0x4C01F64
#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_2 0x4C01F68
#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_3 0x4C01F6C
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_0 0x4C01F80
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_1 0x4C01F84
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_2 0x4C01F88
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_3 0x4C01F8C
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_4 0x4C01F90
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_5 0x4C01F94
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_6 0x4C01F98
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_7 0x4C01F9C
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_8 0x4C01FA0
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_9 0x4C01FA4
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_10 0x4C01FA8
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_11 0x4C01FAC
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_12 0x4C01FB0
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_13 0x4C01FB4
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_14 0x4C01FB8
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_15 0x4C01FBC
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_16 0x4C01FC0
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_17 0x4C01FC4
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_18 0x4C01FC8
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_19 0x4C01FCC
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_20 0x4C01FD0
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_21 0x4C01FD4
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_22 0x4C01FD8
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_23 0x4C01FDC
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_24 0x4C01FE0
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_25 0x4C01FE4
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_26 0x4C01FE8
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_27 0x4C01FEC
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_28 0x4C01FF0
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_29 0x4C01FF4
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_30 0x4C01FF8
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_31 0x4C01FFC
#endif /* ASIC_REG_PCIE_WRAP_SPECIAL_REGS_H_ */
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2022 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
#ifndef __GAUDI2_ASYNC_VIRT_EVENTS_H_
#define __GAUDI2_ASYNC_VIRT_EVENTS_H_
enum gaudi2_async_virt_event_id {
GAUDI2_EVENT_NIC3_QM1_OLD = 1206,
GAUDI2_EVENT_NIC4_QM0_OLD = 1207,
GAUDI2_EVENT_NIC4_QM1_OLD = 1208,
GAUDI2_EVENT_NIC5_QM0_OLD = 1209,
GAUDI2_EVENT_NIC5_QM1_OLD = 1210,
GAUDI2_EVENT_NIC6_QM0_OLD = 1211,
GAUDI2_EVENT_NIC6_QM1_OLD = 1212,
GAUDI2_EVENT_NIC7_QM0_OLD = 1213,
GAUDI2_EVENT_NIC7_QM1_OLD = 1214,
GAUDI2_EVENT_NIC8_QM0_OLD = 1215,
GAUDI2_EVENT_NIC8_QM1_OLD = 1216,
GAUDI2_EVENT_NIC9_QM0_OLD = 1217,
GAUDI2_EVENT_NIC9_QM1_OLD = 1218,
GAUDI2_EVENT_NIC10_QM0_OLD = 1219,
GAUDI2_EVENT_NIC10_QM1_OLD = 1220,
GAUDI2_EVENT_NIC11_QM0_OLD = 1221,
GAUDI2_EVENT_NIC11_QM1_OLD = 1222,
GAUDI2_EVENT_CPU_PKT_SANITY_FAILED_OLD = 1223,
GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0_OLD = 1224,
GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG1_OLD = 1225,
GAUDI2_EVENT_CPU1_STATUS_NIC1_ENG0_OLD = 1226,
GAUDI2_EVENT_CPU1_STATUS_NIC1_ENG1_OLD = 1227,
GAUDI2_EVENT_CPU2_STATUS_NIC2_ENG0_OLD = 1228,
GAUDI2_EVENT_CPU2_STATUS_NIC2_ENG1_OLD = 1229,
GAUDI2_EVENT_CPU3_STATUS_NIC3_ENG0_OLD = 1230,
GAUDI2_EVENT_CPU3_STATUS_NIC3_ENG1_OLD = 1231,
GAUDI2_EVENT_CPU4_STATUS_NIC4_ENG0_OLD = 1232,
GAUDI2_EVENT_CPU4_STATUS_NIC4_ENG1_OLD = 1233,
GAUDI2_EVENT_CPU5_STATUS_NIC5_ENG0_OLD = 1234,
GAUDI2_EVENT_CPU5_STATUS_NIC5_ENG1_OLD = 1235,
GAUDI2_EVENT_CPU6_STATUS_NIC6_ENG0_OLD = 1236,
GAUDI2_EVENT_CPU6_STATUS_NIC6_ENG1_OLD = 1237,
GAUDI2_EVENT_CPU7_STATUS_NIC7_ENG0_OLD = 1238,
GAUDI2_EVENT_CPU7_STATUS_NIC7_ENG1_OLD = 1239,
GAUDI2_EVENT_CPU8_STATUS_NIC8_ENG0_OLD = 1240,
GAUDI2_EVENT_CPU8_STATUS_NIC8_ENG1_OLD = 1241,
GAUDI2_EVENT_CPU9_STATUS_NIC9_ENG0_OLD = 1242,
GAUDI2_EVENT_CPU9_STATUS_NIC9_ENG1_OLD = 1243,
GAUDI2_EVENT_CPU10_STATUS_NIC10_ENG0_OLD = 1244,
GAUDI2_EVENT_CPU10_STATUS_NIC10_ENG1_OLD = 1245,
GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG0_OLD = 1246,
GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1_OLD = 1247,
GAUDI2_EVENT_ARC_DCCM_FULL_OLD = 1248,
};
#endif /* __GAUDI2_ASYNC_VIRT_EVENTS_H_ */
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2016-2021 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM habanalabs
#if !defined(_TRACE_HABANALABS_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_HABANALABS_H
#include <linux/tracepoint.h>
DECLARE_EVENT_CLASS(habanalabs_mmu_template,
TP_PROTO(struct device *dev, u64 virt_addr, u64 phys_addr, u32 page_size, bool flush_pte),
TP_ARGS(dev, virt_addr, phys_addr, page_size, flush_pte),
TP_STRUCT__entry(
__string(dname, dev_name(dev))
__field(u64, virt_addr)
__field(u64, phys_addr)
__field(u32, page_size)
__field(u8, flush_pte)
),
TP_fast_assign(
__assign_str(dname, dev_name(dev));
__entry->virt_addr = virt_addr;
__entry->phys_addr = phys_addr;
__entry->page_size = page_size;
__entry->flush_pte = flush_pte;
),
TP_printk("%s: vaddr: %#llx, paddr: %#llx, psize: %#x, flush: %s",
__get_str(dname),
__entry->virt_addr,
__entry->phys_addr,
__entry->page_size,
__entry->flush_pte ? "true" : "false")
);
DEFINE_EVENT(habanalabs_mmu_template, habanalabs_mmu_map,
TP_PROTO(struct device *dev, u64 virt_addr, u64 phys_addr, u32 page_size, bool flush_pte),
TP_ARGS(dev, virt_addr, phys_addr, page_size, flush_pte));
DEFINE_EVENT(habanalabs_mmu_template, habanalabs_mmu_unmap,
TP_PROTO(struct device *dev, u64 virt_addr, u64 phys_addr, u32 page_size, bool flush_pte),
TP_ARGS(dev, virt_addr, phys_addr, page_size, flush_pte));
DECLARE_EVENT_CLASS(habanalabs_dma_alloc_template,
TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size, const char *caller),
TP_ARGS(dev, cpu_addr, dma_addr, size, caller),
TP_STRUCT__entry(
__string(dname, dev_name(dev))
__field(u64, cpu_addr)
__field(u64, dma_addr)
__field(u32, size)
__field(const char *, caller)
),
TP_fast_assign(
__assign_str(dname, dev_name(dev));
__entry->cpu_addr = cpu_addr;
__entry->dma_addr = dma_addr;
__entry->size = size;
__entry->caller = caller;
),
TP_printk("%s: cpu_addr: %#llx, dma_addr: %#llx, size: %#x, caller: %s",
__get_str(dname),
__entry->cpu_addr,
__entry->dma_addr,
__entry->size,
__entry->caller)
);
DEFINE_EVENT(habanalabs_dma_alloc_template, habanalabs_dma_alloc,
TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size, const char *caller),
TP_ARGS(dev, cpu_addr, dma_addr, size, caller));
DEFINE_EVENT(habanalabs_dma_alloc_template, habanalabs_dma_free,
TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size, const char *caller),
TP_ARGS(dev, cpu_addr, dma_addr, size, caller));
#endif /* if !defined(_TRACE_HABANALABS_H) || defined(TRACE_HEADER_MULTI_READ) */
/* This part must be outside protection */
#include <trace/define_trace.h>
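These definitions follow the standard kernel tracepoint pattern: exactly one compilation unit in the driver defines the trace points, and every other file simply includes the header and calls trace_<event>(). A minimal usage sketch:

    /* In exactly one .c file of the driver: */
    #define CREATE_TRACE_POINTS
    #include <trace/events/habanalabs.h>

    /* In any other file, include the header without CREATE_TRACE_POINTS: */
    #include <trace/events/habanalabs.h>

    trace_habanalabs_dma_alloc(hdev->dev, (u64)(uintptr_t)cpu_addr,
                               dma_addr, size, __func__);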