Commit d3222595 authored by Greg Kroah-Hartman's avatar Greg Kroah-Hartman

Merge tag 'misc-habanalabs-next-2022-09-21' of...

Merge tag 'misc-habanalabs-next-2022-09-21' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-next

Oded writes:

  "This tag contains habanalabs driver changes for v6.1:

   - Support new notifier event for device state change through eventfd.
   - Add uAPI to retrieve device attestation information for Gaudi2.
   - Add uAPI to retrieve the h/w status of all h/w blocks (a userspace
     sketch of these two queries follows this list).
   - Add uAPI to control the running mode of the engine cores in Gaudi2.
   - Expose whether the device runs with secured firmware through the INFO ioctl
     and sysfs.
   - Support trace events in DMA allocations and MMU map/unmap operations.
   - Notify firmware when the device is acquired by a user process and when it
     is released. This is done as part of the RAS handling that the f/w performs.
   - Multiple bug fixes, refactors and renames.
   - Cleanup of error messages, moving some to debug level.
   - Enhance log prints in case of h/w error events for Gaudi2."
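
A minimal userspace sketch of driving the two new INFO queries mentioned above
(the op codes, the hl_info_args fields, and struct hl_info_sec_attest come from
this series' uAPI changes; HL_IOCTL_INFO, the header path, and the 512 KB buffer
size are illustrative assumptions, not taken from this diff):

/* Hypothetical userspace sketch -- not part of this series. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>	/* uAPI header extended by this series */

static int query_attestation(int fd, uint32_t nonce)
{
	struct hl_info_sec_attest attest;
	struct hl_info_args args;

	memset(&args, 0, sizeof(args));
	args.op = HL_INFO_SECURED_ATTESTATION;
	args.return_pointer = (uint64_t) (uintptr_t) &attest;
	args.return_size = sizeof(attest);
	args.sec_attest_nonce = nonce;	/* hashed into the quote by the f/w */

	return ioctl(fd, HL_IOCTL_INFO, &args);
}

static int dump_engine_status(int fd)
{
	size_t buf_size = 512 * 1024;	/* must be in [SZ_1K, HL_ENGINES_DATA_MAX_SIZE] */
	struct hl_info_args args;
	char *buf = malloc(buf_size);
	int rc;

	if (!buf)
		return -1;

	memset(&args, 0, sizeof(args));
	args.op = HL_INFO_ENGINE_STATUS;
	args.return_pointer = (uint64_t) (uintptr_t) buf;
	args.return_size = buf_size;

	rc = ioctl(fd, HL_IOCTL_INFO, &args);
	if (!rc)
		fwrite(buf, 1, args.user_buffer_actual_size, stdout);

	free(buf);
	return rc;
}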

* tag 'misc-habanalabs-next-2022-09-21' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux: (68 commits)
  habanalabs: eliminate aggregate use warning
  habanalabs/gaudi: use 8KB aligned address for TPC kernels
  habanalabs: remove some f/w descriptor validations
  habanalabs: build ASICs from new to old
  habanalabs/gaudi2: allow user to flush PCIE by read
  habanalabs: failure to open device due to reset is debug level
  habanalabs/gaudi2: Remove unnecessary (void*) conversions
  habanalabs/gaudi2: add secured attestation info uapi
  habanalabs/gaudi2: add handling to pmmu events in eqe handler
  habanalabs/gaudi: change TPC Assert to use TPC DEC instead of QMAN err
  habanalabs: rename error info structure
  habanalabs/gaudi2: get f/w reset status register dynamically
  habanalabs/gaudi2: increase hard-reset sleep time to 2 sec
  habanalabs/gaudi2: print RAZWI info upon PCIe access error
  habanalabs: MMU invalidation h/w is per device
  habanalabs: new notifier events for device state
  habanalabs/gaudi2: free event irq if init fails
  habanalabs: fix resetting the DRAM BAR
  habanalabs: add support for new cpucp return codes
  habanalabs/gaudi2: read F/W security indication after hard reset
  ...
parents 8be7dfc6 259cee1c
......@@ -16,7 +16,7 @@ Description: Version of the application running on the device's CPU
What: /sys/class/habanalabs/hl<n>/clk_max_freq_mhz
Date: Jun 2019
KernelVersion: not yet upstreamed
KernelVersion: 5.7
Contact: ogabbay@kernel.org
Description: Allows the user to set the maximum clock frequency, in MHz.
The device clock might be set to lower value than the maximum.
......@@ -26,7 +26,7 @@ Description: Allows the user to set the maximum clock frequency, in MHz.
What: /sys/class/habanalabs/hl<n>/clk_cur_freq_mhz
Date: Jun 2019
KernelVersion: not yet upstreamed
KernelVersion: 5.7
Contact: ogabbay@kernel.org
Description: Displays the current frequency, in MHz, of the device clock.
This property is valid only for the Gaudi ASIC family
......@@ -176,6 +176,12 @@ KernelVersion: 5.1
Contact: ogabbay@kernel.org
Description: Version of the device's preboot F/W code
What: /sys/class/habanalabs/hl<n>/security_enabled
Date: Oct 2022
KernelVersion: 6.1
Contact: obitton@habana.ai
Description: Displays the device's security status
What: /sys/class/habanalabs/hl<n>/soft_reset
Date: Jan 2019
KernelVersion: 5.1
......@@ -230,6 +236,6 @@ Description: Version of the u-boot running on the device's CPU
What: /sys/class/habanalabs/hl<n>/vrm_ver
Date: Jan 2022
KernelVersion: not yet upstreamed
KernelVersion: 5.17
Contact: ogabbay@kernel.org
Description: Version of the Device's Voltage Regulator Monitor F/W code. N/A to GOYA and GAUDI
......@@ -8878,6 +8878,7 @@ T: git https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git
F: Documentation/ABI/testing/debugfs-driver-habanalabs
F: Documentation/ABI/testing/sysfs-driver-habanalabs
F: drivers/misc/habanalabs/
F: include/trace/events/habanalabs.h
F: include/uapi/misc/habanalabs.h
HACKRF MEDIA DRIVER
......
......@@ -10,6 +10,7 @@ config HABANA_AI
select HWMON
select DMA_SHARED_BUFFER
select CRC32
select FW_LOADER
help
Enables PCIe card driver for Habana's AI Processors (AIP) that are
designed to accelerate Deep Learning inference and training workloads.
......
......@@ -8,13 +8,13 @@ obj-$(CONFIG_HABANA_AI) := habanalabs.o
include $(src)/common/Makefile
habanalabs-y += $(HL_COMMON_FILES)
include $(src)/goya/Makefile
habanalabs-y += $(HL_GOYA_FILES)
include $(src)/gaudi2/Makefile
habanalabs-y += $(HL_GAUDI2_FILES)
include $(src)/gaudi/Makefile
habanalabs-y += $(HL_GAUDI_FILES)
include $(src)/gaudi2/Makefile
habanalabs-y += $(HL_GAUDI2_FILES)
include $(src)/goya/Makefile
habanalabs-y += $(HL_GOYA_FILES)
habanalabs-$(CONFIG_DEBUG_FS) += common/debugfs.o
......@@ -12,20 +12,18 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
#define CB_VA_POOL_SIZE (4UL * SZ_1G)
static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_vm_va_block *va_block, *tmp;
dma_addr_t bus_addr;
u64 virt_addr;
u32 page_size = prop->pmmu.page_size;
s32 offset;
int rc;
if (!hdev->supports_cb_mapping) {
dev_err_ratelimited(hdev->dev,
"Cannot map CB because no VA range is allocated for CB mapping\n");
"Mapping a CB to the device's MMU is not supported\n");
return -EINVAL;
}
......@@ -35,106 +33,45 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
return -EINVAL;
}
INIT_LIST_HEAD(&cb->va_block_list);
if (cb->is_mmu_mapped)
return 0;
for (bus_addr = cb->bus_address;
bus_addr < cb->bus_address + cb->size;
bus_addr += page_size) {
cb->roundup_size = roundup(cb->size, page_size);
virt_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, page_size);
if (!virt_addr) {
dev_err(hdev->dev,
"Failed to allocate device virtual address for CB\n");
rc = -ENOMEM;
goto err_va_pool_free;
}
va_block = kzalloc(sizeof(*va_block), GFP_KERNEL);
if (!va_block) {
rc = -ENOMEM;
gen_pool_free(ctx->cb_va_pool, virt_addr, page_size);
goto err_va_pool_free;
}
va_block->start = virt_addr;
va_block->end = virt_addr + page_size - 1;
va_block->size = page_size;
list_add_tail(&va_block->node, &cb->va_block_list);
cb->virtual_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, cb->roundup_size);
if (!cb->virtual_addr) {
dev_err(hdev->dev, "Failed to allocate device virtual address for CB\n");
return -ENOMEM;
}
mutex_lock(&ctx->mmu_lock);
bus_addr = cb->bus_address;
offset = 0;
list_for_each_entry(va_block, &cb->va_block_list, node) {
rc = hl_mmu_map_page(ctx, va_block->start, bus_addr,
va_block->size, list_is_last(&va_block->node,
&cb->va_block_list));
mutex_lock(&hdev->mmu_lock);
rc = hl_mmu_map_contiguous(ctx, cb->virtual_addr, cb->bus_address, cb->roundup_size);
if (rc) {
dev_err(hdev->dev, "Failed to map VA %#llx to CB\n",
va_block->start);
dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", cb->virtual_addr);
goto err_va_umap;
}
bus_addr += va_block->size;
offset += va_block->size;
}
rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV);
mutex_unlock(&ctx->mmu_lock);
mutex_unlock(&hdev->mmu_lock);
cb->is_mmu_mapped = true;
return rc;
err_va_umap:
list_for_each_entry(va_block, &cb->va_block_list, node) {
if (offset <= 0)
break;
hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
offset <= va_block->size);
offset -= va_block->size;
}
rc = hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
mutex_unlock(&ctx->mmu_lock);
err_va_pool_free:
list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
list_del(&va_block->node);
kfree(va_block);
}
mutex_unlock(&hdev->mmu_lock);
gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
return rc;
}
static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
struct hl_device *hdev = ctx->hdev;
struct hl_vm_va_block *va_block, *tmp;
mutex_lock(&ctx->mmu_lock);
list_for_each_entry(va_block, &cb->va_block_list, node)
if (hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
list_is_last(&va_block->node,
&cb->va_block_list)))
dev_warn_ratelimited(hdev->dev,
"Failed to unmap CB's va 0x%llx\n",
va_block->start);
mutex_lock(&hdev->mmu_lock);
hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size);
hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
mutex_unlock(&hdev->mmu_lock);
mutex_unlock(&ctx->mmu_lock);
list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
list_del(&va_block->node);
kfree(va_block);
}
gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
}
static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
......@@ -376,7 +313,6 @@ int hl_cb_destroy(struct hl_mem_mgr *mmg, u64 cb_handle)
static int hl_cb_info(struct hl_mem_mgr *mmg,
u64 handle, u32 flags, u32 *usage_cnt, u64 *device_va)
{
struct hl_vm_va_block *va_block;
struct hl_cb *cb;
int rc = 0;
......@@ -388,9 +324,8 @@ static int hl_cb_info(struct hl_mem_mgr *mmg,
}
if (flags & HL_CB_FLAGS_GET_DEVICE_VA) {
va_block = list_first_entry(&cb->va_block_list, struct hl_vm_va_block, node);
if (va_block) {
*device_va = va_block->start;
if (cb->is_mmu_mapped) {
*device_va = cb->virtual_addr;
} else {
dev_err(mmg->dev, "CB is not mapped to the device's MMU\n");
rc = -EINVAL;
......@@ -566,16 +501,23 @@ int hl_cb_va_pool_init(struct hl_ctx *ctx)
return -ENOMEM;
}
rc = gen_pool_add(ctx->cb_va_pool, prop->cb_va_start_addr,
prop->cb_va_end_addr - prop->cb_va_start_addr, -1);
ctx->cb_va_pool_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
CB_VA_POOL_SIZE, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
if (!ctx->cb_va_pool_base) {
rc = -ENOMEM;
goto err_pool_destroy;
}
rc = gen_pool_add(ctx->cb_va_pool, ctx->cb_va_pool_base, CB_VA_POOL_SIZE, -1);
if (rc) {
dev_err(hdev->dev,
"Failed to add memory to VA gen pool for CB mapping\n");
goto err_pool_destroy;
goto err_unreserve_va_block;
}
return 0;
err_unreserve_va_block:
hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
err_pool_destroy:
gen_pool_destroy(ctx->cb_va_pool);
......@@ -590,4 +532,5 @@ void hl_cb_va_pool_fini(struct hl_ctx *ctx)
return;
gen_pool_destroy(ctx->cb_va_pool);
hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
}
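
Taken together, the hunks above replace the per-page va_block list with one
reserved VA region per context. A condensed restatement of the new flow
(kernel-side names copied from the hunks above; locking, cache invalidation
and error handling elided -- this is not new driver code):

/* context init: reserve one 4 GB host VA region and hand it to the gen pool */
ctx->cb_va_pool_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
				CB_VA_POOL_SIZE, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
gen_pool_add(ctx->cb_va_pool, ctx->cb_va_pool_base, CB_VA_POOL_SIZE, -1);

/* per-CB map: one allocation, one contiguous mapping, no block list */
cb->roundup_size = roundup(cb->size, prop->pmmu.page_size);
cb->virtual_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, cb->roundup_size);
hl_mmu_map_contiguous(ctx, cb->virtual_addr, cb->bus_address, cb->roundup_size);

/* per-CB unmap and context teardown mirror the above */
hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size);
gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);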
......@@ -12,7 +12,9 @@
#include <linux/slab.h>
#define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
HL_CS_FLAGS_COLLECTIVE_WAIT)
HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \
HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND)
#define MAX_TS_ITER_NUM 10
......@@ -824,10 +826,10 @@ static void cs_timedout(struct work_struct *work)
}
/* Save only the first CS timeout parameters */
rc = atomic_cmpxchg(&hdev->last_error.cs_timeout.write_enable, 1, 0);
rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0);
if (rc) {
hdev->last_error.cs_timeout.timestamp = ktime_get();
hdev->last_error.cs_timeout.seq = cs->sequence;
hdev->captured_err_info.cs_timeout.timestamp = ktime_get();
hdev->captured_err_info.cs_timeout.seq = cs->sequence;
event_mask = device_reset ? (HL_NOTIFIER_EVENT_CS_TIMEOUT |
HL_NOTIFIER_EVENT_DEVICE_RESET) : HL_NOTIFIER_EVENT_CS_TIMEOUT;
......@@ -1242,6 +1244,8 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
return CS_RESERVE_SIGNALS;
else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY)
return CS_UNRESERVE_SIGNALS;
else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND)
return CS_TYPE_ENGINE_CORE;
else
return CS_TYPE_DEFAULT;
}
......@@ -1253,6 +1257,7 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
u32 cs_type_flags, num_chunks;
enum hl_device_status status;
enum hl_cs_type cs_type;
bool is_sync_stream;
if (!hl_device_operational(hdev, &status)) {
return -EBUSY;
......@@ -1276,9 +1281,10 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
cs_type = hl_cs_get_cs_type(cs_type_flags);
num_chunks = args->in.num_chunks_execute;
if (unlikely((cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT ||
cs_type == CS_TYPE_COLLECTIVE_WAIT) &&
!hdev->supports_sync_stream)) {
is_sync_stream = (cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT ||
cs_type == CS_TYPE_COLLECTIVE_WAIT);
if (unlikely(is_sync_stream && !hdev->supports_sync_stream)) {
dev_err(hdev->dev, "Sync stream CS is not supported\n");
return -EINVAL;
}
......@@ -1288,7 +1294,7 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
dev_err(hdev->dev, "Got execute CS with 0 chunks, context %d\n", ctx->asid);
return -EINVAL;
}
} else if (num_chunks != 1) {
} else if (is_sync_stream && num_chunks != 1) {
dev_err(hdev->dev,
"Sync stream CS mandates one chunk only, context %d\n",
ctx->asid);
......@@ -1584,12 +1590,13 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
struct hl_device *hdev = hpriv->hdev;
struct hl_ctx *ctx = hpriv->ctx;
bool need_soft_reset = false;
int rc = 0, do_ctx_switch;
int rc = 0, do_ctx_switch = 0;
void __user *chunks;
u32 num_chunks, tmp;
u16 sob_count;
int ret;
if (hdev->supports_ctx_switch)
do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
......@@ -1661,9 +1668,10 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
}
}
if (hdev->supports_ctx_switch)
ctx->thread_ctx_switch_wait_token = 1;
} else if (!ctx->thread_ctx_switch_wait_token) {
} else if (hdev->supports_ctx_switch && !ctx->thread_ctx_switch_wait_token) {
rc = hl_poll_timeout_memory(hdev,
&ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
100, jiffies_to_usecs(hdev->timeout_jiffies), false);
......@@ -2351,6 +2359,41 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
return rc;
}
static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
u32 num_engine_cores, u32 core_command)
{
int rc;
struct hl_device *hdev = hpriv->hdev;
void __user *engine_cores_arr;
u32 *cores;
if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) {
dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores);
return -EINVAL;
}
if (core_command != HL_ENGINE_CORE_RUN && core_command != HL_ENGINE_CORE_HALT) {
dev_err(hdev->dev, "Engine core command is invalid\n");
return -EINVAL;
}
engine_cores_arr = (void __user *) (uintptr_t) engine_cores;
cores = kmalloc_array(num_engine_cores, sizeof(u32), GFP_KERNEL);
if (!cores)
return -ENOMEM;
if (copy_from_user(cores, engine_cores_arr, num_engine_cores * sizeof(u32))) {
dev_err(hdev->dev, "Failed to copy core-ids array from user\n");
kfree(cores);
return -EFAULT;
}
rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command);
kfree(cores);
return rc;
}
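
On the userspace side, this path might be driven roughly as follows (the flag,
command, and hl_cs_in field names come from this hunk; HL_IOCTL_CS and the core
ids are illustrative assumptions):

/* Hypothetical userspace sketch -- field names per this series' uAPI. */
static int halt_engine_cores(int fd)
{
	uint32_t cores[2] = { 0, 1 };	/* illustrative engine-core ids */
	union hl_cs_args cs_args;

	memset(&cs_args, 0, sizeof(cs_args));
	cs_args.in.cs_flags = HL_CS_FLAGS_ENGINE_CORE_COMMAND;
	cs_args.in.engine_cores = (uint64_t) (uintptr_t) cores;
	cs_args.in.num_engine_cores = 2;
	cs_args.in.core_command = HL_ENGINE_CORE_HALT;

	return ioctl(fd, HL_IOCTL_CS, &cs_args);
}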
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
{
union hl_cs_args *args = data;
......@@ -2403,6 +2446,10 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
rc = cs_ioctl_unreserve_signals(hpriv,
args->in.encaps_sig_handle_id);
break;
case CS_TYPE_ENGINE_CORE:
rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores,
args->in.num_engine_cores, args->in.core_command);
break;
default:
rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
args->in.cs_flags,
......@@ -2524,7 +2571,7 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data, struct multi_cs_com
ktime_t max_ktime, first_cs_time;
enum hl_cs_wait_status status;
memset(fence_ptr, 0, arr_len * sizeof(*fence_ptr));
memset(fence_ptr, 0, arr_len * sizeof(struct hl_fence *));
/* get all fences under the same lock */
rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len);
......@@ -2826,7 +2873,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
}
/* allocate array for the fences */
fence_arr = kmalloc_array(seq_arr_len, sizeof(*fence_arr), GFP_KERNEL);
fence_arr = kmalloc_array(seq_arr_len, sizeof(struct hl_fence *), GFP_KERNEL);
if (!fence_arr) {
rc = -ENOMEM;
goto free_seq_arr;
......
......@@ -291,14 +291,16 @@ static int vm_show(struct seq_file *s, void *data)
if (ctx->asid != HL_KERNEL_ASID_ID &&
!list_empty(&ctx->hw_block_mem_list)) {
seq_puts(s, "\nhw_block mappings:\n\n");
seq_puts(s, " virtual address size HW block id\n");
seq_puts(s, "-------------------------------------------\n");
seq_puts(s,
" virtual address block size mapped size HW block id\n");
seq_puts(s,
"---------------------------------------------------------------\n");
mutex_lock(&ctx->hw_block_list_lock);
list_for_each_entry(lnode, &ctx->hw_block_mem_list,
node) {
list_for_each_entry(lnode, &ctx->hw_block_mem_list, node) {
seq_printf(s,
" 0x%-14lx %-6u %-9u\n",
lnode->vaddr, lnode->size, lnode->id);
" 0x%-14lx %-6u %-6u %-9u\n",
lnode->vaddr, lnode->block_size, lnode->mapped_size,
lnode->id);
}
mutex_unlock(&ctx->hw_block_list_lock);
}
......@@ -591,6 +593,7 @@ static int engines_show(struct seq_file *s, void *data)
struct hl_debugfs_entry *entry = s->private;
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
struct hl_device *hdev = dev_entry->hdev;
struct engines_data eng_data;
if (hdev->reset_info.in_reset) {
dev_warn_ratelimited(hdev->dev,
......@@ -598,7 +601,25 @@ static int engines_show(struct seq_file *s, void *data)
return 0;
}
hdev->asic_funcs->is_device_idle(hdev, NULL, 0, s);
eng_data.actual_size = 0;
eng_data.allocated_buf_size = HL_ENGINES_DATA_MAX_SIZE;
eng_data.buf = vmalloc(eng_data.allocated_buf_size);
if (!eng_data.buf)
return -ENOMEM;
hdev->asic_funcs->is_device_idle(hdev, NULL, 0, &eng_data);
if (eng_data.actual_size > eng_data.allocated_buf_size) {
dev_err(hdev->dev,
"Engines data size (%d Bytes) is bigger than allocated size (%u Bytes)\n",
eng_data.actual_size, eng_data.allocated_buf_size);
vfree(eng_data.buf);
return -ENOMEM;
}
seq_write(s, eng_data.buf, eng_data.actual_size);
vfree(eng_data.buf);
return 0;
}
......
......@@ -14,6 +14,9 @@
#include <linux/aer.h>
#include <linux/module.h>
#define CREATE_TRACE_POINTS
#include <trace/events/habanalabs.h>
#define HL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team"
#define HL_DRIVER_DESC "Driver for HabanaLabs's AI Accelerators"
......@@ -27,7 +30,10 @@ static struct class *hl_class;
static DEFINE_IDR(hl_devs_idr);
static DEFINE_MUTEX(hl_devs_idr_lock);
static int timeout_locked = 30;
#define HL_DEFAULT_TIMEOUT_LOCKED 30 /* 30 seconds */
#define GAUDI_DEFAULT_TIMEOUT_LOCKED 600 /* 10 minutes */
static int timeout_locked = HL_DEFAULT_TIMEOUT_LOCKED;
static int reset_on_lockup = 1;
static int memory_scrub;
static ulong boot_error_status_mask = ULONG_MAX;
......@@ -55,14 +61,12 @@ MODULE_PARM_DESC(boot_error_status_mask,
#define PCI_IDS_GAUDI_SEC 0x1010
#define PCI_IDS_GAUDI2 0x1020
#define PCI_IDS_GAUDI2_SEC 0x1030
static const struct pci_device_id ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI_SEC), },
{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2), },
{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2_SEC), },
{ 0, }
};
MODULE_DEVICE_TABLE(pci, ids);
......@@ -92,9 +96,6 @@ static enum hl_asic_type get_asic_type(u16 device)
case PCI_IDS_GAUDI2:
asic_type = ASIC_GAUDI2;
break;
case PCI_IDS_GAUDI2_SEC:
asic_type = ASIC_GAUDI2_SEC;
break;
default:
asic_type = ASIC_INVALID;
break;
......@@ -107,7 +108,6 @@ static bool is_asic_secured(enum hl_asic_type asic_type)
{
switch (asic_type) {
case ASIC_GAUDI_SEC:
case ASIC_GAUDI2_SEC:
return true;
default:
return false;
......@@ -161,7 +161,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
mutex_lock(&hdev->fpriv_list_lock);
if (!hl_device_operational(hdev, &status)) {
dev_err_ratelimited(hdev->dev,
dev_dbg_ratelimited(hdev->dev,
"Can't open %s because it is %s\n",
dev_name(hdev->dev), hdev->status[status]);
......@@ -207,11 +207,13 @@ int hl_device_open(struct inode *inode, struct file *filp)
list_add(&hpriv->dev_node, &hdev->fpriv_list);
mutex_unlock(&hdev->fpriv_list_lock);
hdev->asic_funcs->send_device_activity(hdev, true);
hl_debugfs_add_file(hpriv);
atomic_set(&hdev->last_error.cs_timeout.write_enable, 1);
atomic_set(&hdev->last_error.razwi.write_enable, 1);
hdev->last_error.undef_opcode.write_enable = true;
atomic_set(&hdev->captured_err_info.cs_timeout.write_enable, 1);
atomic_set(&hdev->captured_err_info.razwi.write_enable, 1);
hdev->captured_err_info.undef_opcode.write_enable = true;
hdev->open_counter++;
hdev->last_successful_open_jif = jiffies;
......@@ -269,7 +271,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
mutex_lock(&hdev->fpriv_ctrl_list_lock);
if (!hl_device_operational(hdev, NULL)) {
dev_err_ratelimited(hdev->dev_ctrl,
dev_dbg_ratelimited(hdev->dev_ctrl,
"Can't open %s because it is disabled or in reset\n",
dev_name(hdev->dev_ctrl));
rc = -EPERM;
......@@ -314,12 +316,22 @@ static void copy_kernel_module_params_to_device(struct hl_device *hdev)
hdev->boot_error_status_mask = boot_error_status_mask;
}
static void fixup_device_params_per_asic(struct hl_device *hdev)
static void fixup_device_params_per_asic(struct hl_device *hdev, int timeout)
{
switch (hdev->asic_type) {
case ASIC_GOYA:
case ASIC_GAUDI:
case ASIC_GAUDI_SEC:
/* If the user didn't request a timeout different from the default one, use
* the longer Gaudi-specific default timeout
*/
if (timeout == HL_DEFAULT_TIMEOUT_LOCKED)
hdev->timeout_jiffies = msecs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED *
MSEC_PER_SEC);
hdev->reset_upon_device_release = 0;
break;
case ASIC_GOYA:
hdev->reset_upon_device_release = 0;
break;
......@@ -339,7 +351,7 @@ static int fixup_device_params(struct hl_device *hdev)
hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC;
if (tmp_timeout)
hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * 1000);
hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * MSEC_PER_SEC);
else
hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
......@@ -360,7 +372,7 @@ static int fixup_device_params(struct hl_device *hdev)
if (!hdev->cpu_queues_enable)
hdev->heartbeat = 0;
fixup_device_params_per_asic(hdev);
fixup_device_params_per_asic(hdev, tmp_timeout);
return 0;
}
......
......@@ -14,6 +14,7 @@
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
[HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr),
......@@ -103,6 +104,7 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
hw_ip.edma_enabled_mask = prop->edma_enabled_mask;
hw_ip.server_type = prop->server_type;
hw_ip.security_enabled = prop->fw_security_enabled;
return copy_to_user(out, &hw_ip,
min((size_t) size, sizeof(hw_ip))) ? -EFAULT : 0;
......@@ -591,8 +593,8 @@ static int cs_timeout_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
if ((!max_size) || (!out))
return -EINVAL;
info.seq = hdev->last_error.cs_timeout.seq;
info.timestamp = ktime_to_ns(hdev->last_error.cs_timeout.timestamp);
info.seq = hdev->captured_err_info.cs_timeout.seq;
info.timestamp = ktime_to_ns(hdev->captured_err_info.cs_timeout.timestamp);
return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
}
......@@ -607,12 +609,12 @@ static int razwi_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
if ((!max_size) || (!out))
return -EINVAL;
info.timestamp = ktime_to_ns(hdev->last_error.razwi.timestamp);
info.addr = hdev->last_error.razwi.addr;
info.engine_id_1 = hdev->last_error.razwi.engine_id_1;
info.engine_id_2 = hdev->last_error.razwi.engine_id_2;
info.no_engine_id = hdev->last_error.razwi.non_engine_initiator;
info.error_type = hdev->last_error.razwi.type;
info.timestamp = ktime_to_ns(hdev->captured_err_info.razwi.timestamp);
info.addr = hdev->captured_err_info.razwi.addr;
info.engine_id_1 = hdev->captured_err_info.razwi.engine_id_1;
info.engine_id_2 = hdev->captured_err_info.razwi.engine_id_2;
info.no_engine_id = hdev->captured_err_info.razwi.non_engine_initiator;
info.error_type = hdev->captured_err_info.razwi.type;
return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
}
......@@ -627,13 +629,13 @@ static int undefined_opcode_info(struct hl_fpriv *hpriv, struct hl_info_args *ar
if ((!max_size) || (!out))
return -EINVAL;
info.timestamp = ktime_to_ns(hdev->last_error.undef_opcode.timestamp);
info.engine_id = hdev->last_error.undef_opcode.engine_id;
info.cq_addr = hdev->last_error.undef_opcode.cq_addr;
info.cq_size = hdev->last_error.undef_opcode.cq_size;
info.stream_id = hdev->last_error.undef_opcode.stream_id;
info.cb_addr_streams_len = hdev->last_error.undef_opcode.cb_addr_streams_len;
memcpy(info.cb_addr_streams, hdev->last_error.undef_opcode.cb_addr_streams,
info.timestamp = ktime_to_ns(hdev->captured_err_info.undef_opcode.timestamp);
info.engine_id = hdev->captured_err_info.undef_opcode.engine_id;
info.cq_addr = hdev->captured_err_info.undef_opcode.cq_addr;
info.cq_size = hdev->captured_err_info.undef_opcode.cq_size;
info.stream_id = hdev->captured_err_info.undef_opcode.stream_id;
info.cb_addr_streams_len = hdev->captured_err_info.undef_opcode.cb_addr_streams_len;
memcpy(info.cb_addr_streams, hdev->captured_err_info.undef_opcode.cb_addr_streams,
sizeof(info.cb_addr_streams));
return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
......@@ -660,6 +662,55 @@ static int dev_mem_alloc_page_sizes_info(struct hl_fpriv *hpriv, struct hl_info_
return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
}
static int sec_attest_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
struct cpucp_sec_attest_info *sec_attest_info;
struct hl_info_sec_attest *info;
u32 max_size = args->return_size;
int rc;
if ((!max_size) || (!out))
return -EINVAL;
sec_attest_info = kmalloc(sizeof(*sec_attest_info), GFP_KERNEL);
if (!sec_attest_info)
return -ENOMEM;
info = kmalloc(sizeof(*info), GFP_KERNEL);
if (!info) {
rc = -ENOMEM;
goto free_sec_attest_info;
}
rc = hl_fw_get_sec_attest_info(hpriv->hdev, sec_attest_info, args->sec_attest_nonce);
if (rc)
goto free_info;
info->nonce = le32_to_cpu(sec_attest_info->nonce);
info->pcr_quote_len = le16_to_cpu(sec_attest_info->pcr_quote_len);
info->pub_data_len = le16_to_cpu(sec_attest_info->pub_data_len);
info->certificate_len = le16_to_cpu(sec_attest_info->certificate_len);
info->pcr_num_reg = sec_attest_info->pcr_num_reg;
info->pcr_reg_len = sec_attest_info->pcr_reg_len;
info->quote_sig_len = sec_attest_info->quote_sig_len;
memcpy(&info->pcr_data, &sec_attest_info->pcr_data, sizeof(info->pcr_data));
memcpy(&info->pcr_quote, &sec_attest_info->pcr_quote, sizeof(info->pcr_quote));
memcpy(&info->public_data, &sec_attest_info->public_data, sizeof(info->public_data));
memcpy(&info->certificate, &sec_attest_info->certificate, sizeof(info->certificate));
memcpy(&info->quote_sig, &sec_attest_info->quote_sig, sizeof(info->quote_sig));
rc = copy_to_user(out, info,
min_t(size_t, max_size, sizeof(*info))) ? -EFAULT : 0;
free_info:
kfree(info);
free_sec_attest_info:
kfree(sec_attest_info);
return rc;
}
static int eventfd_register(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
int rc;
......@@ -697,6 +748,42 @@ static int eventfd_unregister(struct hl_fpriv *hpriv, struct hl_info_args *args)
return 0;
}
static int engine_status_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
u32 status_buf_size = args->return_size;
struct hl_device *hdev = hpriv->hdev;
struct engines_data eng_data;
int rc;
if ((status_buf_size < SZ_1K) || (status_buf_size > HL_ENGINES_DATA_MAX_SIZE) || (!out))
return -EINVAL;
eng_data.actual_size = 0;
eng_data.allocated_buf_size = status_buf_size;
eng_data.buf = vmalloc(status_buf_size);
if (!eng_data.buf)
return -ENOMEM;
hdev->asic_funcs->is_device_idle(hdev, NULL, 0, &eng_data);
if (eng_data.actual_size > eng_data.allocated_buf_size) {
dev_err(hdev->dev,
"Engines data size (%d Bytes) is bigger than allocated size (%u Bytes)\n",
eng_data.actual_size, status_buf_size);
vfree(eng_data.buf);
return -ENOMEM;
}
args->user_buffer_actual_size = eng_data.actual_size;
rc = copy_to_user(out, eng_data.buf, min_t(size_t, status_buf_size, eng_data.actual_size)) ?
-EFAULT : 0;
vfree(eng_data.buf);
return rc;
}
static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
struct device *dev)
{
......@@ -806,12 +893,18 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_DRAM_PENDING_ROWS:
return dram_pending_rows_info(hpriv, args);
case HL_INFO_SECURED_ATTESTATION:
return sec_attest_info(hpriv, args);
case HL_INFO_REGISTER_EVENTFD:
return eventfd_register(hpriv, args);
case HL_INFO_UNREGISTER_EVENTFD:
return eventfd_unregister(hpriv, args);
case HL_INFO_ENGINE_STATUS:
return engine_status_info(hpriv, args);
default:
dev_err(dev, "Invalid request %d\n", args->op);
rc = -EINVAL;
......
......@@ -826,9 +826,7 @@ static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
q->kernel_address = p;
q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH,
sizeof(*q->shadow_queue),
GFP_KERNEL);
q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH, sizeof(struct hl_cs_job *), GFP_KERNEL);
if (!q->shadow_queue) {
dev_err(hdev->dev,
"Failed to allocate shadow queue for H/W queue %d\n",
......
......@@ -194,7 +194,8 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev, struct cpucp_sensor *sen
curr_arr[sensors_by_type_next_index[type]++] = flags;
}
channels_info = kcalloc(num_active_sensor_types + 1, sizeof(*channels_info), GFP_KERNEL);
channels_info = kcalloc(num_active_sensor_types + 1, sizeof(struct hwmon_channel_info *),
GFP_KERNEL);
if (!channels_info) {
rc = -ENOMEM;
goto channels_info_array_err;
......@@ -910,3 +911,24 @@ void hl_hwmon_fini(struct hl_device *hdev)
hwmon_device_unregister(hdev->hwmon_dev);
}
void hl_hwmon_release_resources(struct hl_device *hdev)
{
const struct hwmon_channel_info **channel_info_arr;
int i = 0;
if (!hdev->hl_chip_info->info)
return;
channel_info_arr = hdev->hl_chip_info->info;
while (channel_info_arr[i]) {
kfree(channel_info_arr[i]->config);
kfree(channel_info_arr[i]);
i++;
}
kfree(channel_info_arr);
hdev->hl_chip_info->info = NULL;
}
......@@ -457,7 +457,7 @@ static void merge_va_blocks_locked(struct hl_device *hdev,
prev = list_prev_entry(va_block, node);
if (&prev->node != va_list && prev->end + 1 == va_block->start) {
prev->end = va_block->end;
prev->size = prev->end - prev->start;
prev->size = prev->end - prev->start + 1;
list_del(&va_block->node);
kfree(va_block);
va_block = prev;
......@@ -466,7 +466,7 @@ static void merge_va_blocks_locked(struct hl_device *hdev,
next = list_next_entry(va_block, node);
if (&next->node != va_list && va_block->end + 1 == next->start) {
next->start = va_block->start;
next->size = next->end - next->start;
next->size = next->end - next->start + 1;
list_del(&va_block->node);
kfree(va_block);
}
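
The +1 in both hunks matters because va_block end addresses are inclusive; a
quick worked check:

/* Inclusive-end block covering one 4 KB page: */
u64 start = 0x1000, end = 0x1fff;
u64 size = end - start + 1;	/* 0x1000 (4 KB); the old "end - start" gave 0xfff */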
......@@ -755,7 +755,7 @@ static u64 get_va_block(struct hl_device *hdev,
* - Return the start address of the virtual block.
*/
u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
enum hl_va_range_type type, u32 size, u32 alignment)
enum hl_va_range_type type, u64 size, u32 alignment)
{
return get_va_block(hdev, ctx->va_range[type], size, 0,
max(alignment, ctx->va_range[type]->page_size),
......@@ -1210,18 +1210,18 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device
goto va_block_err;
}
mutex_lock(&ctx->mmu_lock);
mutex_lock(&hdev->mmu_lock);
rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
if (rc) {
dev_err(hdev->dev, "mapping page pack failed for handle %u\n", handle);
mutex_unlock(&ctx->mmu_lock);
mutex_unlock(&hdev->mmu_lock);
goto map_err;
}
rc = hl_mmu_invalidate_cache_range(hdev, false, *vm_type | MMU_OP_SKIP_LOW_CACHE_INV,
ctx->asid, ret_vaddr, phys_pg_pack->total_size);
mutex_unlock(&ctx->mmu_lock);
mutex_unlock(&hdev->mmu_lock);
if (rc)
goto map_err;
......@@ -1362,7 +1362,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
else
vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);
mutex_lock(&ctx->mmu_lock);
mutex_lock(&hdev->mmu_lock);
unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack);
......@@ -1375,7 +1375,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
rc = hl_mmu_invalidate_cache_range(hdev, true, *vm_type, ctx->asid, vaddr,
phys_pg_pack->total_size);
mutex_unlock(&ctx->mmu_lock);
mutex_unlock(&hdev->mmu_lock);
/*
* If the context is closing we don't need to check for the MMU cache
......@@ -1418,18 +1418,23 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
return rc;
}
static int map_block(struct hl_device *hdev, u64 address, u64 *handle,
u32 *size)
static int map_block(struct hl_device *hdev, u64 address, u64 *handle, u32 *size)
{
u32 block_id = 0;
u32 block_id;
int rc;
*handle = 0;
if (size)
*size = 0;
rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id);
if (rc)
return rc;
*handle = block_id | HL_MMAP_TYPE_BLOCK;
*handle <<= PAGE_SHIFT;
return rc;
return 0;
}
static void hw_block_vm_close(struct vm_area_struct *vma)
......@@ -1437,6 +1442,13 @@ static void hw_block_vm_close(struct vm_area_struct *vma)
struct hl_vm_hw_block_list_node *lnode =
(struct hl_vm_hw_block_list_node *) vma->vm_private_data;
struct hl_ctx *ctx = lnode->ctx;
long new_mmap_size;
new_mmap_size = lnode->mapped_size - (vma->vm_end - vma->vm_start);
if (new_mmap_size > 0) {
lnode->mapped_size = new_mmap_size;
return;
}
mutex_lock(&ctx->hw_block_list_lock);
list_del(&lnode->node);
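
A quick trace of the new partial-unmap accounting, assuming an 8 KB block
mapped by a single vma (illustrative sizes):

/* block_size == mapped_size == 8192 after mmap.
 * munmap() of the first 4 KB  -> vm_close sees vm_end - vm_start == 4096,
 *   new_mmap_size == 4096 > 0  -> only mapped_size shrinks, node stays.
 * munmap() of the remaining 4 KB -> new_mmap_size == 0 -> fall through:
 *   node is unlinked from hw_block_mem_list and the ctx reference is dropped.
 */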
......@@ -1487,23 +1499,23 @@ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
if (!lnode)
return -ENOMEM;
vma->vm_ops = &hw_block_vm_ops;
vma->vm_private_data = lnode;
hl_ctx_get(ctx);
rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size);
if (rc) {
hl_ctx_put(ctx);
kfree(lnode);
return rc;
}
hl_ctx_get(ctx);
lnode->ctx = ctx;
lnode->vaddr = vma->vm_start;
lnode->size = block_size;
lnode->block_size = block_size;
lnode->mapped_size = lnode->block_size;
lnode->id = block_id;
vma->vm_private_data = lnode;
vma->vm_ops = &hw_block_vm_ops;
mutex_lock(&ctx->hw_block_list_lock);
list_add_tail(&lnode->node, &ctx->hw_block_mem_list);
mutex_unlock(&ctx->hw_block_list_lock);
......@@ -2296,8 +2308,7 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
return -EFAULT;
}
userptr->pages = kvmalloc_array(npages, sizeof(*userptr->pages),
GFP_KERNEL);
userptr->pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
if (!userptr->pages)
return -ENOMEM;
......@@ -2759,13 +2770,13 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
unmap_device_va(ctx, &args, true);
}
mutex_lock(&ctx->mmu_lock);
mutex_lock(&hdev->mmu_lock);
/* invalidate the cache once after the unmapping loop */
hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
hl_mmu_invalidate_cache(hdev, true, MMU_OP_PHYS_PACK);
mutex_unlock(&ctx->mmu_lock);
mutex_unlock(&hdev->mmu_lock);
INIT_LIST_HEAD(&free_list);
......
......@@ -11,7 +11,7 @@
* hl_mmap_mem_buf_get - increase the buffer refcount and return a pointer to
* the buffer descriptor.
*
* @mmg: parent unifed memory manager
* @mmg: parent unified memory manager
* @handle: requested buffer handle
*
* Find the buffer in the store and return a pointer to its descriptor.
......@@ -104,7 +104,7 @@ int hl_mmap_mem_buf_put(struct hl_mmap_mem_buf *buf)
* hl_mmap_mem_buf_put_handle - decrease the reference to the buffer with the
* given handle.
*
* @mmg: parent unifed memory manager
* @mmg: parent unified memory manager
* @handle: requested buffer handle
*
* Decrease the reference to the buffer, and release it if it was the last one.
......@@ -137,7 +137,7 @@ int hl_mmap_mem_buf_put_handle(struct hl_mem_mgr *mmg, u64 handle)
/**
* hl_mmap_mem_buf_alloc - allocate a new mappable buffer
*
* @mmg: parent unifed memory manager
* @mmg: parent unified memory manager
* @behavior: behavior object describing this buffer polymorphic behavior
* @gfp: gfp flags to use for the memory allocations
* @args: additional args passed to behavior->alloc
......@@ -222,7 +222,7 @@ static const struct vm_operations_struct hl_mmap_mem_buf_vm_ops = {
/**
* hl_mem_mgr_mmap - map the given buffer to the user
*
* @mmg: unifed memory manager
* @mmg: unified memory manager
* @vma: the vma object for which mmap was closed.
* @args: additional args passed to behavior->mmap
*
......@@ -322,7 +322,7 @@ void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg)
/**
* hl_mem_mgr_fini - release unified memory manager
*
* @mmg: parent unifed memory manager
* @mmg: parent unified memory manager
*
* Release the unified memory manager. Shall be called from an interrupt context.
*/
......
......@@ -9,6 +9,8 @@
#include "../habanalabs.h"
#include <trace/events/habanalabs.h>
/**
* hl_mmu_get_funcs() - get MMU functions structure
* @hdev: habanalabs device structure.
......@@ -45,6 +47,8 @@ int hl_mmu_init(struct hl_device *hdev)
if (!hdev->mmu_enable)
return 0;
mutex_init(&hdev->mmu_lock);
if (hdev->mmu_func[MMU_DR_PGT].init != NULL) {
rc = hdev->mmu_func[MMU_DR_PGT].init(hdev);
if (rc)
......@@ -86,6 +90,8 @@ void hl_mmu_fini(struct hl_device *hdev)
if (hdev->mmu_func[MMU_HR_PGT].fini != NULL)
hdev->mmu_func[MMU_HR_PGT].fini(hdev);
mutex_destroy(&hdev->mmu_lock);
}
/**
......@@ -104,8 +110,6 @@ int hl_mmu_ctx_init(struct hl_ctx *ctx)
if (!hdev->mmu_enable)
return 0;
mutex_init(&ctx->mmu_lock);
if (hdev->mmu_func[MMU_DR_PGT].ctx_init != NULL) {
rc = hdev->mmu_func[MMU_DR_PGT].ctx_init(ctx);
if (rc)
......@@ -149,8 +153,6 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
if (hdev->mmu_func[MMU_HR_PGT].ctx_fini != NULL)
hdev->mmu_func[MMU_HR_PGT].ctx_fini(ctx);
mutex_destroy(&ctx->mmu_lock);
}
/*
......@@ -259,6 +261,9 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, bool flu
if (flush_pte)
mmu_funcs->flush(ctx);
if (trace_habanalabs_mmu_unmap_enabled() && !rc)
trace_habanalabs_mmu_unmap(hdev->dev, virt_addr, 0, page_size, flush_pte);
return rc;
}
......@@ -344,6 +349,8 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_s
if (flush_pte)
mmu_funcs->flush(ctx);
trace_habanalabs_mmu_map(hdev->dev, virt_addr, phys_addr, page_size, flush_pte);
return 0;
err:
......@@ -403,6 +410,8 @@ int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr,
dev_err(hdev->dev,
"Map failed for va 0x%llx to pa 0x%llx\n",
curr_va, curr_pa);
/* last mapping failed so don't try to unmap it - reduce off by page_size */
off -= page_size;
goto unmap;
}
}
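
Why the adjustment is needed (assuming the surrounding loop advances off by
page_size per iteration, as in hl_mmu_map_contiguous):

/* Worked trace, size == 3 pages, the 3rd map fails (off == 2 * page_size):
 *   pages at offsets 0 and page_size are mapped; the one at 2 * page_size is not.
 *   off -= page_size  ->  off == page_size
 *   the unmap path then walks off = page_size, 0 -- exactly the mapped pages --
 *   instead of also trying to unmap the page whose mapping just failed.
 */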
......@@ -600,9 +609,9 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr);
mutex_lock(&ctx->mmu_lock);
mutex_lock(&hdev->mmu_lock);
rc = mmu_funcs->get_tlb_info(ctx, virt_addr, hops);
mutex_unlock(&ctx->mmu_lock);
mutex_unlock(&hdev->mmu_lock);
if (rc)
return rc;
......@@ -692,16 +701,16 @@ static void hl_mmu_prefetch_work_function(struct work_struct *work)
{
struct hl_prefetch_work *pfw = container_of(work, struct hl_prefetch_work, pf_work);
struct hl_ctx *ctx = pfw->ctx;
struct hl_device *hdev = ctx->hdev;
if (!hl_device_operational(ctx->hdev, NULL))
if (!hl_device_operational(hdev, NULL))
goto put_ctx;
mutex_lock(&ctx->mmu_lock);
mutex_lock(&hdev->mmu_lock);
ctx->hdev->asic_funcs->mmu_prefetch_cache_range(ctx, pfw->flags, pfw->asid,
pfw->va, pfw->size);
hdev->asic_funcs->mmu_prefetch_cache_range(ctx, pfw->flags, pfw->asid, pfw->va, pfw->size);
mutex_unlock(&ctx->mmu_lock);
mutex_unlock(&hdev->mmu_lock);
put_ctx:
/*
......
......@@ -375,6 +375,14 @@ static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj,
return max_size;
}
static ssize_t security_enabled_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
return sprintf(buf, "%d\n", hdev->asic_prop.fw_security_enabled);
}
static DEVICE_ATTR_RO(armcp_kernel_ver);
static DEVICE_ATTR_RO(armcp_ver);
static DEVICE_ATTR_RO(cpld_ver);
......@@ -393,6 +401,7 @@ static DEVICE_ATTR_RO(status);
static DEVICE_ATTR_RO(thermal_ver);
static DEVICE_ATTR_RO(uboot_ver);
static DEVICE_ATTR_RO(fw_os_ver);
static DEVICE_ATTR_RO(security_enabled);
static struct bin_attribute bin_attr_eeprom = {
.attr = {.name = "eeprom", .mode = (0444)},
......@@ -417,6 +426,7 @@ static struct attribute *hl_dev_attrs[] = {
&dev_attr_thermal_ver.attr,
&dev_attr_uboot_ver.attr,
&dev_attr_fw_os_ver.attr,
&dev_attr_security_enabled.attr,
NULL,
};
......
......@@ -15,7 +15,6 @@
#include "../include/gaudi2/gaudi2_packets.h"
#include "../include/gaudi2/gaudi2_fw_if.h"
#include "../include/gaudi2/gaudi2_async_events.h"
#include "../include/gaudi2/gaudi2_async_virt_events.h"
#define GAUDI2_LINUX_FW_FILE "habanalabs/gaudi2/gaudi2-fit.itb"
#define GAUDI2_BOOT_FIT_FILE "habanalabs/gaudi2/gaudi2-boot-fit.itb"
......@@ -140,9 +139,6 @@
#define VA_HOST_SPACE_HPAGE_START 0xFFF0800000000000ull
#define VA_HOST_SPACE_HPAGE_END 0xFFF1000000000000ull /* 140TB */
#define VA_HOST_SPACE_USER_MAPPED_CB_START 0xFFF1000000000000ull
#define VA_HOST_SPACE_USER_MAPPED_CB_END 0xFFF1000100000000ull /* 4GB */
/* 140TB */
#define VA_HOST_SPACE_PAGE_SIZE (VA_HOST_SPACE_PAGE_END - VA_HOST_SPACE_PAGE_START)
......@@ -458,7 +454,6 @@ struct dup_block_ctx {
* the user can map.
* @lfsr_rand_seeds: array of MME ACC random seeds to set.
* @hw_queues_lock: protects the H/W queues from concurrent access.
* @kdma_lock: protects the KDMA engine from concurrent access.
* @scratchpad_kernel_address: general purpose PAGE_SIZE contiguous memory,
* this memory region should be write-only.
* currently used for HBW QMAN writes which is
......@@ -510,9 +505,6 @@ struct dup_block_ctx {
* @flush_db_fifo: flag to force flush DB FIFO after a write.
* @hbm_cfg: HBM subsystem settings
* @hw_queues_lock_mutex: used by simulator instead of hw_queues_lock.
* @kdma_lock_mutex: used by simulator instead of kdma_lock.
* @use_deprecated_event_mappings: use old event mappings which are about to be
* deprecated
*/
struct gaudi2_device {
int (*cpucp_info_get)(struct hl_device *hdev);
......@@ -521,7 +513,6 @@ struct gaudi2_device {
int lfsr_rand_seeds[MME_NUM_OF_LFSR_SEEDS];
spinlock_t hw_queues_lock;
spinlock_t kdma_lock;
void *scratchpad_kernel_address;
dma_addr_t scratchpad_bus_address;
......@@ -562,5 +553,6 @@ void gaudi2_pb_print_security_errors(struct hl_device *hdev, u32 block_addr, u32
u32 offended_addr);
int gaudi2_init_security(struct hl_device *hdev);
void gaudi2_ack_protection_bits_errors(struct hl_device *hdev);
int gaudi2_send_device_activity(struct hl_device *hdev, bool open);
#endif /* GAUDI2P_H_ */
......@@ -51,12 +51,18 @@
(0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT) | \
(0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT))
#define PDMA1_QMAN_ENABLE \
#define PDMA0_QMAN_ENABLE \
((0x3 << PDMA0_QM_GLBL_CFG0_PQF_EN_SHIFT) | \
(0x1F << PDMA0_QM_GLBL_CFG0_CQF_EN_SHIFT) | \
(0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT) | \
(0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT))
#define PDMA1_QMAN_ENABLE \
((0x1 << PDMA0_QM_GLBL_CFG0_PQF_EN_SHIFT) | \
(0x1F << PDMA0_QM_GLBL_CFG0_CQF_EN_SHIFT) | \
(0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT) | \
(0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT))
/* QM_IDLE_MASK is valid for all engines QM idle check */
#define QM_IDLE_MASK (DCORE0_EDMA0_QM_GLBL_STS0_PQF_IDLE_MASK | \
DCORE0_EDMA0_QM_GLBL_STS0_CQF_IDLE_MASK | \
......@@ -138,4 +144,17 @@
#define DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_SHIFT 15
#define DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK 0x8000
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_SHIFT 0
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK 0x1
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_SHIFT 1
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK 0x2
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_SHIFT 2
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK 0x4
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK_SHIFT 3
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK_MASK 0x8
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK_SHIFT 4
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK_MASK 0x10
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK_SHIFT 5
#define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK_MASK 0x20
#endif /* GAUDI2_MASKS_H_ */
......@@ -2559,6 +2559,10 @@ static const u32 gaudi2_pb_pcie[] = {
mmPCIE_WRAP_BASE,
};
static const u32 gaudi2_pb_pcie_unsecured_regs[] = {
mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0,
};
static const u32 gaudi2_pb_thermal_sensor0[] = {
mmDCORE0_XFT_BASE,
mmDCORE0_TSTDVS_BASE,
......@@ -2583,9 +2587,9 @@ struct gaudi2_tpc_pb_data {
};
static void gaudi2_config_tpcs_glbl_sec(struct hl_device *hdev, int dcore, int inst, u32 offset,
void *data)
struct iterate_module_ctx *ctx)
{
struct gaudi2_tpc_pb_data *pb_data = (struct gaudi2_tpc_pb_data *)data;
struct gaudi2_tpc_pb_data *pb_data = ctx->data;
hl_config_glbl_sec(hdev, gaudi2_pb_dcr0_tpc0, pb_data->glbl_sec,
offset, pb_data->block_array_size);
......@@ -2660,15 +2664,14 @@ static int gaudi2_init_pb_tpc(struct hl_device *hdev)
struct gaudi2_tpc_arc_pb_data {
u32 unsecured_regs_arr_size;
u32 arc_regs_arr_size;
int rc;
};
static void gaudi2_config_tpcs_pb_ranges(struct hl_device *hdev, int dcore, int inst, u32 offset,
void *data)
struct iterate_module_ctx *ctx)
{
struct gaudi2_tpc_arc_pb_data *pb_data = (struct gaudi2_tpc_arc_pb_data *)data;
struct gaudi2_tpc_arc_pb_data *pb_data = ctx->data;
pb_data->rc |= hl_init_pb_ranges(hdev, HL_PB_SHARED, HL_PB_NA, 1,
ctx->rc = hl_init_pb_ranges(hdev, HL_PB_SHARED, HL_PB_NA, 1,
offset, gaudi2_pb_dcr0_tpc0_arc,
pb_data->arc_regs_arr_size,
gaudi2_pb_dcr0_tpc0_arc_unsecured_regs,
......@@ -2683,12 +2686,12 @@ static int gaudi2_init_pb_tpc_arc(struct hl_device *hdev)
tpc_arc_pb_data.arc_regs_arr_size = ARRAY_SIZE(gaudi2_pb_dcr0_tpc0_arc);
tpc_arc_pb_data.unsecured_regs_arr_size =
ARRAY_SIZE(gaudi2_pb_dcr0_tpc0_arc_unsecured_regs);
tpc_arc_pb_data.rc = 0;
tpc_iter.fn = &gaudi2_config_tpcs_pb_ranges;
tpc_iter.data = &tpc_arc_pb_data;
gaudi2_iterate_tpcs(hdev, &tpc_iter);
return tpc_arc_pb_data.rc;
return tpc_iter.rc;
}
static int gaudi2_init_pb_sm_objs(struct hl_device *hdev)
......@@ -3419,7 +3422,8 @@ static int gaudi2_init_protection_bits(struct hl_device *hdev)
rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA,
HL_PB_SINGLE_INSTANCE, HL_PB_NA,
gaudi2_pb_pcie, ARRAY_SIZE(gaudi2_pb_pcie),
NULL, HL_PB_NA);
gaudi2_pb_pcie_unsecured_regs,
ARRAY_SIZE(gaudi2_pb_pcie_unsecured_regs));
/* Thermal Sensor.
* Skip when security is enabled in F/W, because the blocks are protected by privileged RR.
......@@ -3547,9 +3551,9 @@ struct gaudi2_ack_pb_tpc_data {
};
static void gaudi2_ack_pb_tpc_config(struct hl_device *hdev, int dcore, int inst, u32 offset,
void *data)
struct iterate_module_ctx *ctx)
{
struct gaudi2_ack_pb_tpc_data *pb_data = (struct gaudi2_ack_pb_tpc_data *)data;
struct gaudi2_ack_pb_tpc_data *pb_data = ctx->data;
hl_ack_pb_single_dcore(hdev, offset, HL_PB_SINGLE_INSTANCE, HL_PB_NA,
gaudi2_pb_dcr0_tpc0, pb_data->tpc_regs_array_size);
......
......@@ -916,26 +916,11 @@ int goya_late_init(struct hl_device *hdev)
*/
void goya_late_fini(struct hl_device *hdev)
{
const struct hwmon_channel_info **channel_info_arr;
struct goya_device *goya = hdev->asic_specific;
int i = 0;
cancel_delayed_work_sync(&goya->goya_work->work_freq);
if (!hdev->hl_chip_info->info)
return;
channel_info_arr = hdev->hl_chip_info->info;
while (channel_info_arr[i]) {
kfree(channel_info_arr[i]->config);
kfree(channel_info_arr[i]);
i++;
}
kfree(channel_info_arr);
hdev->hl_chip_info->info = NULL;
hl_hwmon_release_resources(hdev);
}
static void goya_set_pci_memory_regions(struct hl_device *hdev)
......@@ -1040,6 +1025,7 @@ static int goya_sw_init(struct hl_device *hdev)
hdev->asic_prop.supports_compute_reset = true;
hdev->asic_prop.allow_inference_soft_reset = true;
hdev->supports_wait_for_multi_cs = false;
hdev->supports_ctx_switch = true;
hdev->asic_funcs->set_pci_memory_regions(hdev);
......@@ -4559,7 +4545,7 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
return rc;
}
static int goya_non_hard_reset_late_init(struct hl_device *hdev)
static int goya_compute_reset_late_init(struct hl_device *hdev)
{
/*
* Unmask all IRQs since some could have been received
......@@ -5137,8 +5123,8 @@ int goya_cpucp_info_get(struct hl_device *hdev)
return 0;
}
static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
u8 mask_len, struct seq_file *s)
static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
struct engines_data *e)
{
const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
......@@ -5149,8 +5135,8 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
u64 offset;
int i;
if (s)
seq_puts(s, "\nDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0\n"
if (e)
hl_engine_data_sprintf(e, "\nDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0\n"
"--- ------- ------------ -------------\n");
offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;
......@@ -5164,13 +5150,13 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
if (mask && !is_eng_idle)
set_bit(GOYA_ENGINE_ID_DMA_0 + i, mask);
if (s)
seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
if (e)
hl_engine_data_sprintf(e, dma_fmt, i, is_eng_idle ? "Y" : "N",
qm_glbl_sts0, dma_core_sts0);
}
if (s)
seq_puts(s,
if (e)
hl_engine_data_sprintf(e,
"\nTPC is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 CFG_STATUS\n"
"--- ------- ------------ -------------- ----------\n");
......@@ -5187,13 +5173,13 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
if (mask && !is_eng_idle)
set_bit(GOYA_ENGINE_ID_TPC_0 + i, mask);
if (s)
seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
if (e)
hl_engine_data_sprintf(e, fmt, i, is_eng_idle ? "Y" : "N",
qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
}
if (s)
seq_puts(s,
if (e)
hl_engine_data_sprintf(e,
"\nMME is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 ARCH_STATUS\n"
"--- ------- ------------ -------------- -----------\n");
......@@ -5207,10 +5193,10 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
if (mask && !is_eng_idle)
set_bit(GOYA_ENGINE_ID_MME_0, mask);
if (s) {
seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
if (e) {
hl_engine_data_sprintf(e, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
cmdq_glbl_sts0, mme_arch_sts);
seq_puts(s, "\n");
hl_engine_data_sprintf(e, "\n");
}
return is_idle;
......@@ -5434,6 +5420,11 @@ static int goya_scrub_device_dram(struct hl_device *hdev, u64 val)
return -EOPNOTSUPP;
}
static int goya_send_device_activity(struct hl_device *hdev, bool open)
{
return 0;
}
static const struct hl_asic_funcs goya_funcs = {
.early_init = goya_early_init,
.early_fini = goya_early_fini,
......@@ -5478,11 +5469,9 @@ static const struct hl_asic_funcs goya_funcs = {
.send_heartbeat = goya_send_heartbeat,
.debug_coresight = goya_debug_coresight,
.is_device_idle = goya_is_device_idle,
.non_hard_reset_late_init = goya_non_hard_reset_late_init,
.compute_reset_late_init = goya_compute_reset_late_init,
.hw_queues_lock = goya_hw_queues_lock,
.hw_queues_unlock = goya_hw_queues_unlock,
.kdma_lock = NULL,
.kdma_unlock = NULL,
.get_pci_id = goya_get_pci_id,
.get_eeprom_data = goya_get_eeprom_data,
.get_monitor_dump = goya_get_monitor_dump,
......@@ -5528,6 +5517,7 @@ static const struct hl_asic_funcs goya_funcs = {
.mmu_get_real_page_size = hl_mmu_get_real_page_size,
.access_dev_mem = hl_access_dev_mem,
.set_dram_bar_base = goya_set_ddr_bar_base,
.send_device_activity = goya_send_device_activity,
};
/*
......
......@@ -629,6 +629,12 @@ enum pq_init_status {
* CPUCP_PACKET_ENGINE_CORE_ASID_SET -
* Packet to perform engine core ASID configuration
*
* CPUCP_PACKET_SEC_ATTEST_GET -
* Get the attestation data that is collected during various stages of the
* boot sequence. The attestation data is also hashed with some unique
* number (nonce) provided by the host to prevent replay attacks.
* A public key and certificate are also provided as part of the FW response.
*
* CPUCP_PACKET_MONITOR_DUMP_GET -
* Get a dump of the monitor registers from the CpuCP kernel.
* The CPU will put the registers dump in a buffer allocated by the driver, which
......@@ -636,6 +642,10 @@ enum pq_init_status {
* passes the max size it allows the CpuCP to write to the structure, to prevent
* data corruption in case of mismatched driver/FW versions.
* Relevant only to Gaudi.
*
* CPUCP_PACKET_ACTIVE_STATUS_SET -
* LKD sends the FW an indication of whether the device is free or in use.
* This indication is also reported to the BMC.
*/
enum cpucp_packet_id {
......@@ -687,10 +697,17 @@ enum cpucp_packet_id {
CPUCP_PACKET_RESERVED, /* not used */
CPUCP_PACKET_ENGINE_CORE_ASID_SET, /* internal */
CPUCP_PACKET_RESERVED2, /* not used */
CPUCP_PACKET_SEC_ATTEST_GET, /* internal */
CPUCP_PACKET_RESERVED3, /* not used */
CPUCP_PACKET_RESERVED4, /* not used */
CPUCP_PACKET_RESERVED5, /* not used */
CPUCP_PACKET_MONITOR_DUMP_GET, /* debugfs */
CPUCP_PACKET_RESERVED5, /* not used */
CPUCP_PACKET_RESERVED6, /* not used */
CPUCP_PACKET_RESERVED7, /* not used */
CPUCP_PACKET_RESERVED8, /* not used */
CPUCP_PACKET_RESERVED9, /* not used */
CPUCP_PACKET_ACTIVE_STATUS_SET, /* internal */
CPUCP_PACKET_ID_MAX /* must be last */
};
#define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5
......@@ -783,6 +800,9 @@ struct cpucp_packet {
* result cannot be used to hold general purpose data.
*/
__le32 status_mask;
/* random, used once number, for security packets */
__le32 nonce;
};
/* For NIC requests */
......@@ -813,10 +833,25 @@ enum cpucp_led_index {
CPUCP_LED2_INDEX
};
/*
* enum cpucp_packet_rc - Error return code
* @cpucp_packet_success -> in case of success.
* @cpucp_packet_invalid -> kept to support the Goya and Gaudi platforms.
* @cpucp_packet_fault -> in case of a processing error, e.g. failing to
* get the device binding or a semaphore.
* @cpucp_packet_invalid_pkt -> when the cpucp packet is unsupported.
* Supported from Greco onwards.
* @cpucp_packet_invalid_params -> when a parameter check fails, e.g. on buffer
* length or attribute value. Supported from Greco onwards.
* @cpucp_packet_rc_max -> indicates the size of the enum; must be last.
*/
enum cpucp_packet_rc {
cpucp_packet_success,
cpucp_packet_invalid,
cpucp_packet_fault
cpucp_packet_fault,
cpucp_packet_invalid_pkt,
cpucp_packet_invalid_params,
cpucp_packet_rc_max
};
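
One plausible way a driver could consume the two new codes (an illustrative
sketch only; the actual mapping lives in the habanalabs firmware handling code,
which is not part of this excerpt):

/* Illustrative mapping sketch -- not taken from this diff. */
static int cpucp_rc_to_errno(u32 rc)
{
	switch (rc) {
	case cpucp_packet_success:
		return 0;
	case cpucp_packet_invalid_pkt:
		return -EOPNOTSUPP;	/* packet unsupported, Greco onwards */
	case cpucp_packet_invalid_params:
		return -EINVAL;		/* bad buffer length / attribute value */
	case cpucp_packet_fault:
	default:
		return -EIO;
	}
}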
/*
......@@ -1193,6 +1228,70 @@ enum cpu_reset_status {
CPU_RST_STATUS_SOFT_RST_DONE = 1,
};
#define SEC_PCR_DATA_BUF_SZ 256
#define SEC_PCR_QUOTE_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */
#define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */
#define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */
#define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */
/*
* struct cpucp_sec_attest_info - attestation report of the boot
* @pcr_data: raw values of the PCR registers
* @pcr_num_reg: number of PCR registers in the pcr_data array
* @pcr_reg_len: length of each PCR register in the pcr_data array (bytes)
* @nonce: number used only once; a random number provided by the host. It is
* also passed to the quote command as qualifying data.
* @pcr_quote_len: length of the attestation quote data (bytes)
* @pcr_quote: attestation report data structure
* @quote_sig_len: length of the attestation report signature (bytes)
* @quote_sig: signature structure of the attestation report
* @pub_data_len: length of the public data (bytes)
* @public_data: public key for the signed attestation
* (outPublic + name + qualifiedName)
* @certificate_len: length of the certificate (bytes)
* @certificate: certificate for the attestation signing key
*/
struct cpucp_sec_attest_info {
__u8 pcr_data[SEC_PCR_DATA_BUF_SZ];
__u8 pcr_num_reg;
__u8 pcr_reg_len;
__le16 pad0;
__le32 nonce;
__le16 pcr_quote_len;
__u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ];
__u8 quote_sig_len;
__u8 quote_sig[SEC_SIGNATURE_BUF_SZ];
__le16 pub_data_len;
__u8 public_data[SEC_PUB_DATA_BUF_SZ];
__le16 certificate_len;
__u8 certificate[SEC_CERTIFICATE_BUF_SZ];
};
/*
* struct cpucp_dev_info_signed - device information signed by a secured device
* @info: device information structure as defined above
* @nonce: number used only once; a random number provided by the host. This
* number is hashed and signed along with the device information.
* @info_sig_len: length of the attestation signature (bytes)
* @info_sig: signature of the info + nonce data.
* @pub_data_len: length of the public data (bytes)
 * @public_data: public key for the signed info data
* (outPublic + name + qualifiedName)
* @certificate_len: length of the certificate (bytes)
* @certificate: certificate for the signing key
*/
struct cpucp_dev_info_signed {
struct cpucp_info info; /* assumed to be 64bit aligned */
__le32 nonce;
__le32 pad0;
__u8 info_sig_len;
__u8 info_sig[SEC_SIGNATURE_BUF_SZ];
__le16 pub_data_len;
__u8 public_data[SEC_PUB_DATA_BUF_SZ];
__le16 certificate_len;
__u8 certificate[SEC_CERTIFICATE_BUF_SZ];
};
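/*
 * Illustrative sketch only: the nonce echoed in the signed response should
 * match the one the host sent, binding the signature to this specific
 * request (anti-replay). The helper name is an assumption.
 */
static inline bool dev_info_nonce_matches(const struct cpucp_dev_info_signed *s,
					  u32 expected_nonce)
{
	return le32_to_cpu(s->nonce) == expected_nonce;
}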
/*
* struct dcore_monitor_regs_data - DCORE monitor regs data.
 * The structure follows the sync manager block layout. Relevant only to Gaudi.
......
......@@ -34,6 +34,7 @@ enum cpu_boot_err {
CPU_BOOT_ERR_BINNING_FAIL = 19,
CPU_BOOT_ERR_TPM_FAIL = 20,
CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21,
CPU_BOOT_ERR_EEPROM_FAIL = 22,
CPU_BOOT_ERR_ENABLED = 31,
CPU_BOOT_ERR_SCND_EN = 63,
CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */
......@@ -115,6 +116,9 @@ enum cpu_boot_err {
 * CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL Failed to set threshold for temperature
* sensor.
*
 * CPU_BOOT_ERR0_EEPROM_FAIL	Failed to read EEPROM data. Defaults
 *				are used.
*
* CPU_BOOT_ERR0_ENABLED Error registers enabled.
* This is a main indication that the
* running FW populates the error
......@@ -139,6 +143,7 @@ enum cpu_boot_err {
#define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL)
#define CPU_BOOT_ERR0_TPM_FAIL (1 << CPU_BOOT_ERR_TPM_FAIL)
#define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL (1 << CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL)
#define CPU_BOOT_ERR0_EEPROM_FAIL (1 << CPU_BOOT_ERR_EEPROM_FAIL)
#define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED)
#define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED)
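/*
 * Illustrative sketch only: decoding the first boot-error register.
 * CPU_BOOT_ERR0_ENABLED merely indicates that the running FW populates
 * the register, so it is checked before looking for real failures. The
 * helper name and the log strings are assumptions.
 */
static void report_boot_errors(struct device *dev, u32 err0)
{
	if (!(err0 & CPU_BOOT_ERR0_ENABLED))
		return;	/* error reporting not enabled by the running FW */

	if (err0 & CPU_BOOT_ERR0_EEPROM_FAIL)
		dev_warn(dev, "EEPROM read failed, defaults are used\n");
	if (err0 & CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL)
		dev_err(dev, "failed to set temperature sensor threshold\n");
	if (err0 & CPU_BOOT_ERR0_BINNING_FAIL)
		dev_err(dev, "binning failed\n");
}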
......@@ -426,7 +431,9 @@ struct cpu_dyn_regs {
__le32 gic_host_ints_irq;
__le32 gic_host_soft_rst_irq;
__le32 gic_rot_qm_irq_ctrl;
__le32 reserved1[22]; /* reserve for future use */
__le32 cpu_rst_status;
__le32 eng_arc_irq_ctrl;
__le32 reserved1[20]; /* reserve for future use */
};
/* TODO: remove the desc magic after the code is updated to use message */
......@@ -465,6 +472,26 @@ enum comms_msg_type {
HL_COMMS_BINNING_CONF_TYPE = 3,
};
/*
* Binning information shared between LKD and FW
* @tpc_mask - TPC binning information
* @dec_mask - Decoder binning information
* @hbm_mask - HBM binning information
* @edma_mask - EDMA binning information
 * @mme_mask_l - MME binning information, lower 32 bits
 * @mme_mask_h - MME binning information, upper 32 bits
* @reserved - reserved field for 64 bit alignment
*/
struct lkd_fw_binning_info {
__le64 tpc_mask;
__le32 dec_mask;
__le32 hbm_mask;
__le32 edma_mask;
__le32 mme_mask_l;
__le32 mme_mask_h;
__le32 reserved;
};
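/*
 * Illustrative sketch only: packing host-side binning masks into the
 * shared structure. The helper and its parameters are placeholders for
 * whatever the LKD derives from the device properties.
 */
static void fill_binning_info(struct lkd_fw_binning_info *bi, u64 tpc,
			      u32 dec, u32 hbm, u32 edma, u64 mme)
{
	bi->tpc_mask = cpu_to_le64(tpc);
	bi->dec_mask = cpu_to_le32(dec);
	bi->hbm_mask = cpu_to_le32(hbm);
	bi->edma_mask = cpu_to_le32(edma);
	bi->mme_mask_l = cpu_to_le32(lower_32_bits(mme));
	bi->mme_mask_h = cpu_to_le32(upper_32_bits(mme));
	bi->reserved = 0;
}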
/* TODO: remove this struct after the code is updated to use message */
/* this is the comms descriptor header - meta data */
struct comms_desc_header {
......@@ -525,13 +552,7 @@ struct lkd_fw_comms_msg {
struct {
__u8 fw_cfg_skip; /* 1 - skip, 0 - don't skip */
};
struct {
__le64 tpc_binning_conf;
__le32 dec_binning_conf;
__le32 hbm_binning_conf;
__le32 edma_binning_conf;
__le32 mme_redundancy_conf; /* use MME_REDUNDANT_COLUMN */
};
struct lkd_fw_binning_info binning_info;
};
};
......
......@@ -132,6 +132,7 @@
#include "dcore0_mme_ctrl_lo_arch_tensor_a_regs.h"
#include "dcore0_mme_ctrl_lo_arch_tensor_b_regs.h"
#include "dcore0_mme_ctrl_lo_arch_tensor_cout_regs.h"
#include "pcie_wrap_special_regs.h"
#include "pdma0_qm_masks.h"
#include "pdma0_core_masks.h"
......@@ -239,6 +240,7 @@
#define SFT_IF_RTR_OFFSET (mmSFT0_HBW_RTR_IF1_RTR_H3_BASE - mmSFT0_HBW_RTR_IF0_RTR_H3_BASE)
#define ARC_HALT_REQ_OFFSET (mmARC_FARM_ARC0_AUX_RUN_HALT_REQ - mmARC_FARM_ARC0_AUX_BASE)
#define ARC_HALT_ACK_OFFSET (mmARC_FARM_ARC0_AUX_RUN_HALT_ACK - mmARC_FARM_ARC0_AUX_BASE)
#define ARC_REGION_CFG_OFFSET(region) \
(mmARC_FARM_ARC0_AUX_ARC_REGION_CFG_0 + (region * 4) - mmARC_FARM_ARC0_AUX_BASE)
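/*
 * Illustrative sketch only: the offset macros above are computed relative
 * to one ARC AUX block base, so the same macro serves every engine
 * instance. arc_aux_base stands for the specific engine's mm..._AUX_BASE
 * address.
 */
static inline u64 arc_region_cfg_addr(u64 arc_aux_base, u32 region)
{
	return arc_aux_base + ARC_REGION_CFG_OFFSET(region);
}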
......
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2016-2020 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
/************************************
** This is an auto-generated file **
** DO NOT EDIT BELOW **
************************************/
#ifndef ASIC_REG_PCIE_WRAP_SPECIAL_REGS_H_
#define ASIC_REG_PCIE_WRAP_SPECIAL_REGS_H_
/*
*****************************************
* PCIE_WRAP_SPECIAL
* (Prototype: SPECIAL_REGS)
*****************************************
*/
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_0 0x4C01E80
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_1 0x4C01E84
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_2 0x4C01E88
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_3 0x4C01E8C
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_4 0x4C01E90
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_5 0x4C01E94
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_6 0x4C01E98
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_7 0x4C01E9C
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_8 0x4C01EA0
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_9 0x4C01EA4
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_10 0x4C01EA8
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_11 0x4C01EAC
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_12 0x4C01EB0
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_13 0x4C01EB4
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_14 0x4C01EB8
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_15 0x4C01EBC
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_16 0x4C01EC0
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_17 0x4C01EC4
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_18 0x4C01EC8
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_19 0x4C01ECC
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_20 0x4C01ED0
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_21 0x4C01ED4
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_22 0x4C01ED8
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_23 0x4C01EDC
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_24 0x4C01EE0
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_25 0x4C01EE4
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_26 0x4C01EE8
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_27 0x4C01EEC
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_28 0x4C01EF0
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_29 0x4C01EF4
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_30 0x4C01EF8
#define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_31 0x4C01EFC
#define mmPCIE_WRAP_SPECIAL_MEM_GW_DATA 0x4C01F00
#define mmPCIE_WRAP_SPECIAL_MEM_GW_REQ 0x4C01F04
#define mmPCIE_WRAP_SPECIAL_MEM_NUMOF 0x4C01F0C
#define mmPCIE_WRAP_SPECIAL_MEM_ECC_SEL 0x4C01F10
#define mmPCIE_WRAP_SPECIAL_MEM_ECC_CTL 0x4C01F14
#define mmPCIE_WRAP_SPECIAL_MEM_ECC_ERR_MASK 0x4C01F18
#define mmPCIE_WRAP_SPECIAL_MEM_ECC_GLBL_ERR_MASK 0x4C01F1C
#define mmPCIE_WRAP_SPECIAL_MEM_ECC_ERR_STS 0x4C01F20
#define mmPCIE_WRAP_SPECIAL_MEM_ECC_ERR_ADDR 0x4C01F24
#define mmPCIE_WRAP_SPECIAL_MEM_RM 0x4C01F28
#define mmPCIE_WRAP_SPECIAL_GLBL_ERR_MASK 0x4C01F40
#define mmPCIE_WRAP_SPECIAL_GLBL_ERR_ADDR 0x4C01F44
#define mmPCIE_WRAP_SPECIAL_GLBL_ERR_CAUSE 0x4C01F48
#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0 0x4C01F60
#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_1 0x4C01F64
#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_2 0x4C01F68
#define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_3 0x4C01F6C
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_0 0x4C01F80
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_1 0x4C01F84
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_2 0x4C01F88
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_3 0x4C01F8C
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_4 0x4C01F90
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_5 0x4C01F94
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_6 0x4C01F98
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_7 0x4C01F9C
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_8 0x4C01FA0
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_9 0x4C01FA4
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_10 0x4C01FA8
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_11 0x4C01FAC
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_12 0x4C01FB0
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_13 0x4C01FB4
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_14 0x4C01FB8
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_15 0x4C01FBC
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_16 0x4C01FC0
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_17 0x4C01FC4
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_18 0x4C01FC8
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_19 0x4C01FCC
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_20 0x4C01FD0
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_21 0x4C01FD4
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_22 0x4C01FD8
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_23 0x4C01FDC
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_24 0x4C01FE0
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_25 0x4C01FE4
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_26 0x4C01FE8
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_27 0x4C01FEC
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_28 0x4C01FF0
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_29 0x4C01FF4
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_30 0x4C01FF8
#define mmPCIE_WRAP_SPECIAL_GLBL_SEC_31 0x4C01FFC
#endif /* ASIC_REG_PCIE_WRAP_SPECIAL_REGS_H_ */
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2022 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
#ifndef __GAUDI2_ASYNC_VIRT_EVENTS_H_
#define __GAUDI2_ASYNC_VIRT_EVENTS_H_
enum gaudi2_async_virt_event_id {
GAUDI2_EVENT_NIC3_QM1_OLD = 1206,
GAUDI2_EVENT_NIC4_QM0_OLD = 1207,
GAUDI2_EVENT_NIC4_QM1_OLD = 1208,
GAUDI2_EVENT_NIC5_QM0_OLD = 1209,
GAUDI2_EVENT_NIC5_QM1_OLD = 1210,
GAUDI2_EVENT_NIC6_QM0_OLD = 1211,
GAUDI2_EVENT_NIC6_QM1_OLD = 1212,
GAUDI2_EVENT_NIC7_QM0_OLD = 1213,
GAUDI2_EVENT_NIC7_QM1_OLD = 1214,
GAUDI2_EVENT_NIC8_QM0_OLD = 1215,
GAUDI2_EVENT_NIC8_QM1_OLD = 1216,
GAUDI2_EVENT_NIC9_QM0_OLD = 1217,
GAUDI2_EVENT_NIC9_QM1_OLD = 1218,
GAUDI2_EVENT_NIC10_QM0_OLD = 1219,
GAUDI2_EVENT_NIC10_QM1_OLD = 1220,
GAUDI2_EVENT_NIC11_QM0_OLD = 1221,
GAUDI2_EVENT_NIC11_QM1_OLD = 1222,
GAUDI2_EVENT_CPU_PKT_SANITY_FAILED_OLD = 1223,
GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0_OLD = 1224,
GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG1_OLD = 1225,
GAUDI2_EVENT_CPU1_STATUS_NIC1_ENG0_OLD = 1226,
GAUDI2_EVENT_CPU1_STATUS_NIC1_ENG1_OLD = 1227,
GAUDI2_EVENT_CPU2_STATUS_NIC2_ENG0_OLD = 1228,
GAUDI2_EVENT_CPU2_STATUS_NIC2_ENG1_OLD = 1229,
GAUDI2_EVENT_CPU3_STATUS_NIC3_ENG0_OLD = 1230,
GAUDI2_EVENT_CPU3_STATUS_NIC3_ENG1_OLD = 1231,
GAUDI2_EVENT_CPU4_STATUS_NIC4_ENG0_OLD = 1232,
GAUDI2_EVENT_CPU4_STATUS_NIC4_ENG1_OLD = 1233,
GAUDI2_EVENT_CPU5_STATUS_NIC5_ENG0_OLD = 1234,
GAUDI2_EVENT_CPU5_STATUS_NIC5_ENG1_OLD = 1235,
GAUDI2_EVENT_CPU6_STATUS_NIC6_ENG0_OLD = 1236,
GAUDI2_EVENT_CPU6_STATUS_NIC6_ENG1_OLD = 1237,
GAUDI2_EVENT_CPU7_STATUS_NIC7_ENG0_OLD = 1238,
GAUDI2_EVENT_CPU7_STATUS_NIC7_ENG1_OLD = 1239,
GAUDI2_EVENT_CPU8_STATUS_NIC8_ENG0_OLD = 1240,
GAUDI2_EVENT_CPU8_STATUS_NIC8_ENG1_OLD = 1241,
GAUDI2_EVENT_CPU9_STATUS_NIC9_ENG0_OLD = 1242,
GAUDI2_EVENT_CPU9_STATUS_NIC9_ENG1_OLD = 1243,
GAUDI2_EVENT_CPU10_STATUS_NIC10_ENG0_OLD = 1244,
GAUDI2_EVENT_CPU10_STATUS_NIC10_ENG1_OLD = 1245,
GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG0_OLD = 1246,
GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1_OLD = 1247,
GAUDI2_EVENT_ARC_DCCM_FULL_OLD = 1248,
};
#endif /* __GAUDI2_ASYNC_VIRT_EVENTS_H_ */
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2016-2021 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM habanalabs
#if !defined(_TRACE_HABANALABS_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_HABANALABS_H
#include <linux/tracepoint.h>
DECLARE_EVENT_CLASS(habanalabs_mmu_template,
TP_PROTO(struct device *dev, u64 virt_addr, u64 phys_addr, u32 page_size, bool flush_pte),
TP_ARGS(dev, virt_addr, phys_addr, page_size, flush_pte),
TP_STRUCT__entry(
__string(dname, dev_name(dev))
__field(u64, virt_addr)
__field(u64, phys_addr)
__field(u32, page_size)
__field(u8, flush_pte)
),
TP_fast_assign(
__assign_str(dname, dev_name(dev));
__entry->virt_addr = virt_addr;
__entry->phys_addr = phys_addr;
__entry->page_size = page_size;
__entry->flush_pte = flush_pte;
),
TP_printk("%s: vaddr: %#llx, paddr: %#llx, psize: %#x, flush: %s",
__get_str(dname),
__entry->virt_addr,
__entry->phys_addr,
__entry->page_size,
__entry->flush_pte ? "true" : "false")
);
DEFINE_EVENT(habanalabs_mmu_template, habanalabs_mmu_map,
TP_PROTO(struct device *dev, u64 virt_addr, u64 phys_addr, u32 page_size, bool flush_pte),
TP_ARGS(dev, virt_addr, phys_addr, page_size, flush_pte));
DEFINE_EVENT(habanalabs_mmu_template, habanalabs_mmu_unmap,
TP_PROTO(struct device *dev, u64 virt_addr, u64 phys_addr, u32 page_size, bool flush_pte),
TP_ARGS(dev, virt_addr, phys_addr, page_size, flush_pte));
DECLARE_EVENT_CLASS(habanalabs_dma_alloc_template,
TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size, const char *caller),
TP_ARGS(dev, cpu_addr, dma_addr, size, caller),
TP_STRUCT__entry(
__string(dname, dev_name(dev))
__field(u64, cpu_addr)
__field(u64, dma_addr)
__field(u32, size)
__field(const char *, caller)
),
TP_fast_assign(
__assign_str(dname, dev_name(dev));
__entry->cpu_addr = cpu_addr;
__entry->dma_addr = dma_addr;
__entry->size = size;
__entry->caller = caller;
),
TP_printk("%s: cpu_addr: %#llx, dma_addr: %#llx, size: %#x, caller: %s",
__get_str(dname),
__entry->cpu_addr,
__entry->dma_addr,
__entry->size,
__entry->caller)
);
DEFINE_EVENT(habanalabs_dma_alloc_template, habanalabs_dma_alloc,
TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size, const char *caller),
TP_ARGS(dev, cpu_addr, dma_addr, size, caller));
DEFINE_EVENT(habanalabs_dma_alloc_template, habanalabs_dma_free,
TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size, const char *caller),
TP_ARGS(dev, cpu_addr, dma_addr, size, caller));
#endif /* if !defined(_TRACE_HABANALABS_H) || defined(TRACE_HEADER_MULTI_READ) */
/* This part must be outside protection */
#include <trace/define_trace.h>
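/*
 * Illustrative call sites only: DEFINE_EVENT() generates a trace_<event>()
 * function with the TP_PROTO signature, so the MMU map/unmap and DMA
 * alloc/free paths can emit these events as below. hdev->dev and the local
 * variables are assumptions about the surrounding driver context.
 *
 *	trace_habanalabs_mmu_map(hdev->dev, virt_addr, phys_addr,
 *				 page_size, flush_pte);
 *	trace_habanalabs_dma_alloc(hdev->dev, (u64)(uintptr_t)cpu_addr,
 *				   dma_addr, size, __func__);
 */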