Commit 9e072793 authored by Greg Kroah-Hartman's avatar Greg Kroah-Hartman

Merge tag 'misc-habanalabs-next-2020-09-22' of...

Merge tag 'misc-habanalabs-next-2020-09-22' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next

Oded writes:

This tag contains the following changes for kernel 5.10-rc1:

- Stop using the DRM dma-fence module and instead use kernel completions
  (a short sketch of the new fence pattern follows this list)
- Support PCIe AER
- Use dma_mmap_coherent for memory allocated using dma_alloc_coherent
- Use the smallest possible alignment when allocating virtual addresses in
  our MMU driver
- Refactor the MMU driver code to be device-oriented
- Allow the user to check CS status without any sleep
- Add an option to map a Command Buffer to the Device's MMU
- Expose sync manager resource allocation to the user through the INFO IOCTL
- Convert code to use the standard BIT(), GENMASK() and FIELD_PREP() macros
- Many small fixes (casting, better error messages, removal of unused
  defines, h/w configuration fixes, etc.)
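
As context for the first item: the driver-private fence introduced by this
tag is just a kref plus a completion. A minimal sketch of the pattern, using
only standard kernel primitives (the struct layout follows the hl_fence code
in the diff below; the helper functions are illustrative, not the driver's
exact flow):

#include <linux/kref.h>
#include <linux/completion.h>

/* Mirrors the hl_fence introduced in this tag: no dma-fence involved. */
struct hl_fence {
	struct completion	completion;	/* signaled when the CS completes */
	struct kref		refcount;	/* fence lifetime */
	int			error;		/* 0, or -ETIMEDOUT/-EIO/-EBUSY */
};

static void example_fence_init(struct hl_fence *fence)
{
	kref_init(&fence->refcount);
	fence->error = 0;
	init_completion(&fence->completion);
}

/* Producer side: record the error (if any) and wake all waiters. */
static void example_fence_signal(struct hl_fence *fence, int error)
{
	fence->error = error;
	complete_all(&fence->completion);
}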

* tag 'misc-habanalabs-next-2020-09-22' of git://people.freedesktop.org/~gabbayo/linux: (46 commits)
  habanalabs: update scratchpad register map
  habanalabs: add indication of security-enabled F/W
  habanalabs/gaudi: fix DMA completions max outstanding to 15
  habanalabs/gaudi: remove axi drain support
  habanalabs: update firmware interface file
  habanalabs: Add an option to map CB to device MMU
  habanalabs: Save context in a command buffer object
  habanalabs: no need for DMA_SHARED_BUFFER
  habanalabs: allow to wait on CS without sleep
  habanalabs/gaudi: increase timeout for boot fit load
  habanalabs: add debugfs support for MMU with 6 HOPs
  habanalabs: add num_hops to hl_mmu_properties
  habanalabs: refactor MMU as device-oriented
  habanalabs: rename mmu.c to mmu_v1.c
  habanalabs: use smallest possible alignment for virtual addresses
  habanalabs: check flag before reset because of f/w event
  habanalabs: increase PQ COMP_OFFSET by one nibble
  habanalabs: Fix alignment issue in cpucp_info structure
  habanalabs: remove unused define
  habanalabs: remove unused ASIC function pointer
  ...
parents e82ed736 f279e5cd
@@ -2,13 +2,17 @@
What:		/sys/class/habanalabs/hl<n>/armcp_kernel_ver
Date:		Jan 2019
KernelVersion:	5.1
Contact:	oded.gabbay@gmail.com
-Description:	Version of the Linux kernel running on the device's CPU
+Description:	Version of the Linux kernel running on the device's CPU.
+		Will be DEPRECATED in Linux kernel version 5.10, and be
+		replaced with cpucp_kernel_ver

What:		/sys/class/habanalabs/hl<n>/armcp_ver
Date:		Jan 2019
KernelVersion:	5.1
Contact:	oded.gabbay@gmail.com
Description:	Version of the application running on the device's CPU
+		Will be DEPRECATED in Linux kernel version 5.10, and be
+		replaced with cpucp_ver

What:		/sys/class/habanalabs/hl<n>/clk_max_freq_mhz
Date:		Jun 2019
@@ -33,6 +37,18 @@
KernelVersion:	5.1
Contact:	oded.gabbay@gmail.com
Description:	Version of the Device's CPLD F/W

+What:		/sys/class/habanalabs/hl<n>/cpucp_kernel_ver
+Date:		Oct 2020
+KernelVersion:	5.10
+Contact:	oded.gabbay@gmail.com
+Description:	Version of the Linux kernel running on the device's CPU
+
+What:		/sys/class/habanalabs/hl<n>/cpucp_ver
+Date:		Oct 2020
+KernelVersion:	5.10
+Contact:	oded.gabbay@gmail.com
+Description:	Version of the application running on the device's CPU
+
What:		/sys/class/habanalabs/hl<n>/device_type
Date:		Jan 2019
KernelVersion:	5.1
......
@@ -7,7 +7,6 @@ config HABANA_AI
	tristate "HabanaAI accelerators (habanalabs)"
	depends on PCI && HAS_IOMEM
	select FRAME_VECTOR
-	select DMA_SHARED_BUFFER
	select GENERIC_ALLOCATOR
	select HWMON
	help
......
@@ -3,5 +3,5 @@
HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
		common/asid.o common/habanalabs_ioctl.o \
		common/command_buffer.o common/hw_queue.o common/irq.o \
		common/sysfs.o common/hwmon.o common/memory.o \
-		common/command_submission.o common/mmu.o common/firmware_if.o \
-		common/pci.o
+		common/command_submission.o common/mmu.o common/mmu_v1.o \
+		common/firmware_if.o common/pci.o
@@ -13,6 +13,131 @@
#include <linux/uaccess.h>
#include <linux/genalloc.h>
static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_vm_va_block *va_block, *tmp;
	dma_addr_t bus_addr;
	u64 virt_addr;
	u32 page_size = prop->pmmu.page_size;
	s32 offset;
	int rc;

	if (!hdev->supports_cb_mapping) {
		dev_err_ratelimited(hdev->dev,
				"Cannot map CB because no VA range is allocated for CB mapping\n");
		return -EINVAL;
	}

	if (!hdev->mmu_enable) {
		dev_err_ratelimited(hdev->dev,
				"Cannot map CB because MMU is disabled\n");
		return -EINVAL;
	}

	INIT_LIST_HEAD(&cb->va_block_list);

	for (bus_addr = cb->bus_address;
			bus_addr < cb->bus_address + cb->size;
			bus_addr += page_size) {

		virt_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, page_size);
		if (!virt_addr) {
			dev_err(hdev->dev,
				"Failed to allocate device virtual address for CB\n");
			rc = -ENOMEM;
			goto err_va_pool_free;
		}

		va_block = kzalloc(sizeof(*va_block), GFP_KERNEL);
		if (!va_block) {
			rc = -ENOMEM;
			gen_pool_free(ctx->cb_va_pool, virt_addr, page_size);
			goto err_va_pool_free;
		}

		va_block->start = virt_addr;
		va_block->end = virt_addr + page_size;
		va_block->size = page_size;
		list_add_tail(&va_block->node, &cb->va_block_list);
	}

	mutex_lock(&ctx->mmu_lock);

	bus_addr = cb->bus_address;
	offset = 0;
	list_for_each_entry(va_block, &cb->va_block_list, node) {
		rc = hl_mmu_map(ctx, va_block->start, bus_addr, va_block->size,
				list_is_last(&va_block->node,
						&cb->va_block_list));
		if (rc) {
			dev_err(hdev->dev, "Failed to map VA %#llx to CB\n",
				va_block->start);
			goto err_va_umap;
		}

		bus_addr += va_block->size;
		offset += va_block->size;
	}

	hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);

	mutex_unlock(&ctx->mmu_lock);

	cb->is_mmu_mapped = true;

	return 0;

err_va_umap:
	list_for_each_entry(va_block, &cb->va_block_list, node) {
		if (offset <= 0)
			break;
		hl_mmu_unmap(ctx, va_block->start, va_block->size,
				offset <= va_block->size);
		offset -= va_block->size;
	}

	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);

	mutex_unlock(&ctx->mmu_lock);

err_va_pool_free:
	list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
		gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
		list_del(&va_block->node);
		kfree(va_block);
	}

	return rc;
}

static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
	struct hl_device *hdev = ctx->hdev;
	struct hl_vm_va_block *va_block, *tmp;

	mutex_lock(&ctx->mmu_lock);

	list_for_each_entry(va_block, &cb->va_block_list, node)
		if (hl_mmu_unmap(ctx, va_block->start, va_block->size,
				list_is_last(&va_block->node,
						&cb->va_block_list)))
			dev_warn_ratelimited(hdev->dev,
					"Failed to unmap CB's va 0x%llx\n",
					va_block->start);

	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);

	mutex_unlock(&ctx->mmu_lock);

	list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
		gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
		list_del(&va_block->node);
		kfree(va_block);
	}
}
static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
{
	if (cb->is_internal)
@@ -47,6 +172,11 @@ static void cb_release(struct kref *ref)

	hl_debugfs_remove_cb(cb);

+	if (cb->is_mmu_mapped)
+		cb_unmap_mem(cb->ctx, cb);
+
+	hl_ctx_put(cb->ctx);
+
	cb_do_release(hdev, cb);
}
@@ -107,11 +237,12 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
}

int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
-		u32 cb_size, u64 *handle, int ctx_id, bool internal_cb)
+		struct hl_ctx *ctx, u32 cb_size, bool internal_cb,
+		bool map_cb, u64 *handle)
{
	struct hl_cb *cb;
	bool alloc_new_cb = true;
-	int rc;
+	int rc, ctx_id = ctx->asid;

	/*
	 * Can't use generic function to check this because of special case
@@ -163,7 +294,21 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
	}

	cb->hdev = hdev;
-	cb->ctx_id = ctx_id;
+	cb->ctx = ctx;
+	hl_ctx_get(hdev, cb->ctx);
+
+	if (map_cb) {
+		if (ctx_id == HL_KERNEL_ASID_ID) {
+			dev_err(hdev->dev,
+				"CB mapping is not supported for kernel context\n");
+			rc = -EINVAL;
+			goto release_cb;
+		}
+
+		rc = cb_map_mem(ctx, cb);
+		if (rc)
+			goto release_cb;
+	}
	spin_lock(&mgr->cb_lock);
	rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
@@ -171,10 +316,10 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
	if (rc < 0) {
		dev_err(hdev->dev, "Failed to allocate IDR for a new CB\n");
-		goto release_cb;
+		goto unmap_mem;
	}

-	cb->id = rc;
+	cb->id = (u64) rc;

	kref_init(&cb->refcount);
	spin_lock_init(&cb->lock);
@@ -183,14 +328,18 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
	/*
	 * idr is 32-bit so we can safely OR it with a mask that is above
	 * 32 bit
	 */
-	*handle = cb->id | HL_MMAP_CB_MASK;
+	*handle = cb->id | HL_MMAP_TYPE_CB;
	*handle <<= PAGE_SHIFT;

	hl_debugfs_add_cb(cb);

	return 0;

+unmap_mem:
+	if (cb->is_mmu_mapped)
+		cb_unmap_mem(cb->ctx, cb);
release_cb:
+	hl_ctx_put(cb->ctx);
	cb_do_release(hdev, cb);
out_err:
	*handle = 0;
@@ -250,9 +399,10 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
			args->in.cb_size, HL_MAX_CB_SIZE);
		rc = -EINVAL;
	} else {
-		rc = hl_cb_create(hdev, &hpriv->cb_mgr,
-				args->in.cb_size, &handle,
-				hpriv->ctx->asid, false);
+		rc = hl_cb_create(hdev, &hpriv->cb_mgr, hpriv->ctx,
+				args->in.cb_size, false,
+				!!(args->in.flags & HL_CB_FLAGS_MAP),
+				&handle);
	}

	memset(args, 0, sizeof(*args));
@@ -300,11 +450,14 @@ int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cb *cb;
-	phys_addr_t address;
	u32 handle, user_cb_size;
	int rc;

+	/* We use the page offset to hold the idr and thus we need to clear
+	 * it before doing the mmap itself
+	 */
	handle = vma->vm_pgoff;
+	vma->vm_pgoff = 0;

	/* reference was taken here */
	cb = hl_cb_get(hdev, &hpriv->cb_mgr, handle);
@@ -356,12 +509,8 @@ int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
	vma->vm_private_data = cb;

-	/* Calculate address for CB */
-	address = virt_to_phys((void *) (uintptr_t) cb->kernel_address);
-
-	rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address,
-			address, cb->size);
+	rc = hdev->asic_funcs->cb_mmap(hdev, vma, (void *) cb->kernel_address,
+			cb->bus_address, cb->size);

	if (rc) {
		spin_lock(&cb->lock);
		cb->mmap = false;
@@ -425,7 +574,7 @@ void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr)
		if (kref_put(&cb->refcount, cb_release) != 1)
			dev_err(hdev->dev,
				"CB %d for CTX ID %d is still alive\n",
-				id, cb->ctx_id);
+				id, cb->ctx->asid);
	}

	idr_destroy(&mgr->cb_handles);
@@ -438,8 +587,8 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
	struct hl_cb *cb;
	int rc;

-	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle,
-			HL_KERNEL_ASID_ID, internal_cb);
+	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, cb_size,
+			internal_cb, false, &cb_handle);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate CB for the kernel driver %d\n", rc);
@@ -495,3 +644,45 @@ int hl_cb_pool_fini(struct hl_device *hdev)
	return 0;
}
int hl_cb_va_pool_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!hdev->supports_cb_mapping)
		return 0;

	ctx->cb_va_pool = gen_pool_create(__ffs(prop->pmmu.page_size), -1);
	if (!ctx->cb_va_pool) {
		dev_err(hdev->dev,
			"Failed to create VA gen pool for CB mapping\n");
		return -ENOMEM;
	}

	rc = gen_pool_add(ctx->cb_va_pool, prop->cb_va_start_addr,
			prop->cb_va_end_addr - prop->cb_va_start_addr, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to VA gen pool for CB mapping\n");
		goto err_pool_destroy;
	}

	return 0;

err_pool_destroy:
	gen_pool_destroy(ctx->cb_va_pool);

	return rc;
}

void hl_cb_va_pool_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;

	if (!hdev->supports_cb_mapping)
		return;

	gen_pool_destroy(ctx->cb_va_pool);
}
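
The VA pool above is a plain genalloc pool. A minimal, self-contained sketch
of the same pattern, with hypothetical sizes and addresses (the
<linux/genalloc.h> API itself is real):

#include <linux/genalloc.h>
#include <linux/sizes.h>

/* Illustrative only: carve a 2 MB device VA window into 4 KB blocks. */
static int example_va_pool(void)
{
	/* min alloc order 12 -> every allocation is a multiple of 4 KB */
	struct gen_pool *pool = gen_pool_create(12, -1);
	unsigned long va;
	int rc;

	if (!pool)
		return -ENOMEM;

	/* hypothetical VA range reserved for mapped command buffers */
	rc = gen_pool_add(pool, 0x8000000000ull, SZ_2M, -1);
	if (rc) {
		gen_pool_destroy(pool);
		return rc;
	}

	va = gen_pool_alloc(pool, SZ_4K);	/* one page-sized block */
	if (va)
		gen_pool_free(pool, va, SZ_4K);

	gen_pool_destroy(pool);
	return 0;
}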
@@ -38,26 +38,10 @@ void hl_sob_reset_error(struct kref *ref)
		hw_sob->q_idx, hw_sob->sob_id);
}

-static const char *hl_fence_get_driver_name(struct dma_fence *fence)
-{
-	return "HabanaLabs";
-}
-
-static const char *hl_fence_get_timeline_name(struct dma_fence *fence)
-{
-	struct hl_cs_compl *hl_cs_compl =
-		container_of(fence, struct hl_cs_compl, base_fence);
-
-	return dev_name(hl_cs_compl->hdev->dev);
-}
-
-static bool hl_fence_enable_signaling(struct dma_fence *fence)
-{
-	return true;
-}
-
-static void hl_fence_release(struct dma_fence *fence)
+static void hl_fence_release(struct kref *kref)
{
+	struct hl_fence *fence =
+		container_of(kref, struct hl_fence, refcount);
	struct hl_cs_compl *hl_cs_cmpl =
		container_of(fence, struct hl_cs_compl, base_fence);
	struct hl_device *hdev = hl_cs_cmpl->hdev;
@@ -99,15 +83,27 @@ static void hl_fence_release(struct dma_fence *fence)
	}

free:
-	kfree_rcu(hl_cs_cmpl, base_fence.rcu);
+	kfree(hl_cs_cmpl);
}

-static const struct dma_fence_ops hl_fence_ops = {
-	.get_driver_name = hl_fence_get_driver_name,
-	.get_timeline_name = hl_fence_get_timeline_name,
-	.enable_signaling = hl_fence_enable_signaling,
-	.release = hl_fence_release
-};
+void hl_fence_put(struct hl_fence *fence)
+{
+	if (fence)
+		kref_put(&fence->refcount, hl_fence_release);
+}
+
+void hl_fence_get(struct hl_fence *fence)
+{
+	if (fence)
+		kref_get(&fence->refcount);
+}
+
+static void hl_fence_init(struct hl_fence *fence)
+{
+	kref_init(&fence->refcount);
+	fence->error = 0;
+	init_completion(&fence->completion);
+}
static void cs_get(struct hl_cs *cs)
{
@@ -256,6 +252,8 @@ static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx)
			ctx->cs_counters.parsing_drop_cnt;
	hdev->aggregated_cs_counters.queue_full_drop_cnt +=
			ctx->cs_counters.queue_full_drop_cnt;
+	hdev->aggregated_cs_counters.max_cs_in_flight_drop_cnt +=
+			ctx->cs_counters.max_cs_in_flight_drop_cnt;
}
static void cs_do_release(struct kref *ref)
@@ -336,7 +334,7 @@ static void cs_do_release(struct kref *ref)
		 * In case the wait for signal CS was submitted, the put occurs
		 * in init_signal_wait_cs() right before hanging on the PQ.
		 */
-		dma_fence_put(cs->signal_fence);
+		hl_fence_put(cs->signal_fence);
	}

	/*
@@ -348,19 +346,18 @@ static void cs_do_release(struct kref *ref)
	hl_ctx_put(cs->ctx);

	/* We need to mark an error for not submitted because in that case
-	 * the dma fence release flow is different. Mainly, we don't need
+	 * the hl fence release flow is different. Mainly, we don't need
	 * to handle hw_sob for signal/wait
	 */
	if (cs->timedout)
-		dma_fence_set_error(cs->fence, -ETIMEDOUT);
+		cs->fence->error = -ETIMEDOUT;
	else if (cs->aborted)
-		dma_fence_set_error(cs->fence, -EIO);
+		cs->fence->error = -EIO;
	else if (!cs->submitted)
-		dma_fence_set_error(cs->fence, -EBUSY);
-
-	dma_fence_signal(cs->fence);
-	dma_fence_put(cs->fence);
+		cs->fence->error = -EBUSY;
+
+	complete_all(&cs->fence->completion);
+	hl_fence_put(cs->fence);

	cs_counters_aggregate(hdev, cs->ctx);

	kfree(cs->jobs_in_queue_cnt);
@@ -401,7 +398,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
		enum hl_cs_type cs_type, struct hl_cs **cs_new)
{
	struct hl_cs_compl *cs_cmpl;
-	struct dma_fence *other = NULL;
+	struct hl_fence *other = NULL;
	struct hl_cs *cs;
	int rc;
@@ -434,9 +431,11 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
	cs_cmpl->cs_seq = ctx->cs_sequence;
	other = ctx->cs_pending[cs_cmpl->cs_seq &
				(hdev->asic_prop.max_pending_cs - 1)];
-	if ((other) && (!dma_fence_is_signaled(other))) {
-		dev_dbg(hdev->dev,
+
+	if (other && !completion_done(&other->completion)) {
+		dev_dbg_ratelimited(hdev->dev,
			"Rejecting CS because of too many in-flights CS\n");
+		ctx->cs_counters.max_cs_in_flight_drop_cnt++;
		rc = -EAGAIN;
		goto free_fence;
	}
@@ -448,8 +447,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
		goto free_fence;
	}

-	dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock,
-			ctx->asid, ctx->cs_sequence);
+	/* init hl_fence */
+	hl_fence_init(&cs_cmpl->base_fence);

	cs->sequence = cs_cmpl->cs_seq;
@@ -458,9 +457,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
			&cs_cmpl->base_fence;
	ctx->cs_sequence++;

-	dma_fence_get(&cs_cmpl->base_fence);
+	hl_fence_get(&cs_cmpl->base_fence);

-	dma_fence_put(other);
+	hl_fence_put(other);

	spin_unlock(&ctx->cs_lock);
@@ -690,8 +689,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
			rc = -ENOMEM;
			if (is_kernel_allocated_cb)
				goto release_cb;
-			else
-				goto free_cs_object;
+
+			goto free_cs_object;
		}

		job->id = i + 1;
@@ -773,7 +772,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
	struct hl_ctx *ctx = hpriv->ctx;
	struct hl_cs_chunk *cs_chunk_array, *chunk;
	struct hw_queue_properties *hw_queue_prop;
-	struct dma_fence *sig_fence = NULL;
+	struct hl_fence *sig_fence = NULL;
	struct hl_cs_job *job;
	struct hl_cs *cs;
	struct hl_cb *cb;
@@ -883,14 +882,14 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
			dev_err(hdev->dev,
				"CS seq 0x%llx is not of a signal CS\n",
				signal_seq);
-			dma_fence_put(sig_fence);
+			hl_fence_put(sig_fence);
			rc = -EINVAL;
			goto free_signal_seq_array;
		}

-		if (dma_fence_is_signaled(sig_fence)) {
+		if (completion_done(&sig_fence->completion)) {
			/* signal CS already finished */
-			dma_fence_put(sig_fence);
+			hl_fence_put(sig_fence);
			rc = 0;
			goto free_signal_seq_array;
		}
@@ -902,7 +901,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
	rc = allocate_cs(hdev, ctx, cs_type, &cs);
	if (rc) {
		if (cs_type == CS_TYPE_WAIT)
-			dma_fence_put(sig_fence);
+			hl_fence_put(sig_fence);
		hl_ctx_put(ctx);
		goto free_signal_seq_array;
	}
@@ -1162,7 +1161,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
static long _hl_cs_wait_ioctl(struct hl_device *hdev,
		struct hl_ctx *ctx, u64 timeout_us, u64 seq)
{
-	struct dma_fence *fence;
+	struct hl_fence *fence;
	unsigned long timeout;
	long rc;
@@ -1181,12 +1180,18 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
			"Can't wait on CS %llu because current CS is at seq %llu\n",
			seq, ctx->cs_sequence);
	} else if (fence) {
-		rc = dma_fence_wait_timeout(fence, true, timeout);
+		if (!timeout_us)
+			rc = completion_done(&fence->completion);
+		else
+			rc = wait_for_completion_interruptible_timeout(
+					&fence->completion, timeout);
+
		if (fence->error == -ETIMEDOUT)
			rc = -ETIMEDOUT;
		else if (fence->error == -EIO)
			rc = -EIO;
-		dma_fence_put(fence);
+
+		hl_fence_put(fence);
	} else {
		dev_dbg(hdev->dev,
			"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
......
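
The timeout_us == 0 branch above is what implements the "wait on CS without
sleep" changelog item: completion_done() only peeks at the completion state
and never blocks. A minimal sketch of the two paths, assuming only standard
<linux/completion.h> (the example_* name is illustrative; 'done' stands in
for fence->completion):

#include <linux/completion.h>
#include <linux/jiffies.h>

static long example_cs_status(struct completion *done, u64 timeout_us)
{
	if (!timeout_us)
		/* non-blocking poll: 1 if the CS already completed, else 0 */
		return completion_done(done);

	/* blocking: sleeps up to the timeout, may return -ERESTARTSYS */
	return wait_for_completion_interruptible_timeout(done,
			usecs_to_jiffies(timeout_us));
}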
@@ -23,7 +23,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
	 */
	for (i = 0 ; i < hdev->asic_prop.max_pending_cs ; i++)
-		dma_fence_put(ctx->cs_pending[i]);
+		hl_fence_put(ctx->cs_pending[i]);

	kfree(ctx->cs_pending);
@@ -37,6 +37,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
		if ((hdev->in_debug) && (hdev->compute_ctx == ctx))
			hl_device_set_debug_mode(hdev, false);

+		hl_cb_va_pool_fini(ctx);
		hl_vm_ctx_fini(ctx);
		hl_asid_free(hdev, ctx->asid);
	} else {
@@ -128,7 +129,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
	atomic_set(&ctx->thread_ctx_switch_token, 1);
	ctx->thread_ctx_switch_wait_token = 0;
	ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
-				sizeof(struct dma_fence *),
+				sizeof(struct hl_fence *),
				GFP_KERNEL);
	if (!ctx->cs_pending)
		return -ENOMEM;
@@ -155,15 +156,24 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
			goto err_asid_free;
		}

+		rc = hl_cb_va_pool_init(ctx);
+		if (rc) {
+			dev_err(hdev->dev,
+				"Failed to init VA pool for mapped CB\n");
+			goto err_vm_ctx_fini;
+		}
+
		rc = hdev->asic_funcs->ctx_init(ctx);
		if (rc) {
			dev_err(hdev->dev, "ctx_init failed\n");
-			goto err_vm_ctx_fini;
+			goto err_cb_va_pool_fini;
		}
	}

	return 0;

+err_cb_va_pool_fini:
+	hl_cb_va_pool_fini(ctx);
err_vm_ctx_fini:
	hl_vm_ctx_fini(ctx);
err_asid_free:
@@ -184,10 +194,10 @@ int hl_ctx_put(struct hl_ctx *ctx)
	return kref_put(&ctx->refcount, hl_ctx_do_release);
}

-struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
+struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
{
	struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
-	struct dma_fence *fence;
+	struct hl_fence *fence;

	spin_lock(&ctx->cs_lock);
@@ -201,8 +211,9 @@ struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
		return NULL;
	}

-	fence = dma_fence_get(
-		ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]);
+	fence = ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)];
+	hl_fence_get(fence);
	spin_unlock(&ctx->cs_lock);

	return fence;
......
@@ -21,7 +21,7 @@ static struct dentry *hl_debug_root;
static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
				u8 i2c_reg, long *val)
{
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
	int rc;

	if (hl_device_disabled_or_in_reset(hdev))
@@ -29,8 +29,8 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
	memset(&pkt, 0, sizeof(pkt));

-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_RD <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_RD <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.i2c_bus = i2c_bus;
	pkt.i2c_addr = i2c_addr;
	pkt.i2c_reg = i2c_reg;
@@ -47,7 +47,7 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
				u8 i2c_reg, u32 val)
{
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
	int rc;

	if (hl_device_disabled_or_in_reset(hdev))
@@ -55,8 +55,8 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
	memset(&pkt, 0, sizeof(pkt));

-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_WR <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_WR <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.i2c_bus = i2c_bus;
	pkt.i2c_addr = i2c_addr;
	pkt.i2c_reg = i2c_reg;
@@ -73,7 +73,7 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)
{
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
	int rc;

	if (hl_device_disabled_or_in_reset(hdev))
@@ -81,8 +81,8 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)
	memset(&pkt, 0, sizeof(pkt));

-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_LED_SET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_LED_SET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.led_index = cpu_to_le32(led);
	pkt.value = cpu_to_le64(state);
@@ -110,8 +110,8 @@ static int command_buffers_show(struct seq_file *s, void *data)
		seq_puts(s, "---------------------------------------------------------------\n");
	}
	seq_printf(s,
-		" %03d %d 0x%08x %d %d %d\n",
-		cb->id, cb->ctx_id, cb->size,
+		" %03llu %d 0x%08x %d %d %d\n",
+		cb->id, cb->ctx->asid, cb->size,
		kref_read(&cb->refcount),
		cb->mmap, cb->cs_cnt);
}
@@ -354,6 +354,14 @@ static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
			mmu_specs->hop4_shift);
}

+static inline u64 get_hop5_pte_addr(struct hl_ctx *ctx,
+					struct hl_mmu_properties *mmu_specs,
+					u64 hop_addr, u64 vaddr)
+{
+	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop5_mask,
+					mmu_specs->hop5_shift);
+}
+
static inline u64 get_next_hop_addr(u64 curr_pte)
{
	if (curr_pte & PAGE_PRESENT_MASK)
@@ -377,6 +385,7 @@ static int mmu_show(struct seq_file *s, void *data)
		hop2_addr = 0, hop2_pte_addr = 0, hop2_pte = 0,
		hop3_addr = 0, hop3_pte_addr = 0, hop3_pte = 0,
		hop4_addr = 0, hop4_pte_addr = 0, hop4_pte = 0,
+		hop5_addr = 0, hop5_pte_addr = 0, hop5_pte = 0,
		virt_addr = dev_entry->mmu_addr;

	if (!hdev->mmu_enable)
@@ -428,20 +437,49 @@ static int mmu_show(struct seq_file *s, void *data)
	hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
	hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr);

-	if (!(hop3_pte & LAST_MASK)) {
-		hop4_addr = get_next_hop_addr(hop3_pte);
-
-		if (hop4_addr == ULLONG_MAX)
-			goto not_mapped;
-
-		hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
-							virt_addr);
-		hop4_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr);
-		if (!(hop4_pte & PAGE_PRESENT_MASK))
-			goto not_mapped;
-	} else {
-		if (!(hop3_pte & PAGE_PRESENT_MASK))
-			goto not_mapped;
-	}
+	if (mmu_prop->num_hops == MMU_ARCH_5_HOPS) {
+		if (!(hop3_pte & LAST_MASK)) {
+			hop4_addr = get_next_hop_addr(hop3_pte);
+
+			if (hop4_addr == ULLONG_MAX)
+				goto not_mapped;
+
+			hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop,
+							hop4_addr, virt_addr);
+			hop4_pte = hdev->asic_funcs->read_pte(hdev,
+								hop4_pte_addr);
+			if (!(hop4_pte & PAGE_PRESENT_MASK))
+				goto not_mapped;
+		} else {
+			if (!(hop3_pte & PAGE_PRESENT_MASK))
+				goto not_mapped;
+		}
+	} else {
+		hop4_addr = get_next_hop_addr(hop3_pte);
+
+		if (hop4_addr == ULLONG_MAX)
+			goto not_mapped;
+
+		hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop,
+						hop4_addr, virt_addr);
+		hop4_pte = hdev->asic_funcs->read_pte(hdev,
+							hop4_pte_addr);
+		if (!(hop4_pte & LAST_MASK)) {
+			hop5_addr = get_next_hop_addr(hop4_pte);
+
+			if (hop5_addr == ULLONG_MAX)
+				goto not_mapped;
+
+			hop5_pte_addr = get_hop5_pte_addr(ctx, mmu_prop,
+							hop5_addr, virt_addr);
+			hop5_pte = hdev->asic_funcs->read_pte(hdev,
+								hop5_pte_addr);
+			if (!(hop5_pte & PAGE_PRESENT_MASK))
+				goto not_mapped;
+		} else {
+			if (!(hop4_pte & PAGE_PRESENT_MASK))
+				goto not_mapped;
+		}
+	}
seq_printf(s, "asid: %u, virt_addr: 0x%llx\n", seq_printf(s, "asid: %u, virt_addr: 0x%llx\n",
...@@ -463,10 +501,22 @@ static int mmu_show(struct seq_file *s, void *data) ...@@ -463,10 +501,22 @@ static int mmu_show(struct seq_file *s, void *data)
seq_printf(s, "hop3_pte_addr: 0x%llx\n", hop3_pte_addr); seq_printf(s, "hop3_pte_addr: 0x%llx\n", hop3_pte_addr);
seq_printf(s, "hop3_pte: 0x%llx\n", hop3_pte); seq_printf(s, "hop3_pte: 0x%llx\n", hop3_pte);
if (!(hop3_pte & LAST_MASK)) { if (mmu_prop->num_hops == MMU_ARCH_5_HOPS) {
if (!(hop3_pte & LAST_MASK)) {
seq_printf(s, "hop4_addr: 0x%llx\n", hop4_addr);
seq_printf(s, "hop4_pte_addr: 0x%llx\n", hop4_pte_addr);
seq_printf(s, "hop4_pte: 0x%llx\n", hop4_pte);
}
} else {
seq_printf(s, "hop4_addr: 0x%llx\n", hop4_addr); seq_printf(s, "hop4_addr: 0x%llx\n", hop4_addr);
seq_printf(s, "hop4_pte_addr: 0x%llx\n", hop4_pte_addr); seq_printf(s, "hop4_pte_addr: 0x%llx\n", hop4_pte_addr);
seq_printf(s, "hop4_pte: 0x%llx\n", hop4_pte); seq_printf(s, "hop4_pte: 0x%llx\n", hop4_pte);
if (!(hop4_pte & LAST_MASK)) {
seq_printf(s, "hop5_addr: 0x%llx\n", hop5_addr);
seq_printf(s, "hop5_pte_addr: 0x%llx\n", hop5_pte_addr);
seq_printf(s, "hop5_pte: 0x%llx\n", hop5_pte);
}
} }
goto out; goto out;
......
@@ -123,9 +123,13 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct hl_fpriv *hpriv = filp->private_data;
+	unsigned long vm_pgoff;

-	if ((vma->vm_pgoff & HL_MMAP_CB_MASK) == HL_MMAP_CB_MASK) {
-		vma->vm_pgoff ^= HL_MMAP_CB_MASK;
+	vm_pgoff = vma->vm_pgoff;
+	vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff);
+
+	switch (vm_pgoff & HL_MMAP_TYPE_MASK) {
+	case HL_MMAP_TYPE_CB:
		return hl_cb_mmap(hpriv, vma);
	}
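
The handle encoding this switch decodes is set up in hl_cb_create above: the
IDR value is OR'd with a type mask above bit 32 and shifted by PAGE_SHIFT. A
standalone sketch of the scheme, where the EXAMPLE_* constants are
illustrative, not the driver's actual mask values:

#include <linux/bits.h>

/* hypothetical layout: low bits carry the IDR id, high bits the type */
#define EXAMPLE_MMAP_TYPE_SHIFT	59
#define EXAMPLE_MMAP_TYPE_MASK	GENMASK_ULL(63, EXAMPLE_MMAP_TYPE_SHIFT)
#define EXAMPLE_MMAP_TYPE_CB	BIT_ULL(EXAMPLE_MMAP_TYPE_SHIFT)
#define EXAMPLE_OFFSET_VALUE(off)	((off) & ~EXAMPLE_MMAP_TYPE_MASK)

/* userspace receives: handle = (id | type_bit) << PAGE_SHIFT */
static u64 example_encode_handle(u32 idr_id)
{
	return ((u64) idr_id | EXAMPLE_MMAP_TYPE_CB) << PAGE_SHIFT;
}

/* hl_mmap() sees handle >> PAGE_SHIFT in vma->vm_pgoff and splits it */
static void example_decode(unsigned long vm_pgoff, u64 *type, u64 *id)
{
	*type = vm_pgoff & EXAMPLE_MMAP_TYPE_MASK;
	*id = EXAMPLE_OFFSET_VALUE(vm_pgoff);
}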
@@ -286,7 +290,7 @@ static int device_early_init(struct hl_device *hdev)
	}

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
-		snprintf(workq_name, 32, "hl-free-jobs-%u", i);
+		snprintf(workq_name, 32, "hl-free-jobs-%u", (u32) i);
		hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
		if (hdev->cq_wq[i] == NULL) {
			dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
@@ -317,6 +321,10 @@ static int device_early_init(struct hl_device *hdev)
		goto free_chip_info;
	}

+	rc = hl_mmu_if_set_funcs(hdev);
+	if (rc)
+		goto free_idle_busy_ts_arr;
+
	hl_cb_mgr_init(&hdev->kernel_cb_mgr);

	mutex_init(&hdev->send_cpu_message_lock);
@@ -330,6 +338,8 @@ static int device_early_init(struct hl_device *hdev)

	return 0;

+free_idle_busy_ts_arr:
+	kfree(hdev->idle_busy_ts_arr);
free_chip_info:
	kfree(hdev->hl_chip_info);
free_eq_wq:
@@ -871,7 +881,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
	 * so this message won't be sent
	 */
	if (hl_fw_send_pci_access_msg(hdev,
-			ARMCP_PACKET_DISABLE_PCI_ACCESS))
+			CPUCP_PACKET_DISABLE_PCI_ACCESS))
		dev_warn(hdev->dev,
			"Failed to disable PCI access by F/W\n");
}
......
@@ -11,6 +11,7 @@
#include "habanalabs.h"

#include <linux/pci.h>
+#include <linux/aer.h>
#include <linux/module.h>

#define HL_DRIVER_AUTHOR	"HabanaLabs Kernel Driver Team"
@@ -408,6 +409,8 @@ static int hl_pci_probe(struct pci_dev *pdev,
	pci_set_drvdata(pdev, hdev);

+	pci_enable_pcie_error_reporting(pdev);
+
	rc = hl_device_init(hdev, hl_class);
	if (rc) {
		dev_err(&pdev->dev, "Fatal error during habanalabs device init\n");
@@ -440,22 +443,93 @@ static void hl_pci_remove(struct pci_dev *pdev)
		return;

	hl_device_fini(hdev);
+	pci_disable_pcie_error_reporting(pdev);
	pci_set_drvdata(pdev, NULL);
	destroy_hdev(hdev);
}
/**
 * hl_pci_err_detected - a PCI bus error detected on this device
 *
 * @pdev: pointer to pci device
 * @state: PCI error type
 *
 * Called by the PCI subsystem whenever a non-correctable
 * PCI bus error is detected
 */
static pci_ers_result_t
hl_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state)
{
	struct hl_device *hdev = pci_get_drvdata(pdev);
	enum pci_ers_result result;

	switch (state) {
	case pci_channel_io_normal:
		return PCI_ERS_RESULT_CAN_RECOVER;

	case pci_channel_io_frozen:
		dev_warn(hdev->dev, "frozen state error detected\n");
		result = PCI_ERS_RESULT_NEED_RESET;
		break;

	case pci_channel_io_perm_failure:
		dev_warn(hdev->dev, "failure state error detected\n");
		result = PCI_ERS_RESULT_DISCONNECT;
		break;

	default:
		result = PCI_ERS_RESULT_NONE;
	}

	hdev->asic_funcs->halt_engines(hdev, true);

	return result;
}

/**
 * hl_pci_err_resume - resume after a PCI slot reset
 *
 * @pdev: pointer to pci device
 *
 */
static void hl_pci_err_resume(struct pci_dev *pdev)
{
	struct hl_device *hdev = pci_get_drvdata(pdev);

	dev_warn(hdev->dev, "Resuming device after PCI slot reset\n");
	hl_device_resume(hdev);
}

/**
 * hl_pci_err_slot_reset - a PCI slot reset has just happened
 *
 * @pdev: pointer to pci device
 *
 * Determine if the driver can recover from the PCI slot reset
 */
static pci_ers_result_t hl_pci_err_slot_reset(struct pci_dev *pdev)
{
	return PCI_ERS_RESULT_RECOVERED;
}
static const struct dev_pm_ops hl_pm_ops = {
	.suspend = hl_pmops_suspend,
	.resume = hl_pmops_resume,
};

+static const struct pci_error_handlers hl_pci_err_handler = {
+	.error_detected = hl_pci_err_detected,
+	.slot_reset = hl_pci_err_slot_reset,
+	.resume = hl_pci_err_resume,
+};
+
static struct pci_driver hl_pci_driver = {
	.name = HL_NAME,
	.id_table = ids,
	.probe = hl_pci_probe,
	.remove = hl_pci_remove,
	.driver.pm = &hl_pm_ops,
+	.err_handler = &hl_pci_err_handler,
};
/* /*
......
@@ -8,6 +8,7 @@
#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"

+#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
@@ -64,14 +65,14 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
	hw_ip.dram_enabled = 1;
	hw_ip.num_of_events = prop->num_of_events;

-	memcpy(hw_ip.armcp_version, prop->armcp_info.armcp_version,
+	memcpy(hw_ip.cpucp_version, prop->cpucp_info.cpucp_version,
		min(VERSION_MAX_LEN, HL_INFO_VERSION_MAX_LEN));

-	memcpy(hw_ip.card_name, prop->armcp_info.card_name,
+	memcpy(hw_ip.card_name, prop->cpucp_info.card_name,
		min(CARD_NAME_MAX_LEN, HL_INFO_CARD_NAME_MAX_LEN));

-	hw_ip.armcp_cpld_version = le32_to_cpu(prop->armcp_info.cpld_version);
-	hw_ip.module_id = le32_to_cpu(prop->armcp_info.card_location);
+	hw_ip.cpld_version = le32_to_cpu(prop->cpucp_info.cpld_version);
+	hw_ip.module_id = le32_to_cpu(prop->cpucp_info.card_location);

	hw_ip.psoc_pci_pll_nr = prop->psoc_pci_pll_nr;
	hw_ip.psoc_pci_pll_nf = prop->psoc_pci_pll_nf;
@@ -131,7 +132,7 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
		return -EINVAL;

	hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev,
-					&hw_idle.busy_engines_mask, NULL);
+					&hw_idle.busy_engines_mask_ext, NULL);

	return copy_to_user(out, &hw_idle,
		min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
@@ -276,10 +277,45 @@ static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args)
		min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0;
}
static int pci_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_info_pci_counters pci_counters = {0};
	u32 max_size = args->return_size;
	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
	int rc;

	if ((!max_size) || (!out))
		return -EINVAL;

	rc = hl_fw_cpucp_pci_counters_get(hdev, &pci_counters);
	if (rc)
		return rc;

	return copy_to_user(out, &pci_counters,
		min((size_t) max_size, sizeof(pci_counters))) ? -EFAULT : 0;
}

static int clk_throttle_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_info_clk_throttle clk_throttle = {0};
	u32 max_size = args->return_size;
	void __user *out = (void __user *) (uintptr_t) args->return_pointer;

	if ((!max_size) || (!out))
		return -EINVAL;

	clk_throttle.clk_throttling_reason = hdev->clk_throttling_reason;

	return copy_to_user(out, &clk_throttle,
		min((size_t) max_size, sizeof(clk_throttle))) ? -EFAULT : 0;
}
static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
	struct hl_device *hdev = hpriv->hdev;
-	struct hl_info_cs_counters cs_counters = {0};
+	struct hl_info_cs_counters cs_counters = { {0} };
	u32 max_size = args->return_size;
	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
@@ -297,6 +333,51 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
		min((size_t) max_size, sizeof(cs_counters))) ? -EFAULT : 0;
}
static int sync_manager_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
	struct hl_device *hdev = hpriv->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_info_sync_manager sm_info = {0};
	u32 max_size = args->return_size;
	void __user *out = (void __user *) (uintptr_t) args->return_pointer;

	if ((!max_size) || (!out))
		return -EINVAL;

	if (args->dcore_id >= HL_MAX_DCORES)
		return -EINVAL;

	sm_info.first_available_sync_object =
			prop->first_available_user_sob[args->dcore_id];
	sm_info.first_available_monitor =
			prop->first_available_user_mon[args->dcore_id];

	return copy_to_user(out, &sm_info, min_t(size_t, (size_t) max_size,
			sizeof(sm_info))) ? -EFAULT : 0;
}

static int total_energy_consumption_info(struct hl_fpriv *hpriv,
			struct hl_info_args *args)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_info_energy total_energy = {0};
	u32 max_size = args->return_size;
	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
	int rc;

	if ((!max_size) || (!out))
		return -EINVAL;

	rc = hl_fw_cpucp_total_energy_get(hdev,
			&total_energy.total_energy_consumption);
	if (rc)
		return rc;

	return copy_to_user(out, &total_energy,
		min((size_t) max_size, sizeof(total_energy))) ? -EFAULT : 0;
}
static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
				struct device *dev)
{
@@ -360,6 +441,18 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
	case HL_INFO_CS_COUNTERS:
		return cs_counters_info(hpriv, args);

+	case HL_INFO_PCI_COUNTERS:
+		return pci_counters_info(hpriv, args);
+
+	case HL_INFO_CLK_THROTTLE_REASON:
+		return clk_throttle_info(hpriv, args);
+
+	case HL_INFO_SYNC_MANAGER:
+		return sync_manager_info(hpriv, args);
+
+	case HL_INFO_TOTAL_ENERGY:
+		return total_energy_consumption_info(hpriv, args);
+
	default:
		dev_err(dev, "Invalid request %d\n", args->op);
		rc = -ENOTTY;
......
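
For the new INFO queries, userspace goes through the existing HL_IOCTL_INFO
entry point. A rough userspace sketch of querying the sync manager
allocation, assuming the uapi layout in uapi/misc/habanalabs.h (the field
names follow this diff, but treat the exact struct layout and install path
of the header as assumptions):

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>	/* uapi header; install path may differ */

/* fd is an open /dev/hl<n> file descriptor */
static int query_sync_manager(int fd, unsigned int dcore_id)
{
	struct hl_info_sync_manager sm;
	struct hl_info_args args;

	memset(&args, 0, sizeof(args));
	memset(&sm, 0, sizeof(sm));

	args.op = HL_INFO_SYNC_MANAGER;
	args.dcore_id = dcore_id;	/* which DCORE to query */
	args.return_pointer = (__u64) (uintptr_t) &sm;
	args.return_size = sizeof(sm);

	if (ioctl(fd, HL_IOCTL_INFO, &args))
		return -1;

	printf("first free SOB %u, first free monitor %u\n",
		sm.first_available_sync_object,
		sm.first_available_monitor);
	return 0;
}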
@@ -288,10 +288,10 @@ static void ext_queue_schedule_job(struct hl_cs_job *job)
	ptr = cb->bus_address;

	cq_pkt.data = cpu_to_le32(
			((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
				& CQ_ENTRY_SHADOW_INDEX_MASK) |
-			(1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
-			(1 << CQ_ENTRY_READY_SHIFT));
+			FIELD_PREP(CQ_ENTRY_SHADOW_INDEX_VALID_MASK, 1) |
+			FIELD_PREP(CQ_ENTRY_READY_MASK, 1));

	/*
	 * No need to protect pi_offset because scheduling to the
@@ -474,7 +474,7 @@ static void init_signal_wait_cs(struct hl_cs *cs)
		 * wait CS was submitted.
		 */
		mb();
-		dma_fence_put(cs->signal_fence);
+		hl_fence_put(cs->signal_fence);
		cs->signal_fence = NULL;
	}
}
......
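
The FIELD_PREP conversion above is the changelog item about moving to the
standard BIT()/GENMASK()/FIELD_PREP() macros. A minimal sketch of the idiom
with made-up register fields (the <linux/bits.h> and <linux/bitfield.h>
helpers are real; the EXAMPLE_* masks are hypothetical):

#include <linux/bits.h>
#include <linux/bitfield.h>

/* hypothetical CQ entry layout */
#define EXAMPLE_SHADOW_INDEX_MASK	GENMASK(15, 0)
#define EXAMPLE_VALID_MASK		BIT(30)
#define EXAMPLE_READY_MASK		BIT(31)

static u32 example_pack_cq_entry(u32 shadow_index)
{
	/* FIELD_PREP() shifts the value into the field named by the mask */
	return FIELD_PREP(EXAMPLE_SHADOW_INDEX_MASK, shadow_index) |
		FIELD_PREP(EXAMPLE_VALID_MASK, 1) |
		FIELD_PREP(EXAMPLE_READY_MASK, 1);
}

static u32 example_unpack_shadow_index(u32 entry)
{
	/* FIELD_GET() is the inverse: extract and right-align the field */
	return FIELD_GET(EXAMPLE_SHADOW_INDEX_MASK, entry);
}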
@@ -13,7 +13,7 @@
#define HWMON_NR_SENSOR_TYPES		(hwmon_pwm + 1)

int hl_build_hwmon_channel_info(struct hl_device *hdev,
-		struct armcp_sensor *sensors_arr)
+		struct cpucp_sensor *sensors_arr)
{
	u32 counts[HWMON_NR_SENSOR_TYPES] = {0};
	u32 *sensors_by_type[HWMON_NR_SENSOR_TYPES] = {NULL};
@@ -24,7 +24,7 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev,
	enum hwmon_sensor_types type;
	int rc, i, j;

-	for (i = 0 ; i < ARMCP_MAX_SENSORS ; i++) {
+	for (i = 0 ; i < CPUCP_MAX_SENSORS ; i++) {
		type = le32_to_cpu(sensors_arr[i].type);

		if ((type == 0) && (sensors_arr[i].flags == 0))
@@ -311,13 +311,13 @@ static const struct hwmon_ops hl_hwmon_ops = {
int hl_get_temperature(struct hl_device *hdev,
			int sensor_index, u32 attr, long *value)
{
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
	int rc;

	memset(&pkt, 0, sizeof(pkt));

-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_GET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEMPERATURE_GET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.sensor_index = __cpu_to_le16(sensor_index);
	pkt.type = __cpu_to_le16(attr);
@@ -337,13 +337,13 @@ int hl_get_temperature(struct hl_device *hdev,
int hl_set_temperature(struct hl_device *hdev,
			int sensor_index, u32 attr, long value)
{
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
	int rc;

	memset(&pkt, 0, sizeof(pkt));

-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_SET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEMPERATURE_SET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.sensor_index = __cpu_to_le16(sensor_index);
	pkt.type = __cpu_to_le16(attr);
	pkt.value = __cpu_to_le64(value);
@@ -362,13 +362,13 @@ int hl_set_temperature(struct hl_device *hdev,
int hl_get_voltage(struct hl_device *hdev,
			int sensor_index, u32 attr, long *value)
{
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
	int rc;

	memset(&pkt, 0, sizeof(pkt));

-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_GET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_VOLTAGE_GET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.sensor_index = __cpu_to_le16(sensor_index);
	pkt.type = __cpu_to_le16(attr);
@@ -388,13 +388,13 @@ int hl_get_voltage(struct hl_device *hdev,
int hl_get_current(struct hl_device *hdev,
			int sensor_index, u32 attr, long *value)
{
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
	int rc;

	memset(&pkt, 0, sizeof(pkt));

-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_GET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_CURRENT_GET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.sensor_index = __cpu_to_le16(sensor_index);
	pkt.type = __cpu_to_le16(attr);
@@ -414,13 +414,13 @@ int hl_get_current(struct hl_device *hdev,
int hl_get_fan_speed(struct hl_device *hdev,
			int sensor_index, u32 attr, long *value)
{
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
	int rc;

	memset(&pkt, 0, sizeof(pkt));

-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_FAN_SPEED_GET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_FAN_SPEED_GET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.sensor_index = __cpu_to_le16(sensor_index);
	pkt.type = __cpu_to_le16(attr);
@@ -440,13 +440,13 @@ int hl_get_fan_speed(struct hl_device *hdev,
int hl_get_pwm_info(struct hl_device *hdev,
			int sensor_index, u32 attr, long *value)
{
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
	int rc;

	memset(&pkt, 0, sizeof(pkt));

-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_GET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_PWM_GET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.sensor_index = __cpu_to_le16(sensor_index);
	pkt.type = __cpu_to_le16(attr);
@@ -466,13 +466,13 @@ int hl_get_pwm_info(struct hl_device *hdev,
void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
			long value)
{
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
	int rc;

	memset(&pkt, 0, sizeof(pkt));

-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_SET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_PWM_SET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.sensor_index = __cpu_to_le16(sensor_index);
	pkt.type = __cpu_to_le16(attr);
	pkt.value = cpu_to_le64(value);
@@ -489,13 +489,13 @@ void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
int hl_set_voltage(struct hl_device *hdev,
			int sensor_index, u32 attr, long value)
{
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
	int rc;

	memset(&pkt, 0, sizeof(pkt));

-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_SET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_VOLTAGE_SET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.sensor_index = __cpu_to_le16(sensor_index);
	pkt.type = __cpu_to_le16(attr);
	pkt.value = __cpu_to_le64(value);
@@ -514,13 +514,13 @@ int hl_set_voltage(struct hl_device *hdev,
int hl_set_current(struct hl_device *hdev,
			int sensor_index, u32 attr, long value)
{
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
	int rc;

	memset(&pkt, 0, sizeof(pkt));

-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_SET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_CURRENT_SET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
	pkt.sensor_index = __cpu_to_le16(sensor_index);
	pkt.type = __cpu_to_le16(attr);
	pkt.value = __cpu_to_le64(value);
@@ -549,7 +549,7 @@ int hl_hwmon_init(struct hl_device *hdev)
	hdev->hl_chip_info->ops = &hl_hwmon_ops;

	hdev->hwmon_dev = hwmon_device_register_with_info(dev,
-			prop->armcp_info.card_name, hdev,
+			prop->cpucp_info.card_name, hdev,
			hdev->hl_chip_info, NULL);
	if (IS_ERR(hdev->hwmon_dev)) {
		rc = PTR_ERR(hdev->hwmon_dev);
......
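The ARMCP to CPUCP rename leaves the packet wire format untouched: each request packs an opcode into the upper bits of a little-endian `ctl` word, then fills the operands. A minimal userspace sketch of that packing follows; the struct layout, the opcode value, and the 16-bit shift are simplified stand-ins for illustration, not the driver's actual `cpucp_if.h` definitions, and the endianness conversion is omitted.

```c
/* Hedged sketch of the cpucp packet ctl packing; values are assumptions. */
#include <stdint.h>
#include <stdio.h>

#define CPUCP_PKT_CTL_OPCODE_SHIFT 16u	/* assumed shift, for illustration */
#define CPUCP_PACKET_CURRENT_GET   0x1u	/* hypothetical opcode value */

struct cpucp_packet_sketch {
	uint32_t ctl;		/* opcode lives in the upper bits */
	uint16_t sensor_index;
	uint16_t type;		/* hwmon attribute selector */
};

int main(void)
{
	struct cpucp_packet_sketch pkt = {0};

	/* same pattern as hl_get_current(): opcode shifted into ctl */
	pkt.ctl = CPUCP_PACKET_CURRENT_GET << CPUCP_PKT_CTL_OPCODE_SHIFT;
	pkt.sensor_index = 3;
	pkt.type = 0;

	printf("ctl = 0x%08x (opcode %u)\n", pkt.ctl,
	       pkt.ctl >> CPUCP_PKT_CTL_OPCODE_SHIFT);
	return 0;
}
```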
@@ -11,7 +11,7 @@
 /**
  * struct hl_eqe_work - This structure is used to schedule work of EQ
- *                      entry and armcp_reset event
+ *                      entry and cpucp_reset event
  *
  * @eq_work: workqueue object to run when EQ entry is received
  * @hdev: pointer to device structure
...
@@ -505,41 +505,32 @@ static inline int add_va_block(struct hl_device *hdev,
 }

 /*
- * get_va_block - get a virtual block with the requested size
- *
- * @hdev       : pointer to the habanalabs device structure
- * @va_range   : pointer to the virtual addresses range
- * @size       : requested block size
- * @hint_addr  : hint for request address by the user
- * @is_userptr : is host or DRAM memory
+ * get_va_block() - get a virtual block for the given size and alignment.
+ * @hdev: pointer to the habanalabs device structure.
+ * @va_range: pointer to the virtual addresses range.
+ * @size: requested block size.
+ * @hint_addr: hint for requested address by the user.
+ * @va_block_align: required alignment of the virtual block start address.
  *
  * This function does the following:
  * - Iterate on the virtual block list to find a suitable virtual block for the
- *   requested size
- * - Reserve the requested block and update the list
- * - Return the start address of the virtual block
+ *   given size and alignment.
+ * - Reserve the requested block and update the list.
+ * - Return the start address of the virtual block.
  */
-static u64 get_va_block(struct hl_device *hdev,
-			struct hl_va_range *va_range, u64 size, u64 hint_addr,
-			bool is_userptr)
+static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range,
+			u64 size, u64 hint_addr, u32 va_block_align)
 {
 	struct hl_vm_va_block *va_block, *new_va_block = NULL;
-	u64 valid_start, valid_size, prev_start, prev_end, page_mask,
+	u64 valid_start, valid_size, prev_start, prev_end, align_mask,
 		res_valid_start = 0, res_valid_size = 0;
-	u32 page_size;
 	bool add_prev = false;

-	if (is_userptr)
-		/*
-		 * We cannot know if the user allocated memory with huge pages
-		 * or not, hence we continue with the biggest possible
-		 * granularity.
-		 */
-		page_size = hdev->asic_prop.pmmu_huge.page_size;
-	else
-		page_size = hdev->asic_prop.dmmu.page_size;
+	align_mask = ~((u64)va_block_align - 1);

-	page_mask = ~((u64)page_size - 1);
+	/* check if hint_addr is aligned */
+	if (hint_addr & (va_block_align - 1))
+		hint_addr = 0;

 	mutex_lock(&va_range->lock);
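The two expressions above are the standard power-of-two alignment idioms: `addr & (align - 1)` tests misalignment, and masking with `~(align - 1)` then adding `align` rounds up to the next boundary. A small standalone check of both, assuming `align` is a power of two as the MMU page sizes are:

```c
/* Standalone demo of the alignment idioms used by get_va_block();
 * assumes align is a power of two, as MMU page sizes are.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t align_up(uint64_t addr, uint64_t align)
{
	uint64_t mask = ~(align - 1);	/* e.g. align=0x1000 -> mask=...f000 */

	if (addr & (align - 1))		/* misaligned: round up */
		return (addr & mask) + align;
	return addr;			/* already aligned */
}

int main(void)
{
	assert(align_up(0x12345, 0x1000) == 0x13000);
	assert(align_up(0x13000, 0x1000) == 0x13000);

	/* an unaligned hint is simply discarded, as in the hunk above */
	uint64_t hint = 0x12345;
	if (hint & (0x1000 - 1))
		hint = 0;
	printf("hint after check: 0x%llx\n", (unsigned long long)hint);
	return 0;
}
```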
@@ -549,9 +540,9 @@ static u64 get_va_block(struct hl_device *hdev,
 		/* calc the first possible aligned addr */
 		valid_start = va_block->start;

-		if (valid_start & (page_size - 1)) {
-			valid_start &= page_mask;
-			valid_start += page_size;
+		if (valid_start & (va_block_align - 1)) {
+			valid_start &= align_mask;
+			valid_start += va_block_align;
 			if (valid_start > va_block->end)
 				continue;
 		}
@@ -863,7 +854,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 	struct hl_va_range *va_range;
 	enum vm_type_t *vm_type;
 	u64 ret_vaddr, hint_addr;
-	u32 handle = 0;
+	u32 handle = 0, va_block_align;
 	int rc;
 	bool is_userptr = args->flags & HL_MEM_USERPTR;
@@ -873,6 +864,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 	if (is_userptr) {
 		u64 addr = args->map_host.host_virt_addr,
 			size = args->map_host.mem_size;
+		u32 page_size = hdev->asic_prop.pmmu.page_size,
+			huge_page_size = hdev->asic_prop.pmmu_huge.page_size;

 		rc = dma_map_host_va(hdev, addr, size, &userptr);
 		if (rc) {
@@ -892,6 +885,27 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 		vm_type = (enum vm_type_t *) userptr;
 		hint_addr = args->map_host.hint_addr;
 		handle = phys_pg_pack->handle;
+
+		/* get required alignment */
+		if (phys_pg_pack->page_size == page_size) {
+			va_range = ctx->host_va_range;
+
+			/*
+			 * huge page alignment may be needed in case of regular
+			 * page mapping, depending on the host VA alignment
+			 */
+			if (addr & (huge_page_size - 1))
+				va_block_align = page_size;
+			else
+				va_block_align = huge_page_size;
+		} else {
+			/*
+			 * huge page alignment is needed in case of huge page
+			 * mapping
+			 */
+			va_range = ctx->host_huge_va_range;
+			va_block_align = huge_page_size;
+		}
 	} else {
 		handle = lower_32_bits(args->map_device.handle);
@@ -912,6 +926,10 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 		vm_type = (enum vm_type_t *) phys_pg_pack;
 		hint_addr = args->map_device.hint_addr;
+
+		/* DRAM VA alignment is the same as the DRAM page size */
+		va_range = ctx->dram_va_range;
+		va_block_align = hdev->asic_prop.dmmu.page_size;
 	}

 	/*
@@ -933,16 +951,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 		goto hnode_err;
 	}

-	if (is_userptr)
-		if (phys_pg_pack->page_size == hdev->asic_prop.pmmu.page_size)
-			va_range = ctx->host_va_range;
-		else
-			va_range = ctx->host_huge_va_range;
-	else
-		va_range = ctx->dram_va_range;

 	ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
-					hint_addr, is_userptr);
+					hint_addr, va_block_align);
 	if (!ret_vaddr) {
 		dev_err(hdev->dev, "no available va block for handle %u\n",
 				handle);
...
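Pulled out of the diff, the alignment-selection policy reduces to a small decision function: DRAM mappings align to the DRAM page size, host huge-page mappings align to the huge page size, and regular host mappings get opportunistic huge-page alignment only when the user VA already allows it. A hedged sketch of that policy, with the page sizes as plain parameters rather than `asic_prop` fields:

```c
/* Sketch of the va_block_align selection policy from map_device_va();
 * page sizes are plain parameters here instead of asic_prop fields.
 */
#include <stdbool.h>
#include <stdint.h>

static uint32_t pick_va_block_align(bool is_userptr, uint64_t host_addr,
				    uint32_t pack_page_size,
				    uint32_t page_size,      /* e.g. 4 KiB */
				    uint32_t huge_page_size, /* e.g. 2 MiB */
				    uint32_t dram_page_size)
{
	if (!is_userptr)
		/* DRAM VA alignment is the same as the DRAM page size */
		return dram_page_size;

	if (pack_page_size != page_size)
		/* huge-page mapping: huge-page alignment is mandatory */
		return huge_page_size;

	/* regular mapping: huge alignment only if the host VA permits it */
	return (host_addr & (huge_page_size - 1)) ? page_size
						  : huge_page_size;
}

int main(void)
{
	/* host VA already 2 MiB aligned -> opportunistic huge alignment */
	uint32_t a = pick_va_block_align(true, 0x40200000, 4096,
					 4096, 0x200000, 0x200000);
	return a == 0x200000 ? 0 : 1;
}
```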
@@ -9,7 +9,6 @@
 #include "../include/hw_ip/pci/pci_general.h"

 #include <linux/pci.h>
-#include <linux/bitfield.h>

 #define HL_PLDM_PCI_ELBI_TIMEOUT_MSEC	(HL_PCI_ELBI_TIMEOUT_MSEC * 10)
@@ -339,12 +338,17 @@ static int hl_pci_set_dma_mask(struct hl_device *hdev)
 /**
  * hl_pci_init() - PCI initialization code.
  * @hdev: Pointer to hl_device structure.
+ * @cpu_boot_status_reg: status register of the device's CPU
+ * @boot_err0_reg: boot error register of the device's CPU
+ * @preboot_ver_timeout: how much time to wait before bailing out on reading
+ *                       the preboot version
  *
  * Set DMA masks, initialize the PCI controller and map the PCI BARs.
  *
  * Return: 0 on success, non-zero for failure.
  */
-int hl_pci_init(struct hl_device *hdev)
+int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg,
+		u32 boot_err0_reg, u32 preboot_ver_timeout)
 {
 	struct pci_dev *pdev = hdev->pdev;
 	int rc;
@@ -376,6 +380,15 @@ int hl_pci_init(struct hl_device *hdev)
 	if (rc)
 		goto unmap_pci_bars;

+	/* Before continuing in the initialization, we need to read the preboot
+	 * version to determine whether we run with a security-enabled
+	 * firmware. The check itself is done in each ASIC's specific code.
+	 */
+	rc = hl_fw_read_preboot_ver(hdev, cpu_boot_status_reg, boot_err0_reg,
+					preboot_ver_timeout);
+	if (rc)
+		goto unmap_pci_bars;
+
 	return 0;

 unmap_pci_bars:
...
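The new preboot-version read slots into hl_pci_init()'s existing goto-based unwind: each step that can fail jumps to the label that undoes everything initialized so far, so the BARs mapped earlier are released on any later failure. A generic sketch of the pattern; the step names and undo functions are illustrative, not the driver's:

```c
/* Generic sketch of the goto-unwind error handling that hl_pci_init()
 * uses; the step names and undo functions are illustrative only.
 */
#include <stdio.h>

static int map_bars(void) { puts("map BARs"); return 0; }
static void unmap_bars(void) { puts("unmap BARs"); }
static int read_preboot_ver(void) { puts("read preboot ver"); return -1; }

static int init(void)
{
	int rc;

	rc = map_bars();
	if (rc)
		return rc;

	rc = read_preboot_ver();
	if (rc)
		goto unmap;	/* a later failure unwinds the earlier step */

	return 0;

unmap:
	unmap_bars();
	return rc;
}

int main(void) { return init() ? 1 : 0; }
```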
@@ -11,18 +11,18 @@
 long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
 {
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
 	long result;
 	int rc;

 	memset(&pkt, 0, sizeof(pkt));

 	if (curr)
-		pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_CURR_GET <<
-					ARMCP_PKT_CTL_OPCODE_SHIFT);
+		pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_CURR_GET <<
+					CPUCP_PKT_CTL_OPCODE_SHIFT);
 	else
-		pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_GET <<
-					ARMCP_PKT_CTL_OPCODE_SHIFT);
+		pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET <<
+					CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.pll_index = cpu_to_le32(pll_index);

 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
@@ -40,13 +40,13 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
 void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
 {
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
 	int rc;

 	memset(&pkt, 0, sizeof(pkt));

-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_SET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.pll_index = cpu_to_le32(pll_index);
 	pkt.value = cpu_to_le64(freq);
@@ -61,14 +61,14 @@ void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
 u64 hl_get_max_power(struct hl_device *hdev)
 {
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
 	long result;
 	int rc;

 	memset(&pkt, 0, sizeof(pkt));

-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_GET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);

 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
 			0, &result);
@@ -83,13 +83,13 @@ u64 hl_get_max_power(struct hl_device *hdev)
 void hl_set_max_power(struct hl_device *hdev)
 {
-	struct armcp_packet pkt;
+	struct cpucp_packet pkt;
 	int rc;

 	memset(&pkt, 0, sizeof(pkt));

-	pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_SET <<
-				ARMCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_SET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
 	pkt.value = cpu_to_le64(hdev->max_power);

 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
@@ -112,7 +112,7 @@ static ssize_t armcp_kernel_ver_show(struct device *dev,
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);

-	return sprintf(buf, "%s", hdev->asic_prop.armcp_info.kernel_version);
+	return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.kernel_version);
 }

 static ssize_t armcp_ver_show(struct device *dev, struct device_attribute *attr,
@@ -120,7 +120,7 @@ static ssize_t armcp_ver_show(struct device *dev, struct device_attribute *attr,
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);

-	return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.armcp_version);
+	return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.cpucp_version);
 }

 static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr,
@@ -129,7 +129,23 @@ static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr,
 	struct hl_device *hdev = dev_get_drvdata(dev);

 	return sprintf(buf, "0x%08x\n",
-			hdev->asic_prop.armcp_info.cpld_version);
+			hdev->asic_prop.cpucp_info.cpld_version);
+}
+
+static ssize_t cpucp_kernel_ver_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct hl_device *hdev = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.kernel_version);
+}
+
+static ssize_t cpucp_ver_show(struct device *dev, struct device_attribute *attr,
+				char *buf)
+{
+	struct hl_device *hdev = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.cpucp_version);
 }

 static ssize_t infineon_ver_show(struct device *dev,
@@ -138,7 +154,7 @@ static ssize_t infineon_ver_show(struct device *dev,
 	struct hl_device *hdev = dev_get_drvdata(dev);

 	return sprintf(buf, "0x%04x\n",
-			hdev->asic_prop.armcp_info.infineon_version);
+			hdev->asic_prop.cpucp_info.infineon_version);
 }

 static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr,
@@ -146,7 +162,7 @@ static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr,
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);

-	return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.fuse_version);
+	return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.fuse_version);
 }

 static ssize_t thermal_ver_show(struct device *dev,
@@ -154,7 +170,7 @@ static ssize_t thermal_ver_show(struct device *dev,
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);

-	return sprintf(buf, "%s", hdev->asic_prop.armcp_info.thermal_version);
+	return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.thermal_version);
 }

 static ssize_t preboot_btl_ver_show(struct device *dev,
@@ -356,6 +372,8 @@ static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj,
 static DEVICE_ATTR_RO(armcp_kernel_ver);
 static DEVICE_ATTR_RO(armcp_ver);
 static DEVICE_ATTR_RO(cpld_ver);
+static DEVICE_ATTR_RO(cpucp_kernel_ver);
+static DEVICE_ATTR_RO(cpucp_ver);
 static DEVICE_ATTR_RO(device_type);
 static DEVICE_ATTR_RO(fuse_ver);
 static DEVICE_ATTR_WO(hard_reset);
@@ -380,6 +398,8 @@ static struct attribute *hl_dev_attrs[] = {
 	&dev_attr_armcp_kernel_ver.attr,
 	&dev_attr_armcp_ver.attr,
 	&dev_attr_cpld_ver.attr,
+	&dev_attr_cpucp_kernel_ver.attr,
+	&dev_attr_cpucp_ver.attr,
 	&dev_attr_device_type.attr,
 	&dev_attr_fuse_ver.attr,
 	&dev_attr_hard_reset.attr,
...
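Until the deprecated armcp_* entries are removed, both attribute families are registered and report the same cpucp_info fields. From userspace the new entries read like any sysfs file; a minimal sketch follows, with device index 0 assumed:

```c
/* Minimal userspace read of the new sysfs attribute; hl0 is assumed. */
#include <stdio.h>

int main(void)
{
	char ver[128];
	FILE *f = fopen("/sys/class/habanalabs/hl0/cpucp_ver", "r");

	if (!f) {
		perror("open cpucp_ver");
		return 1;
	}
	if (fgets(ver, sizeof(ver), f))
		printf("CPU-CP version: %s\n", ver);
	fclose(f);
	return 0;
}
```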
@@ -35,8 +35,6 @@
 #error "Number of MSI interrupts must be smaller or equal to GAUDI_MSI_ENTRIES"
 #endif

-#define QMAN_FENCE_TIMEOUT_USEC		10000		/* 10 ms */
-
 #define CORESIGHT_TIMEOUT_USEC		100000		/* 100 ms */

 #define GAUDI_MAX_CLK_FREQ		2200000000ull	/* 2200 MHz */
@@ -44,7 +42,7 @@
 #define MAX_POWER_DEFAULT_PCI		200000		/* 200W */
 #define MAX_POWER_DEFAULT_PMC		350000		/* 350W */

-#define GAUDI_CPU_TIMEOUT_USEC		15000000	/* 15s */
+#define GAUDI_CPU_TIMEOUT_USEC		30000000	/* 30s */

 #define TPC_ENABLED_MASK		0xFF
@@ -142,28 +140,28 @@
 #define VA_HOST_SPACE_SIZE	(VA_HOST_SPACE_END - \
					VA_HOST_SPACE_START) /* 767TB */

-#define HW_CAP_PLL		0x00000001
-#define HW_CAP_HBM		0x00000002
-#define HW_CAP_MMU		0x00000004
-#define HW_CAP_MME		0x00000008
-#define HW_CAP_CPU		0x00000010
-#define HW_CAP_PCI_DMA		0x00000020
-#define HW_CAP_MSI		0x00000040
-#define HW_CAP_CPU_Q		0x00000080
-#define HW_CAP_HBM_DMA		0x00000100
-#define HW_CAP_CLK_GATE		0x00000200
-#define HW_CAP_SRAM_SCRAMBLER	0x00000400
-#define HW_CAP_HBM_SCRAMBLER	0x00000800
-#define HW_CAP_TPC0		0x01000000
-#define HW_CAP_TPC1		0x02000000
-#define HW_CAP_TPC2		0x04000000
-#define HW_CAP_TPC3		0x08000000
-#define HW_CAP_TPC4		0x10000000
-#define HW_CAP_TPC5		0x20000000
-#define HW_CAP_TPC6		0x40000000
-#define HW_CAP_TPC7		0x80000000
-#define HW_CAP_TPC_MASK		0xFF000000
+#define HW_CAP_PLL		BIT(0)
+#define HW_CAP_HBM		BIT(1)
+#define HW_CAP_MMU		BIT(2)
+#define HW_CAP_MME		BIT(3)
+#define HW_CAP_CPU		BIT(4)
+#define HW_CAP_PCI_DMA		BIT(5)
+#define HW_CAP_MSI		BIT(6)
+#define HW_CAP_CPU_Q		BIT(7)
+#define HW_CAP_HBM_DMA		BIT(8)
+#define HW_CAP_CLK_GATE		BIT(9)
+#define HW_CAP_SRAM_SCRAMBLER	BIT(10)
+#define HW_CAP_HBM_SCRAMBLER	BIT(11)
+#define HW_CAP_TPC0		BIT(24)
+#define HW_CAP_TPC1		BIT(25)
+#define HW_CAP_TPC2		BIT(26)
+#define HW_CAP_TPC3		BIT(27)
+#define HW_CAP_TPC4		BIT(28)
+#define HW_CAP_TPC5		BIT(29)
+#define HW_CAP_TPC6		BIT(30)
+#define HW_CAP_TPC7		BIT(31)
+#define HW_CAP_TPC_MASK		GENMASK(31, 24)
 #define HW_CAP_TPC_SHIFT	24

 #define GAUDI_CPU_PCI_MSB_ADDR(addr)	(((addr) & GENMASK_ULL(49, 39)) >> 39)
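The BIT()/GENMASK() conversion is purely notational: BIT(n) expands to 1UL << n and GENMASK(h, l) to the contiguous mask with bits h..l set, so HW_CAP_TPC_MASK keeps its old value of 0xFF000000. A standalone check with local re-definitions of the two macros (the kernel's own live in linux/bits.h; this sketch assumes a 64-bit unsigned long):

```c
/* Local re-definitions mirroring linux/bits.h, to check the converted
 * values against the old literals; assumes 64-bit unsigned long.
 */
#include <assert.h>
#include <stdio.h>

#define BIT(n)		(1UL << (n))
#define GENMASK(h, l)	(((~0UL) << (l)) & (~0UL >> (63 - (h))))

int main(void)
{
	assert(BIT(0) == 0x00000001UL);		/* HW_CAP_PLL */
	assert(BIT(11) == 0x00000800UL);	/* HW_CAP_HBM_SCRAMBLER */
	assert(BIT(24) == 0x01000000UL);	/* HW_CAP_TPC0 */
	assert(GENMASK(31, 24) == 0xFF000000UL);/* HW_CAP_TPC_MASK */
	puts("converted defines match the old literal values");
	return 0;
}
```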
@@ -216,7 +214,7 @@ struct gaudi_internal_qman_info {
 /**
  * struct gaudi_device - ASIC specific manage structure.
- * @armcp_info_get: get information on device from ArmCP
+ * @cpucp_info_get: get information on device from CPU-CP
  * @hw_queues_lock: protects the H/W queues from concurrent access.
  * @clk_gate_mutex: protects code areas that require clock gating to be disabled
  *                  temporarily
@@ -239,7 +237,7 @@ struct gaudi_internal_qman_info {
  *                 8-bit value so use u8.
  */
 struct gaudi_device {
-	int (*armcp_info_get)(struct hl_device *hdev);
+	int (*cpucp_info_get)(struct hl_device *hdev);

 	/* TODO: remove hw_queues_lock after moving to scheduler code */
 	spinlock_t hw_queues_lock;
...
@@ -207,7 +207,7 @@ void goya_set_max_power(struct hl_device *hdev, u64 value);
 void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
 void goya_add_device_attr(struct hl_device *hdev,
				struct attribute_group *dev_attr_grp);
-int goya_armcp_info_get(struct hl_device *hdev);
+int goya_cpucp_info_get(struct hl_device *hdev);
 int goya_debug_coresight(struct hl_device *hdev, void *data);
 void goya_halt_coresight(struct hl_device *hdev);
...
@@ -40,7 +40,7 @@ struct hl_bd {
  */
 #define BD_CTL_COMP_OFFSET_SHIFT	16
-#define BD_CTL_COMP_OFFSET_MASK		0x00FF0000
+#define BD_CTL_COMP_OFFSET_MASK		0x0FFF0000

 #define BD_CTL_COMP_DATA_SHIFT		0
 #define BD_CTL_COMP_DATA_MASK		0x0000FFFF
...
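Widening BD_CTL_COMP_OFFSET_MASK from 0x00FF0000 to 0x0FFF0000 grows the completion-offset field from 8 to 12 bits (the extra nibble mentioned in the changelog) while the shift stays 16. Field extraction and insertion with shift/mask pairs works the same either way; a small sketch:

```c
/* Sketch of extracting/inserting the widened completion-offset field. */
#include <assert.h>
#include <stdint.h>

#define BD_CTL_COMP_OFFSET_SHIFT	16
#define BD_CTL_COMP_OFFSET_MASK		0x0FFF0000u	/* 12 bits, was 8 */
#define BD_CTL_COMP_DATA_MASK		0x0000FFFFu

static uint32_t comp_offset_get(uint32_t ctl)
{
	return (ctl & BD_CTL_COMP_OFFSET_MASK) >> BD_CTL_COMP_OFFSET_SHIFT;
}

static uint32_t comp_offset_set(uint32_t ctl, uint32_t off)
{
	ctl &= ~BD_CTL_COMP_OFFSET_MASK;
	return ctl | ((off << BD_CTL_COMP_OFFSET_SHIFT) &
		      BD_CTL_COMP_OFFSET_MASK);
}

int main(void)
{
	uint32_t ctl = comp_offset_set(0x0000BEEF, 0x123);

	assert(comp_offset_get(ctl) == 0x123);	/* > 0xFF: needs new width */
	assert((ctl & BD_CTL_COMP_DATA_MASK) == 0xBEEF);
	return 0;
}
```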
@@ -44,6 +44,8 @@
 #define MME_NUMBER_OF_MASTER_ENGINES	2

+#define MME_NUMBER_OF_SLAVE_ENGINES	2
+
 #define TPC_NUMBER_OF_ENGINES		8

 #define DMA_NUMBER_OF_CHANNELS		8
...
@@ -12,6 +12,7 @@
  * PSOC scratch-pad registers
  */
 #define mmHW_STATE			mmPSOC_GLOBAL_CONF_SCRATCHPAD_0
+#define mmFUSE_VER_OFFSET		mmPSOC_GLOBAL_CONF_SCRATCHPAD_22
 #define mmCPU_CMD_STATUS_TO_HOST	mmPSOC_GLOBAL_CONF_SCRATCHPAD_23
 #define mmCPU_BOOT_ERR0			mmPSOC_GLOBAL_CONF_SCRATCHPAD_24
 #define mmCPU_BOOT_ERR1			mmPSOC_GLOBAL_CONF_SCRATCHPAD_25
...
@@ -22,6 +22,7 @@
 #define mmCPU_CQ_BASE_ADDR_LOW		mmPSOC_GLOBAL_CONF_SCRATCHPAD_8
 #define mmCPU_CQ_BASE_ADDR_HIGH		mmPSOC_GLOBAL_CONF_SCRATCHPAD_9
 #define mmCPU_CQ_LENGTH			mmPSOC_GLOBAL_CONF_SCRATCHPAD_10
+#define mmFUSE_VER_OFFSET		mmPSOC_GLOBAL_CONF_SCRATCHPAD_22
 #define mmCPU_CMD_STATUS_TO_HOST	mmPSOC_GLOBAL_CONF_SCRATCHPAD_23
 #define mmCPU_BOOT_ERR0			mmPSOC_GLOBAL_CONF_SCRATCHPAD_24
 #define mmCPU_BOOT_ERR1			mmPSOC_GLOBAL_CONF_SCRATCHPAD_25
...
@@ -29,6 +29,8 @@
 #define HOP3_SHIFT			21
 #define HOP4_SHIFT			12

+#define MMU_ARCH_5_HOPS			5
+
 #define HOP_PHYS_ADDR_MASK		(~FLAGS_MASK)
 #define HL_PTE_SIZE			sizeof(u64)
...
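MMU_ARCH_5_HOPS names the baseline five-level page walk; the matching num_hops property and 6-HOP debugfs support are added elsewhere in this series. Given the shifts shown here, each hop index spans 9 bits (HOP3_SHIFT - HOP4_SHIFT = 9), so extracting an index is a shift and a mask. A sketch for the two hops whose shifts appear above; the 9-bit width is inferred from those shifts, not quoted from the header:

```c
/* Sketch: hop-index extraction for the two shifts visible in this hunk.
 * The 9-bit index width is inferred from HOP3_SHIFT - HOP4_SHIFT.
 */
#include <stdint.h>
#include <stdio.h>

#define HOP3_SHIFT	21
#define HOP4_SHIFT	12
#define HOP_INDEX_BITS	9
#define HOP_INDEX_MASK	((1u << HOP_INDEX_BITS) - 1)	/* 0x1FF */

static unsigned int hop_index(uint64_t va, unsigned int shift)
{
	return (va >> shift) & HOP_INDEX_MASK;
}

int main(void)
{
	uint64_t va = 0x0000001234567000ULL;

	printf("hop3 index: 0x%x\n", hop_index(va, HOP3_SHIFT));
	printf("hop4 index: 0x%x\n", hop_index(va, HOP4_SHIFT));
	return 0;
}
```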