Commit 9e072793 authored by Greg Kroah-Hartman

Merge tag 'misc-habanalabs-next-2020-09-22' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next

Oded writes:

This tag contains the following changes for kernel 5.10-rc1:

- Stop using DRM's dma-fence module and instead use kernel completions
  (see the sketch after this list).
- Support PCIe AER.
- Use dma_mmap_coherent for memory allocated using dma_alloc_coherent.
- Use the smallest possible alignment when allocating virtual addresses in our
  MMU driver.
- Refactor the MMU driver code to be device-oriented.
- Allow the user to check CS status without sleeping.
- Add an option to map a Command Buffer to the device's MMU.
- Expose sync manager resource allocation to the user through the INFO IOCTL.
- Convert code to use the standard BIT(), GENMASK() and FIELD_PREP() macros
  (illustrated after this list).
- Many small fixes (casting, better error messages, removal of unused
  defines, h/w configuration fixes, etc.).
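
As a reference for the first item above, here is a minimal sketch of the
completion-based fence pattern this series moves to. The names follow the
driver code in the diff below, but the struct layout and release path are
simplified for illustration, not copied verbatim:

    #include <linux/kernel.h>
    #include <linux/kref.h>
    #include <linux/completion.h>
    #include <linux/slab.h>

    /* Driver-private fence: a kref plus a kernel completion instead of dma_fence. */
    struct hl_fence {
            struct kref refcount;           /* lifetime of the fence object */
            struct completion completion;   /* completed when the CS finishes */
            int error;                      /* 0, -ETIMEDOUT, -EIO or -EBUSY */
    };

    static void hl_fence_release(struct kref *kref)
    {
            struct hl_fence *fence = container_of(kref, struct hl_fence, refcount);

            kfree(fence);
    }

    void hl_fence_put(struct hl_fence *fence)
    {
            if (fence)
                    kref_put(&fence->refcount, hl_fence_release);
    }

    void hl_fence_get(struct hl_fence *fence)
    {
            if (fence)
                    kref_get(&fence->refcount);
    }

    static void hl_fence_init(struct hl_fence *fence)
    {
            kref_init(&fence->refcount);
            fence->error = 0;
            init_completion(&fence->completion);
    }

Waiters call wait_for_completion_interruptible_timeout() on fence->completion
and then inspect fence->error, while the release path signals with
complete_all(), as seen in the command_submission.c hunks below.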
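The BIT()/GENMASK()/FIELD_PREP() conversion is the usual mechanical cleanup.
A short illustrative sketch follows; the mask names come from the hw_queue.c
hunk below, but the exact bit positions here are example values, not the
driver's real register layout:

    #include <linux/bits.h>
    #include <linux/bitfield.h>
    #include <linux/types.h>

    #define CQ_ENTRY_SHADOW_INDEX_MASK              GENMASK(13, 0)  /* example, was 0x3FFF-style literal */
    #define CQ_ENTRY_SHADOW_INDEX_VALID_MASK        BIT(30)         /* example, was 1 << VALID_SHIFT */
    #define CQ_ENTRY_READY_MASK                     BIT(31)         /* example, was 1 << READY_SHIFT */

    static u32 build_cq_entry(u32 pi)
    {
            /* Old style: ((pi << SHIFT) & MASK) | (1 << VALID_SHIFT) | (1 << READY_SHIFT) */
            return FIELD_PREP(CQ_ENTRY_SHADOW_INDEX_MASK, pi) |
                   FIELD_PREP(CQ_ENTRY_SHADOW_INDEX_VALID_MASK, 1) |
                   FIELD_PREP(CQ_ENTRY_READY_MASK, 1);
    }

For fields converted to FIELD_PREP(), the shift amount is derived from the
mask itself, so open-coded shift-and-mask expressions disappear from the call
sites.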

* tag 'misc-habanalabs-next-2020-09-22' of git://people.freedesktop.org/~gabbayo/linux: (46 commits)
  habanalabs: update scratchpad register map
  habanalabs: add indication of security-enabled F/W
  habanalabs/gaudi: fix DMA completions max outstanding to 15
  habanalabs/gaudi: remove axi drain support
  habanalabs: update firmware interface file
  habanalabs: Add an option to map CB to device MMU
  habanalabs: Save context in a command buffer object
  habanalabs: no need for DMA_SHARED_BUFFER
  habanalabs: allow to wait on CS without sleep
  habanalabs/gaudi: increase timeout for boot fit load
  habanalabs: add debugfs support for MMU with 6 HOPs
  habanalabs: add num_hops to hl_mmu_properties
  habanalabs: refactor MMU as device-oriented
  habanalabs: rename mmu.c to mmu_v1.c
  habanalabs: use smallest possible alignment for virtual addresses
  habanalabs: check flag before reset because of f/w event
  habanalabs: increase PQ COMP_OFFSET by one nibble
  habanalabs: Fix alignment issue in cpucp_info structure
  habanalabs: remove unused define
  habanalabs: remove unused ASIC function pointer
  ...
parents e82ed736 f279e5cd
......@@ -2,13 +2,17 @@ What: /sys/class/habanalabs/hl<n>/armcp_kernel_ver
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Description: Version of the Linux kernel running on the device's CPU
Description: Version of the Linux kernel running on the device's CPU.
Will be DEPRECATED in Linux kernel version 5.10, and be
replaced with cpucp_kernel_ver
What: /sys/class/habanalabs/hl<n>/armcp_ver
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Description: Version of the application running on the device's CPU
Will be DEPRECATED in Linux kernel version 5.10, and be
replaced with cpucp_ver
What: /sys/class/habanalabs/hl<n>/clk_max_freq_mhz
Date: Jun 2019
......@@ -33,6 +37,18 @@ KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Description: Version of the Device's CPLD F/W
What: /sys/class/habanalabs/hl<n>/cpucp_kernel_ver
Date: Oct 2020
KernelVersion: 5.10
Contact: oded.gabbay@gmail.com
Description: Version of the Linux kernel running on the device's CPU
What: /sys/class/habanalabs/hl<n>/cpucp_ver
Date: Oct 2020
KernelVersion: 5.10
Contact: oded.gabbay@gmail.com
Description: Version of the application running on the device's CPU
What: /sys/class/habanalabs/hl<n>/device_type
Date: Jan 2019
KernelVersion: 5.1
......
......@@ -7,7 +7,6 @@ config HABANA_AI
tristate "HabanaAI accelerators (habanalabs)"
depends on PCI && HAS_IOMEM
select FRAME_VECTOR
select DMA_SHARED_BUFFER
select GENERIC_ALLOCATOR
select HWMON
help
......
......@@ -3,5 +3,5 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
common/asid.o common/habanalabs_ioctl.o \
common/command_buffer.o common/hw_queue.o common/irq.o \
common/sysfs.o common/hwmon.o common/memory.o \
common/command_submission.o common/mmu.o common/firmware_if.o \
common/pci.o
common/command_submission.o common/mmu.o common/mmu_v1.o \
common/firmware_if.o common/pci.o
......@@ -13,6 +13,131 @@
#include <linux/uaccess.h>
#include <linux/genalloc.h>
static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_vm_va_block *va_block, *tmp;
dma_addr_t bus_addr;
u64 virt_addr;
u32 page_size = prop->pmmu.page_size;
s32 offset;
int rc;
if (!hdev->supports_cb_mapping) {
dev_err_ratelimited(hdev->dev,
"Cannot map CB because no VA range is allocated for CB mapping\n");
return -EINVAL;
}
if (!hdev->mmu_enable) {
dev_err_ratelimited(hdev->dev,
"Cannot map CB because MMU is disabled\n");
return -EINVAL;
}
INIT_LIST_HEAD(&cb->va_block_list);
for (bus_addr = cb->bus_address;
bus_addr < cb->bus_address + cb->size;
bus_addr += page_size) {
virt_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, page_size);
if (!virt_addr) {
dev_err(hdev->dev,
"Failed to allocate device virtual address for CB\n");
rc = -ENOMEM;
goto err_va_pool_free;
}
va_block = kzalloc(sizeof(*va_block), GFP_KERNEL);
if (!va_block) {
rc = -ENOMEM;
gen_pool_free(ctx->cb_va_pool, virt_addr, page_size);
goto err_va_pool_free;
}
va_block->start = virt_addr;
va_block->end = virt_addr + page_size;
va_block->size = page_size;
list_add_tail(&va_block->node, &cb->va_block_list);
}
mutex_lock(&ctx->mmu_lock);
bus_addr = cb->bus_address;
offset = 0;
list_for_each_entry(va_block, &cb->va_block_list, node) {
rc = hl_mmu_map(ctx, va_block->start, bus_addr, va_block->size,
list_is_last(&va_block->node,
&cb->va_block_list));
if (rc) {
dev_err(hdev->dev, "Failed to map VA %#llx to CB\n",
va_block->start);
goto err_va_umap;
}
bus_addr += va_block->size;
offset += va_block->size;
}
hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
mutex_unlock(&ctx->mmu_lock);
cb->is_mmu_mapped = true;
return 0;
err_va_umap:
list_for_each_entry(va_block, &cb->va_block_list, node) {
if (offset <= 0)
break;
hl_mmu_unmap(ctx, va_block->start, va_block->size,
offset <= va_block->size);
offset -= va_block->size;
}
hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
mutex_unlock(&ctx->mmu_lock);
err_va_pool_free:
list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
list_del(&va_block->node);
kfree(va_block);
}
return rc;
}
static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
struct hl_device *hdev = ctx->hdev;
struct hl_vm_va_block *va_block, *tmp;
mutex_lock(&ctx->mmu_lock);
list_for_each_entry(va_block, &cb->va_block_list, node)
if (hl_mmu_unmap(ctx, va_block->start, va_block->size,
list_is_last(&va_block->node,
&cb->va_block_list)))
dev_warn_ratelimited(hdev->dev,
"Failed to unmap CB's va 0x%llx\n",
va_block->start);
hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
mutex_unlock(&ctx->mmu_lock);
list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
list_del(&va_block->node);
kfree(va_block);
}
}
static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
{
if (cb->is_internal)
......@@ -47,6 +172,11 @@ static void cb_release(struct kref *ref)
hl_debugfs_remove_cb(cb);
if (cb->is_mmu_mapped)
cb_unmap_mem(cb->ctx, cb);
hl_ctx_put(cb->ctx);
cb_do_release(hdev, cb);
}
......@@ -107,11 +237,12 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
}
int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
u32 cb_size, u64 *handle, int ctx_id, bool internal_cb)
struct hl_ctx *ctx, u32 cb_size, bool internal_cb,
bool map_cb, u64 *handle)
{
struct hl_cb *cb;
bool alloc_new_cb = true;
int rc;
int rc, ctx_id = ctx->asid;
/*
* Can't use generic function to check this because of special case
......@@ -163,7 +294,21 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
}
cb->hdev = hdev;
cb->ctx_id = ctx_id;
cb->ctx = ctx;
hl_ctx_get(hdev, cb->ctx);
if (map_cb) {
if (ctx_id == HL_KERNEL_ASID_ID) {
dev_err(hdev->dev,
"CB mapping is not supported for kernel context\n");
rc = -EINVAL;
goto release_cb;
}
rc = cb_map_mem(ctx, cb);
if (rc)
goto release_cb;
}
spin_lock(&mgr->cb_lock);
rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
......@@ -171,10 +316,10 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
if (rc < 0) {
dev_err(hdev->dev, "Failed to allocate IDR for a new CB\n");
goto release_cb;
goto unmap_mem;
}
cb->id = rc;
cb->id = (u64) rc;
kref_init(&cb->refcount);
spin_lock_init(&cb->lock);
......@@ -183,14 +328,18 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
* idr is 32-bit so we can safely OR it with a mask that is above
* 32 bit
*/
*handle = cb->id | HL_MMAP_CB_MASK;
*handle = cb->id | HL_MMAP_TYPE_CB;
*handle <<= PAGE_SHIFT;
hl_debugfs_add_cb(cb);
return 0;
unmap_mem:
if (cb->is_mmu_mapped)
cb_unmap_mem(cb->ctx, cb);
release_cb:
hl_ctx_put(cb->ctx);
cb_do_release(hdev, cb);
out_err:
*handle = 0;
......@@ -250,9 +399,10 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
args->in.cb_size, HL_MAX_CB_SIZE);
rc = -EINVAL;
} else {
rc = hl_cb_create(hdev, &hpriv->cb_mgr,
args->in.cb_size, &handle,
hpriv->ctx->asid, false);
rc = hl_cb_create(hdev, &hpriv->cb_mgr, hpriv->ctx,
args->in.cb_size, false,
!!(args->in.flags & HL_CB_FLAGS_MAP),
&handle);
}
memset(args, 0, sizeof(*args));
......@@ -300,11 +450,14 @@ int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
{
struct hl_device *hdev = hpriv->hdev;
struct hl_cb *cb;
phys_addr_t address;
u32 handle, user_cb_size;
int rc;
/* We use the page offset to hold the idr and thus we need to clear
* it before doing the mmap itself
*/
handle = vma->vm_pgoff;
vma->vm_pgoff = 0;
/* reference was taken here */
cb = hl_cb_get(hdev, &hpriv->cb_mgr, handle);
......@@ -356,12 +509,8 @@ int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
vma->vm_private_data = cb;
/* Calculate address for CB */
address = virt_to_phys((void *) (uintptr_t) cb->kernel_address);
rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address,
address, cb->size);
rc = hdev->asic_funcs->cb_mmap(hdev, vma, (void *) cb->kernel_address,
cb->bus_address, cb->size);
if (rc) {
spin_lock(&cb->lock);
cb->mmap = false;
......@@ -425,7 +574,7 @@ void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr)
if (kref_put(&cb->refcount, cb_release) != 1)
dev_err(hdev->dev,
"CB %d for CTX ID %d is still alive\n",
id, cb->ctx_id);
id, cb->ctx->asid);
}
idr_destroy(&mgr->cb_handles);
......@@ -438,8 +587,8 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
struct hl_cb *cb;
int rc;
rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle,
HL_KERNEL_ASID_ID, internal_cb);
rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, cb_size,
internal_cb, false, &cb_handle);
if (rc) {
dev_err(hdev->dev,
"Failed to allocate CB for the kernel driver %d\n", rc);
......@@ -495,3 +644,45 @@ int hl_cb_pool_fini(struct hl_device *hdev)
return 0;
}
int hl_cb_va_pool_init(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
int rc;
if (!hdev->supports_cb_mapping)
return 0;
ctx->cb_va_pool = gen_pool_create(__ffs(prop->pmmu.page_size), -1);
if (!ctx->cb_va_pool) {
dev_err(hdev->dev,
"Failed to create VA gen pool for CB mapping\n");
return -ENOMEM;
}
rc = gen_pool_add(ctx->cb_va_pool, prop->cb_va_start_addr,
prop->cb_va_end_addr - prop->cb_va_start_addr, -1);
if (rc) {
dev_err(hdev->dev,
"Failed to add memory to VA gen pool for CB mapping\n");
goto err_pool_destroy;
}
return 0;
err_pool_destroy:
gen_pool_destroy(ctx->cb_va_pool);
return rc;
}
void hl_cb_va_pool_fini(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
if (!hdev->supports_cb_mapping)
return;
gen_pool_destroy(ctx->cb_va_pool);
}
......@@ -38,26 +38,10 @@ void hl_sob_reset_error(struct kref *ref)
hw_sob->q_idx, hw_sob->sob_id);
}
static const char *hl_fence_get_driver_name(struct dma_fence *fence)
{
return "HabanaLabs";
}
static const char *hl_fence_get_timeline_name(struct dma_fence *fence)
{
struct hl_cs_compl *hl_cs_compl =
container_of(fence, struct hl_cs_compl, base_fence);
return dev_name(hl_cs_compl->hdev->dev);
}
static bool hl_fence_enable_signaling(struct dma_fence *fence)
{
return true;
}
static void hl_fence_release(struct dma_fence *fence)
static void hl_fence_release(struct kref *kref)
{
struct hl_fence *fence =
container_of(kref, struct hl_fence, refcount);
struct hl_cs_compl *hl_cs_cmpl =
container_of(fence, struct hl_cs_compl, base_fence);
struct hl_device *hdev = hl_cs_cmpl->hdev;
......@@ -99,15 +83,27 @@ static void hl_fence_release(struct dma_fence *fence)
}
free:
kfree_rcu(hl_cs_cmpl, base_fence.rcu);
kfree(hl_cs_cmpl);
}
static const struct dma_fence_ops hl_fence_ops = {
.get_driver_name = hl_fence_get_driver_name,
.get_timeline_name = hl_fence_get_timeline_name,
.enable_signaling = hl_fence_enable_signaling,
.release = hl_fence_release
};
void hl_fence_put(struct hl_fence *fence)
{
if (fence)
kref_put(&fence->refcount, hl_fence_release);
}
void hl_fence_get(struct hl_fence *fence)
{
if (fence)
kref_get(&fence->refcount);
}
static void hl_fence_init(struct hl_fence *fence)
{
kref_init(&fence->refcount);
fence->error = 0;
init_completion(&fence->completion);
}
static void cs_get(struct hl_cs *cs)
{
......@@ -256,6 +252,8 @@ static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx)
ctx->cs_counters.parsing_drop_cnt;
hdev->aggregated_cs_counters.queue_full_drop_cnt +=
ctx->cs_counters.queue_full_drop_cnt;
hdev->aggregated_cs_counters.max_cs_in_flight_drop_cnt +=
ctx->cs_counters.max_cs_in_flight_drop_cnt;
}
static void cs_do_release(struct kref *ref)
......@@ -336,7 +334,7 @@ static void cs_do_release(struct kref *ref)
* In case the wait for signal CS was submitted, the put occurs
* in init_signal_wait_cs() right before hanging on the PQ.
*/
dma_fence_put(cs->signal_fence);
hl_fence_put(cs->signal_fence);
}
/*
......@@ -348,19 +346,18 @@ static void cs_do_release(struct kref *ref)
hl_ctx_put(cs->ctx);
/* We need to mark an error for not submitted because in that case
* the dma fence release flow is different. Mainly, we don't need
* the hl fence release flow is different. Mainly, we don't need
* to handle hw_sob for signal/wait
*/
if (cs->timedout)
dma_fence_set_error(cs->fence, -ETIMEDOUT);
cs->fence->error = -ETIMEDOUT;
else if (cs->aborted)
dma_fence_set_error(cs->fence, -EIO);
cs->fence->error = -EIO;
else if (!cs->submitted)
dma_fence_set_error(cs->fence, -EBUSY);
dma_fence_signal(cs->fence);
dma_fence_put(cs->fence);
cs->fence->error = -EBUSY;
complete_all(&cs->fence->completion);
hl_fence_put(cs->fence);
cs_counters_aggregate(hdev, cs->ctx);
kfree(cs->jobs_in_queue_cnt);
......@@ -401,7 +398,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
enum hl_cs_type cs_type, struct hl_cs **cs_new)
{
struct hl_cs_compl *cs_cmpl;
struct dma_fence *other = NULL;
struct hl_fence *other = NULL;
struct hl_cs *cs;
int rc;
......@@ -434,9 +431,11 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
cs_cmpl->cs_seq = ctx->cs_sequence;
other = ctx->cs_pending[cs_cmpl->cs_seq &
(hdev->asic_prop.max_pending_cs - 1)];
if ((other) && (!dma_fence_is_signaled(other))) {
dev_dbg(hdev->dev,
if (other && !completion_done(&other->completion)) {
dev_dbg_ratelimited(hdev->dev,
"Rejecting CS because of too many in-flights CS\n");
ctx->cs_counters.max_cs_in_flight_drop_cnt++;
rc = -EAGAIN;
goto free_fence;
}
......@@ -448,8 +447,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
goto free_fence;
}
dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock,
ctx->asid, ctx->cs_sequence);
/* init hl_fence */
hl_fence_init(&cs_cmpl->base_fence);
cs->sequence = cs_cmpl->cs_seq;
......@@ -458,9 +457,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
&cs_cmpl->base_fence;
ctx->cs_sequence++;
dma_fence_get(&cs_cmpl->base_fence);
hl_fence_get(&cs_cmpl->base_fence);
dma_fence_put(other);
hl_fence_put(other);
spin_unlock(&ctx->cs_lock);
......@@ -690,8 +689,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
rc = -ENOMEM;
if (is_kernel_allocated_cb)
goto release_cb;
else
goto free_cs_object;
goto free_cs_object;
}
job->id = i + 1;
......@@ -773,7 +772,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
struct hl_ctx *ctx = hpriv->ctx;
struct hl_cs_chunk *cs_chunk_array, *chunk;
struct hw_queue_properties *hw_queue_prop;
struct dma_fence *sig_fence = NULL;
struct hl_fence *sig_fence = NULL;
struct hl_cs_job *job;
struct hl_cs *cs;
struct hl_cb *cb;
......@@ -883,14 +882,14 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
dev_err(hdev->dev,
"CS seq 0x%llx is not of a signal CS\n",
signal_seq);
dma_fence_put(sig_fence);
hl_fence_put(sig_fence);
rc = -EINVAL;
goto free_signal_seq_array;
}
if (dma_fence_is_signaled(sig_fence)) {
if (completion_done(&sig_fence->completion)) {
/* signal CS already finished */
dma_fence_put(sig_fence);
hl_fence_put(sig_fence);
rc = 0;
goto free_signal_seq_array;
}
......@@ -902,7 +901,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
rc = allocate_cs(hdev, ctx, cs_type, &cs);
if (rc) {
if (cs_type == CS_TYPE_WAIT)
dma_fence_put(sig_fence);
hl_fence_put(sig_fence);
hl_ctx_put(ctx);
goto free_signal_seq_array;
}
......@@ -1162,7 +1161,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
static long _hl_cs_wait_ioctl(struct hl_device *hdev,
struct hl_ctx *ctx, u64 timeout_us, u64 seq)
{
struct dma_fence *fence;
struct hl_fence *fence;
unsigned long timeout;
long rc;
......@@ -1181,12 +1180,18 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
"Can't wait on CS %llu because current CS is at seq %llu\n",
seq, ctx->cs_sequence);
} else if (fence) {
rc = dma_fence_wait_timeout(fence, true, timeout);
if (!timeout_us)
rc = completion_done(&fence->completion);
else
rc = wait_for_completion_interruptible_timeout(
&fence->completion, timeout);
if (fence->error == -ETIMEDOUT)
rc = -ETIMEDOUT;
else if (fence->error == -EIO)
rc = -EIO;
dma_fence_put(fence);
hl_fence_put(fence);
} else {
dev_dbg(hdev->dev,
"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
......
......@@ -23,7 +23,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
*/
for (i = 0 ; i < hdev->asic_prop.max_pending_cs ; i++)
dma_fence_put(ctx->cs_pending[i]);
hl_fence_put(ctx->cs_pending[i]);
kfree(ctx->cs_pending);
......@@ -37,6 +37,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
if ((hdev->in_debug) && (hdev->compute_ctx == ctx))
hl_device_set_debug_mode(hdev, false);
hl_cb_va_pool_fini(ctx);
hl_vm_ctx_fini(ctx);
hl_asid_free(hdev, ctx->asid);
} else {
......@@ -128,7 +129,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
atomic_set(&ctx->thread_ctx_switch_token, 1);
ctx->thread_ctx_switch_wait_token = 0;
ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
sizeof(struct dma_fence *),
sizeof(struct hl_fence *),
GFP_KERNEL);
if (!ctx->cs_pending)
return -ENOMEM;
......@@ -155,15 +156,24 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
goto err_asid_free;
}
rc = hl_cb_va_pool_init(ctx);
if (rc) {
dev_err(hdev->dev,
"Failed to init VA pool for mapped CB\n");
goto err_vm_ctx_fini;
}
rc = hdev->asic_funcs->ctx_init(ctx);
if (rc) {
dev_err(hdev->dev, "ctx_init failed\n");
goto err_vm_ctx_fini;
goto err_cb_va_pool_fini;
}
}
return 0;
err_cb_va_pool_fini:
hl_cb_va_pool_fini(ctx);
err_vm_ctx_fini:
hl_vm_ctx_fini(ctx);
err_asid_free:
......@@ -184,10 +194,10 @@ int hl_ctx_put(struct hl_ctx *ctx)
return kref_put(&ctx->refcount, hl_ctx_do_release);
}
struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
{
struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
struct dma_fence *fence;
struct hl_fence *fence;
spin_lock(&ctx->cs_lock);
......@@ -201,8 +211,9 @@ struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
return NULL;
}
fence = dma_fence_get(
ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)]);
fence = ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)];
hl_fence_get(fence);
spin_unlock(&ctx->cs_lock);
return fence;
......
......@@ -21,7 +21,7 @@ static struct dentry *hl_debug_root;
static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
u8 i2c_reg, long *val)
{
struct armcp_packet pkt;
struct cpucp_packet pkt;
int rc;
if (hl_device_disabled_or_in_reset(hdev))
......@@ -29,8 +29,8 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_RD <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_RD <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.i2c_bus = i2c_bus;
pkt.i2c_addr = i2c_addr;
pkt.i2c_reg = i2c_reg;
......@@ -47,7 +47,7 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
u8 i2c_reg, u32 val)
{
struct armcp_packet pkt;
struct cpucp_packet pkt;
int rc;
if (hl_device_disabled_or_in_reset(hdev))
......@@ -55,8 +55,8 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_WR <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_WR <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.i2c_bus = i2c_bus;
pkt.i2c_addr = i2c_addr;
pkt.i2c_reg = i2c_reg;
......@@ -73,7 +73,7 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)
{
struct armcp_packet pkt;
struct cpucp_packet pkt;
int rc;
if (hl_device_disabled_or_in_reset(hdev))
......@@ -81,8 +81,8 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(ARMCP_PACKET_LED_SET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.ctl = cpu_to_le32(CPUCP_PACKET_LED_SET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.led_index = cpu_to_le32(led);
pkt.value = cpu_to_le64(state);
......@@ -110,8 +110,8 @@ static int command_buffers_show(struct seq_file *s, void *data)
seq_puts(s, "---------------------------------------------------------------\n");
}
seq_printf(s,
" %03d %d 0x%08x %d %d %d\n",
cb->id, cb->ctx_id, cb->size,
" %03llu %d 0x%08x %d %d %d\n",
cb->id, cb->ctx->asid, cb->size,
kref_read(&cb->refcount),
cb->mmap, cb->cs_cnt);
}
......@@ -354,6 +354,14 @@ static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
mmu_specs->hop4_shift);
}
static inline u64 get_hop5_pte_addr(struct hl_ctx *ctx,
struct hl_mmu_properties *mmu_specs,
u64 hop_addr, u64 vaddr)
{
return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop5_mask,
mmu_specs->hop5_shift);
}
static inline u64 get_next_hop_addr(u64 curr_pte)
{
if (curr_pte & PAGE_PRESENT_MASK)
......@@ -377,6 +385,7 @@ static int mmu_show(struct seq_file *s, void *data)
hop2_addr = 0, hop2_pte_addr = 0, hop2_pte = 0,
hop3_addr = 0, hop3_pte_addr = 0, hop3_pte = 0,
hop4_addr = 0, hop4_pte_addr = 0, hop4_pte = 0,
hop5_addr = 0, hop5_pte_addr = 0, hop5_pte = 0,
virt_addr = dev_entry->mmu_addr;
if (!hdev->mmu_enable)
......@@ -428,20 +437,49 @@ static int mmu_show(struct seq_file *s, void *data)
hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr);
if (!(hop3_pte & LAST_MASK)) {
if (mmu_prop->num_hops == MMU_ARCH_5_HOPS) {
if (!(hop3_pte & LAST_MASK)) {
hop4_addr = get_next_hop_addr(hop3_pte);
if (hop4_addr == ULLONG_MAX)
goto not_mapped;
hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop,
hop4_addr, virt_addr);
hop4_pte = hdev->asic_funcs->read_pte(hdev,
hop4_pte_addr);
if (!(hop4_pte & PAGE_PRESENT_MASK))
goto not_mapped;
} else {
if (!(hop3_pte & PAGE_PRESENT_MASK))
goto not_mapped;
}
} else {
hop4_addr = get_next_hop_addr(hop3_pte);
if (hop4_addr == ULLONG_MAX)
goto not_mapped;
hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
virt_addr);
hop4_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr);
if (!(hop4_pte & PAGE_PRESENT_MASK))
goto not_mapped;
} else {
if (!(hop3_pte & PAGE_PRESENT_MASK))
goto not_mapped;
hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop,
hop4_addr, virt_addr);
hop4_pte = hdev->asic_funcs->read_pte(hdev,
hop4_pte_addr);
if (!(hop4_pte & LAST_MASK)) {
hop5_addr = get_next_hop_addr(hop4_pte);
if (hop5_addr == ULLONG_MAX)
goto not_mapped;
hop5_pte_addr = get_hop5_pte_addr(ctx, mmu_prop,
hop5_addr, virt_addr);
hop5_pte = hdev->asic_funcs->read_pte(hdev,
hop5_pte_addr);
if (!(hop5_pte & PAGE_PRESENT_MASK))
goto not_mapped;
} else {
if (!(hop4_pte & PAGE_PRESENT_MASK))
goto not_mapped;
}
}
seq_printf(s, "asid: %u, virt_addr: 0x%llx\n",
......@@ -463,10 +501,22 @@ static int mmu_show(struct seq_file *s, void *data)
seq_printf(s, "hop3_pte_addr: 0x%llx\n", hop3_pte_addr);
seq_printf(s, "hop3_pte: 0x%llx\n", hop3_pte);
if (!(hop3_pte & LAST_MASK)) {
if (mmu_prop->num_hops == MMU_ARCH_5_HOPS) {
if (!(hop3_pte & LAST_MASK)) {
seq_printf(s, "hop4_addr: 0x%llx\n", hop4_addr);
seq_printf(s, "hop4_pte_addr: 0x%llx\n", hop4_pte_addr);
seq_printf(s, "hop4_pte: 0x%llx\n", hop4_pte);
}
} else {
seq_printf(s, "hop4_addr: 0x%llx\n", hop4_addr);
seq_printf(s, "hop4_pte_addr: 0x%llx\n", hop4_pte_addr);
seq_printf(s, "hop4_pte: 0x%llx\n", hop4_pte);
if (!(hop4_pte & LAST_MASK)) {
seq_printf(s, "hop5_addr: 0x%llx\n", hop5_addr);
seq_printf(s, "hop5_pte_addr: 0x%llx\n", hop5_pte_addr);
seq_printf(s, "hop5_pte: 0x%llx\n", hop5_pte);
}
}
goto out;
......
......@@ -123,9 +123,13 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct hl_fpriv *hpriv = filp->private_data;
unsigned long vm_pgoff;
if ((vma->vm_pgoff & HL_MMAP_CB_MASK) == HL_MMAP_CB_MASK) {
vma->vm_pgoff ^= HL_MMAP_CB_MASK;
vm_pgoff = vma->vm_pgoff;
vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff);
switch (vm_pgoff & HL_MMAP_TYPE_MASK) {
case HL_MMAP_TYPE_CB:
return hl_cb_mmap(hpriv, vma);
}
......@@ -286,7 +290,7 @@ static int device_early_init(struct hl_device *hdev)
}
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
snprintf(workq_name, 32, "hl-free-jobs-%u", i);
snprintf(workq_name, 32, "hl-free-jobs-%u", (u32) i);
hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
if (hdev->cq_wq[i] == NULL) {
dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
......@@ -317,6 +321,10 @@ static int device_early_init(struct hl_device *hdev)
goto free_chip_info;
}
rc = hl_mmu_if_set_funcs(hdev);
if (rc)
goto free_idle_busy_ts_arr;
hl_cb_mgr_init(&hdev->kernel_cb_mgr);
mutex_init(&hdev->send_cpu_message_lock);
......@@ -330,6 +338,8 @@ static int device_early_init(struct hl_device *hdev)
return 0;
free_idle_busy_ts_arr:
kfree(hdev->idle_busy_ts_arr);
free_chip_info:
kfree(hdev->hl_chip_info);
free_eq_wq:
......@@ -871,7 +881,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
* so this message won't be sent
*/
if (hl_fw_send_pci_access_msg(hdev,
ARMCP_PACKET_DISABLE_PCI_ACCESS))
CPUCP_PACKET_DISABLE_PCI_ACCESS))
dev_warn(hdev->dev,
"Failed to disable PCI access by F/W\n");
}
......
......@@ -11,6 +11,7 @@
#include "habanalabs.h"
#include <linux/pci.h>
#include <linux/aer.h>
#include <linux/module.h>
#define HL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team"
......@@ -408,6 +409,8 @@ static int hl_pci_probe(struct pci_dev *pdev,
pci_set_drvdata(pdev, hdev);
pci_enable_pcie_error_reporting(pdev);
rc = hl_device_init(hdev, hl_class);
if (rc) {
dev_err(&pdev->dev, "Fatal error during habanalabs device init\n");
......@@ -440,22 +443,93 @@ static void hl_pci_remove(struct pci_dev *pdev)
return;
hl_device_fini(hdev);
pci_disable_pcie_error_reporting(pdev);
pci_set_drvdata(pdev, NULL);
destroy_hdev(hdev);
}
/**
* hl_pci_err_detected - a PCI bus error detected on this device
*
* @pdev: pointer to pci device
* @state: PCI error type
*
* Called by the PCI subsystem whenever a non-correctable
* PCI bus error is detected
*/
static pci_ers_result_t
hl_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state)
{
struct hl_device *hdev = pci_get_drvdata(pdev);
enum pci_ers_result result;
switch (state) {
case pci_channel_io_normal:
return PCI_ERS_RESULT_CAN_RECOVER;
case pci_channel_io_frozen:
dev_warn(hdev->dev, "frozen state error detected\n");
result = PCI_ERS_RESULT_NEED_RESET;
break;
case pci_channel_io_perm_failure:
dev_warn(hdev->dev, "failure state error detected\n");
result = PCI_ERS_RESULT_DISCONNECT;
break;
default:
result = PCI_ERS_RESULT_NONE;
}
hdev->asic_funcs->halt_engines(hdev, true);
return result;
}
/**
* hl_pci_err_resume - resume after a PCI slot reset
*
* @pdev: pointer to pci device
*
*/
static void hl_pci_err_resume(struct pci_dev *pdev)
{
struct hl_device *hdev = pci_get_drvdata(pdev);
dev_warn(hdev->dev, "Resuming device after PCI slot reset\n");
hl_device_resume(hdev);
}
/**
* hl_pci_err_slot_reset - a PCI slot reset has just happened
*
* @pdev: pointer to pci device
*
* Determine if the driver can recover from the PCI slot reset
*/
static pci_ers_result_t hl_pci_err_slot_reset(struct pci_dev *pdev)
{
return PCI_ERS_RESULT_RECOVERED;
}
static const struct dev_pm_ops hl_pm_ops = {
.suspend = hl_pmops_suspend,
.resume = hl_pmops_resume,
};
static const struct pci_error_handlers hl_pci_err_handler = {
.error_detected = hl_pci_err_detected,
.slot_reset = hl_pci_err_slot_reset,
.resume = hl_pci_err_resume,
};
static struct pci_driver hl_pci_driver = {
.name = HL_NAME,
.id_table = ids,
.probe = hl_pci_probe,
.remove = hl_pci_remove,
.driver.pm = &hl_pm_ops,
.err_handler = &hl_pci_err_handler,
};
/*
......
......@@ -8,6 +8,7 @@
#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
......@@ -64,14 +65,14 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
hw_ip.dram_enabled = 1;
hw_ip.num_of_events = prop->num_of_events;
memcpy(hw_ip.armcp_version, prop->armcp_info.armcp_version,
memcpy(hw_ip.cpucp_version, prop->cpucp_info.cpucp_version,
min(VERSION_MAX_LEN, HL_INFO_VERSION_MAX_LEN));
memcpy(hw_ip.card_name, prop->armcp_info.card_name,
memcpy(hw_ip.card_name, prop->cpucp_info.card_name,
min(CARD_NAME_MAX_LEN, HL_INFO_CARD_NAME_MAX_LEN));
hw_ip.armcp_cpld_version = le32_to_cpu(prop->armcp_info.cpld_version);
hw_ip.module_id = le32_to_cpu(prop->armcp_info.card_location);
hw_ip.cpld_version = le32_to_cpu(prop->cpucp_info.cpld_version);
hw_ip.module_id = le32_to_cpu(prop->cpucp_info.card_location);
hw_ip.psoc_pci_pll_nr = prop->psoc_pci_pll_nr;
hw_ip.psoc_pci_pll_nf = prop->psoc_pci_pll_nf;
......@@ -131,7 +132,7 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
return -EINVAL;
hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev,
&hw_idle.busy_engines_mask, NULL);
&hw_idle.busy_engines_mask_ext, NULL);
return copy_to_user(out, &hw_idle,
min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
......@@ -276,10 +277,45 @@ static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args)
min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0;
}
static int pci_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
struct hl_device *hdev = hpriv->hdev;
struct hl_info_pci_counters pci_counters = {0};
u32 max_size = args->return_size;
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
int rc;
if ((!max_size) || (!out))
return -EINVAL;
rc = hl_fw_cpucp_pci_counters_get(hdev, &pci_counters);
if (rc)
return rc;
return copy_to_user(out, &pci_counters,
min((size_t) max_size, sizeof(pci_counters))) ? -EFAULT : 0;
}
static int clk_throttle_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
struct hl_device *hdev = hpriv->hdev;
struct hl_info_clk_throttle clk_throttle = {0};
u32 max_size = args->return_size;
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
if ((!max_size) || (!out))
return -EINVAL;
clk_throttle.clk_throttling_reason = hdev->clk_throttling_reason;
return copy_to_user(out, &clk_throttle,
min((size_t) max_size, sizeof(clk_throttle))) ? -EFAULT : 0;
}
static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
struct hl_device *hdev = hpriv->hdev;
struct hl_info_cs_counters cs_counters = {0};
struct hl_info_cs_counters cs_counters = { {0} };
u32 max_size = args->return_size;
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
......@@ -297,6 +333,51 @@ static int cs_counters_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
min((size_t) max_size, sizeof(cs_counters))) ? -EFAULT : 0;
}
static int sync_manager_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
struct hl_device *hdev = hpriv->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_info_sync_manager sm_info = {0};
u32 max_size = args->return_size;
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
if ((!max_size) || (!out))
return -EINVAL;
if (args->dcore_id >= HL_MAX_DCORES)
return -EINVAL;
sm_info.first_available_sync_object =
prop->first_available_user_sob[args->dcore_id];
sm_info.first_available_monitor =
prop->first_available_user_mon[args->dcore_id];
return copy_to_user(out, &sm_info, min_t(size_t, (size_t) max_size,
sizeof(sm_info))) ? -EFAULT : 0;
}
static int total_energy_consumption_info(struct hl_fpriv *hpriv,
struct hl_info_args *args)
{
struct hl_device *hdev = hpriv->hdev;
struct hl_info_energy total_energy = {0};
u32 max_size = args->return_size;
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
int rc;
if ((!max_size) || (!out))
return -EINVAL;
rc = hl_fw_cpucp_total_energy_get(hdev,
&total_energy.total_energy_consumption);
if (rc)
return rc;
return copy_to_user(out, &total_energy,
min((size_t) max_size, sizeof(total_energy))) ? -EFAULT : 0;
}
static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
struct device *dev)
{
......@@ -360,6 +441,18 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_CS_COUNTERS:
return cs_counters_info(hpriv, args);
case HL_INFO_PCI_COUNTERS:
return pci_counters_info(hpriv, args);
case HL_INFO_CLK_THROTTLE_REASON:
return clk_throttle_info(hpriv, args);
case HL_INFO_SYNC_MANAGER:
return sync_manager_info(hpriv, args);
case HL_INFO_TOTAL_ENERGY:
return total_energy_consumption_info(hpriv, args);
default:
dev_err(dev, "Invalid request %d\n", args->op);
rc = -ENOTTY;
......
......@@ -288,10 +288,10 @@ static void ext_queue_schedule_job(struct hl_cs_job *job)
ptr = cb->bus_address;
cq_pkt.data = cpu_to_le32(
((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
& CQ_ENTRY_SHADOW_INDEX_MASK) |
(1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
(1 << CQ_ENTRY_READY_SHIFT));
((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
& CQ_ENTRY_SHADOW_INDEX_MASK) |
FIELD_PREP(CQ_ENTRY_SHADOW_INDEX_VALID_MASK, 1) |
FIELD_PREP(CQ_ENTRY_READY_MASK, 1));
/*
* No need to protect pi_offset because scheduling to the
......@@ -474,7 +474,7 @@ static void init_signal_wait_cs(struct hl_cs *cs)
* wait CS was submitted.
*/
mb();
dma_fence_put(cs->signal_fence);
hl_fence_put(cs->signal_fence);
cs->signal_fence = NULL;
}
}
......
......@@ -13,7 +13,7 @@
#define HWMON_NR_SENSOR_TYPES (hwmon_pwm + 1)
int hl_build_hwmon_channel_info(struct hl_device *hdev,
struct armcp_sensor *sensors_arr)
struct cpucp_sensor *sensors_arr)
{
u32 counts[HWMON_NR_SENSOR_TYPES] = {0};
u32 *sensors_by_type[HWMON_NR_SENSOR_TYPES] = {NULL};
......@@ -24,7 +24,7 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev,
enum hwmon_sensor_types type;
int rc, i, j;
for (i = 0 ; i < ARMCP_MAX_SENSORS ; i++) {
for (i = 0 ; i < CPUCP_MAX_SENSORS ; i++) {
type = le32_to_cpu(sensors_arr[i].type);
if ((type == 0) && (sensors_arr[i].flags == 0))
......@@ -311,13 +311,13 @@ static const struct hwmon_ops hl_hwmon_ops = {
int hl_get_temperature(struct hl_device *hdev,
int sensor_index, u32 attr, long *value)
{
struct armcp_packet pkt;
struct cpucp_packet pkt;
int rc;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_GET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEMPERATURE_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.sensor_index = __cpu_to_le16(sensor_index);
pkt.type = __cpu_to_le16(attr);
......@@ -337,13 +337,13 @@ int hl_get_temperature(struct hl_device *hdev,
int hl_set_temperature(struct hl_device *hdev,
int sensor_index, u32 attr, long value)
{
struct armcp_packet pkt;
struct cpucp_packet pkt;
int rc;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_SET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEMPERATURE_SET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.sensor_index = __cpu_to_le16(sensor_index);
pkt.type = __cpu_to_le16(attr);
pkt.value = __cpu_to_le64(value);
......@@ -362,13 +362,13 @@ int hl_set_temperature(struct hl_device *hdev,
int hl_get_voltage(struct hl_device *hdev,
int sensor_index, u32 attr, long *value)
{
struct armcp_packet pkt;
struct cpucp_packet pkt;
int rc;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_GET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.ctl = cpu_to_le32(CPUCP_PACKET_VOLTAGE_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.sensor_index = __cpu_to_le16(sensor_index);
pkt.type = __cpu_to_le16(attr);
......@@ -388,13 +388,13 @@ int hl_get_voltage(struct hl_device *hdev,
int hl_get_current(struct hl_device *hdev,
int sensor_index, u32 attr, long *value)
{
struct armcp_packet pkt;
struct cpucp_packet pkt;
int rc;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_GET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.ctl = cpu_to_le32(CPUCP_PACKET_CURRENT_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.sensor_index = __cpu_to_le16(sensor_index);
pkt.type = __cpu_to_le16(attr);
......@@ -414,13 +414,13 @@ int hl_get_current(struct hl_device *hdev,
int hl_get_fan_speed(struct hl_device *hdev,
int sensor_index, u32 attr, long *value)
{
struct armcp_packet pkt;
struct cpucp_packet pkt;
int rc;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(ARMCP_PACKET_FAN_SPEED_GET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.ctl = cpu_to_le32(CPUCP_PACKET_FAN_SPEED_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.sensor_index = __cpu_to_le16(sensor_index);
pkt.type = __cpu_to_le16(attr);
......@@ -440,13 +440,13 @@ int hl_get_fan_speed(struct hl_device *hdev,
int hl_get_pwm_info(struct hl_device *hdev,
int sensor_index, u32 attr, long *value)
{
struct armcp_packet pkt;
struct cpucp_packet pkt;
int rc;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_GET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.ctl = cpu_to_le32(CPUCP_PACKET_PWM_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.sensor_index = __cpu_to_le16(sensor_index);
pkt.type = __cpu_to_le16(attr);
......@@ -466,13 +466,13 @@ int hl_get_pwm_info(struct hl_device *hdev,
void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
long value)
{
struct armcp_packet pkt;
struct cpucp_packet pkt;
int rc;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_SET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.ctl = cpu_to_le32(CPUCP_PACKET_PWM_SET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.sensor_index = __cpu_to_le16(sensor_index);
pkt.type = __cpu_to_le16(attr);
pkt.value = cpu_to_le64(value);
......@@ -489,13 +489,13 @@ void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
int hl_set_voltage(struct hl_device *hdev,
int sensor_index, u32 attr, long value)
{
struct armcp_packet pkt;
struct cpucp_packet pkt;
int rc;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_SET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.ctl = cpu_to_le32(CPUCP_PACKET_VOLTAGE_SET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.sensor_index = __cpu_to_le16(sensor_index);
pkt.type = __cpu_to_le16(attr);
pkt.value = __cpu_to_le64(value);
......@@ -514,13 +514,13 @@ int hl_set_voltage(struct hl_device *hdev,
int hl_set_current(struct hl_device *hdev,
int sensor_index, u32 attr, long value)
{
struct armcp_packet pkt;
struct cpucp_packet pkt;
int rc;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_SET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.ctl = cpu_to_le32(CPUCP_PACKET_CURRENT_SET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.sensor_index = __cpu_to_le16(sensor_index);
pkt.type = __cpu_to_le16(attr);
pkt.value = __cpu_to_le64(value);
......@@ -549,7 +549,7 @@ int hl_hwmon_init(struct hl_device *hdev)
hdev->hl_chip_info->ops = &hl_hwmon_ops;
hdev->hwmon_dev = hwmon_device_register_with_info(dev,
prop->armcp_info.card_name, hdev,
prop->cpucp_info.card_name, hdev,
hdev->hl_chip_info, NULL);
if (IS_ERR(hdev->hwmon_dev)) {
rc = PTR_ERR(hdev->hwmon_dev);
......
......@@ -11,7 +11,7 @@
/**
* struct hl_eqe_work - This structure is used to schedule work of EQ
* entry and armcp_reset event
* entry and cpucp_reset event
*
* @eq_work: workqueue object to run when EQ entry is received
* @hdev: pointer to device structure
......
......@@ -505,41 +505,32 @@ static inline int add_va_block(struct hl_device *hdev,
}
/*
* get_va_block - get a virtual block with the requested size
*
* @hdev : pointer to the habanalabs device structure
* @va_range : pointer to the virtual addresses range
* @size : requested block size
* @hint_addr : hint for request address by the user
* @is_userptr : is host or DRAM memory
* get_va_block() - get a virtual block for the given size and alignment.
* @hdev: pointer to the habanalabs device structure.
* @va_range: pointer to the virtual addresses range.
* @size: requested block size.
* @hint_addr: hint for requested address by the user.
* @va_block_align: required alignment of the virtual block start address.
*
* This function does the following:
* - Iterate on the virtual block list to find a suitable virtual block for the
* requested size
* - Reserve the requested block and update the list
* - Return the start address of the virtual block
* given size and alignment.
* - Reserve the requested block and update the list.
* - Return the start address of the virtual block.
*/
static u64 get_va_block(struct hl_device *hdev,
struct hl_va_range *va_range, u64 size, u64 hint_addr,
bool is_userptr)
static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range,
u64 size, u64 hint_addr, u32 va_block_align)
{
struct hl_vm_va_block *va_block, *new_va_block = NULL;
u64 valid_start, valid_size, prev_start, prev_end, page_mask,
u64 valid_start, valid_size, prev_start, prev_end, align_mask,
res_valid_start = 0, res_valid_size = 0;
u32 page_size;
bool add_prev = false;
if (is_userptr)
/*
* We cannot know if the user allocated memory with huge pages
* or not, hence we continue with the biggest possible
* granularity.
*/
page_size = hdev->asic_prop.pmmu_huge.page_size;
else
page_size = hdev->asic_prop.dmmu.page_size;
align_mask = ~((u64)va_block_align - 1);
page_mask = ~((u64)page_size - 1);
/* check if hint_addr is aligned */
if (hint_addr & (va_block_align - 1))
hint_addr = 0;
mutex_lock(&va_range->lock);
......@@ -549,9 +540,9 @@ static u64 get_va_block(struct hl_device *hdev,
/* calc the first possible aligned addr */
valid_start = va_block->start;
if (valid_start & (page_size - 1)) {
valid_start &= page_mask;
valid_start += page_size;
if (valid_start & (va_block_align - 1)) {
valid_start &= align_mask;
valid_start += va_block_align;
if (valid_start > va_block->end)
continue;
}
......@@ -863,7 +854,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
struct hl_va_range *va_range;
enum vm_type_t *vm_type;
u64 ret_vaddr, hint_addr;
u32 handle = 0;
u32 handle = 0, va_block_align;
int rc;
bool is_userptr = args->flags & HL_MEM_USERPTR;
......@@ -873,6 +864,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
if (is_userptr) {
u64 addr = args->map_host.host_virt_addr,
size = args->map_host.mem_size;
u32 page_size = hdev->asic_prop.pmmu.page_size,
huge_page_size = hdev->asic_prop.pmmu_huge.page_size;
rc = dma_map_host_va(hdev, addr, size, &userptr);
if (rc) {
......@@ -892,6 +885,27 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
vm_type = (enum vm_type_t *) userptr;
hint_addr = args->map_host.hint_addr;
handle = phys_pg_pack->handle;
/* get required alignment */
if (phys_pg_pack->page_size == page_size) {
va_range = ctx->host_va_range;
/*
* huge page alignment may be needed in case of regular
* page mapping, depending on the host VA alignment
*/
if (addr & (huge_page_size - 1))
va_block_align = page_size;
else
va_block_align = huge_page_size;
} else {
/*
* huge page alignment is needed in case of huge page
* mapping
*/
va_range = ctx->host_huge_va_range;
va_block_align = huge_page_size;
}
} else {
handle = lower_32_bits(args->map_device.handle);
......@@ -912,6 +926,10 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
vm_type = (enum vm_type_t *) phys_pg_pack;
hint_addr = args->map_device.hint_addr;
/* DRAM VA alignment is the same as the DRAM page size */
va_range = ctx->dram_va_range;
va_block_align = hdev->asic_prop.dmmu.page_size;
}
/*
......@@ -933,16 +951,8 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
goto hnode_err;
}
if (is_userptr)
if (phys_pg_pack->page_size == hdev->asic_prop.pmmu.page_size)
va_range = ctx->host_va_range;
else
va_range = ctx->host_huge_va_range;
else
va_range = ctx->dram_va_range;
ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
hint_addr, is_userptr);
hint_addr, va_block_align);
if (!ret_vaddr) {
dev_err(hdev->dev, "no available va block for handle %u\n",
handle);
......
......@@ -9,7 +9,6 @@
#include "../include/hw_ip/pci/pci_general.h"
#include <linux/pci.h>
#include <linux/bitfield.h>
#define HL_PLDM_PCI_ELBI_TIMEOUT_MSEC (HL_PCI_ELBI_TIMEOUT_MSEC * 10)
......@@ -339,12 +338,17 @@ static int hl_pci_set_dma_mask(struct hl_device *hdev)
/**
* hl_pci_init() - PCI initialization code.
* @hdev: Pointer to hl_device structure.
* @cpu_boot_status_reg: status register of the device's CPU
* @boot_err0_reg: boot error register of the device's CPU
* @preboot_ver_timeout: how much to wait before bailing out on reading
* the preboot version
*
* Set DMA masks, initialize the PCI controller and map the PCI BARs.
*
* Return: 0 on success, non-zero for failure.
*/
int hl_pci_init(struct hl_device *hdev)
int hl_pci_init(struct hl_device *hdev, u32 cpu_boot_status_reg,
u32 boot_err0_reg, u32 preboot_ver_timeout)
{
struct pci_dev *pdev = hdev->pdev;
int rc;
......@@ -376,6 +380,15 @@ int hl_pci_init(struct hl_device *hdev)
if (rc)
goto unmap_pci_bars;
/* Before continuing in the initialization, we need to read the preboot
* version to determine whether we run with a security-enabled firmware
* The check will be done in each ASIC's specific code
*/
rc = hl_fw_read_preboot_ver(hdev, cpu_boot_status_reg, boot_err0_reg,
preboot_ver_timeout);
if (rc)
goto unmap_pci_bars;
return 0;
unmap_pci_bars:
......
......@@ -11,18 +11,18 @@
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
{
struct armcp_packet pkt;
struct cpucp_packet pkt;
long result;
int rc;
memset(&pkt, 0, sizeof(pkt));
if (curr)
pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_CURR_GET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_CURR_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
else
pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_GET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.pll_index = cpu_to_le32(pll_index);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
......@@ -40,13 +40,13 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
{
struct armcp_packet pkt;
struct cpucp_packet pkt;
int rc;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_SET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.ctl = cpu_to_le32(CPUCP_PACKET_FREQUENCY_SET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.pll_index = cpu_to_le32(pll_index);
pkt.value = cpu_to_le64(freq);
......@@ -61,14 +61,14 @@ void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
u64 hl_get_max_power(struct hl_device *hdev)
{
struct armcp_packet pkt;
struct cpucp_packet pkt;
long result;
int rc;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_GET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_GET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
0, &result);
......@@ -83,13 +83,13 @@ u64 hl_get_max_power(struct hl_device *hdev)
void hl_set_max_power(struct hl_device *hdev)
{
struct armcp_packet pkt;
struct cpucp_packet pkt;
int rc;
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_SET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.ctl = cpu_to_le32(CPUCP_PACKET_MAX_POWER_SET <<
CPUCP_PKT_CTL_OPCODE_SHIFT);
pkt.value = cpu_to_le64(hdev->max_power);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
......@@ -112,7 +112,7 @@ static ssize_t armcp_kernel_ver_show(struct device *dev,
{
struct hl_device *hdev = dev_get_drvdata(dev);
return sprintf(buf, "%s", hdev->asic_prop.armcp_info.kernel_version);
return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.kernel_version);
}
static ssize_t armcp_ver_show(struct device *dev, struct device_attribute *attr,
......@@ -120,7 +120,7 @@ static ssize_t armcp_ver_show(struct device *dev, struct device_attribute *attr,
{
struct hl_device *hdev = dev_get_drvdata(dev);
return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.armcp_version);
return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.cpucp_version);
}
static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr,
......@@ -129,7 +129,23 @@ static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr,
struct hl_device *hdev = dev_get_drvdata(dev);
return sprintf(buf, "0x%08x\n",
hdev->asic_prop.armcp_info.cpld_version);
hdev->asic_prop.cpucp_info.cpld_version);
}
static ssize_t cpucp_kernel_ver_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.kernel_version);
}
static ssize_t cpucp_ver_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.cpucp_version);
}
static ssize_t infineon_ver_show(struct device *dev,
......@@ -138,7 +154,7 @@ static ssize_t infineon_ver_show(struct device *dev,
struct hl_device *hdev = dev_get_drvdata(dev);
return sprintf(buf, "0x%04x\n",
hdev->asic_prop.armcp_info.infineon_version);
hdev->asic_prop.cpucp_info.infineon_version);
}
static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr,
......@@ -146,7 +162,7 @@ static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr,
{
struct hl_device *hdev = dev_get_drvdata(dev);
return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.fuse_version);
return sprintf(buf, "%s\n", hdev->asic_prop.cpucp_info.fuse_version);
}
static ssize_t thermal_ver_show(struct device *dev,
......@@ -154,7 +170,7 @@ static ssize_t thermal_ver_show(struct device *dev,
{
struct hl_device *hdev = dev_get_drvdata(dev);
return sprintf(buf, "%s", hdev->asic_prop.armcp_info.thermal_version);
return sprintf(buf, "%s", hdev->asic_prop.cpucp_info.thermal_version);
}
static ssize_t preboot_btl_ver_show(struct device *dev,
......@@ -356,6 +372,8 @@ static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj,
static DEVICE_ATTR_RO(armcp_kernel_ver);
static DEVICE_ATTR_RO(armcp_ver);
static DEVICE_ATTR_RO(cpld_ver);
static DEVICE_ATTR_RO(cpucp_kernel_ver);
static DEVICE_ATTR_RO(cpucp_ver);
static DEVICE_ATTR_RO(device_type);
static DEVICE_ATTR_RO(fuse_ver);
static DEVICE_ATTR_WO(hard_reset);
......@@ -380,6 +398,8 @@ static struct attribute *hl_dev_attrs[] = {
&dev_attr_armcp_kernel_ver.attr,
&dev_attr_armcp_ver.attr,
&dev_attr_cpld_ver.attr,
&dev_attr_cpucp_kernel_ver.attr,
&dev_attr_cpucp_ver.attr,
&dev_attr_device_type.attr,
&dev_attr_fuse_ver.attr,
&dev_attr_hard_reset.attr,
......
......@@ -35,8 +35,6 @@
#error "Number of MSI interrupts must be smaller or equal to GAUDI_MSI_ENTRIES"
#endif
#define QMAN_FENCE_TIMEOUT_USEC 10000 /* 10 ms */
#define CORESIGHT_TIMEOUT_USEC 100000 /* 100 ms */
#define GAUDI_MAX_CLK_FREQ 2200000000ull /* 2200 MHz */
......@@ -44,7 +42,7 @@
#define MAX_POWER_DEFAULT_PCI 200000 /* 200W */
#define MAX_POWER_DEFAULT_PMC 350000 /* 350W */
#define GAUDI_CPU_TIMEOUT_USEC 15000000 /* 15s */
#define GAUDI_CPU_TIMEOUT_USEC 30000000 /* 30s */
#define TPC_ENABLED_MASK 0xFF
......@@ -142,28 +140,28 @@
#define VA_HOST_SPACE_SIZE (VA_HOST_SPACE_END - \
VA_HOST_SPACE_START) /* 767TB */
#define HW_CAP_PLL 0x00000001
#define HW_CAP_HBM 0x00000002
#define HW_CAP_MMU 0x00000004
#define HW_CAP_MME 0x00000008
#define HW_CAP_CPU 0x00000010
#define HW_CAP_PCI_DMA 0x00000020
#define HW_CAP_MSI 0x00000040
#define HW_CAP_CPU_Q 0x00000080
#define HW_CAP_HBM_DMA 0x00000100
#define HW_CAP_CLK_GATE 0x00000200
#define HW_CAP_SRAM_SCRAMBLER 0x00000400
#define HW_CAP_HBM_SCRAMBLER 0x00000800
#define HW_CAP_TPC0 0x01000000
#define HW_CAP_TPC1 0x02000000
#define HW_CAP_TPC2 0x04000000
#define HW_CAP_TPC3 0x08000000
#define HW_CAP_TPC4 0x10000000
#define HW_CAP_TPC5 0x20000000
#define HW_CAP_TPC6 0x40000000
#define HW_CAP_TPC7 0x80000000
#define HW_CAP_TPC_MASK 0xFF000000
#define HW_CAP_PLL BIT(0)
#define HW_CAP_HBM BIT(1)
#define HW_CAP_MMU BIT(2)
#define HW_CAP_MME BIT(3)
#define HW_CAP_CPU BIT(4)
#define HW_CAP_PCI_DMA BIT(5)
#define HW_CAP_MSI BIT(6)
#define HW_CAP_CPU_Q BIT(7)
#define HW_CAP_HBM_DMA BIT(8)
#define HW_CAP_CLK_GATE BIT(9)
#define HW_CAP_SRAM_SCRAMBLER BIT(10)
#define HW_CAP_HBM_SCRAMBLER BIT(11)
#define HW_CAP_TPC0 BIT(24)
#define HW_CAP_TPC1 BIT(25)
#define HW_CAP_TPC2 BIT(26)
#define HW_CAP_TPC3 BIT(27)
#define HW_CAP_TPC4 BIT(28)
#define HW_CAP_TPC5 BIT(29)
#define HW_CAP_TPC6 BIT(30)
#define HW_CAP_TPC7 BIT(31)
#define HW_CAP_TPC_MASK GENMASK(31, 24)
#define HW_CAP_TPC_SHIFT 24
#define GAUDI_CPU_PCI_MSB_ADDR(addr) (((addr) & GENMASK_ULL(49, 39)) >> 39)
......@@ -216,7 +214,7 @@ struct gaudi_internal_qman_info {
/**
* struct gaudi_device - ASIC specific manage structure.
* @armcp_info_get: get information on device from ArmCP
* @cpucp_info_get: get information on device from CPU-CP
* @hw_queues_lock: protects the H/W queues from concurrent access.
* @clk_gate_mutex: protects code areas that require clock gating to be disabled
* temporarily
......@@ -239,7 +237,7 @@ struct gaudi_internal_qman_info {
* 8-bit value so use u8.
*/
struct gaudi_device {
int (*armcp_info_get)(struct hl_device *hdev);
int (*cpucp_info_get)(struct hl_device *hdev);
/* TODO: remove hw_queues_lock after moving to scheduler code */
spinlock_t hw_queues_lock;
......
......@@ -207,7 +207,7 @@ void goya_set_max_power(struct hl_device *hdev, u64 value);
void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
void goya_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
int goya_armcp_info_get(struct hl_device *hdev);
int goya_cpucp_info_get(struct hl_device *hdev);
int goya_debug_coresight(struct hl_device *hdev, void *data);
void goya_halt_coresight(struct hl_device *hdev);
......
......@@ -40,7 +40,7 @@ struct hl_bd {
*/
#define BD_CTL_COMP_OFFSET_SHIFT 16
#define BD_CTL_COMP_OFFSET_MASK 0x00FF0000
#define BD_CTL_COMP_OFFSET_MASK 0x0FFF0000
#define BD_CTL_COMP_DATA_SHIFT 0
#define BD_CTL_COMP_DATA_MASK 0x0000FFFF
......
......@@ -44,6 +44,8 @@
#define MME_NUMBER_OF_MASTER_ENGINES 2
#define MME_NUMBER_OF_SLAVE_ENGINES 2
#define TPC_NUMBER_OF_ENGINES 8
#define DMA_NUMBER_OF_CHANNELS 8
......
......@@ -12,6 +12,7 @@
* PSOC scratch-pad registers
*/
#define mmHW_STATE mmPSOC_GLOBAL_CONF_SCRATCHPAD_0
#define mmFUSE_VER_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_22
#define mmCPU_CMD_STATUS_TO_HOST mmPSOC_GLOBAL_CONF_SCRATCHPAD_23
#define mmCPU_BOOT_ERR0 mmPSOC_GLOBAL_CONF_SCRATCHPAD_24
#define mmCPU_BOOT_ERR1 mmPSOC_GLOBAL_CONF_SCRATCHPAD_25
......
......@@ -22,6 +22,7 @@
#define mmCPU_CQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_8
#define mmCPU_CQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_9
#define mmCPU_CQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_10
#define mmFUSE_VER_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_22
#define mmCPU_CMD_STATUS_TO_HOST mmPSOC_GLOBAL_CONF_SCRATCHPAD_23
#define mmCPU_BOOT_ERR0 mmPSOC_GLOBAL_CONF_SCRATCHPAD_24
#define mmCPU_BOOT_ERR1 mmPSOC_GLOBAL_CONF_SCRATCHPAD_25
......
......@@ -29,6 +29,8 @@
#define HOP3_SHIFT 21
#define HOP4_SHIFT 12
#define MMU_ARCH_5_HOPS 5
#define HOP_PHYS_ADDR_MASK (~FLAGS_MASK)
#define HL_PTE_SIZE sizeof(u64)
......