Commit b78cda79 authored by Greg Kroah-Hartman's avatar Greg Kroah-Hartman

Merge tag 'misc-habanalabs-next-2019-11-21' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next

Oded writes:

This tag contains the following changes for kernel 5.5:

- MMU code improvements that include:
  - Distinguish between "normal" unmapping and unmapping that is done as
    part of the tear-down of a user process. This improves performance of
    unmapping during reset of the device.
  - Add future ASIC support in generic MMU code.

- Improve device reset code by adding more protection around accessing the
  device during the reset process.

- Add new H/W queue type for future ASIC support

- Add more information to be retrieved by users through the INFO IOCTL:
  - clock rate
  - board name
  - reset counters

- Small bug fixes and minor improvements to code.

* tag 'misc-habanalabs-next-2019-11-21' of git://people.freedesktop.org/~gabbayo/linux: (31 commits)
  habanalabs: add more protection of device during reset
  habanalabs: flush EQ workers in hard reset
  habanalabs: make the reset code more consistent
  habanalabs: expose reset counters via existing INFO IOCTL
  habanalabs: make code more concise
  habanalabs: use defines for F/W files
  habanalabs: remove prints on successful device initialization
  habanalabs: remove unnecessary checks
  habanalabs: invalidate MMU cache only once
  habanalabs: skip VA block list update in reset flow
  habanalabs: optimize MMU unmap
  habanalabs: prevent read/write from/to the device during hard reset
  habanalabs: split MMU properties to PCI/DRAM
  habanalabs: re-factor MMU masks and documentation
  habanalabs: type specific MMU cache invalidation
  habanalabs: re-factor memory module code
  habanalabs: export uapi defines to user-space
  habanalabs: don't print error when queues are full
  habanalabs: increase max jobs number to 512
  habanalabs: set ETR as non-secured
  ...
parents 599ea01c 5feccddc
@@ -65,6 +65,18 @@ static void cs_put(struct hl_cs *cs)
 	kref_put(&cs->refcount, cs_do_release);
 }
 
+static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
+{
+	/*
+	 * Patched CB is created for external queues jobs, and for H/W queues
+	 * jobs if the user CB was allocated by driver and MMU is disabled.
+	 */
+	return (job->queue_type == QUEUE_TYPE_EXT ||
+			(job->queue_type == QUEUE_TYPE_HW &&
+				job->is_kernel_allocated_cb &&
+				!hdev->mmu_enable));
+}
+
 /*
  * cs_parser - parse the user command submission
  *
@@ -91,11 +103,13 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
 	parser.patched_cb = NULL;
 	parser.user_cb = job->user_cb;
 	parser.user_cb_size = job->user_cb_size;
-	parser.ext_queue = job->ext_queue;
+	parser.queue_type = job->queue_type;
+	parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
 	job->patched_cb = NULL;
 
 	rc = hdev->asic_funcs->cs_parser(hdev, &parser);
-	if (job->ext_queue) {
+
+	if (is_cb_patched(hdev, job)) {
 		if (!rc) {
 			job->patched_cb = parser.patched_cb;
 			job->job_cb_size = parser.patched_cb_size;
@@ -124,7 +138,7 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
 {
 	struct hl_cs *cs = job->cs;
 
-	if (job->ext_queue) {
+	if (is_cb_patched(hdev, job)) {
 		hl_userptr_delete_list(hdev, &job->userptr_list);
 
 		/*
@@ -140,6 +154,19 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
 		}
 	}
 
+	/* For H/W queue jobs, if a user CB was allocated by driver and MMU is
+	 * enabled, the user CB isn't released in cs_parser() and thus should be
+	 * released here.
+	 */
+	if (job->queue_type == QUEUE_TYPE_HW &&
+			job->is_kernel_allocated_cb && hdev->mmu_enable) {
+		spin_lock(&job->user_cb->lock);
+		job->user_cb->cs_cnt--;
+		spin_unlock(&job->user_cb->lock);
+
+		hl_cb_put(job->user_cb);
+	}
+
 	/*
 	 * This is the only place where there can be multiple threads
 	 * modifying the list at the same time
@@ -150,7 +177,8 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
 	hl_debugfs_remove_job(hdev, job);
 
-	if (job->ext_queue)
+	if (job->queue_type == QUEUE_TYPE_EXT ||
+			job->queue_type == QUEUE_TYPE_HW)
 		cs_put(cs);
 
 	kfree(job);
@@ -387,18 +415,13 @@ static void job_wq_completion(struct work_struct *work)
 	free_job(hdev, job);
 }
 
-static struct hl_cb *validate_queue_index(struct hl_device *hdev,
-					struct hl_cb_mgr *cb_mgr,
-					struct hl_cs_chunk *chunk,
-					bool *ext_queue)
+static int validate_queue_index(struct hl_device *hdev,
+				struct hl_cs_chunk *chunk,
+				enum hl_queue_type *queue_type,
+				bool *is_kernel_allocated_cb)
 {
 	struct asic_fixed_properties *asic = &hdev->asic_prop;
 	struct hw_queue_properties *hw_queue_prop;
-	u32 cb_handle;
-	struct hl_cb *cb;
-
-	/* Assume external queue */
-	*ext_queue = true;
 
 	hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
@@ -406,20 +429,29 @@ static struct hl_cb *validate_queue_index(struct hl_device *hdev,
 		(hw_queue_prop->type == QUEUE_TYPE_NA)) {
 		dev_err(hdev->dev, "Queue index %d is invalid\n",
 			chunk->queue_index);
-		return NULL;
+		return -EINVAL;
 	}
 
 	if (hw_queue_prop->driver_only) {
 		dev_err(hdev->dev,
 			"Queue index %d is restricted for the kernel driver\n",
 			chunk->queue_index);
-		return NULL;
-	} else if (hw_queue_prop->type == QUEUE_TYPE_INT) {
-		*ext_queue = false;
-		return (struct hl_cb *) (uintptr_t) chunk->cb_handle;
+		return -EINVAL;
 	}
 
-	/* Retrieve CB object */
+	*queue_type = hw_queue_prop->type;
+	*is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb;
+
+	return 0;
+}
+
+static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
+					struct hl_cb_mgr *cb_mgr,
+					struct hl_cs_chunk *chunk)
+{
+	struct hl_cb *cb;
+	u32 cb_handle;
+
 	cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);
 
 	cb = hl_cb_get(hdev, cb_mgr, cb_handle);
@@ -444,7 +476,8 @@ static struct hl_cb *validate_queue_index(struct hl_device *hdev,
 		return NULL;
 	}
 
-struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue)
+struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
+		enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
 {
 	struct hl_cs_job *job;
@@ -452,12 +485,14 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue)
 	if (!job)
 		return NULL;
 
-	job->ext_queue = ext_queue;
+	job->queue_type = queue_type;
+	job->is_kernel_allocated_cb = is_kernel_allocated_cb;
 
-	if (job->ext_queue) {
+	if (is_cb_patched(hdev, job))
 		INIT_LIST_HEAD(&job->userptr_list);
+
+	if (job->queue_type == QUEUE_TYPE_EXT)
 		INIT_WORK(&job->finish_work, job_wq_completion);
-	}
 
 	return job;
 }
@@ -470,7 +505,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
 	struct hl_cs_job *job;
 	struct hl_cs *cs;
 	struct hl_cb *cb;
-	bool ext_queue_present = false;
+	bool int_queues_only = true;
 	u32 size_to_copy;
 	int rc, i, parse_cnt;
@@ -514,23 +549,33 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
 	/* Validate ALL the CS chunks before submitting the CS */
 	for (i = 0, parse_cnt = 0 ; i < num_chunks ; i++, parse_cnt++) {
 		struct hl_cs_chunk *chunk = &cs_chunk_array[i];
-		bool ext_queue;
+		enum hl_queue_type queue_type;
+		bool is_kernel_allocated_cb;
+
+		rc = validate_queue_index(hdev, chunk, &queue_type,
+				&is_kernel_allocated_cb);
+		if (rc)
+			goto free_cs_object;
 
-		cb = validate_queue_index(hdev, &hpriv->cb_mgr, chunk,
-					&ext_queue);
-		if (ext_queue) {
-			ext_queue_present = true;
+		if (is_kernel_allocated_cb) {
+			cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
 			if (!cb) {
 				rc = -EINVAL;
 				goto free_cs_object;
 			}
+		} else {
+			cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
 		}
 
-		job = hl_cs_allocate_job(hdev, ext_queue);
+		if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW)
+			int_queues_only = false;
+
+		job = hl_cs_allocate_job(hdev, queue_type,
+						is_kernel_allocated_cb);
 		if (!job) {
 			dev_err(hdev->dev, "Failed to allocate a new job\n");
 			rc = -ENOMEM;
-			if (ext_queue)
+			if (is_kernel_allocated_cb)
 				goto release_cb;
 			else
 				goto free_cs_object;
@@ -540,7 +585,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
 		job->cs = cs;
 		job->user_cb = cb;
 		job->user_cb_size = chunk->cb_size;
-		if (job->ext_queue)
+		if (is_kernel_allocated_cb)
 			job->job_cb_size = cb->size;
 		else
 			job->job_cb_size = chunk->cb_size;
@@ -553,10 +598,11 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
 		/*
 		 * Increment CS reference. When CS reference is 0, CS is
 		 * done and can be signaled to user and free all its resources
-		 * Only increment for JOB on external queues, because only
-		 * for those JOBs we get completion
+		 * Only increment for JOB on external or H/W queues, because
+		 * only for those JOBs we get completion
 		 */
-		if (job->ext_queue)
+		if (job->queue_type == QUEUE_TYPE_EXT ||
+				job->queue_type == QUEUE_TYPE_HW)
 			cs_get(cs);
 
 		hl_debugfs_add_job(hdev, job);
@@ -570,9 +616,9 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
 		}
 	}
 
-	if (!ext_queue_present) {
+	if (int_queues_only) {
 		dev_err(hdev->dev,
-			"Reject CS %d.%llu because no external queues jobs\n",
+			"Reject CS %d.%llu because only internal queues jobs are present\n",
 			cs->ctx->asid, cs->sequence);
 		rc = -EINVAL;
 		goto free_cs_object;
@@ -580,6 +626,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
 	rc = hl_hw_queue_schedule_cs(cs);
 	if (rc) {
-		dev_err(hdev->dev,
-			"Failed to submit CS %d.%llu to H/W queues, error %d\n",
-			cs->ctx->asid, cs->sequence, rc);
+		if (rc != -EAGAIN)
+			dev_err(hdev->dev,
+				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
+				cs->ctx->asid, cs->sequence, rc);
...
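The reworked submission path above splits what used to be a single validate_queue_index() call into validation and CB lookup. A condensed sketch of the per-chunk flow after this change, simplified from the hunks above with locking and error handling trimmed:

	enum hl_queue_type queue_type;
	bool is_kernel_allocated_cb;
	struct hl_cb *cb;
	struct hl_cs_job *job;

	/* 1. Classify the target queue and learn who allocates the CB */
	rc = validate_queue_index(hdev, chunk, &queue_type,
			&is_kernel_allocated_cb);

	/* 2. Only kernel-allocated CBs need a handle lookup; otherwise the
	 *    CB address is carried directly in chunk->cb_handle
	 */
	if (is_kernel_allocated_cb)
		cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
	else
		cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;

	/* 3. The job records both properties instead of one ext_queue flag */
	job = hl_cs_allocate_job(hdev, queue_type, is_kernel_allocated_cb);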
@@ -307,45 +307,57 @@ static inline u64 get_hop0_addr(struct hl_ctx *ctx)
 		(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
 }
 
-static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
-					u64 virt_addr)
+static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
+					u64 virt_addr, u64 mask, u64 shift)
 {
 	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
-			((virt_addr & HOP0_MASK) >> HOP0_SHIFT);
+			((virt_addr & mask) >> shift);
 }
 
-static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
-					u64 virt_addr)
+static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
+					struct hl_mmu_properties *mmu_specs,
+					u64 hop_addr, u64 vaddr)
 {
-	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
-			((virt_addr & HOP1_MASK) >> HOP1_SHIFT);
+	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop0_mask,
+					mmu_specs->hop0_shift);
 }
 
-static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
-					u64 virt_addr)
+static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
+					struct hl_mmu_properties *mmu_specs,
+					u64 hop_addr, u64 vaddr)
 {
-	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
-			((virt_addr & HOP2_MASK) >> HOP2_SHIFT);
+	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop1_mask,
+					mmu_specs->hop1_shift);
 }
 
-static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
-					u64 virt_addr)
+static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
+					struct hl_mmu_properties *mmu_specs,
+					u64 hop_addr, u64 vaddr)
 {
-	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
-			((virt_addr & HOP3_MASK) >> HOP3_SHIFT);
+	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop2_mask,
+					mmu_specs->hop2_shift);
 }
 
-static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
-					u64 virt_addr)
+static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
+					struct hl_mmu_properties *mmu_specs,
+					u64 hop_addr, u64 vaddr)
 {
-	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
-			((virt_addr & HOP4_MASK) >> HOP4_SHIFT);
+	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop3_mask,
+					mmu_specs->hop3_shift);
+}
+
+static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
+					struct hl_mmu_properties *mmu_specs,
+					u64 hop_addr, u64 vaddr)
+{
+	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop4_mask,
+					mmu_specs->hop4_shift);
 }
 
 static inline u64 get_next_hop_addr(u64 curr_pte)
 {
 	if (curr_pte & PAGE_PRESENT_MASK)
-		return curr_pte & PHYS_ADDR_MASK;
+		return curr_pte & HOP_PHYS_ADDR_MASK;
 	else
 		return ULLONG_MAX;
 }
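The new get_hopN_pte_addr() helper folds five nearly identical accessors into one mask-and-shift computation, so the same page-table walk can serve both the PMMU and the DMMU once the masks come from hl_mmu_properties. A minimal standalone sketch of the decomposition it performs, using the Goya mask and shift values from mmu_general.h further down (HOP2_SHIFT is 30, as implied by HOP2_MASK):

	/* Standalone sketch: decompose a virtual address into per-hop table
	 * indices. Each index selects one PTE, which is exactly what
	 * get_hopN_pte_addr() computes as hop_addr + mmu_pte_size * index.
	 */
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t va = 0x2AB1234567000ull;	/* arbitrary example VA */

		printf("hop0 idx %llu\n",
			(unsigned long long) ((va & 0x3000000000000ull) >> 48));
		printf("hop1 idx %llu\n",
			(unsigned long long) ((va & 0x0FF8000000000ull) >> 39));
		printf("hop2 idx %llu\n",
			(unsigned long long) ((va & 0x0007FC0000000ull) >> 30));
		printf("hop3 idx %llu\n",
			(unsigned long long) ((va & 0x000003FE00000ull) >> 21));
		printf("hop4 idx %llu\n",
			(unsigned long long) ((va & 0x00000001FF000ull) >> 12));
		return 0;
	}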
@@ -355,7 +367,10 @@ static int mmu_show(struct seq_file *s, void *data)
 	struct hl_debugfs_entry *entry = s->private;
 	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
 	struct hl_device *hdev = dev_entry->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct hl_mmu_properties *mmu_prop;
 	struct hl_ctx *ctx;
+	bool is_dram_addr;
 	u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0,
 		hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0,
@@ -377,33 +392,39 @@ static int mmu_show(struct seq_file *s, void *data)
 		return 0;
 	}
 
+	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+					prop->va_space_dram_start_address,
+					prop->va_space_dram_end_address);
+
+	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
 	mutex_lock(&ctx->mmu_lock);
 
 	/* the following lookup is copied from unmap() in mmu.c */
 
 	hop0_addr = get_hop0_addr(ctx);
-	hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
+	hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
 	hop0_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr);
 	hop1_addr = get_next_hop_addr(hop0_pte);
 
 	if (hop1_addr == ULLONG_MAX)
 		goto not_mapped;
 
-	hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
+	hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
 	hop1_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr);
 	hop2_addr = get_next_hop_addr(hop1_pte);
 
 	if (hop2_addr == ULLONG_MAX)
 		goto not_mapped;
 
-	hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
+	hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
 	hop2_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr);
 	hop3_addr = get_next_hop_addr(hop2_pte);
 
 	if (hop3_addr == ULLONG_MAX)
 		goto not_mapped;
 
-	hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
+	hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
 	hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr);
 
 	if (!(hop3_pte & LAST_MASK)) {
@@ -412,7 +433,8 @@ static int mmu_show(struct seq_file *s, void *data)
 		if (hop4_addr == ULLONG_MAX)
 			goto not_mapped;
 
-		hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
+		hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
+							virt_addr);
 		hop4_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr);
 		if (!(hop4_pte & PAGE_PRESENT_MASK))
 			goto not_mapped;
@@ -506,6 +528,12 @@ static int engines_show(struct seq_file *s, void *data)
 	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
 	struct hl_device *hdev = dev_entry->hdev;
 
+	if (atomic_read(&hdev->in_reset)) {
+		dev_warn_ratelimited(hdev->dev,
+				"Can't check device idle during reset\n");
+		return 0;
+	}
+
 	hdev->asic_funcs->is_device_idle(hdev, NULL, s);
 
 	return 0;
@@ -534,41 +562,50 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
 				u64 *phys_addr)
 {
 	struct hl_ctx *ctx = hdev->compute_ctx;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct hl_mmu_properties *mmu_prop;
 	u64 hop_addr, hop_pte_addr, hop_pte;
-	u64 offset_mask = HOP4_MASK | OFFSET_MASK;
+	u64 offset_mask = HOP4_MASK | FLAGS_MASK;
 	int rc = 0;
+	bool is_dram_addr;
 
 	if (!ctx) {
 		dev_err(hdev->dev, "no ctx available\n");
 		return -EINVAL;
 	}
 
+	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+					prop->va_space_dram_start_address,
+					prop->va_space_dram_end_address);
+
+	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
 	mutex_lock(&ctx->mmu_lock);
 
 	/* hop 0 */
 	hop_addr = get_hop0_addr(ctx);
-	hop_pte_addr = get_hop0_pte_addr(ctx, hop_addr, virt_addr);
+	hop_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
 	hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
 
 	/* hop 1 */
 	hop_addr = get_next_hop_addr(hop_pte);
 	if (hop_addr == ULLONG_MAX)
 		goto not_mapped;
-	hop_pte_addr = get_hop1_pte_addr(ctx, hop_addr, virt_addr);
+	hop_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
 	hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
 
 	/* hop 2 */
 	hop_addr = get_next_hop_addr(hop_pte);
 	if (hop_addr == ULLONG_MAX)
 		goto not_mapped;
-	hop_pte_addr = get_hop2_pte_addr(ctx, hop_addr, virt_addr);
+	hop_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
 	hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
 
 	/* hop 3 */
 	hop_addr = get_next_hop_addr(hop_pte);
 	if (hop_addr == ULLONG_MAX)
 		goto not_mapped;
-	hop_pte_addr = get_hop3_pte_addr(ctx, hop_addr, virt_addr);
+	hop_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
 	hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
 
 	if (!(hop_pte & LAST_MASK)) {
@@ -576,10 +613,11 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
 		hop_addr = get_next_hop_addr(hop_pte);
 		if (hop_addr == ULLONG_MAX)
 			goto not_mapped;
-		hop_pte_addr = get_hop4_pte_addr(ctx, hop_addr, virt_addr);
+		hop_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop_addr,
							virt_addr);
 		hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
 
-		offset_mask = OFFSET_MASK;
+		offset_mask = FLAGS_MASK;
 	}
 
 	if (!(hop_pte & PAGE_PRESENT_MASK))
@@ -608,6 +646,11 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf,
 	u32 val;
 	ssize_t rc;
 
+	if (atomic_read(&hdev->in_reset)) {
+		dev_warn_ratelimited(hdev->dev, "Can't read during reset\n");
+		return 0;
+	}
+
 	if (*ppos)
 		return 0;
@@ -637,6 +680,11 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf,
 	u32 value;
 	ssize_t rc;
 
+	if (atomic_read(&hdev->in_reset)) {
+		dev_warn_ratelimited(hdev->dev, "Can't write during reset\n");
+		return 0;
+	}
+
 	rc = kstrtouint_from_user(buf, count, 16, &value);
 	if (rc)
 		return rc;
...
@@ -42,12 +42,10 @@ static void hpriv_release(struct kref *ref)
 {
 	struct hl_fpriv *hpriv;
 	struct hl_device *hdev;
-	struct hl_ctx *ctx;
 
 	hpriv = container_of(ref, struct hl_fpriv, refcount);
 
 	hdev = hpriv->hdev;
-	ctx = hpriv->ctx;
 
 	put_pid(hpriv->taskpid);
@@ -889,13 +887,19 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
 	/* Go over all the queues, release all CS and their jobs */
 	hl_cs_rollback_all(hdev);
 
-	/* Kill processes here after CS rollback. This is because the process
-	 * can't really exit until all its CSs are done, which is what we
-	 * do in cs rollback
-	 */
-	if (from_hard_reset_thread)
-		device_kill_open_processes(hdev);
+	if (hard_reset) {
+		/* Kill processes here after CS rollback. This is because the
+		 * process can't really exit until all its CSs are done, which
+		 * is what we do in cs rollback
+		 */
+		device_kill_open_processes(hdev);
 
+		/* Flush the Event queue workers to make sure no other thread is
+		 * reading or writing to registers during the reset
+		 */
+		flush_workqueue(hdev->eq_wq);
+	}
+
 	/* Release kernel context */
 	if ((hard_reset) && (hl_ctx_put(hdev->kernel_ctx) == 1))
 		hdev->kernel_ctx = NULL;
...
@@ -143,10 +143,7 @@ int hl_fw_test_cpu_queue(struct hl_device *hdev)
 		sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
 
 	if (!rc) {
-		if (result == ARMCP_PACKET_FENCE_VAL)
-			dev_info(hdev->dev,
-				"queue test on CPU queue succeeded\n");
-		else
+		if (result != ARMCP_PACKET_FENCE_VAL)
 			dev_err(hdev->dev,
 				"CPU queue test failed (0x%08lX)\n", result);
 	} else {
...
@@ -72,6 +72,9 @@
  *
  */
 
+#define GOYA_UBOOT_FW_FILE	"habanalabs/goya/goya-u-boot.bin"
+#define GOYA_LINUX_FW_FILE	"habanalabs/goya/goya-fit.itb"
+
 #define GOYA_MMU_REGS_NUM		63
 
 #define GOYA_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */
@@ -337,17 +340,20 @@ void goya_get_fixed_properties(struct hl_device *hdev)
 	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
 		prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
 		prop->hw_queues_props[i].driver_only = 0;
+		prop->hw_queues_props[i].requires_kernel_cb = 1;
 	}
 
 	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
 		prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
 		prop->hw_queues_props[i].driver_only = 1;
+		prop->hw_queues_props[i].requires_kernel_cb = 0;
 	}
 
 	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
 			NUMBER_OF_INT_HW_QUEUES; i++) {
 		prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
 		prop->hw_queues_props[i].driver_only = 0;
+		prop->hw_queues_props[i].requires_kernel_cb = 0;
 	}
 
 	for (; i < HL_MAX_QUEUES; i++)
@@ -377,6 +383,23 @@ void goya_get_fixed_properties(struct hl_device *hdev)
 	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
 	prop->dram_page_size = PAGE_SIZE_2MB;
 
+	prop->dmmu.hop0_shift = HOP0_SHIFT;
+	prop->dmmu.hop1_shift = HOP1_SHIFT;
+	prop->dmmu.hop2_shift = HOP2_SHIFT;
+	prop->dmmu.hop3_shift = HOP3_SHIFT;
+	prop->dmmu.hop4_shift = HOP4_SHIFT;
+	prop->dmmu.hop0_mask = HOP0_MASK;
+	prop->dmmu.hop1_mask = HOP1_MASK;
+	prop->dmmu.hop2_mask = HOP2_MASK;
+	prop->dmmu.hop3_mask = HOP3_MASK;
+	prop->dmmu.hop4_mask = HOP4_MASK;
+	prop->dmmu.huge_page_size = PAGE_SIZE_2MB;
+
+	/* No difference between PMMU and DMMU except of page size */
+	memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
+	prop->dmmu.page_size = PAGE_SIZE_2MB;
+	prop->pmmu.page_size = PAGE_SIZE_4KB;
+
 	prop->va_space_host_start_address = VA_HOST_SPACE_START;
 	prop->va_space_host_end_address = VA_HOST_SPACE_END;
 	prop->va_space_dram_start_address = VA_DDR_SPACE_START;
@@ -393,6 +416,9 @@ void goya_get_fixed_properties(struct hl_device *hdev)
 	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
 	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
+
+	strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME,
+		CARD_NAME_MAX_LEN);
 }
 
 /*
@@ -1454,6 +1480,9 @@ static void goya_init_golden_registers(struct hl_device *hdev)
 			1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
 		WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
 			1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
+
+		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset,
+				ICACHE_FETCH_LINE_NUM, 2);
 	}
 
 	WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
@@ -1533,7 +1562,6 @@ static void goya_init_mme_cmdq(struct hl_device *hdev)
 	u32 mtr_base_lo, mtr_base_hi;
 	u32 so_base_lo, so_base_hi;
 	u32 gic_base_lo, gic_base_hi;
-	u64 qman_base_addr;
 
 	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
 	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
@@ -1545,9 +1573,6 @@ static void goya_init_mme_cmdq(struct hl_device *hdev)
 	gic_base_hi =
 		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
 
-	qman_base_addr = hdev->asic_prop.sram_base_address +
-			MME_QMAN_BASE_OFFSET;
-
 	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
 	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
 	WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo);
@@ -2141,13 +2166,11 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
  */
 static int goya_push_uboot_to_device(struct hl_device *hdev)
 {
-	char fw_name[200];
 	void __iomem *dst;
 
-	snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-u-boot.bin");
 	dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + UBOOT_FW_OFFSET;
 
-	return hl_fw_push_fw_to_device(hdev, fw_name, dst);
+	return hl_fw_push_fw_to_device(hdev, GOYA_UBOOT_FW_FILE, dst);
 }
@@ -2160,13 +2183,11 @@ static int goya_push_uboot_to_device(struct hl_device *hdev)
  */
 static int goya_push_linux_to_device(struct hl_device *hdev)
 {
-	char fw_name[200];
 	void __iomem *dst;
 
-	snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb");
 	dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
 
-	return hl_fw_push_fw_to_device(hdev, fw_name, dst);
+	return hl_fw_push_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst);
 }
 
 static int goya_pldm_init_cpu(struct hl_device *hdev)
@@ -2291,6 +2312,10 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout)
 		10000,
 		cpu_timeout);
 
+	/* Read U-Boot version now in case we will later fail */
+	goya_read_device_fw_version(hdev, FW_COMP_UBOOT);
+	goya_read_device_fw_version(hdev, FW_COMP_PREBOOT);
+
 	if (rc) {
 		dev_err(hdev->dev, "Error in ARM u-boot!");
 		switch (status) {
@@ -2328,6 +2353,11 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout)
 				"ARM status %d - u-boot stopped by user\n",
 				status);
 			break;
+		case CPU_BOOT_STATUS_TS_INIT_FAIL:
+			dev_err(hdev->dev,
+				"ARM status %d - Thermal Sensor initialization failed\n",
+				status);
+			break;
 		default:
 			dev_err(hdev->dev,
 				"ARM status %d - Invalid status code\n",
@@ -2337,10 +2367,6 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout)
 		return -EIO;
 	}
 
-	/* Read U-Boot version now in case we will later fail */
-	goya_read_device_fw_version(hdev, FW_COMP_UBOOT);
-	goya_read_device_fw_version(hdev, FW_COMP_PREBOOT);
-
 	if (!hdev->fw_loading) {
 		dev_info(hdev->dev, "Skip loading FW\n");
 		goto out;
@@ -2453,7 +2479,8 @@ int goya_mmu_init(struct hl_device *hdev)
 	WREG32_AND(mmSTLB_STLB_FEATURE_EN,
 			(~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
 
-	hdev->asic_funcs->mmu_invalidate_cache(hdev, true);
+	hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
+					VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK);
 
 	WREG32(mmMMU_MMU_ENABLE, 1);
 	WREG32(mmMMU_SPI_MASK, 0xF);
@@ -2978,9 +3005,6 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
 			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
 			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
 		rc = -EIO;
-	} else {
-		dev_info(hdev->dev, "queue test on H/W queue %d succeeded\n",
-			hw_queue_id);
 	}
 
 free_pkt:
@@ -3925,7 +3949,7 @@ static int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
 		return 0;
 
 	dev_err(hdev->dev,
-		"Internal CB address %px + 0x%x is not in SRAM nor in DRAM\n",
+		"Internal CB address 0x%px + 0x%x is not in SRAM nor in DRAM\n",
 		parser->user_cb, parser->user_cb_size);
 
 	return -EFAULT;
@@ -3935,7 +3959,7 @@ int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
 {
 	struct goya_device *goya = hdev->asic_specific;
 
-	if (!parser->ext_queue)
+	if (parser->queue_type == QUEUE_TYPE_INT)
 		return goya_parse_cb_no_ext_queue(hdev, parser);
 
 	if (goya->hw_cap_initialized & HW_CAP_MMU)
@@ -4606,7 +4630,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
 		lin_dma_pkt++;
 	} while (--lin_dma_pkts_cnt);
 
-	job = hl_cs_allocate_job(hdev, true);
+	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
 	if (!job) {
 		dev_err(hdev->dev, "Failed to allocate a new job\n");
 		rc = -ENOMEM;
@@ -4835,13 +4859,15 @@ static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
 		goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
 }
 
-static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard)
+static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
+					u32 flags)
 {
 	struct goya_device *goya = hdev->asic_specific;
 	u32 status, timeout_usec;
 	int rc;
 
-	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+	if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
+		hdev->hard_reset_pending)
 		return;
 
 	/* no need in L1 only invalidation in Goya */
@@ -4880,7 +4906,8 @@ static void goya_mmu_invalidate_cache_range(struct hl_device *hdev,
 	u32 status, timeout_usec, inv_data, pi;
 	int rc;
 
-	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+	if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
+		hdev->hard_reset_pending)
 		return;
 
 	/* no need in L1 only invalidation in Goya */
@@ -5137,7 +5164,8 @@ static const struct hl_asic_funcs goya_funcs = {
 	.init_iatu = goya_init_iatu,
 	.rreg = hl_rreg,
 	.wreg = hl_wreg,
-	.halt_coresight = goya_halt_coresight
+	.halt_coresight = goya_halt_coresight,
+	.get_clk_rate = goya_get_clk_rate
 };
 
 /*
...
@@ -233,4 +233,6 @@ void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
 			void *vaddr);
 void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev);
 
+int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
+
 #endif /* GOYAP_H_ */
@@ -8,6 +8,7 @@
 #include "goyaP.h"
 #include "include/goya/goya_coresight.h"
 #include "include/goya/asic_reg/goya_regs.h"
+#include "include/goya/asic_reg/goya_masks.h"
 
 #include <uapi/misc/habanalabs.h>
@@ -377,33 +378,32 @@ static int goya_config_etr(struct hl_device *hdev,
 		struct hl_debug_params *params)
 {
 	struct hl_debug_params_etr *input;
-	u64 base_reg = mmPSOC_ETR_BASE - CFG_BASE;
 	u32 val;
 	int rc;
 
-	WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
+	WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK);
 
-	val = RREG32(base_reg + 0x304);
+	val = RREG32(mmPSOC_ETR_FFCR);
 	val |= 0x1000;
-	WREG32(base_reg + 0x304, val);
+	WREG32(mmPSOC_ETR_FFCR, val);
 	val |= 0x40;
-	WREG32(base_reg + 0x304, val);
+	WREG32(mmPSOC_ETR_FFCR, val);
 
-	rc = goya_coresight_timeout(hdev, base_reg + 0x304, 6, false);
+	rc = goya_coresight_timeout(hdev, mmPSOC_ETR_FFCR, 6, false);
 	if (rc) {
 		dev_err(hdev->dev, "Failed to %s ETR on timeout, error %d\n",
 				params->enable ? "enable" : "disable", rc);
 		return rc;
 	}
 
-	rc = goya_coresight_timeout(hdev, base_reg + 0xC, 2, true);
+	rc = goya_coresight_timeout(hdev, mmPSOC_ETR_STS, 2, true);
 	if (rc) {
 		dev_err(hdev->dev, "Failed to %s ETR on timeout, error %d\n",
 				params->enable ? "enable" : "disable", rc);
 		return rc;
 	}
 
-	WREG32(base_reg + 0x20, 0);
+	WREG32(mmPSOC_ETR_CTL, 0);
 
 	if (params->enable) {
 		input = params->input;
@@ -423,25 +423,26 @@ static int goya_config_etr(struct hl_device *hdev,
 			return -EINVAL;
 		}
 
-		WREG32(base_reg + 0x34, 0x3FFC);
-		WREG32(base_reg + 0x4, input->buffer_size);
-		WREG32(base_reg + 0x28, input->sink_mode);
-		WREG32(base_reg + 0x110, 0x700);
-		WREG32(base_reg + 0x118,
+		WREG32(mmPSOC_ETR_BUFWM, 0x3FFC);
+		WREG32(mmPSOC_ETR_RSZ, input->buffer_size);
+		WREG32(mmPSOC_ETR_MODE, input->sink_mode);
+		WREG32(mmPSOC_ETR_AXICTL,
+			0x700 | PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT);
+		WREG32(mmPSOC_ETR_DBALO,
 			lower_32_bits(input->buffer_address));
-		WREG32(base_reg + 0x11C,
+		WREG32(mmPSOC_ETR_DBAHI,
 			upper_32_bits(input->buffer_address));
-		WREG32(base_reg + 0x304, 3);
-		WREG32(base_reg + 0x308, 0xA);
-		WREG32(base_reg + 0x20, 1);
+		WREG32(mmPSOC_ETR_FFCR, 3);
+		WREG32(mmPSOC_ETR_PSCR, 0xA);
+		WREG32(mmPSOC_ETR_CTL, 1);
 	} else {
-		WREG32(base_reg + 0x34, 0);
-		WREG32(base_reg + 0x4, 0x400);
-		WREG32(base_reg + 0x118, 0);
-		WREG32(base_reg + 0x11C, 0);
-		WREG32(base_reg + 0x308, 0);
-		WREG32(base_reg + 0x28, 0);
-		WREG32(base_reg + 0x304, 0);
+		WREG32(mmPSOC_ETR_BUFWM, 0);
+		WREG32(mmPSOC_ETR_RSZ, 0x400);
+		WREG32(mmPSOC_ETR_DBALO, 0);
+		WREG32(mmPSOC_ETR_DBAHI, 0);
+		WREG32(mmPSOC_ETR_PSCR, 0);
+		WREG32(mmPSOC_ETR_MODE, 0);
+		WREG32(mmPSOC_ETR_FFCR, 0);
 
 		if (params->output_size >= sizeof(u64)) {
 			u32 rwp, rwphi;
@@ -451,8 +452,8 @@ static int goya_config_etr(struct hl_device *hdev,
 			 * the buffer is set in the RWP register (lower 32
 			 * bits), and in the RWPHI register (upper 8 bits).
 			 */
-			rwp = RREG32(base_reg + 0x18);
-			rwphi = RREG32(base_reg + 0x3c) & 0xff;
+			rwp = RREG32(mmPSOC_ETR_RWP);
+			rwphi = RREG32(mmPSOC_ETR_RWPHI) & 0xff;
 			*(u64 *) params->output = ((u64) rwphi << 32) | rwp;
 		}
 	}
...
@@ -32,6 +32,37 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
 	}
 }
 
+int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
+{
+	long value;
+
+	if (hl_device_disabled_or_in_reset(hdev))
+		return -ENODEV;
+
+	value = hl_get_frequency(hdev, MME_PLL, false);
+
+	if (value < 0) {
+		dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n",
+			value);
+		return value;
+	}
+
+	*max_clk = (value / 1000 / 1000);
+
+	value = hl_get_frequency(hdev, MME_PLL, true);
+
+	if (value < 0) {
+		dev_err(hdev->dev,
+			"Failed to retrieve device current clock %ld\n",
+			value);
+		return value;
+	}
+
+	*cur_clk = (value / 1000 / 1000);
+
+	return 0;
+}
+
 static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr,
 			char *buf)
 {
...
@@ -60,11 +60,16 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
 	hw_ip.tpc_enabled_mask = prop->tpc_enabled_mask;
 	hw_ip.sram_size = prop->sram_size - sram_kmd_size;
 	hw_ip.dram_size = prop->dram_size - dram_kmd_size;
-	if (hw_ip.dram_size > 0)
+	if (hw_ip.dram_size > PAGE_SIZE)
 		hw_ip.dram_enabled = 1;
 	hw_ip.num_of_events = prop->num_of_events;
-	memcpy(hw_ip.armcp_version,
-		prop->armcp_info.armcp_version, VERSION_MAX_LEN);
+
+	memcpy(hw_ip.armcp_version, prop->armcp_info.armcp_version,
+		min(VERSION_MAX_LEN, HL_INFO_VERSION_MAX_LEN));
+
+	memcpy(hw_ip.card_name, prop->armcp_info.card_name,
+		min(CARD_NAME_MAX_LEN, HL_INFO_CARD_NAME_MAX_LEN));
+
 	hw_ip.armcp_cpld_version = le32_to_cpu(prop->armcp_info.cpld_version);
 	hw_ip.psoc_pci_pll_nr = prop->psoc_pci_pll_nr;
 	hw_ip.psoc_pci_pll_nf = prop->psoc_pci_pll_nf;
@@ -179,16 +184,13 @@ static int debug_coresight(struct hl_device *hdev, struct hl_debug_args *args)
 		goto out;
 	}
 
-	if (output) {
-		if (copy_to_user((void __user *) (uintptr_t) args->output_ptr,
-					output,
-					args->output_size)) {
-			dev_err(hdev->dev,
-				"copy to user failed in debug ioctl\n");
-			rc = -EFAULT;
-			goto out;
-		}
+	if (output && copy_to_user((void __user *) (uintptr_t) args->output_ptr,
+					output, args->output_size)) {
+		dev_err(hdev->dev, "copy to user failed in debug ioctl\n");
+		rc = -EFAULT;
+		goto out;
 	}
 
 out:
 	kfree(params);
@@ -221,6 +223,41 @@ static int device_utilization(struct hl_device *hdev, struct hl_info_args *args)
 		min((size_t) max_size, sizeof(device_util))) ? -EFAULT : 0;
 }
 
+static int get_clk_rate(struct hl_device *hdev, struct hl_info_args *args)
+{
+	struct hl_info_clk_rate clk_rate = {0};
+	u32 max_size = args->return_size;
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+	int rc;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	rc = hdev->asic_funcs->get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz,
+						&clk_rate.max_clk_rate_mhz);
+	if (rc)
+		return rc;
+
+	return copy_to_user(out, &clk_rate,
+		min((size_t) max_size, sizeof(clk_rate))) ? -EFAULT : 0;
+}
+
+static int get_reset_count(struct hl_device *hdev, struct hl_info_args *args)
+{
+	struct hl_info_reset_count reset_count = {0};
+	u32 max_size = args->return_size;
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	reset_count.hard_reset_cnt = hdev->hard_reset_cnt;
+	reset_count.soft_reset_cnt = hdev->soft_reset_cnt;
+
+	return copy_to_user(out, &reset_count,
+		min((size_t) max_size, sizeof(reset_count))) ? -EFAULT : 0;
+}
+
 static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 				struct device *dev)
 {
@@ -239,6 +276,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	case HL_INFO_DEVICE_STATUS:
 		return device_status_info(hdev, args);
 
+	case HL_INFO_RESET_COUNT:
+		return get_reset_count(hdev, args);
+
 	default:
 		break;
 	}
@@ -271,6 +311,10 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 		rc = hw_events_info(hdev, true, args);
 		break;
 
+	case HL_INFO_CLK_RATE:
+		rc = get_clk_rate(hdev, args);
+		break;
+
 	default:
 		dev_err(dev, "Invalid request %d\n", args->op);
 		rc = -ENOTTY;
@@ -406,8 +450,7 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
 	retcode = func(hpriv, kdata);
 
-	if (cmd & IOC_OUT)
-		if (copy_to_user((void __user *)arg, kdata, usize))
+	if ((cmd & IOC_OUT) && copy_to_user((void __user *)arg, kdata, usize))
 		retcode = -EFAULT;
 
 out_err:
...
@@ -260,4 +260,6 @@
 #define DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT	DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT
 #define DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT	DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT
 
+#define PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT	1
+
 #endif /* ASIC_REG_GOYA_MASKS_H_ */
@@ -84,6 +84,7 @@
 #include "tpc6_rtr_regs.h"
 #include "tpc7_nrtr_regs.h"
 #include "tpc0_eml_cfg_regs.h"
+#include "psoc_etr_regs.h"
 #include "psoc_global_conf_masks.h"
 #include "dma_macro_masks.h"
...
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2016-2018 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
/************************************
** This is an auto-generated file **
** DO NOT EDIT BELOW **
************************************/
#ifndef ASIC_REG_PSOC_ETR_REGS_H_
#define ASIC_REG_PSOC_ETR_REGS_H_
/*
*****************************************
* PSOC_ETR (Prototype: ETR)
*****************************************
*/
#define mmPSOC_ETR_RSZ 0x2C43004
#define mmPSOC_ETR_STS 0x2C4300C
#define mmPSOC_ETR_RRD 0x2C43010
#define mmPSOC_ETR_RRP 0x2C43014
#define mmPSOC_ETR_RWP 0x2C43018
#define mmPSOC_ETR_TRG 0x2C4301C
#define mmPSOC_ETR_CTL 0x2C43020
#define mmPSOC_ETR_RWD 0x2C43024
#define mmPSOC_ETR_MODE 0x2C43028
#define mmPSOC_ETR_LBUFLEVEL 0x2C4302C
#define mmPSOC_ETR_CBUFLEVEL 0x2C43030
#define mmPSOC_ETR_BUFWM 0x2C43034
#define mmPSOC_ETR_RRPHI 0x2C43038
#define mmPSOC_ETR_RWPHI 0x2C4303C
#define mmPSOC_ETR_AXICTL 0x2C43110
#define mmPSOC_ETR_DBALO 0x2C43118
#define mmPSOC_ETR_DBAHI 0x2C4311C
#define mmPSOC_ETR_FFSR 0x2C43300
#define mmPSOC_ETR_FFCR 0x2C43304
#define mmPSOC_ETR_PSCR 0x2C43308
#define mmPSOC_ETR_ITMISCOP0 0x2C43EE0
#define mmPSOC_ETR_ITTRFLIN 0x2C43EE8
#define mmPSOC_ETR_ITATBDATA0 0x2C43EEC
#define mmPSOC_ETR_ITATBCTR2 0x2C43EF0
#define mmPSOC_ETR_ITATBCTR1 0x2C43EF4
#define mmPSOC_ETR_ITATBCTR0 0x2C43EF8
#define mmPSOC_ETR_ITCTRL 0x2C43F00
#define mmPSOC_ETR_CLAIMSET 0x2C43FA0
#define mmPSOC_ETR_CLAIMCLR 0x2C43FA4
#define mmPSOC_ETR_LAR 0x2C43FB0
#define mmPSOC_ETR_LSR 0x2C43FB4
#define mmPSOC_ETR_AUTHSTATUS 0x2C43FB8
#define mmPSOC_ETR_DEVID 0x2C43FC8
#define mmPSOC_ETR_DEVTYPE 0x2C43FCC
#define mmPSOC_ETR_PERIPHID4 0x2C43FD0
#define mmPSOC_ETR_PERIPHID5 0x2C43FD4
#define mmPSOC_ETR_PERIPHID6 0x2C43FD8
#define mmPSOC_ETR_PERIPHID7 0x2C43FDC
#define mmPSOC_ETR_PERIPHID0 0x2C43FE0
#define mmPSOC_ETR_PERIPHID1 0x2C43FE4
#define mmPSOC_ETR_PERIPHID2 0x2C43FE8
#define mmPSOC_ETR_PERIPHID3 0x2C43FEC
#define mmPSOC_ETR_COMPID0 0x2C43FF0
#define mmPSOC_ETR_COMPID1 0x2C43FF4
#define mmPSOC_ETR_COMPID2 0x2C43FF8
#define mmPSOC_ETR_COMPID3 0x2C43FFC
#endif /* ASIC_REG_PSOC_ETR_REGS_H_ */
@@ -20,6 +20,8 @@ enum cpu_boot_status {
 	CPU_BOOT_STATUS_DRAM_INIT_FAIL,
 	CPU_BOOT_STATUS_FIT_CORRUPTED,
 	CPU_BOOT_STATUS_UBOOT_NOT_READY,
+	CPU_BOOT_STATUS_RESERVED,
+	CPU_BOOT_STATUS_TS_INIT_FAIL,
 };
 
 enum kmd_msg {
...
@@ -12,18 +12,16 @@
 #define PAGE_SHIFT_2MB			21
 #define PAGE_SIZE_2MB			(_AC(1, UL) << PAGE_SHIFT_2MB)
 #define PAGE_SIZE_4KB			(_AC(1, UL) << PAGE_SHIFT_4KB)
-#define PAGE_MASK_2MB			(~(PAGE_SIZE_2MB - 1))
 
 #define PAGE_PRESENT_MASK		0x0000000000001ull
 #define SWAP_OUT_MASK			0x0000000000004ull
 #define LAST_MASK			0x0000000000800ull
-#define PHYS_ADDR_MASK			0xFFFFFFFFFFFFF000ull
 #define HOP0_MASK			0x3000000000000ull
 #define HOP1_MASK			0x0FF8000000000ull
 #define HOP2_MASK			0x0007FC0000000ull
 #define HOP3_MASK			0x000003FE00000ull
 #define HOP4_MASK			0x00000001FF000ull
-#define OFFSET_MASK			0x0000000000FFFull
+#define FLAGS_MASK			0x0000000000FFFull
 
 #define HOP0_SHIFT			48
 #define HOP1_SHIFT			39
@@ -31,8 +29,7 @@
 #define HOP3_SHIFT			21
 #define HOP4_SHIFT			12
 
-#define PTE_PHYS_ADDR_SHIFT		12
-#define PTE_PHYS_ADDR_MASK		~OFFSET_MASK
+#define HOP_PHYS_ADDR_MASK		(~FLAGS_MASK)
 
 #define HL_PTE_SIZE			sizeof(u64)
 #define HOP_TABLE_SIZE			PAGE_SIZE_4KB
...
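With OFFSET_MASK renamed to FLAGS_MASK and HOP_PHYS_ADDR_MASK defined as its complement, a PTE now splits cleanly into its low flag bits (present, swap-out, last) and the physical address of the next hop table. A small standalone sketch under these definitions, mirroring get_next_hop_addr() above:

	#include <stdint.h>

	#define PAGE_PRESENT_MASK	0x0000000000001ull
	#define LAST_MASK		0x0000000000800ull
	#define FLAGS_MASK		0x0000000000FFFull
	#define HOP_PHYS_ADDR_MASK	(~FLAGS_MASK)

	/* Same extract-or-fail logic as get_next_hop_addr(): the two masks
	 * are disjoint, so flags never leak into the returned address.
	 */
	static inline uint64_t next_hop(uint64_t pte)
	{
		return (pte & PAGE_PRESENT_MASK) ?
			(pte & HOP_PHYS_ADDR_MASK) : UINT64_MAX;
	}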
@@ -23,6 +23,8 @@ struct hl_bd {
 #define HL_BD_SIZE			sizeof(struct hl_bd)
 
 /*
+ * S/W CTL FIELDS.
+ *
  * BD_CTL_REPEAT_VALID tells the CP whether the repeat field in the BD CTL is
  * valid. 1 means the repeat field is valid, 0 means not-valid,
  * i.e. repeat == 1
@@ -33,6 +35,16 @@ struct hl_bd {
 #define BD_CTL_SHADOW_INDEX_SHIFT	0
 #define BD_CTL_SHADOW_INDEX_MASK	0x00000FFF
 
+/*
+ * H/W CTL FIELDS
+ */
+
+#define BD_CTL_COMP_OFFSET_SHIFT	16
+#define BD_CTL_COMP_OFFSET_MASK		0x00FF0000
+
+#define BD_CTL_COMP_DATA_SHIFT		0
+#define BD_CTL_COMP_DATA_MASK		0x0000FFFF
+
 /*
  * COMPLETION QUEUE
  */
...
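For the new H/W queue type, completion information travels inside the bufferable descriptor's CTL word using the two fields just defined. The helper below is purely illustrative (it is not part of the driver); only the masks and shifts are taken from this header:

	#include <stdint.h>

	#define BD_CTL_COMP_OFFSET_SHIFT	16
	#define BD_CTL_COMP_OFFSET_MASK		0x00FF0000
	#define BD_CTL_COMP_DATA_SHIFT		0
	#define BD_CTL_COMP_DATA_MASK		0x0000FFFF

	/* Illustrative only: place completion offset and data in one CTL word */
	static inline uint32_t bd_ctl_pack(uint32_t comp_offset, uint32_t comp_data)
	{
		return ((comp_offset << BD_CTL_COMP_OFFSET_SHIFT) &
				BD_CTL_COMP_OFFSET_MASK) |
			((comp_data << BD_CTL_COMP_DATA_SHIFT) &
				BD_CTL_COMP_DATA_MASK);
	}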
@@ -95,6 +95,12 @@ enum hl_device_status {
  *                            percentage of the utilization rate.
  * HL_INFO_HW_EVENTS_AGGREGATE - Receive an array describing how many times each
  *                               event occurred since the driver was loaded.
+ * HL_INFO_CLK_RATE       - Retrieve the current and maximum clock rate
+ *                          of the device in MHz. The maximum clock rate is
+ *                          configurable via sysfs parameter
+ * HL_INFO_RESET_COUNT    - Retrieve the counts of the soft and hard reset
+ *                          operations performed on the device since the last
+ *                          time the driver was loaded.
  */
 #define HL_INFO_HW_IP_INFO		0
 #define HL_INFO_HW_EVENTS		1
@@ -103,8 +109,11 @@ enum hl_device_status {
 #define HL_INFO_DEVICE_STATUS		4
 #define HL_INFO_DEVICE_UTILIZATION	6
 #define HL_INFO_HW_EVENTS_AGGREGATE	7
+#define HL_INFO_CLK_RATE		8
+#define HL_INFO_RESET_COUNT		9
 
 #define HL_INFO_VERSION_MAX_LEN		128
+#define HL_INFO_CARD_NAME_MAX_LEN	16
 
 struct hl_info_hw_ip_info {
 	__u64 sram_base_address;
@@ -123,6 +132,7 @@ struct hl_info_hw_ip_info {
 	__u8 dram_enabled;
 	__u8 pad[2];
 	__u8 armcp_version[HL_INFO_VERSION_MAX_LEN];
+	__u8 card_name[HL_INFO_CARD_NAME_MAX_LEN];
 };
 
 struct hl_info_dram_usage {
@@ -149,6 +159,16 @@ struct hl_info_device_utilization {
 	__u32 pad;
 };
 
+struct hl_info_clk_rate {
+	__u32 cur_clk_rate_mhz;
+	__u32 max_clk_rate_mhz;
+};
+
+struct hl_info_reset_count {
+	__u32 hard_reset_cnt;
+	__u32 soft_reset_cnt;
+};
+
 struct hl_info_args {
 	/* Location of relevant struct in userspace */
 	__u64 return_pointer;
@@ -181,13 +201,15 @@ struct hl_info_args {
 /* Opcode to destroy previously created command buffer */
 #define HL_CB_OP_DESTROY	1
 
+#define HL_MAX_CB_SIZE		0x200000	/* 2MB */
+
 struct hl_cb_in {
 	/* Handle of CB or 0 if we want to create one */
 	__u64 cb_handle;
 	/* HL_CB_OP_* */
 	__u32 op;
-	/* Size of CB. Maximum size is 2MB. The minimum size that will be
-	 * allocated, regardless of this parameter's value, is PAGE_SIZE
+
+	/* Size of CB. Maximum size is HL_MAX_CB_SIZE. The minimum size that
+	 * will be allocated, regardless of this parameter's value, is PAGE_SIZE
 	 */
 	__u32 cb_size;
 	/* Context ID - Currently not in use */
@@ -233,6 +255,8 @@ struct hl_cs_chunk {
 
 #define HL_CS_STATUS_SUCCESS		0
 
+#define HL_MAX_JOBS_PER_CS		512
+
 struct hl_cs_in {
 	/* this holds address of array of hl_cs_chunk for restore phase */
 	__u64 chunks_restore;
@@ -242,9 +266,13 @@ struct hl_cs_in {
 	 * Currently not in use
 	 */
 	__u64 chunks_store;
-	/* Number of chunks in restore phase array */
+
+	/* Number of chunks in restore phase array. Maximum number is
+	 * HL_MAX_JOBS_PER_CS
+	 */
 	__u32 num_chunks_restore;
-	/* Number of chunks in execution array */
+
+	/* Number of chunks in execution array. Maximum number is
+	 * HL_MAX_JOBS_PER_CS
+	 */
 	__u32 num_chunks_execute;
 	/* Number of chunks in restore phase array - Currently not in use */
 	__u32 num_chunks_store;
@@ -589,7 +617,7 @@ struct hl_debug_args {
  *
  * The user can call this IOCTL with a handle it received from the CS IOCTL
  * to wait until the handle's CS has finished executing. The user will wait
- * inside the kernel until the CS has finished or until the user-requeusted
+ * inside the kernel until the CS has finished or until the user-requested
  * timeout has expired.
 *
 * The return value of the IOCTL is a standard Linux error code. The possible
...
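The new INFO opcodes are queried the same way as the existing ones: point hl_info_args at a user buffer and issue the INFO IOCTL. A hedged userspace sketch, assuming the installed copy of this uapi header, the HL_IOCTL_INFO macro it defines, and a /dev/hl0 device node:

	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <misc/habanalabs.h>	/* installed copy of this uapi header */

	int main(void)
	{
		struct hl_info_clk_rate clk = {0};
		struct hl_info_args args;
		int fd = open("/dev/hl0", O_RDWR);	/* assumed device node */

		if (fd < 0)
			return 1;

		memset(&args, 0, sizeof(args));
		args.op = HL_INFO_CLK_RATE;
		args.return_pointer = (uint64_t) (uintptr_t) &clk;
		args.return_size = sizeof(clk);

		if (!ioctl(fd, HL_IOCTL_INFO, &args))
			printf("clk %u MHz (max %u MHz)\n",
				clk.cur_clk_rate_mhz, clk.max_clk_rate_mhz);

		close(fd);
		return 0;
	}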