Commit cb596aee authored by Tomer Tayar, committed by Oded Gabbay

habanalabs: Add a new H/W queue type

This patch adds support for a new H/W queue type.
This type of queue is for DMA and compute engine jobs, for which
completion notifications are sent by H/W.
Command buffer for this queue can be created either through the CB
IOCTL and using the retrieved CB handle, or by preparing a buffer on the
host or device SRAM/DRAM, and using the device address to that buffer.
The patch includes the handling of the 2 options, as well as the
initialization of the H/W queue and its jobs scheduling.
Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
parent df762375
...@@ -65,6 +65,18 @@ static void cs_put(struct hl_cs *cs) ...@@ -65,6 +65,18 @@ static void cs_put(struct hl_cs *cs)
kref_put(&cs->refcount, cs_do_release); kref_put(&cs->refcount, cs_do_release);
} }
static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
{
	/*
	 * A patched CB is always created for external queue jobs. For H/W
	 * queue jobs it is created only when the user CB was allocated by
	 * the driver and the MMU is disabled.
	 */
	if (job->queue_type == QUEUE_TYPE_EXT)
		return true;

	return job->queue_type == QUEUE_TYPE_HW &&
			job->is_kernel_allocated_cb &&
			!hdev->mmu_enable;
}
/* /*
* cs_parser - parse the user command submission * cs_parser - parse the user command submission
* *
...@@ -91,11 +103,13 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job) ...@@ -91,11 +103,13 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
parser.patched_cb = NULL; parser.patched_cb = NULL;
parser.user_cb = job->user_cb; parser.user_cb = job->user_cb;
parser.user_cb_size = job->user_cb_size; parser.user_cb_size = job->user_cb_size;
parser.ext_queue = job->ext_queue; parser.queue_type = job->queue_type;
parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
job->patched_cb = NULL; job->patched_cb = NULL;
rc = hdev->asic_funcs->cs_parser(hdev, &parser); rc = hdev->asic_funcs->cs_parser(hdev, &parser);
if (job->ext_queue) {
if (is_cb_patched(hdev, job)) {
if (!rc) { if (!rc) {
job->patched_cb = parser.patched_cb; job->patched_cb = parser.patched_cb;
job->job_cb_size = parser.patched_cb_size; job->job_cb_size = parser.patched_cb_size;
...@@ -124,7 +138,7 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job) ...@@ -124,7 +138,7 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
{ {
struct hl_cs *cs = job->cs; struct hl_cs *cs = job->cs;
if (job->ext_queue) { if (is_cb_patched(hdev, job)) {
hl_userptr_delete_list(hdev, &job->userptr_list); hl_userptr_delete_list(hdev, &job->userptr_list);
/* /*
...@@ -140,6 +154,19 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job) ...@@ -140,6 +154,19 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
} }
} }
/* For H/W queue jobs, if a user CB was allocated by driver and MMU is
* enabled, the user CB isn't released in cs_parser() and thus should be
* released here.
*/
if (job->queue_type == QUEUE_TYPE_HW &&
job->is_kernel_allocated_cb && hdev->mmu_enable) {
spin_lock(&job->user_cb->lock);
job->user_cb->cs_cnt--;
spin_unlock(&job->user_cb->lock);
hl_cb_put(job->user_cb);
}
/* /*
* This is the only place where there can be multiple threads * This is the only place where there can be multiple threads
* modifying the list at the same time * modifying the list at the same time
...@@ -150,7 +177,8 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job) ...@@ -150,7 +177,8 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
hl_debugfs_remove_job(hdev, job); hl_debugfs_remove_job(hdev, job);
if (job->ext_queue) if (job->queue_type == QUEUE_TYPE_EXT ||
job->queue_type == QUEUE_TYPE_HW)
cs_put(cs); cs_put(cs);
kfree(job); kfree(job);
...@@ -387,18 +415,13 @@ static void job_wq_completion(struct work_struct *work) ...@@ -387,18 +415,13 @@ static void job_wq_completion(struct work_struct *work)
free_job(hdev, job); free_job(hdev, job);
} }
static struct hl_cb *validate_queue_index(struct hl_device *hdev, static int validate_queue_index(struct hl_device *hdev,
struct hl_cb_mgr *cb_mgr, struct hl_cs_chunk *chunk,
struct hl_cs_chunk *chunk, enum hl_queue_type *queue_type,
bool *ext_queue) bool *is_kernel_allocated_cb)
{ {
struct asic_fixed_properties *asic = &hdev->asic_prop; struct asic_fixed_properties *asic = &hdev->asic_prop;
struct hw_queue_properties *hw_queue_prop; struct hw_queue_properties *hw_queue_prop;
u32 cb_handle;
struct hl_cb *cb;
/* Assume external queue */
*ext_queue = true;
hw_queue_prop = &asic->hw_queues_props[chunk->queue_index]; hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
...@@ -406,22 +429,29 @@ static struct hl_cb *validate_queue_index(struct hl_device *hdev, ...@@ -406,22 +429,29 @@ static struct hl_cb *validate_queue_index(struct hl_device *hdev,
(hw_queue_prop->type == QUEUE_TYPE_NA)) { (hw_queue_prop->type == QUEUE_TYPE_NA)) {
dev_err(hdev->dev, "Queue index %d is invalid\n", dev_err(hdev->dev, "Queue index %d is invalid\n",
chunk->queue_index); chunk->queue_index);
return NULL; return -EINVAL;
} }
if (hw_queue_prop->driver_only) { if (hw_queue_prop->driver_only) {
dev_err(hdev->dev, dev_err(hdev->dev,
"Queue index %d is restricted for the kernel driver\n", "Queue index %d is restricted for the kernel driver\n",
chunk->queue_index); chunk->queue_index);
return NULL; return -EINVAL;
} }
if (!hw_queue_prop->requires_kernel_cb) { *queue_type = hw_queue_prop->type;
*ext_queue = false; *is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb;
return (struct hl_cb *) (uintptr_t) chunk->cb_handle;
} return 0;
}
static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
struct hl_cb_mgr *cb_mgr,
struct hl_cs_chunk *chunk)
{
struct hl_cb *cb;
u32 cb_handle;
/* Retrieve CB object */
cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT); cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);
cb = hl_cb_get(hdev, cb_mgr, cb_handle); cb = hl_cb_get(hdev, cb_mgr, cb_handle);
...@@ -446,7 +476,8 @@ static struct hl_cb *validate_queue_index(struct hl_device *hdev, ...@@ -446,7 +476,8 @@ static struct hl_cb *validate_queue_index(struct hl_device *hdev,
return NULL; return NULL;
} }
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue) struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
{ {
struct hl_cs_job *job; struct hl_cs_job *job;
...@@ -454,12 +485,14 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue) ...@@ -454,12 +485,14 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue)
if (!job) if (!job)
return NULL; return NULL;
job->ext_queue = ext_queue; job->queue_type = queue_type;
job->is_kernel_allocated_cb = is_kernel_allocated_cb;
if (job->ext_queue) { if (is_cb_patched(hdev, job))
INIT_LIST_HEAD(&job->userptr_list); INIT_LIST_HEAD(&job->userptr_list);
if (job->queue_type == QUEUE_TYPE_EXT)
INIT_WORK(&job->finish_work, job_wq_completion); INIT_WORK(&job->finish_work, job_wq_completion);
}
return job; return job;
} }
...@@ -472,7 +505,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, ...@@ -472,7 +505,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
struct hl_cs_job *job; struct hl_cs_job *job;
struct hl_cs *cs; struct hl_cs *cs;
struct hl_cb *cb; struct hl_cb *cb;
bool ext_queue_present = false; bool int_queues_only = true;
u32 size_to_copy; u32 size_to_copy;
int rc, i, parse_cnt; int rc, i, parse_cnt;
...@@ -516,23 +549,33 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, ...@@ -516,23 +549,33 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
/* Validate ALL the CS chunks before submitting the CS */ /* Validate ALL the CS chunks before submitting the CS */
for (i = 0, parse_cnt = 0 ; i < num_chunks ; i++, parse_cnt++) { for (i = 0, parse_cnt = 0 ; i < num_chunks ; i++, parse_cnt++) {
struct hl_cs_chunk *chunk = &cs_chunk_array[i]; struct hl_cs_chunk *chunk = &cs_chunk_array[i];
bool ext_queue; enum hl_queue_type queue_type;
bool is_kernel_allocated_cb;
cb = validate_queue_index(hdev, &hpriv->cb_mgr, chunk, rc = validate_queue_index(hdev, chunk, &queue_type,
&ext_queue); &is_kernel_allocated_cb);
if (ext_queue) { if (rc)
ext_queue_present = true; goto free_cs_object;
if (is_kernel_allocated_cb) {
cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
if (!cb) { if (!cb) {
rc = -EINVAL; rc = -EINVAL;
goto free_cs_object; goto free_cs_object;
} }
} else {
cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
} }
job = hl_cs_allocate_job(hdev, ext_queue); if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW)
int_queues_only = false;
job = hl_cs_allocate_job(hdev, queue_type,
is_kernel_allocated_cb);
if (!job) { if (!job) {
dev_err(hdev->dev, "Failed to allocate a new job\n"); dev_err(hdev->dev, "Failed to allocate a new job\n");
rc = -ENOMEM; rc = -ENOMEM;
if (ext_queue) if (is_kernel_allocated_cb)
goto release_cb; goto release_cb;
else else
goto free_cs_object; goto free_cs_object;
...@@ -542,7 +585,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, ...@@ -542,7 +585,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
job->cs = cs; job->cs = cs;
job->user_cb = cb; job->user_cb = cb;
job->user_cb_size = chunk->cb_size; job->user_cb_size = chunk->cb_size;
if (job->ext_queue) if (is_kernel_allocated_cb)
job->job_cb_size = cb->size; job->job_cb_size = cb->size;
else else
job->job_cb_size = chunk->cb_size; job->job_cb_size = chunk->cb_size;
...@@ -555,10 +598,11 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, ...@@ -555,10 +598,11 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
/* /*
* Increment CS reference. When CS reference is 0, CS is * Increment CS reference. When CS reference is 0, CS is
* done and can be signaled to user and free all its resources * done and can be signaled to user and free all its resources
* Only increment for JOB on external queues, because only * Only increment for JOB on external or H/W queues, because
* for those JOBs we get completion * only for those JOBs we get completion
*/ */
if (job->ext_queue) if (job->queue_type == QUEUE_TYPE_EXT ||
job->queue_type == QUEUE_TYPE_HW)
cs_get(cs); cs_get(cs);
hl_debugfs_add_job(hdev, job); hl_debugfs_add_job(hdev, job);
...@@ -572,9 +616,9 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks, ...@@ -572,9 +616,9 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
} }
} }
if (!ext_queue_present) { if (int_queues_only) {
dev_err(hdev->dev, dev_err(hdev->dev,
"Reject CS %d.%llu because no external queues jobs\n", "Reject CS %d.%llu because only internal queues jobs are present\n",
cs->ctx->asid, cs->sequence); cs->ctx->asid, cs->sequence);
rc = -EINVAL; rc = -EINVAL;
goto free_cs_object; goto free_cs_object;
......
...@@ -3943,7 +3943,7 @@ int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) ...@@ -3943,7 +3943,7 @@ int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
{ {
struct goya_device *goya = hdev->asic_specific; struct goya_device *goya = hdev->asic_specific;
if (!parser->ext_queue) if (parser->queue_type == QUEUE_TYPE_INT)
return goya_parse_cb_no_ext_queue(hdev, parser); return goya_parse_cb_no_ext_queue(hdev, parser);
if (goya->hw_cap_initialized & HW_CAP_MMU) if (goya->hw_cap_initialized & HW_CAP_MMU)
...@@ -4614,7 +4614,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, ...@@ -4614,7 +4614,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
lin_dma_pkt++; lin_dma_pkt++;
} while (--lin_dma_pkts_cnt); } while (--lin_dma_pkts_cnt);
job = hl_cs_allocate_job(hdev, true); job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
if (!job) { if (!job) {
dev_err(hdev->dev, "Failed to allocate a new job\n"); dev_err(hdev->dev, "Failed to allocate a new job\n");
rc = -ENOMEM; rc = -ENOMEM;
......
...@@ -85,12 +85,15 @@ struct hl_fpriv; ...@@ -85,12 +85,15 @@ struct hl_fpriv;
* @QUEUE_TYPE_INT: internal queue that performs DMA inside the device's * @QUEUE_TYPE_INT: internal queue that performs DMA inside the device's
* memories and/or operates the compute engines. * memories and/or operates the compute engines.
* @QUEUE_TYPE_CPU: S/W queue for communication with the device's CPU. * @QUEUE_TYPE_CPU: S/W queue for communication with the device's CPU.
* @QUEUE_TYPE_HW: queue of DMA and compute engines jobs, for which completion
* notifications are sent by H/W.
*/ */
enum hl_queue_type { enum hl_queue_type {
QUEUE_TYPE_NA, QUEUE_TYPE_NA,
QUEUE_TYPE_EXT, QUEUE_TYPE_EXT,
QUEUE_TYPE_INT, QUEUE_TYPE_INT,
QUEUE_TYPE_CPU QUEUE_TYPE_CPU,
QUEUE_TYPE_HW
}; };
/** /**
...@@ -755,11 +758,14 @@ struct hl_cs { ...@@ -755,11 +758,14 @@ struct hl_cs {
* @userptr_list: linked-list of userptr mappings that belong to this job and * @userptr_list: linked-list of userptr mappings that belong to this job and
* wait for completion. * wait for completion.
* @debugfs_list: node in debugfs list of command submission jobs. * @debugfs_list: node in debugfs list of command submission jobs.
* @queue_type: the type of the H/W queue this job is submitted to.
* @id: the id of this job inside a CS. * @id: the id of this job inside a CS.
* @hw_queue_id: the id of the H/W queue this job is submitted to. * @hw_queue_id: the id of the H/W queue this job is submitted to.
* @user_cb_size: the actual size of the CB we got from the user. * @user_cb_size: the actual size of the CB we got from the user.
* @job_cb_size: the actual size of the CB that we put on the queue. * @job_cb_size: the actual size of the CB that we put on the queue.
* @ext_queue: whether the job is for external queue or internal queue. * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
* handle to a kernel-allocated CB object, false
* otherwise (SRAM/DRAM/host address).
*/ */
struct hl_cs_job { struct hl_cs_job {
struct list_head cs_node; struct list_head cs_node;
...@@ -769,11 +775,12 @@ struct hl_cs_job { ...@@ -769,11 +775,12 @@ struct hl_cs_job {
struct work_struct finish_work; struct work_struct finish_work;
struct list_head userptr_list; struct list_head userptr_list;
struct list_head debugfs_list; struct list_head debugfs_list;
enum hl_queue_type queue_type;
u32 id; u32 id;
u32 hw_queue_id; u32 hw_queue_id;
u32 user_cb_size; u32 user_cb_size;
u32 job_cb_size; u32 job_cb_size;
u8 ext_queue; u8 is_kernel_allocated_cb;
}; };
/** /**
...@@ -784,24 +791,28 @@ struct hl_cs_job { ...@@ -784,24 +791,28 @@ struct hl_cs_job {
* @job_userptr_list: linked-list of userptr mappings that belong to the related * @job_userptr_list: linked-list of userptr mappings that belong to the related
* job and wait for completion. * job and wait for completion.
* @cs_sequence: the sequence number of the related CS. * @cs_sequence: the sequence number of the related CS.
* @queue_type: the type of the H/W queue this job is submitted to.
* @ctx_id: the ID of the context the related CS belongs to. * @ctx_id: the ID of the context the related CS belongs to.
* @hw_queue_id: the id of the H/W queue this job is submitted to. * @hw_queue_id: the id of the H/W queue this job is submitted to.
* @user_cb_size: the actual size of the CB we got from the user. * @user_cb_size: the actual size of the CB we got from the user.
* @patched_cb_size: the size of the CB after parsing. * @patched_cb_size: the size of the CB after parsing.
* @ext_queue: whether the job is for external queue or internal queue.
* @job_id: the id of the related job inside the related CS. * @job_id: the id of the related job inside the related CS.
* @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
* handle to a kernel-allocated CB object, false
* otherwise (SRAM/DRAM/host address).
*/ */
struct hl_cs_parser { struct hl_cs_parser {
struct hl_cb *user_cb; struct hl_cb *user_cb;
struct hl_cb *patched_cb; struct hl_cb *patched_cb;
struct list_head *job_userptr_list; struct list_head *job_userptr_list;
u64 cs_sequence; u64 cs_sequence;
enum hl_queue_type queue_type;
u32 ctx_id; u32 ctx_id;
u32 hw_queue_id; u32 hw_queue_id;
u32 user_cb_size; u32 user_cb_size;
u32 patched_cb_size; u32 patched_cb_size;
u8 ext_queue;
u8 job_id; u8 job_id;
u8 is_kernel_allocated_cb;
}; };
...@@ -1504,7 +1515,8 @@ int hl_cb_pool_init(struct hl_device *hdev); ...@@ -1504,7 +1515,8 @@ int hl_cb_pool_init(struct hl_device *hdev);
int hl_cb_pool_fini(struct hl_device *hdev); int hl_cb_pool_fini(struct hl_device *hdev);
void hl_cs_rollback_all(struct hl_device *hdev); void hl_cs_rollback_all(struct hl_device *hdev);
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue); struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
void goya_set_asic_funcs(struct hl_device *hdev); void goya_set_asic_funcs(struct hl_device *hdev);
......
This diff is collapsed.
...@@ -23,6 +23,8 @@ struct hl_bd { ...@@ -23,6 +23,8 @@ struct hl_bd {
#define HL_BD_SIZE sizeof(struct hl_bd) #define HL_BD_SIZE sizeof(struct hl_bd)
/* /*
* S/W CTL FIELDS.
*
* BD_CTL_REPEAT_VALID tells the CP whether the repeat field in the BD CTL is * BD_CTL_REPEAT_VALID tells the CP whether the repeat field in the BD CTL is
* valid. 1 means the repeat field is valid, 0 means not-valid, * valid. 1 means the repeat field is valid, 0 means not-valid,
* i.e. repeat == 1 * i.e. repeat == 1
...@@ -33,6 +35,16 @@ struct hl_bd { ...@@ -33,6 +35,16 @@ struct hl_bd {
#define BD_CTL_SHADOW_INDEX_SHIFT 0 #define BD_CTL_SHADOW_INDEX_SHIFT 0
#define BD_CTL_SHADOW_INDEX_MASK 0x00000FFF #define BD_CTL_SHADOW_INDEX_MASK 0x00000FFF
/*
 * H/W CTL FIELDS
 *
 * These fields are used in the BD CTL word for H/W queues, where
 * completion notifications are generated by the H/W itself.
 */
/* Offset field of the completion information — TODO confirm units/meaning
 * against the H/W spec; not derivable from this header alone.
 */
#define BD_CTL_COMP_OFFSET_SHIFT 16
#define BD_CTL_COMP_OFFSET_MASK 0x00FF0000
/* Data field of the completion information */
#define BD_CTL_COMP_DATA_SHIFT 0
#define BD_CTL_COMP_DATA_MASK 0x0000FFFF
/* /*
* COMPLETION QUEUE * COMPLETION QUEUE
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment