Commit 2795c889 authored by Ofir Bitton's avatar Ofir Bitton Committed by Oded Gabbay

habanalabs: staged submission support

We introduce a new mechanism named Staged Submission.
This mechanism allows the user to send a whole CS in pieces.
No CS in the group requires a completion except for the last one.
The timeout timer is started upon reception of the first CS in the
group.
Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
parent cf30339d
...@@ -334,6 +334,133 @@ static void complete_job(struct hl_device *hdev, struct hl_cs_job *job) ...@@ -334,6 +334,133 @@ static void complete_job(struct hl_device *hdev, struct hl_cs_job *job)
cs_job_put(job); cs_job_put(job);
} }
/*
 * hl_staged_cs_find_first - look up the opening CS of a staged submission
 *
 * @hdev: pointer to device structure
 * @cs_seq: staged submission sequence number
 *
 * @note: This function must be called under 'hdev->cs_mirror_lock'
 *
 * Walk the device CS mirror list from its tail towards its head and return
 * the first entry met that is a staged CS, is flagged as the first CS of its
 * staged submission and carries the requested sequence number.
 * Return NULL when the list holds no such entry.
 */
struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq)
{
	struct hl_cs *iter;

	list_for_each_entry_reverse(iter, &hdev->cs_mirror_list, mirror_node) {
		/* Only a staged CS flagged as 'first' is a candidate */
		if (!iter->staged_cs || !iter->staged_first)
			continue;

		if (iter->sequence == cs_seq)
			return iter;
	}

	return NULL;
}
/*
 * is_staged_cs_last_exists - tell whether the closing CS was already queued
 *
 * @hdev: pointer to device structure (not used by this function)
 * @cs: staged submission member whose 'staged_cs_node' anchors the list
 *
 * Look at the entry at the tail of the list anchored at @cs->staged_cs_node
 * and return true if that entry is flagged 'staged_last', false otherwise.
 */
bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs)
{
	struct hl_cs *tail = list_last_entry(&cs->staged_cs_node, struct hl_cs,
						staged_cs_node);

	return tail->staged_last ? true : false;
}
/*
 * staged_cs_get - take a CS reference for a non-final staged submission member
 *
 * @hdev: pointer to device structure (not used by this function)
 * @cs: current CS
 *
 * Increment the CS reference unless the CS is flagged 'staged_last', i.e.
 * unless it is the CS which gets the completion.
 */
static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs)
{
	/* The last CS of a staged submission is the only one that gets a
	 * completion, so it needs no extra reference.
	 */
	if (cs->staged_last)
		return;

	/* Every other CS of the staged submission is pinned here. Once the
	 * completion arrives the whole staged submission is released.
	 */
	cs_get(cs);
}
/*
 * staged_cs_put - drop the reference of a CS that gets no completion
 *
 * @hdev: pointer to device structure (not used by this function)
 * @cs: CS to put
 *
 * Decrement the CS reference when cs_needs_completion() reports that the CS
 * does not need a completion; otherwise do nothing.
 */
static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
{
	/* The last CS of a staged submission never had its reference
	 * incremented, so there is nothing to release for it.
	 */
	if (cs_needs_completion(cs))
		return;

	/* Any other CS of the staged submission is released here */
	cs_put(cs);
}
/*
 * cs_handle_tdr - stop the TDR of a released CS and arm it for the next CS
 *
 * @hdev: pointer to device structure
 * @cs: the CS that is being released
 *
 * Does nothing if cs_needs_timeout() reports that @cs has no timeout.
 * Otherwise the pending TDR work of the relevant CS is cancelled and a TDR
 * is scheduled for the first CS in the mirror list that needs a timeout and
 * does not have one active yet.
 *
 * This function takes 'hdev->cs_mirror_lock' by itself, so it must be called
 * without that lock held.
 */
static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
{
	bool next_entry_found = false;
	struct hl_cs *next;

	if (!cs_needs_timeout(cs))
		return;

	spin_lock(&hdev->cs_mirror_lock);

	/* We need to handle tdr only once for the complete staged submission.
	 * Hence, we choose the CS that reaches this function first which is
	 * the CS marked as 'staged_last'.
	 * The lookup searches the mirror list, so it returns NULL when the
	 * first CS is no longer linked there (e.g. a submission made of a
	 * single CS flagged both first and last, which the release path
	 * unlinks before calling us). In that case keep handling the CS we
	 * were given so its own TDR work is still cancelled and the checks
	 * below are not skipped.
	 */
	if (cs->staged_cs && cs->staged_last) {
		struct hl_cs *first_cs;

		first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
		if (first_cs)
			cs = first_cs;
	}

	spin_unlock(&hdev->cs_mirror_lock);

	/* Don't cancel TDR in case this CS was timedout because we might be
	 * running from the TDR context
	 */
	if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT)
		return;

	/* Must be done without the spinlock, the sync cancel may wait for a
	 * running TDR work to finish
	 */
	if (cs->tdr_active)
		cancel_delayed_work_sync(&cs->work_tdr);

	spin_lock(&hdev->cs_mirror_lock);

	/* queue TDR for next CS */
	list_for_each_entry(next, &hdev->cs_mirror_list, mirror_node)
		if (cs_needs_timeout(next)) {
			next_entry_found = true;
			break;
		}

	/* 'next' is only meaningful when the loop above stopped on a match */
	if (next_entry_found && !next->tdr_active) {
		next->tdr_active = true;
		schedule_delayed_work(&next->work_tdr,
					hdev->timeout_jiffies);
	}

	spin_unlock(&hdev->cs_mirror_lock);
}
static void cs_do_release(struct kref *ref) static void cs_do_release(struct kref *ref)
{ {
struct hl_cs *cs = container_of(ref, struct hl_cs, refcount); struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
...@@ -391,33 +518,30 @@ static void cs_do_release(struct kref *ref) ...@@ -391,33 +518,30 @@ static void cs_do_release(struct kref *ref)
list_del_init(&cs->mirror_node); list_del_init(&cs->mirror_node);
spin_unlock(&hdev->cs_mirror_lock); spin_unlock(&hdev->cs_mirror_lock);
/* Don't cancel TDR in case this CS was timedout because we might be cs_handle_tdr(hdev, cs);
* running from the TDR context
*/
if (!cs->timedout && hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
bool next_entry_found = false;
struct hl_cs *next;
if (cs->tdr_active) if (cs->staged_cs) {
cancel_delayed_work_sync(&cs->work_tdr); /* the completion CS decrements reference for the entire
* staged submission
spin_lock(&hdev->cs_mirror_lock); */
if (cs->staged_last) {
/* queue TDR for next CS */ struct hl_cs *staged_cs, *tmp;
list_for_each_entry(next, &hdev->cs_mirror_list, mirror_node)
if (cs_needs_timeout(next)) {
next_entry_found = true;
break;
}
if (next_entry_found && !next->tdr_active) { list_for_each_entry_safe(staged_cs, tmp,
next->tdr_active = true; &cs->staged_cs_node, staged_cs_node)
schedule_delayed_work(&next->work_tdr, staged_cs_put(hdev, staged_cs);
hdev->timeout_jiffies);
} }
/* A staged CS will be a member in the list only after it
* was submitted. We used 'cs_mirror_lock' when inserting
* it to list so we will use it again when removing it
*/
if (cs->submitted) {
spin_lock(&hdev->cs_mirror_lock);
list_del(&cs->staged_cs_node);
spin_unlock(&hdev->cs_mirror_lock); spin_unlock(&hdev->cs_mirror_lock);
} }
}
out: out:
/* Must be called before hl_ctx_put because inside we use ctx to get /* Must be called before hl_ctx_put because inside we use ctx to get
...@@ -614,6 +738,8 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs) ...@@ -614,6 +738,8 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
{ {
struct hl_cs_job *job, *tmp; struct hl_cs_job *job, *tmp;
staged_cs_put(hdev, cs);
list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
complete_job(hdev, job); complete_job(hdev, job);
} }
...@@ -623,7 +749,9 @@ void hl_cs_rollback_all(struct hl_device *hdev) ...@@ -623,7 +749,9 @@ void hl_cs_rollback_all(struct hl_device *hdev)
int i; int i;
struct hl_cs *cs, *tmp; struct hl_cs *cs, *tmp;
/* flush all completions */ /* flush all completions before iterating over the CS mirror list in
* order to avoid a race with the release functions
*/
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
flush_workqueue(hdev->cq_wq[i]); flush_workqueue(hdev->cq_wq[i]);
...@@ -804,6 +932,12 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args) ...@@ -804,6 +932,12 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
return -EBUSY; return -EBUSY;
} }
if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
!hdev->supports_staged_submission) {
dev_err(hdev->dev, "staged submission not supported");
return -EPERM;
}
cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK; cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK;
if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) { if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) {
...@@ -875,6 +1009,34 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev, ...@@ -875,6 +1009,34 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev,
return 0; return 0;
} }
/*
 * cs_staged_submission - set up the staged submission state of a new CS
 *
 * @hdev: pointer to device structure
 * @cs: the CS being prepared
 * @sequence: staged submission sequence number supplied by the user
 * @flags: CS flags supplied by the user
 *
 * When HL_CS_FLAGS_STAGED_SUBMISSION is not set in @flags the CS is left
 * untouched. Otherwise the first/last indications and the staged sequence
 * are recorded, a CS reference is taken if needed and the CS is marked as
 * staged.
 *
 * Always returns 0.
 */
static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
				u64 sequence, u32 flags)
{
	if (flags & HL_CS_FLAGS_STAGED_SUBMISSION) {
		cs->staged_last =
			(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST) != 0;
		cs->staged_first =
			(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST) != 0;

		/* Only the first CS initializes its staged list node */
		if (cs->staged_first)
			INIT_LIST_HEAD(&cs->staged_cs_node);

		/* Staged CS sequence is the first CS sequence. For any other
		 * CS the user sequence is taken as is; it will be validated
		 * in 'hl_hw_queue_schedule_cs' under the cs_mirror_lock
		 */
		cs->staged_sequence = cs->staged_first ? cs->sequence :
								sequence;

		/* Increment CS reference if needed */
		staged_cs_get(hdev, cs);

		cs->staged_cs = true;
	}

	return 0;
}
static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
u32 num_chunks, u64 *cs_seq, u32 flags) u32 num_chunks, u64 *cs_seq, u32 flags)
{ {
...@@ -914,6 +1076,10 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, ...@@ -914,6 +1076,10 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
hl_debugfs_add_cs(cs); hl_debugfs_add_cs(cs);
rc = cs_staged_submission(hdev, cs, user_sequence, flags);
if (rc)
goto free_cs_object;
/* Validate ALL the CS chunks before submitting the CS */ /* Validate ALL the CS chunks before submitting the CS */
for (i = 0 ; i < num_chunks ; i++) { for (i = 0 ; i < num_chunks ; i++) {
struct hl_cs_chunk *chunk = &cs_chunk_array[i]; struct hl_cs_chunk *chunk = &cs_chunk_array[i];
......
...@@ -1169,8 +1169,11 @@ struct hl_userptr { ...@@ -1169,8 +1169,11 @@ struct hl_userptr {
* @finish_work: workqueue object to run when CS is completed by H/W. * @finish_work: workqueue object to run when CS is completed by H/W.
* @work_tdr: delayed work node for TDR. * @work_tdr: delayed work node for TDR.
* @mirror_node : node in device mirror list of command submissions. * @mirror_node : node in device mirror list of command submissions.
* @staged_cs_node: node in the staged cs list.
* @debugfs_list: node in debugfs list of command submissions. * @debugfs_list: node in debugfs list of command submissions.
* @sequence: the sequence number of this CS. * @sequence: the sequence number of this CS.
* @staged_sequence: the sequence of the staged submission this CS is part of,
* relevant only if staged_cs is set.
* @type: CS_TYPE_*. * @type: CS_TYPE_*.
* @submitted: true if CS was submitted to H/W. * @submitted: true if CS was submitted to H/W.
* @completed: true if CS was completed by device. * @completed: true if CS was completed by device.
...@@ -1195,8 +1198,10 @@ struct hl_cs { ...@@ -1195,8 +1198,10 @@ struct hl_cs {
struct work_struct finish_work; struct work_struct finish_work;
struct delayed_work work_tdr; struct delayed_work work_tdr;
struct list_head mirror_node; struct list_head mirror_node;
struct list_head staged_cs_node;
struct list_head debugfs_list; struct list_head debugfs_list;
u64 sequence; u64 sequence;
u64 staged_sequence;
enum hl_cs_type type; enum hl_cs_type type;
u8 submitted; u8 submitted;
u8 completed; u8 completed;
...@@ -1905,6 +1910,7 @@ struct hl_mmu_funcs { ...@@ -1905,6 +1910,7 @@ struct hl_mmu_funcs {
* user processes * user processes
* @device_fini_pending: true if device_fini was called and might be * @device_fini_pending: true if device_fini was called and might be
* waiting for the reset thread to finish * waiting for the reset thread to finish
* @supports_staged_submission: true if staged submissions are supported
*/ */
struct hl_device { struct hl_device {
struct pci_dev *pdev; struct pci_dev *pdev;
...@@ -2010,6 +2016,7 @@ struct hl_device { ...@@ -2010,6 +2016,7 @@ struct hl_device {
u8 needs_reset; u8 needs_reset;
u8 process_kill_trial_cnt; u8 process_kill_trial_cnt;
u8 device_fini_pending; u8 device_fini_pending;
u8 supports_staged_submission;
/* Parameters for bring-up */ /* Parameters for bring-up */
u64 nic_ports_mask; u64 nic_ports_mask;
...@@ -2207,6 +2214,8 @@ void hl_fence_get(struct hl_fence *fence); ...@@ -2207,6 +2214,8 @@ void hl_fence_get(struct hl_fence *fence);
void cs_get(struct hl_cs *cs); void cs_get(struct hl_cs *cs);
bool cs_needs_completion(struct hl_cs *cs); bool cs_needs_completion(struct hl_cs *cs);
bool cs_needs_timeout(struct hl_cs *cs); bool cs_needs_timeout(struct hl_cs *cs);
bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs);
struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq);
void goya_set_asic_funcs(struct hl_device *hdev); void goya_set_asic_funcs(struct hl_device *hdev);
void gaudi_set_asic_funcs(struct hl_device *hdev); void gaudi_set_asic_funcs(struct hl_device *hdev);
......
...@@ -596,6 +596,31 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs) ...@@ -596,6 +596,31 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
hdev->asic_funcs->collective_wait_init_cs(cs); hdev->asic_funcs->collective_wait_init_cs(cs);
spin_lock(&hdev->cs_mirror_lock); spin_lock(&hdev->cs_mirror_lock);
/* Verify staged CS exists and add to the staged list */
if (cs->staged_cs && !cs->staged_first) {
struct hl_cs *staged_cs;
staged_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
if (!staged_cs) {
dev_err(hdev->dev,
"Cannot find staged submission sequence %llu",
cs->staged_sequence);
rc = -EINVAL;
goto unlock_cs_mirror;
}
if (is_staged_cs_last_exists(hdev, staged_cs)) {
dev_err(hdev->dev,
"Staged submission sequence %llu already submitted",
cs->staged_sequence);
rc = -EINVAL;
goto unlock_cs_mirror;
}
list_add_tail(&cs->staged_cs_node, &staged_cs->staged_cs_node);
}
list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list); list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list);
/* Queue TDR if the CS is the first entry and if timeout is wanted */ /* Queue TDR if the CS is the first entry and if timeout is wanted */
...@@ -637,6 +662,8 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs) ...@@ -637,6 +662,8 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
goto out; goto out;
unlock_cs_mirror:
spin_unlock(&hdev->cs_mirror_lock);
unroll_cq_resv: unroll_cq_resv:
q = &hdev->kernel_queues[0]; q = &hdev->kernel_queues[0];
for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) { for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
......
...@@ -1627,6 +1627,7 @@ static int gaudi_sw_init(struct hl_device *hdev) ...@@ -1627,6 +1627,7 @@ static int gaudi_sw_init(struct hl_device *hdev)
hdev->supports_sync_stream = true; hdev->supports_sync_stream = true;
hdev->supports_coresight = true; hdev->supports_coresight = true;
hdev->supports_staged_submission = true;
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment