Commit b0222053 authored by Ohad Sharabi, committed by Oded Gabbay

habanalabs: wait again for multi-CS if no CS completed

The original multi-CS design assumption, that stream masters are used
exclusively (i.e. a multi-CS with a set of stream master QIDs will not get
completed by a CS that is not from the multi-CS set), is inaccurate.

Thus, multi-CS behavior is now modified not to treat such a case as an
error.

Instead, if we have a multi-CS completion but we detect that no CS from
the list has actually completed, we will do another multi-CS wait (with
a modified timeout).
Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
parent 5b90e59d
...@@ -545,13 +545,6 @@ static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs) ...@@ -545,13 +545,6 @@ static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
* mcs fences. * mcs fences.
*/ */
fence->mcs_handling_done = true; fence->mcs_handling_done = true;
/*
* Since CS (and its related fence) can be associated with only one
* multi CS context, once it triggered multi CS completion no need to
* continue checking other multi CS contexts.
*/
spin_unlock(&mcs_compl->lock);
break;
} }
spin_unlock(&mcs_compl->lock); spin_unlock(&mcs_compl->lock);
...@@ -2498,6 +2491,21 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, ...@@ -2498,6 +2491,21 @@ static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
return rc; return rc;
} }
/*
* hl_usecs64_to_jiffies - convert a 64-bit microseconds value to jiffies
*
* @usecs: time span in microseconds
*
* @return the value returned by usecs_to_jiffies() or nsecs_to_jiffies()
*         for the given span
*/
static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs)
{
/* values that fit in 32 bit are converted directly from microseconds */
if (usecs <= U32_MAX)
return usecs_to_jiffies(usecs);
/*
* Larger values are converted via nanoseconds. If the value in
* nanoseconds is larger than 64 bit, use the largest 64 bit value
* instead of letting the multiplication below wrap around.
*/
if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC)))
return nsecs_to_jiffies(U64_MAX);
return nsecs_to_jiffies(usecs * NSEC_PER_USEC);
}
/* /*
* hl_wait_multi_cs_completion_init - init completion structure * hl_wait_multi_cs_completion_init - init completion structure
* *
...@@ -2534,8 +2542,7 @@ static struct multi_cs_completion *hl_wait_multi_cs_completion_init( ...@@ -2534,8 +2542,7 @@ static struct multi_cs_completion *hl_wait_multi_cs_completion_init(
} }
if (i == MULTI_CS_MAX_USER_CTX) { if (i == MULTI_CS_MAX_USER_CTX) {
dev_err(hdev->dev, dev_err(hdev->dev, "no available multi-CS completion structure\n");
"no available multi-CS completion structure\n");
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
} }
return mcs_compl; return mcs_compl;
...@@ -2566,27 +2573,18 @@ static void hl_wait_multi_cs_completion_fini( ...@@ -2566,27 +2573,18 @@ static void hl_wait_multi_cs_completion_fini(
* *
* @return 0 on success, otherwise non 0 error code * @return 0 on success, otherwise non 0 error code
*/ */
static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data) static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data,
struct multi_cs_completion *mcs_compl)
{ {
struct hl_device *hdev = mcs_data->ctx->hdev;
struct multi_cs_completion *mcs_compl;
long completion_rc; long completion_rc;
mcs_compl = hl_wait_multi_cs_completion_init(hdev, completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion,
mcs_data->stream_master_qid_map); mcs_data->timeout_jiffies);
if (IS_ERR(mcs_compl))
return PTR_ERR(mcs_compl);
completion_rc = wait_for_completion_interruptible_timeout(
&mcs_compl->completion,
usecs_to_jiffies(mcs_data->timeout_us));
/* update timestamp */ /* update timestamp */
if (completion_rc > 0) if (completion_rc > 0)
mcs_data->timestamp = mcs_compl->timestamp; mcs_data->timestamp = mcs_compl->timestamp;
hl_wait_multi_cs_completion_fini(mcs_compl);
mcs_data->wait_status = completion_rc; mcs_data->wait_status = completion_rc;
return 0; return 0;
...@@ -2619,6 +2617,7 @@ void hl_multi_cs_completion_init(struct hl_device *hdev) ...@@ -2619,6 +2617,7 @@ void hl_multi_cs_completion_init(struct hl_device *hdev)
*/ */
static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{ {
struct multi_cs_completion *mcs_compl;
struct hl_device *hdev = hpriv->hdev; struct hl_device *hdev = hpriv->hdev;
struct multi_cs_data mcs_data = {0}; struct multi_cs_data mcs_data = {0};
union hl_wait_cs_args *args = data; union hl_wait_cs_args *args = data;
...@@ -2686,12 +2685,19 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) ...@@ -2686,12 +2685,19 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
goto put_ctx; goto put_ctx;
/* wait (with timeout) for the first CS to be completed */ /* wait (with timeout) for the first CS to be completed */
mcs_data.timeout_us = args->in.timeout_us; mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us);
rc = hl_wait_multi_cs_completion(&mcs_data);
if (rc) mcs_compl = hl_wait_multi_cs_completion_init(hdev, mcs_data.stream_master_qid_map);
if (IS_ERR(mcs_compl)) {
rc = PTR_ERR(mcs_compl);
goto put_ctx; goto put_ctx;
}
while (true) {
rc = hl_wait_multi_cs_completion(&mcs_data, mcs_compl);
if (rc || (mcs_data.wait_status == 0))
break;
if (mcs_data.wait_status > 0) {
/* /*
* poll fences once again to update the CS map. * poll fences once again to update the CS map.
* no timestamp should be updated this time. * no timestamp should be updated this time.
...@@ -2699,18 +2705,26 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) ...@@ -2699,18 +2705,26 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
mcs_data.update_ts = false; mcs_data.update_ts = false;
rc = hl_cs_poll_fences(&mcs_data); rc = hl_cs_poll_fences(&mcs_data);
if (mcs_data.completion_bitmap)
break;
/* /*
* if hl_wait_multi_cs_completion returned before timeout (i.e. * if hl_wait_multi_cs_completion returned before timeout (i.e.
* it got a completion) we expect to see at least one CS * it got a completion) it either got completed by CS in the multi CS list
* completed after the poll function. * (in which case the indication will be non empty completion_bitmap) or it
* got completed by CS submitted to one of the shared stream master but
* not in the multi CS list (in which case we should wait again but reinit
* the completion, modify the timeout and set timestamp as zero to let a CS
* related to the current multi-CS set a new, relevant, timestamp)
*/ */
if (!mcs_data.completion_bitmap) { /* wait again with modified timeout */
dev_warn_ratelimited(hdev->dev, mcs_data.timeout_jiffies = mcs_data.wait_status;
"Multi-CS got completion on wait but no CS completed\n"); reinit_completion(&mcs_compl->completion);
rc = -EFAULT; mcs_compl->timestamp = 0;
}
} }
hl_wait_multi_cs_completion_fini(mcs_compl);
put_ctx: put_ctx:
hl_ctx_put(ctx); hl_ctx_put(ctx);
kfree(fence_arr); kfree(fence_arr);
...@@ -2741,7 +2755,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) ...@@ -2741,7 +2755,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
} }
/* update if some CS was gone */ /* update if some CS was gone */
if (mcs_data.timestamp) if (!mcs_data.timestamp)
args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE; args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
} else { } else {
args->out.status = HL_WAIT_CS_STATUS_BUSY; args->out.status = HL_WAIT_CS_STATUS_BUSY;
...@@ -2807,21 +2821,6 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) ...@@ -2807,21 +2821,6 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
return 0; return 0;
} }
/*
* hl_usecs64_to_jiffies - convert a 64-bit microseconds value to jiffies
*
* @usecs: time span in microseconds
*
* @return the value returned by usecs_to_jiffies() or nsecs_to_jiffies()
*         for the given span
*/
static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs)
{
/* values that fit in 32 bit are converted directly from microseconds */
if (usecs <= U32_MAX)
return usecs_to_jiffies(usecs);
/*
* Larger values are converted via nanoseconds. If the value in
* nanoseconds is larger than 64 bit, use the largest 64 bit value
* instead of letting the multiplication below wrap around.
*/
if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC)))
return nsecs_to_jiffies(U64_MAX);
return nsecs_to_jiffies(usecs * NSEC_PER_USEC);
}
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
u64 timeout_us, u64 user_address, u64 timeout_us, u64 user_address,
u64 target_value, struct hl_user_interrupt *interrupt, u64 target_value, struct hl_user_interrupt *interrupt,
......
...@@ -2362,7 +2362,7 @@ struct multi_cs_completion { ...@@ -2362,7 +2362,7 @@ struct multi_cs_completion {
* @ctx: pointer to the context structure * @ctx: pointer to the context structure
* @fence_arr: array of fences of all CSs * @fence_arr: array of fences of all CSs
* @seq_arr: array of CS sequence numbers * @seq_arr: array of CS sequence numbers
* @timeout_us: timeout in usec for waiting for CS to complete * @timeout_jiffies: timeout in jiffies for waiting for CS to complete
* @timestamp: timestamp of first completed CS * @timestamp: timestamp of first completed CS
* @wait_status: wait for CS status * @wait_status: wait for CS status
* @completion_bitmap: bitmap of completed CSs (1- completed, otherwise 0) * @completion_bitmap: bitmap of completed CSs (1- completed, otherwise 0)
...@@ -2376,7 +2376,7 @@ struct multi_cs_data { ...@@ -2376,7 +2376,7 @@ struct multi_cs_data {
struct hl_ctx *ctx; struct hl_ctx *ctx;
struct hl_fence **fence_arr; struct hl_fence **fence_arr;
u64 *seq_arr; u64 *seq_arr;
s64 timeout_us; s64 timeout_jiffies;
s64 timestamp; s64 timestamp;
long wait_status; long wait_status;
u32 completion_bitmap; u32 completion_bitmap;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment