Commit 7a78d4d4 authored by Oded Gabbay

habanalabs: fix race between wait and irq

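There is a race in the user interrupts code: between checking the
target value and adding the new pend node to the wait list, the
interrupt may already have fired.

In that case, no one will complete the node, and the wait on it will
time out. Fix this by taking wait_list_lock before checking the target
value and holding it until the node has been added to the list.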
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
parent 54faa560
...@@ -2892,16 +2892,21 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, ...@@ -2892,16 +2892,21 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
pend->cq_kernel_addr = (u64 *) cb->kernel_address + cq_counters_offset; pend->cq_kernel_addr = (u64 *) cb->kernel_address + cq_counters_offset;
pend->cq_target_value = target_value; pend->cq_target_value = target_value;
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
/* We check for completion value as interrupt could have been received /* We check for completion value as interrupt could have been received
* before we added the node to the wait list * before we added the node to the wait list
*/ */
if (*pend->cq_kernel_addr >= target_value) { if (*pend->cq_kernel_addr >= target_value) {
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
*status = HL_WAIT_CS_STATUS_COMPLETED; *status = HL_WAIT_CS_STATUS_COMPLETED;
/* There was no interrupt, we assume the completion is now. */ /* There was no interrupt, we assume the completion is now. */
pend->fence.timestamp = ktime_get(); pend->fence.timestamp = ktime_get();
goto set_timestamp; goto set_timestamp;
} else if (!timeout_us) { } else if (!timeout_us) {
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
*status = HL_WAIT_CS_STATUS_BUSY; *status = HL_WAIT_CS_STATUS_BUSY;
pend->fence.timestamp = ktime_get(); pend->fence.timestamp = ktime_get();
goto set_timestamp; goto set_timestamp;
...@@ -2910,7 +2915,6 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, ...@@ -2910,7 +2915,6 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
/* Add pending user interrupt to relevant list for the interrupt /* Add pending user interrupt to relevant list for the interrupt
* handler to monitor * handler to monitor
*/ */
spin_lock_irqsave(&interrupt->wait_list_lock, flags);
list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment