Commit e669f10c authored by Matthew Brost, committed by Rodrigo Vivi

drm/xe: Fix VM bind out-sync signaling ordering

A case existed where the out-sync of a later VM bind operation could
signal before that of an earlier one if the later operation resulted in
a NOP (e.g. an unbind or prefetch to a VA range without any mappings).
This breaks the ordering rules; fix it. This patch also lays the
groundwork for users to pass in num_binds == 0 together with out-syncs.
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
parent f3e9b1f4
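
The fix works by having each exec queue remember the last fence it produced; when a bind turns out to be a NOP, its out-syncs are signalled with that remembered fence rather than with dma_fence_get_stub(), so they cannot fire ahead of still-pending earlier operations. Below is a minimal user-space sketch of that ordering idea, using invented names (struct queue, queue_run_bind(), queue_nop_out_fence()); it is a model of the concept only, not the driver's implementation.

/*
 * Minimal user-space model of the "last fence" idea behind this fix.
 * Illustrative only; the types and helpers below are invented and are
 * not the xe driver's API.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct fence {
	bool signaled;
	int refcount;
};

static struct fence *fence_get(struct fence *f)
{
	f->refcount++;
	return f;
}

static void fence_put(struct fence *f)
{
	if (f && --f->refcount == 0)
		free(f);
}

struct queue {
	struct fence *last_fence;	/* most recent fence issued on this queue */
};

/* A real bind produces a new fence and records it as the queue's last fence. */
static struct fence *queue_run_bind(struct queue *q)
{
	struct fence *f = calloc(1, sizeof(*f));

	f->refcount = 1;
	fence_put(q->last_fence);
	q->last_fence = fence_get(f);
	return f;
}

/*
 * A NOP bind has no work of its own, so its out-sync is signalled with the
 * queue's last fence (if still pending) rather than an already-signalled
 * stub; it therefore cannot complete before earlier binds do.
 */
static struct fence *queue_nop_out_fence(struct queue *q)
{
	if (q->last_fence && !q->last_fence->signaled)
		return fence_get(q->last_fence);
	return NULL;	/* nothing pending: safe to signal immediately */
}

int main(void)
{
	struct queue q = { NULL };
	struct fence *bind = queue_run_bind(&q);	/* real bind, still pending */
	struct fence *nop = queue_nop_out_fence(&q);	/* NOP unbind/prefetch */

	printf("NOP out-sync waits on the pending bind: %s\n",
	       nop == bind ? "yes" : "no");

	fence_put(nop);
	fence_put(bind);
	fence_put(q.last_fence);
	return 0;
}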
@@ -156,6 +156,7 @@ void xe_exec_queue_destroy(struct kref *ref)
 	struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
 	struct xe_exec_queue *eq, *next;

+	xe_exec_queue_last_fence_put_unlocked(q);
 	if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
 		list_for_each_entry_safe(eq, next, &q->multi_gt_list,
 					 multi_gt_link)
@@ -916,3 +917,77 @@ int xe_exec_queue_set_property_ioctl(struct drm_device *dev, void *data,

 	return ret;
 }
+
+static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
+						     struct xe_vm *vm)
+{
+	lockdep_assert_held_write(&vm->lock);
+}
+
+/**
+ * xe_exec_queue_last_fence_put() - Drop ref to last fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ */
+void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm)
+{
+	xe_exec_queue_last_fence_lockdep_assert(q, vm);
+
+	if (q->last_fence) {
+		dma_fence_put(q->last_fence);
+		q->last_fence = NULL;
+	}
+}
+
+/**
+ * xe_exec_queue_last_fence_put_unlocked() - Drop ref to last fence unlocked
+ * @q: The exec queue
+ *
+ * Only safe to be called from xe_exec_queue_destroy().
+ */
+void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q)
+{
+	if (q->last_fence) {
+		dma_fence_put(q->last_fence);
+		q->last_fence = NULL;
+	}
+}
+
+/**
+ * xe_exec_queue_last_fence_get() - Get last fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ *
+ * Get last fence, does not take a ref
+ *
+ * Returns: last fence if not signaled, dma fence stub if signaled
+ */
+struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q,
+					       struct xe_vm *vm)
+{
+	xe_exec_queue_last_fence_lockdep_assert(q, vm);
+
+	if (q->last_fence &&
+	    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags))
+		xe_exec_queue_last_fence_put(q, vm);
+
+	return q->last_fence ? q->last_fence : dma_fence_get_stub();
+}
+
+/**
+ * xe_exec_queue_last_fence_set() - Set last fence
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind or exec for
+ * @fence: The fence
+ *
+ * Set the last fence for the engine. Increases reference count for fence, when
+ * closing engine xe_exec_queue_last_fence_put should be called.
+ */
+void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
+				  struct dma_fence *fence)
+{
+	xe_exec_queue_last_fence_lockdep_assert(q, vm);
+
+	xe_exec_queue_last_fence_put(q, vm);
+	q->last_fence = dma_fence_get(fence);
+}
@@ -61,4 +61,11 @@ int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
 				     struct drm_file *file);

 enum xe_exec_queue_priority xe_exec_queue_device_get_max_priority(struct xe_device *xe);

+void xe_exec_queue_last_fence_put(struct xe_exec_queue *e, struct xe_vm *vm);
+void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *e);
+struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *e,
+					       struct xe_vm *vm);
+void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm,
+				  struct dma_fence *fence);
+
 #endif
@@ -65,6 +65,12 @@ struct xe_exec_queue {
 	/** @fence_irq: fence IRQ used to signal job completion */
 	struct xe_hw_fence_irq *fence_irq;

+	/**
+	 * @last_fence: last fence on engine, protected by vm->lock in write
+	 * mode if bind engine
+	 */
+	struct dma_fence *last_fence;
+
 	/* queue no longer allowed to submit */
 #define EXEC_QUEUE_FLAG_BANNED			BIT(0)
 	/* queue used for kernel submission only */
...
@@ -1524,6 +1524,13 @@ void xe_vm_close_and_put(struct xe_vm *vm)
 	if (xe_vm_in_compute_mode(vm))
 		flush_work(&vm->preempt.rebind_work);

+	down_write(&vm->lock);
+	for_each_tile(tile, xe, id) {
+		if (vm->q[id])
+			xe_exec_queue_last_fence_put(vm->q[id], vm);
+	}
+	up_write(&vm->lock);
+
 	for_each_tile(tile, xe, id) {
 		if (vm->q[id]) {
 			xe_exec_queue_kill(vm->q[id]);
@@ -1665,16 +1672,23 @@ u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
 			       tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
 }

+static struct xe_exec_queue *
+to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
+{
+	return q ? q : vm->q[0];
+}
+
 static struct dma_fence *
 xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
 		 struct xe_sync_entry *syncs, u32 num_syncs,
 		 bool first_op, bool last_op)
 {
+	struct xe_vm *vm = xe_vma_vm(vma);
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
 	struct xe_tile *tile;
 	struct dma_fence *fence = NULL;
 	struct dma_fence **fences = NULL;
 	struct dma_fence_array *cf = NULL;
-	struct xe_vm *vm = xe_vma_vm(vma);
 	int cur_fence = 0, i;
 	int number_tiles = hweight8(vma->tile_present);
 	int err;
@@ -1727,7 +1741,8 @@ xe_vm_unbind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
 					cf ? &cf->base : fence);
 	}

-	return cf ? &cf->base : !fence ? dma_fence_get_stub() : fence;
+	return cf ? &cf->base : !fence ?
+		xe_exec_queue_last_fence_get(wait_exec_queue, vm) : fence;

 err_fences:
 	if (fences) {
@@ -1826,6 +1841,7 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
 			bool last_op)
 {
 	struct dma_fence *fence;
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);

 	xe_vm_assert_held(vm);
@@ -1839,13 +1855,15 @@ static int __xe_vm_bind(struct xe_vm *vm, struct xe_vma *vma,
 		xe_assert(vm->xe, xe_vm_in_fault_mode(vm));

-		fence = dma_fence_get_stub();
+		fence = xe_exec_queue_last_fence_get(wait_exec_queue, vm);
 		if (last_op) {
 			for (i = 0; i < num_syncs; i++)
 				xe_sync_entry_signal(&syncs[i], NULL, fence);
 		}
 	}

+	if (last_op)
+		xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
 	if (last_op && xe_vm_sync_mode(vm, q))
 		dma_fence_wait(fence, true);
 	dma_fence_put(fence);
@@ -1878,6 +1896,7 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
 			u32 num_syncs, bool first_op, bool last_op)
 {
 	struct dma_fence *fence;
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);

 	xe_vm_assert_held(vm);
 	xe_bo_assert_held(xe_vma_bo(vma));
@@ -1887,6 +1906,8 @@ static int xe_vm_unbind(struct xe_vm *vm, struct xe_vma *vma,
 		return PTR_ERR(fence);

 	xe_vma_destroy(vma, fence);
+	if (last_op)
+		xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
 	if (last_op && xe_vm_sync_mode(vm, q))
 		dma_fence_wait(fence, true);
 	dma_fence_put(fence);
@@ -2036,6 +2057,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 			  struct xe_sync_entry *syncs, u32 num_syncs,
 			  bool first_op, bool last_op)
 {
+	struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, q);
 	int err;

 	xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type));
@@ -2054,9 +2076,12 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 		/* Nothing to do, signal fences now */
 		if (last_op) {
-			for (i = 0; i < num_syncs; i++)
-				xe_sync_entry_signal(&syncs[i], NULL,
-						     dma_fence_get_stub());
+			for (i = 0; i < num_syncs; i++) {
+				struct dma_fence *fence =
+					xe_exec_queue_last_fence_get(wait_exec_queue, vm);
+
+				xe_sync_entry_signal(&syncs[i], NULL, fence);
+			}
 		}

 		return 0;
@@ -3108,8 +3133,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 unwind_ops:
 	vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds);
 free_syncs:
-	for (i = 0; err == -ENODATA && i < num_syncs; i++)
-		xe_sync_entry_signal(&syncs[i], NULL, dma_fence_get_stub());
+	for (i = 0; err == -ENODATA && i < num_syncs; i++) {
+		struct dma_fence *fence =
+			xe_exec_queue_last_fence_get(to_wait_exec_queue(vm, q), vm);
+
+		xe_sync_entry_signal(&syncs[i], NULL, fence);
+	}
+
 	while (num_syncs--)
 		xe_sync_entry_cleanup(&syncs[num_syncs]);
...