Commit 03ccf481 authored by Monk Liu, committed by Alex Deucher

drm/amdgpu: patch cond exec for SDMA

More groundwork for conditional execution on SDMA,
which is necessary for preemption.
Signed-off-by: Monk Liu <monk.liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 128cff1a
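The mechanism the new hooks implement: init_cond_exec() emits an SDMA COND_EXE packet which makes the engine check the dword at ring->cond_exe_gpu_addr and, depending on its value, execute or skip the block of commands that follows; since the length of that block is not known at emission time, a dummy dword (0x55aa55aa) is written in place of the count and patch_cond_exec() overwrites it with the real count once the whole frame has been emitted. A minimal sketch of the intended call order, mirroring the amdgpu_ib.c hunk below (the schedule_with_cond_exec()/emit_frame() names are illustrative only, not from the patch):

/*
 * Hedged sketch, not part of the patch: bracket the frame with the new
 * hooks.  emit_frame() stands in for the VM flush / IB / fence emission.
 */
static void schedule_with_cond_exec(struct amdgpu_ring *ring)
{
	unsigned patch_offset = ~0;

	if (ring->funcs->init_cond_exec)
		patch_offset = amdgpu_ring_init_cond_exec(ring); /* COND_EXE + dummy count */

	emit_frame(ring); /* illustrative helper: VM flush, IBs, fence */

	if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
		amdgpu_ring_patch_cond_exec(ring, patch_offset); /* write the real dword count */

	amdgpu_ring_commit(ring);
}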
@@ -302,6 +302,8 @@ struct amdgpu_ring_funcs {
 	void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
 	/* pad the indirect buffer to the necessary number of dw */
 	void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
+	unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
+	void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset);
 };

 /*
@@ -2182,6 +2184,8 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
 #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
+#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
+#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
 #define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
 #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
 #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
...
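The hunk that actually plugs these callbacks into the SDMA ring-funcs table is not part of this excerpt; a minimal sketch of that hookup, assuming the two functions added in the sdma_v3_0.c hunk further down and a funcs table named sdma_v3_0_ring_funcs:

/*
 * Sketch only; the real funcs-table change is not shown in this excerpt,
 * and the table name and other members are assumed from context.
 */
static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
	/* ... existing callbacks: .emit_ib, .emit_fence, .pad_ib, ... */
	.init_cond_exec = init_cond_exec,
	.patch_cond_exec = patch_cond_exec,
};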
@@ -124,7 +124,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	struct amdgpu_ctx *ctx, *old_ctx;
 	struct amdgpu_vm *vm;
 	struct fence *hwf;
-	unsigned i;
+	unsigned i, patch_offset = ~0;
 	int r = 0;

 	if (num_ibs == 0)
@@ -149,6 +150,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		return r;
 	}

+	if (ring->type == AMDGPU_RING_TYPE_SDMA && ring->funcs->init_cond_exec)
+		patch_offset = amdgpu_ring_init_cond_exec(ring);
+
 	if (vm) {
 		/* do context switch */
 		amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr,
@@ -204,6 +208,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	if (f)
 		*f = fence_get(hwf);

+	if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
+		amdgpu_ring_patch_cond_exec(ring, patch_offset);
+
 	amdgpu_ring_commit(ring);
 	return 0;
 }
...
@@ -452,6 +452,31 @@ static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
 	amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
 }

+unsigned init_cond_exec(struct amdgpu_ring *ring)
+{
+	unsigned ret;
+
+	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
+	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
+	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+	amdgpu_ring_write(ring, 1);
+	ret = ring->wptr;/* this is the offset we need patch later */
+	amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+	return ret;
+}
+
+void patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
+{
+	unsigned cur;
+
+	BUG_ON(ring->ring[offset] != 0x55aa55aa);
+
+	cur = ring->wptr - 1;
+	if (likely(cur > offset))
+		ring->ring[offset] = cur - offset;
+	else
+		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
+}
+
 /**
  * sdma_v3_0_gfx_stop - stop the gfx async dma engines
  *
...
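The count written by patch_cond_exec() is the number of dwords between the patched slot and the current write position, taking a wrap of the ring into account. A small sketch of that computation with a worked example (the helper name and the numbers are illustrative only):

/*
 * Hedged sketch, not in the patch: the dword count patch_cond_exec()
 * stores, expressed as a helper.  ring_dw is ring->ring_size >> 2.
 */
static unsigned cond_exec_count(unsigned cur, unsigned offset, unsigned ring_dw)
{
	return (cur > offset) ? cur - offset : ring_dw - offset + cur;
}

/*
 * Example: with a 1024-dword ring, a dummy dword patched at offset 1020 and
 * a write pointer that has wrapped so that cur = wptr - 1 = 5,
 * cond_exec_count(5, 1020, 1024) == 1024 - 1020 + 5 == 9 dwords to execute.
 */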