Commit ce73516d authored by Luben Tuikov, committed by Alex Deucher

drm/amdgpu: simplify padding calculations (v2)

Simplify padding calculations.

v2: Comment update and spacing.
Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent f4feb9fa
...@@ -228,7 +228,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, ...@@ -228,7 +228,7 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
u32 extra_bits = vmid & 0xf; u32 extra_bits = vmid & 0xf;
/* IB packet must end on a 8 DW boundary */ /* IB packet must end on a 8 DW boundary */
cik_sdma_ring_insert_nop(ring, (12 - (lower_32_bits(ring->wptr) & 7)) % 8); cik_sdma_ring_insert_nop(ring, (4 - lower_32_bits(ring->wptr)) & 7);
amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits)); amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */ amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
...@@ -811,7 +811,7 @@ static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) ...@@ -811,7 +811,7 @@ static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
u32 pad_count; u32 pad_count;
int i; int i;
pad_count = (8 - (ib->length_dw & 0x7)) % 8; pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++) for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0)) if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] = ib->ptr[ib->length_dw++] =
......
...@@ -255,7 +255,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, ...@@ -255,7 +255,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job); unsigned vmid = AMDGPU_JOB_GET_VMID(job);
/* IB packet must end on a 8 DW boundary */ /* IB packet must end on a 8 DW boundary */
sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); sdma_v2_4_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
...@@ -750,7 +750,7 @@ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib ...@@ -750,7 +750,7 @@ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
u32 pad_count; u32 pad_count;
int i; int i;
pad_count = (8 - (ib->length_dw & 0x7)) % 8; pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++) for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0)) if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] = ib->ptr[ib->length_dw++] =
......
...@@ -429,7 +429,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, ...@@ -429,7 +429,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job); unsigned vmid = AMDGPU_JOB_GET_VMID(job);
/* IB packet must end on a 8 DW boundary */ /* IB packet must end on a 8 DW boundary */
sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); sdma_v3_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
...@@ -1021,7 +1021,7 @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib ...@@ -1021,7 +1021,7 @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
u32 pad_count; u32 pad_count;
int i; int i;
pad_count = (8 - (ib->length_dw & 0x7)) % 8; pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++) for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0)) if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] = ib->ptr[ib->length_dw++] =
......
...@@ -698,7 +698,7 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, ...@@ -698,7 +698,7 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job); unsigned vmid = AMDGPU_JOB_GET_VMID(job);
/* IB packet must end on a 8 DW boundary */ /* IB packet must end on a 8 DW boundary */
sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); sdma_v4_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
...@@ -1579,7 +1579,7 @@ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib ...@@ -1579,7 +1579,7 @@ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
u32 pad_count; u32 pad_count;
int i; int i;
pad_count = (8 - (ib->length_dw & 0x7)) % 8; pad_count = (-ib->length_dw) & 7;
for (i = 0; i < pad_count; i++) for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0)) if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] = ib->ptr[ib->length_dw++] =
......
...@@ -382,8 +382,15 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring, ...@@ -382,8 +382,15 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job); unsigned vmid = AMDGPU_JOB_GET_VMID(job);
uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid); uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
/* IB packet must end on a 8 DW boundary */ /* An IB packet must end on a 8 DW boundary--the next dword
sdma_v5_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); * must be on a 8-dword boundary. Our IB packet below is 6
* dwords long, thus add x number of NOPs, such that, in
* modular arithmetic,
* wptr + 6 + x = 8k, k >= 0, which in C is,
* (wptr + 6 + x) % 8 = 0.
* The expression below, is a solution of x.
*/
sdma_v5_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf)); SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
...@@ -1076,10 +1083,10 @@ static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib, ...@@ -1076,10 +1083,10 @@ static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib,
} }
/** /**
* sdma_v5_0_ring_pad_ib - pad the IB to the required number of dw * sdma_v5_0_ring_pad_ib - pad the IB
*
* @ib: indirect buffer to fill with padding * @ib: indirect buffer to fill with padding
* *
* Pad the IB with NOPs to a boundary multiple of 8.
*/ */
static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{ {
...@@ -1087,7 +1094,7 @@ static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib ...@@ -1087,7 +1094,7 @@ static void sdma_v5_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
u32 pad_count; u32 pad_count;
int i; int i;
pad_count = (8 - (ib->length_dw & 0x7)) % 8; pad_count = (-ib->length_dw) & 0x7;
for (i = 0; i < pad_count; i++) for (i = 0; i < pad_count; i++)
if (sdma && sdma->burst_nop && (i == 0)) if (sdma && sdma->burst_nop && (i == 0))
ib->ptr[ib->length_dw++] = ib->ptr[ib->length_dw++] =
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment