Commit f60cbd11 authored by Alex Deucher's avatar Alex Deucher

drm/radeon/kms: Add initial support for async DMA on cayman/TN

There are 2 async DMA engines on cayman, one at 0xd000 and
one at 0xd800.  The programming interface is the same as
evergreen however there are some changes to the commands
for using vmids.
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 233d1ad5
...@@ -2404,6 +2404,8 @@ void evergreen_disable_interrupt_state(struct radeon_device *rdev) ...@@ -2404,6 +2404,8 @@ void evergreen_disable_interrupt_state(struct radeon_device *rdev)
CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
cayman_cp_int_cntl_setup(rdev, 1, 0); cayman_cp_int_cntl_setup(rdev, 1, 0);
cayman_cp_int_cntl_setup(rdev, 2, 0); cayman_cp_int_cntl_setup(rdev, 2, 0);
tmp = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE;
WREG32(CAYMAN_DMA1_CNTL, tmp);
} else } else
WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE; tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
...@@ -2460,7 +2462,7 @@ int evergreen_irq_set(struct radeon_device *rdev) ...@@ -2460,7 +2462,7 @@ int evergreen_irq_set(struct radeon_device *rdev)
u32 grbm_int_cntl = 0; u32 grbm_int_cntl = 0;
u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0; u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
u32 afmt1 = 0, afmt2 = 0, afmt3 = 0, afmt4 = 0, afmt5 = 0, afmt6 = 0; u32 afmt1 = 0, afmt2 = 0, afmt3 = 0, afmt4 = 0, afmt5 = 0, afmt6 = 0;
u32 dma_cntl; u32 dma_cntl, dma_cntl1 = 0;
if (!rdev->irq.installed) { if (!rdev->irq.installed) {
WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
...@@ -2517,6 +2519,14 @@ int evergreen_irq_set(struct radeon_device *rdev) ...@@ -2517,6 +2519,14 @@ int evergreen_irq_set(struct radeon_device *rdev)
dma_cntl |= TRAP_ENABLE; dma_cntl |= TRAP_ENABLE;
} }
if (rdev->family >= CHIP_CAYMAN) {
dma_cntl1 = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE;
if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
DRM_DEBUG("r600_irq_set: sw int dma1\n");
dma_cntl1 |= TRAP_ENABLE;
}
}
if (rdev->irq.crtc_vblank_int[0] || if (rdev->irq.crtc_vblank_int[0] ||
atomic_read(&rdev->irq.pflip[0])) { atomic_read(&rdev->irq.pflip[0])) {
DRM_DEBUG("evergreen_irq_set: vblank 0\n"); DRM_DEBUG("evergreen_irq_set: vblank 0\n");
...@@ -2605,6 +2615,9 @@ int evergreen_irq_set(struct radeon_device *rdev) ...@@ -2605,6 +2615,9 @@ int evergreen_irq_set(struct radeon_device *rdev)
WREG32(DMA_CNTL, dma_cntl); WREG32(DMA_CNTL, dma_cntl);
if (rdev->family >= CHIP_CAYMAN)
WREG32(CAYMAN_DMA1_CNTL, dma_cntl1);
WREG32(GRBM_INT_CNTL, grbm_int_cntl); WREG32(GRBM_INT_CNTL, grbm_int_cntl);
WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1); WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
...@@ -3147,6 +3160,12 @@ int evergreen_irq_process(struct radeon_device *rdev) ...@@ -3147,6 +3160,12 @@ int evergreen_irq_process(struct radeon_device *rdev)
case 233: /* GUI IDLE */ case 233: /* GUI IDLE */
DRM_DEBUG("IH: GUI idle\n"); DRM_DEBUG("IH: GUI idle\n");
break; break;
case 244: /* DMA trap event */
if (rdev->family >= CHIP_CAYMAN) {
DRM_DEBUG("IH: DMA1 trap\n");
radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
}
break;
default: default:
DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
break; break;
......
...@@ -918,6 +918,8 @@ ...@@ -918,6 +918,8 @@
# define CTXEMPTY_INT_ENABLE (1 << 28) # define CTXEMPTY_INT_ENABLE (1 << 28)
#define DMA_TILING_CONFIG 0xD0B8 #define DMA_TILING_CONFIG 0xD0B8
#define CAYMAN_DMA1_CNTL 0xd82c
/* async DMA packets */ /* async DMA packets */
#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \ #define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
(((t) & 0x1) << 23) | \ (((t) & 0x1) << 23) | \
......
...@@ -611,6 +611,8 @@ static void cayman_gpu_init(struct radeon_device *rdev) ...@@ -611,6 +611,8 @@ static void cayman_gpu_init(struct radeon_device *rdev)
WREG32(GB_ADDR_CONFIG, gb_addr_config); WREG32(GB_ADDR_CONFIG, gb_addr_config);
WREG32(DMIF_ADDR_CONFIG, gb_addr_config); WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
WREG32(HDP_ADDR_CONFIG, gb_addr_config); WREG32(HDP_ADDR_CONFIG, gb_addr_config);
WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
tmp = gb_addr_config & NUM_PIPES_MASK; tmp = gb_addr_config & NUM_PIPES_MASK;
tmp = r6xx_remap_render_backend(rdev, tmp, tmp = r6xx_remap_render_backend(rdev, tmp,
...@@ -915,6 +917,7 @@ static void cayman_cp_enable(struct radeon_device *rdev, bool enable) ...@@ -915,6 +917,7 @@ static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT)); WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
WREG32(SCRATCH_UMSK, 0); WREG32(SCRATCH_UMSK, 0);
rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
} }
} }
...@@ -1128,6 +1131,181 @@ static int cayman_cp_resume(struct radeon_device *rdev) ...@@ -1128,6 +1131,181 @@ static int cayman_cp_resume(struct radeon_device *rdev)
return 0; return 0;
} }
/*
* DMA
* Starting with R600, the GPU has an asynchronous
* DMA engine. The programming model is very similar
* to the 3D engine (ring buffer, IBs, etc.), but the
* DMA controller has it's own packet format that is
* different form the PM4 format used by the 3D engine.
* It supports copying data, writing embedded data,
* solid fills, and a number of other things. It also
* has support for tiling/detiling of buffers.
* Cayman and newer support two asynchronous DMA engines.
*/
/**
* cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
*
* @rdev: radeon_device pointer
* @ib: IB object to schedule
*
* Schedule an IB in the DMA ring (cayman-SI).
*/
void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
struct radeon_ib *ib)
{
struct radeon_ring *ring = &rdev->ring[ib->ring];
if (rdev->wb.enabled) {
u32 next_rptr = ring->wptr + 4;
while ((next_rptr & 7) != 5)
next_rptr++;
next_rptr += 3;
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
radeon_ring_write(ring, next_rptr);
}
/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
* Pad as necessary with NOPs.
*/
while ((ring->wptr & 7) != 5)
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}
/**
* cayman_dma_stop - stop the async dma engines
*
* @rdev: radeon_device pointer
*
* Stop the async dma engines (cayman-SI).
*/
void cayman_dma_stop(struct radeon_device *rdev)
{
u32 rb_cntl;
radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
/* dma0 */
rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
rb_cntl &= ~DMA_RB_ENABLE;
WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
/* dma1 */
rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
rb_cntl &= ~DMA_RB_ENABLE;
WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
}
/**
* cayman_dma_resume - setup and start the async dma engines
*
* @rdev: radeon_device pointer
*
* Set up the DMA ring buffers and enable them. (cayman-SI).
* Returns 0 for success, error for failure.
*/
int cayman_dma_resume(struct radeon_device *rdev)
{
struct radeon_ring *ring;
u32 rb_cntl, dma_cntl;
u32 rb_bufsz;
u32 reg_offset, wb_offset;
int i, r;
/* Reset dma */
WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
RREG32(SRBM_SOFT_RESET);
udelay(50);
WREG32(SRBM_SOFT_RESET, 0);
for (i = 0; i < 2; i++) {
if (i == 0) {
ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
reg_offset = DMA0_REGISTER_OFFSET;
wb_offset = R600_WB_DMA_RPTR_OFFSET;
} else {
ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
reg_offset = DMA1_REGISTER_OFFSET;
wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
}
WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
/* Set ring buffer size in dwords */
rb_bufsz = drm_order(ring->ring_size / 4);
rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);
/* Initialize the ring buffer's read and write pointers */
WREG32(DMA_RB_RPTR + reg_offset, 0);
WREG32(DMA_RB_WPTR + reg_offset, 0);
/* set the wb address whether it's enabled or not */
WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
if (rdev->wb.enabled)
rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);
/* enable DMA IBs */
WREG32(DMA_IB_CNTL + reg_offset, DMA_IB_ENABLE | CMD_VMID_FORCE);
dma_cntl = RREG32(DMA_CNTL + reg_offset);
dma_cntl &= ~CTXEMPTY_INT_ENABLE;
WREG32(DMA_CNTL + reg_offset, dma_cntl);
ring->wptr = 0;
WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);
ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;
WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);
ring->ready = true;
r = radeon_ring_test(rdev, ring->idx, ring);
if (r) {
ring->ready = false;
return r;
}
}
radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
return 0;
}
/**
* cayman_dma_fini - tear down the async dma engines
*
* @rdev: radeon_device pointer
*
* Stop the async dma engines and free the rings (cayman-SI).
*/
void cayman_dma_fini(struct radeon_device *rdev)
{
cayman_dma_stop(rdev);
radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
}
static int cayman_gpu_soft_reset(struct radeon_device *rdev) static int cayman_gpu_soft_reset(struct radeon_device *rdev)
{ {
struct evergreen_mc_save save; struct evergreen_mc_save save;
...@@ -1218,6 +1396,32 @@ int cayman_asic_reset(struct radeon_device *rdev) ...@@ -1218,6 +1396,32 @@ int cayman_asic_reset(struct radeon_device *rdev)
return cayman_gpu_soft_reset(rdev); return cayman_gpu_soft_reset(rdev);
} }
/**
* cayman_dma_is_lockup - Check if the DMA engine is locked up
*
* @rdev: radeon_device pointer
* @ring: radeon_ring structure holding ring information
*
* Check if the async DMA engine is locked up (cayman-SI).
* Returns true if the engine appears to be locked up, false if not.
*/
bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
u32 dma_status_reg;
if (ring->idx == R600_RING_TYPE_DMA_INDEX)
dma_status_reg = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
else
dma_status_reg = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
if (dma_status_reg & DMA_IDLE) {
radeon_ring_lockup_update(ring);
return false;
}
/* force ring activities */
radeon_ring_force_activity(rdev, ring);
return radeon_ring_test_lockup(rdev, ring);
}
static int cayman_startup(struct radeon_device *rdev) static int cayman_startup(struct radeon_device *rdev)
{ {
struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
...@@ -1299,6 +1503,18 @@ static int cayman_startup(struct radeon_device *rdev) ...@@ -1299,6 +1503,18 @@ static int cayman_startup(struct radeon_device *rdev)
return r; return r;
} }
r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
if (r) {
dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
return r;
}
r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
if (r) {
dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
return r;
}
/* Enable IRQ */ /* Enable IRQ */
r = r600_irq_init(rdev); r = r600_irq_init(rdev);
if (r) { if (r) {
...@@ -1313,6 +1529,23 @@ static int cayman_startup(struct radeon_device *rdev) ...@@ -1313,6 +1529,23 @@ static int cayman_startup(struct radeon_device *rdev)
0, 0xfffff, RADEON_CP_PACKET2); 0, 0xfffff, RADEON_CP_PACKET2);
if (r) if (r)
return r; return r;
ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
if (r)
return r;
ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
if (r)
return r;
r = cayman_cp_load_microcode(rdev); r = cayman_cp_load_microcode(rdev);
if (r) if (r)
return r; return r;
...@@ -1320,6 +1553,10 @@ static int cayman_startup(struct radeon_device *rdev) ...@@ -1320,6 +1553,10 @@ static int cayman_startup(struct radeon_device *rdev)
if (r) if (r)
return r; return r;
r = cayman_dma_resume(rdev);
if (r)
return r;
r = radeon_ib_pool_init(rdev); r = radeon_ib_pool_init(rdev);
if (r) { if (r) {
dev_err(rdev->dev, "IB initialization failed (%d).\n", r); dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
...@@ -1364,7 +1601,7 @@ int cayman_suspend(struct radeon_device *rdev) ...@@ -1364,7 +1601,7 @@ int cayman_suspend(struct radeon_device *rdev)
{ {
r600_audio_fini(rdev); r600_audio_fini(rdev);
cayman_cp_enable(rdev, false); cayman_cp_enable(rdev, false);
rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; cayman_dma_stop(rdev);
evergreen_irq_suspend(rdev); evergreen_irq_suspend(rdev);
radeon_wb_disable(rdev); radeon_wb_disable(rdev);
cayman_pcie_gart_disable(rdev); cayman_pcie_gart_disable(rdev);
...@@ -1431,6 +1668,14 @@ int cayman_init(struct radeon_device *rdev) ...@@ -1431,6 +1668,14 @@ int cayman_init(struct radeon_device *rdev)
ring->ring_obj = NULL; ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 1024 * 1024); r600_ring_init(rdev, ring, 1024 * 1024);
ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 64 * 1024);
ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
ring->ring_obj = NULL;
r600_ring_init(rdev, ring, 64 * 1024);
rdev->ih.ring_obj = NULL; rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024); r600_ih_ring_init(rdev, 64 * 1024);
...@@ -1443,6 +1688,7 @@ int cayman_init(struct radeon_device *rdev) ...@@ -1443,6 +1688,7 @@ int cayman_init(struct radeon_device *rdev)
if (r) { if (r) {
dev_err(rdev->dev, "disabling GPU acceleration\n"); dev_err(rdev->dev, "disabling GPU acceleration\n");
cayman_cp_fini(rdev); cayman_cp_fini(rdev);
cayman_dma_fini(rdev);
r600_irq_fini(rdev); r600_irq_fini(rdev);
if (rdev->flags & RADEON_IS_IGP) if (rdev->flags & RADEON_IS_IGP)
si_rlc_fini(rdev); si_rlc_fini(rdev);
...@@ -1473,6 +1719,7 @@ void cayman_fini(struct radeon_device *rdev) ...@@ -1473,6 +1719,7 @@ void cayman_fini(struct radeon_device *rdev)
{ {
r600_blit_fini(rdev); r600_blit_fini(rdev);
cayman_cp_fini(rdev); cayman_cp_fini(rdev);
cayman_dma_fini(rdev);
r600_irq_fini(rdev); r600_irq_fini(rdev);
if (rdev->flags & RADEON_IS_IGP) if (rdev->flags & RADEON_IS_IGP)
si_rlc_fini(rdev); si_rlc_fini(rdev);
...@@ -1606,3 +1853,26 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) ...@@ -1606,3 +1853,26 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
radeon_ring_write(ring, 0x0); radeon_ring_write(ring, 0x0);
} }
void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
struct radeon_ring *ring = &rdev->ring[ridx];
if (vm == NULL)
return;
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
/* flush hdp cache */
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
radeon_ring_write(ring, 1);
/* bits 0-7 are the VM contexts0-7 */
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
radeon_ring_write(ring, 1 << vm->id);
}
...@@ -50,6 +50,24 @@ ...@@ -50,6 +50,24 @@
#define VMID(x) (((x) & 0x7) << 0) #define VMID(x) (((x) & 0x7) << 0)
#define SRBM_STATUS 0x0E50 #define SRBM_STATUS 0x0E50
#define SRBM_SOFT_RESET 0x0E60
#define SOFT_RESET_BIF (1 << 1)
#define SOFT_RESET_CG (1 << 2)
#define SOFT_RESET_DC (1 << 5)
#define SOFT_RESET_DMA1 (1 << 6)
#define SOFT_RESET_GRBM (1 << 8)
#define SOFT_RESET_HDP (1 << 9)
#define SOFT_RESET_IH (1 << 10)
#define SOFT_RESET_MC (1 << 11)
#define SOFT_RESET_RLC (1 << 13)
#define SOFT_RESET_ROM (1 << 14)
#define SOFT_RESET_SEM (1 << 15)
#define SOFT_RESET_VMC (1 << 17)
#define SOFT_RESET_DMA (1 << 20)
#define SOFT_RESET_TST (1 << 21)
#define SOFT_RESET_REGBB (1 << 22)
#define SOFT_RESET_ORB (1 << 23)
#define VM_CONTEXT0_REQUEST_RESPONSE 0x1470 #define VM_CONTEXT0_REQUEST_RESPONSE 0x1470
#define REQUEST_TYPE(x) (((x) & 0xf) << 0) #define REQUEST_TYPE(x) (((x) & 0xf) << 0)
#define RESPONSE_TYPE_MASK 0x000000F0 #define RESPONSE_TYPE_MASK 0x000000F0
...@@ -599,5 +617,62 @@ ...@@ -599,5 +617,62 @@
#define PACKET3_SET_APPEND_CNT 0x75 #define PACKET3_SET_APPEND_CNT 0x75
#define PACKET3_ME_WRITE 0x7A #define PACKET3_ME_WRITE 0x7A
/* ASYNC DMA - first instance at 0xd000, second at 0xd800 */
#define DMA0_REGISTER_OFFSET 0x0 /* not a register */
#define DMA1_REGISTER_OFFSET 0x800 /* not a register */
#define DMA_RB_CNTL 0xd000
# define DMA_RB_ENABLE (1 << 0)
# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
#define DMA_RB_BASE 0xd004
#define DMA_RB_RPTR 0xd008
#define DMA_RB_WPTR 0xd00c
#define DMA_RB_RPTR_ADDR_HI 0xd01c
#define DMA_RB_RPTR_ADDR_LO 0xd020
#define DMA_IB_CNTL 0xd024
# define DMA_IB_ENABLE (1 << 0)
# define DMA_IB_SWAP_ENABLE (1 << 4)
# define CMD_VMID_FORCE (1 << 31)
#define DMA_IB_RPTR 0xd028
#define DMA_CNTL 0xd02c
# define TRAP_ENABLE (1 << 0)
# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
# define SEM_WAIT_INT_ENABLE (1 << 2)
# define DATA_SWAP_ENABLE (1 << 3)
# define FENCE_SWAP_ENABLE (1 << 4)
# define CTXEMPTY_INT_ENABLE (1 << 28)
#define DMA_STATUS_REG 0xd034
# define DMA_IDLE (1 << 0)
#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0xd044
#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0xd048
#define DMA_TILING_CONFIG 0xd0b8
#define DMA_MODE 0xd0bc
#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
(((t) & 0x1) << 23) | \
(((s) & 0x1) << 22) | \
(((n) & 0xFFFFF) << 0))
#define DMA_IB_PACKET(cmd, vmid, n) ((((cmd) & 0xF) << 28) | \
(((vmid) & 0xF) << 20) | \
(((n) & 0xFFFFF) << 0))
/* async DMA Packet types */
#define DMA_PACKET_WRITE 0x2
#define DMA_PACKET_COPY 0x3
#define DMA_PACKET_INDIRECT_BUFFER 0x4
#define DMA_PACKET_SEMAPHORE 0x5
#define DMA_PACKET_FENCE 0x6
#define DMA_PACKET_TRAP 0x7
#define DMA_PACKET_SRBM_WRITE 0x9
#define DMA_PACKET_CONSTANT_FILL 0xd
#define DMA_PACKET_NOP 0xf
#endif #endif
...@@ -109,7 +109,7 @@ extern int radeon_lockup_timeout; ...@@ -109,7 +109,7 @@ extern int radeon_lockup_timeout;
#define RADEON_BIOS_NUM_SCRATCH 8 #define RADEON_BIOS_NUM_SCRATCH 8
/* max number of rings */ /* max number of rings */
#define RADEON_NUM_RINGS 4 #define RADEON_NUM_RINGS 5
/* fence seq are set to this number when signaled */ /* fence seq are set to this number when signaled */
#define RADEON_FENCE_SIGNALED_SEQ 0LL #define RADEON_FENCE_SIGNALED_SEQ 0LL
...@@ -124,6 +124,8 @@ extern int radeon_lockup_timeout; ...@@ -124,6 +124,8 @@ extern int radeon_lockup_timeout;
/* R600+ has an async dma ring */ /* R600+ has an async dma ring */
#define R600_RING_TYPE_DMA_INDEX 3 #define R600_RING_TYPE_DMA_INDEX 3
/* cayman add a second async dma ring */
#define CAYMAN_RING_TYPE_DMA1_INDEX 4
/* hardcode those limit for now */ /* hardcode those limit for now */
#define RADEON_VA_IB_OFFSET (1 << 20) #define RADEON_VA_IB_OFFSET (1 << 20)
...@@ -893,6 +895,7 @@ struct radeon_wb { ...@@ -893,6 +895,7 @@ struct radeon_wb {
#define RADEON_WB_CP2_RPTR_OFFSET 1536 #define RADEON_WB_CP2_RPTR_OFFSET 1536
#define R600_WB_DMA_RPTR_OFFSET 1792 #define R600_WB_DMA_RPTR_OFFSET 1792
#define R600_WB_IH_WPTR_OFFSET 2048 #define R600_WB_IH_WPTR_OFFSET 2048
#define CAYMAN_WB_DMA1_RPTR_OFFSET 2304
#define R600_WB_EVENT_OFFSET 3072 #define R600_WB_EVENT_OFFSET 3072
/** /**
......
...@@ -1481,6 +1481,26 @@ static struct radeon_asic cayman_asic = { ...@@ -1481,6 +1481,26 @@ static struct radeon_asic cayman_asic = {
.ib_test = &r600_ib_test, .ib_test = &r600_ib_test,
.is_lockup = &evergreen_gpu_is_lockup, .is_lockup = &evergreen_gpu_is_lockup,
.vm_flush = &cayman_vm_flush, .vm_flush = &cayman_vm_flush,
},
[R600_RING_TYPE_DMA_INDEX] = {
.ib_execute = &cayman_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = NULL,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &cayman_dma_is_lockup,
.vm_flush = &cayman_dma_vm_flush,
},
[CAYMAN_RING_TYPE_DMA1_INDEX] = {
.ib_execute = &cayman_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = NULL,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &cayman_dma_is_lockup,
.vm_flush = &cayman_dma_vm_flush,
} }
}, },
.irq = { .irq = {
...@@ -1497,8 +1517,8 @@ static struct radeon_asic cayman_asic = { ...@@ -1497,8 +1517,8 @@ static struct radeon_asic cayman_asic = {
.copy = { .copy = {
.blit = &r600_copy_blit, .blit = &r600_copy_blit,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
.dma = NULL, .dma = &evergreen_copy_dma,
.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
.copy = &r600_copy_blit, .copy = &r600_copy_blit,
.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
}, },
...@@ -1586,6 +1606,26 @@ static struct radeon_asic trinity_asic = { ...@@ -1586,6 +1606,26 @@ static struct radeon_asic trinity_asic = {
.ib_test = &r600_ib_test, .ib_test = &r600_ib_test,
.is_lockup = &evergreen_gpu_is_lockup, .is_lockup = &evergreen_gpu_is_lockup,
.vm_flush = &cayman_vm_flush, .vm_flush = &cayman_vm_flush,
},
[R600_RING_TYPE_DMA_INDEX] = {
.ib_execute = &cayman_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = NULL,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &cayman_dma_is_lockup,
.vm_flush = &cayman_dma_vm_flush,
},
[CAYMAN_RING_TYPE_DMA1_INDEX] = {
.ib_execute = &cayman_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = NULL,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &cayman_dma_is_lockup,
.vm_flush = &cayman_dma_vm_flush,
} }
}, },
.irq = { .irq = {
...@@ -1602,8 +1642,8 @@ static struct radeon_asic trinity_asic = { ...@@ -1602,8 +1642,8 @@ static struct radeon_asic trinity_asic = {
.copy = { .copy = {
.blit = &r600_copy_blit, .blit = &r600_copy_blit,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
.dma = NULL, .dma = &evergreen_copy_dma,
.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
.copy = &r600_copy_blit, .copy = &r600_copy_blit,
.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
}, },
......
...@@ -470,6 +470,10 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe, ...@@ -470,6 +470,10 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
uint64_t addr, unsigned count, uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags); uint32_t incr, uint32_t flags);
int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
struct radeon_ib *ib);
bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
/* DCE6 - SI */ /* DCE6 - SI */
void dce6_bandwidth_update(struct radeon_device *rdev); void dce6_bandwidth_update(struct radeon_device *rdev);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment