Commit f60cbd11 authored by Alex Deucher's avatar Alex Deucher

drm/radeon/kms: Add initial support for async DMA on cayman/TN

There are 2 async DMA engines on cayman, one at 0xd000 and
one at 0xd800.  The programming interface is the same as
evergreen however there are some changes to the commands
for using vmids.
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 233d1ad5
...@@ -2404,6 +2404,8 @@ void evergreen_disable_interrupt_state(struct radeon_device *rdev) ...@@ -2404,6 +2404,8 @@ void evergreen_disable_interrupt_state(struct radeon_device *rdev)
CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
cayman_cp_int_cntl_setup(rdev, 1, 0); cayman_cp_int_cntl_setup(rdev, 1, 0);
cayman_cp_int_cntl_setup(rdev, 2, 0); cayman_cp_int_cntl_setup(rdev, 2, 0);
tmp = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE;
WREG32(CAYMAN_DMA1_CNTL, tmp);
} else } else
WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE; tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
...@@ -2460,7 +2462,7 @@ int evergreen_irq_set(struct radeon_device *rdev) ...@@ -2460,7 +2462,7 @@ int evergreen_irq_set(struct radeon_device *rdev)
u32 grbm_int_cntl = 0; u32 grbm_int_cntl = 0;
u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0; u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
u32 afmt1 = 0, afmt2 = 0, afmt3 = 0, afmt4 = 0, afmt5 = 0, afmt6 = 0; u32 afmt1 = 0, afmt2 = 0, afmt3 = 0, afmt4 = 0, afmt5 = 0, afmt6 = 0;
u32 dma_cntl; u32 dma_cntl, dma_cntl1 = 0;
if (!rdev->irq.installed) { if (!rdev->irq.installed) {
WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
...@@ -2517,6 +2519,14 @@ int evergreen_irq_set(struct radeon_device *rdev) ...@@ -2517,6 +2519,14 @@ int evergreen_irq_set(struct radeon_device *rdev)
dma_cntl |= TRAP_ENABLE; dma_cntl |= TRAP_ENABLE;
} }
if (rdev->family >= CHIP_CAYMAN) {
dma_cntl1 = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE;
if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
DRM_DEBUG("r600_irq_set: sw int dma1\n");
dma_cntl1 |= TRAP_ENABLE;
}
}
if (rdev->irq.crtc_vblank_int[0] || if (rdev->irq.crtc_vblank_int[0] ||
atomic_read(&rdev->irq.pflip[0])) { atomic_read(&rdev->irq.pflip[0])) {
DRM_DEBUG("evergreen_irq_set: vblank 0\n"); DRM_DEBUG("evergreen_irq_set: vblank 0\n");
...@@ -2605,6 +2615,9 @@ int evergreen_irq_set(struct radeon_device *rdev) ...@@ -2605,6 +2615,9 @@ int evergreen_irq_set(struct radeon_device *rdev)
WREG32(DMA_CNTL, dma_cntl); WREG32(DMA_CNTL, dma_cntl);
if (rdev->family >= CHIP_CAYMAN)
WREG32(CAYMAN_DMA1_CNTL, dma_cntl1);
WREG32(GRBM_INT_CNTL, grbm_int_cntl); WREG32(GRBM_INT_CNTL, grbm_int_cntl);
WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1); WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
...@@ -3147,6 +3160,12 @@ int evergreen_irq_process(struct radeon_device *rdev) ...@@ -3147,6 +3160,12 @@ int evergreen_irq_process(struct radeon_device *rdev)
case 233: /* GUI IDLE */ case 233: /* GUI IDLE */
DRM_DEBUG("IH: GUI idle\n"); DRM_DEBUG("IH: GUI idle\n");
break; break;
case 244: /* DMA trap event */
if (rdev->family >= CHIP_CAYMAN) {
DRM_DEBUG("IH: DMA1 trap\n");
radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
}
break;
default: default:
DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
break; break;
......
...@@ -918,6 +918,8 @@ ...@@ -918,6 +918,8 @@
# define CTXEMPTY_INT_ENABLE (1 << 28) # define CTXEMPTY_INT_ENABLE (1 << 28)
#define DMA_TILING_CONFIG 0xD0B8 #define DMA_TILING_CONFIG 0xD0B8
#define CAYMAN_DMA1_CNTL 0xd82c
/* async DMA packets */ /* async DMA packets */
#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \ #define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
(((t) & 0x1) << 23) | \ (((t) & 0x1) << 23) | \
......
This diff is collapsed.
...@@ -50,6 +50,24 @@ ...@@ -50,6 +50,24 @@
#define VMID(x) (((x) & 0x7) << 0) #define VMID(x) (((x) & 0x7) << 0)
#define SRBM_STATUS 0x0E50 #define SRBM_STATUS 0x0E50
#define SRBM_SOFT_RESET 0x0E60
#define SOFT_RESET_BIF (1 << 1)
#define SOFT_RESET_CG (1 << 2)
#define SOFT_RESET_DC (1 << 5)
#define SOFT_RESET_DMA1 (1 << 6)
#define SOFT_RESET_GRBM (1 << 8)
#define SOFT_RESET_HDP (1 << 9)
#define SOFT_RESET_IH (1 << 10)
#define SOFT_RESET_MC (1 << 11)
#define SOFT_RESET_RLC (1 << 13)
#define SOFT_RESET_ROM (1 << 14)
#define SOFT_RESET_SEM (1 << 15)
#define SOFT_RESET_VMC (1 << 17)
#define SOFT_RESET_DMA (1 << 20)
#define SOFT_RESET_TST (1 << 21)
#define SOFT_RESET_REGBB (1 << 22)
#define SOFT_RESET_ORB (1 << 23)
#define VM_CONTEXT0_REQUEST_RESPONSE 0x1470 #define VM_CONTEXT0_REQUEST_RESPONSE 0x1470
#define REQUEST_TYPE(x) (((x) & 0xf) << 0) #define REQUEST_TYPE(x) (((x) & 0xf) << 0)
#define RESPONSE_TYPE_MASK 0x000000F0 #define RESPONSE_TYPE_MASK 0x000000F0
...@@ -599,5 +617,62 @@ ...@@ -599,5 +617,62 @@
#define PACKET3_SET_APPEND_CNT 0x75 #define PACKET3_SET_APPEND_CNT 0x75
#define PACKET3_ME_WRITE 0x7A #define PACKET3_ME_WRITE 0x7A
/* ASYNC DMA - first instance at 0xd000, second at 0xd800 */
#define DMA0_REGISTER_OFFSET 0x0 /* not a register */
#define DMA1_REGISTER_OFFSET 0x800 /* not a register */
#define DMA_RB_CNTL 0xd000
# define DMA_RB_ENABLE (1 << 0)
# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
#define DMA_RB_BASE 0xd004
#define DMA_RB_RPTR 0xd008
#define DMA_RB_WPTR 0xd00c
#define DMA_RB_RPTR_ADDR_HI 0xd01c
#define DMA_RB_RPTR_ADDR_LO 0xd020
#define DMA_IB_CNTL 0xd024
# define DMA_IB_ENABLE (1 << 0)
# define DMA_IB_SWAP_ENABLE (1 << 4)
# define CMD_VMID_FORCE (1 << 31)
#define DMA_IB_RPTR 0xd028
#define DMA_CNTL 0xd02c
# define TRAP_ENABLE (1 << 0)
# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
# define SEM_WAIT_INT_ENABLE (1 << 2)
# define DATA_SWAP_ENABLE (1 << 3)
# define FENCE_SWAP_ENABLE (1 << 4)
# define CTXEMPTY_INT_ENABLE (1 << 28)
#define DMA_STATUS_REG 0xd034
# define DMA_IDLE (1 << 0)
#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0xd044
#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0xd048
#define DMA_TILING_CONFIG 0xd0b8
#define DMA_MODE 0xd0bc
#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
(((t) & 0x1) << 23) | \
(((s) & 0x1) << 22) | \
(((n) & 0xFFFFF) << 0))
#define DMA_IB_PACKET(cmd, vmid, n) ((((cmd) & 0xF) << 28) | \
(((vmid) & 0xF) << 20) | \
(((n) & 0xFFFFF) << 0))
/* async DMA Packet types */
#define DMA_PACKET_WRITE 0x2
#define DMA_PACKET_COPY 0x3
#define DMA_PACKET_INDIRECT_BUFFER 0x4
#define DMA_PACKET_SEMAPHORE 0x5
#define DMA_PACKET_FENCE 0x6
#define DMA_PACKET_TRAP 0x7
#define DMA_PACKET_SRBM_WRITE 0x9
#define DMA_PACKET_CONSTANT_FILL 0xd
#define DMA_PACKET_NOP 0xf
#endif #endif
...@@ -109,7 +109,7 @@ extern int radeon_lockup_timeout; ...@@ -109,7 +109,7 @@ extern int radeon_lockup_timeout;
#define RADEON_BIOS_NUM_SCRATCH 8 #define RADEON_BIOS_NUM_SCRATCH 8
/* max number of rings */ /* max number of rings */
#define RADEON_NUM_RINGS 4 #define RADEON_NUM_RINGS 5
/* fence seq are set to this number when signaled */ /* fence seq are set to this number when signaled */
#define RADEON_FENCE_SIGNALED_SEQ 0LL #define RADEON_FENCE_SIGNALED_SEQ 0LL
...@@ -124,6 +124,8 @@ extern int radeon_lockup_timeout; ...@@ -124,6 +124,8 @@ extern int radeon_lockup_timeout;
/* R600+ has an async dma ring */ /* R600+ has an async dma ring */
#define R600_RING_TYPE_DMA_INDEX 3 #define R600_RING_TYPE_DMA_INDEX 3
/* cayman add a second async dma ring */
#define CAYMAN_RING_TYPE_DMA1_INDEX 4
/* hardcode those limit for now */ /* hardcode those limit for now */
#define RADEON_VA_IB_OFFSET (1 << 20) #define RADEON_VA_IB_OFFSET (1 << 20)
...@@ -893,6 +895,7 @@ struct radeon_wb { ...@@ -893,6 +895,7 @@ struct radeon_wb {
#define RADEON_WB_CP2_RPTR_OFFSET 1536 #define RADEON_WB_CP2_RPTR_OFFSET 1536
#define R600_WB_DMA_RPTR_OFFSET 1792 #define R600_WB_DMA_RPTR_OFFSET 1792
#define R600_WB_IH_WPTR_OFFSET 2048 #define R600_WB_IH_WPTR_OFFSET 2048
#define CAYMAN_WB_DMA1_RPTR_OFFSET 2304
#define R600_WB_EVENT_OFFSET 3072 #define R600_WB_EVENT_OFFSET 3072
/** /**
......
...@@ -1481,6 +1481,26 @@ static struct radeon_asic cayman_asic = { ...@@ -1481,6 +1481,26 @@ static struct radeon_asic cayman_asic = {
.ib_test = &r600_ib_test, .ib_test = &r600_ib_test,
.is_lockup = &evergreen_gpu_is_lockup, .is_lockup = &evergreen_gpu_is_lockup,
.vm_flush = &cayman_vm_flush, .vm_flush = &cayman_vm_flush,
},
[R600_RING_TYPE_DMA_INDEX] = {
.ib_execute = &cayman_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = NULL,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &cayman_dma_is_lockup,
.vm_flush = &cayman_dma_vm_flush,
},
[CAYMAN_RING_TYPE_DMA1_INDEX] = {
.ib_execute = &cayman_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = NULL,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &cayman_dma_is_lockup,
.vm_flush = &cayman_dma_vm_flush,
} }
}, },
.irq = { .irq = {
...@@ -1497,8 +1517,8 @@ static struct radeon_asic cayman_asic = { ...@@ -1497,8 +1517,8 @@ static struct radeon_asic cayman_asic = {
.copy = { .copy = {
.blit = &r600_copy_blit, .blit = &r600_copy_blit,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
.dma = NULL, .dma = &evergreen_copy_dma,
.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
.copy = &r600_copy_blit, .copy = &r600_copy_blit,
.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
}, },
...@@ -1586,6 +1606,26 @@ static struct radeon_asic trinity_asic = { ...@@ -1586,6 +1606,26 @@ static struct radeon_asic trinity_asic = {
.ib_test = &r600_ib_test, .ib_test = &r600_ib_test,
.is_lockup = &evergreen_gpu_is_lockup, .is_lockup = &evergreen_gpu_is_lockup,
.vm_flush = &cayman_vm_flush, .vm_flush = &cayman_vm_flush,
},
[R600_RING_TYPE_DMA_INDEX] = {
.ib_execute = &cayman_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = NULL,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &cayman_dma_is_lockup,
.vm_flush = &cayman_dma_vm_flush,
},
[CAYMAN_RING_TYPE_DMA1_INDEX] = {
.ib_execute = &cayman_dma_ring_ib_execute,
.emit_fence = &evergreen_dma_fence_ring_emit,
.emit_semaphore = &r600_dma_semaphore_ring_emit,
.cs_parse = NULL,
.ring_test = &r600_dma_ring_test,
.ib_test = &r600_dma_ib_test,
.is_lockup = &cayman_dma_is_lockup,
.vm_flush = &cayman_dma_vm_flush,
} }
}, },
.irq = { .irq = {
...@@ -1602,8 +1642,8 @@ static struct radeon_asic trinity_asic = { ...@@ -1602,8 +1642,8 @@ static struct radeon_asic trinity_asic = {
.copy = { .copy = {
.blit = &r600_copy_blit, .blit = &r600_copy_blit,
.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX, .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
.dma = NULL, .dma = &evergreen_copy_dma,
.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX, .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
.copy = &r600_copy_blit, .copy = &r600_copy_blit,
.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX, .copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
}, },
......
...@@ -470,6 +470,10 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe, ...@@ -470,6 +470,10 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
uint64_t addr, unsigned count, uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags); uint32_t incr, uint32_t flags);
int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
struct radeon_ib *ib);
bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
/* DCE6 - SI */ /* DCE6 - SI */
void dce6_bandwidth_update(struct radeon_device *rdev); void dce6_bandwidth_update(struct radeon_device *rdev);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment