Commit 963e81f9 authored by Alex Deucher

drm/radeon/cik: Add support for compute queues (v4)

On CIK, the compute rings work slightly differently than
on previous ASICs; however, the basic concepts are the same.

The main differences:
- New MEC engines for compute queues
- Multiple queues per MEC:
  - CI/KB: 1 MEC, 4 pipes per MEC, 8 queues per pipe = 32 queues
  -    KV: 2 MEC, 4 pipes per MEC, 8 queues per pipe = 64 queues
- Queues can be allocated and scheduled by another queue
- New doorbell aperture allows you to assign space in the aperture
  for the wptr which allows for userspace access to queues

v2: add wptr shadow, fix eop setup
v3: fix comment
v4: switch to new callback method
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Jerome Glisse <jglisse@redhat.com>
parent 75efdee1
This diff is collapsed.
...@@ -460,6 +460,13 @@ ...@@ -460,6 +460,13 @@
# define RDERR_INT_ENABLE (1 << 0) # define RDERR_INT_ENABLE (1 << 0)
# define GUI_IDLE_INT_ENABLE (1 << 19) # define GUI_IDLE_INT_ENABLE (1 << 19)
/*
 * CPC / CPF status register offsets (status, busy and stalled variants).
 * NOTE(review): presumably the compute-side command processor status
 * registers used for the new MEC engines -- confirm against the register spec.
 */
#define CP_CPC_STATUS 0x8210
#define CP_CPC_BUSY_STAT 0x8214
#define CP_CPC_STALLED_STAT1 0x8218
#define CP_CPF_STATUS 0x821c
#define CP_CPF_BUSY_STAT 0x8220
#define CP_CPF_STALLED_STAT1 0x8224
#define CP_MEC_CNTL 0x8234 #define CP_MEC_CNTL 0x8234
#define MEC_ME2_HALT (1 << 28) #define MEC_ME2_HALT (1 << 28)
#define MEC_ME1_HALT (1 << 30) #define MEC_ME1_HALT (1 << 30)
...@@ -468,6 +475,12 @@ ...@@ -468,6 +475,12 @@
#define MEC_ME2_HALT (1 << 28) #define MEC_ME2_HALT (1 << 28)
#define MEC_ME1_HALT (1 << 30) #define MEC_ME1_HALT (1 << 30)
/*
 * CP stall/status register offsets. Note that the numeric suffixes are not
 * in address order: STAT3 is at the lowest offset, followed by STAT1, STAT2.
 */
#define CP_STALLED_STAT3 0x8670
#define CP_STALLED_STAT1 0x8674
#define CP_STALLED_STAT2 0x8678
#define CP_STAT 0x8680
#define CP_ME_CNTL 0x86D8 #define CP_ME_CNTL 0x86D8
#define CP_CE_HALT (1 << 24) #define CP_CE_HALT (1 << 24)
#define CP_PFP_HALT (1 << 26) #define CP_PFP_HALT (1 << 26)
...@@ -701,6 +714,11 @@ ...@@ -701,6 +714,11 @@
# define CP_RINGID1_INT_STAT (1 << 30) # define CP_RINGID1_INT_STAT (1 << 30)
# define CP_RINGID0_INT_STAT (1 << 31) # define CP_RINGID0_INT_STAT (1 << 31)
/* CPF debug register offset. */
#define CP_CPF_DEBUG 0xC200
/*
 * Write-pointer poll control register offset.
 * WPTR_POLL_EN is bit 31; it is listed directly under CP_PQ_WPTR_POLL_CNTL
 * and is presumably the enable bit of that register -- TODO confirm.
 */
#define CP_PQ_WPTR_POLL_CNTL 0xC20C
#define WPTR_POLL_EN (1 << 31)
#define CP_ME1_PIPE0_INT_CNTL 0xC214 #define CP_ME1_PIPE0_INT_CNTL 0xC214
#define CP_ME1_PIPE1_INT_CNTL 0xC218 #define CP_ME1_PIPE1_INT_CNTL 0xC218
#define CP_ME1_PIPE2_INT_CNTL 0xC21C #define CP_ME1_PIPE2_INT_CNTL 0xC21C
...@@ -773,6 +791,50 @@ ...@@ -773,6 +791,50 @@
#define RLC_GPM_SCRATCH_ADDR 0xC4B0 #define RLC_GPM_SCRATCH_ADDR 0xC4B0
#define RLC_GPM_SCRATCH_DATA 0xC4B4 #define RLC_GPM_SCRATCH_DATA 0xC4B4
/*
 * Register offsets and bit fields added for CIK compute queues.
 * Grouping below follows the name prefixes (HPD EOP, MQD, HQD).
 * NOTE(review): each field macro is assumed to belong to the register
 * defined immediately above it; confirm against the register spec.
 */

/* HPD EOP buffer: base address (low/high words), VMID and control. */
#define CP_HPD_EOP_BASE_ADDR 0xC904
#define CP_HPD_EOP_BASE_ADDR_HI 0xC908
#define CP_HPD_EOP_VMID 0xC90C
#define CP_HPD_EOP_CONTROL 0xC910
/* 6-bit size field at bits 5:0. */
#define EOP_SIZE(x) ((x) << 0)
#define EOP_SIZE_MASK (0x3f << 0)

/* MQD base address (low/high words). */
#define CP_MQD_BASE_ADDR 0xC914
#define CP_MQD_BASE_ADDR_HI 0xC918

/* HQD state: active flag, VMID, PQ base and pointers. */
#define CP_HQD_ACTIVE 0xC91C
#define CP_HQD_VMID 0xC920
#define CP_HQD_PQ_BASE 0xC934
#define CP_HQD_PQ_BASE_HI 0xC938
#define CP_HQD_PQ_RPTR 0xC93C
#define CP_HQD_PQ_RPTR_REPORT_ADDR 0xC940
#define CP_HQD_PQ_RPTR_REPORT_ADDR_HI 0xC944
#define CP_HQD_PQ_WPTR_POLL_ADDR 0xC948
#define CP_HQD_PQ_WPTR_POLL_ADDR_HI 0xC94C

/* Doorbell control: 21-bit offset field at bits 22:2, flags in bits 28-31. */
#define CP_HQD_PQ_DOORBELL_CONTROL 0xC950
#define DOORBELL_OFFSET(x) ((x) << 2)
#define DOORBELL_OFFSET_MASK (0x1fffff << 2)
#define DOORBELL_SOURCE (1 << 28)
#define DOORBELL_SCHD_HIT (1 << 29)
#define DOORBELL_EN (1 << 30)
#define DOORBELL_HIT (1 << 31)
#define CP_HQD_PQ_WPTR 0xC954

/*
 * PQ control: 6-bit queue size at bits 5:0, 6-bit rptr block size at
 * bits 13:8, flags in bits 26-31.
 */
#define CP_HQD_PQ_CONTROL 0xC958
#define QUEUE_SIZE(x) ((x) << 0)
#define QUEUE_SIZE_MASK (0x3f << 0)
#define RPTR_BLOCK_SIZE(x) ((x) << 8)
#define RPTR_BLOCK_SIZE_MASK (0x3f << 8)
#define PQ_VOLATILE (1 << 26)
#define NO_UPDATE_RPTR (1 << 27)
#define UNORD_DISPATCH (1 << 28)
#define ROQ_PQ_IB_FLIP (1 << 29)
#define PRIV_STATE (1 << 30)
#define KMD_QUEUE (1 << 31)

#define CP_HQD_DEQUEUE_REQUEST 0xC974

/* MQD control: 4-bit VMID field at bits 3:0. */
#define CP_MQD_CONTROL 0xC99C
#define MQD_VMID(x) ((x) << 0)
#define MQD_VMID_MASK (0xf << 0)
#define PA_SC_RASTER_CONFIG 0x28350 #define PA_SC_RASTER_CONFIG 0x28350
# define RASTER_CONFIG_RB_MAP_0 0 # define RASTER_CONFIG_RB_MAP_0 0
# define RASTER_CONFIG_RB_MAP_1 1 # define RASTER_CONFIG_RB_MAP_1 1
......
...@@ -709,6 +709,22 @@ struct radeon_ring { ...@@ -709,6 +709,22 @@ struct radeon_ring {
u32 idx; u32 idx;
u64 last_semaphore_signal_addr; u64 last_semaphore_signal_addr;
u64 last_semaphore_wait_addr; u64 last_semaphore_wait_addr;
/* for CIK queues */
u32 me;
u32 pipe;
u32 queue;
struct radeon_bo *mqd_obj;
u32 doorbell_page_num;
u32 doorbell_offset;
unsigned wptr_offs;
};
/*
 * MEC (compute engine) bookkeeping for CIK compute queues.
 * Per the commit description: CI/KB have 1 MEC and KV has 2, each with
 * 4 pipes per MEC and 8 queues per pipe.
 */
struct radeon_mec {
/* buffer object for the HPD EOP area (inferred from the name -- TODO confirm) */
struct radeon_bo *hpd_eop_obj;
/* GPU address associated with hpd_eop_obj (presumably; verify at the allocation site) */
u64 hpd_eop_gpu_addr;
/* pipe count; NOTE(review): per-MEC vs. total is not visible here -- confirm */
u32 num_pipe;
/* number of MEC engines */
u32 num_mec;
/* queue count; NOTE(review): per-pipe vs. total is not visible here -- confirm */
u32 num_queue;
}; };
/* /*
...@@ -966,6 +982,8 @@ struct radeon_wb { ...@@ -966,6 +982,8 @@ struct radeon_wb {
#define CAYMAN_WB_DMA1_RPTR_OFFSET 2304 #define CAYMAN_WB_DMA1_RPTR_OFFSET 2304
#define R600_WB_UVD_RPTR_OFFSET 2560 #define R600_WB_UVD_RPTR_OFFSET 2560
#define R600_WB_EVENT_OFFSET 3072 #define R600_WB_EVENT_OFFSET 3072
/*
 * Offsets for the CP1/CP2 write pointers, continuing the 256-byte spacing
 * of the preceding *_OFFSET entries (3072 -> 3328 -> 3584).
 * NOTE(review): presumably the "wptr shadow" slots mentioned in the v2
 * changelog -- confirm against the users of radeon_ring.wptr_offs.
 */
#define CIK_WB_CP1_WPTR_OFFSET 3328
#define CIK_WB_CP2_WPTR_OFFSET 3584
/** /**
* struct radeon_pm - power management datas * struct radeon_pm - power management datas
...@@ -1759,6 +1777,7 @@ struct radeon_device { ...@@ -1759,6 +1777,7 @@ struct radeon_device {
int msi_enabled; /* msi enabled */ int msi_enabled; /* msi enabled */
struct r600_ih ih; /* r6/700 interrupt ring */ struct r600_ih ih; /* r6/700 interrupt ring */
struct si_rlc rlc; struct si_rlc rlc;
struct radeon_mec mec;
struct work_struct hotplug_work; struct work_struct hotplug_work;
struct work_struct audio_work; struct work_struct audio_work;
struct work_struct reset_work; struct work_struct reset_work;
......
...@@ -121,9 +121,7 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority ...@@ -121,9 +121,7 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
p->ring = RADEON_RING_TYPE_GFX_INDEX; p->ring = RADEON_RING_TYPE_GFX_INDEX;
break; break;
case RADEON_CS_RING_COMPUTE: case RADEON_CS_RING_COMPUTE:
if (p->rdev->family >= CHIP_BONAIRE) if (p->rdev->family >= CHIP_TAHITI) {
p->ring = RADEON_RING_TYPE_GFX_INDEX;
else if (p->rdev->family >= CHIP_TAHITI) {
if (p->priority > 0) if (p->priority > 0)
p->ring = CAYMAN_RING_TYPE_CP1_INDEX; p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
else else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment